{ "best_metric": 2.1108951568603516, "best_model_checkpoint": "/home/nlp/matan_avitan/git/vec2text/saves/train_on_bios/checkpoint-259966", "epoch": 49.916666666666664, "eval_steps": 625, "global_step": 259966, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 9.6e-05, "loss": 10.0588, "step": 12 }, { "epoch": 0.0, "learning_rate": 0.000192, "loss": 9.0806, "step": 24 }, { "epoch": 0.01, "learning_rate": 0.000288, "loss": 8.8436, "step": 36 }, { "epoch": 0.01, "learning_rate": 0.000384, "loss": 8.5908, "step": 48 }, { "epoch": 0.01, "learning_rate": 0.00048, "loss": 8.4078, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.000576, "loss": 8.254, "step": 72 }, { "epoch": 0.02, "learning_rate": 0.0006720000000000001, "loss": 8.0822, "step": 84 }, { "epoch": 0.02, "learning_rate": 0.000768, "loss": 7.9411, "step": 96 }, { "epoch": 0.02, "learning_rate": 0.000864, "loss": 7.7923, "step": 108 }, { "epoch": 0.02, "learning_rate": 0.00096, "loss": 7.6285, "step": 120 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 7.4645, "step": 132 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 7.3039, "step": 144 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 7.1368, "step": 156 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 6.9908, "step": 168 }, { "epoch": 0.03, "learning_rate": 0.001, "loss": 6.8706, "step": 180 }, { "epoch": 0.04, "learning_rate": 0.001, "loss": 6.7611, "step": 192 }, { "epoch": 0.04, "learning_rate": 0.001, "loss": 6.668, "step": 204 }, { "epoch": 0.04, "learning_rate": 0.001, "loss": 6.5827, "step": 216 }, { "epoch": 0.04, "learning_rate": 0.001, "loss": 6.5161, "step": 228 }, { "epoch": 0.05, "learning_rate": 0.001, "loss": 6.4537, "step": 240 }, { "epoch": 0.05, "learning_rate": 0.001, "loss": 6.3921, "step": 252 }, { "epoch": 0.05, "learning_rate": 0.001, "loss": 6.3573, "step": 264 }, { "epoch": 0.05, "learning_rate": 0.001, "loss": 6.3111, "step": 276 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 6.267, "step": 288 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 6.2284, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 6.1643, "step": 312 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 6.1673, "step": 324 }, { "epoch": 0.06, "learning_rate": 0.001, "loss": 6.1339, "step": 336 }, { "epoch": 0.07, "learning_rate": 0.001, "loss": 6.1094, "step": 348 }, { "epoch": 0.07, "learning_rate": 0.001, "loss": 6.0815, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.001, "loss": 6.058, "step": 372 }, { "epoch": 0.07, "learning_rate": 0.001, "loss": 6.0346, "step": 384 }, { "epoch": 0.08, "learning_rate": 0.001, "loss": 5.9907, "step": 396 }, { "epoch": 0.08, "learning_rate": 0.001, "loss": 5.9701, "step": 408 }, { "epoch": 0.08, "learning_rate": 0.001, "loss": 5.9687, "step": 420 }, { "epoch": 0.08, "learning_rate": 0.001, "loss": 5.9362, "step": 432 }, { "epoch": 0.09, "learning_rate": 0.001, "loss": 5.9169, "step": 444 }, { "epoch": 0.09, "learning_rate": 0.001, "loss": 5.8856, "step": 456 }, { "epoch": 0.09, "learning_rate": 0.001, "loss": 5.8743, "step": 468 }, { "epoch": 0.09, "learning_rate": 0.001, "loss": 5.8533, "step": 480 }, { "epoch": 0.09, "learning_rate": 0.001, "loss": 5.8393, "step": 492 }, { "epoch": 0.1, "learning_rate": 0.001, "loss": 5.8127, "step": 504 }, { "epoch": 0.1, "learning_rate": 0.001, "loss": 5.7933, "step": 516 }, { "epoch": 0.1, "learning_rate": 0.001, "loss": 5.7799, "step": 528 }, { "epoch": 0.1, "learning_rate": 0.001, "loss": 5.7717, "step": 540 }, { "epoch": 0.11, "learning_rate": 0.001, "loss": 5.7435, "step": 552 }, { "epoch": 0.11, "learning_rate": 0.001, "loss": 5.7376, "step": 564 }, { "epoch": 0.11, "learning_rate": 0.001, "loss": 5.7223, "step": 576 }, { "epoch": 0.11, "learning_rate": 0.001, "loss": 5.6961, "step": 588 }, { "epoch": 0.12, "learning_rate": 0.001, "loss": 5.6851, "step": 600 }, { "epoch": 0.12, "learning_rate": 0.001, "loss": 5.6679, "step": 612 }, { "epoch": 0.12, "learning_rate": 0.001, "loss": 5.6604, "step": 624 }, { "epoch": 0.12, "eval_ag_news_accuracy": 0.125, "eval_ag_news_bleu_score": 0.7076014520866056, "eval_ag_news_bleu_score_sem": 0.03414116463338299, "eval_ag_news_emb_cos_sim": 0.21779999136924744, "eval_ag_news_emb_cos_sim_sem": 0.0047666913456596936, "eval_ag_news_emb_top1_equal": 0.015625, "eval_ag_news_emb_top1_equal_sem": 0.011004959004867984, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 6.598365306854248, "eval_ag_news_n_ngrams_match_1": 2.252, "eval_ag_news_n_ngrams_match_2": 0.162, "eval_ag_news_n_ngrams_match_3": 0.02, "eval_ag_news_num_pred_words": 25.952, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 733.894515808332, "eval_ag_news_pred_num_tokens": 62.6171875, "eval_ag_news_rouge_score": 0.06449897772590718, "eval_ag_news_runtime": 11.491, "eval_ag_news_samples_per_second": 43.512, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.08013141855362527, "eval_ag_news_token_set_f1_sem": 0.0030168278878077133, "eval_ag_news_token_set_precision": 0.049011952780084765, "eval_ag_news_token_set_recall": 0.37340358913233485, "eval_ag_news_true_num_tokens": 56.09375, "step": 625 }, { "epoch": 0.12, "eval_anthropic_toxic_prompts_accuracy": 0.03975, "eval_anthropic_toxic_prompts_bleu_score": 0.6415384184960544, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.04574896028341354, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.16644136607646942, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.006283958794261147, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 6.232697486877441, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 0.846, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.038, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.004, "eval_anthropic_toxic_prompts_num_pred_words": 21.81, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 509.1269962366393, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.703125, "eval_anthropic_toxic_prompts_rouge_score": 0.050466309613359225, "eval_anthropic_toxic_prompts_runtime": 11.0626, "eval_anthropic_toxic_prompts_samples_per_second": 45.197, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.07067156960891947, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.003784809498639409, "eval_anthropic_toxic_prompts_token_set_precision": 0.04966828658845425, "eval_anthropic_toxic_prompts_token_set_recall": 0.21656006934242233, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 625 }, { "epoch": 0.12, "eval_arxiv_accuracy": 0.1473125, "eval_arxiv_bleu_score": 0.7817510336725016, "eval_arxiv_bleu_score_sem": 0.02753647830505165, "eval_arxiv_emb_cos_sim": 0.2147493064403534, "eval_arxiv_emb_cos_sim_sem": 0.003958968396644919, "eval_arxiv_emb_top1_equal": 0.0390625, "eval_arxiv_emb_top1_equal_sem": 0.017191973462108996, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 6.37405252456665, "eval_arxiv_n_ngrams_match_1": 2.168, "eval_arxiv_n_ngrams_match_2": 0.128, "eval_arxiv_n_ngrams_match_3": 0.002, "eval_arxiv_num_pred_words": 28.868, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 586.4295400542229, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.05873686213768187, "eval_arxiv_runtime": 10.5122, "eval_arxiv_samples_per_second": 47.564, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.06343817217647532, "eval_arxiv_token_set_f1_sem": 0.0020879189531666778, "eval_arxiv_token_set_precision": 0.03457650963707991, "eval_arxiv_token_set_recall": 0.6167626984126982, "eval_arxiv_true_num_tokens": 64.0, "step": 625 }, { "epoch": 0.12, "eval_python_code_alpaca_accuracy": 0.06821875, "eval_python_code_alpaca_bleu_score": 1.0450194418170362, "eval_python_code_alpaca_bleu_score_sem": 0.035346566865746755, "eval_python_code_alpaca_emb_cos_sim": 0.1467011570930481, "eval_python_code_alpaca_emb_cos_sim_sem": 0.005004210568987345, "eval_python_code_alpaca_emb_top1_equal": 0.0, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 6.127904891967773, "eval_python_code_alpaca_n_ngrams_match_1": 1.634, "eval_python_code_alpaca_n_ngrams_match_2": 0.008, "eval_python_code_alpaca_n_ngrams_match_3": 0.002, "eval_python_code_alpaca_num_pred_words": 22.842, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 458.47459992795496, "eval_python_code_alpaca_pred_num_tokens": 62.765625, "eval_python_code_alpaca_rouge_score": 0.08181687675669569, "eval_python_code_alpaca_runtime": 10.8834, "eval_python_code_alpaca_samples_per_second": 45.942, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.11634515823362704, "eval_python_code_alpaca_token_set_f1_sem": 0.0026517246590390905, "eval_python_code_alpaca_token_set_precision": 0.06694592367448347, "eval_python_code_alpaca_token_set_recall": 0.6800358585858585, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 625 }, { "epoch": 0.12, "eval_wikibio_accuracy": 0.13315625, "eval_wikibio_bleu_score": 0.7767738705191737, "eval_wikibio_bleu_score_sem": 0.059009427236449793, "eval_wikibio_emb_cos_sim": 0.2736821472644806, "eval_wikibio_emb_cos_sim_sem": 0.007596860125700324, "eval_wikibio_emb_top1_equal": 0.03125, "eval_wikibio_emb_top1_equal_sem": 0.015439349450344106, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.738673210144043, "eval_wikibio_n_ngrams_match_1": 2.176, "eval_wikibio_n_ngrams_match_2": 0.332, "eval_wikibio_n_ngrams_match_3": 0.08, "eval_wikibio_num_pred_words": 29.816, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 310.65196754973863, "eval_wikibio_pred_num_tokens": 62.0625, "eval_wikibio_rouge_score": 0.06659998277535806, "eval_wikibio_runtime": 9.9756, "eval_wikibio_samples_per_second": 50.122, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.09418651497142766, "eval_wikibio_token_set_f1_sem": 0.0044443076778509834, "eval_wikibio_token_set_precision": 0.06717098926076812, "eval_wikibio_token_set_recall": 0.26601288770994785, "eval_wikibio_true_num_tokens": 61.1328125, "step": 625 }, { "epoch": 0.12, "eval_nq_accuracy": 0.2171875, "eval_nq_bleu_score": 1.5076008565197407, "eval_nq_bleu_score_sem": 0.08413672986162012, "eval_nq_emb_cos_sim": 0.265103280544281, "eval_nq_emb_cos_sim_sem": 0.00787410999773908, "eval_nq_emb_top1_equal": 0.078125, "eval_nq_emb_top1_equal_sem": 0.023813825516515504, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 5.423642635345459, "eval_nq_n_ngrams_match_1": 6.076, "eval_nq_n_ngrams_match_2": 1.036, "eval_nq_n_ngrams_match_3": 0.226, "eval_nq_num_pred_words": 35.068, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 226.70341817027813, "eval_nq_pred_num_tokens": 61.453125, "eval_nq_rouge_score": 0.1211578668951579, "eval_nq_runtime": 10.3634, "eval_nq_samples_per_second": 48.247, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.15437157625852624, "eval_nq_token_set_f1_sem": 0.00485683145133116, "eval_nq_token_set_precision": 0.10064937158566319, "eval_nq_token_set_recall": 0.501620294333327, "eval_nq_true_num_tokens": 64.0, "step": 625 }, { "epoch": 0.12, "learning_rate": 0.001, "loss": 5.6436, "step": 636 }, { "epoch": 0.12, "learning_rate": 0.001, "loss": 5.6172, "step": 648 }, { "epoch": 0.13, "learning_rate": 0.001, "loss": 5.6093, "step": 660 }, { "epoch": 0.13, "learning_rate": 0.001, "loss": 5.6059, "step": 672 }, { "epoch": 0.13, "learning_rate": 0.001, "loss": 5.5919, "step": 684 }, { "epoch": 0.13, "learning_rate": 0.001, "loss": 5.5829, "step": 696 }, { "epoch": 0.14, "learning_rate": 0.001, "loss": 5.5524, "step": 708 }, { "epoch": 0.14, "learning_rate": 0.001, "loss": 5.5551, "step": 720 }, { "epoch": 0.14, "learning_rate": 0.001, "loss": 5.5233, "step": 732 }, { "epoch": 0.14, "learning_rate": 0.001, "loss": 5.5187, "step": 744 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 5.5111, "step": 756 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 5.4952, "step": 768 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 5.4776, "step": 780 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 5.4606, "step": 792 }, { "epoch": 0.15, "learning_rate": 0.001, "loss": 5.4627, "step": 804 }, { "epoch": 0.16, "learning_rate": 0.001, "loss": 5.4523, "step": 816 }, { "epoch": 0.16, "learning_rate": 0.001, "loss": 5.4317, "step": 828 }, { "epoch": 0.16, "learning_rate": 0.001, "loss": 5.4328, "step": 840 }, { "epoch": 0.16, "learning_rate": 0.001, "loss": 5.421, "step": 852 }, { "epoch": 0.17, "learning_rate": 0.001, "loss": 5.4102, "step": 864 }, { "epoch": 0.17, "learning_rate": 0.001, "loss": 5.3909, "step": 876 }, { "epoch": 0.17, "learning_rate": 0.001, "loss": 5.3922, "step": 888 }, { "epoch": 0.17, "learning_rate": 0.001, "loss": 5.382, "step": 900 }, { "epoch": 0.18, "learning_rate": 0.001, "loss": 5.3687, "step": 912 }, { "epoch": 0.18, "learning_rate": 0.001, "loss": 5.3497, "step": 924 }, { "epoch": 0.18, "learning_rate": 0.001, "loss": 5.345, "step": 936 }, { "epoch": 0.18, "learning_rate": 0.001, "loss": 5.3393, "step": 948 }, { "epoch": 0.18, "learning_rate": 0.001, "loss": 5.3281, "step": 960 }, { "epoch": 0.19, "learning_rate": 0.001, "loss": 5.3162, "step": 972 }, { "epoch": 0.19, "learning_rate": 0.001, "loss": 5.3198, "step": 984 }, { "epoch": 0.19, "learning_rate": 0.001, "loss": 5.2957, "step": 996 }, { "epoch": 0.19, "learning_rate": 0.001, "loss": 5.2922, "step": 1008 }, { "epoch": 0.2, "learning_rate": 0.001, "loss": 5.284, "step": 1020 }, { "epoch": 0.2, "learning_rate": 0.001, "loss": 5.2713, "step": 1032 }, { "epoch": 0.2, "learning_rate": 0.001, "loss": 5.2516, "step": 1044 }, { "epoch": 0.2, "learning_rate": 0.001, "loss": 5.2495, "step": 1056 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 5.2442, "step": 1068 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 5.2302, "step": 1080 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 5.2222, "step": 1092 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 5.2026, "step": 1104 }, { "epoch": 0.21, "learning_rate": 0.001, "loss": 5.1956, "step": 1116 }, { "epoch": 0.22, "learning_rate": 0.001, "loss": 5.1944, "step": 1128 }, { "epoch": 0.22, "learning_rate": 0.001, "loss": 5.1968, "step": 1140 }, { "epoch": 0.22, "learning_rate": 0.001, "loss": 5.1797, "step": 1152 }, { "epoch": 0.22, "learning_rate": 0.001, "loss": 5.1708, "step": 1164 }, { "epoch": 0.23, "learning_rate": 0.001, "loss": 5.1591, "step": 1176 }, { "epoch": 0.23, "learning_rate": 0.001, "loss": 5.1583, "step": 1188 }, { "epoch": 0.23, "learning_rate": 0.001, "loss": 5.1363, "step": 1200 }, { "epoch": 0.23, "learning_rate": 0.001, "loss": 5.1344, "step": 1212 }, { "epoch": 0.24, "learning_rate": 0.001, "loss": 5.1172, "step": 1224 }, { "epoch": 0.24, "learning_rate": 0.001, "loss": 5.1126, "step": 1236 }, { "epoch": 0.24, "learning_rate": 0.001, "loss": 5.1075, "step": 1248 }, { "epoch": 0.24, "eval_ag_news_accuracy": 0.14090625, "eval_ag_news_bleu_score": 1.2347401162864637, "eval_ag_news_bleu_score_sem": 0.049933663695827225, "eval_ag_news_emb_cos_sim": 0.27115195989608765, "eval_ag_news_emb_cos_sim_sem": 0.007412213465285081, "eval_ag_news_emb_top1_equal": 0.03125, "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 6.150851249694824, "eval_ag_news_n_ngrams_match_1": 4.292, "eval_ag_news_n_ngrams_match_2": 0.43, "eval_ag_news_n_ngrams_match_3": 0.082, "eval_ag_news_num_pred_words": 33.876, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 469.1165521854841, "eval_ag_news_pred_num_tokens": 62.28125, "eval_ag_news_rouge_score": 0.10640590724771785, "eval_ag_news_runtime": 10.8415, "eval_ag_news_samples_per_second": 46.119, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.1386755571959265, "eval_ag_news_token_set_f1_sem": 0.003893407533770644, "eval_ag_news_token_set_precision": 0.09695214653796702, "eval_ag_news_token_set_recall": 0.3288685225973883, "eval_ag_news_true_num_tokens": 56.09375, "step": 1250 }, { "epoch": 0.24, "eval_anthropic_toxic_prompts_accuracy": 0.04528125, "eval_anthropic_toxic_prompts_bleu_score": 1.029767975734896, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05067200138853742, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2049163579940796, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007331099221147833, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.0, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 5.778225421905518, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 1.714, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.134, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.012, "eval_anthropic_toxic_prompts_num_pred_words": 30.022, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 323.1851639280978, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.2265625, "eval_anthropic_toxic_prompts_rouge_score": 0.06978179678525673, "eval_anthropic_toxic_prompts_runtime": 9.717, "eval_anthropic_toxic_prompts_samples_per_second": 51.456, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.11283037790319016, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0043284125012183995, "eval_anthropic_toxic_prompts_token_set_precision": 0.09766515866904854, "eval_anthropic_toxic_prompts_token_set_recall": 0.1815796536295251, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1250 }, { "epoch": 0.24, "eval_arxiv_accuracy": 0.166125, "eval_arxiv_bleu_score": 0.7123611794447666, "eval_arxiv_bleu_score_sem": 0.0359630612674515, "eval_arxiv_emb_cos_sim": 0.23540450632572174, "eval_arxiv_emb_cos_sim_sem": 0.004519463200650853, "eval_arxiv_emb_top1_equal": 0.09375, "eval_arxiv_emb_top1_equal_sem": 0.025864720141013958, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 5.919602394104004, "eval_arxiv_n_ngrams_match_1": 2.836, "eval_arxiv_n_ngrams_match_2": 0.14, "eval_arxiv_n_ngrams_match_3": 0.004, "eval_arxiv_num_pred_words": 16.358, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 372.2636702164734, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.076071754092774, "eval_arxiv_runtime": 10.4855, "eval_arxiv_samples_per_second": 47.685, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.08661780722950992, "eval_arxiv_token_set_f1_sem": 0.0025155717633808484, "eval_arxiv_token_set_precision": 0.05187472128557089, "eval_arxiv_token_set_recall": 0.34516107648801186, "eval_arxiv_true_num_tokens": 64.0, "step": 1250 }, { "epoch": 0.24, "eval_python_code_alpaca_accuracy": 0.0756875, "eval_python_code_alpaca_bleu_score": 1.2812924148682532, "eval_python_code_alpaca_bleu_score_sem": 0.051918882153093135, "eval_python_code_alpaca_emb_cos_sim": 0.17265605926513672, "eval_python_code_alpaca_emb_cos_sim_sem": 0.005516537122773645, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 5.793320178985596, "eval_python_code_alpaca_n_ngrams_match_1": 2.068, "eval_python_code_alpaca_n_ngrams_match_2": 0.05, "eval_python_code_alpaca_n_ngrams_match_3": 0.002, "eval_python_code_alpaca_num_pred_words": 14.108, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 328.100570572479, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.11775269936913832, "eval_python_code_alpaca_runtime": 9.4971, "eval_python_code_alpaca_samples_per_second": 52.648, "eval_python_code_alpaca_steps_per_second": 0.105, "eval_python_code_alpaca_token_set_f1": 0.14949715891184465, "eval_python_code_alpaca_token_set_f1_sem": 0.003332086710413263, "eval_python_code_alpaca_token_set_precision": 0.10263999337191698, "eval_python_code_alpaca_token_set_recall": 0.39765223691856894, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1250 }, { "epoch": 0.24, "eval_wikibio_accuracy": 0.149375, "eval_wikibio_bleu_score": 1.5607964385224509, "eval_wikibio_bleu_score_sem": 0.08738723969099557, "eval_wikibio_emb_cos_sim": 0.31114959716796875, "eval_wikibio_emb_cos_sim_sem": 0.008725569192434765, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.656447887420654, "eval_wikibio_n_ngrams_match_1": 3.778, "eval_wikibio_n_ngrams_match_2": 0.722, "eval_wikibio_n_ngrams_match_3": 0.166, "eval_wikibio_num_pred_words": 24.21, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 286.1304676571906, "eval_wikibio_pred_num_tokens": 60.7734375, "eval_wikibio_rouge_score": 0.11962860085433155, "eval_wikibio_runtime": 9.8295, "eval_wikibio_samples_per_second": 50.867, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.14192687736854545, "eval_wikibio_token_set_f1_sem": 0.005483227298569409, "eval_wikibio_token_set_precision": 0.11340557673465658, "eval_wikibio_token_set_recall": 0.2624057504726079, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1250 }, { "epoch": 0.24, "eval_nq_accuracy": 0.25834375, "eval_nq_bleu_score": 2.4396129370396635, "eval_nq_bleu_score_sem": 0.12587618744705167, "eval_nq_emb_cos_sim": 0.3528878092765808, "eval_nq_emb_cos_sim_sem": 0.009834176846607543, "eval_nq_emb_top1_equal": 0.0625, "eval_nq_emb_top1_equal_sem": 0.02147948148198014, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 4.811520576477051, "eval_nq_n_ngrams_match_1": 9.094, "eval_nq_n_ngrams_match_2": 1.872, "eval_nq_n_ngrams_match_3": 0.484, "eval_nq_num_pred_words": 36.498, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 122.91838228692436, "eval_nq_pred_num_tokens": 61.9296875, "eval_nq_rouge_score": 0.17772016140424224, "eval_nq_runtime": 10.0914, "eval_nq_samples_per_second": 49.547, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.21363380083645164, "eval_nq_token_set_f1_sem": 0.004906987320581662, "eval_nq_token_set_precision": 0.14957695896721576, "eval_nq_token_set_recall": 0.46750880878337747, "eval_nq_true_num_tokens": 64.0, "step": 1250 }, { "epoch": 0.24, "learning_rate": 0.001, "loss": 5.0774, "step": 1260 }, { "epoch": 0.24, "learning_rate": 0.001, "loss": 5.0766, "step": 1272 }, { "epoch": 0.25, "learning_rate": 0.001, "loss": 5.0612, "step": 1284 }, { "epoch": 0.25, "learning_rate": 0.001, "loss": 5.0689, "step": 1296 }, { "epoch": 0.25, "learning_rate": 0.001, "loss": 5.0523, "step": 1308 }, { "epoch": 0.25, "learning_rate": 0.001, "loss": 5.0493, "step": 1320 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 5.0359, "step": 1332 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 5.0325, "step": 1344 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 5.0255, "step": 1356 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 5.0155, "step": 1368 }, { "epoch": 0.26, "learning_rate": 0.001, "loss": 5.003, "step": 1380 }, { "epoch": 0.27, "learning_rate": 0.001, "loss": 4.9981, "step": 1392 }, { "epoch": 0.27, "learning_rate": 0.001, "loss": 4.9914, "step": 1404 }, { "epoch": 0.27, "learning_rate": 0.001, "loss": 4.978, "step": 1416 }, { "epoch": 0.27, "learning_rate": 0.001, "loss": 4.9747, "step": 1428 }, { "epoch": 0.28, "learning_rate": 0.001, "loss": 4.9638, "step": 1440 }, { "epoch": 0.28, "learning_rate": 0.001, "loss": 4.9597, "step": 1452 }, { "epoch": 0.28, "learning_rate": 0.001, "loss": 4.9502, "step": 1464 }, { "epoch": 0.28, "learning_rate": 0.001, "loss": 4.9458, "step": 1476 }, { "epoch": 0.29, "learning_rate": 0.001, "loss": 4.9398, "step": 1488 }, { "epoch": 0.29, "learning_rate": 0.001, "loss": 4.9153, "step": 1500 }, { "epoch": 0.29, "learning_rate": 0.001, "loss": 4.9193, "step": 1512 }, { "epoch": 0.29, "learning_rate": 0.001, "loss": 4.9167, "step": 1524 }, { "epoch": 0.29, "learning_rate": 0.001, "loss": 4.9042, "step": 1536 }, { "epoch": 0.3, "learning_rate": 0.001, "loss": 4.8995, "step": 1548 }, { "epoch": 0.3, "learning_rate": 0.001, "loss": 4.8845, "step": 1560 }, { "epoch": 0.3, "learning_rate": 0.001, "loss": 4.8734, "step": 1572 }, { "epoch": 0.3, "learning_rate": 0.001, "loss": 4.8802, "step": 1584 }, { "epoch": 0.31, "learning_rate": 0.001, "loss": 4.855, "step": 1596 }, { "epoch": 0.31, "learning_rate": 0.001, "loss": 4.876, "step": 1608 }, { "epoch": 0.31, "learning_rate": 0.001, "loss": 4.8573, "step": 1620 }, { "epoch": 0.31, "learning_rate": 0.001, "loss": 4.8555, "step": 1632 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 4.8417, "step": 1644 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 4.837, "step": 1656 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 4.8314, "step": 1668 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 4.8279, "step": 1680 }, { "epoch": 0.32, "learning_rate": 0.001, "loss": 4.8274, "step": 1692 }, { "epoch": 0.33, "learning_rate": 0.001, "loss": 4.8241, "step": 1704 }, { "epoch": 0.33, "learning_rate": 0.001, "loss": 4.8036, "step": 1716 }, { "epoch": 0.33, "learning_rate": 0.001, "loss": 4.8028, "step": 1728 }, { "epoch": 0.33, "learning_rate": 0.001, "loss": 4.803, "step": 1740 }, { "epoch": 0.34, "learning_rate": 0.001, "loss": 4.796, "step": 1752 }, { "epoch": 0.34, "learning_rate": 0.001, "loss": 4.7845, "step": 1764 }, { "epoch": 0.34, "learning_rate": 0.001, "loss": 4.7982, "step": 1776 }, { "epoch": 0.34, "learning_rate": 0.001, "loss": 4.7735, "step": 1788 }, { "epoch": 0.35, "learning_rate": 0.001, "loss": 4.766, "step": 1800 }, { "epoch": 0.35, "learning_rate": 0.001, "loss": 4.7596, "step": 1812 }, { "epoch": 0.35, "learning_rate": 0.001, "loss": 4.751, "step": 1824 }, { "epoch": 0.35, "learning_rate": 0.001, "loss": 4.7453, "step": 1836 }, { "epoch": 0.35, "learning_rate": 0.001, "loss": 4.741, "step": 1848 }, { "epoch": 0.36, "learning_rate": 0.001, "loss": 4.744, "step": 1860 }, { "epoch": 0.36, "learning_rate": 0.001, "loss": 4.7302, "step": 1872 }, { "epoch": 0.36, "eval_ag_news_accuracy": 0.1599375, "eval_ag_news_bleu_score": 1.5492879738464664, "eval_ag_news_bleu_score_sem": 0.05764465401039229, "eval_ag_news_emb_cos_sim": 0.3223033845424652, "eval_ag_news_emb_cos_sim_sem": 0.009460101175560086, "eval_ag_news_emb_top1_equal": 0.03125, "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.799770832061768, "eval_ag_news_n_ngrams_match_1": 5.07, "eval_ag_news_n_ngrams_match_2": 0.562, "eval_ag_news_n_ngrams_match_3": 0.096, "eval_ag_news_num_pred_words": 35.554, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 330.22387451316945, "eval_ag_news_pred_num_tokens": 61.015625, "eval_ag_news_rouge_score": 0.12322004814007076, "eval_ag_news_runtime": 10.0912, "eval_ag_news_samples_per_second": 49.548, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.1565384936721467, "eval_ag_news_token_set_f1_sem": 0.00383566004187165, "eval_ag_news_token_set_precision": 0.11254903170419046, "eval_ag_news_token_set_recall": 0.3222754532574314, "eval_ag_news_true_num_tokens": 56.09375, "step": 1875 }, { "epoch": 0.36, "eval_anthropic_toxic_prompts_accuracy": 0.0491875, "eval_anthropic_toxic_prompts_bleu_score": 1.214368903496161, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06396182987976606, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.22693181037902832, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00968666090118589, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.0, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 5.420243263244629, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 1.762, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.15, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.022, "eval_anthropic_toxic_prompts_num_pred_words": 31.006, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 225.93407727423414, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.0859375, "eval_anthropic_toxic_prompts_rouge_score": 0.0748185811340395, "eval_anthropic_toxic_prompts_runtime": 16.2273, "eval_anthropic_toxic_prompts_samples_per_second": 30.812, "eval_anthropic_toxic_prompts_steps_per_second": 0.062, "eval_anthropic_toxic_prompts_token_set_f1": 0.11576930926170863, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004504946056209329, "eval_anthropic_toxic_prompts_token_set_precision": 0.10694872887009735, "eval_anthropic_toxic_prompts_token_set_recall": 0.1707539659626981, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 1875 }, { "epoch": 0.36, "eval_arxiv_accuracy": 0.18940625, "eval_arxiv_bleu_score": 1.0254523469573953, "eval_arxiv_bleu_score_sem": 0.03942704472519825, "eval_arxiv_emb_cos_sim": 0.2698151469230652, "eval_arxiv_emb_cos_sim_sem": 0.005664845955782909, "eval_arxiv_emb_top1_equal": 0.140625, "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 5.602920055389404, "eval_arxiv_n_ngrams_match_1": 4.622, "eval_arxiv_n_ngrams_match_2": 0.442, "eval_arxiv_n_ngrams_match_3": 0.012, "eval_arxiv_num_pred_words": 22.514, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 271.21722156321505, "eval_arxiv_pred_num_tokens": 62.796875, "eval_arxiv_rouge_score": 0.12605652585231003, "eval_arxiv_runtime": 9.8688, "eval_arxiv_samples_per_second": 50.665, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.116413515795875, "eval_arxiv_token_set_f1_sem": 0.0030111242923623062, "eval_arxiv_token_set_precision": 0.07199300037495716, "eval_arxiv_token_set_recall": 0.3818007419738849, "eval_arxiv_true_num_tokens": 64.0, "step": 1875 }, { "epoch": 0.36, "eval_python_code_alpaca_accuracy": 0.07840625, "eval_python_code_alpaca_bleu_score": 1.9046998170337497, "eval_python_code_alpaca_bleu_score_sem": 0.053333640205557684, "eval_python_code_alpaca_emb_cos_sim": 0.19778388738632202, "eval_python_code_alpaca_emb_cos_sim_sem": 0.005881379341274675, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 5.510037899017334, "eval_python_code_alpaca_n_ngrams_match_1": 2.732, "eval_python_code_alpaca_n_ngrams_match_2": 0.104, "eval_python_code_alpaca_n_ngrams_match_3": 0.004, "eval_python_code_alpaca_num_pred_words": 15.638, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 247.1604940299708, "eval_python_code_alpaca_pred_num_tokens": 62.8671875, "eval_python_code_alpaca_rouge_score": 0.14933505420530963, "eval_python_code_alpaca_runtime": 10.4618, "eval_python_code_alpaca_samples_per_second": 47.793, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.1731586772698891, "eval_python_code_alpaca_token_set_f1_sem": 0.003806368806522163, "eval_python_code_alpaca_token_set_precision": 0.1299234640088901, "eval_python_code_alpaca_token_set_recall": 0.32511951610481055, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 1875 }, { "epoch": 0.36, "eval_wikibio_accuracy": 0.157625, "eval_wikibio_bleu_score": 1.9069811407916746, "eval_wikibio_bleu_score_sem": 0.09573417862296345, "eval_wikibio_emb_cos_sim": 0.33923864364624023, "eval_wikibio_emb_cos_sim_sem": 0.008974367966095145, "eval_wikibio_emb_top1_equal": 0.015625, "eval_wikibio_emb_top1_equal_sem": 0.011004959004867984, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.6722869873046875, "eval_wikibio_n_ngrams_match_1": 4.24, "eval_wikibio_n_ngrams_match_2": 0.85, "eval_wikibio_n_ngrams_match_3": 0.204, "eval_wikibio_num_pred_words": 28.526, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 290.6985987537537, "eval_wikibio_pred_num_tokens": 60.6796875, "eval_wikibio_rouge_score": 0.12333534589947309, "eval_wikibio_runtime": 9.9222, "eval_wikibio_samples_per_second": 50.392, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.1597959754397675, "eval_wikibio_token_set_f1_sem": 0.005884556348911185, "eval_wikibio_token_set_precision": 0.13065982922471994, "eval_wikibio_token_set_recall": 0.2603553779766988, "eval_wikibio_true_num_tokens": 61.1328125, "step": 1875 }, { "epoch": 0.36, "eval_nq_accuracy": 0.2873125, "eval_nq_bleu_score": 3.2101702127784275, "eval_nq_bleu_score_sem": 0.1718251519000673, "eval_nq_emb_cos_sim": 0.40342414379119873, "eval_nq_emb_cos_sim_sem": 0.010269663110113403, "eval_nq_emb_top1_equal": 0.1015625, "eval_nq_emb_top1_equal_sem": 0.026804565886848545, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 4.413300037384033, "eval_nq_n_ngrams_match_1": 11.13, "eval_nq_n_ngrams_match_2": 2.414, "eval_nq_n_ngrams_match_3": 0.668, "eval_nq_num_pred_words": 40.122, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 82.54140426988461, "eval_nq_pred_num_tokens": 60.0234375, "eval_nq_rouge_score": 0.211280173412138, "eval_nq_runtime": 10.3836, "eval_nq_samples_per_second": 48.153, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.25103762490627235, "eval_nq_token_set_f1_sem": 0.004830772676500314, "eval_nq_token_set_precision": 0.18247951034071636, "eval_nq_token_set_recall": 0.46558207220765374, "eval_nq_true_num_tokens": 64.0, "step": 1875 }, { "epoch": 0.36, "learning_rate": 0.001, "loss": 4.7273, "step": 1884 }, { "epoch": 0.36, "learning_rate": 0.001, "loss": 4.7098, "step": 1896 }, { "epoch": 0.37, "learning_rate": 0.001, "loss": 4.726, "step": 1908 }, { "epoch": 0.37, "learning_rate": 0.001, "loss": 4.7215, "step": 1920 }, { "epoch": 0.37, "learning_rate": 0.001, "loss": 4.707, "step": 1932 }, { "epoch": 0.37, "learning_rate": 0.001, "loss": 4.7097, "step": 1944 }, { "epoch": 0.38, "learning_rate": 0.001, "loss": 4.6983, "step": 1956 }, { "epoch": 0.38, "learning_rate": 0.001, "loss": 4.6958, "step": 1968 }, { "epoch": 0.38, "learning_rate": 0.001, "loss": 4.6876, "step": 1980 }, { "epoch": 0.38, "learning_rate": 0.001, "loss": 4.6728, "step": 1992 }, { "epoch": 0.38, "learning_rate": 0.001, "loss": 4.6791, "step": 2004 }, { "epoch": 0.39, "learning_rate": 0.001, "loss": 4.676, "step": 2016 }, { "epoch": 0.39, "learning_rate": 0.001, "loss": 4.6698, "step": 2028 }, { "epoch": 0.39, "learning_rate": 0.001, "loss": 4.6587, "step": 2040 }, { "epoch": 0.39, "learning_rate": 0.001, "loss": 4.6566, "step": 2052 }, { "epoch": 0.4, "learning_rate": 0.001, "loss": 4.6514, "step": 2064 }, { "epoch": 0.4, "learning_rate": 0.001, "loss": 4.6358, "step": 2076 }, { "epoch": 0.4, "learning_rate": 0.001, "loss": 4.6437, "step": 2088 }, { "epoch": 0.4, "learning_rate": 0.001, "loss": 4.6498, "step": 2100 }, { "epoch": 0.41, "learning_rate": 0.001, "loss": 4.6218, "step": 2112 }, { "epoch": 0.41, "learning_rate": 0.001, "loss": 4.6455, "step": 2124 }, { "epoch": 0.41, "learning_rate": 0.001, "loss": 4.629, "step": 2136 }, { "epoch": 0.41, "learning_rate": 0.001, "loss": 4.6146, "step": 2148 }, { "epoch": 0.41, "learning_rate": 0.001, "loss": 4.6128, "step": 2160 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 4.6049, "step": 2172 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 4.6053, "step": 2184 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 4.6002, "step": 2196 }, { "epoch": 0.42, "learning_rate": 0.001, "loss": 4.6119, "step": 2208 }, { "epoch": 0.43, "learning_rate": 0.001, "loss": 4.5884, "step": 2220 }, { "epoch": 0.43, "learning_rate": 0.001, "loss": 4.5804, "step": 2232 }, { "epoch": 0.43, "learning_rate": 0.001, "loss": 4.5959, "step": 2244 }, { "epoch": 0.43, "learning_rate": 0.001, "loss": 4.5874, "step": 2256 }, { "epoch": 0.44, "learning_rate": 0.001, "loss": 4.5806, "step": 2268 }, { "epoch": 0.44, "learning_rate": 0.001, "loss": 4.5749, "step": 2280 }, { "epoch": 0.44, "learning_rate": 0.001, "loss": 4.5643, "step": 2292 }, { "epoch": 0.44, "learning_rate": 0.001, "loss": 4.5557, "step": 2304 }, { "epoch": 0.44, "learning_rate": 0.001, "loss": 4.5603, "step": 2316 }, { "epoch": 0.45, "learning_rate": 0.001, "loss": 4.5447, "step": 2328 }, { "epoch": 0.45, "learning_rate": 0.001, "loss": 4.5369, "step": 2340 }, { "epoch": 0.45, "learning_rate": 0.001, "loss": 4.545, "step": 2352 }, { "epoch": 0.45, "learning_rate": 0.001, "loss": 4.5483, "step": 2364 }, { "epoch": 0.46, "learning_rate": 0.001, "loss": 4.5438, "step": 2376 }, { "epoch": 0.46, "learning_rate": 0.001, "loss": 4.5408, "step": 2388 }, { "epoch": 0.46, "learning_rate": 0.001, "loss": 4.5387, "step": 2400 }, { "epoch": 0.46, "learning_rate": 0.001, "loss": 4.5375, "step": 2412 }, { "epoch": 0.47, "learning_rate": 0.001, "loss": 4.5164, "step": 2424 }, { "epoch": 0.47, "learning_rate": 0.001, "loss": 4.5216, "step": 2436 }, { "epoch": 0.47, "learning_rate": 0.001, "loss": 4.4997, "step": 2448 }, { "epoch": 0.47, "learning_rate": 0.001, "loss": 4.5179, "step": 2460 }, { "epoch": 0.47, "learning_rate": 0.001, "loss": 4.5004, "step": 2472 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 4.5037, "step": 2484 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 4.4875, "step": 2496 }, { "epoch": 0.48, "eval_ag_news_accuracy": 0.173125, "eval_ag_news_bleu_score": 1.8445900736953396, "eval_ag_news_bleu_score_sem": 0.06262184334737668, "eval_ag_news_emb_cos_sim": 0.34900230169296265, "eval_ag_news_emb_cos_sim_sem": 0.010179393121681672, "eval_ag_news_emb_top1_equal": 0.03125, "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.598976135253906, "eval_ag_news_n_ngrams_match_1": 6.342, "eval_ag_news_n_ngrams_match_2": 0.744, "eval_ag_news_n_ngrams_match_3": 0.124, "eval_ag_news_num_pred_words": 40.05, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 270.1496690566818, "eval_ag_news_pred_num_tokens": 62.5390625, "eval_ag_news_rouge_score": 0.14732981524733085, "eval_ag_news_runtime": 10.1945, "eval_ag_news_samples_per_second": 49.046, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.18426404790619708, "eval_ag_news_token_set_f1_sem": 0.003983054075476968, "eval_ag_news_token_set_precision": 0.14057732009040233, "eval_ag_news_token_set_recall": 0.30661308725535086, "eval_ag_news_true_num_tokens": 56.09375, "step": 2500 }, { "epoch": 0.48, "eval_anthropic_toxic_prompts_accuracy": 0.05278125, "eval_anthropic_toxic_prompts_bleu_score": 1.1399798872031897, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.04618923291342836, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2499910593032837, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009365932603641098, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 5.1824951171875, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.388, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.244, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.032, "eval_anthropic_toxic_prompts_num_pred_words": 39.23, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 178.126703981217, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.75, "eval_anthropic_toxic_prompts_rouge_score": 0.08504647404745881, "eval_anthropic_toxic_prompts_runtime": 10.2011, "eval_anthropic_toxic_prompts_samples_per_second": 49.014, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.1415453788063794, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004915337389797356, "eval_anthropic_toxic_prompts_token_set_precision": 0.1424269313439444, "eval_anthropic_toxic_prompts_token_set_recall": 0.1785590425989663, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 2500 }, { "epoch": 0.48, "eval_arxiv_accuracy": 0.2069375, "eval_arxiv_bleu_score": 1.0817968392037858, "eval_arxiv_bleu_score_sem": 0.043877314138167575, "eval_arxiv_emb_cos_sim": 0.2864419221878052, "eval_arxiv_emb_cos_sim_sem": 0.006383267306064941, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 5.378298759460449, "eval_arxiv_n_ngrams_match_1": 4.564, "eval_arxiv_n_ngrams_match_2": 0.464, "eval_arxiv_n_ngrams_match_3": 0.02, "eval_arxiv_num_pred_words": 23.218, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 216.6533822090273, "eval_arxiv_pred_num_tokens": 62.8046875, "eval_arxiv_rouge_score": 0.12174756200256615, "eval_arxiv_runtime": 10.1745, "eval_arxiv_samples_per_second": 49.143, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.12196276733542068, "eval_arxiv_token_set_f1_sem": 0.0035965513475812072, "eval_arxiv_token_set_precision": 0.07694164495554813, "eval_arxiv_token_set_recall": 0.39201808371367247, "eval_arxiv_true_num_tokens": 64.0, "step": 2500 }, { "epoch": 0.48, "eval_python_code_alpaca_accuracy": 0.0823125, "eval_python_code_alpaca_bleu_score": 1.9885140531274417, "eval_python_code_alpaca_bleu_score_sem": 0.05715660482036896, "eval_python_code_alpaca_emb_cos_sim": 0.2083890736103058, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0067747779429612395, "eval_python_code_alpaca_emb_top1_equal": 0.0, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 5.2765960693359375, "eval_python_code_alpaca_n_ngrams_match_1": 3.026, "eval_python_code_alpaca_n_ngrams_match_2": 0.16, "eval_python_code_alpaca_n_ngrams_match_3": 0.012, "eval_python_code_alpaca_num_pred_words": 18.666, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 195.70258226560196, "eval_python_code_alpaca_pred_num_tokens": 62.90625, "eval_python_code_alpaca_rouge_score": 0.14774207463910968, "eval_python_code_alpaca_runtime": 9.3776, "eval_python_code_alpaca_samples_per_second": 53.319, "eval_python_code_alpaca_steps_per_second": 0.107, "eval_python_code_alpaca_token_set_f1": 0.19409139501907424, "eval_python_code_alpaca_token_set_f1_sem": 0.0041145243937218984, "eval_python_code_alpaca_token_set_precision": 0.14745978200103235, "eval_python_code_alpaca_token_set_recall": 0.3656227900840133, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 2500 }, { "epoch": 0.48, "eval_wikibio_accuracy": 0.17415625, "eval_wikibio_bleu_score": 2.195020967443216, "eval_wikibio_bleu_score_sem": 0.11868718815415513, "eval_wikibio_emb_cos_sim": 0.3559015989303589, "eval_wikibio_emb_cos_sim_sem": 0.010704055531191504, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.531292915344238, "eval_wikibio_n_ngrams_match_1": 4.522, "eval_wikibio_n_ngrams_match_2": 1.014, "eval_wikibio_n_ngrams_match_3": 0.312, "eval_wikibio_num_pred_words": 29.83, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 252.47012259156008, "eval_wikibio_pred_num_tokens": 62.765625, "eval_wikibio_rouge_score": 0.134333882608771, "eval_wikibio_runtime": 9.8046, "eval_wikibio_samples_per_second": 50.996, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.16498288286324034, "eval_wikibio_token_set_f1_sem": 0.005735143889755407, "eval_wikibio_token_set_precision": 0.13822869179740693, "eval_wikibio_token_set_recall": 0.2534312355646855, "eval_wikibio_true_num_tokens": 61.1328125, "step": 2500 }, { "epoch": 0.48, "eval_nq_accuracy": 0.31165625, "eval_nq_bleu_score": 3.640149850234891, "eval_nq_bleu_score_sem": 0.16268640178805646, "eval_nq_emb_cos_sim": 0.44711968302726746, "eval_nq_emb_cos_sim_sem": 0.01117219177285864, "eval_nq_emb_top1_equal": 0.09375, "eval_nq_emb_top1_equal_sem": 0.025864720141013958, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 4.153674602508545, "eval_nq_n_ngrams_match_1": 12.522, "eval_nq_n_ngrams_match_2": 2.8, "eval_nq_n_ngrams_match_3": 0.75, "eval_nq_num_pred_words": 43.906, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 63.66752382504825, "eval_nq_pred_num_tokens": 62.375, "eval_nq_rouge_score": 0.22695565292984432, "eval_nq_runtime": 10.08, "eval_nq_samples_per_second": 49.603, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.2699038320927348, "eval_nq_token_set_f1_sem": 0.004896122256853701, "eval_nq_token_set_precision": 0.20154225312448248, "eval_nq_token_set_recall": 0.46019639800401324, "eval_nq_true_num_tokens": 64.0, "step": 2500 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 4.5039, "step": 2508 }, { "epoch": 0.48, "learning_rate": 0.001, "loss": 4.4939, "step": 2520 }, { "epoch": 0.49, "learning_rate": 0.001, "loss": 4.4845, "step": 2532 }, { "epoch": 0.49, "learning_rate": 0.001, "loss": 4.4728, "step": 2544 }, { "epoch": 0.49, "learning_rate": 0.001, "loss": 4.4719, "step": 2556 }, { "epoch": 0.49, "learning_rate": 0.001, "loss": 4.4653, "step": 2568 }, { "epoch": 0.5, "learning_rate": 0.001, "loss": 4.4568, "step": 2580 }, { "epoch": 0.5, "learning_rate": 0.001, "loss": 4.4561, "step": 2592 }, { "epoch": 0.5, "learning_rate": 0.001, "loss": 4.4529, "step": 2604 }, { "epoch": 0.5, "learning_rate": 0.001, "loss": 4.4449, "step": 2616 }, { "epoch": 0.5, "learning_rate": 0.001, "loss": 4.44, "step": 2628 }, { "epoch": 0.51, "learning_rate": 0.001, "loss": 4.4493, "step": 2640 }, { "epoch": 0.51, "learning_rate": 0.001, "loss": 4.4416, "step": 2652 }, { "epoch": 0.51, "learning_rate": 0.001, "loss": 4.4307, "step": 2664 }, { "epoch": 0.51, "learning_rate": 0.001, "loss": 4.4218, "step": 2676 }, { "epoch": 0.52, "learning_rate": 0.001, "loss": 4.4288, "step": 2688 }, { "epoch": 0.52, "learning_rate": 0.001, "loss": 4.4201, "step": 2700 }, { "epoch": 0.52, "learning_rate": 0.001, "loss": 4.4118, "step": 2712 }, { "epoch": 0.52, "learning_rate": 0.001, "loss": 4.4214, "step": 2724 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 4.4147, "step": 2736 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 4.3993, "step": 2748 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 4.4106, "step": 2760 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 4.3987, "step": 2772 }, { "epoch": 0.53, "learning_rate": 0.001, "loss": 4.3855, "step": 2784 }, { "epoch": 0.54, "learning_rate": 0.001, "loss": 4.3955, "step": 2796 }, { "epoch": 0.54, "learning_rate": 0.001, "loss": 4.3875, "step": 2808 }, { "epoch": 0.54, "learning_rate": 0.001, "loss": 4.3776, "step": 2820 }, { "epoch": 0.54, "learning_rate": 0.001, "loss": 4.3871, "step": 2832 }, { "epoch": 0.55, "learning_rate": 0.001, "loss": 4.3715, "step": 2844 }, { "epoch": 0.55, "learning_rate": 0.001, "loss": 4.3594, "step": 2856 }, { "epoch": 0.55, "learning_rate": 0.001, "loss": 4.3716, "step": 2868 }, { "epoch": 0.55, "learning_rate": 0.001, "loss": 4.36, "step": 2880 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 4.3628, "step": 2892 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 4.3503, "step": 2904 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 4.3434, "step": 2916 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 4.348, "step": 2928 }, { "epoch": 0.56, "learning_rate": 0.001, "loss": 4.3386, "step": 2940 }, { "epoch": 0.57, "learning_rate": 0.001, "loss": 4.35, "step": 2952 }, { "epoch": 0.57, "learning_rate": 0.001, "loss": 4.3359, "step": 2964 }, { "epoch": 0.57, "learning_rate": 0.001, "loss": 4.3359, "step": 2976 }, { "epoch": 0.57, "learning_rate": 0.001, "loss": 4.331, "step": 2988 }, { "epoch": 0.58, "learning_rate": 0.001, "loss": 4.3312, "step": 3000 }, { "epoch": 0.58, "learning_rate": 0.001, "loss": 4.3276, "step": 3012 }, { "epoch": 0.58, "learning_rate": 0.001, "loss": 4.3165, "step": 3024 }, { "epoch": 0.58, "learning_rate": 0.001, "loss": 4.3023, "step": 3036 }, { "epoch": 0.59, "learning_rate": 0.001, "loss": 4.3061, "step": 3048 }, { "epoch": 0.59, "learning_rate": 0.001, "loss": 4.3034, "step": 3060 }, { "epoch": 0.59, "learning_rate": 0.001, "loss": 4.3033, "step": 3072 }, { "epoch": 0.59, "learning_rate": 0.001, "loss": 4.299, "step": 3084 }, { "epoch": 0.59, "learning_rate": 0.001, "loss": 4.3062, "step": 3096 }, { "epoch": 0.6, "learning_rate": 0.001, "loss": 4.3031, "step": 3108 }, { "epoch": 0.6, "learning_rate": 0.001, "loss": 4.2838, "step": 3120 }, { "epoch": 0.6, "eval_ag_news_accuracy": 0.187625, "eval_ag_news_bleu_score": 1.8920297212136834, "eval_ag_news_bleu_score_sem": 0.0733760096541259, "eval_ag_news_emb_cos_sim": 0.3659282922744751, "eval_ag_news_emb_cos_sim_sem": 0.011287593795348515, "eval_ag_news_emb_top1_equal": 0.03125, "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.389096736907959, "eval_ag_news_n_ngrams_match_1": 6.128, "eval_ag_news_n_ngrams_match_2": 0.796, "eval_ag_news_n_ngrams_match_3": 0.156, "eval_ag_news_num_pred_words": 39.094, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 219.00547662150623, "eval_ag_news_pred_num_tokens": 62.9921875, "eval_ag_news_rouge_score": 0.14004064993515936, "eval_ag_news_runtime": 10.1745, "eval_ag_news_samples_per_second": 49.142, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.17740503442976457, "eval_ag_news_token_set_f1_sem": 0.004214815177846998, "eval_ag_news_token_set_precision": 0.13406466089971986, "eval_ag_news_token_set_recall": 0.31599748653246834, "eval_ag_news_true_num_tokens": 56.09375, "step": 3125 }, { "epoch": 0.6, "eval_anthropic_toxic_prompts_accuracy": 0.05465625, "eval_anthropic_toxic_prompts_bleu_score": 1.075822339485072, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.048404389862727715, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.26767683029174805, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010647807927675683, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 5.050642490386963, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.176, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.254, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.032, "eval_anthropic_toxic_prompts_num_pred_words": 36.548, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 156.12273962943607, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875, "eval_anthropic_toxic_prompts_rouge_score": 0.07795176579412504, "eval_anthropic_toxic_prompts_runtime": 9.8737, "eval_anthropic_toxic_prompts_samples_per_second": 50.639, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.13360386866283155, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005031129819239987, "eval_anthropic_toxic_prompts_token_set_precision": 0.12835372473219997, "eval_anthropic_toxic_prompts_token_set_recall": 0.19578409827717358, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 3125 }, { "epoch": 0.6, "eval_arxiv_accuracy": 0.22521875, "eval_arxiv_bleu_score": 1.2205938925532436, "eval_arxiv_bleu_score_sem": 0.0468696597158843, "eval_arxiv_emb_cos_sim": 0.2796843647956848, "eval_arxiv_emb_cos_sim_sem": 0.00787956538044321, "eval_arxiv_emb_top1_equal": 0.140625, "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 5.159443378448486, "eval_arxiv_n_ngrams_match_1": 5.176, "eval_arxiv_n_ngrams_match_2": 0.538, "eval_arxiv_n_ngrams_match_3": 0.024, "eval_arxiv_num_pred_words": 24.594, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 174.06753889108148, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.13216383019848524, "eval_arxiv_runtime": 9.8668, "eval_arxiv_samples_per_second": 50.675, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.13262861383240265, "eval_arxiv_token_set_f1_sem": 0.003971516873948744, "eval_arxiv_token_set_precision": 0.08465219272068342, "eval_arxiv_token_set_recall": 0.40846352640942785, "eval_arxiv_true_num_tokens": 64.0, "step": 3125 }, { "epoch": 0.6, "eval_python_code_alpaca_accuracy": 0.08265625, "eval_python_code_alpaca_bleu_score": 1.9714998209550776, "eval_python_code_alpaca_bleu_score_sem": 0.058439947030860105, "eval_python_code_alpaca_emb_cos_sim": 0.2215101569890976, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007273838776178736, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 5.178214073181152, "eval_python_code_alpaca_n_ngrams_match_1": 2.906, "eval_python_code_alpaca_n_ngrams_match_2": 0.224, "eval_python_code_alpaca_n_ngrams_match_3": 0.022, "eval_python_code_alpaca_num_pred_words": 20.886, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 177.36576569008702, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.12684910688521828, "eval_python_code_alpaca_runtime": 9.6209, "eval_python_code_alpaca_samples_per_second": 51.97, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.18422290235573677, "eval_python_code_alpaca_token_set_f1_sem": 0.004977650352202118, "eval_python_code_alpaca_token_set_precision": 0.13892467991777335, "eval_python_code_alpaca_token_set_recall": 0.36219662137827774, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 3125 }, { "epoch": 0.6, "eval_wikibio_accuracy": 0.206375, "eval_wikibio_bleu_score": 1.1004412593994752, "eval_wikibio_bleu_score_sem": 0.0834567302990565, "eval_wikibio_emb_cos_sim": 0.3168962001800537, "eval_wikibio_emb_cos_sim_sem": 0.010303981840057905, "eval_wikibio_emb_top1_equal": 0.0703125, "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.310210704803467, "eval_wikibio_n_ngrams_match_1": 2.192, "eval_wikibio_n_ngrams_match_2": 0.454, "eval_wikibio_n_ngrams_match_3": 0.13, "eval_wikibio_num_pred_words": 26.6, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 202.39286904538946, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.06197691456225775, "eval_wikibio_runtime": 10.3005, "eval_wikibio_samples_per_second": 48.541, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.07692352378772646, "eval_wikibio_token_set_f1_sem": 0.0053252201477126705, "eval_wikibio_token_set_precision": 0.06385281469822855, "eval_wikibio_token_set_recall": 0.13676271463183348, "eval_wikibio_true_num_tokens": 61.1328125, "step": 3125 }, { "epoch": 0.6, "eval_nq_accuracy": 0.33565625, "eval_nq_bleu_score": 3.6132990904335807, "eval_nq_bleu_score_sem": 0.1731479228516426, "eval_nq_emb_cos_sim": 0.44149720668792725, "eval_nq_emb_cos_sim_sem": 0.012091645029459032, "eval_nq_emb_top1_equal": 0.125, "eval_nq_emb_top1_equal_sem": 0.02934655822437397, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.933677911758423, "eval_nq_n_ngrams_match_1": 12.394, "eval_nq_n_ngrams_match_2": 2.834, "eval_nq_n_ngrams_match_3": 0.772, "eval_nq_num_pred_words": 42.312, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 51.094553773691246, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.22498549500134118, "eval_nq_runtime": 10.3286, "eval_nq_samples_per_second": 48.409, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.2641090096234247, "eval_nq_token_set_f1_sem": 0.0055344501872298885, "eval_nq_token_set_precision": 0.19725537998809747, "eval_nq_token_set_recall": 0.45519858497107596, "eval_nq_true_num_tokens": 64.0, "step": 3125 }, { "epoch": 0.6, "learning_rate": 0.001, "loss": 4.2847, "step": 3132 }, { "epoch": 0.6, "learning_rate": 0.001, "loss": 4.2833, "step": 3144 }, { "epoch": 0.61, "learning_rate": 0.001, "loss": 4.2858, "step": 3156 }, { "epoch": 0.61, "learning_rate": 0.001, "loss": 4.2695, "step": 3168 }, { "epoch": 0.61, "learning_rate": 0.001, "loss": 4.2798, "step": 3180 }, { "epoch": 0.61, "learning_rate": 0.001, "loss": 4.2716, "step": 3192 }, { "epoch": 0.62, "learning_rate": 0.001, "loss": 4.2707, "step": 3204 }, { "epoch": 0.62, "learning_rate": 0.001, "loss": 4.2579, "step": 3216 }, { "epoch": 0.62, "learning_rate": 0.001, "loss": 4.2496, "step": 3228 }, { "epoch": 0.62, "learning_rate": 0.001, "loss": 4.2648, "step": 3240 }, { "epoch": 0.62, "learning_rate": 0.001, "loss": 4.2558, "step": 3252 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 4.242, "step": 3264 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 4.2317, "step": 3276 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 4.2625, "step": 3288 }, { "epoch": 0.63, "learning_rate": 0.001, "loss": 4.2318, "step": 3300 }, { "epoch": 0.64, "learning_rate": 0.001, "loss": 4.2332, "step": 3312 }, { "epoch": 0.64, "learning_rate": 0.001, "loss": 4.2369, "step": 3324 }, { "epoch": 0.64, "learning_rate": 0.001, "loss": 4.232, "step": 3336 }, { "epoch": 0.64, "learning_rate": 0.001, "loss": 4.2251, "step": 3348 }, { "epoch": 0.65, "learning_rate": 0.001, "loss": 4.23, "step": 3360 }, { "epoch": 0.65, "learning_rate": 0.001, "loss": 4.2216, "step": 3372 }, { "epoch": 0.65, "learning_rate": 0.001, "loss": 4.216, "step": 3384 }, { "epoch": 0.65, "learning_rate": 0.001, "loss": 4.2182, "step": 3396 }, { "epoch": 0.65, "learning_rate": 0.001, "loss": 4.2014, "step": 3408 }, { "epoch": 0.66, "learning_rate": 0.001, "loss": 4.2048, "step": 3420 }, { "epoch": 0.66, "learning_rate": 0.001, "loss": 4.2042, "step": 3432 }, { "epoch": 0.66, "learning_rate": 0.001, "loss": 4.1948, "step": 3444 }, { "epoch": 0.66, "learning_rate": 0.001, "loss": 4.1884, "step": 3456 }, { "epoch": 0.67, "learning_rate": 0.001, "loss": 4.1988, "step": 3468 }, { "epoch": 0.67, "learning_rate": 0.001, "loss": 4.2, "step": 3480 }, { "epoch": 0.67, "learning_rate": 0.001, "loss": 4.1891, "step": 3492 }, { "epoch": 0.67, "learning_rate": 0.001, "loss": 4.1916, "step": 3504 }, { "epoch": 0.68, "learning_rate": 0.001, "loss": 4.1804, "step": 3516 }, { "epoch": 0.68, "learning_rate": 0.001, "loss": 4.1824, "step": 3528 }, { "epoch": 0.68, "learning_rate": 0.001, "loss": 4.1767, "step": 3540 }, { "epoch": 0.68, "learning_rate": 0.001, "loss": 4.1624, "step": 3552 }, { "epoch": 0.68, "learning_rate": 0.001, "loss": 4.1738, "step": 3564 }, { "epoch": 0.69, "learning_rate": 0.001, "loss": 4.1647, "step": 3576 }, { "epoch": 0.69, "learning_rate": 0.001, "loss": 4.1543, "step": 3588 }, { "epoch": 0.69, "learning_rate": 0.001, "loss": 4.169, "step": 3600 }, { "epoch": 0.69, "learning_rate": 0.001, "loss": 4.1549, "step": 3612 }, { "epoch": 0.7, "learning_rate": 0.001, "loss": 4.1656, "step": 3624 }, { "epoch": 0.7, "learning_rate": 0.001, "loss": 4.1506, "step": 3636 }, { "epoch": 0.7, "learning_rate": 0.001, "loss": 4.1565, "step": 3648 }, { "epoch": 0.7, "learning_rate": 0.001, "loss": 4.145, "step": 3660 }, { "epoch": 0.71, "learning_rate": 0.001, "loss": 4.1563, "step": 3672 }, { "epoch": 0.71, "learning_rate": 0.001, "loss": 4.1406, "step": 3684 }, { "epoch": 0.71, "learning_rate": 0.001, "loss": 4.1233, "step": 3696 }, { "epoch": 0.71, "learning_rate": 0.001, "loss": 4.1229, "step": 3708 }, { "epoch": 0.71, "learning_rate": 0.001, "loss": 4.1353, "step": 3720 }, { "epoch": 0.72, "learning_rate": 0.001, "loss": 4.1276, "step": 3732 }, { "epoch": 0.72, "learning_rate": 0.001, "loss": 4.1091, "step": 3744 }, { "epoch": 0.72, "eval_ag_news_accuracy": 0.1999375, "eval_ag_news_bleu_score": 1.8619877025600262, "eval_ag_news_bleu_score_sem": 0.0650624380628498, "eval_ag_news_emb_cos_sim": 0.38478976488113403, "eval_ag_news_emb_cos_sim_sem": 0.011722462344105887, "eval_ag_news_emb_top1_equal": 0.0546875, "eval_ag_news_emb_top1_equal_sem": 0.020175758285348722, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.255814552307129, "eval_ag_news_n_ngrams_match_1": 6.242, "eval_ag_news_n_ngrams_match_2": 0.806, "eval_ag_news_n_ngrams_match_3": 0.142, "eval_ag_news_num_pred_words": 38.902, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 191.6775536766177, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.14476029481218228, "eval_ag_news_runtime": 10.3413, "eval_ag_news_samples_per_second": 48.35, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.18357471062976743, "eval_ag_news_token_set_f1_sem": 0.0041610935295574025, "eval_ag_news_token_set_precision": 0.13850072206670916, "eval_ag_news_token_set_recall": 0.32681119622119503, "eval_ag_news_true_num_tokens": 56.09375, "step": 3750 }, { "epoch": 0.72, "eval_anthropic_toxic_prompts_accuracy": 0.0558125, "eval_anthropic_toxic_prompts_bleu_score": 1.2702086804584747, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0557275274868974, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.2815111577510834, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011618550606191295, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.982417106628418, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.44, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.272, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.052, "eval_anthropic_toxic_prompts_num_pred_words": 37.69, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 145.8264340484558, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.08829251290210152, "eval_anthropic_toxic_prompts_runtime": 9.7406, "eval_anthropic_toxic_prompts_samples_per_second": 51.331, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.1470798080901539, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.004943615235098987, "eval_anthropic_toxic_prompts_token_set_precision": 0.14105713088319935, "eval_anthropic_toxic_prompts_token_set_recall": 0.2037022235617993, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 3750 }, { "epoch": 0.72, "eval_arxiv_accuracy": 0.230375, "eval_arxiv_bleu_score": 1.4517498825506656, "eval_arxiv_bleu_score_sem": 0.05051521468972786, "eval_arxiv_emb_cos_sim": 0.3207044303417206, "eval_arxiv_emb_cos_sim_sem": 0.007364829870846708, "eval_arxiv_emb_top1_equal": 0.1328125, "eval_arxiv_emb_top1_equal_sem": 0.030114394778901498, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 5.018895626068115, "eval_arxiv_n_ngrams_match_1": 5.57, "eval_arxiv_n_ngrams_match_2": 0.68, "eval_arxiv_n_ngrams_match_3": 0.044, "eval_arxiv_num_pred_words": 28.4, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 151.24418139694512, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.13883675375698562, "eval_arxiv_runtime": 11.3887, "eval_arxiv_samples_per_second": 43.903, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.1404308603982505, "eval_arxiv_token_set_f1_sem": 0.0038565303084726805, "eval_arxiv_token_set_precision": 0.08953093709897436, "eval_arxiv_token_set_recall": 0.43586337262382197, "eval_arxiv_true_num_tokens": 64.0, "step": 3750 }, { "epoch": 0.72, "eval_python_code_alpaca_accuracy": 0.0875625, "eval_python_code_alpaca_bleu_score": 1.8910110482747022, "eval_python_code_alpaca_bleu_score_sem": 0.0552021381501231, "eval_python_code_alpaca_emb_cos_sim": 0.23446616530418396, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076346138237984645, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 5.079406261444092, "eval_python_code_alpaca_n_ngrams_match_1": 2.754, "eval_python_code_alpaca_n_ngrams_match_2": 0.17, "eval_python_code_alpaca_n_ngrams_match_3": 0.014, "eval_python_code_alpaca_num_pred_words": 19.398, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 160.67862650568006, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.131813315690313, "eval_python_code_alpaca_runtime": 10.4434, "eval_python_code_alpaca_samples_per_second": 47.877, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.18785229022437347, "eval_python_code_alpaca_token_set_f1_sem": 0.0045256177229473235, "eval_python_code_alpaca_token_set_precision": 0.1364999232069587, "eval_python_code_alpaca_token_set_recall": 0.39858400063988386, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 3750 }, { "epoch": 0.72, "eval_wikibio_accuracy": 0.22246875, "eval_wikibio_bleu_score": 1.4159646195438642, "eval_wikibio_bleu_score_sem": 0.09932930841696949, "eval_wikibio_emb_cos_sim": 0.35904812812805176, "eval_wikibio_emb_cos_sim_sem": 0.01094617600499643, "eval_wikibio_emb_top1_equal": 0.0703125, "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.181839942932129, "eval_wikibio_n_ngrams_match_1": 2.942, "eval_wikibio_n_ngrams_match_2": 0.632, "eval_wikibio_n_ngrams_match_3": 0.19, "eval_wikibio_num_pred_words": 27.358, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 178.01003817296467, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.08354534387190918, "eval_wikibio_runtime": 9.9779, "eval_wikibio_samples_per_second": 50.111, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.10982181694521252, "eval_wikibio_token_set_f1_sem": 0.005646462481343112, "eval_wikibio_token_set_precision": 0.08833275999052514, "eval_wikibio_token_set_recall": 0.19017553563916095, "eval_wikibio_true_num_tokens": 61.1328125, "step": 3750 }, { "epoch": 0.72, "eval_nq_accuracy": 0.35415625, "eval_nq_bleu_score": 3.813795763528188, "eval_nq_bleu_score_sem": 0.18499382516869395, "eval_nq_emb_cos_sim": 0.4574257731437683, "eval_nq_emb_cos_sim_sem": 0.011819713562866744, "eval_nq_emb_top1_equal": 0.09375, "eval_nq_emb_top1_equal_sem": 0.025864720141013958, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.7624354362487793, "eval_nq_n_ngrams_match_1": 12.78, "eval_nq_n_ngrams_match_2": 3.006, "eval_nq_n_ngrams_match_3": 0.844, "eval_nq_num_pred_words": 43.766, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 43.05315160675132, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.22971694995211947, "eval_nq_runtime": 10.2976, "eval_nq_samples_per_second": 48.555, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.2739734811777524, "eval_nq_token_set_f1_sem": 0.005255913027903776, "eval_nq_token_set_precision": 0.20585654841798381, "eval_nq_token_set_recall": 0.4722101911422006, "eval_nq_true_num_tokens": 64.0, "step": 3750 }, { "epoch": 0.72, "learning_rate": 0.001, "loss": 4.1213, "step": 3756 }, { "epoch": 0.72, "learning_rate": 0.001, "loss": 4.1201, "step": 3768 }, { "epoch": 0.73, "learning_rate": 0.001, "loss": 4.1253, "step": 3780 }, { "epoch": 0.73, "learning_rate": 0.001, "loss": 4.1286, "step": 3792 }, { "epoch": 0.73, "learning_rate": 0.001, "loss": 4.1157, "step": 3804 }, { "epoch": 0.73, "learning_rate": 0.001, "loss": 4.1178, "step": 3816 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 4.1197, "step": 3828 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 4.1097, "step": 3840 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 4.0928, "step": 3852 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 4.1009, "step": 3864 }, { "epoch": 0.74, "learning_rate": 0.001, "loss": 4.1068, "step": 3876 }, { "epoch": 0.75, "learning_rate": 0.001, "loss": 4.0965, "step": 3888 }, { "epoch": 0.75, "learning_rate": 0.001, "loss": 4.0932, "step": 3900 }, { "epoch": 0.75, "learning_rate": 0.001, "loss": 4.0941, "step": 3912 }, { "epoch": 0.75, "learning_rate": 0.001, "loss": 4.0824, "step": 3924 }, { "epoch": 0.76, "learning_rate": 0.001, "loss": 4.0905, "step": 3936 }, { "epoch": 0.76, "learning_rate": 0.001, "loss": 4.0942, "step": 3948 }, { "epoch": 0.76, "learning_rate": 0.001, "loss": 4.0807, "step": 3960 }, { "epoch": 0.76, "learning_rate": 0.001, "loss": 4.0921, "step": 3972 }, { "epoch": 0.76, "learning_rate": 0.001, "loss": 4.0837, "step": 3984 }, { "epoch": 0.77, "learning_rate": 0.001, "loss": 4.0746, "step": 3996 }, { "epoch": 0.77, "learning_rate": 0.001, "loss": 4.0648, "step": 4008 }, { "epoch": 0.77, "learning_rate": 0.001, "loss": 4.0671, "step": 4020 }, { "epoch": 0.77, "learning_rate": 0.001, "loss": 4.0805, "step": 4032 }, { "epoch": 0.78, "learning_rate": 0.001, "loss": 4.0727, "step": 4044 }, { "epoch": 0.78, "learning_rate": 0.001, "loss": 4.0677, "step": 4056 }, { "epoch": 0.78, "learning_rate": 0.001, "loss": 4.0532, "step": 4068 }, { "epoch": 0.78, "learning_rate": 0.001, "loss": 4.0569, "step": 4080 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 4.055, "step": 4092 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 4.0537, "step": 4104 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 4.0515, "step": 4116 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 4.0491, "step": 4128 }, { "epoch": 0.79, "learning_rate": 0.001, "loss": 4.0431, "step": 4140 }, { "epoch": 0.8, "learning_rate": 0.001, "loss": 4.0428, "step": 4152 }, { "epoch": 0.8, "learning_rate": 0.001, "loss": 4.0421, "step": 4164 }, { "epoch": 0.8, "learning_rate": 0.001, "loss": 4.0461, "step": 4176 }, { "epoch": 0.8, "learning_rate": 0.001, "loss": 4.0405, "step": 4188 }, { "epoch": 0.81, "learning_rate": 0.001, "loss": 4.0363, "step": 4200 }, { "epoch": 0.81, "learning_rate": 0.001, "loss": 4.0254, "step": 4212 }, { "epoch": 0.81, "learning_rate": 0.001, "loss": 4.0286, "step": 4224 }, { "epoch": 0.81, "learning_rate": 0.001, "loss": 4.0117, "step": 4236 }, { "epoch": 0.82, "learning_rate": 0.001, "loss": 4.0182, "step": 4248 }, { "epoch": 0.82, "learning_rate": 0.001, "loss": 4.0169, "step": 4260 }, { "epoch": 0.82, "learning_rate": 0.001, "loss": 4.005, "step": 4272 }, { "epoch": 0.82, "learning_rate": 0.001, "loss": 4.0182, "step": 4284 }, { "epoch": 0.82, "learning_rate": 0.001, "loss": 4.022, "step": 4296 }, { "epoch": 0.83, "learning_rate": 0.001, "loss": 4.0206, "step": 4308 }, { "epoch": 0.83, "learning_rate": 0.001, "loss": 4.0101, "step": 4320 }, { "epoch": 0.83, "learning_rate": 0.001, "loss": 4.0047, "step": 4332 }, { "epoch": 0.83, "learning_rate": 0.001, "loss": 4.0135, "step": 4344 }, { "epoch": 0.84, "learning_rate": 0.001, "loss": 4.0004, "step": 4356 }, { "epoch": 0.84, "learning_rate": 0.001, "loss": 4.0053, "step": 4368 }, { "epoch": 0.84, "eval_ag_news_accuracy": 0.2065625, "eval_ag_news_bleu_score": 1.871361272034575, "eval_ag_news_bleu_score_sem": 0.0669665206205916, "eval_ag_news_emb_cos_sim": 0.4182249903678894, "eval_ag_news_emb_cos_sim_sem": 0.011004487487000269, "eval_ag_news_emb_top1_equal": 0.0234375, "eval_ag_news_emb_top1_equal_sem": 0.013424676090873717, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.128519535064697, "eval_ag_news_n_ngrams_match_1": 6.61, "eval_ag_news_n_ngrams_match_2": 0.87, "eval_ag_news_n_ngrams_match_3": 0.16, "eval_ag_news_num_pred_words": 42.274, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 168.76707926065117, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.15322416291529034, "eval_ag_news_runtime": 10.7869, "eval_ag_news_samples_per_second": 46.353, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.1920964020685739, "eval_ag_news_token_set_f1_sem": 0.0042704652911768745, "eval_ag_news_token_set_precision": 0.148605437255678, "eval_ag_news_token_set_recall": 0.31855377856718653, "eval_ag_news_true_num_tokens": 56.09375, "step": 4375 }, { "epoch": 0.84, "eval_anthropic_toxic_prompts_accuracy": 0.0604375, "eval_anthropic_toxic_prompts_bleu_score": 1.2532835276454266, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05331171100443469, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3187675178050995, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011214266181473656, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.8185715675354, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.652, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.368, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.082, "eval_anthropic_toxic_prompts_num_pred_words": 41.29, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 123.78814142986258, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.09247397774172243, "eval_anthropic_toxic_prompts_runtime": 9.9578, "eval_anthropic_toxic_prompts_samples_per_second": 50.212, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.16147119908169896, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005113289051911548, "eval_anthropic_toxic_prompts_token_set_precision": 0.16160135370102474, "eval_anthropic_toxic_prompts_token_set_recall": 0.20611317144977448, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 4375 }, { "epoch": 0.84, "eval_arxiv_accuracy": 0.23803125, "eval_arxiv_bleu_score": 1.5904026286564472, "eval_arxiv_bleu_score_sem": 0.051126216309231934, "eval_arxiv_emb_cos_sim": 0.3372165560722351, "eval_arxiv_emb_cos_sim_sem": 0.007429207206313575, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.879188060760498, "eval_arxiv_n_ngrams_match_1": 6.224, "eval_arxiv_n_ngrams_match_2": 0.788, "eval_arxiv_n_ngrams_match_3": 0.06, "eval_arxiv_num_pred_words": 30.362, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 131.52383116140922, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.15318202399655065, "eval_arxiv_runtime": 10.2322, "eval_arxiv_samples_per_second": 48.865, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.15064753576784035, "eval_arxiv_token_set_f1_sem": 0.0038878295756771355, "eval_arxiv_token_set_precision": 0.09639888091771633, "eval_arxiv_token_set_recall": 0.44283950377295084, "eval_arxiv_true_num_tokens": 64.0, "step": 4375 }, { "epoch": 0.84, "eval_python_code_alpaca_accuracy": 0.0880625, "eval_python_code_alpaca_bleu_score": 1.805712411774633, "eval_python_code_alpaca_bleu_score_sem": 0.060076659656152144, "eval_python_code_alpaca_emb_cos_sim": 0.24359974265098572, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007952776431940155, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.917336940765381, "eval_python_code_alpaca_n_ngrams_match_1": 3.066, "eval_python_code_alpaca_n_ngrams_match_2": 0.278, "eval_python_code_alpaca_n_ngrams_match_3": 0.038, "eval_python_code_alpaca_num_pred_words": 23.632, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 136.6382524846553, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.1275283288666622, "eval_python_code_alpaca_runtime": 10.3593, "eval_python_code_alpaca_samples_per_second": 48.266, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.19763638405963282, "eval_python_code_alpaca_token_set_f1_sem": 0.004958243877292779, "eval_python_code_alpaca_token_set_precision": 0.1474496415170886, "eval_python_code_alpaca_token_set_recall": 0.41065891886377515, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 4375 }, { "epoch": 0.84, "eval_wikibio_accuracy": 0.21459375, "eval_wikibio_bleu_score": 1.7894438470152427, "eval_wikibio_bleu_score_sem": 0.11503339179080427, "eval_wikibio_emb_cos_sim": 0.39553847908973694, "eval_wikibio_emb_cos_sim_sem": 0.011656311548279024, "eval_wikibio_emb_top1_equal": 0.078125, "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.125114440917969, "eval_wikibio_n_ngrams_match_1": 3.732, "eval_wikibio_n_ngrams_match_2": 0.956, "eval_wikibio_n_ngrams_match_3": 0.294, "eval_wikibio_num_pred_words": 32.488, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 168.19338875630183, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.10842636858697531, "eval_wikibio_runtime": 10.2257, "eval_wikibio_samples_per_second": 48.896, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.12859984932092094, "eval_wikibio_token_set_f1_sem": 0.006442311437587149, "eval_wikibio_token_set_precision": 0.11314253221462418, "eval_wikibio_token_set_recall": 0.1890339515749286, "eval_wikibio_true_num_tokens": 61.1328125, "step": 4375 }, { "epoch": 0.84, "eval_nq_accuracy": 0.3668125, "eval_nq_bleu_score": 4.249049033939418, "eval_nq_bleu_score_sem": 0.21391976509794372, "eval_nq_emb_cos_sim": 0.4965488314628601, "eval_nq_emb_cos_sim_sem": 0.011120440711243325, "eval_nq_emb_top1_equal": 0.046875, "eval_nq_emb_top1_equal_sem": 0.01875615101164758, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.631835460662842, "eval_nq_n_ngrams_match_1": 13.792, "eval_nq_n_ngrams_match_2": 3.378, "eval_nq_n_ngrams_match_3": 0.984, "eval_nq_num_pred_words": 46.09, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 37.7821005731044, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.24789971514855919, "eval_nq_runtime": 10.2112, "eval_nq_samples_per_second": 48.966, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.29225597797313135, "eval_nq_token_set_f1_sem": 0.005254747588326533, "eval_nq_token_set_precision": 0.22302434740971663, "eval_nq_token_set_recall": 0.47552982854746434, "eval_nq_true_num_tokens": 64.0, "step": 4375 }, { "epoch": 0.84, "learning_rate": 0.001, "loss": 3.9943, "step": 4380 }, { "epoch": 0.84, "learning_rate": 0.001, "loss": 4.0075, "step": 4392 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 4.0101, "step": 4404 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 4.0002, "step": 4416 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 3.9845, "step": 4428 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 3.9987, "step": 4440 }, { "epoch": 0.85, "learning_rate": 0.001, "loss": 3.987, "step": 4452 }, { "epoch": 0.86, "learning_rate": 0.001, "loss": 3.9701, "step": 4464 }, { "epoch": 0.86, "learning_rate": 0.001, "loss": 3.9805, "step": 4476 }, { "epoch": 0.86, "learning_rate": 0.001, "loss": 3.9753, "step": 4488 }, { "epoch": 0.86, "learning_rate": 0.001, "loss": 3.9649, "step": 4500 }, { "epoch": 0.87, "learning_rate": 0.001, "loss": 3.9742, "step": 4512 }, { "epoch": 0.87, "learning_rate": 0.001, "loss": 3.9667, "step": 4524 }, { "epoch": 0.87, "learning_rate": 0.001, "loss": 3.9714, "step": 4536 }, { "epoch": 0.87, "learning_rate": 0.001, "loss": 3.9829, "step": 4548 }, { "epoch": 0.88, "learning_rate": 0.001, "loss": 3.986, "step": 4560 }, { "epoch": 0.88, "learning_rate": 0.001, "loss": 3.9601, "step": 4572 }, { "epoch": 0.88, "learning_rate": 0.001, "loss": 3.9694, "step": 4584 }, { "epoch": 0.88, "learning_rate": 0.001, "loss": 3.9547, "step": 4596 }, { "epoch": 0.88, "learning_rate": 0.001, "loss": 3.9506, "step": 4608 }, { "epoch": 0.89, "learning_rate": 0.001, "loss": 3.9554, "step": 4620 }, { "epoch": 0.89, "learning_rate": 0.001, "loss": 3.9578, "step": 4632 }, { "epoch": 0.89, "learning_rate": 0.001, "loss": 3.9557, "step": 4644 }, { "epoch": 0.89, "learning_rate": 0.001, "loss": 3.9428, "step": 4656 }, { "epoch": 0.9, "learning_rate": 0.001, "loss": 3.9414, "step": 4668 }, { "epoch": 0.9, "learning_rate": 0.001, "loss": 3.9395, "step": 4680 }, { "epoch": 0.9, "learning_rate": 0.001, "loss": 3.9461, "step": 4692 }, { "epoch": 0.9, "learning_rate": 0.001, "loss": 3.9403, "step": 4704 }, { "epoch": 0.91, "learning_rate": 0.001, "loss": 3.9436, "step": 4716 }, { "epoch": 0.91, "learning_rate": 0.001, "loss": 3.9382, "step": 4728 }, { "epoch": 0.91, "learning_rate": 0.001, "loss": 3.9338, "step": 4740 }, { "epoch": 0.91, "learning_rate": 0.001, "loss": 3.9329, "step": 4752 }, { "epoch": 0.91, "learning_rate": 0.001, "loss": 3.9366, "step": 4764 }, { "epoch": 0.92, "learning_rate": 0.001, "loss": 3.938, "step": 4776 }, { "epoch": 0.92, "learning_rate": 0.001, "loss": 3.9251, "step": 4788 }, { "epoch": 0.92, "learning_rate": 0.001, "loss": 3.9298, "step": 4800 }, { "epoch": 0.92, "learning_rate": 0.001, "loss": 3.9208, "step": 4812 }, { "epoch": 0.93, "learning_rate": 0.001, "loss": 3.9149, "step": 4824 }, { "epoch": 0.93, "learning_rate": 0.001, "loss": 3.911, "step": 4836 }, { "epoch": 0.93, "learning_rate": 0.001, "loss": 3.9112, "step": 4848 }, { "epoch": 0.93, "learning_rate": 0.001, "loss": 3.9072, "step": 4860 }, { "epoch": 0.94, "learning_rate": 0.001, "loss": 3.9138, "step": 4872 }, { "epoch": 0.94, "learning_rate": 0.001, "loss": 3.9082, "step": 4884 }, { "epoch": 0.94, "learning_rate": 0.001, "loss": 3.9071, "step": 4896 }, { "epoch": 0.94, "learning_rate": 0.001, "loss": 3.9095, "step": 4908 }, { "epoch": 0.94, "learning_rate": 0.001, "loss": 3.9083, "step": 4920 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 3.9102, "step": 4932 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 3.9082, "step": 4944 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 3.9016, "step": 4956 }, { "epoch": 0.95, "learning_rate": 0.001, "loss": 3.9075, "step": 4968 }, { "epoch": 0.96, "learning_rate": 0.001, "loss": 3.8916, "step": 4980 }, { "epoch": 0.96, "learning_rate": 0.001, "loss": 3.8996, "step": 4992 }, { "epoch": 0.96, "eval_ag_news_accuracy": 0.2135, "eval_ag_news_bleu_score": 2.106691925728337, "eval_ag_news_bleu_score_sem": 0.077828882577352, "eval_ag_news_emb_cos_sim": 0.4324933886528015, "eval_ag_news_emb_cos_sim_sem": 0.010735109197465429, "eval_ag_news_emb_top1_equal": 0.0546875, "eval_ag_news_emb_top1_equal_sem": 0.020175758285348722, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 5.042166709899902, "eval_ag_news_n_ngrams_match_1": 6.972, "eval_ag_news_n_ngrams_match_2": 0.918, "eval_ag_news_n_ngrams_match_3": 0.196, "eval_ag_news_num_pred_words": 41.766, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 154.80506958862475, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.16074004098204467, "eval_ag_news_runtime": 10.7798, "eval_ag_news_samples_per_second": 46.383, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.19950133558324398, "eval_ag_news_token_set_f1_sem": 0.00428042751071768, "eval_ag_news_token_set_precision": 0.15435039810338688, "eval_ag_news_token_set_recall": 0.3339399142544686, "eval_ag_news_true_num_tokens": 56.09375, "step": 5000 }, { "epoch": 0.96, "eval_anthropic_toxic_prompts_accuracy": 0.06021875, "eval_anthropic_toxic_prompts_bleu_score": 1.2758001126979703, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.055173894615778714, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.32390105724334717, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01071198638270114, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.792735576629639, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.536, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.348, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.072, "eval_anthropic_toxic_prompts_num_pred_words": 38.92, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 120.6309128169782, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.08958896386189649, "eval_anthropic_toxic_prompts_runtime": 9.8634, "eval_anthropic_toxic_prompts_samples_per_second": 50.692, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.157478964395896, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005107972361080511, "eval_anthropic_toxic_prompts_token_set_precision": 0.1552205257452103, "eval_anthropic_toxic_prompts_token_set_recall": 0.21541269712103858, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 5000 }, { "epoch": 0.96, "eval_arxiv_accuracy": 0.24671875, "eval_arxiv_bleu_score": 1.61273635765576, "eval_arxiv_bleu_score_sem": 0.05466130060070739, "eval_arxiv_emb_cos_sim": 0.3310911953449249, "eval_arxiv_emb_cos_sim_sem": 0.008236715799091002, "eval_arxiv_emb_top1_equal": 0.1484375, "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.787303447723389, "eval_arxiv_n_ngrams_match_1": 6.426, "eval_arxiv_n_ngrams_match_2": 0.758, "eval_arxiv_n_ngrams_match_3": 0.058, "eval_arxiv_num_pred_words": 29.766, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 119.97740672012935, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.15514998462643764, "eval_arxiv_runtime": 9.9264, "eval_arxiv_samples_per_second": 50.371, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.16217454625457758, "eval_arxiv_token_set_f1_sem": 0.004081556464616621, "eval_arxiv_token_set_precision": 0.10590238642913431, "eval_arxiv_token_set_recall": 0.43923258693272826, "eval_arxiv_true_num_tokens": 64.0, "step": 5000 }, { "epoch": 0.96, "eval_python_code_alpaca_accuracy": 0.090875, "eval_python_code_alpaca_bleu_score": 1.799517111232777, "eval_python_code_alpaca_bleu_score_sem": 0.056360050673125536, "eval_python_code_alpaca_emb_cos_sim": 0.24102574586868286, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007653049118754933, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.885411262512207, "eval_python_code_alpaca_n_ngrams_match_1": 2.828, "eval_python_code_alpaca_n_ngrams_match_2": 0.204, "eval_python_code_alpaca_n_ngrams_match_3": 0.022, "eval_python_code_alpaca_num_pred_words": 21.402, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 132.34488263253957, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.12120203822033535, "eval_python_code_alpaca_runtime": 9.7802, "eval_python_code_alpaca_samples_per_second": 51.124, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.18527682422778052, "eval_python_code_alpaca_token_set_f1_sem": 0.004852335637347622, "eval_python_code_alpaca_token_set_precision": 0.14098828842063427, "eval_python_code_alpaca_token_set_recall": 0.3938896141698001, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 5000 }, { "epoch": 0.96, "eval_wikibio_accuracy": 0.2325625, "eval_wikibio_bleu_score": 2.338179388643287, "eval_wikibio_bleu_score_sem": 0.1289790578864121, "eval_wikibio_emb_cos_sim": 0.42805343866348267, "eval_wikibio_emb_cos_sim_sem": 0.014510646277584588, "eval_wikibio_emb_top1_equal": 0.0625, "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 5.084056377410889, "eval_wikibio_n_ngrams_match_1": 4.716, "eval_wikibio_n_ngrams_match_2": 1.188, "eval_wikibio_n_ngrams_match_3": 0.356, "eval_wikibio_num_pred_words": 32.336, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 161.42754067166106, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.1416051293678927, "eval_wikibio_runtime": 10.0452, "eval_wikibio_samples_per_second": 49.775, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.165290252508428, "eval_wikibio_token_set_f1_sem": 0.00636284664152837, "eval_wikibio_token_set_precision": 0.1467745112418667, "eval_wikibio_token_set_recall": 0.2362651750709804, "eval_wikibio_true_num_tokens": 61.1328125, "step": 5000 }, { "epoch": 0.96, "eval_nq_accuracy": 0.37525, "eval_nq_bleu_score": 4.557390861796473, "eval_nq_bleu_score_sem": 0.23243420417502222, "eval_nq_emb_cos_sim": 0.5068303346633911, "eval_nq_emb_cos_sim_sem": 0.011473194028211318, "eval_nq_emb_top1_equal": 0.0625, "eval_nq_emb_top1_equal_sem": 0.02147948148198014, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.537583589553833, "eval_nq_n_ngrams_match_1": 14.022, "eval_nq_n_ngrams_match_2": 3.49, "eval_nq_n_ngrams_match_3": 1.104, "eval_nq_num_pred_words": 45.286, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 34.38373351312476, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.2531003356575011, "eval_nq_runtime": 10.5932, "eval_nq_samples_per_second": 47.2, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.2972630078217043, "eval_nq_token_set_f1_sem": 0.0054111995513725095, "eval_nq_token_set_precision": 0.22921126040451803, "eval_nq_token_set_recall": 0.47529325357232133, "eval_nq_true_num_tokens": 64.0, "step": 5000 }, { "epoch": 0.96, "learning_rate": 0.001, "loss": 3.8936, "step": 5004 }, { "epoch": 0.96, "learning_rate": 0.001, "loss": 3.888, "step": 5016 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 3.8867, "step": 5028 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 3.8891, "step": 5040 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 3.8846, "step": 5052 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 3.8876, "step": 5064 }, { "epoch": 0.97, "learning_rate": 0.001, "loss": 3.8796, "step": 5076 }, { "epoch": 0.98, "learning_rate": 0.001, "loss": 3.8946, "step": 5088 }, { "epoch": 0.98, "learning_rate": 0.001, "loss": 3.8845, "step": 5100 }, { "epoch": 0.98, "learning_rate": 0.001, "loss": 3.881, "step": 5112 }, { "epoch": 0.98, "learning_rate": 0.001, "loss": 3.8666, "step": 5124 }, { "epoch": 0.99, "learning_rate": 0.001, "loss": 3.8721, "step": 5136 }, { "epoch": 0.99, "learning_rate": 0.001, "loss": 3.881, "step": 5148 }, { "epoch": 0.99, "learning_rate": 0.001, "loss": 3.8671, "step": 5160 }, { "epoch": 0.99, "learning_rate": 0.001, "loss": 3.8598, "step": 5172 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 3.8737, "step": 5184 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 3.8576, "step": 5196 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 3.8691, "step": 5208 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 3.8648, "step": 5220 }, { "epoch": 1.0, "learning_rate": 0.001, "loss": 3.8565, "step": 5232 }, { "epoch": 1.01, "learning_rate": 0.001, "loss": 3.8543, "step": 5244 }, { "epoch": 1.01, "learning_rate": 0.001, "loss": 3.8476, "step": 5256 }, { "epoch": 1.01, "learning_rate": 0.001, "loss": 3.8492, "step": 5268 }, { "epoch": 1.01, "learning_rate": 0.001, "loss": 3.8457, "step": 5280 }, { "epoch": 1.02, "learning_rate": 0.001, "loss": 3.829, "step": 5292 }, { "epoch": 1.02, "learning_rate": 0.001, "loss": 3.8285, "step": 5304 }, { "epoch": 1.02, "learning_rate": 0.001, "loss": 3.831, "step": 5316 }, { "epoch": 1.02, "learning_rate": 0.001, "loss": 3.8359, "step": 5328 }, { "epoch": 1.03, "learning_rate": 0.001, "loss": 3.8366, "step": 5340 }, { "epoch": 1.03, "learning_rate": 0.001, "loss": 3.8273, "step": 5352 }, { "epoch": 1.03, "learning_rate": 0.001, "loss": 3.835, "step": 5364 }, { "epoch": 1.03, "learning_rate": 0.001, "loss": 3.8325, "step": 5376 }, { "epoch": 1.03, "learning_rate": 0.001, "loss": 3.831, "step": 5388 }, { "epoch": 1.04, "learning_rate": 0.001, "loss": 3.8246, "step": 5400 }, { "epoch": 1.04, "learning_rate": 0.001, "loss": 3.8218, "step": 5412 }, { "epoch": 1.04, "learning_rate": 0.001, "loss": 3.8199, "step": 5424 }, { "epoch": 1.04, "learning_rate": 0.001, "loss": 3.8166, "step": 5436 }, { "epoch": 1.05, "learning_rate": 0.001, "loss": 3.8245, "step": 5448 }, { "epoch": 1.05, "learning_rate": 0.001, "loss": 3.8157, "step": 5460 }, { "epoch": 1.05, "learning_rate": 0.001, "loss": 3.8211, "step": 5472 }, { "epoch": 1.05, "learning_rate": 0.001, "loss": 3.8138, "step": 5484 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 3.8078, "step": 5496 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 3.8049, "step": 5508 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 3.8187, "step": 5520 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 3.8156, "step": 5532 }, { "epoch": 1.06, "learning_rate": 0.001, "loss": 3.8031, "step": 5544 }, { "epoch": 1.07, "learning_rate": 0.001, "loss": 3.7988, "step": 5556 }, { "epoch": 1.07, "learning_rate": 0.001, "loss": 3.8095, "step": 5568 }, { "epoch": 1.07, "learning_rate": 0.001, "loss": 3.7981, "step": 5580 }, { "epoch": 1.07, "learning_rate": 0.001, "loss": 3.7886, "step": 5592 }, { "epoch": 1.08, "learning_rate": 0.001, "loss": 3.8051, "step": 5604 }, { "epoch": 1.08, "learning_rate": 0.001, "loss": 3.7981, "step": 5616 }, { "epoch": 1.08, "eval_ag_news_accuracy": 0.21903125, "eval_ag_news_bleu_score": 2.0852212979487286, "eval_ag_news_bleu_score_sem": 0.0772518840990318, "eval_ag_news_emb_cos_sim": 0.45783761143684387, "eval_ag_news_emb_cos_sim_sem": 0.011666158105762381, "eval_ag_news_emb_top1_equal": 0.078125, "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.948986530303955, "eval_ag_news_n_ngrams_match_1": 6.886, "eval_ag_news_n_ngrams_match_2": 0.95, "eval_ag_news_n_ngrams_match_3": 0.168, "eval_ag_news_num_pred_words": 41.724, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 141.0319598510883, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.16286868877192381, "eval_ag_news_runtime": 13.7559, "eval_ag_news_samples_per_second": 36.348, "eval_ag_news_steps_per_second": 0.073, "eval_ag_news_token_set_f1": 0.2019419518206525, "eval_ag_news_token_set_f1_sem": 0.00438958396582974, "eval_ag_news_token_set_precision": 0.15462844390118893, "eval_ag_news_token_set_recall": 0.3451922715581034, "eval_ag_news_true_num_tokens": 56.09375, "step": 5625 }, { "epoch": 1.08, "eval_anthropic_toxic_prompts_accuracy": 0.063125, "eval_anthropic_toxic_prompts_bleu_score": 1.3700619974288806, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.065019037336038, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3364288806915283, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011141132179684142, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.73537540435791, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.64, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.358, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.072, "eval_anthropic_toxic_prompts_num_pred_words": 39.0, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 113.90621158112307, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.09601927839850205, "eval_anthropic_toxic_prompts_runtime": 14.1978, "eval_anthropic_toxic_prompts_samples_per_second": 35.217, "eval_anthropic_toxic_prompts_steps_per_second": 0.07, "eval_anthropic_toxic_prompts_token_set_f1": 0.17096730800689502, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0054358416627761875, "eval_anthropic_toxic_prompts_token_set_precision": 0.1633684673481088, "eval_anthropic_toxic_prompts_token_set_recall": 0.23170527979202382, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 5625 }, { "epoch": 1.08, "eval_arxiv_accuracy": 0.25628125, "eval_arxiv_bleu_score": 1.5205202660326982, "eval_arxiv_bleu_score_sem": 0.05244642374548896, "eval_arxiv_emb_cos_sim": 0.35741275548934937, "eval_arxiv_emb_cos_sim_sem": 0.009298090665517733, "eval_arxiv_emb_top1_equal": 0.109375, "eval_arxiv_emb_top1_equal_sem": 0.027695207821224692, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.674412250518799, "eval_arxiv_n_ngrams_match_1": 6.126, "eval_arxiv_n_ngrams_match_2": 0.69, "eval_arxiv_n_ngrams_match_3": 0.052, "eval_arxiv_num_pred_words": 28.542, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 107.1695597244559, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.1524461639578788, "eval_arxiv_runtime": 13.2423, "eval_arxiv_samples_per_second": 37.758, "eval_arxiv_steps_per_second": 0.076, "eval_arxiv_token_set_f1": 0.16228751855782006, "eval_arxiv_token_set_f1_sem": 0.0040447406491193215, "eval_arxiv_token_set_precision": 0.10593070802353624, "eval_arxiv_token_set_recall": 0.43974020961337784, "eval_arxiv_true_num_tokens": 64.0, "step": 5625 }, { "epoch": 1.08, "eval_python_code_alpaca_accuracy": 0.092875, "eval_python_code_alpaca_bleu_score": 1.9706582151946554, "eval_python_code_alpaca_bleu_score_sem": 0.05821691067171762, "eval_python_code_alpaca_emb_cos_sim": 0.25949281454086304, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007641797885836048, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.772955894470215, "eval_python_code_alpaca_n_ngrams_match_1": 3.392, "eval_python_code_alpaca_n_ngrams_match_2": 0.268, "eval_python_code_alpaca_n_ngrams_match_3": 0.036, "eval_python_code_alpaca_num_pred_words": 24.73, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 118.26831445247535, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.13994791171993992, "eval_python_code_alpaca_runtime": 12.8739, "eval_python_code_alpaca_samples_per_second": 38.838, "eval_python_code_alpaca_steps_per_second": 0.078, "eval_python_code_alpaca_token_set_f1": 0.2212643767192042, "eval_python_code_alpaca_token_set_f1_sem": 0.0048426131391084365, "eval_python_code_alpaca_token_set_precision": 0.16368758003188943, "eval_python_code_alpaca_token_set_recall": 0.44087733508321764, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 5625 }, { "epoch": 1.08, "eval_wikibio_accuracy": 0.23365625, "eval_wikibio_bleu_score": 2.568111483794025, "eval_wikibio_bleu_score_sem": 0.14700819701895634, "eval_wikibio_emb_cos_sim": 0.4436691403388977, "eval_wikibio_emb_cos_sim_sem": 0.013843581520757589, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.976119041442871, "eval_wikibio_n_ngrams_match_1": 4.93, "eval_wikibio_n_ngrams_match_2": 1.328, "eval_wikibio_n_ngrams_match_3": 0.382, "eval_wikibio_num_pred_words": 31.756, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 144.91089574727437, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.15660539151735578, "eval_wikibio_runtime": 12.4222, "eval_wikibio_samples_per_second": 40.251, "eval_wikibio_steps_per_second": 0.081, "eval_wikibio_token_set_f1": 0.1738614240702313, "eval_wikibio_token_set_f1_sem": 0.006722789829280542, "eval_wikibio_token_set_precision": 0.15653899173453673, "eval_wikibio_token_set_recall": 0.23529594477293445, "eval_wikibio_true_num_tokens": 61.1328125, "step": 5625 }, { "epoch": 1.08, "eval_nq_accuracy": 0.3849375, "eval_nq_bleu_score": 4.976035605292294, "eval_nq_bleu_score_sem": 0.2442060204228987, "eval_nq_emb_cos_sim": 0.5293945074081421, "eval_nq_emb_cos_sim_sem": 0.0113166395594176, "eval_nq_emb_top1_equal": 0.125, "eval_nq_emb_top1_equal_sem": 0.02934655822437397, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.4444541931152344, "eval_nq_n_ngrams_match_1": 14.606, "eval_nq_n_ngrams_match_2": 3.786, "eval_nq_n_ngrams_match_3": 1.268, "eval_nq_num_pred_words": 45.808, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 31.326180734586615, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.26750302501509104, "eval_nq_runtime": 13.693, "eval_nq_samples_per_second": 36.515, "eval_nq_steps_per_second": 0.073, "eval_nq_token_set_f1": 0.3101056589551926, "eval_nq_token_set_f1_sem": 0.005158972462744809, "eval_nq_token_set_precision": 0.24073283826221548, "eval_nq_token_set_recall": 0.48357738017709817, "eval_nq_true_num_tokens": 64.0, "step": 5625 }, { "epoch": 1.08, "learning_rate": 0.001, "loss": 3.797, "step": 5628 }, { "epoch": 1.08, "learning_rate": 0.001, "loss": 3.7992, "step": 5640 }, { "epoch": 1.09, "learning_rate": 0.001, "loss": 3.7971, "step": 5652 }, { "epoch": 1.09, "learning_rate": 0.001, "loss": 3.7993, "step": 5664 }, { "epoch": 1.09, "learning_rate": 0.001, "loss": 3.7937, "step": 5676 }, { "epoch": 1.09, "learning_rate": 0.001, "loss": 3.7853, "step": 5688 }, { "epoch": 1.09, "learning_rate": 0.001, "loss": 3.789, "step": 5700 }, { "epoch": 1.1, "learning_rate": 0.001, "loss": 3.7834, "step": 5712 }, { "epoch": 1.1, "learning_rate": 0.001, "loss": 3.7776, "step": 5724 }, { "epoch": 1.1, "learning_rate": 0.001, "loss": 3.7867, "step": 5736 }, { "epoch": 1.1, "learning_rate": 0.001, "loss": 3.7764, "step": 5748 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 3.778, "step": 5760 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 3.7792, "step": 5772 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 3.7707, "step": 5784 }, { "epoch": 1.11, "learning_rate": 0.001, "loss": 3.7826, "step": 5796 }, { "epoch": 1.12, "learning_rate": 0.001, "loss": 3.7763, "step": 5808 }, { "epoch": 1.12, "learning_rate": 0.001, "loss": 3.7738, "step": 5820 }, { "epoch": 1.12, "learning_rate": 0.001, "loss": 3.7572, "step": 5832 }, { "epoch": 1.12, "learning_rate": 0.001, "loss": 3.7643, "step": 5844 }, { "epoch": 1.12, "learning_rate": 0.001, "loss": 3.7509, "step": 5856 }, { "epoch": 1.13, "learning_rate": 0.001, "loss": 3.7663, "step": 5868 }, { "epoch": 1.13, "learning_rate": 0.001, "loss": 3.7724, "step": 5880 }, { "epoch": 1.13, "learning_rate": 0.001, "loss": 3.7559, "step": 5892 }, { "epoch": 1.13, "learning_rate": 0.001, "loss": 3.7517, "step": 5904 }, { "epoch": 1.14, "learning_rate": 0.001, "loss": 3.7599, "step": 5916 }, { "epoch": 1.14, "learning_rate": 0.001, "loss": 3.7574, "step": 5928 }, { "epoch": 1.14, "learning_rate": 0.001, "loss": 3.7599, "step": 5940 }, { "epoch": 1.14, "learning_rate": 0.001, "loss": 3.7517, "step": 5952 }, { "epoch": 1.15, "learning_rate": 0.001, "loss": 3.7533, "step": 5964 }, { "epoch": 1.15, "learning_rate": 0.001, "loss": 3.7509, "step": 5976 }, { "epoch": 1.15, "learning_rate": 0.001, "loss": 3.7469, "step": 5988 }, { "epoch": 1.15, "learning_rate": 0.001, "loss": 3.7452, "step": 6000 }, { "epoch": 1.15, "learning_rate": 0.001, "loss": 3.7334, "step": 6012 }, { "epoch": 1.16, "learning_rate": 0.001, "loss": 3.7406, "step": 6024 }, { "epoch": 1.16, "learning_rate": 0.001, "loss": 3.743, "step": 6036 }, { "epoch": 1.16, "learning_rate": 0.001, "loss": 3.7536, "step": 6048 }, { "epoch": 1.16, "learning_rate": 0.001, "loss": 3.7452, "step": 6060 }, { "epoch": 1.17, "learning_rate": 0.001, "loss": 3.7401, "step": 6072 }, { "epoch": 1.17, "learning_rate": 0.001, "loss": 3.7392, "step": 6084 }, { "epoch": 1.17, "learning_rate": 0.001, "loss": 3.737, "step": 6096 }, { "epoch": 1.17, "learning_rate": 0.001, "loss": 3.7437, "step": 6108 }, { "epoch": 1.18, "learning_rate": 0.001, "loss": 3.7357, "step": 6120 }, { "epoch": 1.18, "learning_rate": 0.001, "loss": 3.7265, "step": 6132 }, { "epoch": 1.18, "learning_rate": 0.001, "loss": 3.7375, "step": 6144 }, { "epoch": 1.18, "learning_rate": 0.001, "loss": 3.7297, "step": 6156 }, { "epoch": 1.18, "learning_rate": 0.001, "loss": 3.7449, "step": 6168 }, { "epoch": 1.19, "learning_rate": 0.001, "loss": 3.7342, "step": 6180 }, { "epoch": 1.19, "learning_rate": 0.001, "loss": 3.7372, "step": 6192 }, { "epoch": 1.19, "learning_rate": 0.001, "loss": 3.7281, "step": 6204 }, { "epoch": 1.19, "learning_rate": 0.001, "loss": 3.7244, "step": 6216 }, { "epoch": 1.2, "learning_rate": 0.001, "loss": 3.7202, "step": 6228 }, { "epoch": 1.2, "learning_rate": 0.001, "loss": 3.7292, "step": 6240 }, { "epoch": 1.2, "eval_ag_news_accuracy": 0.223625, "eval_ag_news_bleu_score": 2.2003893664469225, "eval_ag_news_bleu_score_sem": 0.07600295288295764, "eval_ag_news_emb_cos_sim": 0.46764129400253296, "eval_ag_news_emb_cos_sim_sem": 0.012417467827317579, "eval_ag_news_emb_top1_equal": 0.078125, "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.886475086212158, "eval_ag_news_n_ngrams_match_1": 7.52, "eval_ag_news_n_ngrams_match_2": 1.008, "eval_ag_news_n_ngrams_match_3": 0.162, "eval_ag_news_num_pred_words": 42.634, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 132.48574917054702, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.17402363367654597, "eval_ag_news_runtime": 13.7861, "eval_ag_news_samples_per_second": 36.268, "eval_ag_news_steps_per_second": 0.073, "eval_ag_news_token_set_f1": 0.21561945554735026, "eval_ag_news_token_set_f1_sem": 0.004142957390752193, "eval_ag_news_token_set_precision": 0.16989426677959563, "eval_ag_news_token_set_recall": 0.34137294115375916, "eval_ag_news_true_num_tokens": 56.09375, "step": 6250 }, { "epoch": 1.2, "eval_anthropic_toxic_prompts_accuracy": 0.0645625, "eval_anthropic_toxic_prompts_bleu_score": 1.39568488779284, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05493467295759028, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.35283318161964417, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012606663713193966, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.620936393737793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.878, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.438, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.102, "eval_anthropic_toxic_prompts_num_pred_words": 41.662, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 101.58911501621725, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.10442383902310974, "eval_anthropic_toxic_prompts_runtime": 12.5933, "eval_anthropic_toxic_prompts_samples_per_second": 39.704, "eval_anthropic_toxic_prompts_steps_per_second": 0.079, "eval_anthropic_toxic_prompts_token_set_f1": 0.18060501522240538, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0052871735653605475, "eval_anthropic_toxic_prompts_token_set_precision": 0.18159040562726841, "eval_anthropic_toxic_prompts_token_set_recall": 0.22196139809902857, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 6250 }, { "epoch": 1.2, "eval_arxiv_accuracy": 0.2576875, "eval_arxiv_bleu_score": 1.78556157209702, "eval_arxiv_bleu_score_sem": 0.054154260798773265, "eval_arxiv_emb_cos_sim": 0.3743008077144623, "eval_arxiv_emb_cos_sim_sem": 0.009504373805742749, "eval_arxiv_emb_top1_equal": 0.1484375, "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.627006530761719, "eval_arxiv_n_ngrams_match_1": 7.1, "eval_arxiv_n_ngrams_match_2": 0.86, "eval_arxiv_n_ngrams_match_3": 0.064, "eval_arxiv_num_pred_words": 32.822, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 102.20765026211305, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.17126011039447742, "eval_arxiv_runtime": 13.9229, "eval_arxiv_samples_per_second": 35.912, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.17412617347653625, "eval_arxiv_token_set_f1_sem": 0.0041455396705183, "eval_arxiv_token_set_precision": 0.11760084720737768, "eval_arxiv_token_set_recall": 0.41740520863090214, "eval_arxiv_true_num_tokens": 64.0, "step": 6250 }, { "epoch": 1.2, "eval_python_code_alpaca_accuracy": 0.0935625, "eval_python_code_alpaca_bleu_score": 1.8913629355214616, "eval_python_code_alpaca_bleu_score_sem": 0.053854219366134104, "eval_python_code_alpaca_emb_cos_sim": 0.2851085662841797, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008405237338683778, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.660333156585693, "eval_python_code_alpaca_n_ngrams_match_1": 3.96, "eval_python_code_alpaca_n_ngrams_match_2": 0.354, "eval_python_code_alpaca_n_ngrams_match_3": 0.056, "eval_python_code_alpaca_num_pred_words": 29.21, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 105.67128137621717, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.15402839704368126, "eval_python_code_alpaca_runtime": 13.0877, "eval_python_code_alpaca_samples_per_second": 38.204, "eval_python_code_alpaca_steps_per_second": 0.076, "eval_python_code_alpaca_token_set_f1": 0.2385022744973973, "eval_python_code_alpaca_token_set_f1_sem": 0.004628218158418873, "eval_python_code_alpaca_token_set_precision": 0.19286711580654978, "eval_python_code_alpaca_token_set_recall": 0.3931378201845569, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 6250 }, { "epoch": 1.2, "eval_wikibio_accuracy": 0.2395625, "eval_wikibio_bleu_score": 2.898854425610419, "eval_wikibio_bleu_score_sem": 0.1332226159733259, "eval_wikibio_emb_cos_sim": 0.49516212940216064, "eval_wikibio_emb_cos_sim_sem": 0.014601578103246842, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.961064338684082, "eval_wikibio_n_ngrams_match_1": 6.064, "eval_wikibio_n_ngrams_match_2": 1.618, "eval_wikibio_n_ngrams_match_3": 0.464, "eval_wikibio_num_pred_words": 32.302, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 142.74564478506045, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.18800090028871008, "eval_wikibio_runtime": 13.1337, "eval_wikibio_samples_per_second": 38.07, "eval_wikibio_steps_per_second": 0.076, "eval_wikibio_token_set_f1": 0.20637467220203892, "eval_wikibio_token_set_f1_sem": 0.006449955585241066, "eval_wikibio_token_set_precision": 0.1918647587293939, "eval_wikibio_token_set_recall": 0.25269720979397575, "eval_wikibio_true_num_tokens": 61.1328125, "step": 6250 }, { "epoch": 1.2, "eval_nq_accuracy": 0.38953125, "eval_nq_bleu_score": 4.960028595506113, "eval_nq_bleu_score_sem": 0.21695526516078933, "eval_nq_emb_cos_sim": 0.5501593351364136, "eval_nq_emb_cos_sim_sem": 0.011242187151523107, "eval_nq_emb_top1_equal": 0.1171875, "eval_nq_emb_top1_equal_sem": 0.02854125312152025, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.377890110015869, "eval_nq_n_ngrams_match_1": 15.26, "eval_nq_n_ngrams_match_2": 3.924, "eval_nq_n_ngrams_match_3": 1.21, "eval_nq_num_pred_words": 47.27, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 29.308867345525517, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.27758628528050355, "eval_nq_runtime": 13.1406, "eval_nq_samples_per_second": 38.05, "eval_nq_steps_per_second": 0.076, "eval_nq_token_set_f1": 0.321318805484751, "eval_nq_token_set_f1_sem": 0.005013425778853512, "eval_nq_token_set_precision": 0.25456366425467947, "eval_nq_token_set_recall": 0.47418945549125713, "eval_nq_true_num_tokens": 64.0, "step": 6250 }, { "epoch": 1.2, "learning_rate": 0.001, "loss": 3.7221, "step": 6252 }, { "epoch": 1.2, "learning_rate": 0.001, "loss": 3.7203, "step": 6264 }, { "epoch": 1.21, "learning_rate": 0.001, "loss": 3.7212, "step": 6276 }, { "epoch": 1.21, "learning_rate": 0.001, "loss": 3.7213, "step": 6288 }, { "epoch": 1.21, "learning_rate": 0.001, "loss": 3.7104, "step": 6300 }, { "epoch": 1.21, "learning_rate": 0.001, "loss": 3.7183, "step": 6312 }, { "epoch": 1.21, "learning_rate": 0.001, "loss": 3.7201, "step": 6324 }, { "epoch": 1.22, "learning_rate": 0.001, "loss": 3.7067, "step": 6336 }, { "epoch": 1.22, "learning_rate": 0.001, "loss": 3.7074, "step": 6348 }, { "epoch": 1.22, "learning_rate": 0.001, "loss": 3.7074, "step": 6360 }, { "epoch": 1.22, "learning_rate": 0.001, "loss": 3.707, "step": 6372 }, { "epoch": 1.23, "learning_rate": 0.001, "loss": 3.7127, "step": 6384 }, { "epoch": 1.23, "learning_rate": 0.001, "loss": 3.7058, "step": 6396 }, { "epoch": 1.23, "learning_rate": 0.001, "loss": 3.7065, "step": 6408 }, { "epoch": 1.23, "learning_rate": 0.001, "loss": 3.7012, "step": 6420 }, { "epoch": 1.24, "learning_rate": 0.001, "loss": 3.6866, "step": 6432 }, { "epoch": 1.24, "learning_rate": 0.001, "loss": 3.7008, "step": 6444 }, { "epoch": 1.24, "learning_rate": 0.001, "loss": 3.6935, "step": 6456 }, { "epoch": 1.24, "learning_rate": 0.001, "loss": 3.6989, "step": 6468 }, { "epoch": 1.24, "learning_rate": 0.001, "loss": 3.6949, "step": 6480 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 3.6986, "step": 6492 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 3.7, "step": 6504 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 3.6935, "step": 6516 }, { "epoch": 1.25, "learning_rate": 0.001, "loss": 3.6948, "step": 6528 }, { "epoch": 1.26, "learning_rate": 0.001, "loss": 3.6923, "step": 6540 }, { "epoch": 1.26, "learning_rate": 0.001, "loss": 3.6941, "step": 6552 }, { "epoch": 1.26, "learning_rate": 0.001, "loss": 3.679, "step": 6564 }, { "epoch": 1.26, "learning_rate": 0.001, "loss": 3.6793, "step": 6576 }, { "epoch": 1.26, "learning_rate": 0.001, "loss": 3.6846, "step": 6588 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 3.6882, "step": 6600 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 3.6831, "step": 6612 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 3.6803, "step": 6624 }, { "epoch": 1.27, "learning_rate": 0.001, "loss": 3.6874, "step": 6636 }, { "epoch": 1.28, "learning_rate": 0.001, "loss": 3.6734, "step": 6648 }, { "epoch": 1.28, "learning_rate": 0.001, "loss": 3.6657, "step": 6660 }, { "epoch": 1.28, "learning_rate": 0.001, "loss": 3.6804, "step": 6672 }, { "epoch": 1.28, "learning_rate": 0.001, "loss": 3.6708, "step": 6684 }, { "epoch": 1.29, "learning_rate": 0.001, "loss": 3.6885, "step": 6696 }, { "epoch": 1.29, "learning_rate": 0.001, "loss": 3.6637, "step": 6708 }, { "epoch": 1.29, "learning_rate": 0.001, "loss": 3.6682, "step": 6720 }, { "epoch": 1.29, "learning_rate": 0.001, "loss": 3.6622, "step": 6732 }, { "epoch": 1.29, "learning_rate": 0.001, "loss": 3.6721, "step": 6744 }, { "epoch": 1.3, "learning_rate": 0.001, "loss": 3.6739, "step": 6756 }, { "epoch": 1.3, "learning_rate": 0.001, "loss": 3.6681, "step": 6768 }, { "epoch": 1.3, "learning_rate": 0.001, "loss": 3.6696, "step": 6780 }, { "epoch": 1.3, "learning_rate": 0.001, "loss": 3.665, "step": 6792 }, { "epoch": 1.31, "learning_rate": 0.001, "loss": 3.6653, "step": 6804 }, { "epoch": 1.31, "learning_rate": 0.001, "loss": 3.6636, "step": 6816 }, { "epoch": 1.31, "learning_rate": 0.001, "loss": 3.662, "step": 6828 }, { "epoch": 1.31, "learning_rate": 0.001, "loss": 3.6646, "step": 6840 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 3.6456, "step": 6852 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 3.656, "step": 6864 }, { "epoch": 1.32, "eval_ag_news_accuracy": 0.2288125, "eval_ag_news_bleu_score": 2.359588628164019, "eval_ag_news_bleu_score_sem": 0.07888955746474915, "eval_ag_news_emb_cos_sim": 0.49569883942604065, "eval_ag_news_emb_cos_sim_sem": 0.01201664998088996, "eval_ag_news_emb_top1_equal": 0.09375, "eval_ag_news_emb_top1_equal_sem": 0.025864720141013958, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.809776306152344, "eval_ag_news_n_ngrams_match_1": 7.838, "eval_ag_news_n_ngrams_match_2": 1.138, "eval_ag_news_n_ngrams_match_3": 0.254, "eval_ag_news_num_pred_words": 42.598, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 122.7041662799646, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.1838621076063327, "eval_ag_news_runtime": 10.7475, "eval_ag_news_samples_per_second": 46.522, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.22020565527904667, "eval_ag_news_token_set_f1_sem": 0.004352469007725244, "eval_ag_news_token_set_precision": 0.17789081828133854, "eval_ag_news_token_set_recall": 0.3290245425284207, "eval_ag_news_true_num_tokens": 56.09375, "step": 6875 }, { "epoch": 1.32, "eval_anthropic_toxic_prompts_accuracy": 0.06628125, "eval_anthropic_toxic_prompts_bleu_score": 1.426577494431888, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05917457012513431, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.352634072303772, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011052422283191822, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.573147296905518, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 2.974, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.454, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.104, "eval_anthropic_toxic_prompts_num_pred_words": 41.436, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 96.84844141057704, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.10815269320897025, "eval_anthropic_toxic_prompts_runtime": 10.3478, "eval_anthropic_toxic_prompts_samples_per_second": 48.319, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.1814536768355274, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005195580488782654, "eval_anthropic_toxic_prompts_token_set_precision": 0.19031185557393956, "eval_anthropic_toxic_prompts_token_set_recall": 0.2187545456208069, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 6875 }, { "epoch": 1.32, "eval_arxiv_accuracy": 0.2654375, "eval_arxiv_bleu_score": 1.8755043412212522, "eval_arxiv_bleu_score_sem": 0.05895104761607006, "eval_arxiv_emb_cos_sim": 0.39819103479385376, "eval_arxiv_emb_cos_sim_sem": 0.00897163698202054, "eval_arxiv_emb_top1_equal": 0.1171875, "eval_arxiv_emb_top1_equal_sem": 0.02854125312152025, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.549220561981201, "eval_arxiv_n_ngrams_match_1": 7.446, "eval_arxiv_n_ngrams_match_2": 0.942, "eval_arxiv_n_ngrams_match_3": 0.088, "eval_arxiv_num_pred_words": 33.468, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 94.55867695631777, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.17669009847892378, "eval_arxiv_runtime": 10.13, "eval_arxiv_samples_per_second": 49.358, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.18453232926216256, "eval_arxiv_token_set_f1_sem": 0.004204942824465272, "eval_arxiv_token_set_precision": 0.1267731601602522, "eval_arxiv_token_set_recall": 0.41309199186316686, "eval_arxiv_true_num_tokens": 64.0, "step": 6875 }, { "epoch": 1.32, "eval_python_code_alpaca_accuracy": 0.096375, "eval_python_code_alpaca_bleu_score": 2.0690713383744743, "eval_python_code_alpaca_bleu_score_sem": 0.06504442325736357, "eval_python_code_alpaca_emb_cos_sim": 0.2938317656517029, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008932365338831499, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.578368186950684, "eval_python_code_alpaca_n_ngrams_match_1": 3.982, "eval_python_code_alpaca_n_ngrams_match_2": 0.452, "eval_python_code_alpaca_n_ngrams_match_3": 0.07, "eval_python_code_alpaca_num_pred_words": 28.472, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 97.35539870683805, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.14943516113949668, "eval_python_code_alpaca_runtime": 10.0591, "eval_python_code_alpaca_samples_per_second": 49.706, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.2358850037931322, "eval_python_code_alpaca_token_set_f1_sem": 0.005003715120287452, "eval_python_code_alpaca_token_set_precision": 0.19193841138712459, "eval_python_code_alpaca_token_set_recall": 0.39438893841796385, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 6875 }, { "epoch": 1.32, "eval_wikibio_accuracy": 0.24259375, "eval_wikibio_bleu_score": 3.2213731771417753, "eval_wikibio_bleu_score_sem": 0.15564141135127274, "eval_wikibio_emb_cos_sim": 0.5043472051620483, "eval_wikibio_emb_cos_sim_sem": 0.014252313755082142, "eval_wikibio_emb_top1_equal": 0.078125, "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.8358540534973145, "eval_wikibio_n_ngrams_match_1": 6.44, "eval_wikibio_n_ngrams_match_2": 1.822, "eval_wikibio_n_ngrams_match_3": 0.55, "eval_wikibio_num_pred_words": 33.56, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 125.94610199582124, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2040199948293262, "eval_wikibio_runtime": 10.1916, "eval_wikibio_samples_per_second": 49.06, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.21533535497686207, "eval_wikibio_token_set_f1_sem": 0.006517295930449522, "eval_wikibio_token_set_precision": 0.20380042686358332, "eval_wikibio_token_set_recall": 0.2625841405555016, "eval_wikibio_true_num_tokens": 61.1328125, "step": 6875 }, { "epoch": 1.32, "eval_nq_accuracy": 0.39715625, "eval_nq_bleu_score": 5.412779838370413, "eval_nq_bleu_score_sem": 0.251853099021192, "eval_nq_emb_cos_sim": 0.5769795775413513, "eval_nq_emb_cos_sim_sem": 0.011507360634374476, "eval_nq_emb_top1_equal": 0.1328125, "eval_nq_emb_top1_equal_sem": 0.030114394778901498, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.3065907955169678, "eval_nq_n_ngrams_match_1": 15.466, "eval_nq_n_ngrams_match_2": 4.076, "eval_nq_n_ngrams_match_3": 1.442, "eval_nq_num_pred_words": 46.512, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 27.291922942887755, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.2815078504978107, "eval_nq_runtime": 10.4269, "eval_nq_samples_per_second": 47.953, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.3240591933206866, "eval_nq_token_set_f1_sem": 0.00509215338977105, "eval_nq_token_set_precision": 0.25852701274901707, "eval_nq_token_set_recall": 0.47132384335838723, "eval_nq_true_num_tokens": 64.0, "step": 6875 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 3.6588, "step": 6876 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 3.644, "step": 6888 }, { "epoch": 1.32, "learning_rate": 0.001, "loss": 3.6498, "step": 6900 }, { "epoch": 1.33, "learning_rate": 0.001, "loss": 3.6553, "step": 6912 }, { "epoch": 1.33, "learning_rate": 0.001, "loss": 3.6518, "step": 6924 }, { "epoch": 1.33, "learning_rate": 0.001, "loss": 3.6361, "step": 6936 }, { "epoch": 1.33, "learning_rate": 0.001, "loss": 3.6353, "step": 6948 }, { "epoch": 1.34, "learning_rate": 0.001, "loss": 3.6405, "step": 6960 }, { "epoch": 1.34, "learning_rate": 0.001, "loss": 3.6307, "step": 6972 }, { "epoch": 1.34, "learning_rate": 0.001, "loss": 3.6434, "step": 6984 }, { "epoch": 1.34, "learning_rate": 0.001, "loss": 3.6407, "step": 6996 }, { "epoch": 1.35, "learning_rate": 0.001, "loss": 3.6464, "step": 7008 }, { "epoch": 1.35, "learning_rate": 0.001, "loss": 3.6493, "step": 7020 }, { "epoch": 1.35, "learning_rate": 0.001, "loss": 3.6467, "step": 7032 }, { "epoch": 1.35, "learning_rate": 0.001, "loss": 3.6396, "step": 7044 }, { "epoch": 1.35, "learning_rate": 0.001, "loss": 3.6316, "step": 7056 }, { "epoch": 1.36, "learning_rate": 0.001, "loss": 3.6286, "step": 7068 }, { "epoch": 1.36, "learning_rate": 0.001, "loss": 3.6355, "step": 7080 }, { "epoch": 1.36, "learning_rate": 0.001, "loss": 3.6289, "step": 7092 }, { "epoch": 1.36, "learning_rate": 0.001, "loss": 3.6338, "step": 7104 }, { "epoch": 1.37, "learning_rate": 0.001, "loss": 3.6279, "step": 7116 }, { "epoch": 1.37, "learning_rate": 0.001, "loss": 3.632, "step": 7128 }, { "epoch": 1.37, "learning_rate": 0.001, "loss": 3.624, "step": 7140 }, { "epoch": 1.37, "learning_rate": 0.001, "loss": 3.6223, "step": 7152 }, { "epoch": 1.38, "learning_rate": 0.001, "loss": 3.6262, "step": 7164 }, { "epoch": 1.38, "learning_rate": 0.001, "loss": 3.6229, "step": 7176 }, { "epoch": 1.38, "learning_rate": 0.001, "loss": 3.6279, "step": 7188 }, { "epoch": 1.38, "learning_rate": 0.001, "loss": 3.617, "step": 7200 }, { "epoch": 1.38, "learning_rate": 0.001, "loss": 3.603, "step": 7212 }, { "epoch": 1.39, "learning_rate": 0.001, "loss": 3.615, "step": 7224 }, { "epoch": 1.39, "learning_rate": 0.001, "loss": 3.6169, "step": 7236 }, { "epoch": 1.39, "learning_rate": 0.001, "loss": 3.6196, "step": 7248 }, { "epoch": 1.39, "learning_rate": 0.001, "loss": 3.6218, "step": 7260 }, { "epoch": 1.4, "learning_rate": 0.001, "loss": 3.6292, "step": 7272 }, { "epoch": 1.4, "learning_rate": 0.001, "loss": 3.615, "step": 7284 }, { "epoch": 1.4, "learning_rate": 0.001, "loss": 3.6064, "step": 7296 }, { "epoch": 1.4, "learning_rate": 0.001, "loss": 3.6111, "step": 7308 }, { "epoch": 1.41, "learning_rate": 0.001, "loss": 3.6091, "step": 7320 }, { "epoch": 1.41, "learning_rate": 0.001, "loss": 3.6179, "step": 7332 }, { "epoch": 1.41, "learning_rate": 0.001, "loss": 3.6042, "step": 7344 }, { "epoch": 1.41, "learning_rate": 0.001, "loss": 3.6149, "step": 7356 }, { "epoch": 1.41, "learning_rate": 0.001, "loss": 3.6052, "step": 7368 }, { "epoch": 1.42, "learning_rate": 0.001, "loss": 3.6146, "step": 7380 }, { "epoch": 1.42, "learning_rate": 0.001, "loss": 3.5991, "step": 7392 }, { "epoch": 1.42, "learning_rate": 0.001, "loss": 3.6124, "step": 7404 }, { "epoch": 1.42, "learning_rate": 0.001, "loss": 3.6048, "step": 7416 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 3.6032, "step": 7428 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 3.6121, "step": 7440 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 3.6015, "step": 7452 }, { "epoch": 1.43, "learning_rate": 0.001, "loss": 3.5937, "step": 7464 }, { "epoch": 1.44, "learning_rate": 0.001, "loss": 3.6078, "step": 7476 }, { "epoch": 1.44, "learning_rate": 0.001, "loss": 3.5833, "step": 7488 }, { "epoch": 1.44, "learning_rate": 0.001, "loss": 3.5957, "step": 7500 }, { "epoch": 1.44, "eval_ag_news_accuracy": 0.2325, "eval_ag_news_bleu_score": 2.322543213016337, "eval_ag_news_bleu_score_sem": 0.07634130693049505, "eval_ag_news_emb_cos_sim": 0.5002522468566895, "eval_ag_news_emb_cos_sim_sem": 0.012586354200567779, "eval_ag_news_emb_top1_equal": 0.109375, "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.758804798126221, "eval_ag_news_n_ngrams_match_1": 7.97, "eval_ag_news_n_ngrams_match_2": 1.164, "eval_ag_news_n_ngrams_match_3": 0.248, "eval_ag_news_num_pred_words": 42.966, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 116.60647430260462, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.1884654914558187, "eval_ag_news_runtime": 11.2505, "eval_ag_news_samples_per_second": 44.443, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.2279867938606758, "eval_ag_news_token_set_f1_sem": 0.00423510729547071, "eval_ag_news_token_set_precision": 0.18120436383489927, "eval_ag_news_token_set_recall": 0.3558759956074076, "eval_ag_news_true_num_tokens": 56.09375, "step": 7500 }, { "epoch": 1.44, "eval_anthropic_toxic_prompts_accuracy": 0.06959375, "eval_anthropic_toxic_prompts_bleu_score": 1.4314448699640545, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.056008639486394375, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3752596378326416, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011371612220765966, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.4814229011535645, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.016, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.458, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.096, "eval_anthropic_toxic_prompts_num_pred_words": 43.096, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 88.36031125762983, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.10682182327045758, "eval_anthropic_toxic_prompts_runtime": 9.9976, "eval_anthropic_toxic_prompts_samples_per_second": 50.012, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.19416970230318364, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005311672541103707, "eval_anthropic_toxic_prompts_token_set_precision": 0.19473575142660043, "eval_anthropic_toxic_prompts_token_set_recall": 0.23870822025649782, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 7500 }, { "epoch": 1.44, "eval_arxiv_accuracy": 0.26834375, "eval_arxiv_bleu_score": 1.9023309392053043, "eval_arxiv_bleu_score_sem": 0.06088470989707018, "eval_arxiv_emb_cos_sim": 0.4109267592430115, "eval_arxiv_emb_cos_sim_sem": 0.00933169355758206, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.517910003662109, "eval_arxiv_n_ngrams_match_1": 7.428, "eval_arxiv_n_ngrams_match_2": 0.966, "eval_arxiv_n_ngrams_match_3": 0.102, "eval_arxiv_num_pred_words": 32.916, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 91.64386234787013, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.1792532314009046, "eval_arxiv_runtime": 10.2043, "eval_arxiv_samples_per_second": 48.999, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.1857198052309753, "eval_arxiv_token_set_f1_sem": 0.004262952480010026, "eval_arxiv_token_set_precision": 0.12788930066399967, "eval_arxiv_token_set_recall": 0.4141109842090706, "eval_arxiv_true_num_tokens": 64.0, "step": 7500 }, { "epoch": 1.44, "eval_python_code_alpaca_accuracy": 0.09915625, "eval_python_code_alpaca_bleu_score": 2.104059565783548, "eval_python_code_alpaca_bleu_score_sem": 0.058495974532946075, "eval_python_code_alpaca_emb_cos_sim": 0.33410459756851196, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008442297589415015, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.495411396026611, "eval_python_code_alpaca_n_ngrams_match_1": 4.276, "eval_python_code_alpaca_n_ngrams_match_2": 0.44, "eval_python_code_alpaca_n_ngrams_match_3": 0.082, "eval_python_code_alpaca_num_pred_words": 31.144, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 89.60502455457436, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.15650192715310313, "eval_python_code_alpaca_runtime": 9.8854, "eval_python_code_alpaca_samples_per_second": 50.58, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.25125026346892704, "eval_python_code_alpaca_token_set_f1_sem": 0.004747398014444095, "eval_python_code_alpaca_token_set_precision": 0.20902072223053395, "eval_python_code_alpaca_token_set_recall": 0.3877599743416273, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 7500 }, { "epoch": 1.44, "eval_wikibio_accuracy": 0.2471875, "eval_wikibio_bleu_score": 3.337191110123847, "eval_wikibio_bleu_score_sem": 0.14538434540431933, "eval_wikibio_emb_cos_sim": 0.5329362154006958, "eval_wikibio_emb_cos_sim_sem": 0.01433505328706509, "eval_wikibio_emb_top1_equal": 0.0546875, "eval_wikibio_emb_top1_equal_sem": 0.020175758285348722, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.786613464355469, "eval_wikibio_n_ngrams_match_1": 6.854, "eval_wikibio_n_ngrams_match_2": 1.882, "eval_wikibio_n_ngrams_match_3": 0.546, "eval_wikibio_num_pred_words": 34.36, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 119.89465285764378, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2151911417248924, "eval_wikibio_runtime": 10.1195, "eval_wikibio_samples_per_second": 49.41, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.2283322086217695, "eval_wikibio_token_set_f1_sem": 0.006563763563716991, "eval_wikibio_token_set_precision": 0.2180544644561713, "eval_wikibio_token_set_recall": 0.2689410196017307, "eval_wikibio_true_num_tokens": 61.1328125, "step": 7500 }, { "epoch": 1.44, "eval_nq_accuracy": 0.40428125, "eval_nq_bleu_score": 5.407400379278418, "eval_nq_bleu_score_sem": 0.2571898131032587, "eval_nq_emb_cos_sim": 0.5908706784248352, "eval_nq_emb_cos_sim_sem": 0.012227750374190868, "eval_nq_emb_top1_equal": 0.125, "eval_nq_emb_top1_equal_sem": 0.02934655822437397, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.2414193153381348, "eval_nq_n_ngrams_match_1": 15.792, "eval_nq_n_ngrams_match_2": 4.186, "eval_nq_n_ngrams_match_3": 1.412, "eval_nq_num_pred_words": 47.138, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 25.569987880718834, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.2855418074436533, "eval_nq_runtime": 10.5726, "eval_nq_samples_per_second": 47.292, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.32821161027984797, "eval_nq_token_set_f1_sem": 0.005102521485572631, "eval_nq_token_set_precision": 0.26286134082350254, "eval_nq_token_set_recall": 0.476825728387506, "eval_nq_true_num_tokens": 64.0, "step": 7500 }, { "epoch": 1.44, "learning_rate": 0.001, "loss": 3.5943, "step": 7512 }, { "epoch": 1.44, "learning_rate": 0.001, "loss": 3.585, "step": 7524 }, { "epoch": 1.45, "learning_rate": 0.001, "loss": 3.5871, "step": 7536 }, { "epoch": 1.45, "learning_rate": 0.001, "loss": 3.5742, "step": 7548 }, { "epoch": 1.45, "learning_rate": 0.001, "loss": 3.5853, "step": 7560 }, { "epoch": 1.45, "learning_rate": 0.001, "loss": 3.584, "step": 7572 }, { "epoch": 1.46, "learning_rate": 0.001, "loss": 3.5859, "step": 7584 }, { "epoch": 1.46, "learning_rate": 0.001, "loss": 3.5973, "step": 7596 }, { "epoch": 1.46, "learning_rate": 0.001, "loss": 3.584, "step": 7608 }, { "epoch": 1.46, "learning_rate": 0.001, "loss": 3.5711, "step": 7620 }, { "epoch": 1.47, "learning_rate": 0.001, "loss": 3.5689, "step": 7632 }, { "epoch": 1.47, "learning_rate": 0.001, "loss": 3.5813, "step": 7644 }, { "epoch": 1.47, "learning_rate": 0.001, "loss": 3.582, "step": 7656 }, { "epoch": 1.47, "learning_rate": 0.001, "loss": 3.586, "step": 7668 }, { "epoch": 1.47, "learning_rate": 0.001, "loss": 3.5783, "step": 7680 }, { "epoch": 1.48, "learning_rate": 0.001, "loss": 3.5728, "step": 7692 }, { "epoch": 1.48, "learning_rate": 0.001, "loss": 3.5719, "step": 7704 }, { "epoch": 1.48, "learning_rate": 0.001, "loss": 3.5842, "step": 7716 }, { "epoch": 1.48, "learning_rate": 0.001, "loss": 3.5801, "step": 7728 }, { "epoch": 1.49, "learning_rate": 0.001, "loss": 3.5589, "step": 7740 }, { "epoch": 1.49, "learning_rate": 0.001, "loss": 3.573, "step": 7752 }, { "epoch": 1.49, "learning_rate": 0.001, "loss": 3.5721, "step": 7764 }, { "epoch": 1.49, "learning_rate": 0.001, "loss": 3.5748, "step": 7776 }, { "epoch": 1.5, "learning_rate": 0.001, "loss": 3.5474, "step": 7788 }, { "epoch": 1.5, "learning_rate": 0.001, "loss": 3.5707, "step": 7800 }, { "epoch": 1.5, "learning_rate": 0.001, "loss": 3.5677, "step": 7812 }, { "epoch": 1.5, "learning_rate": 0.001, "loss": 3.5741, "step": 7824 }, { "epoch": 1.5, "learning_rate": 0.001, "loss": 3.5701, "step": 7836 }, { "epoch": 1.51, "learning_rate": 0.001, "loss": 3.5639, "step": 7848 }, { "epoch": 1.51, "learning_rate": 0.001, "loss": 3.5625, "step": 7860 }, { "epoch": 1.51, "learning_rate": 0.001, "loss": 3.5553, "step": 7872 }, { "epoch": 1.51, "learning_rate": 0.001, "loss": 3.5686, "step": 7884 }, { "epoch": 1.52, "learning_rate": 0.001, "loss": 3.5667, "step": 7896 }, { "epoch": 1.52, "learning_rate": 0.001, "loss": 3.556, "step": 7908 }, { "epoch": 1.52, "learning_rate": 0.001, "loss": 3.5595, "step": 7920 }, { "epoch": 1.52, "learning_rate": 0.001, "loss": 3.5614, "step": 7932 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 3.5585, "step": 7944 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 3.5488, "step": 7956 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 3.5476, "step": 7968 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 3.5541, "step": 7980 }, { "epoch": 1.53, "learning_rate": 0.001, "loss": 3.5543, "step": 7992 }, { "epoch": 1.54, "learning_rate": 0.001, "loss": 3.5492, "step": 8004 }, { "epoch": 1.54, "learning_rate": 0.001, "loss": 3.5435, "step": 8016 }, { "epoch": 1.54, "learning_rate": 0.001, "loss": 3.5357, "step": 8028 }, { "epoch": 1.54, "learning_rate": 0.001, "loss": 3.5477, "step": 8040 }, { "epoch": 1.55, "learning_rate": 0.001, "loss": 3.5515, "step": 8052 }, { "epoch": 1.55, "learning_rate": 0.001, "loss": 3.5559, "step": 8064 }, { "epoch": 1.55, "learning_rate": 0.001, "loss": 3.5472, "step": 8076 }, { "epoch": 1.55, "learning_rate": 0.001, "loss": 3.5544, "step": 8088 }, { "epoch": 1.56, "learning_rate": 0.001, "loss": 3.5394, "step": 8100 }, { "epoch": 1.56, "learning_rate": 0.001, "loss": 3.5457, "step": 8112 }, { "epoch": 1.56, "learning_rate": 0.001, "loss": 3.5442, "step": 8124 }, { "epoch": 1.56, "eval_ag_news_accuracy": 0.2358125, "eval_ag_news_bleu_score": 2.4883831524173123, "eval_ag_news_bleu_score_sem": 0.08879235078149196, "eval_ag_news_emb_cos_sim": 0.535577654838562, "eval_ag_news_emb_cos_sim_sem": 0.01108892408818029, "eval_ag_news_emb_top1_equal": 0.1015625, "eval_ag_news_emb_top1_equal_sem": 0.026804565886848545, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.701244354248047, "eval_ag_news_n_ngrams_match_1": 8.354, "eval_ag_news_n_ngrams_match_2": 1.29, "eval_ag_news_n_ngrams_match_3": 0.264, "eval_ag_news_num_pred_words": 44.442, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 110.08407084060602, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.19311281601745423, "eval_ag_news_runtime": 11.1378, "eval_ag_news_samples_per_second": 44.892, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.2328350888237543, "eval_ag_news_token_set_f1_sem": 0.004361067956988328, "eval_ag_news_token_set_precision": 0.18868677528427935, "eval_ag_news_token_set_recall": 0.3524863948893839, "eval_ag_news_true_num_tokens": 56.09375, "step": 8125 }, { "epoch": 1.56, "eval_anthropic_toxic_prompts_accuracy": 0.0703125, "eval_anthropic_toxic_prompts_bleu_score": 1.545958566368486, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0686130866006481, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.3970567286014557, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011643941448241076, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.448799133300781, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.144, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.566, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.13, "eval_anthropic_toxic_prompts_num_pred_words": 43.016, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 85.52417917237305, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11145307880516193, "eval_anthropic_toxic_prompts_runtime": 9.9448, "eval_anthropic_toxic_prompts_samples_per_second": 50.278, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.20310301259868394, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005631626723020884, "eval_anthropic_toxic_prompts_token_set_precision": 0.20265703593220222, "eval_anthropic_toxic_prompts_token_set_recall": 0.2510094551800592, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 8125 }, { "epoch": 1.56, "eval_arxiv_accuracy": 0.2689375, "eval_arxiv_bleu_score": 1.9417341197111846, "eval_arxiv_bleu_score_sem": 0.06158943468102233, "eval_arxiv_emb_cos_sim": 0.4218406677246094, "eval_arxiv_emb_cos_sim_sem": 0.009850202527209301, "eval_arxiv_emb_top1_equal": 0.1171875, "eval_arxiv_emb_top1_equal_sem": 0.02854125312152025, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.472835540771484, "eval_arxiv_n_ngrams_match_1": 7.666, "eval_arxiv_n_ngrams_match_2": 1.002, "eval_arxiv_n_ngrams_match_3": 0.1, "eval_arxiv_num_pred_words": 34.67, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 87.60477808213642, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.18041833384422198, "eval_arxiv_runtime": 10.4142, "eval_arxiv_samples_per_second": 48.011, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.18937989286851253, "eval_arxiv_token_set_f1_sem": 0.004361374369966269, "eval_arxiv_token_set_precision": 0.13118151385965218, "eval_arxiv_token_set_recall": 0.41357298590123737, "eval_arxiv_true_num_tokens": 64.0, "step": 8125 }, { "epoch": 1.56, "eval_python_code_alpaca_accuracy": 0.10209375, "eval_python_code_alpaca_bleu_score": 2.0361606743426623, "eval_python_code_alpaca_bleu_score_sem": 0.059958079090687615, "eval_python_code_alpaca_emb_cos_sim": 0.31711506843566895, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008342725659808028, "eval_python_code_alpaca_emb_top1_equal": 0.0, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.409636497497559, "eval_python_code_alpaca_n_ngrams_match_1": 4.076, "eval_python_code_alpaca_n_ngrams_match_2": 0.464, "eval_python_code_alpaca_n_ngrams_match_3": 0.098, "eval_python_code_alpaca_num_pred_words": 31.592, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 82.23956378298436, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.1523855505706459, "eval_python_code_alpaca_runtime": 9.7335, "eval_python_code_alpaca_samples_per_second": 51.369, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.2411080441921459, "eval_python_code_alpaca_token_set_f1_sem": 0.005053151733454404, "eval_python_code_alpaca_token_set_precision": 0.1960890074593007, "eval_python_code_alpaca_token_set_recall": 0.39624581528722413, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 8125 }, { "epoch": 1.56, "eval_wikibio_accuracy": 0.24809375, "eval_wikibio_bleu_score": 3.456584045194845, "eval_wikibio_bleu_score_sem": 0.14630321622658643, "eval_wikibio_emb_cos_sim": 0.5349158644676208, "eval_wikibio_emb_cos_sim_sem": 0.014574552753728248, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.757209777832031, "eval_wikibio_n_ngrams_match_1": 7.354, "eval_wikibio_n_ngrams_match_2": 2.136, "eval_wikibio_n_ngrams_match_3": 0.666, "eval_wikibio_num_pred_words": 35.106, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 116.4206328594945, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.22421175876502936, "eval_wikibio_runtime": 10.2578, "eval_wikibio_samples_per_second": 48.743, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.24044633626780249, "eval_wikibio_token_set_f1_sem": 0.006852145991997033, "eval_wikibio_token_set_precision": 0.23076434597942477, "eval_wikibio_token_set_recall": 0.26870808290421594, "eval_wikibio_true_num_tokens": 61.1328125, "step": 8125 }, { "epoch": 1.56, "eval_nq_accuracy": 0.4089375, "eval_nq_bleu_score": 5.701252720797398, "eval_nq_bleu_score_sem": 0.26315345818436453, "eval_nq_emb_cos_sim": 0.6082754731178284, "eval_nq_emb_cos_sim_sem": 0.011521773539476187, "eval_nq_emb_top1_equal": 0.0390625, "eval_nq_emb_top1_equal_sem": 0.017191973462108996, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.189636468887329, "eval_nq_n_ngrams_match_1": 16.084, "eval_nq_n_ngrams_match_2": 4.454, "eval_nq_n_ngrams_match_3": 1.532, "eval_nq_num_pred_words": 48.046, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 24.279599448763665, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.293869560026674, "eval_nq_runtime": 10.3857, "eval_nq_samples_per_second": 48.143, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.3357395812187461, "eval_nq_token_set_f1_sem": 0.005076817269316643, "eval_nq_token_set_precision": 0.27053565815348857, "eval_nq_token_set_recall": 0.4853697765823322, "eval_nq_true_num_tokens": 64.0, "step": 8125 }, { "epoch": 1.56, "learning_rate": 0.001, "loss": 3.5404, "step": 8136 }, { "epoch": 1.56, "learning_rate": 0.001, "loss": 3.5336, "step": 8148 }, { "epoch": 1.57, "learning_rate": 0.001, "loss": 3.5427, "step": 8160 }, { "epoch": 1.57, "learning_rate": 0.001, "loss": 3.5372, "step": 8172 }, { "epoch": 1.57, "learning_rate": 0.001, "loss": 3.5478, "step": 8184 }, { "epoch": 1.57, "learning_rate": 0.001, "loss": 3.5418, "step": 8196 }, { "epoch": 1.58, "learning_rate": 0.001, "loss": 3.5378, "step": 8208 }, { "epoch": 1.58, "learning_rate": 0.001, "loss": 3.5395, "step": 8220 }, { "epoch": 1.58, "learning_rate": 0.001, "loss": 3.5339, "step": 8232 }, { "epoch": 1.58, "learning_rate": 0.001, "loss": 3.5413, "step": 8244 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 3.5412, "step": 8256 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 3.5465, "step": 8268 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 3.5309, "step": 8280 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 3.5358, "step": 8292 }, { "epoch": 1.59, "learning_rate": 0.001, "loss": 3.5349, "step": 8304 }, { "epoch": 1.6, "learning_rate": 0.001, "loss": 3.5212, "step": 8316 }, { "epoch": 1.6, "learning_rate": 0.001, "loss": 3.5231, "step": 8328 }, { "epoch": 1.6, "learning_rate": 0.001, "loss": 3.5279, "step": 8340 }, { "epoch": 1.6, "learning_rate": 0.001, "loss": 3.5235, "step": 8352 }, { "epoch": 1.61, "learning_rate": 0.001, "loss": 3.5204, "step": 8364 }, { "epoch": 1.61, "learning_rate": 0.001, "loss": 3.5289, "step": 8376 }, { "epoch": 1.61, "learning_rate": 0.001, "loss": 3.5242, "step": 8388 }, { "epoch": 1.61, "learning_rate": 0.001, "loss": 3.5169, "step": 8400 }, { "epoch": 1.62, "learning_rate": 0.001, "loss": 3.5191, "step": 8412 }, { "epoch": 1.62, "learning_rate": 0.001, "loss": 3.5221, "step": 8424 }, { "epoch": 1.62, "learning_rate": 0.001, "loss": 3.5202, "step": 8436 }, { "epoch": 1.62, "learning_rate": 0.001, "loss": 3.5161, "step": 8448 }, { "epoch": 1.62, "learning_rate": 0.001, "loss": 3.5158, "step": 8460 }, { "epoch": 1.63, "learning_rate": 0.001, "loss": 3.5224, "step": 8472 }, { "epoch": 1.63, "learning_rate": 0.001, "loss": 3.5148, "step": 8484 }, { "epoch": 1.63, "learning_rate": 0.001, "loss": 3.5192, "step": 8496 }, { "epoch": 1.63, "learning_rate": 0.001, "loss": 3.501, "step": 8508 }, { "epoch": 1.64, "learning_rate": 0.001, "loss": 3.524, "step": 8520 }, { "epoch": 1.64, "learning_rate": 0.001, "loss": 3.5008, "step": 8532 }, { "epoch": 1.64, "learning_rate": 0.001, "loss": 3.5206, "step": 8544 }, { "epoch": 1.64, "learning_rate": 0.001, "loss": 3.5107, "step": 8556 }, { "epoch": 1.65, "learning_rate": 0.001, "loss": 3.5069, "step": 8568 }, { "epoch": 1.65, "learning_rate": 0.001, "loss": 3.5055, "step": 8580 }, { "epoch": 1.65, "learning_rate": 0.001, "loss": 3.5152, "step": 8592 }, { "epoch": 1.65, "learning_rate": 0.001, "loss": 3.5064, "step": 8604 }, { "epoch": 1.65, "learning_rate": 0.001, "loss": 3.5062, "step": 8616 }, { "epoch": 1.66, "learning_rate": 0.001, "loss": 3.4983, "step": 8628 }, { "epoch": 1.66, "learning_rate": 0.001, "loss": 3.5099, "step": 8640 }, { "epoch": 1.66, "learning_rate": 0.001, "loss": 3.4936, "step": 8652 }, { "epoch": 1.66, "learning_rate": 0.001, "loss": 3.5028, "step": 8664 }, { "epoch": 1.67, "learning_rate": 0.001, "loss": 3.5084, "step": 8676 }, { "epoch": 1.67, "learning_rate": 0.001, "loss": 3.5103, "step": 8688 }, { "epoch": 1.67, "learning_rate": 0.001, "loss": 3.5102, "step": 8700 }, { "epoch": 1.67, "learning_rate": 0.001, "loss": 3.4951, "step": 8712 }, { "epoch": 1.68, "learning_rate": 0.001, "loss": 3.5018, "step": 8724 }, { "epoch": 1.68, "learning_rate": 0.001, "loss": 3.4946, "step": 8736 }, { "epoch": 1.68, "learning_rate": 0.001, "loss": 3.4883, "step": 8748 }, { "epoch": 1.68, "eval_ag_news_accuracy": 0.23778125, "eval_ag_news_bleu_score": 2.3704854141674105, "eval_ag_news_bleu_score_sem": 0.08805929234025343, "eval_ag_news_emb_cos_sim": 0.5304872989654541, "eval_ag_news_emb_cos_sim_sem": 0.013255226003524682, "eval_ag_news_emb_top1_equal": 0.140625, "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.644641399383545, "eval_ag_news_n_ngrams_match_1": 7.906, "eval_ag_news_n_ngrams_match_2": 1.166, "eval_ag_news_n_ngrams_match_3": 0.266, "eval_ag_news_num_pred_words": 43.142, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 104.0260552884777, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.1867053586924297, "eval_ag_news_runtime": 10.3761, "eval_ag_news_samples_per_second": 48.188, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.22633767821861886, "eval_ag_news_token_set_f1_sem": 0.00459927641854774, "eval_ag_news_token_set_precision": 0.17909611571602405, "eval_ag_news_token_set_recall": 0.36599050598760197, "eval_ag_news_true_num_tokens": 56.09375, "step": 8750 }, { "epoch": 1.68, "eval_anthropic_toxic_prompts_accuracy": 0.0720625, "eval_anthropic_toxic_prompts_bleu_score": 1.5380644383019462, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06938048383491413, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.38932090997695923, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011143205277795273, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.367114543914795, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.028, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.542, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.116, "eval_anthropic_toxic_prompts_num_pred_words": 42.348, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 78.81588350819912, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.10855016633582844, "eval_anthropic_toxic_prompts_runtime": 12.2397, "eval_anthropic_toxic_prompts_samples_per_second": 40.851, "eval_anthropic_toxic_prompts_steps_per_second": 0.082, "eval_anthropic_toxic_prompts_token_set_f1": 0.20107549111213632, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005727311625011449, "eval_anthropic_toxic_prompts_token_set_precision": 0.19489493214241732, "eval_anthropic_toxic_prompts_token_set_recall": 0.2561713415112732, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 8750 }, { "epoch": 1.68, "eval_arxiv_accuracy": 0.27196875, "eval_arxiv_bleu_score": 1.9063170645113854, "eval_arxiv_bleu_score_sem": 0.06346725348494, "eval_arxiv_emb_cos_sim": 0.4322901964187622, "eval_arxiv_emb_cos_sim_sem": 0.009643776482826055, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.439461708068848, "eval_arxiv_n_ngrams_match_1": 7.572, "eval_arxiv_n_ngrams_match_2": 0.976, "eval_arxiv_n_ngrams_match_3": 0.114, "eval_arxiv_num_pred_words": 33.218, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 84.72932028667496, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.17953503878779442, "eval_arxiv_runtime": 10.2275, "eval_arxiv_samples_per_second": 48.888, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.18809067298590038, "eval_arxiv_token_set_f1_sem": 0.004465375225601672, "eval_arxiv_token_set_precision": 0.12943848809496553, "eval_arxiv_token_set_recall": 0.4167102154679495, "eval_arxiv_true_num_tokens": 64.0, "step": 8750 }, { "epoch": 1.68, "eval_python_code_alpaca_accuracy": 0.1026875, "eval_python_code_alpaca_bleu_score": 2.2041862755943775, "eval_python_code_alpaca_bleu_score_sem": 0.06834122070214509, "eval_python_code_alpaca_emb_cos_sim": 0.3348689675331116, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008170157364213696, "eval_python_code_alpaca_emb_top1_equal": 0.0, "eval_python_code_alpaca_emb_top1_equal_sem": 0.0, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.387907981872559, "eval_python_code_alpaca_n_ngrams_match_1": 4.416, "eval_python_code_alpaca_n_ngrams_match_2": 0.54, "eval_python_code_alpaca_n_ngrams_match_3": 0.12, "eval_python_code_alpaca_num_pred_words": 31.672, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 80.47189410217852, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.16196784827312427, "eval_python_code_alpaca_runtime": 10.5638, "eval_python_code_alpaca_samples_per_second": 47.331, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.26282985117419794, "eval_python_code_alpaca_token_set_f1_sem": 0.0049658116580926165, "eval_python_code_alpaca_token_set_precision": 0.21515742623335407, "eval_python_code_alpaca_token_set_recall": 0.4139358274702672, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 8750 }, { "epoch": 1.68, "eval_wikibio_accuracy": 0.2573125, "eval_wikibio_bleu_score": 3.2162036203404467, "eval_wikibio_bleu_score_sem": 0.1456760587055847, "eval_wikibio_emb_cos_sim": 0.5134081840515137, "eval_wikibio_emb_cos_sim_sem": 0.015979585120378715, "eval_wikibio_emb_top1_equal": 0.0859375, "eval_wikibio_emb_top1_equal_sem": 0.02487009666300537, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.720954895019531, "eval_wikibio_n_ngrams_match_1": 6.634, "eval_wikibio_n_ngrams_match_2": 1.802, "eval_wikibio_n_ngrams_match_3": 0.514, "eval_wikibio_num_pred_words": 33.094, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 112.27541272879289, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2122320603838448, "eval_wikibio_runtime": 10.1467, "eval_wikibio_samples_per_second": 49.277, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.22346104983996015, "eval_wikibio_token_set_f1_sem": 0.00664109296140654, "eval_wikibio_token_set_precision": 0.21128450768421647, "eval_wikibio_token_set_recall": 0.26172893569192507, "eval_wikibio_true_num_tokens": 61.1328125, "step": 8750 }, { "epoch": 1.68, "eval_nq_accuracy": 0.41128125, "eval_nq_bleu_score": 5.820829272424745, "eval_nq_bleu_score_sem": 0.2595851636937686, "eval_nq_emb_cos_sim": 0.6050189137458801, "eval_nq_emb_cos_sim_sem": 0.011790530821539998, "eval_nq_emb_top1_equal": 0.140625, "eval_nq_emb_top1_equal_sem": 0.030847557647994725, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.144869327545166, "eval_nq_n_ngrams_match_1": 16.144, "eval_nq_n_ngrams_match_2": 4.418, "eval_nq_n_ngrams_match_3": 1.56, "eval_nq_num_pred_words": 47.164, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 23.216641499522527, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.2968378615393472, "eval_nq_runtime": 10.6142, "eval_nq_samples_per_second": 47.107, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.3378667476349911, "eval_nq_token_set_f1_sem": 0.00477608986858229, "eval_nq_token_set_precision": 0.272093226303916, "eval_nq_token_set_recall": 0.4856166580211932, "eval_nq_true_num_tokens": 64.0, "step": 8750 }, { "epoch": 1.68, "learning_rate": 0.001, "loss": 3.4996, "step": 8760 }, { "epoch": 1.68, "learning_rate": 0.001, "loss": 3.4984, "step": 8772 }, { "epoch": 1.69, "learning_rate": 0.001, "loss": 3.4953, "step": 8784 }, { "epoch": 1.69, "learning_rate": 0.001, "loss": 3.4944, "step": 8796 }, { "epoch": 1.69, "learning_rate": 0.001, "loss": 3.5001, "step": 8808 }, { "epoch": 1.69, "learning_rate": 0.001, "loss": 3.4954, "step": 8820 }, { "epoch": 1.7, "learning_rate": 0.001, "loss": 3.4957, "step": 8832 }, { "epoch": 1.7, "learning_rate": 0.001, "loss": 3.4775, "step": 8844 }, { "epoch": 1.7, "learning_rate": 0.001, "loss": 3.499, "step": 8856 }, { "epoch": 1.7, "learning_rate": 0.001, "loss": 3.501, "step": 8868 }, { "epoch": 1.71, "learning_rate": 0.001, "loss": 3.4837, "step": 8880 }, { "epoch": 1.71, "learning_rate": 0.001, "loss": 3.4904, "step": 8892 }, { "epoch": 1.71, "learning_rate": 0.001, "loss": 3.4952, "step": 8904 }, { "epoch": 1.71, "learning_rate": 0.001, "loss": 3.4865, "step": 8916 }, { "epoch": 1.71, "learning_rate": 0.001, "loss": 3.4925, "step": 8928 }, { "epoch": 1.72, "learning_rate": 0.001, "loss": 3.4793, "step": 8940 }, { "epoch": 1.72, "learning_rate": 0.001, "loss": 3.4854, "step": 8952 }, { "epoch": 1.72, "learning_rate": 0.001, "loss": 3.4812, "step": 8964 }, { "epoch": 1.72, "learning_rate": 0.001, "loss": 3.4835, "step": 8976 }, { "epoch": 1.73, "learning_rate": 0.001, "loss": 3.4805, "step": 8988 }, { "epoch": 1.73, "learning_rate": 0.001, "loss": 3.4922, "step": 9000 }, { "epoch": 1.73, "learning_rate": 0.001, "loss": 3.4939, "step": 9012 }, { "epoch": 1.73, "learning_rate": 0.001, "loss": 3.4814, "step": 9024 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 3.4842, "step": 9036 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 3.4645, "step": 9048 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 3.4831, "step": 9060 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 3.4698, "step": 9072 }, { "epoch": 1.74, "learning_rate": 0.001, "loss": 3.4693, "step": 9084 }, { "epoch": 1.75, "learning_rate": 0.001, "loss": 3.4736, "step": 9096 }, { "epoch": 1.75, "learning_rate": 0.001, "loss": 3.4687, "step": 9108 }, { "epoch": 1.75, "learning_rate": 0.001, "loss": 3.4786, "step": 9120 }, { "epoch": 1.75, "learning_rate": 0.001, "loss": 3.473, "step": 9132 }, { "epoch": 1.76, "learning_rate": 0.001, "loss": 3.462, "step": 9144 }, { "epoch": 1.76, "learning_rate": 0.001, "loss": 3.485, "step": 9156 }, { "epoch": 1.76, "learning_rate": 0.001, "loss": 3.4625, "step": 9168 }, { "epoch": 1.76, "learning_rate": 0.001, "loss": 3.4494, "step": 9180 }, { "epoch": 1.76, "learning_rate": 0.001, "loss": 3.4763, "step": 9192 }, { "epoch": 1.77, "learning_rate": 0.001, "loss": 3.4664, "step": 9204 }, { "epoch": 1.77, "learning_rate": 0.001, "loss": 3.4651, "step": 9216 }, { "epoch": 1.77, "learning_rate": 0.001, "loss": 3.466, "step": 9228 }, { "epoch": 1.77, "learning_rate": 0.001, "loss": 3.4596, "step": 9240 }, { "epoch": 1.78, "learning_rate": 0.001, "loss": 3.466, "step": 9252 }, { "epoch": 1.78, "learning_rate": 0.001, "loss": 3.4631, "step": 9264 }, { "epoch": 1.78, "learning_rate": 0.001, "loss": 3.4606, "step": 9276 }, { "epoch": 1.78, "learning_rate": 0.001, "loss": 3.4501, "step": 9288 }, { "epoch": 1.79, "learning_rate": 0.001, "loss": 3.463, "step": 9300 }, { "epoch": 1.79, "learning_rate": 0.001, "loss": 3.4535, "step": 9312 }, { "epoch": 1.79, "learning_rate": 0.001, "loss": 3.4639, "step": 9324 }, { "epoch": 1.79, "learning_rate": 0.001, "loss": 3.469, "step": 9336 }, { "epoch": 1.79, "learning_rate": 0.001, "loss": 3.4589, "step": 9348 }, { "epoch": 1.8, "learning_rate": 0.001, "loss": 3.4613, "step": 9360 }, { "epoch": 1.8, "learning_rate": 0.001, "loss": 3.4576, "step": 9372 }, { "epoch": 1.8, "eval_ag_news_accuracy": 0.2413125, "eval_ag_news_bleu_score": 2.5544844559070983, "eval_ag_news_bleu_score_sem": 0.08223989162897455, "eval_ag_news_emb_cos_sim": 0.5481958389282227, "eval_ag_news_emb_cos_sim_sem": 0.011590007970126309, "eval_ag_news_emb_top1_equal": 0.03125, "eval_ag_news_emb_top1_equal_sem": 0.015439349450344106, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.605099678039551, "eval_ag_news_n_ngrams_match_1": 8.48, "eval_ag_news_n_ngrams_match_2": 1.362, "eval_ag_news_n_ngrams_match_3": 0.29, "eval_ag_news_num_pred_words": 43.434, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 99.99294945370863, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.20146325608356472, "eval_ag_news_runtime": 11.8026, "eval_ag_news_samples_per_second": 42.363, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.23863517867807965, "eval_ag_news_token_set_f1_sem": 0.0043812817770934155, "eval_ag_news_token_set_precision": 0.19362816222352633, "eval_ag_news_token_set_recall": 0.36013605266415294, "eval_ag_news_true_num_tokens": 56.09375, "step": 9375 }, { "epoch": 1.8, "eval_anthropic_toxic_prompts_accuracy": 0.072875, "eval_anthropic_toxic_prompts_bleu_score": 1.5905957428879907, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06562406450273717, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4287947714328766, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012074247600647145, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.347011566162109, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.15, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.554, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.128, "eval_anthropic_toxic_prompts_num_pred_words": 42.386, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 77.24726929044871, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1139319311985194, "eval_anthropic_toxic_prompts_runtime": 10.4937, "eval_anthropic_toxic_prompts_samples_per_second": 47.648, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.20623150340487895, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005574846580783877, "eval_anthropic_toxic_prompts_token_set_precision": 0.20602393665230287, "eval_anthropic_toxic_prompts_token_set_recall": 0.25728422074732116, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 9375 }, { "epoch": 1.8, "eval_arxiv_accuracy": 0.27428125, "eval_arxiv_bleu_score": 2.024296253189745, "eval_arxiv_bleu_score_sem": 0.06256252374453004, "eval_arxiv_emb_cos_sim": 0.44694453477859497, "eval_arxiv_emb_cos_sim_sem": 0.009536469289428303, "eval_arxiv_emb_top1_equal": 0.1796875, "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.406665802001953, "eval_arxiv_n_ngrams_match_1": 8.296, "eval_arxiv_n_ngrams_match_2": 1.064, "eval_arxiv_n_ngrams_match_3": 0.114, "eval_arxiv_num_pred_words": 34.622, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 81.99561760560155, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.19660889423345324, "eval_arxiv_runtime": 11.235, "eval_arxiv_samples_per_second": 44.504, "eval_arxiv_steps_per_second": 0.089, "eval_arxiv_token_set_f1": 0.2050662739932027, "eval_arxiv_token_set_f1_sem": 0.004335582554494625, "eval_arxiv_token_set_precision": 0.14419046096994229, "eval_arxiv_token_set_recall": 0.41716972507487776, "eval_arxiv_true_num_tokens": 64.0, "step": 9375 }, { "epoch": 1.8, "eval_python_code_alpaca_accuracy": 0.10359375, "eval_python_code_alpaca_bleu_score": 2.2027003126845086, "eval_python_code_alpaca_bleu_score_sem": 0.06224611498459122, "eval_python_code_alpaca_emb_cos_sim": 0.359014630317688, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009481825241789421, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.309894561767578, "eval_python_code_alpaca_n_ngrams_match_1": 4.75, "eval_python_code_alpaca_n_ngrams_match_2": 0.61, "eval_python_code_alpaca_n_ngrams_match_3": 0.134, "eval_python_code_alpaca_num_pred_words": 34.37, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 74.43264048054209, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.1742315144054959, "eval_python_code_alpaca_runtime": 10.8202, "eval_python_code_alpaca_samples_per_second": 46.21, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.2747307998613838, "eval_python_code_alpaca_token_set_f1_sem": 0.0048689995867937695, "eval_python_code_alpaca_token_set_precision": 0.2353429153331676, "eval_python_code_alpaca_token_set_recall": 0.39477668375484204, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 9375 }, { "epoch": 1.8, "eval_wikibio_accuracy": 0.252375, "eval_wikibio_bleu_score": 3.754466439365244, "eval_wikibio_bleu_score_sem": 0.14811967447223032, "eval_wikibio_emb_cos_sim": 0.5836482048034668, "eval_wikibio_emb_cos_sim_sem": 0.013556932901500902, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.745633602142334, "eval_wikibio_n_ngrams_match_1": 7.97, "eval_wikibio_n_ngrams_match_2": 2.228, "eval_wikibio_n_ngrams_match_3": 0.65, "eval_wikibio_num_pred_words": 36.424, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 115.0806977849757, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2555147154181383, "eval_wikibio_runtime": 10.875, "eval_wikibio_samples_per_second": 45.977, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.2584773880953907, "eval_wikibio_token_set_f1_sem": 0.006084545337543673, "eval_wikibio_token_set_precision": 0.2550555861799195, "eval_wikibio_token_set_recall": 0.28019213217280126, "eval_wikibio_true_num_tokens": 61.1328125, "step": 9375 }, { "epoch": 1.8, "eval_nq_accuracy": 0.417125, "eval_nq_bleu_score": 6.20073225862314, "eval_nq_bleu_score_sem": 0.266347373932815, "eval_nq_emb_cos_sim": 0.634925127029419, "eval_nq_emb_cos_sim_sem": 0.01126875942242021, "eval_nq_emb_top1_equal": 0.109375, "eval_nq_emb_top1_equal_sem": 0.027695207821224692, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.1027681827545166, "eval_nq_n_ngrams_match_1": 16.838, "eval_nq_n_ngrams_match_2": 4.704, "eval_nq_n_ngrams_match_3": 1.672, "eval_nq_num_pred_words": 47.424, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 22.259484395523963, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3107978405072947, "eval_nq_runtime": 11.5849, "eval_nq_samples_per_second": 43.16, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.3496079670088032, "eval_nq_token_set_f1_sem": 0.00473141815781879, "eval_nq_token_set_precision": 0.2838536832792571, "eval_nq_token_set_recall": 0.48512943651616086, "eval_nq_true_num_tokens": 64.0, "step": 9375 }, { "epoch": 1.8, "learning_rate": 0.001, "loss": 3.4419, "step": 9384 }, { "epoch": 1.8, "learning_rate": 0.001, "loss": 3.4663, "step": 9396 }, { "epoch": 1.81, "learning_rate": 0.001, "loss": 3.4548, "step": 9408 }, { "epoch": 1.81, "learning_rate": 0.001, "loss": 3.4706, "step": 9420 }, { "epoch": 1.81, "learning_rate": 0.001, "loss": 3.4466, "step": 9432 }, { "epoch": 1.81, "learning_rate": 0.001, "loss": 3.4504, "step": 9444 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 3.4421, "step": 9456 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 3.4526, "step": 9468 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 3.4514, "step": 9480 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 3.4362, "step": 9492 }, { "epoch": 1.82, "learning_rate": 0.001, "loss": 3.4481, "step": 9504 }, { "epoch": 1.83, "learning_rate": 0.001, "loss": 3.4493, "step": 9516 }, { "epoch": 1.83, "learning_rate": 0.001, "loss": 3.4461, "step": 9528 }, { "epoch": 1.83, "learning_rate": 0.001, "loss": 3.4476, "step": 9540 }, { "epoch": 1.83, "learning_rate": 0.001, "loss": 3.4497, "step": 9552 }, { "epoch": 1.84, "learning_rate": 0.001, "loss": 3.4436, "step": 9564 }, { "epoch": 1.84, "learning_rate": 0.001, "loss": 3.4514, "step": 9576 }, { "epoch": 1.84, "learning_rate": 0.001, "loss": 3.452, "step": 9588 }, { "epoch": 1.84, "learning_rate": 0.001, "loss": 3.4462, "step": 9600 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 3.4447, "step": 9612 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 3.4387, "step": 9624 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 3.445, "step": 9636 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 3.4333, "step": 9648 }, { "epoch": 1.85, "learning_rate": 0.001, "loss": 3.4435, "step": 9660 }, { "epoch": 1.86, "learning_rate": 0.001, "loss": 3.4408, "step": 9672 }, { "epoch": 1.86, "learning_rate": 0.001, "loss": 3.4445, "step": 9684 }, { "epoch": 1.86, "learning_rate": 0.001, "loss": 3.4346, "step": 9696 }, { "epoch": 1.86, "learning_rate": 0.001, "loss": 3.4328, "step": 9708 }, { "epoch": 1.87, "learning_rate": 0.001, "loss": 3.4498, "step": 9720 }, { "epoch": 1.87, "learning_rate": 0.001, "loss": 3.4271, "step": 9732 }, { "epoch": 1.87, "learning_rate": 0.001, "loss": 3.4355, "step": 9744 }, { "epoch": 1.87, "learning_rate": 0.001, "loss": 3.4384, "step": 9756 }, { "epoch": 1.88, "learning_rate": 0.001, "loss": 3.4385, "step": 9768 }, { "epoch": 1.88, "learning_rate": 0.001, "loss": 3.4475, "step": 9780 }, { "epoch": 1.88, "learning_rate": 0.001, "loss": 3.4299, "step": 9792 }, { "epoch": 1.88, "learning_rate": 0.001, "loss": 3.4292, "step": 9804 }, { "epoch": 1.88, "learning_rate": 0.001, "loss": 3.4305, "step": 9816 }, { "epoch": 1.89, "learning_rate": 0.001, "loss": 3.4308, "step": 9828 }, { "epoch": 1.89, "learning_rate": 0.001, "loss": 3.4247, "step": 9840 }, { "epoch": 1.89, "learning_rate": 0.001, "loss": 3.4309, "step": 9852 }, { "epoch": 1.89, "learning_rate": 0.001, "loss": 3.4209, "step": 9864 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 3.4368, "step": 9876 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 3.4361, "step": 9888 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 3.4188, "step": 9900 }, { "epoch": 1.9, "learning_rate": 0.001, "loss": 3.424, "step": 9912 }, { "epoch": 1.91, "learning_rate": 0.001, "loss": 3.4236, "step": 9924 }, { "epoch": 1.91, "learning_rate": 0.001, "loss": 3.4265, "step": 9936 }, { "epoch": 1.91, "learning_rate": 0.001, "loss": 3.4117, "step": 9948 }, { "epoch": 1.91, "learning_rate": 0.001, "loss": 3.4115, "step": 9960 }, { "epoch": 1.91, "learning_rate": 0.001, "loss": 3.4166, "step": 9972 }, { "epoch": 1.92, "learning_rate": 0.001, "loss": 3.4291, "step": 9984 }, { "epoch": 1.92, "learning_rate": 0.001, "loss": 3.4302, "step": 9996 }, { "epoch": 1.92, "eval_ag_news_accuracy": 0.2426875, "eval_ag_news_bleu_score": 2.562018860710131, "eval_ag_news_bleu_score_sem": 0.08929048573768676, "eval_ag_news_emb_cos_sim": 0.557389497756958, "eval_ag_news_emb_cos_sim_sem": 0.01211784983252068, "eval_ag_news_emb_top1_equal": 0.078125, "eval_ag_news_emb_top1_equal_sem": 0.023813825516515504, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.576404094696045, "eval_ag_news_n_ngrams_match_1": 8.74, "eval_ag_news_n_ngrams_match_2": 1.382, "eval_ag_news_n_ngrams_match_3": 0.298, "eval_ag_news_num_pred_words": 44.534, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 97.16437138134575, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2046196696514389, "eval_ag_news_runtime": 11.0855, "eval_ag_news_samples_per_second": 45.104, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.24116201266850124, "eval_ag_news_token_set_f1_sem": 0.0043896089504334036, "eval_ag_news_token_set_precision": 0.19924948641894807, "eval_ag_news_token_set_recall": 0.346416504838885, "eval_ag_news_true_num_tokens": 56.09375, "step": 10000 }, { "epoch": 1.92, "eval_anthropic_toxic_prompts_accuracy": 0.073375, "eval_anthropic_toxic_prompts_bleu_score": 1.5396786676637733, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.05939510111961594, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4273219406604767, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01240072367465761, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.297293663024902, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.308, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.592, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.124, "eval_anthropic_toxic_prompts_num_pred_words": 44.402, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 73.50060687775462, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11638285444199434, "eval_anthropic_toxic_prompts_runtime": 9.8467, "eval_anthropic_toxic_prompts_samples_per_second": 50.778, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.21663816968739305, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005421078331209499, "eval_anthropic_toxic_prompts_token_set_precision": 0.21744291581774905, "eval_anthropic_toxic_prompts_token_set_recall": 0.2613636287290501, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 10000 }, { "epoch": 1.92, "eval_arxiv_accuracy": 0.27715625, "eval_arxiv_bleu_score": 2.1180178192309875, "eval_arxiv_bleu_score_sem": 0.06668480554139618, "eval_arxiv_emb_cos_sim": 0.45609450340270996, "eval_arxiv_emb_cos_sim_sem": 0.010038108982939059, "eval_arxiv_emb_top1_equal": 0.140625, "eval_arxiv_emb_top1_equal_sem": 0.030847557647994725, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.365558624267578, "eval_arxiv_n_ngrams_match_1": 8.536, "eval_arxiv_n_ngrams_match_2": 1.09, "eval_arxiv_n_ngrams_match_3": 0.14, "eval_arxiv_num_pred_words": 36.128, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 78.6933476792083, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.20092655363184458, "eval_arxiv_runtime": 10.6599, "eval_arxiv_samples_per_second": 46.905, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.20857388365008178, "eval_arxiv_token_set_f1_sem": 0.004363607045164412, "eval_arxiv_token_set_precision": 0.14981453338106698, "eval_arxiv_token_set_recall": 0.3936188877012378, "eval_arxiv_true_num_tokens": 64.0, "step": 10000 }, { "epoch": 1.92, "eval_python_code_alpaca_accuracy": 0.106625, "eval_python_code_alpaca_bleu_score": 2.306136867673515, "eval_python_code_alpaca_bleu_score_sem": 0.06733216200187893, "eval_python_code_alpaca_emb_cos_sim": 0.36762118339538574, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009133783810776045, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.2383131980896, "eval_python_code_alpaca_n_ngrams_match_1": 4.888, "eval_python_code_alpaca_n_ngrams_match_2": 0.696, "eval_python_code_alpaca_n_ngrams_match_3": 0.156, "eval_python_code_alpaca_num_pred_words": 33.856, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 69.29087322928436, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.17943855514448526, "eval_python_code_alpaca_runtime": 10.1289, "eval_python_code_alpaca_samples_per_second": 49.364, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.27891009379117004, "eval_python_code_alpaca_token_set_f1_sem": 0.004791397788533149, "eval_python_code_alpaca_token_set_precision": 0.2380736863803701, "eval_python_code_alpaca_token_set_recall": 0.4021618677653544, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 10000 }, { "epoch": 1.92, "eval_wikibio_accuracy": 0.2575625, "eval_wikibio_bleu_score": 4.0528717657567555, "eval_wikibio_bleu_score_sem": 0.15489019946329716, "eval_wikibio_emb_cos_sim": 0.6026620864868164, "eval_wikibio_emb_cos_sim_sem": 0.011716515687198803, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.687716960906982, "eval_wikibio_n_ngrams_match_1": 8.38, "eval_wikibio_n_ngrams_match_2": 2.432, "eval_wikibio_n_ngrams_match_3": 0.738, "eval_wikibio_num_pred_words": 37.304, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 108.60494721269654, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2670667623430598, "eval_wikibio_runtime": 10.2666, "eval_wikibio_samples_per_second": 48.702, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.27028002450450317, "eval_wikibio_token_set_f1_sem": 0.005603489513180497, "eval_wikibio_token_set_precision": 0.26877049911201645, "eval_wikibio_token_set_recall": 0.2900711922225668, "eval_wikibio_true_num_tokens": 61.1328125, "step": 10000 }, { "epoch": 1.92, "eval_nq_accuracy": 0.42325, "eval_nq_bleu_score": 6.175180435775883, "eval_nq_bleu_score_sem": 0.2765888910925053, "eval_nq_emb_cos_sim": 0.62808758020401, "eval_nq_emb_cos_sim_sem": 0.011437373158244104, "eval_nq_emb_top1_equal": 0.1171875, "eval_nq_emb_top1_equal_sem": 0.02854125312152025, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.0615806579589844, "eval_nq_n_ngrams_match_1": 16.964, "eval_nq_n_ngrams_match_2": 4.73, "eval_nq_n_ngrams_match_3": 1.672, "eval_nq_num_pred_words": 47.94, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 21.361295392277313, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3126946565468729, "eval_nq_runtime": 10.6225, "eval_nq_samples_per_second": 47.07, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.34982885493782756, "eval_nq_token_set_f1_sem": 0.0048272711436809015, "eval_nq_token_set_precision": 0.2884604033974762, "eval_nq_token_set_recall": 0.47479353318137724, "eval_nq_true_num_tokens": 64.0, "step": 10000 }, { "epoch": 1.92, "learning_rate": 0.001, "loss": 3.431, "step": 10008 }, { "epoch": 1.92, "learning_rate": 0.001, "loss": 3.4281, "step": 10020 }, { "epoch": 1.93, "learning_rate": 0.001, "loss": 3.4155, "step": 10032 }, { "epoch": 1.93, "learning_rate": 0.001, "loss": 3.4156, "step": 10044 }, { "epoch": 1.93, "learning_rate": 0.001, "loss": 3.4122, "step": 10056 }, { "epoch": 1.93, "learning_rate": 0.001, "loss": 3.4074, "step": 10068 }, { "epoch": 1.94, "learning_rate": 0.001, "loss": 3.4036, "step": 10080 }, { "epoch": 1.94, "learning_rate": 0.001, "loss": 3.4091, "step": 10092 }, { "epoch": 1.94, "learning_rate": 0.001, "loss": 3.4035, "step": 10104 }, { "epoch": 1.94, "learning_rate": 0.001, "loss": 3.4123, "step": 10116 }, { "epoch": 1.94, "learning_rate": 0.001, "loss": 3.4017, "step": 10128 }, { "epoch": 1.95, "learning_rate": 0.001, "loss": 3.4128, "step": 10140 }, { "epoch": 1.95, "learning_rate": 0.001, "loss": 3.3969, "step": 10152 }, { "epoch": 1.95, "learning_rate": 0.001, "loss": 3.4173, "step": 10164 }, { "epoch": 1.95, "learning_rate": 0.001, "loss": 3.4068, "step": 10176 }, { "epoch": 1.96, "learning_rate": 0.001, "loss": 3.4229, "step": 10188 }, { "epoch": 1.96, "learning_rate": 0.001, "loss": 3.4106, "step": 10200 }, { "epoch": 1.96, "learning_rate": 0.001, "loss": 3.4135, "step": 10212 }, { "epoch": 1.96, "learning_rate": 0.001, "loss": 3.4101, "step": 10224 }, { "epoch": 1.97, "learning_rate": 0.001, "loss": 3.4037, "step": 10236 }, { "epoch": 1.97, "learning_rate": 0.001, "loss": 3.3929, "step": 10248 }, { "epoch": 1.97, "learning_rate": 0.001, "loss": 3.4174, "step": 10260 }, { "epoch": 1.97, "learning_rate": 0.001, "loss": 3.4023, "step": 10272 }, { "epoch": 1.97, "learning_rate": 0.001, "loss": 3.4014, "step": 10284 }, { "epoch": 1.98, "learning_rate": 0.001, "loss": 3.4067, "step": 10296 }, { "epoch": 1.98, "learning_rate": 0.001, "loss": 3.4152, "step": 10308 }, { "epoch": 1.98, "learning_rate": 0.001, "loss": 3.404, "step": 10320 }, { "epoch": 1.98, "learning_rate": 0.001, "loss": 3.3939, "step": 10332 }, { "epoch": 1.99, "learning_rate": 0.001, "loss": 3.3946, "step": 10344 }, { "epoch": 1.99, "learning_rate": 0.001, "loss": 3.3971, "step": 10356 }, { "epoch": 1.99, "learning_rate": 0.001, "loss": 3.4069, "step": 10368 }, { "epoch": 1.99, "learning_rate": 0.001, "loss": 3.4033, "step": 10380 }, { "epoch": 2.0, "learning_rate": 0.001, "loss": 3.4036, "step": 10392 }, { "epoch": 2.0, "learning_rate": 0.001, "loss": 3.3982, "step": 10404 }, { "epoch": 2.0, "learning_rate": 0.001, "loss": 3.3945, "step": 10416 }, { "epoch": 2.0, "learning_rate": 0.001, "loss": 3.3884, "step": 10428 }, { "epoch": 2.0, "learning_rate": 0.001, "loss": 3.379, "step": 10440 }, { "epoch": 2.01, "learning_rate": 0.001, "loss": 3.3761, "step": 10452 }, { "epoch": 2.01, "learning_rate": 0.001, "loss": 3.3945, "step": 10464 }, { "epoch": 2.01, "learning_rate": 0.001, "loss": 3.3764, "step": 10476 }, { "epoch": 2.01, "learning_rate": 0.001, "loss": 3.3722, "step": 10488 }, { "epoch": 2.02, "learning_rate": 0.001, "loss": 3.3789, "step": 10500 }, { "epoch": 2.02, "learning_rate": 0.001, "loss": 3.3745, "step": 10512 }, { "epoch": 2.02, "learning_rate": 0.001, "loss": 3.3741, "step": 10524 }, { "epoch": 2.02, "learning_rate": 0.001, "loss": 3.3759, "step": 10536 }, { "epoch": 2.03, "learning_rate": 0.001, "loss": 3.3591, "step": 10548 }, { "epoch": 2.03, "learning_rate": 0.001, "loss": 3.3719, "step": 10560 }, { "epoch": 2.03, "learning_rate": 0.001, "loss": 3.3824, "step": 10572 }, { "epoch": 2.03, "learning_rate": 0.001, "loss": 3.3717, "step": 10584 }, { "epoch": 2.03, "learning_rate": 0.001, "loss": 3.3741, "step": 10596 }, { "epoch": 2.04, "learning_rate": 0.001, "loss": 3.3633, "step": 10608 }, { "epoch": 2.04, "learning_rate": 0.001, "loss": 3.378, "step": 10620 }, { "epoch": 2.04, "eval_ag_news_accuracy": 0.2460625, "eval_ag_news_bleu_score": 2.5823070923245934, "eval_ag_news_bleu_score_sem": 0.09557406089950654, "eval_ag_news_emb_cos_sim": 0.5710728168487549, "eval_ag_news_emb_cos_sim_sem": 0.012378251501867207, "eval_ag_news_emb_top1_equal": 0.109375, "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.5237579345703125, "eval_ag_news_n_ngrams_match_1": 8.692, "eval_ag_news_n_ngrams_match_2": 1.34, "eval_ag_news_n_ngrams_match_3": 0.308, "eval_ag_news_num_pred_words": 44.374, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 92.18135941410526, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2078462630268746, "eval_ag_news_runtime": 10.746, "eval_ag_news_samples_per_second": 46.529, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.23999342859317674, "eval_ag_news_token_set_f1_sem": 0.0045327772425795745, "eval_ag_news_token_set_precision": 0.19763210209260546, "eval_ag_news_token_set_recall": 0.34862594959553017, "eval_ag_news_true_num_tokens": 56.09375, "step": 10625 }, { "epoch": 2.04, "eval_anthropic_toxic_prompts_accuracy": 0.07575, "eval_anthropic_toxic_prompts_bleu_score": 1.5667824704383029, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0684543929000709, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.42869311571121216, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012086998339410706, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.234001636505127, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.188, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.6, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.164, "eval_anthropic_toxic_prompts_num_pred_words": 44.038, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 68.99276448103703, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11331182812465268, "eval_anthropic_toxic_prompts_runtime": 9.8803, "eval_anthropic_toxic_prompts_samples_per_second": 50.606, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.2063507067504228, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005421787028026613, "eval_anthropic_toxic_prompts_token_set_precision": 0.2067507138332873, "eval_anthropic_toxic_prompts_token_set_recall": 0.25887168962558404, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 10625 }, { "epoch": 2.04, "eval_arxiv_accuracy": 0.27859375, "eval_arxiv_bleu_score": 2.2033503045504976, "eval_arxiv_bleu_score_sem": 0.06797319733926978, "eval_arxiv_emb_cos_sim": 0.4704684019088745, "eval_arxiv_emb_cos_sim_sem": 0.010251886359664918, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.335144519805908, "eval_arxiv_n_ngrams_match_1": 8.744, "eval_arxiv_n_ngrams_match_2": 1.182, "eval_arxiv_n_ngrams_match_3": 0.15, "eval_arxiv_num_pred_words": 35.222, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 76.33599015511138, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.20541628902508738, "eval_arxiv_runtime": 10.3443, "eval_arxiv_samples_per_second": 48.336, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.2136171243119018, "eval_arxiv_token_set_f1_sem": 0.004410207072698844, "eval_arxiv_token_set_precision": 0.15367142683030394, "eval_arxiv_token_set_recall": 0.40260993530323613, "eval_arxiv_true_num_tokens": 64.0, "step": 10625 }, { "epoch": 2.04, "eval_python_code_alpaca_accuracy": 0.10709375, "eval_python_code_alpaca_bleu_score": 2.4331331266225, "eval_python_code_alpaca_bleu_score_sem": 0.07723671548849642, "eval_python_code_alpaca_emb_cos_sim": 0.3782823979854584, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010019162657444396, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.180656909942627, "eval_python_code_alpaca_n_ngrams_match_1": 5.046, "eval_python_code_alpaca_n_ngrams_match_2": 0.798, "eval_python_code_alpaca_n_ngrams_match_3": 0.206, "eval_python_code_alpaca_num_pred_words": 35.73, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 65.4088067996686, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.17535980862514963, "eval_python_code_alpaca_runtime": 10.6607, "eval_python_code_alpaca_samples_per_second": 46.901, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.2851954812355219, "eval_python_code_alpaca_token_set_f1_sem": 0.005362792811425221, "eval_python_code_alpaca_token_set_precision": 0.2508346643902476, "eval_python_code_alpaca_token_set_recall": 0.3983363955063751, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 10625 }, { "epoch": 2.04, "eval_wikibio_accuracy": 0.26234375, "eval_wikibio_bleu_score": 4.121821244516629, "eval_wikibio_bleu_score_sem": 0.17431240658392042, "eval_wikibio_emb_cos_sim": 0.5726078748703003, "eval_wikibio_emb_cos_sim_sem": 0.013797334574139994, "eval_wikibio_emb_top1_equal": 0.046875, "eval_wikibio_emb_top1_equal_sem": 0.01875615101164758, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.681370258331299, "eval_wikibio_n_ngrams_match_1": 7.89, "eval_wikibio_n_ngrams_match_2": 2.302, "eval_wikibio_n_ngrams_match_3": 0.71, "eval_wikibio_num_pred_words": 34.322, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 107.91784663240344, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.25907671373595287, "eval_wikibio_runtime": 10.0235, "eval_wikibio_samples_per_second": 49.883, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.2583744902020269, "eval_wikibio_token_set_f1_sem": 0.0062210127601358195, "eval_wikibio_token_set_precision": 0.25181756012882145, "eval_wikibio_token_set_recall": 0.2830897168428563, "eval_wikibio_true_num_tokens": 61.1328125, "step": 10625 }, { "epoch": 2.04, "eval_nq_accuracy": 0.42671875, "eval_nq_bleu_score": 6.514900534831086, "eval_nq_bleu_score_sem": 0.287436139214196, "eval_nq_emb_cos_sim": 0.6404000520706177, "eval_nq_emb_cos_sim_sem": 0.012100701333532811, "eval_nq_emb_top1_equal": 0.15625, "eval_nq_emb_top1_equal_sem": 0.03221922156442571, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 3.0305635929107666, "eval_nq_n_ngrams_match_1": 17.164, "eval_nq_n_ngrams_match_2": 4.976, "eval_nq_n_ngrams_match_3": 1.864, "eval_nq_num_pred_words": 47.74, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 20.708900690670102, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.31591395402792466, "eval_nq_runtime": 10.8708, "eval_nq_samples_per_second": 45.995, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.3545936741460492, "eval_nq_token_set_f1_sem": 0.004861322315256306, "eval_nq_token_set_precision": 0.29108134708930233, "eval_nq_token_set_recall": 0.48476600959067834, "eval_nq_true_num_tokens": 64.0, "step": 10625 }, { "epoch": 2.04, "learning_rate": 0.001, "loss": 3.3669, "step": 10632 }, { "epoch": 2.04, "learning_rate": 0.001, "loss": 3.3898, "step": 10644 }, { "epoch": 2.05, "learning_rate": 0.001, "loss": 3.366, "step": 10656 }, { "epoch": 2.05, "learning_rate": 0.001, "loss": 3.3637, "step": 10668 }, { "epoch": 2.05, "learning_rate": 0.001, "loss": 3.3699, "step": 10680 }, { "epoch": 2.05, "learning_rate": 0.001, "loss": 3.3736, "step": 10692 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 3.3761, "step": 10704 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 3.3563, "step": 10716 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 3.3725, "step": 10728 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 3.372, "step": 10740 }, { "epoch": 2.06, "learning_rate": 0.001, "loss": 3.3638, "step": 10752 }, { "epoch": 2.07, "learning_rate": 0.001, "loss": 3.3702, "step": 10764 }, { "epoch": 2.07, "learning_rate": 0.001, "loss": 3.3725, "step": 10776 }, { "epoch": 2.07, "learning_rate": 0.001, "loss": 3.3669, "step": 10788 }, { "epoch": 2.07, "learning_rate": 0.001, "loss": 3.3658, "step": 10800 }, { "epoch": 2.08, "learning_rate": 0.001, "loss": 3.3695, "step": 10812 }, { "epoch": 2.08, "learning_rate": 0.001, "loss": 3.3622, "step": 10824 }, { "epoch": 2.08, "learning_rate": 0.001, "loss": 3.3703, "step": 10836 }, { "epoch": 2.08, "learning_rate": 0.001, "loss": 3.3648, "step": 10848 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 3.3652, "step": 10860 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 3.37, "step": 10872 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 3.3578, "step": 10884 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 3.3616, "step": 10896 }, { "epoch": 2.09, "learning_rate": 0.001, "loss": 3.3533, "step": 10908 }, { "epoch": 2.1, "learning_rate": 0.001, "loss": 3.3664, "step": 10920 }, { "epoch": 2.1, "learning_rate": 0.001, "loss": 3.3681, "step": 10932 }, { "epoch": 2.1, "learning_rate": 0.001, "loss": 3.3596, "step": 10944 }, { "epoch": 2.1, "learning_rate": 0.001, "loss": 3.3547, "step": 10956 }, { "epoch": 2.11, "learning_rate": 0.001, "loss": 3.3687, "step": 10968 }, { "epoch": 2.11, "learning_rate": 0.001, "loss": 3.365, "step": 10980 }, { "epoch": 2.11, "learning_rate": 0.001, "loss": 3.3633, "step": 10992 }, { "epoch": 2.11, "learning_rate": 0.001, "loss": 3.351, "step": 11004 }, { "epoch": 2.12, "learning_rate": 0.001, "loss": 3.3533, "step": 11016 }, { "epoch": 2.12, "learning_rate": 0.001, "loss": 3.3579, "step": 11028 }, { "epoch": 2.12, "learning_rate": 0.001, "loss": 3.3604, "step": 11040 }, { "epoch": 2.12, "learning_rate": 0.001, "loss": 3.3567, "step": 11052 }, { "epoch": 2.12, "learning_rate": 0.001, "loss": 3.3409, "step": 11064 }, { "epoch": 2.13, "learning_rate": 0.001, "loss": 3.3481, "step": 11076 }, { "epoch": 2.13, "learning_rate": 0.001, "loss": 3.3667, "step": 11088 }, { "epoch": 2.13, "learning_rate": 0.001, "loss": 3.3462, "step": 11100 }, { "epoch": 2.13, "learning_rate": 0.001, "loss": 3.3446, "step": 11112 }, { "epoch": 2.14, "learning_rate": 0.001, "loss": 3.3504, "step": 11124 }, { "epoch": 2.14, "learning_rate": 0.001, "loss": 3.3443, "step": 11136 }, { "epoch": 2.14, "learning_rate": 0.001, "loss": 3.3672, "step": 11148 }, { "epoch": 2.14, "learning_rate": 0.001, "loss": 3.3504, "step": 11160 }, { "epoch": 2.15, "learning_rate": 0.001, "loss": 3.3632, "step": 11172 }, { "epoch": 2.15, "learning_rate": 0.001, "loss": 3.348, "step": 11184 }, { "epoch": 2.15, "learning_rate": 0.001, "loss": 3.3525, "step": 11196 }, { "epoch": 2.15, "learning_rate": 0.001, "loss": 3.3268, "step": 11208 }, { "epoch": 2.15, "learning_rate": 0.001, "loss": 3.3477, "step": 11220 }, { "epoch": 2.16, "learning_rate": 0.001, "loss": 3.347, "step": 11232 }, { "epoch": 2.16, "learning_rate": 0.001, "loss": 3.3552, "step": 11244 }, { "epoch": 2.16, "eval_ag_news_accuracy": 0.24784375, "eval_ag_news_bleu_score": 2.7445599258443236, "eval_ag_news_bleu_score_sem": 0.09877952293750877, "eval_ag_news_emb_cos_sim": 0.5810866355895996, "eval_ag_news_emb_cos_sim_sem": 0.01221696078097208, "eval_ag_news_emb_top1_equal": 0.1171875, "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.501502990722656, "eval_ag_news_n_ngrams_match_1": 8.896, "eval_ag_news_n_ngrams_match_2": 1.484, "eval_ag_news_n_ngrams_match_3": 0.336, "eval_ag_news_num_pred_words": 43.876, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 90.15252793820358, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2109255283508678, "eval_ag_news_runtime": 10.3507, "eval_ag_news_samples_per_second": 48.306, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.24467123734232962, "eval_ag_news_token_set_f1_sem": 0.00464406244716166, "eval_ag_news_token_set_precision": 0.20432879044949856, "eval_ag_news_token_set_recall": 0.3474431784687123, "eval_ag_news_true_num_tokens": 56.09375, "step": 11250 }, { "epoch": 2.16, "eval_anthropic_toxic_prompts_accuracy": 0.076, "eval_anthropic_toxic_prompts_bleu_score": 1.6010230290027194, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06516102392011566, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4394418001174927, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011667782076519067, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.201409339904785, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.322, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.664, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.158, "eval_anthropic_toxic_prompts_num_pred_words": 44.876, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 66.7803810069828, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11635369473081233, "eval_anthropic_toxic_prompts_runtime": 9.951, "eval_anthropic_toxic_prompts_samples_per_second": 50.246, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.21500305601408232, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0055104148309599145, "eval_anthropic_toxic_prompts_token_set_precision": 0.22044425128328066, "eval_anthropic_toxic_prompts_token_set_recall": 0.2552014335409628, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 11250 }, { "epoch": 2.16, "eval_arxiv_accuracy": 0.2788125, "eval_arxiv_bleu_score": 2.331719697644204, "eval_arxiv_bleu_score_sem": 0.0718559434105871, "eval_arxiv_emb_cos_sim": 0.47331756353378296, "eval_arxiv_emb_cos_sim_sem": 0.010864395972361612, "eval_arxiv_emb_top1_equal": 0.1328125, "eval_arxiv_emb_top1_equal_sem": 0.030114394778901498, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.310990810394287, "eval_arxiv_n_ngrams_match_1": 8.764, "eval_arxiv_n_ngrams_match_2": 1.314, "eval_arxiv_n_ngrams_match_3": 0.182, "eval_arxiv_num_pred_words": 36.87, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 74.51428190192313, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.20464207246615473, "eval_arxiv_runtime": 10.7068, "eval_arxiv_samples_per_second": 46.699, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.2151186928719234, "eval_arxiv_token_set_f1_sem": 0.004250104115131363, "eval_arxiv_token_set_precision": 0.15530441116875587, "eval_arxiv_token_set_recall": 0.40453428097621075, "eval_arxiv_true_num_tokens": 64.0, "step": 11250 }, { "epoch": 2.16, "eval_python_code_alpaca_accuracy": 0.10753125, "eval_python_code_alpaca_bleu_score": 2.33818332320569, "eval_python_code_alpaca_bleu_score_sem": 0.07790112017255417, "eval_python_code_alpaca_emb_cos_sim": 0.38225212693214417, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009633005987397997, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.1621551513671875, "eval_python_code_alpaca_n_ngrams_match_1": 4.762, "eval_python_code_alpaca_n_ngrams_match_2": 0.676, "eval_python_code_alpaca_n_ngrams_match_3": 0.144, "eval_python_code_alpaca_num_pred_words": 33.514, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 64.20975533219276, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.17286769300553573, "eval_python_code_alpaca_runtime": 10.8297, "eval_python_code_alpaca_samples_per_second": 46.169, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.27530757024370717, "eval_python_code_alpaca_token_set_f1_sem": 0.005168029282860783, "eval_python_code_alpaca_token_set_precision": 0.2346115716046044, "eval_python_code_alpaca_token_set_recall": 0.3960837590032956, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 11250 }, { "epoch": 2.16, "eval_wikibio_accuracy": 0.2603125, "eval_wikibio_bleu_score": 4.025779789448843, "eval_wikibio_bleu_score_sem": 0.17339246261718744, "eval_wikibio_emb_cos_sim": 0.6065972447395325, "eval_wikibio_emb_cos_sim_sem": 0.012248833018069427, "eval_wikibio_emb_top1_equal": 0.0703125, "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.674853324890137, "eval_wikibio_n_ngrams_match_1": 8.23, "eval_wikibio_n_ngrams_match_2": 2.33, "eval_wikibio_n_ngrams_match_3": 0.736, "eval_wikibio_num_pred_words": 36.226, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 107.21683989690757, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2648880309162607, "eval_wikibio_runtime": 10.1697, "eval_wikibio_samples_per_second": 49.166, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.264549726186732, "eval_wikibio_token_set_f1_sem": 0.005905943122690257, "eval_wikibio_token_set_precision": 0.26461674832557164, "eval_wikibio_token_set_recall": 0.28152007760025227, "eval_wikibio_true_num_tokens": 61.1328125, "step": 11250 }, { "epoch": 2.16, "eval_nq_accuracy": 0.4299375, "eval_nq_bleu_score": 6.468436321292758, "eval_nq_bleu_score_sem": 0.2825036321460263, "eval_nq_emb_cos_sim": 0.647667407989502, "eval_nq_emb_cos_sim_sem": 0.011768005965188956, "eval_nq_emb_top1_equal": 0.109375, "eval_nq_emb_top1_equal_sem": 0.027695207821224692, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.998945474624634, "eval_nq_n_ngrams_match_1": 17.144, "eval_nq_n_ngrams_match_2": 4.894, "eval_nq_n_ngrams_match_3": 1.824, "eval_nq_num_pred_words": 47.63, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 20.06436737869699, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.318101444874767, "eval_nq_runtime": 10.4086, "eval_nq_samples_per_second": 48.037, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.35412734518069694, "eval_nq_token_set_f1_sem": 0.0049829476778019055, "eval_nq_token_set_precision": 0.29336270290241306, "eval_nq_token_set_recall": 0.4751326232364711, "eval_nq_true_num_tokens": 64.0, "step": 11250 }, { "epoch": 2.16, "learning_rate": 0.001, "loss": 3.3426, "step": 11256 }, { "epoch": 2.16, "learning_rate": 0.001, "loss": 3.3484, "step": 11268 }, { "epoch": 2.17, "learning_rate": 0.001, "loss": 3.3409, "step": 11280 }, { "epoch": 2.17, "learning_rate": 0.001, "loss": 3.347, "step": 11292 }, { "epoch": 2.17, "learning_rate": 0.001, "loss": 3.3473, "step": 11304 }, { "epoch": 2.17, "learning_rate": 0.001, "loss": 3.3389, "step": 11316 }, { "epoch": 2.18, "learning_rate": 0.001, "loss": 3.3433, "step": 11328 }, { "epoch": 2.18, "learning_rate": 0.001, "loss": 3.3393, "step": 11340 }, { "epoch": 2.18, "learning_rate": 0.001, "loss": 3.3541, "step": 11352 }, { "epoch": 2.18, "learning_rate": 0.001, "loss": 3.3482, "step": 11364 }, { "epoch": 2.18, "learning_rate": 0.001, "loss": 3.3396, "step": 11376 }, { "epoch": 2.19, "learning_rate": 0.001, "loss": 3.3381, "step": 11388 }, { "epoch": 2.19, "learning_rate": 0.001, "loss": 3.3423, "step": 11400 }, { "epoch": 2.19, "learning_rate": 0.001, "loss": 3.3383, "step": 11412 }, { "epoch": 2.19, "learning_rate": 0.001, "loss": 3.3368, "step": 11424 }, { "epoch": 2.2, "learning_rate": 0.001, "loss": 3.3477, "step": 11436 }, { "epoch": 2.2, "learning_rate": 0.001, "loss": 3.3431, "step": 11448 }, { "epoch": 2.2, "learning_rate": 0.001, "loss": 3.3404, "step": 11460 }, { "epoch": 2.2, "learning_rate": 0.001, "loss": 3.3363, "step": 11472 }, { "epoch": 2.21, "learning_rate": 0.001, "loss": 3.3391, "step": 11484 }, { "epoch": 2.21, "learning_rate": 0.001, "loss": 3.3476, "step": 11496 }, { "epoch": 2.21, "learning_rate": 0.001, "loss": 3.3461, "step": 11508 }, { "epoch": 2.21, "learning_rate": 0.001, "loss": 3.334, "step": 11520 }, { "epoch": 2.21, "learning_rate": 0.001, "loss": 3.349, "step": 11532 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 3.3412, "step": 11544 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 3.3314, "step": 11556 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 3.3283, "step": 11568 }, { "epoch": 2.22, "learning_rate": 0.001, "loss": 3.3298, "step": 11580 }, { "epoch": 2.23, "learning_rate": 0.001, "loss": 3.3416, "step": 11592 }, { "epoch": 2.23, "learning_rate": 0.001, "loss": 3.3344, "step": 11604 }, { "epoch": 2.23, "learning_rate": 0.001, "loss": 3.3361, "step": 11616 }, { "epoch": 2.23, "learning_rate": 0.001, "loss": 3.3334, "step": 11628 }, { "epoch": 2.24, "learning_rate": 0.001, "loss": 3.3384, "step": 11640 }, { "epoch": 2.24, "learning_rate": 0.001, "loss": 3.3416, "step": 11652 }, { "epoch": 2.24, "learning_rate": 0.001, "loss": 3.3276, "step": 11664 }, { "epoch": 2.24, "learning_rate": 0.001, "loss": 3.3344, "step": 11676 }, { "epoch": 2.24, "learning_rate": 0.001, "loss": 3.3316, "step": 11688 }, { "epoch": 2.25, "learning_rate": 0.001, "loss": 3.3375, "step": 11700 }, { "epoch": 2.25, "learning_rate": 0.001, "loss": 3.3329, "step": 11712 }, { "epoch": 2.25, "learning_rate": 0.001, "loss": 3.3339, "step": 11724 }, { "epoch": 2.25, "learning_rate": 0.001, "loss": 3.3366, "step": 11736 }, { "epoch": 2.26, "learning_rate": 0.001, "loss": 3.3447, "step": 11748 }, { "epoch": 2.26, "learning_rate": 0.001, "loss": 3.3198, "step": 11760 }, { "epoch": 2.26, "learning_rate": 0.001, "loss": 3.3338, "step": 11772 }, { "epoch": 2.26, "learning_rate": 0.001, "loss": 3.318, "step": 11784 }, { "epoch": 2.26, "learning_rate": 0.001, "loss": 3.3252, "step": 11796 }, { "epoch": 2.27, "learning_rate": 0.001, "loss": 3.3258, "step": 11808 }, { "epoch": 2.27, "learning_rate": 0.001, "loss": 3.3341, "step": 11820 }, { "epoch": 2.27, "learning_rate": 0.001, "loss": 3.32, "step": 11832 }, { "epoch": 2.27, "learning_rate": 0.001, "loss": 3.3266, "step": 11844 }, { "epoch": 2.28, "learning_rate": 0.001, "loss": 3.318, "step": 11856 }, { "epoch": 2.28, "learning_rate": 0.001, "loss": 3.3183, "step": 11868 }, { "epoch": 2.28, "eval_ag_news_accuracy": 0.24703125, "eval_ag_news_bleu_score": 2.7937015627662416, "eval_ag_news_bleu_score_sem": 0.09449300742565286, "eval_ag_news_emb_cos_sim": 0.5981181859970093, "eval_ag_news_emb_cos_sim_sem": 0.010756656773766624, "eval_ag_news_emb_top1_equal": 0.1328125, "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.4853644371032715, "eval_ag_news_n_ngrams_match_1": 9.444, "eval_ag_news_n_ngrams_match_2": 1.514, "eval_ag_news_n_ngrams_match_3": 0.324, "eval_ag_news_num_pred_words": 45.914, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 88.70927387354537, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2223248034476243, "eval_ag_news_runtime": 10.5258, "eval_ag_news_samples_per_second": 47.502, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.2553805768334155, "eval_ag_news_token_set_f1_sem": 0.004149271939107957, "eval_ag_news_token_set_precision": 0.21490000811978566, "eval_ag_news_token_set_recall": 0.3457388318396937, "eval_ag_news_true_num_tokens": 56.09375, "step": 11875 }, { "epoch": 2.28, "eval_anthropic_toxic_prompts_accuracy": 0.0768125, "eval_anthropic_toxic_prompts_bleu_score": 1.6148785945359487, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06695166853089543, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4605611562728882, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012429816853931271, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.194695949554443, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.618, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.152, "eval_anthropic_toxic_prompts_num_pred_words": 44.854, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 66.33355976242449, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11675054713114878, "eval_anthropic_toxic_prompts_runtime": 10.0662, "eval_anthropic_toxic_prompts_samples_per_second": 49.671, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.21671206336021467, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005479358237617751, "eval_anthropic_toxic_prompts_token_set_precision": 0.22181571243632656, "eval_anthropic_toxic_prompts_token_set_recall": 0.25917747894962717, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 11875 }, { "epoch": 2.28, "eval_arxiv_accuracy": 0.2815625, "eval_arxiv_bleu_score": 2.4608083050923413, "eval_arxiv_bleu_score_sem": 0.07339472201132706, "eval_arxiv_emb_cos_sim": 0.49826580286026, "eval_arxiv_emb_cos_sim_sem": 0.009700269723443353, "eval_arxiv_emb_top1_equal": 0.1796875, "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.294712543487549, "eval_arxiv_n_ngrams_match_1": 9.472, "eval_arxiv_n_ngrams_match_2": 1.352, "eval_arxiv_n_ngrams_match_3": 0.212, "eval_arxiv_num_pred_words": 37.294, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 73.31113765188522, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.21865798895946734, "eval_arxiv_runtime": 10.1772, "eval_arxiv_samples_per_second": 49.13, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.22903631514810155, "eval_arxiv_token_set_f1_sem": 0.004112179214568153, "eval_arxiv_token_set_precision": 0.168405257229587, "eval_arxiv_token_set_recall": 0.3998475453420297, "eval_arxiv_true_num_tokens": 64.0, "step": 11875 }, { "epoch": 2.28, "eval_python_code_alpaca_accuracy": 0.1076875, "eval_python_code_alpaca_bleu_score": 2.4303940005366877, "eval_python_code_alpaca_bleu_score_sem": 0.08203146448186399, "eval_python_code_alpaca_emb_cos_sim": 0.39726346731185913, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010924835870215251, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.135598659515381, "eval_python_code_alpaca_n_ngrams_match_1": 5.21, "eval_python_code_alpaca_n_ngrams_match_2": 0.806, "eval_python_code_alpaca_n_ngrams_match_3": 0.19, "eval_python_code_alpaca_num_pred_words": 33.626, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 62.527012258929766, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.18712196486011473, "eval_python_code_alpaca_runtime": 10.6872, "eval_python_code_alpaca_samples_per_second": 46.785, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.2889798734703638, "eval_python_code_alpaca_token_set_f1_sem": 0.0053625081564402585, "eval_python_code_alpaca_token_set_precision": 0.25991972374284805, "eval_python_code_alpaca_token_set_recall": 0.3795541481451212, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 11875 }, { "epoch": 2.28, "eval_wikibio_accuracy": 0.2579375, "eval_wikibio_bleu_score": 4.146975743252501, "eval_wikibio_bleu_score_sem": 0.1704537263692191, "eval_wikibio_emb_cos_sim": 0.5991557836532593, "eval_wikibio_emb_cos_sim_sem": 0.013391245737470482, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.702815532684326, "eval_wikibio_n_ngrams_match_1": 8.508, "eval_wikibio_n_ngrams_match_2": 2.502, "eval_wikibio_n_ngrams_match_3": 0.788, "eval_wikibio_num_pred_words": 37.052, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 110.25716850693988, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.27106466548154684, "eval_wikibio_runtime": 10.0116, "eval_wikibio_samples_per_second": 49.942, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.26870454642046887, "eval_wikibio_token_set_f1_sem": 0.0059767885878211574, "eval_wikibio_token_set_precision": 0.2682234422428167, "eval_wikibio_token_set_recall": 0.2876715398715617, "eval_wikibio_true_num_tokens": 61.1328125, "step": 11875 }, { "epoch": 2.28, "eval_nq_accuracy": 0.432125, "eval_nq_bleu_score": 6.927951880533188, "eval_nq_bleu_score_sem": 0.32423319331243844, "eval_nq_emb_cos_sim": 0.6612377762794495, "eval_nq_emb_cos_sim_sem": 0.011943865007689678, "eval_nq_emb_top1_equal": 0.140625, "eval_nq_emb_top1_equal_sem": 0.030847557647994725, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.968759059906006, "eval_nq_n_ngrams_match_1": 17.66, "eval_nq_n_ngrams_match_2": 5.206, "eval_nq_n_ngrams_match_3": 2.034, "eval_nq_num_pred_words": 48.306, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 19.46774629340781, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.32456458019104223, "eval_nq_runtime": 10.4751, "eval_nq_samples_per_second": 47.732, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.3622490786429294, "eval_nq_token_set_f1_sem": 0.004978429005560884, "eval_nq_token_set_precision": 0.299877244152493, "eval_nq_token_set_recall": 0.48329076637595264, "eval_nq_true_num_tokens": 64.0, "step": 11875 }, { "epoch": 2.28, "learning_rate": 0.001, "loss": 3.3233, "step": 11880 }, { "epoch": 2.28, "learning_rate": 0.001, "loss": 3.3263, "step": 11892 }, { "epoch": 2.29, "learning_rate": 0.001, "loss": 3.3272, "step": 11904 }, { "epoch": 2.29, "learning_rate": 0.001, "loss": 3.3166, "step": 11916 }, { "epoch": 2.29, "learning_rate": 0.001, "loss": 3.3213, "step": 11928 }, { "epoch": 2.29, "learning_rate": 0.001, "loss": 3.3213, "step": 11940 }, { "epoch": 2.29, "learning_rate": 0.001, "loss": 3.3227, "step": 11952 }, { "epoch": 2.3, "learning_rate": 0.001, "loss": 3.3134, "step": 11964 }, { "epoch": 2.3, "learning_rate": 0.001, "loss": 3.319, "step": 11976 }, { "epoch": 2.3, "learning_rate": 0.001, "loss": 3.3222, "step": 11988 }, { "epoch": 2.3, "learning_rate": 0.001, "loss": 3.3201, "step": 12000 }, { "epoch": 2.31, "learning_rate": 0.001, "loss": 3.3125, "step": 12012 }, { "epoch": 2.31, "learning_rate": 0.001, "loss": 3.3249, "step": 12024 }, { "epoch": 2.31, "learning_rate": 0.001, "loss": 3.3003, "step": 12036 }, { "epoch": 2.31, "learning_rate": 0.001, "loss": 3.3122, "step": 12048 }, { "epoch": 2.32, "learning_rate": 0.001, "loss": 3.3112, "step": 12060 }, { "epoch": 2.32, "learning_rate": 0.001, "loss": 3.3167, "step": 12072 }, { "epoch": 2.32, "learning_rate": 0.001, "loss": 3.3116, "step": 12084 }, { "epoch": 2.32, "learning_rate": 0.001, "loss": 3.3118, "step": 12096 }, { "epoch": 2.32, "learning_rate": 0.001, "loss": 3.3067, "step": 12108 }, { "epoch": 2.33, "learning_rate": 0.001, "loss": 3.3098, "step": 12120 }, { "epoch": 2.33, "learning_rate": 0.001, "loss": 3.3024, "step": 12132 }, { "epoch": 2.33, "learning_rate": 0.001, "loss": 3.3164, "step": 12144 }, { "epoch": 2.33, "learning_rate": 0.001, "loss": 3.3169, "step": 12156 }, { "epoch": 2.34, "learning_rate": 0.001, "loss": 3.2913, "step": 12168 }, { "epoch": 2.34, "learning_rate": 0.001, "loss": 3.3111, "step": 12180 }, { "epoch": 2.34, "learning_rate": 0.001, "loss": 3.3134, "step": 12192 }, { "epoch": 2.34, "learning_rate": 0.001, "loss": 3.3113, "step": 12204 }, { "epoch": 2.35, "learning_rate": 0.001, "loss": 3.3067, "step": 12216 }, { "epoch": 2.35, "learning_rate": 0.001, "loss": 3.3071, "step": 12228 }, { "epoch": 2.35, "learning_rate": 0.001, "loss": 3.2984, "step": 12240 }, { "epoch": 2.35, "learning_rate": 0.001, "loss": 3.2973, "step": 12252 }, { "epoch": 2.35, "learning_rate": 0.001, "loss": 3.2992, "step": 12264 }, { "epoch": 2.36, "learning_rate": 0.001, "loss": 3.2974, "step": 12276 }, { "epoch": 2.36, "learning_rate": 0.001, "loss": 3.3075, "step": 12288 }, { "epoch": 2.36, "learning_rate": 0.001, "loss": 3.3089, "step": 12300 }, { "epoch": 2.36, "learning_rate": 0.001, "loss": 3.2975, "step": 12312 }, { "epoch": 2.37, "learning_rate": 0.001, "loss": 3.311, "step": 12324 }, { "epoch": 2.37, "learning_rate": 0.001, "loss": 3.3121, "step": 12336 }, { "epoch": 2.37, "learning_rate": 0.001, "loss": 3.3084, "step": 12348 }, { "epoch": 2.37, "learning_rate": 0.001, "loss": 3.2951, "step": 12360 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 3.2934, "step": 12372 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 3.2989, "step": 12384 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 3.3096, "step": 12396 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 3.2998, "step": 12408 }, { "epoch": 2.38, "learning_rate": 0.001, "loss": 3.2956, "step": 12420 }, { "epoch": 2.39, "learning_rate": 0.001, "loss": 3.3046, "step": 12432 }, { "epoch": 2.39, "learning_rate": 0.001, "loss": 3.2991, "step": 12444 }, { "epoch": 2.39, "learning_rate": 0.001, "loss": 3.3048, "step": 12456 }, { "epoch": 2.39, "learning_rate": 0.001, "loss": 3.3134, "step": 12468 }, { "epoch": 2.4, "learning_rate": 0.001, "loss": 3.2976, "step": 12480 }, { "epoch": 2.4, "learning_rate": 0.001, "loss": 3.3001, "step": 12492 }, { "epoch": 2.4, "eval_ag_news_accuracy": 0.24971875, "eval_ag_news_bleu_score": 2.826092698746348, "eval_ag_news_bleu_score_sem": 0.10405368399502497, "eval_ag_news_emb_cos_sim": 0.6051392555236816, "eval_ag_news_emb_cos_sim_sem": 0.011451116982125704, "eval_ag_news_emb_top1_equal": 0.1328125, "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.447063446044922, "eval_ag_news_n_ngrams_match_1": 9.228, "eval_ag_news_n_ngrams_match_2": 1.486, "eval_ag_news_n_ngrams_match_3": 0.328, "eval_ag_news_num_pred_words": 44.924, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 85.37586469552387, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2190468766261795, "eval_ag_news_runtime": 12.4371, "eval_ag_news_samples_per_second": 40.202, "eval_ag_news_steps_per_second": 0.08, "eval_ag_news_token_set_f1": 0.252671136839368, "eval_ag_news_token_set_f1_sem": 0.00416211692282695, "eval_ag_news_token_set_precision": 0.2116944573657276, "eval_ag_news_token_set_recall": 0.34707036925453266, "eval_ag_news_true_num_tokens": 56.09375, "step": 12500 }, { "epoch": 2.4, "eval_anthropic_toxic_prompts_accuracy": 0.07859375, "eval_anthropic_toxic_prompts_bleu_score": 1.6646446228802656, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06766547312689805, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.45219215750694275, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01256326694720692, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.135889530181885, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.384, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.664, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.166, "eval_anthropic_toxic_prompts_num_pred_words": 43.678, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 62.54520217798871, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.11992763643210105, "eval_anthropic_toxic_prompts_runtime": 9.7978, "eval_anthropic_toxic_prompts_samples_per_second": 51.032, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.2172559367052368, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005754577580731006, "eval_anthropic_toxic_prompts_token_set_precision": 0.22574504442036494, "eval_anthropic_toxic_prompts_token_set_recall": 0.2507367217451066, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 12500 }, { "epoch": 2.4, "eval_arxiv_accuracy": 0.28115625, "eval_arxiv_bleu_score": 2.3457166495172768, "eval_arxiv_bleu_score_sem": 0.0671725321596959, "eval_arxiv_emb_cos_sim": 0.48694080114364624, "eval_arxiv_emb_cos_sim_sem": 0.01065699528210655, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.275970935821533, "eval_arxiv_n_ngrams_match_1": 9.372, "eval_arxiv_n_ngrams_match_2": 1.272, "eval_arxiv_n_ngrams_match_3": 0.158, "eval_arxiv_num_pred_words": 35.53, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 71.94996420403176, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.21996145432350012, "eval_arxiv_runtime": 10.2338, "eval_arxiv_samples_per_second": 48.858, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.22896068861045651, "eval_arxiv_token_set_f1_sem": 0.004288100563487217, "eval_arxiv_token_set_precision": 0.16931357812446676, "eval_arxiv_token_set_recall": 0.3940005563945889, "eval_arxiv_true_num_tokens": 64.0, "step": 12500 }, { "epoch": 2.4, "eval_python_code_alpaca_accuracy": 0.10940625, "eval_python_code_alpaca_bleu_score": 2.440995180886357, "eval_python_code_alpaca_bleu_score_sem": 0.07114712202953336, "eval_python_code_alpaca_emb_cos_sim": 0.4028877019882202, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009777283869473899, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.087093353271484, "eval_python_code_alpaca_n_ngrams_match_1": 5.048, "eval_python_code_alpaca_n_ngrams_match_2": 0.754, "eval_python_code_alpaca_n_ngrams_match_3": 0.164, "eval_python_code_alpaca_num_pred_words": 33.442, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 59.56650105811409, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.18386204582450022, "eval_python_code_alpaca_runtime": 9.9476, "eval_python_code_alpaca_samples_per_second": 50.263, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.28525719592563015, "eval_python_code_alpaca_token_set_f1_sem": 0.00517466331528906, "eval_python_code_alpaca_token_set_precision": 0.25021821502350705, "eval_python_code_alpaca_token_set_recall": 0.39145710504919656, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 12500 }, { "epoch": 2.4, "eval_wikibio_accuracy": 0.26415625, "eval_wikibio_bleu_score": 3.8662112814379053, "eval_wikibio_bleu_score_sem": 0.1616383110079937, "eval_wikibio_emb_cos_sim": 0.5877749919891357, "eval_wikibio_emb_cos_sim_sem": 0.014569499083173986, "eval_wikibio_emb_top1_equal": 0.09375, "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.646920204162598, "eval_wikibio_n_ngrams_match_1": 8.052, "eval_wikibio_n_ngrams_match_2": 2.288, "eval_wikibio_n_ngrams_match_3": 0.668, "eval_wikibio_num_pred_words": 35.422, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 104.26338066681669, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.26084861896744355, "eval_wikibio_runtime": 10.8319, "eval_wikibio_samples_per_second": 46.16, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.2563883128449072, "eval_wikibio_token_set_f1_sem": 0.006257974487319957, "eval_wikibio_token_set_precision": 0.2543646756806158, "eval_wikibio_token_set_recall": 0.27274956503895254, "eval_wikibio_true_num_tokens": 61.1328125, "step": 12500 }, { "epoch": 2.4, "eval_nq_accuracy": 0.4365, "eval_nq_bleu_score": 6.746635596538757, "eval_nq_bleu_score_sem": 0.2874480202467293, "eval_nq_emb_cos_sim": 0.6585381031036377, "eval_nq_emb_cos_sim_sem": 0.01168807973534411, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.9432129859924316, "eval_nq_n_ngrams_match_1": 17.83, "eval_nq_n_ngrams_match_2": 5.082, "eval_nq_n_ngrams_match_3": 1.918, "eval_nq_num_pred_words": 48.308, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 18.97672040292327, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3324887701032784, "eval_nq_runtime": 10.4015, "eval_nq_samples_per_second": 48.07, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.36427021711641167, "eval_nq_token_set_f1_sem": 0.004655915477029171, "eval_nq_token_set_precision": 0.30350370361311, "eval_nq_token_set_recall": 0.47646411215695267, "eval_nq_true_num_tokens": 64.0, "step": 12500 }, { "epoch": 2.4, "learning_rate": 0.001, "loss": 3.2849, "step": 12504 }, { "epoch": 2.4, "learning_rate": 0.001, "loss": 3.293, "step": 12516 }, { "epoch": 2.41, "learning_rate": 0.001, "loss": 3.303, "step": 12528 }, { "epoch": 2.41, "learning_rate": 0.001, "loss": 3.2915, "step": 12540 }, { "epoch": 2.41, "learning_rate": 0.001, "loss": 3.2959, "step": 12552 }, { "epoch": 2.41, "learning_rate": 0.001, "loss": 3.3051, "step": 12564 }, { "epoch": 2.41, "learning_rate": 0.001, "loss": 3.3144, "step": 12576 }, { "epoch": 2.42, "learning_rate": 0.001, "loss": 3.2956, "step": 12588 }, { "epoch": 2.42, "learning_rate": 0.001, "loss": 3.2904, "step": 12600 }, { "epoch": 2.42, "learning_rate": 0.001, "loss": 3.2985, "step": 12612 }, { "epoch": 2.42, "learning_rate": 0.001, "loss": 3.2916, "step": 12624 }, { "epoch": 2.43, "learning_rate": 0.001, "loss": 3.2831, "step": 12636 }, { "epoch": 2.43, "learning_rate": 0.001, "loss": 3.2799, "step": 12648 }, { "epoch": 2.43, "learning_rate": 0.001, "loss": 3.287, "step": 12660 }, { "epoch": 2.43, "learning_rate": 0.001, "loss": 3.284, "step": 12672 }, { "epoch": 2.44, "learning_rate": 0.001, "loss": 3.2984, "step": 12684 }, { "epoch": 2.44, "learning_rate": 0.001, "loss": 3.2869, "step": 12696 }, { "epoch": 2.44, "learning_rate": 0.001, "loss": 3.2933, "step": 12708 }, { "epoch": 2.44, "learning_rate": 0.001, "loss": 3.2856, "step": 12720 }, { "epoch": 2.44, "learning_rate": 0.001, "loss": 3.2864, "step": 12732 }, { "epoch": 2.45, "learning_rate": 0.001, "loss": 3.2873, "step": 12744 }, { "epoch": 2.45, "learning_rate": 0.001, "loss": 3.2849, "step": 12756 }, { "epoch": 2.45, "learning_rate": 0.001, "loss": 3.2829, "step": 12768 }, { "epoch": 2.45, "learning_rate": 0.001, "loss": 3.2822, "step": 12780 }, { "epoch": 2.46, "learning_rate": 0.001, "loss": 3.2721, "step": 12792 }, { "epoch": 2.46, "learning_rate": 0.001, "loss": 3.2747, "step": 12804 }, { "epoch": 2.46, "learning_rate": 0.001, "loss": 3.2763, "step": 12816 }, { "epoch": 2.46, "learning_rate": 0.001, "loss": 3.2932, "step": 12828 }, { "epoch": 2.47, "learning_rate": 0.001, "loss": 3.2863, "step": 12840 }, { "epoch": 2.47, "learning_rate": 0.001, "loss": 3.2846, "step": 12852 }, { "epoch": 2.47, "learning_rate": 0.001, "loss": 3.2786, "step": 12864 }, { "epoch": 2.47, "learning_rate": 0.001, "loss": 3.2894, "step": 12876 }, { "epoch": 2.47, "learning_rate": 0.001, "loss": 3.2768, "step": 12888 }, { "epoch": 2.48, "learning_rate": 0.001, "loss": 3.2748, "step": 12900 }, { "epoch": 2.48, "learning_rate": 0.001, "loss": 3.2761, "step": 12912 }, { "epoch": 2.48, "learning_rate": 0.001, "loss": 3.282, "step": 12924 }, { "epoch": 2.48, "learning_rate": 0.001, "loss": 3.2828, "step": 12936 }, { "epoch": 2.49, "learning_rate": 0.001, "loss": 3.2652, "step": 12948 }, { "epoch": 2.49, "learning_rate": 0.001, "loss": 3.2758, "step": 12960 }, { "epoch": 2.49, "learning_rate": 0.001, "loss": 3.2806, "step": 12972 }, { "epoch": 2.49, "learning_rate": 0.001, "loss": 3.2714, "step": 12984 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 3.264, "step": 12996 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 3.2716, "step": 13008 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 3.2926, "step": 13020 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 3.2747, "step": 13032 }, { "epoch": 2.5, "learning_rate": 0.001, "loss": 3.2642, "step": 13044 }, { "epoch": 2.51, "learning_rate": 0.001, "loss": 3.2794, "step": 13056 }, { "epoch": 2.51, "learning_rate": 0.001, "loss": 3.2818, "step": 13068 }, { "epoch": 2.51, "learning_rate": 0.001, "loss": 3.2701, "step": 13080 }, { "epoch": 2.51, "learning_rate": 0.001, "loss": 3.2793, "step": 13092 }, { "epoch": 2.52, "learning_rate": 0.001, "loss": 3.2623, "step": 13104 }, { "epoch": 2.52, "learning_rate": 0.001, "loss": 3.2681, "step": 13116 }, { "epoch": 2.52, "eval_ag_news_accuracy": 0.2536875, "eval_ag_news_bleu_score": 2.9449891964169406, "eval_ag_news_bleu_score_sem": 0.11170758560265776, "eval_ag_news_emb_cos_sim": 0.6044542789459229, "eval_ag_news_emb_cos_sim_sem": 0.012544080911511352, "eval_ag_news_emb_top1_equal": 0.0859375, "eval_ag_news_emb_top1_equal_sem": 0.02487009666300537, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.407440662384033, "eval_ag_news_n_ngrams_match_1": 9.36, "eval_ag_news_n_ngrams_match_2": 1.544, "eval_ag_news_n_ngrams_match_3": 0.362, "eval_ag_news_num_pred_words": 43.174, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 82.05917738298486, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.22583801754747374, "eval_ag_news_runtime": 15.3408, "eval_ag_news_samples_per_second": 32.593, "eval_ag_news_steps_per_second": 0.065, "eval_ag_news_token_set_f1": 0.25504214343372367, "eval_ag_news_token_set_f1_sem": 0.004312547375032933, "eval_ag_news_token_set_precision": 0.2148031596987005, "eval_ag_news_token_set_recall": 0.34836961628933527, "eval_ag_news_true_num_tokens": 56.09375, "step": 13125 }, { "epoch": 2.52, "eval_anthropic_toxic_prompts_accuracy": 0.077875, "eval_anthropic_toxic_prompts_bleu_score": 1.751248530201478, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06713836266353478, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4631584584712982, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012515903109103787, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.03125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439349450344106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.127745151519775, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.592, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.696, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.17, "eval_anthropic_toxic_prompts_num_pred_words": 42.91, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 62.03787908789136, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.12692583998457163, "eval_anthropic_toxic_prompts_runtime": 10.2293, "eval_anthropic_toxic_prompts_samples_per_second": 48.879, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.2325835861480348, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005848272577870726, "eval_anthropic_toxic_prompts_token_set_precision": 0.23768760026944197, "eval_anthropic_toxic_prompts_token_set_recall": 0.2732707689420043, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 13125 }, { "epoch": 2.52, "eval_arxiv_accuracy": 0.28484375, "eval_arxiv_bleu_score": 2.5811135225852504, "eval_arxiv_bleu_score_sem": 0.07618768579731194, "eval_arxiv_emb_cos_sim": 0.5199321508407593, "eval_arxiv_emb_cos_sim_sem": 0.008200699341404919, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.23995304107666, "eval_arxiv_n_ngrams_match_1": 9.88, "eval_arxiv_n_ngrams_match_2": 1.476, "eval_arxiv_n_ngrams_match_3": 0.226, "eval_arxiv_num_pred_words": 37.59, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 69.40459259728735, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23090852795615785, "eval_arxiv_runtime": 10.4624, "eval_arxiv_samples_per_second": 47.79, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.238395627428193, "eval_arxiv_token_set_f1_sem": 0.0039886683522581765, "eval_arxiv_token_set_precision": 0.1773076603557147, "eval_arxiv_token_set_recall": 0.40447644208248185, "eval_arxiv_true_num_tokens": 64.0, "step": 13125 }, { "epoch": 2.52, "eval_python_code_alpaca_accuracy": 0.1119375, "eval_python_code_alpaca_bleu_score": 2.561315340916709, "eval_python_code_alpaca_bleu_score_sem": 0.08306839700716231, "eval_python_code_alpaca_emb_cos_sim": 0.42342567443847656, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009762105077109406, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.990710973739624, "eval_python_code_alpaca_n_ngrams_match_1": 5.514, "eval_python_code_alpaca_n_ngrams_match_2": 0.91, "eval_python_code_alpaca_n_ngrams_match_3": 0.224, "eval_python_code_alpaca_num_pred_words": 35.608, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 54.09333463533215, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.19147301206757505, "eval_python_code_alpaca_runtime": 9.9754, "eval_python_code_alpaca_samples_per_second": 50.123, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.30585590490609965, "eval_python_code_alpaca_token_set_f1_sem": 0.005042719737604739, "eval_python_code_alpaca_token_set_precision": 0.2745375740186613, "eval_python_code_alpaca_token_set_recall": 0.3981217515573972, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 13125 }, { "epoch": 2.52, "eval_wikibio_accuracy": 0.26803125, "eval_wikibio_bleu_score": 4.323228286961814, "eval_wikibio_bleu_score_sem": 0.17612224389306505, "eval_wikibio_emb_cos_sim": 0.6059797406196594, "eval_wikibio_emb_cos_sim_sem": 0.012709243874113417, "eval_wikibio_emb_top1_equal": 0.09375, "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.592751979827881, "eval_wikibio_n_ngrams_match_1": 8.424, "eval_wikibio_n_ngrams_match_2": 2.436, "eval_wikibio_n_ngrams_match_3": 0.786, "eval_wikibio_num_pred_words": 35.794, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 98.76585815779272, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2725388227904081, "eval_wikibio_runtime": 10.3485, "eval_wikibio_samples_per_second": 48.316, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.2719072575997325, "eval_wikibio_token_set_f1_sem": 0.00600774684149003, "eval_wikibio_token_set_precision": 0.26944509606780204, "eval_wikibio_token_set_recall": 0.29515541713185645, "eval_wikibio_true_num_tokens": 61.1328125, "step": 13125 }, { "epoch": 2.52, "eval_nq_accuracy": 0.4376875, "eval_nq_bleu_score": 6.738113226507277, "eval_nq_bleu_score_sem": 0.30399523897515385, "eval_nq_emb_cos_sim": 0.6823140978813171, "eval_nq_emb_cos_sim_sem": 0.010425863281961824, "eval_nq_emb_top1_equal": 0.1484375, "eval_nq_emb_top1_equal_sem": 0.031548465007086954, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.915091037750244, "eval_nq_n_ngrams_match_1": 17.774, "eval_nq_n_ngrams_match_2": 5.142, "eval_nq_n_ngrams_match_3": 1.908, "eval_nq_num_pred_words": 47.11, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 18.450492017575765, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.33280402860050506, "eval_nq_runtime": 10.6217, "eval_nq_samples_per_second": 47.073, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.3666457291777285, "eval_nq_token_set_f1_sem": 0.004688051975408419, "eval_nq_token_set_precision": 0.30308058586002634, "eval_nq_token_set_recall": 0.486597131621353, "eval_nq_true_num_tokens": 64.0, "step": 13125 }, { "epoch": 2.52, "learning_rate": 0.001, "loss": 3.2737, "step": 13128 }, { "epoch": 2.52, "learning_rate": 0.001, "loss": 3.2603, "step": 13140 }, { "epoch": 2.53, "learning_rate": 0.001, "loss": 3.2597, "step": 13152 }, { "epoch": 2.53, "learning_rate": 0.001, "loss": 3.2711, "step": 13164 }, { "epoch": 2.53, "learning_rate": 0.001, "loss": 3.2802, "step": 13176 }, { "epoch": 2.53, "learning_rate": 0.001, "loss": 3.272, "step": 13188 }, { "epoch": 2.53, "learning_rate": 0.001, "loss": 3.2725, "step": 13200 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 3.2786, "step": 13212 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 3.2792, "step": 13224 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 3.272, "step": 13236 }, { "epoch": 2.54, "learning_rate": 0.001, "loss": 3.2692, "step": 13248 }, { "epoch": 2.55, "learning_rate": 0.001, "loss": 3.2652, "step": 13260 }, { "epoch": 2.55, "learning_rate": 0.001, "loss": 3.2718, "step": 13272 }, { "epoch": 2.55, "learning_rate": 0.001, "loss": 3.2621, "step": 13284 }, { "epoch": 2.55, "learning_rate": 0.001, "loss": 3.2689, "step": 13296 }, { "epoch": 2.56, "learning_rate": 0.001, "loss": 3.2621, "step": 13308 }, { "epoch": 2.56, "learning_rate": 0.001, "loss": 3.2754, "step": 13320 }, { "epoch": 2.56, "learning_rate": 0.001, "loss": 3.2726, "step": 13332 }, { "epoch": 2.56, "learning_rate": 0.001, "loss": 3.2598, "step": 13344 }, { "epoch": 2.56, "learning_rate": 0.001, "loss": 3.2694, "step": 13356 }, { "epoch": 2.57, "learning_rate": 0.001, "loss": 3.2706, "step": 13368 }, { "epoch": 2.57, "learning_rate": 0.001, "loss": 3.2583, "step": 13380 }, { "epoch": 2.57, "learning_rate": 0.001, "loss": 3.2653, "step": 13392 }, { "epoch": 2.57, "learning_rate": 0.001, "loss": 3.26, "step": 13404 }, { "epoch": 2.58, "learning_rate": 0.001, "loss": 3.2639, "step": 13416 }, { "epoch": 2.58, "learning_rate": 0.001, "loss": 3.2541, "step": 13428 }, { "epoch": 2.58, "learning_rate": 0.001, "loss": 3.2659, "step": 13440 }, { "epoch": 2.58, "learning_rate": 0.001, "loss": 3.2545, "step": 13452 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 3.2692, "step": 13464 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 3.2641, "step": 13476 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 3.2666, "step": 13488 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 3.2633, "step": 13500 }, { "epoch": 2.59, "learning_rate": 0.001, "loss": 3.2579, "step": 13512 }, { "epoch": 2.6, "learning_rate": 0.001, "loss": 3.261, "step": 13524 }, { "epoch": 2.6, "learning_rate": 0.001, "loss": 3.251, "step": 13536 }, { "epoch": 2.6, "learning_rate": 0.001, "loss": 3.2652, "step": 13548 }, { "epoch": 2.6, "learning_rate": 0.001, "loss": 3.2574, "step": 13560 }, { "epoch": 2.61, "learning_rate": 0.001, "loss": 3.2446, "step": 13572 }, { "epoch": 2.61, "learning_rate": 0.001, "loss": 3.2615, "step": 13584 }, { "epoch": 2.61, "learning_rate": 0.001, "loss": 3.2528, "step": 13596 }, { "epoch": 2.61, "learning_rate": 0.001, "loss": 3.2607, "step": 13608 }, { "epoch": 2.62, "learning_rate": 0.001, "loss": 3.255, "step": 13620 }, { "epoch": 2.62, "learning_rate": 0.001, "loss": 3.2528, "step": 13632 }, { "epoch": 2.62, "learning_rate": 0.001, "loss": 3.2607, "step": 13644 }, { "epoch": 2.62, "learning_rate": 0.001, "loss": 3.2503, "step": 13656 }, { "epoch": 2.62, "learning_rate": 0.001, "loss": 3.2505, "step": 13668 }, { "epoch": 2.63, "learning_rate": 0.001, "loss": 3.2545, "step": 13680 }, { "epoch": 2.63, "learning_rate": 0.001, "loss": 3.2544, "step": 13692 }, { "epoch": 2.63, "learning_rate": 0.001, "loss": 3.2668, "step": 13704 }, { "epoch": 2.63, "learning_rate": 0.001, "loss": 3.2631, "step": 13716 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 3.242, "step": 13728 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 3.2561, "step": 13740 }, { "epoch": 2.64, "eval_ag_news_accuracy": 0.2550625, "eval_ag_news_bleu_score": 3.0595852539348525, "eval_ag_news_bleu_score_sem": 0.10125286244043642, "eval_ag_news_emb_cos_sim": 0.6164160966873169, "eval_ag_news_emb_cos_sim_sem": 0.011056885897866929, "eval_ag_news_emb_top1_equal": 0.125, "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.388191223144531, "eval_ag_news_n_ngrams_match_1": 9.738, "eval_ag_news_n_ngrams_match_2": 1.69, "eval_ag_news_n_ngrams_match_3": 0.416, "eval_ag_news_num_pred_words": 45.606, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 80.49469029208058, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.22959446391039756, "eval_ag_news_runtime": 11.6257, "eval_ag_news_samples_per_second": 43.008, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.26311258895517087, "eval_ag_news_token_set_f1_sem": 0.004360061114056252, "eval_ag_news_token_set_precision": 0.2244133253321986, "eval_ag_news_token_set_recall": 0.3514068328125633, "eval_ag_news_true_num_tokens": 56.09375, "step": 13750 }, { "epoch": 2.64, "eval_anthropic_toxic_prompts_accuracy": 0.0798125, "eval_anthropic_toxic_prompts_bleu_score": 1.7996500465751293, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.06931926454336522, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.48772984743118286, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.013242475264761121, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0234375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.013424676090873717, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.096881866455078, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.838, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.848, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.246, "eval_anthropic_toxic_prompts_num_pred_words": 45.678, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 60.15243155481487, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.13198045152000265, "eval_anthropic_toxic_prompts_runtime": 11.7437, "eval_anthropic_toxic_prompts_samples_per_second": 42.576, "eval_anthropic_toxic_prompts_steps_per_second": 0.085, "eval_anthropic_toxic_prompts_token_set_f1": 0.2450321035635283, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006188998623815163, "eval_anthropic_toxic_prompts_token_set_precision": 0.2594393760445298, "eval_anthropic_toxic_prompts_token_set_recall": 0.27603164540936503, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 13750 }, { "epoch": 2.64, "eval_arxiv_accuracy": 0.287, "eval_arxiv_bleu_score": 2.632543308552264, "eval_arxiv_bleu_score_sem": 0.07272150154183635, "eval_arxiv_emb_cos_sim": 0.5344278216362, "eval_arxiv_emb_cos_sim_sem": 0.008885583654073423, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.203517436981201, "eval_arxiv_n_ngrams_match_1": 10.338, "eval_arxiv_n_ngrams_match_2": 1.5, "eval_arxiv_n_ngrams_match_3": 0.22, "eval_arxiv_num_pred_words": 38.834, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 66.92130902572652, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23994740816249588, "eval_arxiv_runtime": 12.495, "eval_arxiv_samples_per_second": 40.016, "eval_arxiv_steps_per_second": 0.08, "eval_arxiv_token_set_f1": 0.2490406751942718, "eval_arxiv_token_set_f1_sem": 0.0038308698796611497, "eval_arxiv_token_set_precision": 0.18654490094102041, "eval_arxiv_token_set_recall": 0.4134002024260476, "eval_arxiv_true_num_tokens": 64.0, "step": 13750 }, { "epoch": 2.64, "eval_python_code_alpaca_accuracy": 0.11378125, "eval_python_code_alpaca_bleu_score": 2.5490362239244195, "eval_python_code_alpaca_bleu_score_sem": 0.08137361657241758, "eval_python_code_alpaca_emb_cos_sim": 0.4182761609554291, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010729133564599994, "eval_python_code_alpaca_emb_top1_equal": 0.03125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 4.002032279968262, "eval_python_code_alpaca_n_ngrams_match_1": 5.458, "eval_python_code_alpaca_n_ngrams_match_2": 0.958, "eval_python_code_alpaca_n_ngrams_match_3": 0.222, "eval_python_code_alpaca_num_pred_words": 35.752, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 54.709221585777854, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.18824091787657266, "eval_python_code_alpaca_runtime": 14.4755, "eval_python_code_alpaca_samples_per_second": 34.541, "eval_python_code_alpaca_steps_per_second": 0.069, "eval_python_code_alpaca_token_set_f1": 0.3069438577192991, "eval_python_code_alpaca_token_set_f1_sem": 0.0053490704214093675, "eval_python_code_alpaca_token_set_precision": 0.274700933677545, "eval_python_code_alpaca_token_set_recall": 0.4007708336990634, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 13750 }, { "epoch": 2.64, "eval_wikibio_accuracy": 0.26978125, "eval_wikibio_bleu_score": 4.288860876153392, "eval_wikibio_bleu_score_sem": 0.1662797453557564, "eval_wikibio_emb_cos_sim": 0.629534125328064, "eval_wikibio_emb_cos_sim_sem": 0.013088037358016062, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.626995086669922, "eval_wikibio_n_ngrams_match_1": 8.706, "eval_wikibio_n_ngrams_match_2": 2.566, "eval_wikibio_n_ngrams_match_3": 0.768, "eval_wikibio_num_pred_words": 36.79, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 102.20648059507401, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.28293588694026894, "eval_wikibio_runtime": 15.3486, "eval_wikibio_samples_per_second": 32.576, "eval_wikibio_steps_per_second": 0.065, "eval_wikibio_token_set_f1": 0.2771370476635763, "eval_wikibio_token_set_f1_sem": 0.005909443023008455, "eval_wikibio_token_set_precision": 0.2793071172081303, "eval_wikibio_token_set_recall": 0.29145766768540515, "eval_wikibio_true_num_tokens": 61.1328125, "step": 13750 }, { "epoch": 2.64, "eval_nq_accuracy": 0.441375, "eval_nq_bleu_score": 6.936821043395195, "eval_nq_bleu_score_sem": 0.30734579894337744, "eval_nq_emb_cos_sim": 0.6749643087387085, "eval_nq_emb_cos_sim_sem": 0.011556900303531984, "eval_nq_emb_top1_equal": 0.1640625, "eval_nq_emb_top1_equal_sem": 0.03286167651298939, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.8895976543426514, "eval_nq_n_ngrams_match_1": 17.95, "eval_nq_n_ngrams_match_2": 5.266, "eval_nq_n_ngrams_match_3": 1.988, "eval_nq_num_pred_words": 48.37, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 17.98607152777222, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3366740775514444, "eval_nq_runtime": 13.0683, "eval_nq_samples_per_second": 38.26, "eval_nq_steps_per_second": 0.077, "eval_nq_token_set_f1": 0.3699267581375482, "eval_nq_token_set_f1_sem": 0.0047740210719885885, "eval_nq_token_set_precision": 0.30951190839774545, "eval_nq_token_set_recall": 0.483800685180859, "eval_nq_true_num_tokens": 64.0, "step": 13750 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 3.2637, "step": 13752 }, { "epoch": 2.64, "learning_rate": 0.001, "loss": 3.2672, "step": 13764 }, { "epoch": 2.65, "learning_rate": 0.001, "loss": 3.2434, "step": 13776 }, { "epoch": 2.65, "learning_rate": 0.001, "loss": 3.2529, "step": 13788 }, { "epoch": 2.65, "learning_rate": 0.001, "loss": 3.2508, "step": 13800 }, { "epoch": 2.65, "learning_rate": 0.001, "loss": 3.2417, "step": 13812 }, { "epoch": 2.65, "learning_rate": 0.001, "loss": 3.2503, "step": 13824 }, { "epoch": 2.66, "learning_rate": 0.001, "loss": 3.245, "step": 13836 }, { "epoch": 2.66, "learning_rate": 0.001, "loss": 3.2398, "step": 13848 }, { "epoch": 2.66, "learning_rate": 0.001, "loss": 3.2501, "step": 13860 }, { "epoch": 2.66, "learning_rate": 0.001, "loss": 3.2507, "step": 13872 }, { "epoch": 2.67, "learning_rate": 0.001, "loss": 3.2506, "step": 13884 }, { "epoch": 2.67, "learning_rate": 0.001, "loss": 3.2511, "step": 13896 }, { "epoch": 2.67, "learning_rate": 0.001, "loss": 3.2521, "step": 13908 }, { "epoch": 2.67, "learning_rate": 0.001, "loss": 3.2431, "step": 13920 }, { "epoch": 2.68, "learning_rate": 0.001, "loss": 3.2436, "step": 13932 }, { "epoch": 2.68, "learning_rate": 0.001, "loss": 3.2263, "step": 13944 }, { "epoch": 2.68, "learning_rate": 0.001, "loss": 3.2548, "step": 13956 }, { "epoch": 2.68, "learning_rate": 0.001, "loss": 3.2387, "step": 13968 }, { "epoch": 2.68, "learning_rate": 0.001, "loss": 3.247, "step": 13980 }, { "epoch": 2.69, "learning_rate": 0.001, "loss": 3.246, "step": 13992 }, { "epoch": 2.69, "learning_rate": 0.001, "loss": 3.2473, "step": 14004 }, { "epoch": 2.69, "learning_rate": 0.001, "loss": 3.249, "step": 14016 }, { "epoch": 2.69, "learning_rate": 0.001, "loss": 3.2356, "step": 14028 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 3.2423, "step": 14040 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 3.2445, "step": 14052 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 3.2361, "step": 14064 }, { "epoch": 2.7, "learning_rate": 0.001, "loss": 3.2529, "step": 14076 }, { "epoch": 2.71, "learning_rate": 0.001, "loss": 3.2334, "step": 14088 }, { "epoch": 2.71, "learning_rate": 0.001, "loss": 3.2309, "step": 14100 }, { "epoch": 2.71, "learning_rate": 0.001, "loss": 3.2429, "step": 14112 }, { "epoch": 2.71, "learning_rate": 0.001, "loss": 3.2419, "step": 14124 }, { "epoch": 2.71, "learning_rate": 0.001, "loss": 3.2352, "step": 14136 }, { "epoch": 2.72, "learning_rate": 0.001, "loss": 3.2317, "step": 14148 }, { "epoch": 2.72, "learning_rate": 0.001, "loss": 3.2368, "step": 14160 }, { "epoch": 2.72, "learning_rate": 0.001, "loss": 3.2424, "step": 14172 }, { "epoch": 2.72, "learning_rate": 0.001, "loss": 3.2388, "step": 14184 }, { "epoch": 2.73, "learning_rate": 0.001, "loss": 3.2367, "step": 14196 }, { "epoch": 2.73, "learning_rate": 0.001, "loss": 3.2314, "step": 14208 }, { "epoch": 2.73, "learning_rate": 0.001, "loss": 3.2157, "step": 14220 }, { "epoch": 2.73, "learning_rate": 0.001, "loss": 3.2254, "step": 14232 }, { "epoch": 2.74, "learning_rate": 0.001, "loss": 3.2334, "step": 14244 }, { "epoch": 2.74, "learning_rate": 0.001, "loss": 3.2297, "step": 14256 }, { "epoch": 2.74, "learning_rate": 0.001, "loss": 3.2353, "step": 14268 }, { "epoch": 2.74, "learning_rate": 0.001, "loss": 3.2344, "step": 14280 }, { "epoch": 2.74, "learning_rate": 0.001, "loss": 3.2369, "step": 14292 }, { "epoch": 2.75, "learning_rate": 0.001, "loss": 3.2255, "step": 14304 }, { "epoch": 2.75, "learning_rate": 0.001, "loss": 3.2334, "step": 14316 }, { "epoch": 2.75, "learning_rate": 0.001, "loss": 3.2351, "step": 14328 }, { "epoch": 2.75, "learning_rate": 0.001, "loss": 3.2212, "step": 14340 }, { "epoch": 2.76, "learning_rate": 0.001, "loss": 3.2223, "step": 14352 }, { "epoch": 2.76, "learning_rate": 0.001, "loss": 3.2268, "step": 14364 }, { "epoch": 2.76, "eval_ag_news_accuracy": 0.25671875, "eval_ag_news_bleu_score": 2.9228933524795804, "eval_ag_news_bleu_score_sem": 0.09653019308025049, "eval_ag_news_emb_cos_sim": 0.6249127388000488, "eval_ag_news_emb_cos_sim_sem": 0.01130702612669007, "eval_ag_news_emb_top1_equal": 0.109375, "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.3742289543151855, "eval_ag_news_n_ngrams_match_1": 9.716, "eval_ag_news_n_ngrams_match_2": 1.65, "eval_ag_news_n_ngrams_match_3": 0.366, "eval_ag_news_num_pred_words": 44.996, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 79.37861141465126, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.22996264353717513, "eval_ag_news_runtime": 11.1257, "eval_ag_news_samples_per_second": 44.941, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.26167208160282823, "eval_ag_news_token_set_f1_sem": 0.0041832893715841825, "eval_ag_news_token_set_precision": 0.22363620007574808, "eval_ag_news_token_set_recall": 0.34746237249697737, "eval_ag_news_true_num_tokens": 56.09375, "step": 14375 }, { "epoch": 2.76, "eval_anthropic_toxic_prompts_accuracy": 0.08171875, "eval_anthropic_toxic_prompts_bleu_score": 1.8160183436173238, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0799183565327385, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.4794783592224121, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011927572616562186, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.007812499866294757, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.034400463104248, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.716, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.812, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.228, "eval_anthropic_toxic_prompts_num_pred_words": 45.804, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 56.50903083143656, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1293065098026931, "eval_anthropic_toxic_prompts_runtime": 10.1683, "eval_anthropic_toxic_prompts_samples_per_second": 49.173, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.23886608170548068, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005991189123483841, "eval_anthropic_toxic_prompts_token_set_precision": 0.2483372762205062, "eval_anthropic_toxic_prompts_token_set_recall": 0.2699679507572635, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 14375 }, { "epoch": 2.76, "eval_arxiv_accuracy": 0.2880625, "eval_arxiv_bleu_score": 2.5976802419398055, "eval_arxiv_bleu_score_sem": 0.07470704319548123, "eval_arxiv_emb_cos_sim": 0.5267419219017029, "eval_arxiv_emb_cos_sim_sem": 0.010388814226487224, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.1842041015625, "eval_arxiv_n_ngrams_match_1": 9.898, "eval_arxiv_n_ngrams_match_2": 1.492, "eval_arxiv_n_ngrams_match_3": 0.218, "eval_arxiv_num_pred_words": 37.536, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 65.64123636340126, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23095370274952293, "eval_arxiv_runtime": 11.0414, "eval_arxiv_samples_per_second": 45.284, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.23907810694460827, "eval_arxiv_token_set_f1_sem": 0.004133082865537206, "eval_arxiv_token_set_precision": 0.17881862594052503, "eval_arxiv_token_set_recall": 0.3997951484317853, "eval_arxiv_true_num_tokens": 64.0, "step": 14375 }, { "epoch": 2.76, "eval_python_code_alpaca_accuracy": 0.111875, "eval_python_code_alpaca_bleu_score": 2.3207171005376215, "eval_python_code_alpaca_bleu_score_sem": 0.07783894784708693, "eval_python_code_alpaca_emb_cos_sim": 0.42236027121543884, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010205397727212894, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.9959185123443604, "eval_python_code_alpaca_n_ngrams_match_1": 5.32, "eval_python_code_alpaca_n_ngrams_match_2": 0.822, "eval_python_code_alpaca_n_ngrams_match_3": 0.176, "eval_python_code_alpaca_num_pred_words": 37.104, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 54.375762502464724, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.1815364623681976, "eval_python_code_alpaca_runtime": 11.6638, "eval_python_code_alpaca_samples_per_second": 42.868, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.2983793633896166, "eval_python_code_alpaca_token_set_f1_sem": 0.005223507485486749, "eval_python_code_alpaca_token_set_precision": 0.26527427441961166, "eval_python_code_alpaca_token_set_recall": 0.38938111330511516, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 14375 }, { "epoch": 2.76, "eval_wikibio_accuracy": 0.2723125, "eval_wikibio_bleu_score": 4.344365844835519, "eval_wikibio_bleu_score_sem": 0.16470560340428783, "eval_wikibio_emb_cos_sim": 0.6137693524360657, "eval_wikibio_emb_cos_sim_sem": 0.01276244505073531, "eval_wikibio_emb_top1_equal": 0.0625, "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.514110088348389, "eval_wikibio_n_ngrams_match_1": 8.652, "eval_wikibio_n_ngrams_match_2": 2.51, "eval_wikibio_n_ngrams_match_3": 0.79, "eval_wikibio_num_pred_words": 36.946, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 91.2962842338586, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.28470175904362316, "eval_wikibio_runtime": 10.714, "eval_wikibio_samples_per_second": 46.668, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.2786252165977963, "eval_wikibio_token_set_f1_sem": 0.005705800909168101, "eval_wikibio_token_set_precision": 0.2789999887690485, "eval_wikibio_token_set_recall": 0.2914470857535824, "eval_wikibio_true_num_tokens": 61.1328125, "step": 14375 }, { "epoch": 2.76, "eval_nq_accuracy": 0.44321875, "eval_nq_bleu_score": 7.142864121366401, "eval_nq_bleu_score_sem": 0.3259829282669103, "eval_nq_emb_cos_sim": 0.6839103698730469, "eval_nq_emb_cos_sim_sem": 0.011409034670976592, "eval_nq_emb_top1_equal": 0.1171875, "eval_nq_emb_top1_equal_sem": 0.02854125312152025, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.8703842163085938, "eval_nq_n_ngrams_match_1": 18.118, "eval_nq_n_ngrams_match_2": 5.434, "eval_nq_n_ngrams_match_3": 2.086, "eval_nq_num_pred_words": 48.448, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 17.64379593183887, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3374483407607677, "eval_nq_runtime": 19.6069, "eval_nq_samples_per_second": 25.501, "eval_nq_steps_per_second": 0.051, "eval_nq_token_set_f1": 0.36932619798037547, "eval_nq_token_set_f1_sem": 0.004742159484697793, "eval_nq_token_set_precision": 0.3094702412379923, "eval_nq_token_set_recall": 0.4798212823065362, "eval_nq_true_num_tokens": 64.0, "step": 14375 }, { "epoch": 2.76, "learning_rate": 0.001, "loss": 3.2386, "step": 14376 }, { "epoch": 2.76, "learning_rate": 0.001, "loss": 3.2286, "step": 14388 }, { "epoch": 2.76, "learning_rate": 0.001, "loss": 3.2141, "step": 14400 }, { "epoch": 2.77, "learning_rate": 0.001, "loss": 3.2231, "step": 14412 }, { "epoch": 2.77, "learning_rate": 0.001, "loss": 3.2178, "step": 14424 }, { "epoch": 2.77, "learning_rate": 0.001, "loss": 3.2316, "step": 14436 }, { "epoch": 2.77, "learning_rate": 0.001, "loss": 3.2307, "step": 14448 }, { "epoch": 2.78, "learning_rate": 0.001, "loss": 3.2246, "step": 14460 }, { "epoch": 2.78, "learning_rate": 0.001, "loss": 3.227, "step": 14472 }, { "epoch": 2.78, "learning_rate": 0.001, "loss": 3.2174, "step": 14484 }, { "epoch": 2.78, "learning_rate": 0.001, "loss": 3.2122, "step": 14496 }, { "epoch": 2.79, "learning_rate": 0.001, "loss": 3.2225, "step": 14508 }, { "epoch": 2.79, "learning_rate": 0.001, "loss": 3.2192, "step": 14520 }, { "epoch": 2.79, "learning_rate": 0.001, "loss": 3.2307, "step": 14532 }, { "epoch": 2.79, "learning_rate": 0.001, "loss": 3.221, "step": 14544 }, { "epoch": 2.79, "learning_rate": 0.001, "loss": 3.2284, "step": 14556 }, { "epoch": 2.8, "learning_rate": 0.001, "loss": 3.2116, "step": 14568 }, { "epoch": 2.8, "learning_rate": 0.001, "loss": 3.2036, "step": 14580 }, { "epoch": 2.8, "learning_rate": 0.001, "loss": 3.222, "step": 14592 }, { "epoch": 2.8, "learning_rate": 0.001, "loss": 3.2189, "step": 14604 }, { "epoch": 2.81, "learning_rate": 0.001, "loss": 3.2201, "step": 14616 }, { "epoch": 2.81, "learning_rate": 0.001, "loss": 3.214, "step": 14628 }, { "epoch": 2.81, "learning_rate": 0.001, "loss": 3.2172, "step": 14640 }, { "epoch": 2.81, "learning_rate": 0.001, "loss": 3.2111, "step": 14652 }, { "epoch": 2.82, "learning_rate": 0.001, "loss": 3.2105, "step": 14664 }, { "epoch": 2.82, "learning_rate": 0.001, "loss": 3.2106, "step": 14676 }, { "epoch": 2.82, "learning_rate": 0.001, "loss": 3.2145, "step": 14688 }, { "epoch": 2.82, "learning_rate": 0.001, "loss": 3.2146, "step": 14700 }, { "epoch": 2.82, "learning_rate": 0.001, "loss": 3.2141, "step": 14712 }, { "epoch": 2.83, "learning_rate": 0.001, "loss": 3.2206, "step": 14724 }, { "epoch": 2.83, "learning_rate": 0.001, "loss": 3.2162, "step": 14736 }, { "epoch": 2.83, "learning_rate": 0.001, "loss": 3.2175, "step": 14748 }, { "epoch": 2.83, "learning_rate": 0.001, "loss": 3.2075, "step": 14760 }, { "epoch": 2.84, "learning_rate": 0.001, "loss": 3.2282, "step": 14772 }, { "epoch": 2.84, "learning_rate": 0.001, "loss": 3.2177, "step": 14784 }, { "epoch": 2.84, "learning_rate": 0.001, "loss": 3.2168, "step": 14796 }, { "epoch": 2.84, "learning_rate": 0.001, "loss": 3.211, "step": 14808 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 3.221, "step": 14820 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 3.2264, "step": 14832 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 3.2003, "step": 14844 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 3.2169, "step": 14856 }, { "epoch": 2.85, "learning_rate": 0.001, "loss": 3.2082, "step": 14868 }, { "epoch": 2.86, "learning_rate": 0.001, "loss": 3.2176, "step": 14880 }, { "epoch": 2.86, "learning_rate": 0.001, "loss": 3.1955, "step": 14892 }, { "epoch": 2.86, "learning_rate": 0.001, "loss": 3.2079, "step": 14904 }, { "epoch": 2.86, "learning_rate": 0.001, "loss": 3.2158, "step": 14916 }, { "epoch": 2.87, "learning_rate": 0.001, "loss": 3.2051, "step": 14928 }, { "epoch": 2.87, "learning_rate": 0.001, "loss": 3.2095, "step": 14940 }, { "epoch": 2.87, "learning_rate": 0.001, "loss": 3.2042, "step": 14952 }, { "epoch": 2.87, "learning_rate": 0.001, "loss": 3.2064, "step": 14964 }, { "epoch": 2.88, "learning_rate": 0.001, "loss": 3.2085, "step": 14976 }, { "epoch": 2.88, "learning_rate": 0.001, "loss": 3.205, "step": 14988 }, { "epoch": 2.88, "learning_rate": 0.001, "loss": 3.2067, "step": 15000 }, { "epoch": 2.88, "eval_ag_news_accuracy": 0.25921875, "eval_ag_news_bleu_score": 3.1146728829454373, "eval_ag_news_bleu_score_sem": 0.11705073789729323, "eval_ag_news_emb_cos_sim": 0.6343845129013062, "eval_ag_news_emb_cos_sim_sem": 0.01061068182249365, "eval_ag_news_emb_top1_equal": 0.1171875, "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.330489158630371, "eval_ag_news_n_ngrams_match_1": 9.858, "eval_ag_news_n_ngrams_match_2": 1.73, "eval_ag_news_n_ngrams_match_3": 0.418, "eval_ag_news_num_pred_words": 44.542, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 75.98144445970051, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.23708583069714723, "eval_ag_news_runtime": 11.0115, "eval_ag_news_samples_per_second": 45.407, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.26458062076987976, "eval_ag_news_token_set_f1_sem": 0.00436743089470579, "eval_ag_news_token_set_precision": 0.22692343709668275, "eval_ag_news_token_set_recall": 0.3478912418817946, "eval_ag_news_true_num_tokens": 56.09375, "step": 15000 }, { "epoch": 2.88, "eval_anthropic_toxic_prompts_accuracy": 0.0831875, "eval_anthropic_toxic_prompts_bleu_score": 1.9228351405685142, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0778504739820073, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5017146468162537, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012301013450703946, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 4.030600547790527, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.848, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.858, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.254, "eval_anthropic_toxic_prompts_num_pred_words": 44.712, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 56.29470876056559, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.13774657993685357, "eval_anthropic_toxic_prompts_runtime": 10.4249, "eval_anthropic_toxic_prompts_samples_per_second": 47.962, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.23852053641150797, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0058289375314690695, "eval_anthropic_toxic_prompts_token_set_precision": 0.25924826553107483, "eval_anthropic_toxic_prompts_token_set_recall": 0.2587102409923538, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 15000 }, { "epoch": 2.88, "eval_arxiv_accuracy": 0.288625, "eval_arxiv_bleu_score": 2.6195801248021557, "eval_arxiv_bleu_score_sem": 0.08564188071800778, "eval_arxiv_emb_cos_sim": 0.5326679348945618, "eval_arxiv_emb_cos_sim_sem": 0.009802819591315437, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.175990581512451, "eval_arxiv_n_ngrams_match_1": 10.0, "eval_arxiv_n_ngrams_match_2": 1.562, "eval_arxiv_n_ngrams_match_3": 0.238, "eval_arxiv_num_pred_words": 37.344, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 65.10429884452482, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23304834470136332, "eval_arxiv_runtime": 11.1299, "eval_arxiv_samples_per_second": 44.924, "eval_arxiv_steps_per_second": 0.09, "eval_arxiv_token_set_f1": 0.23857942787074587, "eval_arxiv_token_set_f1_sem": 0.004409098720889074, "eval_arxiv_token_set_precision": 0.1782094588867365, "eval_arxiv_token_set_recall": 0.4031503223975969, "eval_arxiv_true_num_tokens": 64.0, "step": 15000 }, { "epoch": 2.88, "eval_python_code_alpaca_accuracy": 0.1154375, "eval_python_code_alpaca_bleu_score": 2.602421954645876, "eval_python_code_alpaca_bleu_score_sem": 0.08119990895067912, "eval_python_code_alpaca_emb_cos_sim": 0.4306205213069916, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009464545033899954, "eval_python_code_alpaca_emb_top1_equal": 0.03125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.8729326725006104, "eval_python_code_alpaca_n_ngrams_match_1": 5.614, "eval_python_code_alpaca_n_ngrams_match_2": 1.004, "eval_python_code_alpaca_n_ngrams_match_3": 0.236, "eval_python_code_alpaca_num_pred_words": 36.152, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 48.08319176565465, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.1942210687228555, "eval_python_code_alpaca_runtime": 10.6812, "eval_python_code_alpaca_samples_per_second": 46.811, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.3129188630928717, "eval_python_code_alpaca_token_set_f1_sem": 0.00527457308292601, "eval_python_code_alpaca_token_set_precision": 0.2849465040152018, "eval_python_code_alpaca_token_set_recall": 0.3965390098016386, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 15000 }, { "epoch": 2.88, "eval_wikibio_accuracy": 0.2720625, "eval_wikibio_bleu_score": 4.657730721407051, "eval_wikibio_bleu_score_sem": 0.19389034286767964, "eval_wikibio_emb_cos_sim": 0.6262930631637573, "eval_wikibio_emb_cos_sim_sem": 0.01195884557815727, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.540160655975342, "eval_wikibio_n_ngrams_match_1": 8.852, "eval_wikibio_n_ngrams_match_2": 2.612, "eval_wikibio_n_ngrams_match_3": 0.84, "eval_wikibio_num_pred_words": 36.068, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 93.70585331550829, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2899552860106226, "eval_wikibio_runtime": 10.6219, "eval_wikibio_samples_per_second": 47.073, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.2817308915432452, "eval_wikibio_token_set_f1_sem": 0.005963242266797947, "eval_wikibio_token_set_precision": 0.28220718950235946, "eval_wikibio_token_set_recall": 0.2949596795865249, "eval_wikibio_true_num_tokens": 61.1328125, "step": 15000 }, { "epoch": 2.88, "eval_nq_accuracy": 0.44459375, "eval_nq_bleu_score": 7.449978514961194, "eval_nq_bleu_score_sem": 0.32287563581044465, "eval_nq_emb_cos_sim": 0.6952831745147705, "eval_nq_emb_cos_sim_sem": 0.011135441038046156, "eval_nq_emb_top1_equal": 0.1640625, "eval_nq_emb_top1_equal_sem": 0.03286167651298939, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.845280170440674, "eval_nq_n_ngrams_match_1": 18.454, "eval_nq_n_ngrams_match_2": 5.65, "eval_nq_n_ngrams_match_3": 2.206, "eval_nq_num_pred_words": 48.112, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 17.20637871238331, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3457408374951795, "eval_nq_runtime": 11.4399, "eval_nq_samples_per_second": 43.706, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.37587631562377505, "eval_nq_token_set_f1_sem": 0.004711899644655883, "eval_nq_token_set_precision": 0.3183044542889795, "eval_nq_token_set_recall": 0.4760862717565977, "eval_nq_true_num_tokens": 64.0, "step": 15000 }, { "epoch": 2.88, "learning_rate": 0.001, "loss": 3.2003, "step": 15012 }, { "epoch": 2.88, "learning_rate": 0.001, "loss": 3.206, "step": 15024 }, { "epoch": 2.89, "learning_rate": 0.001, "loss": 3.2072, "step": 15036 }, { "epoch": 2.89, "learning_rate": 0.001, "loss": 3.207, "step": 15048 }, { "epoch": 2.89, "learning_rate": 0.001, "loss": 3.2047, "step": 15060 }, { "epoch": 2.89, "learning_rate": 0.001, "loss": 3.2034, "step": 15072 }, { "epoch": 2.9, "learning_rate": 0.001, "loss": 3.2035, "step": 15084 }, { "epoch": 2.9, "learning_rate": 0.001, "loss": 3.2006, "step": 15096 }, { "epoch": 2.9, "learning_rate": 0.001, "loss": 3.2006, "step": 15108 }, { "epoch": 2.9, "learning_rate": 0.001, "loss": 3.2184, "step": 15120 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 3.2001, "step": 15132 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 3.1975, "step": 15144 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 3.2084, "step": 15156 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 3.2001, "step": 15168 }, { "epoch": 2.91, "learning_rate": 0.001, "loss": 3.1977, "step": 15180 }, { "epoch": 2.92, "learning_rate": 0.001, "loss": 3.1937, "step": 15192 }, { "epoch": 2.92, "learning_rate": 0.001, "loss": 3.1947, "step": 15204 }, { "epoch": 2.92, "learning_rate": 0.001, "loss": 3.2067, "step": 15216 }, { "epoch": 2.92, "learning_rate": 0.001, "loss": 3.2051, "step": 15228 }, { "epoch": 2.93, "learning_rate": 0.001, "loss": 3.2091, "step": 15240 }, { "epoch": 2.93, "learning_rate": 0.001, "loss": 3.1982, "step": 15252 }, { "epoch": 2.93, "learning_rate": 0.001, "loss": 3.1982, "step": 15264 }, { "epoch": 2.93, "learning_rate": 0.001, "loss": 3.1999, "step": 15276 }, { "epoch": 2.94, "learning_rate": 0.001, "loss": 3.1946, "step": 15288 }, { "epoch": 2.94, "learning_rate": 0.001, "loss": 3.2004, "step": 15300 }, { "epoch": 2.94, "learning_rate": 0.001, "loss": 3.1929, "step": 15312 }, { "epoch": 2.94, "learning_rate": 0.001, "loss": 3.1967, "step": 15324 }, { "epoch": 2.94, "learning_rate": 0.001, "loss": 3.2117, "step": 15336 }, { "epoch": 2.95, "learning_rate": 0.001, "loss": 3.209, "step": 15348 }, { "epoch": 2.95, "learning_rate": 0.001, "loss": 3.1961, "step": 15360 }, { "epoch": 2.95, "learning_rate": 0.001, "loss": 3.192, "step": 15372 }, { "epoch": 2.95, "learning_rate": 0.001, "loss": 3.1932, "step": 15384 }, { "epoch": 2.96, "learning_rate": 0.001, "loss": 3.1985, "step": 15396 }, { "epoch": 2.96, "learning_rate": 0.001, "loss": 3.1908, "step": 15408 }, { "epoch": 2.96, "learning_rate": 0.001, "loss": 3.2049, "step": 15420 }, { "epoch": 2.96, "learning_rate": 0.001, "loss": 3.2007, "step": 15432 }, { "epoch": 2.97, "learning_rate": 0.001, "loss": 3.1823, "step": 15444 }, { "epoch": 2.97, "learning_rate": 0.001, "loss": 3.1852, "step": 15456 }, { "epoch": 2.97, "learning_rate": 0.001, "loss": 3.1955, "step": 15468 }, { "epoch": 2.97, "learning_rate": 0.001, "loss": 3.197, "step": 15480 }, { "epoch": 2.97, "learning_rate": 0.001, "loss": 3.2003, "step": 15492 }, { "epoch": 2.98, "learning_rate": 0.001, "loss": 3.197, "step": 15504 }, { "epoch": 2.98, "learning_rate": 0.001, "loss": 3.191, "step": 15516 }, { "epoch": 2.98, "learning_rate": 0.001, "loss": 3.1927, "step": 15528 }, { "epoch": 2.98, "learning_rate": 0.001, "loss": 3.1963, "step": 15540 }, { "epoch": 2.99, "learning_rate": 0.001, "loss": 3.1868, "step": 15552 }, { "epoch": 2.99, "learning_rate": 0.001, "loss": 3.1884, "step": 15564 }, { "epoch": 2.99, "learning_rate": 0.001, "loss": 3.1942, "step": 15576 }, { "epoch": 2.99, "learning_rate": 0.001, "loss": 3.2021, "step": 15588 }, { "epoch": 3.0, "learning_rate": 0.001, "loss": 3.1847, "step": 15600 }, { "epoch": 3.0, "learning_rate": 0.001, "loss": 3.1873, "step": 15612 }, { "epoch": 3.0, "learning_rate": 0.001, "loss": 3.1711, "step": 15624 }, { "epoch": 3.0, "eval_ag_news_accuracy": 0.2603125, "eval_ag_news_bleu_score": 3.268182348171603, "eval_ag_news_bleu_score_sem": 0.11873960172756134, "eval_ag_news_emb_cos_sim": 0.6460250020027161, "eval_ag_news_emb_cos_sim_sem": 0.011290181875264888, "eval_ag_news_emb_top1_equal": 0.0859375, "eval_ag_news_emb_top1_equal_sem": 0.02487009666300537, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.309754848480225, "eval_ag_news_n_ngrams_match_1": 10.272, "eval_ag_news_n_ngrams_match_2": 1.886, "eval_ag_news_n_ngrams_match_3": 0.466, "eval_ag_news_num_pred_words": 46.156, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 74.42224197807555, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2440984893400114, "eval_ag_news_runtime": 10.637, "eval_ag_news_samples_per_second": 47.006, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.27347343185235684, "eval_ag_news_token_set_f1_sem": 0.004372880692145891, "eval_ag_news_token_set_precision": 0.23585201604261652, "eval_ag_news_token_set_recall": 0.35244043449096946, "eval_ag_news_true_num_tokens": 56.09375, "step": 15625 }, { "epoch": 3.0, "eval_anthropic_toxic_prompts_accuracy": 0.08459375, "eval_anthropic_toxic_prompts_bleu_score": 1.9100364629326811, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07250582354424716, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5081263184547424, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011363577977768578, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.03125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.015439349450344106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.976088285446167, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.004, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.932, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.27, "eval_anthropic_toxic_prompts_num_pred_words": 46.7, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 53.3080997782062, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.13961901513372155, "eval_anthropic_toxic_prompts_runtime": 10.1073, "eval_anthropic_toxic_prompts_samples_per_second": 49.469, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.2496937537485495, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0057900691088243975, "eval_anthropic_toxic_prompts_token_set_precision": 0.27453209078491864, "eval_anthropic_toxic_prompts_token_set_recall": 0.26925618875136736, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 15625 }, { "epoch": 3.0, "eval_arxiv_accuracy": 0.29284375, "eval_arxiv_bleu_score": 2.696982060820797, "eval_arxiv_bleu_score_sem": 0.08248124219938613, "eval_arxiv_emb_cos_sim": 0.5395243167877197, "eval_arxiv_emb_cos_sim_sem": 0.010061520240197845, "eval_arxiv_emb_top1_equal": 0.1796875, "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.148216247558594, "eval_arxiv_n_ngrams_match_1": 10.214, "eval_arxiv_n_ngrams_match_2": 1.606, "eval_arxiv_n_ngrams_match_3": 0.254, "eval_arxiv_num_pred_words": 38.316, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 63.32095060153439, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2366294890814875, "eval_arxiv_runtime": 17.6163, "eval_arxiv_samples_per_second": 28.383, "eval_arxiv_steps_per_second": 0.057, "eval_arxiv_token_set_f1": 0.2455057804067272, "eval_arxiv_token_set_f1_sem": 0.004128954099926646, "eval_arxiv_token_set_precision": 0.18625161758459255, "eval_arxiv_token_set_recall": 0.39564808691839587, "eval_arxiv_true_num_tokens": 64.0, "step": 15625 }, { "epoch": 3.0, "eval_python_code_alpaca_accuracy": 0.11765625, "eval_python_code_alpaca_bleu_score": 2.795027764789367, "eval_python_code_alpaca_bleu_score_sem": 0.08584239624961469, "eval_python_code_alpaca_emb_cos_sim": 0.46135619282722473, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009966544951256578, "eval_python_code_alpaca_emb_top1_equal": 0.0078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.007812499866294757, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.8526418209075928, "eval_python_code_alpaca_n_ngrams_match_1": 6.012, "eval_python_code_alpaca_n_ngrams_match_2": 1.094, "eval_python_code_alpaca_n_ngrams_match_3": 0.278, "eval_python_code_alpaca_num_pred_words": 36.364, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 47.1173746204497, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2102465222751838, "eval_python_code_alpaca_runtime": 9.8792, "eval_python_code_alpaca_samples_per_second": 50.612, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.3280570871663888, "eval_python_code_alpaca_token_set_f1_sem": 0.0049616657410164735, "eval_python_code_alpaca_token_set_precision": 0.30581218258487314, "eval_python_code_alpaca_token_set_recall": 0.3993076825225803, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 15625 }, { "epoch": 3.0, "eval_wikibio_accuracy": 0.26859375, "eval_wikibio_bleu_score": 4.3952367121655245, "eval_wikibio_bleu_score_sem": 0.16632628594435483, "eval_wikibio_emb_cos_sim": 0.6318996548652649, "eval_wikibio_emb_cos_sim_sem": 0.012864224421533055, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.579071521759033, "eval_wikibio_n_ngrams_match_1": 8.778, "eval_wikibio_n_ngrams_match_2": 2.546, "eval_wikibio_n_ngrams_match_3": 0.784, "eval_wikibio_num_pred_words": 36.46, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 97.42389623305286, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2871338496619885, "eval_wikibio_runtime": 10.4046, "eval_wikibio_samples_per_second": 48.056, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.2791240929968217, "eval_wikibio_token_set_f1_sem": 0.005736262864538556, "eval_wikibio_token_set_precision": 0.27854953209470507, "eval_wikibio_token_set_recall": 0.2935683917036409, "eval_wikibio_true_num_tokens": 61.1328125, "step": 15625 }, { "epoch": 3.0, "eval_nq_accuracy": 0.44878125, "eval_nq_bleu_score": 7.387878440285374, "eval_nq_bleu_score_sem": 0.29598220306382333, "eval_nq_emb_cos_sim": 0.7005928754806519, "eval_nq_emb_cos_sim_sem": 0.011368274717199511, "eval_nq_emb_top1_equal": 0.1796875, "eval_nq_emb_top1_equal_sem": 0.034068008879424266, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.824287176132202, "eval_nq_n_ngrams_match_1": 18.572, "eval_nq_n_ngrams_match_2": 5.672, "eval_nq_n_ngrams_match_3": 2.136, "eval_nq_num_pred_words": 48.676, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 16.848930384725364, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3483729051240489, "eval_nq_runtime": 10.8993, "eval_nq_samples_per_second": 45.875, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.377098468533167, "eval_nq_token_set_f1_sem": 0.004663483861263476, "eval_nq_token_set_precision": 0.319404770270989, "eval_nq_token_set_recall": 0.4766853399137281, "eval_nq_true_num_tokens": 64.0, "step": 15625 }, { "epoch": 3.0, "learning_rate": 0.001, "loss": 3.1806, "step": 15636 }, { "epoch": 3.0, "learning_rate": 0.001, "loss": 3.1734, "step": 15648 }, { "epoch": 3.01, "learning_rate": 0.001, "loss": 3.1738, "step": 15660 }, { "epoch": 3.01, "learning_rate": 0.001, "loss": 3.1834, "step": 15672 }, { "epoch": 3.01, "learning_rate": 0.001, "loss": 3.1724, "step": 15684 }, { "epoch": 3.01, "learning_rate": 0.001, "loss": 3.1817, "step": 15696 }, { "epoch": 3.02, "learning_rate": 0.001, "loss": 3.176, "step": 15708 }, { "epoch": 3.02, "learning_rate": 0.001, "loss": 3.1563, "step": 15720 }, { "epoch": 3.02, "learning_rate": 0.001, "loss": 3.1622, "step": 15732 }, { "epoch": 3.02, "learning_rate": 0.001, "loss": 3.1693, "step": 15744 }, { "epoch": 3.03, "learning_rate": 0.001, "loss": 3.1809, "step": 15756 }, { "epoch": 3.03, "learning_rate": 0.001, "loss": 3.1824, "step": 15768 }, { "epoch": 3.03, "learning_rate": 0.001, "loss": 3.1667, "step": 15780 }, { "epoch": 3.03, "learning_rate": 0.001, "loss": 3.1679, "step": 15792 }, { "epoch": 3.03, "learning_rate": 0.001, "loss": 3.1847, "step": 15804 }, { "epoch": 3.04, "learning_rate": 0.001, "loss": 3.1623, "step": 15816 }, { "epoch": 3.04, "learning_rate": 0.001, "loss": 3.1641, "step": 15828 }, { "epoch": 3.04, "learning_rate": 0.001, "loss": 3.1671, "step": 15840 }, { "epoch": 3.04, "learning_rate": 0.001, "loss": 3.1782, "step": 15852 }, { "epoch": 3.05, "learning_rate": 0.001, "loss": 3.1654, "step": 15864 }, { "epoch": 3.05, "learning_rate": 0.001, "loss": 3.1616, "step": 15876 }, { "epoch": 3.05, "learning_rate": 0.001, "loss": 3.1711, "step": 15888 }, { "epoch": 3.05, "learning_rate": 0.001, "loss": 3.1624, "step": 15900 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 3.1787, "step": 15912 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 3.1705, "step": 15924 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 3.1592, "step": 15936 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 3.1644, "step": 15948 }, { "epoch": 3.06, "learning_rate": 0.001, "loss": 3.1611, "step": 15960 }, { "epoch": 3.07, "learning_rate": 0.001, "loss": 3.1743, "step": 15972 }, { "epoch": 3.07, "learning_rate": 0.001, "loss": 3.1627, "step": 15984 }, { "epoch": 3.07, "learning_rate": 0.001, "loss": 3.1836, "step": 15996 }, { "epoch": 3.07, "learning_rate": 0.001, "loss": 3.1657, "step": 16008 }, { "epoch": 3.08, "learning_rate": 0.001, "loss": 3.159, "step": 16020 }, { "epoch": 3.08, "learning_rate": 0.001, "loss": 3.1646, "step": 16032 }, { "epoch": 3.08, "learning_rate": 0.001, "loss": 3.1568, "step": 16044 }, { "epoch": 3.08, "learning_rate": 0.001, "loss": 3.1673, "step": 16056 }, { "epoch": 3.09, "learning_rate": 0.001, "loss": 3.1648, "step": 16068 }, { "epoch": 3.09, "learning_rate": 0.001, "loss": 3.1641, "step": 16080 }, { "epoch": 3.09, "learning_rate": 0.001, "loss": 3.1565, "step": 16092 }, { "epoch": 3.09, "learning_rate": 0.001, "loss": 3.1479, "step": 16104 }, { "epoch": 3.09, "learning_rate": 0.001, "loss": 3.1622, "step": 16116 }, { "epoch": 3.1, "learning_rate": 0.001, "loss": 3.1675, "step": 16128 }, { "epoch": 3.1, "learning_rate": 0.001, "loss": 3.179, "step": 16140 }, { "epoch": 3.1, "learning_rate": 0.001, "loss": 3.1674, "step": 16152 }, { "epoch": 3.1, "learning_rate": 0.001, "loss": 3.159, "step": 16164 }, { "epoch": 3.11, "learning_rate": 0.001, "loss": 3.1536, "step": 16176 }, { "epoch": 3.11, "learning_rate": 0.001, "loss": 3.1582, "step": 16188 }, { "epoch": 3.11, "learning_rate": 0.001, "loss": 3.1621, "step": 16200 }, { "epoch": 3.11, "learning_rate": 0.001, "loss": 3.1679, "step": 16212 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 3.158, "step": 16224 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 3.1478, "step": 16236 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 3.1548, "step": 16248 }, { "epoch": 3.12, "eval_ag_news_accuracy": 0.2639375, "eval_ag_news_bleu_score": 3.1814057161788645, "eval_ag_news_bleu_score_sem": 0.11700248963789446, "eval_ag_news_emb_cos_sim": 0.6401171088218689, "eval_ag_news_emb_cos_sim_sem": 0.011685395507929573, "eval_ag_news_emb_top1_equal": 0.1015625, "eval_ag_news_emb_top1_equal_sem": 0.026804565886848545, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.290400505065918, "eval_ag_news_n_ngrams_match_1": 9.948, "eval_ag_news_n_ngrams_match_2": 1.774, "eval_ag_news_n_ngrams_match_3": 0.438, "eval_ag_news_num_pred_words": 44.398, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 72.99569779275832, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.23894300331323687, "eval_ag_news_runtime": 10.5754, "eval_ag_news_samples_per_second": 47.28, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.26816691817397154, "eval_ag_news_token_set_f1_sem": 0.004438205105594491, "eval_ag_news_token_set_precision": 0.22896652228378245, "eval_ag_news_token_set_recall": 0.357512889401445, "eval_ag_news_true_num_tokens": 56.09375, "step": 16250 }, { "epoch": 3.12, "eval_anthropic_toxic_prompts_accuracy": 0.08340625, "eval_anthropic_toxic_prompts_bleu_score": 1.936660342823424, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08287166779429654, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5021185874938965, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012252040129823304, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.9689595699310303, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 3.944, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.858, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.246, "eval_anthropic_toxic_prompts_num_pred_words": 44.708, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 52.92943280839483, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1385605902191907, "eval_anthropic_toxic_prompts_runtime": 10.123, "eval_anthropic_toxic_prompts_samples_per_second": 49.392, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.25415028179892357, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005881566021876812, "eval_anthropic_toxic_prompts_token_set_precision": 0.2693754887360548, "eval_anthropic_toxic_prompts_token_set_recall": 0.2811603069470758, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 16250 }, { "epoch": 3.12, "eval_arxiv_accuracy": 0.29425, "eval_arxiv_bleu_score": 2.5639858200829724, "eval_arxiv_bleu_score_sem": 0.07629532420099573, "eval_arxiv_emb_cos_sim": 0.5470216274261475, "eval_arxiv_emb_cos_sim_sem": 0.009226406120190291, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.116957187652588, "eval_arxiv_n_ngrams_match_1": 9.904, "eval_arxiv_n_ngrams_match_2": 1.54, "eval_arxiv_n_ngrams_match_3": 0.236, "eval_arxiv_num_pred_words": 36.468, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 61.372213732635416, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23360398366144824, "eval_arxiv_runtime": 10.4672, "eval_arxiv_samples_per_second": 47.768, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.24120323309675257, "eval_arxiv_token_set_f1_sem": 0.004272308361307677, "eval_arxiv_token_set_precision": 0.17836754131634255, "eval_arxiv_token_set_recall": 0.42066364898546915, "eval_arxiv_true_num_tokens": 64.0, "step": 16250 }, { "epoch": 3.12, "eval_python_code_alpaca_accuracy": 0.1181875, "eval_python_code_alpaca_bleu_score": 2.6655154167411714, "eval_python_code_alpaca_bleu_score_sem": 0.08819318507159991, "eval_python_code_alpaca_emb_cos_sim": 0.4437231421470642, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00913315226659289, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.8443946838378906, "eval_python_code_alpaca_n_ngrams_match_1": 5.638, "eval_python_code_alpaca_n_ngrams_match_2": 1.044, "eval_python_code_alpaca_n_ngrams_match_3": 0.264, "eval_python_code_alpaca_num_pred_words": 36.442, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 46.73038912819738, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.19533444208456124, "eval_python_code_alpaca_runtime": 10.6622, "eval_python_code_alpaca_samples_per_second": 46.894, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.3157780856226203, "eval_python_code_alpaca_token_set_f1_sem": 0.005365401957471515, "eval_python_code_alpaca_token_set_precision": 0.28295346829435153, "eval_python_code_alpaca_token_set_recall": 0.408937565343303, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 16250 }, { "epoch": 3.12, "eval_wikibio_accuracy": 0.2725, "eval_wikibio_bleu_score": 4.485603617690607, "eval_wikibio_bleu_score_sem": 0.17564340019764493, "eval_wikibio_emb_cos_sim": 0.6322147846221924, "eval_wikibio_emb_cos_sim_sem": 0.012563671293534822, "eval_wikibio_emb_top1_equal": 0.078125, "eval_wikibio_emb_top1_equal_sem": 0.023813825516515504, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.520080089569092, "eval_wikibio_n_ngrams_match_1": 8.686, "eval_wikibio_n_ngrams_match_2": 2.554, "eval_wikibio_n_ngrams_match_3": 0.8, "eval_wikibio_num_pred_words": 36.01, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 91.84295334616633, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2866793295831782, "eval_wikibio_runtime": 11.7506, "eval_wikibio_samples_per_second": 42.551, "eval_wikibio_steps_per_second": 0.085, "eval_wikibio_token_set_f1": 0.2830691048525941, "eval_wikibio_token_set_f1_sem": 0.005442402733291205, "eval_wikibio_token_set_precision": 0.2795208030179828, "eval_wikibio_token_set_recall": 0.3099275265012032, "eval_wikibio_true_num_tokens": 61.1328125, "step": 16250 }, { "epoch": 3.12, "eval_nq_accuracy": 0.4508125, "eval_nq_bleu_score": 7.534441912378916, "eval_nq_bleu_score_sem": 0.3328234962053897, "eval_nq_emb_cos_sim": 0.7037692070007324, "eval_nq_emb_cos_sim_sem": 0.010573599181249144, "eval_nq_emb_top1_equal": 0.1875, "eval_nq_emb_top1_equal_sem": 0.034634623208270626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.8061931133270264, "eval_nq_n_ngrams_match_1": 18.704, "eval_nq_n_ngrams_match_2": 5.722, "eval_nq_n_ngrams_match_3": 2.24, "eval_nq_num_pred_words": 48.152, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 16.54680634856935, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3485770031977412, "eval_nq_runtime": 11.3821, "eval_nq_samples_per_second": 43.929, "eval_nq_steps_per_second": 0.088, "eval_nq_token_set_f1": 0.382374129631992, "eval_nq_token_set_f1_sem": 0.00479487791611988, "eval_nq_token_set_precision": 0.32179220544616005, "eval_nq_token_set_recall": 0.49008944412534033, "eval_nq_true_num_tokens": 64.0, "step": 16250 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 3.1707, "step": 16260 }, { "epoch": 3.12, "learning_rate": 0.001, "loss": 3.1629, "step": 16272 }, { "epoch": 3.13, "learning_rate": 0.001, "loss": 3.1492, "step": 16284 }, { "epoch": 3.13, "learning_rate": 0.001, "loss": 3.1674, "step": 16296 }, { "epoch": 3.13, "learning_rate": 0.001, "loss": 3.1614, "step": 16308 }, { "epoch": 3.13, "learning_rate": 0.001, "loss": 3.1557, "step": 16320 }, { "epoch": 3.14, "learning_rate": 0.001, "loss": 3.1557, "step": 16332 }, { "epoch": 3.14, "learning_rate": 0.001, "loss": 3.1664, "step": 16344 }, { "epoch": 3.14, "learning_rate": 0.001, "loss": 3.1599, "step": 16356 }, { "epoch": 3.14, "learning_rate": 0.001, "loss": 3.1541, "step": 16368 }, { "epoch": 3.15, "learning_rate": 0.001, "loss": 3.153, "step": 16380 }, { "epoch": 3.15, "learning_rate": 0.001, "loss": 3.1648, "step": 16392 }, { "epoch": 3.15, "learning_rate": 0.001, "loss": 3.1364, "step": 16404 }, { "epoch": 3.15, "learning_rate": 0.001, "loss": 3.155, "step": 16416 }, { "epoch": 3.15, "learning_rate": 0.001, "loss": 3.1549, "step": 16428 }, { "epoch": 3.16, "learning_rate": 0.001, "loss": 3.149, "step": 16440 }, { "epoch": 3.16, "learning_rate": 0.001, "loss": 3.1526, "step": 16452 }, { "epoch": 3.16, "learning_rate": 0.001, "loss": 3.1595, "step": 16464 }, { "epoch": 3.16, "learning_rate": 0.001, "loss": 3.1534, "step": 16476 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 3.1485, "step": 16488 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 3.1539, "step": 16500 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 3.1537, "step": 16512 }, { "epoch": 3.17, "learning_rate": 0.001, "loss": 3.1413, "step": 16524 }, { "epoch": 3.18, "learning_rate": 0.001, "loss": 3.1461, "step": 16536 }, { "epoch": 3.18, "learning_rate": 0.001, "loss": 3.1507, "step": 16548 }, { "epoch": 3.18, "learning_rate": 0.001, "loss": 3.1534, "step": 16560 }, { "epoch": 3.18, "learning_rate": 0.001, "loss": 3.1528, "step": 16572 }, { "epoch": 3.18, "learning_rate": 0.001, "loss": 3.1506, "step": 16584 }, { "epoch": 3.19, "learning_rate": 0.001, "loss": 3.154, "step": 16596 }, { "epoch": 3.19, "learning_rate": 0.001, "loss": 3.1533, "step": 16608 }, { "epoch": 3.19, "learning_rate": 0.001, "loss": 3.1509, "step": 16620 }, { "epoch": 3.19, "learning_rate": 0.001, "loss": 3.1583, "step": 16632 }, { "epoch": 3.2, "learning_rate": 0.001, "loss": 3.1663, "step": 16644 }, { "epoch": 3.2, "learning_rate": 0.001, "loss": 3.1597, "step": 16656 }, { "epoch": 3.2, "learning_rate": 0.001, "loss": 3.1477, "step": 16668 }, { "epoch": 3.2, "learning_rate": 0.001, "loss": 3.146, "step": 16680 }, { "epoch": 3.21, "learning_rate": 0.001, "loss": 3.1472, "step": 16692 }, { "epoch": 3.21, "learning_rate": 0.001, "loss": 3.1509, "step": 16704 }, { "epoch": 3.21, "learning_rate": 0.001, "loss": 3.151, "step": 16716 }, { "epoch": 3.21, "learning_rate": 0.001, "loss": 3.1417, "step": 16728 }, { "epoch": 3.21, "learning_rate": 0.001, "loss": 3.1468, "step": 16740 }, { "epoch": 3.22, "learning_rate": 0.001, "loss": 3.1415, "step": 16752 }, { "epoch": 3.22, "learning_rate": 0.001, "loss": 3.1549, "step": 16764 }, { "epoch": 3.22, "learning_rate": 0.001, "loss": 3.1339, "step": 16776 }, { "epoch": 3.22, "learning_rate": 0.001, "loss": 3.1514, "step": 16788 }, { "epoch": 3.23, "learning_rate": 0.001, "loss": 3.1544, "step": 16800 }, { "epoch": 3.23, "learning_rate": 0.001, "loss": 3.1501, "step": 16812 }, { "epoch": 3.23, "learning_rate": 0.001, "loss": 3.1483, "step": 16824 }, { "epoch": 3.23, "learning_rate": 0.001, "loss": 3.1459, "step": 16836 }, { "epoch": 3.24, "learning_rate": 0.001, "loss": 3.1415, "step": 16848 }, { "epoch": 3.24, "learning_rate": 0.001, "loss": 3.1496, "step": 16860 }, { "epoch": 3.24, "learning_rate": 0.001, "loss": 3.1354, "step": 16872 }, { "epoch": 3.24, "eval_ag_news_accuracy": 0.266, "eval_ag_news_bleu_score": 3.303569834022567, "eval_ag_news_bleu_score_sem": 0.12057156154320481, "eval_ag_news_emb_cos_sim": 0.6400679349899292, "eval_ag_news_emb_cos_sim_sem": 0.01229078493740596, "eval_ag_news_emb_top1_equal": 0.1171875, "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.255241394042969, "eval_ag_news_n_ngrams_match_1": 10.288, "eval_ag_news_n_ngrams_match_2": 1.854, "eval_ag_news_n_ngrams_match_3": 0.478, "eval_ag_news_num_pred_words": 44.574, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 70.47382709735112, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.24354215636578458, "eval_ag_news_runtime": 10.7818, "eval_ag_news_samples_per_second": 46.375, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.2760969694638503, "eval_ag_news_token_set_f1_sem": 0.004610602165884031, "eval_ag_news_token_set_precision": 0.23593322373411124, "eval_ag_news_token_set_recall": 0.3631982850513387, "eval_ag_news_true_num_tokens": 56.09375, "step": 16875 }, { "epoch": 3.24, "eval_anthropic_toxic_prompts_accuracy": 0.0853125, "eval_anthropic_toxic_prompts_bleu_score": 1.9637353748833446, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07792872705331727, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5194891691207886, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012998227692195411, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.9574742317199707, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.016, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.934, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.276, "eval_anthropic_toxic_prompts_num_pred_words": 45.818, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 52.32499808427228, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.13946636146453192, "eval_anthropic_toxic_prompts_runtime": 9.9617, "eval_anthropic_toxic_prompts_samples_per_second": 50.192, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.262844829612984, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00598170911421396, "eval_anthropic_toxic_prompts_token_set_precision": 0.27494351000750017, "eval_anthropic_toxic_prompts_token_set_recall": 0.29621014492748643, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 16875 }, { "epoch": 3.24, "eval_arxiv_accuracy": 0.29409375, "eval_arxiv_bleu_score": 2.7263133784274958, "eval_arxiv_bleu_score_sem": 0.08284921304694123, "eval_arxiv_emb_cos_sim": 0.5507416129112244, "eval_arxiv_emb_cos_sim_sem": 0.009650875592622109, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.089741230010986, "eval_arxiv_n_ngrams_match_1": 10.364, "eval_arxiv_n_ngrams_match_2": 1.562, "eval_arxiv_n_ngrams_match_3": 0.266, "eval_arxiv_num_pred_words": 38.226, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 59.724434813001274, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2396616667609068, "eval_arxiv_runtime": 10.2405, "eval_arxiv_samples_per_second": 48.826, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.25243020170923214, "eval_arxiv_token_set_f1_sem": 0.004041100372068284, "eval_arxiv_token_set_precision": 0.18850721985936827, "eval_arxiv_token_set_recall": 0.41718000763134117, "eval_arxiv_true_num_tokens": 64.0, "step": 16875 }, { "epoch": 3.24, "eval_python_code_alpaca_accuracy": 0.11846875, "eval_python_code_alpaca_bleu_score": 2.7778647195056383, "eval_python_code_alpaca_bleu_score_sem": 0.08894850527051816, "eval_python_code_alpaca_emb_cos_sim": 0.47885242104530334, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009847563739365815, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.8238985538482666, "eval_python_code_alpaca_n_ngrams_match_1": 6.128, "eval_python_code_alpaca_n_ngrams_match_2": 1.15, "eval_python_code_alpaca_n_ngrams_match_3": 0.298, "eval_python_code_alpaca_num_pred_words": 37.428, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 45.78234579634366, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2096884216673367, "eval_python_code_alpaca_runtime": 19.5585, "eval_python_code_alpaca_samples_per_second": 25.564, "eval_python_code_alpaca_steps_per_second": 0.051, "eval_python_code_alpaca_token_set_f1": 0.3299429882816963, "eval_python_code_alpaca_token_set_f1_sem": 0.005303511076664485, "eval_python_code_alpaca_token_set_precision": 0.30877943636792743, "eval_python_code_alpaca_token_set_recall": 0.39720967992619755, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 16875 }, { "epoch": 3.24, "eval_wikibio_accuracy": 0.27390625, "eval_wikibio_bleu_score": 4.70871177857877, "eval_wikibio_bleu_score_sem": 0.1792521367134163, "eval_wikibio_emb_cos_sim": 0.6363104581832886, "eval_wikibio_emb_cos_sim_sem": 0.011500971435558028, "eval_wikibio_emb_top1_equal": 0.0625, "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.529972076416016, "eval_wikibio_n_ngrams_match_1": 8.762, "eval_wikibio_n_ngrams_match_2": 2.618, "eval_wikibio_n_ngrams_match_3": 0.834, "eval_wikibio_num_pred_words": 36.068, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 92.75597096680391, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2894653343321614, "eval_wikibio_runtime": 10.2644, "eval_wikibio_samples_per_second": 48.712, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.2810842430578845, "eval_wikibio_token_set_f1_sem": 0.005703281804297346, "eval_wikibio_token_set_precision": 0.2791533800186806, "eval_wikibio_token_set_recall": 0.298001260891474, "eval_wikibio_true_num_tokens": 61.1328125, "step": 16875 }, { "epoch": 3.24, "eval_nq_accuracy": 0.453625, "eval_nq_bleu_score": 7.799947295850558, "eval_nq_bleu_score_sem": 0.34341544883029496, "eval_nq_emb_cos_sim": 0.7063493728637695, "eval_nq_emb_cos_sim_sem": 0.010792150347113368, "eval_nq_emb_top1_equal": 0.125, "eval_nq_emb_top1_equal_sem": 0.02934655822437397, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.7839088439941406, "eval_nq_n_ngrams_match_1": 18.848, "eval_nq_n_ngrams_match_2": 5.8, "eval_nq_n_ngrams_match_3": 2.364, "eval_nq_num_pred_words": 48.476, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 16.182150988611035, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3511610581123732, "eval_nq_runtime": 11.575, "eval_nq_samples_per_second": 43.196, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.38475317847984153, "eval_nq_token_set_f1_sem": 0.004827815823807924, "eval_nq_token_set_precision": 0.3250383639370381, "eval_nq_token_set_recall": 0.492457164008117, "eval_nq_true_num_tokens": 64.0, "step": 16875 }, { "epoch": 3.24, "learning_rate": 0.001, "loss": 3.1462, "step": 16884 }, { "epoch": 3.24, "learning_rate": 0.001, "loss": 3.1494, "step": 16896 }, { "epoch": 3.25, "learning_rate": 0.001, "loss": 3.14, "step": 16908 }, { "epoch": 3.25, "learning_rate": 0.001, "loss": 3.154, "step": 16920 }, { "epoch": 3.25, "learning_rate": 0.001, "loss": 3.1333, "step": 16932 }, { "epoch": 3.25, "learning_rate": 0.001, "loss": 3.1471, "step": 16944 }, { "epoch": 3.26, "learning_rate": 0.001, "loss": 3.1428, "step": 16956 }, { "epoch": 3.26, "learning_rate": 0.001, "loss": 3.1304, "step": 16968 }, { "epoch": 3.26, "learning_rate": 0.001, "loss": 3.1343, "step": 16980 }, { "epoch": 3.26, "learning_rate": 0.001, "loss": 3.144, "step": 16992 }, { "epoch": 3.26, "learning_rate": 0.001, "loss": 3.1491, "step": 17004 }, { "epoch": 3.27, "learning_rate": 0.001, "loss": 3.1504, "step": 17016 }, { "epoch": 3.27, "learning_rate": 0.001, "loss": 3.1335, "step": 17028 }, { "epoch": 3.27, "learning_rate": 0.001, "loss": 3.136, "step": 17040 }, { "epoch": 3.27, "learning_rate": 0.001, "loss": 3.1514, "step": 17052 }, { "epoch": 3.28, "learning_rate": 0.001, "loss": 3.1317, "step": 17064 }, { "epoch": 3.28, "learning_rate": 0.001, "loss": 3.128, "step": 17076 }, { "epoch": 3.28, "learning_rate": 0.001, "loss": 3.1309, "step": 17088 }, { "epoch": 3.28, "learning_rate": 0.001, "loss": 3.1312, "step": 17100 }, { "epoch": 3.29, "learning_rate": 0.001, "loss": 3.1446, "step": 17112 }, { "epoch": 3.29, "learning_rate": 0.001, "loss": 3.1496, "step": 17124 }, { "epoch": 3.29, "learning_rate": 0.001, "loss": 3.1473, "step": 17136 }, { "epoch": 3.29, "learning_rate": 0.001, "loss": 3.1476, "step": 17148 }, { "epoch": 3.29, "learning_rate": 0.001, "loss": 3.1391, "step": 17160 }, { "epoch": 3.3, "learning_rate": 0.001, "loss": 3.1305, "step": 17172 }, { "epoch": 3.3, "learning_rate": 0.001, "loss": 3.1441, "step": 17184 }, { "epoch": 3.3, "learning_rate": 0.001, "loss": 3.1377, "step": 17196 }, { "epoch": 3.3, "learning_rate": 0.001, "loss": 3.1193, "step": 17208 }, { "epoch": 3.31, "learning_rate": 0.001, "loss": 3.1296, "step": 17220 }, { "epoch": 3.31, "learning_rate": 0.001, "loss": 3.1363, "step": 17232 }, { "epoch": 3.31, "learning_rate": 0.001, "loss": 3.1334, "step": 17244 }, { "epoch": 3.31, "learning_rate": 0.001, "loss": 3.1382, "step": 17256 }, { "epoch": 3.32, "learning_rate": 0.001, "loss": 3.1419, "step": 17268 }, { "epoch": 3.32, "learning_rate": 0.001, "loss": 3.1289, "step": 17280 }, { "epoch": 3.32, "learning_rate": 0.001, "loss": 3.1375, "step": 17292 }, { "epoch": 3.32, "learning_rate": 0.001, "loss": 3.1312, "step": 17304 }, { "epoch": 3.32, "learning_rate": 0.001, "loss": 3.1386, "step": 17316 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 3.1428, "step": 17328 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 3.1167, "step": 17340 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 3.1223, "step": 17352 }, { "epoch": 3.33, "learning_rate": 0.001, "loss": 3.1361, "step": 17364 }, { "epoch": 3.34, "learning_rate": 0.001, "loss": 3.1261, "step": 17376 }, { "epoch": 3.34, "learning_rate": 0.001, "loss": 3.1137, "step": 17388 }, { "epoch": 3.34, "learning_rate": 0.001, "loss": 3.125, "step": 17400 }, { "epoch": 3.34, "learning_rate": 0.001, "loss": 3.1294, "step": 17412 }, { "epoch": 3.35, "learning_rate": 0.001, "loss": 3.1268, "step": 17424 }, { "epoch": 3.35, "learning_rate": 0.001, "loss": 3.1255, "step": 17436 }, { "epoch": 3.35, "learning_rate": 0.001, "loss": 3.1188, "step": 17448 }, { "epoch": 3.35, "learning_rate": 0.001, "loss": 3.1168, "step": 17460 }, { "epoch": 3.35, "learning_rate": 0.001, "loss": 3.1214, "step": 17472 }, { "epoch": 3.36, "learning_rate": 0.001, "loss": 3.1382, "step": 17484 }, { "epoch": 3.36, "learning_rate": 0.001, "loss": 3.134, "step": 17496 }, { "epoch": 3.36, "eval_ag_news_accuracy": 0.26496875, "eval_ag_news_bleu_score": 3.2441573621282522, "eval_ag_news_bleu_score_sem": 0.12257956427677824, "eval_ag_news_emb_cos_sim": 0.641579270362854, "eval_ag_news_emb_cos_sim_sem": 0.011090903672971286, "eval_ag_news_emb_top1_equal": 0.1328125, "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.234482288360596, "eval_ag_news_n_ngrams_match_1": 10.064, "eval_ag_news_n_ngrams_match_2": 1.832, "eval_ag_news_n_ngrams_match_3": 0.45, "eval_ag_news_num_pred_words": 44.004, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 69.02593395214409, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.24424833659897957, "eval_ag_news_runtime": 10.9964, "eval_ag_news_samples_per_second": 45.469, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.2700086157637954, "eval_ag_news_token_set_f1_sem": 0.004523843277511473, "eval_ag_news_token_set_precision": 0.232610492572598, "eval_ag_news_token_set_recall": 0.3508298426646458, "eval_ag_news_true_num_tokens": 56.09375, "step": 17500 }, { "epoch": 3.36, "eval_anthropic_toxic_prompts_accuracy": 0.08559375, "eval_anthropic_toxic_prompts_bleu_score": 1.9210587842054136, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07415382958891022, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5235103368759155, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011947159047318608, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.9652905464172363, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.028, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.902, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.244, "eval_anthropic_toxic_prompts_num_pred_words": 44.714, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 52.735589300499036, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.14236314841014502, "eval_anthropic_toxic_prompts_runtime": 13.2336, "eval_anthropic_toxic_prompts_samples_per_second": 37.782, "eval_anthropic_toxic_prompts_steps_per_second": 0.076, "eval_anthropic_toxic_prompts_token_set_f1": 0.25788811721480054, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005946434149208624, "eval_anthropic_toxic_prompts_token_set_precision": 0.2731150146252883, "eval_anthropic_toxic_prompts_token_set_recall": 0.28795132078417035, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 17500 }, { "epoch": 3.36, "eval_arxiv_accuracy": 0.293375, "eval_arxiv_bleu_score": 2.589116157824733, "eval_arxiv_bleu_score_sem": 0.07323929283421372, "eval_arxiv_emb_cos_sim": 0.5551767349243164, "eval_arxiv_emb_cos_sim_sem": 0.009073357742357076, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.081957817077637, "eval_arxiv_n_ngrams_match_1": 10.096, "eval_arxiv_n_ngrams_match_2": 1.508, "eval_arxiv_n_ngrams_match_3": 0.228, "eval_arxiv_num_pred_words": 37.246, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 59.26137928853457, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.23836521185924064, "eval_arxiv_runtime": 12.7021, "eval_arxiv_samples_per_second": 39.363, "eval_arxiv_steps_per_second": 0.079, "eval_arxiv_token_set_f1": 0.2476084861650361, "eval_arxiv_token_set_f1_sem": 0.0041366555700042525, "eval_arxiv_token_set_precision": 0.18490047755050426, "eval_arxiv_token_set_recall": 0.4137503932332973, "eval_arxiv_true_num_tokens": 64.0, "step": 17500 }, { "epoch": 3.36, "eval_python_code_alpaca_accuracy": 0.11753125, "eval_python_code_alpaca_bleu_score": 2.813648902263996, "eval_python_code_alpaca_bleu_score_sem": 0.09021030212828986, "eval_python_code_alpaca_emb_cos_sim": 0.4602062404155731, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01032508885008326, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.79595685005188, "eval_python_code_alpaca_n_ngrams_match_1": 5.726, "eval_python_code_alpaca_n_ngrams_match_2": 1.02, "eval_python_code_alpaca_n_ngrams_match_3": 0.254, "eval_python_code_alpaca_num_pred_words": 34.27, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 44.52081577612871, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.20778673161659977, "eval_python_code_alpaca_runtime": 9.9902, "eval_python_code_alpaca_samples_per_second": 50.049, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.3189752979213727, "eval_python_code_alpaca_token_set_f1_sem": 0.005422379906865857, "eval_python_code_alpaca_token_set_precision": 0.28872587777116143, "eval_python_code_alpaca_token_set_recall": 0.399636632201648, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 17500 }, { "epoch": 3.36, "eval_wikibio_accuracy": 0.2739375, "eval_wikibio_bleu_score": 4.387326107726963, "eval_wikibio_bleu_score_sem": 0.17358579823292636, "eval_wikibio_emb_cos_sim": 0.6526084542274475, "eval_wikibio_emb_cos_sim_sem": 0.012171598918173215, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.484732627868652, "eval_wikibio_n_ngrams_match_1": 8.788, "eval_wikibio_n_ngrams_match_2": 2.584, "eval_wikibio_n_ngrams_match_3": 0.79, "eval_wikibio_num_pred_words": 36.746, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 88.65324423700044, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.28826452861928664, "eval_wikibio_runtime": 19.9755, "eval_wikibio_samples_per_second": 25.031, "eval_wikibio_steps_per_second": 0.05, "eval_wikibio_token_set_f1": 0.2780186223496822, "eval_wikibio_token_set_f1_sem": 0.006014848414323814, "eval_wikibio_token_set_precision": 0.2802470958934345, "eval_wikibio_token_set_recall": 0.2878118735590263, "eval_wikibio_true_num_tokens": 61.1328125, "step": 17500 }, { "epoch": 3.36, "eval_nq_accuracy": 0.45309375, "eval_nq_bleu_score": 7.424779607535211, "eval_nq_bleu_score_sem": 0.33447787501293735, "eval_nq_emb_cos_sim": 0.7163922786712646, "eval_nq_emb_cos_sim_sem": 0.009680097846619334, "eval_nq_emb_top1_equal": 0.171875, "eval_nq_emb_top1_equal_sem": 0.03347745514062371, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.7687456607818604, "eval_nq_n_ngrams_match_1": 18.592, "eval_nq_n_ngrams_match_2": 5.604, "eval_nq_n_ngrams_match_3": 2.178, "eval_nq_num_pred_words": 47.842, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 15.93862901843315, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3510003130491518, "eval_nq_runtime": 11.0912, "eval_nq_samples_per_second": 45.081, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.37989460490433385, "eval_nq_token_set_f1_sem": 0.004829807244000491, "eval_nq_token_set_precision": 0.3192371971832584, "eval_nq_token_set_recall": 0.4916528337544238, "eval_nq_true_num_tokens": 64.0, "step": 17500 }, { "epoch": 3.36, "learning_rate": 0.001, "loss": 3.1351, "step": 17508 }, { "epoch": 3.36, "learning_rate": 0.001, "loss": 3.1163, "step": 17520 }, { "epoch": 3.37, "learning_rate": 0.001, "loss": 3.1215, "step": 17532 }, { "epoch": 3.37, "learning_rate": 0.001, "loss": 3.1281, "step": 17544 }, { "epoch": 3.37, "learning_rate": 0.001, "loss": 3.1285, "step": 17556 }, { "epoch": 3.37, "learning_rate": 0.001, "loss": 3.1262, "step": 17568 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 3.1244, "step": 17580 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 3.1269, "step": 17592 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 3.1296, "step": 17604 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 3.119, "step": 17616 }, { "epoch": 3.38, "learning_rate": 0.001, "loss": 3.1211, "step": 17628 }, { "epoch": 3.39, "learning_rate": 0.001, "loss": 3.1183, "step": 17640 }, { "epoch": 3.39, "learning_rate": 0.001, "loss": 3.1234, "step": 17652 }, { "epoch": 3.39, "learning_rate": 0.001, "loss": 3.1201, "step": 17664 }, { "epoch": 3.39, "learning_rate": 0.001, "loss": 3.128, "step": 17676 }, { "epoch": 3.4, "learning_rate": 0.001, "loss": 3.1192, "step": 17688 }, { "epoch": 3.4, "learning_rate": 0.001, "loss": 3.1265, "step": 17700 }, { "epoch": 3.4, "learning_rate": 0.001, "loss": 3.1276, "step": 17712 }, { "epoch": 3.4, "learning_rate": 0.001, "loss": 3.1162, "step": 17724 }, { "epoch": 3.41, "learning_rate": 0.001, "loss": 3.1165, "step": 17736 }, { "epoch": 3.41, "learning_rate": 0.001, "loss": 3.1288, "step": 17748 }, { "epoch": 3.41, "learning_rate": 0.001, "loss": 3.1183, "step": 17760 }, { "epoch": 3.41, "learning_rate": 0.001, "loss": 3.1217, "step": 17772 }, { "epoch": 3.41, "learning_rate": 0.001, "loss": 3.1272, "step": 17784 }, { "epoch": 3.42, "learning_rate": 0.001, "loss": 3.1272, "step": 17796 }, { "epoch": 3.42, "learning_rate": 0.001, "loss": 3.1151, "step": 17808 }, { "epoch": 3.42, "learning_rate": 0.001, "loss": 3.1173, "step": 17820 }, { "epoch": 3.42, "learning_rate": 0.001, "loss": 3.1184, "step": 17832 }, { "epoch": 3.43, "learning_rate": 0.001, "loss": 3.1184, "step": 17844 }, { "epoch": 3.43, "learning_rate": 0.001, "loss": 3.1173, "step": 17856 }, { "epoch": 3.43, "learning_rate": 0.001, "loss": 3.129, "step": 17868 }, { "epoch": 3.43, "learning_rate": 0.001, "loss": 3.1168, "step": 17880 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 3.1307, "step": 17892 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 3.1187, "step": 17904 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 3.1256, "step": 17916 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 3.1134, "step": 17928 }, { "epoch": 3.44, "learning_rate": 0.001, "loss": 3.1152, "step": 17940 }, { "epoch": 3.45, "learning_rate": 0.001, "loss": 3.1066, "step": 17952 }, { "epoch": 3.45, "learning_rate": 0.001, "loss": 3.118, "step": 17964 }, { "epoch": 3.45, "learning_rate": 0.001, "loss": 3.1183, "step": 17976 }, { "epoch": 3.45, "learning_rate": 0.001, "loss": 3.118, "step": 17988 }, { "epoch": 3.46, "learning_rate": 0.001, "loss": 3.1044, "step": 18000 }, { "epoch": 3.46, "learning_rate": 0.001, "loss": 3.1164, "step": 18012 }, { "epoch": 3.46, "learning_rate": 0.001, "loss": 3.1045, "step": 18024 }, { "epoch": 3.46, "learning_rate": 0.001, "loss": 3.1194, "step": 18036 }, { "epoch": 3.47, "learning_rate": 0.001, "loss": 3.1175, "step": 18048 }, { "epoch": 3.47, "learning_rate": 0.001, "loss": 3.1066, "step": 18060 }, { "epoch": 3.47, "learning_rate": 0.001, "loss": 3.1193, "step": 18072 }, { "epoch": 3.47, "learning_rate": 0.001, "loss": 3.1077, "step": 18084 }, { "epoch": 3.47, "learning_rate": 0.001, "loss": 3.1064, "step": 18096 }, { "epoch": 3.48, "learning_rate": 0.001, "loss": 3.1213, "step": 18108 }, { "epoch": 3.48, "learning_rate": 0.001, "loss": 3.1185, "step": 18120 }, { "epoch": 3.48, "eval_ag_news_accuracy": 0.26609375, "eval_ag_news_bleu_score": 3.3768854625270333, "eval_ag_news_bleu_score_sem": 0.12308929967473654, "eval_ag_news_emb_cos_sim": 0.6711300611495972, "eval_ag_news_emb_cos_sim_sem": 0.011022524362529406, "eval_ag_news_emb_top1_equal": 0.1328125, "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.211260795593262, "eval_ag_news_n_ngrams_match_1": 10.72, "eval_ag_news_n_ngrams_match_2": 1.94, "eval_ag_news_n_ngrams_match_3": 0.488, "eval_ag_news_num_pred_words": 45.73, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 67.4415161963566, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.25827675854082854, "eval_ag_news_runtime": 10.5988, "eval_ag_news_samples_per_second": 47.175, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.28390773708987077, "eval_ag_news_token_set_f1_sem": 0.004377102377260949, "eval_ag_news_token_set_precision": 0.24888274827597226, "eval_ag_news_token_set_recall": 0.352436476930113, "eval_ag_news_true_num_tokens": 56.09375, "step": 18125 }, { "epoch": 3.48, "eval_anthropic_toxic_prompts_accuracy": 0.086375, "eval_anthropic_toxic_prompts_bleu_score": 1.9462429816587172, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0691554359334003, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5473330020904541, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01091542461065167, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.9248125553131104, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.124, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.918, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.248, "eval_anthropic_toxic_prompts_num_pred_words": 45.424, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 50.64358429828718, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1488285552175686, "eval_anthropic_toxic_prompts_runtime": 10.3674, "eval_anthropic_toxic_prompts_samples_per_second": 48.228, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.2584258301129104, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005599863647917523, "eval_anthropic_toxic_prompts_token_set_precision": 0.28120697096026415, "eval_anthropic_toxic_prompts_token_set_recall": 0.27877339063580747, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 18125 }, { "epoch": 3.48, "eval_arxiv_accuracy": 0.29428125, "eval_arxiv_bleu_score": 2.8244090290872426, "eval_arxiv_bleu_score_sem": 0.08591733016095368, "eval_arxiv_emb_cos_sim": 0.5704166293144226, "eval_arxiv_emb_cos_sim_sem": 0.00989456801216759, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.084709167480469, "eval_arxiv_n_ngrams_match_1": 10.464, "eval_arxiv_n_ngrams_match_2": 1.658, "eval_arxiv_n_ngrams_match_3": 0.294, "eval_arxiv_num_pred_words": 37.148, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 59.42465261638336, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.24941656184494082, "eval_arxiv_runtime": 10.5458, "eval_arxiv_samples_per_second": 47.412, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.2517879611899787, "eval_arxiv_token_set_f1_sem": 0.004217687107659366, "eval_arxiv_token_set_precision": 0.1921424543103267, "eval_arxiv_token_set_recall": 0.40228131402033, "eval_arxiv_true_num_tokens": 64.0, "step": 18125 }, { "epoch": 3.48, "eval_python_code_alpaca_accuracy": 0.12159375, "eval_python_code_alpaca_bleu_score": 2.831516167722937, "eval_python_code_alpaca_bleu_score_sem": 0.08430262502229126, "eval_python_code_alpaca_emb_cos_sim": 0.5020995140075684, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011259268478971162, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.7325639724731445, "eval_python_code_alpaca_n_ngrams_match_1": 6.062, "eval_python_code_alpaca_n_ngrams_match_2": 1.206, "eval_python_code_alpaca_n_ngrams_match_3": 0.294, "eval_python_code_alpaca_num_pred_words": 36.598, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 41.78610936551627, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.21191340305227177, "eval_python_code_alpaca_runtime": 10.8731, "eval_python_code_alpaca_samples_per_second": 45.985, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.32598592610018756, "eval_python_code_alpaca_token_set_f1_sem": 0.005098146004097936, "eval_python_code_alpaca_token_set_precision": 0.3087060931733973, "eval_python_code_alpaca_token_set_recall": 0.3889249388468435, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 18125 }, { "epoch": 3.48, "eval_wikibio_accuracy": 0.27421875, "eval_wikibio_bleu_score": 4.816185673964495, "eval_wikibio_bleu_score_sem": 0.17921660270328887, "eval_wikibio_emb_cos_sim": 0.648404598236084, "eval_wikibio_emb_cos_sim_sem": 0.012460465515332786, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.521198749542236, "eval_wikibio_n_ngrams_match_1": 9.024, "eval_wikibio_n_ngrams_match_2": 2.806, "eval_wikibio_n_ngrams_match_3": 0.932, "eval_wikibio_num_pred_words": 36.616, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 91.94575186946655, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.29600679232278354, "eval_wikibio_runtime": 10.3942, "eval_wikibio_samples_per_second": 48.104, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.286489916066079, "eval_wikibio_token_set_f1_sem": 0.005601602836704596, "eval_wikibio_token_set_precision": 0.2882670778193052, "eval_wikibio_token_set_recall": 0.2990335197270945, "eval_wikibio_true_num_tokens": 61.1328125, "step": 18125 }, { "epoch": 3.48, "eval_nq_accuracy": 0.455, "eval_nq_bleu_score": 7.91302147799092, "eval_nq_bleu_score_sem": 0.34844504143596244, "eval_nq_emb_cos_sim": 0.7135587334632874, "eval_nq_emb_cos_sim_sem": 0.010083692116692607, "eval_nq_emb_top1_equal": 0.1796875, "eval_nq_emb_top1_equal_sem": 0.034068008879424266, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.752685785293579, "eval_nq_n_ngrams_match_1": 19.176, "eval_nq_n_ngrams_match_2": 5.974, "eval_nq_n_ngrams_match_3": 2.362, "eval_nq_num_pred_words": 48.264, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 15.684701104012898, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.360912961980277, "eval_nq_runtime": 10.7574, "eval_nq_samples_per_second": 46.48, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.38839248311068214, "eval_nq_token_set_f1_sem": 0.004686219499329099, "eval_nq_token_set_precision": 0.3303860384492778, "eval_nq_token_set_recall": 0.48861715152053414, "eval_nq_true_num_tokens": 64.0, "step": 18125 }, { "epoch": 3.48, "learning_rate": 0.001, "loss": 3.1112, "step": 18132 }, { "epoch": 3.48, "learning_rate": 0.001, "loss": 3.1104, "step": 18144 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 3.1109, "step": 18156 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 3.1119, "step": 18168 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 3.1135, "step": 18180 }, { "epoch": 3.49, "learning_rate": 0.001, "loss": 3.1212, "step": 18192 }, { "epoch": 3.5, "learning_rate": 0.001, "loss": 3.1073, "step": 18204 }, { "epoch": 3.5, "learning_rate": 0.001, "loss": 3.099, "step": 18216 }, { "epoch": 3.5, "learning_rate": 0.001, "loss": 3.1044, "step": 18228 }, { "epoch": 3.5, "learning_rate": 0.001, "loss": 3.1012, "step": 18240 }, { "epoch": 3.5, "learning_rate": 0.001, "loss": 3.1142, "step": 18252 }, { "epoch": 3.51, "learning_rate": 0.001, "loss": 3.1098, "step": 18264 }, { "epoch": 3.51, "learning_rate": 0.001, "loss": 3.1116, "step": 18276 }, { "epoch": 3.51, "learning_rate": 0.001, "loss": 3.1114, "step": 18288 }, { "epoch": 3.51, "learning_rate": 0.001, "loss": 3.1079, "step": 18300 }, { "epoch": 3.52, "learning_rate": 0.001, "loss": 3.1098, "step": 18312 }, { "epoch": 3.52, "learning_rate": 0.001, "loss": 3.1181, "step": 18324 }, { "epoch": 3.52, "learning_rate": 0.001, "loss": 3.1233, "step": 18336 }, { "epoch": 3.52, "learning_rate": 0.001, "loss": 3.1056, "step": 18348 }, { "epoch": 3.53, "learning_rate": 0.001, "loss": 3.1071, "step": 18360 }, { "epoch": 3.53, "learning_rate": 0.001, "loss": 3.102, "step": 18372 }, { "epoch": 3.53, "learning_rate": 0.001, "loss": 3.1087, "step": 18384 }, { "epoch": 3.53, "learning_rate": 0.001, "loss": 3.1061, "step": 18396 }, { "epoch": 3.53, "learning_rate": 0.001, "loss": 3.1102, "step": 18408 }, { "epoch": 3.54, "learning_rate": 0.001, "loss": 3.1061, "step": 18420 }, { "epoch": 3.54, "learning_rate": 0.001, "loss": 3.105, "step": 18432 }, { "epoch": 3.54, "learning_rate": 0.001, "loss": 3.1028, "step": 18444 }, { "epoch": 3.54, "learning_rate": 0.001, "loss": 3.1017, "step": 18456 }, { "epoch": 3.55, "learning_rate": 0.001, "loss": 3.0994, "step": 18468 }, { "epoch": 3.55, "learning_rate": 0.001, "loss": 3.1058, "step": 18480 }, { "epoch": 3.55, "learning_rate": 0.001, "loss": 3.1029, "step": 18492 }, { "epoch": 3.55, "learning_rate": 0.001, "loss": 3.1074, "step": 18504 }, { "epoch": 3.56, "learning_rate": 0.001, "loss": 3.101, "step": 18516 }, { "epoch": 3.56, "learning_rate": 0.001, "loss": 3.1063, "step": 18528 }, { "epoch": 3.56, "learning_rate": 0.001, "loss": 3.0956, "step": 18540 }, { "epoch": 3.56, "learning_rate": 0.001, "loss": 3.1052, "step": 18552 }, { "epoch": 3.56, "learning_rate": 0.001, "loss": 3.0998, "step": 18564 }, { "epoch": 3.57, "learning_rate": 0.001, "loss": 3.0962, "step": 18576 }, { "epoch": 3.57, "learning_rate": 0.001, "loss": 3.1042, "step": 18588 }, { "epoch": 3.57, "learning_rate": 0.001, "loss": 3.0996, "step": 18600 }, { "epoch": 3.57, "learning_rate": 0.001, "loss": 3.0973, "step": 18612 }, { "epoch": 3.58, "learning_rate": 0.001, "loss": 3.1108, "step": 18624 }, { "epoch": 3.58, "learning_rate": 0.001, "loss": 3.1146, "step": 18636 }, { "epoch": 3.58, "learning_rate": 0.001, "loss": 3.1076, "step": 18648 }, { "epoch": 3.58, "learning_rate": 0.001, "loss": 3.1092, "step": 18660 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 3.111, "step": 18672 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 3.1088, "step": 18684 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 3.104, "step": 18696 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 3.1016, "step": 18708 }, { "epoch": 3.59, "learning_rate": 0.001, "loss": 3.096, "step": 18720 }, { "epoch": 3.6, "learning_rate": 0.001, "loss": 3.1003, "step": 18732 }, { "epoch": 3.6, "learning_rate": 0.001, "loss": 3.0904, "step": 18744 }, { "epoch": 3.6, "eval_ag_news_accuracy": 0.26890625, "eval_ag_news_bleu_score": 3.3306737212340427, "eval_ag_news_bleu_score_sem": 0.11451415513508868, "eval_ag_news_emb_cos_sim": 0.6727374792098999, "eval_ag_news_emb_cos_sim_sem": 0.01068550433240163, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.192138195037842, "eval_ag_news_n_ngrams_match_1": 10.45, "eval_ag_news_n_ngrams_match_2": 1.886, "eval_ag_news_n_ngrams_match_3": 0.448, "eval_ag_news_num_pred_words": 45.028, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 66.16411159612522, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2512756847015737, "eval_ag_news_runtime": 10.668, "eval_ag_news_samples_per_second": 46.869, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.27799062226419174, "eval_ag_news_token_set_f1_sem": 0.004415429101864606, "eval_ag_news_token_set_precision": 0.24184646106291946, "eval_ag_news_token_set_recall": 0.3554571349590701, "eval_ag_news_true_num_tokens": 56.09375, "step": 18750 }, { "epoch": 3.6, "eval_anthropic_toxic_prompts_accuracy": 0.088625, "eval_anthropic_toxic_prompts_bleu_score": 2.0829403759575826, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08500098511040192, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5523356795310974, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011747851869066654, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.874500036239624, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.236, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.994, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.314, "eval_anthropic_toxic_prompts_num_pred_words": 46.062, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 48.15861470903693, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.14965602792635907, "eval_anthropic_toxic_prompts_runtime": 10.0725, "eval_anthropic_toxic_prompts_samples_per_second": 49.64, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.2711838520748361, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005941723493908647, "eval_anthropic_toxic_prompts_token_set_precision": 0.294925893721296, "eval_anthropic_toxic_prompts_token_set_recall": 0.2954815093920474, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 18750 }, { "epoch": 3.6, "eval_arxiv_accuracy": 0.29665625, "eval_arxiv_bleu_score": 2.902288499661674, "eval_arxiv_bleu_score_sem": 0.08611618824157984, "eval_arxiv_emb_cos_sim": 0.5800829529762268, "eval_arxiv_emb_cos_sim_sem": 0.008218194234803987, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.063896656036377, "eval_arxiv_n_ngrams_match_1": 10.776, "eval_arxiv_n_ngrams_match_2": 1.746, "eval_arxiv_n_ngrams_match_3": 0.302, "eval_arxiv_num_pred_words": 38.522, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 58.20065773931756, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.254511440871516, "eval_arxiv_runtime": 10.3749, "eval_arxiv_samples_per_second": 48.193, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.2594643321632246, "eval_arxiv_token_set_f1_sem": 0.0039419367881206435, "eval_arxiv_token_set_precision": 0.19696856759428902, "eval_arxiv_token_set_recall": 0.4105121111739601, "eval_arxiv_true_num_tokens": 64.0, "step": 18750 }, { "epoch": 3.6, "eval_python_code_alpaca_accuracy": 0.12234375, "eval_python_code_alpaca_bleu_score": 3.0402250384275593, "eval_python_code_alpaca_bleu_score_sem": 0.09149842023625508, "eval_python_code_alpaca_emb_cos_sim": 0.5129662752151489, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010910445355626812, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.719010829925537, "eval_python_code_alpaca_n_ngrams_match_1": 6.404, "eval_python_code_alpaca_n_ngrams_match_2": 1.368, "eval_python_code_alpaca_n_ngrams_match_3": 0.362, "eval_python_code_alpaca_num_pred_words": 38.562, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 41.223596785889626, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.21990430883874268, "eval_python_code_alpaca_runtime": 10.3607, "eval_python_code_alpaca_samples_per_second": 48.259, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.3434977363611162, "eval_python_code_alpaca_token_set_f1_sem": 0.00533002193842187, "eval_python_code_alpaca_token_set_precision": 0.332952346210531, "eval_python_code_alpaca_token_set_recall": 0.3985187483902321, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 18750 }, { "epoch": 3.6, "eval_wikibio_accuracy": 0.28053125, "eval_wikibio_bleu_score": 4.86568482678552, "eval_wikibio_bleu_score_sem": 0.19821865716439607, "eval_wikibio_emb_cos_sim": 0.6397950649261475, "eval_wikibio_emb_cos_sim_sem": 0.011672202227256647, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.44827127456665, "eval_wikibio_n_ngrams_match_1": 8.874, "eval_wikibio_n_ngrams_match_2": 2.726, "eval_wikibio_n_ngrams_match_3": 0.912, "eval_wikibio_num_pred_words": 36.294, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 85.47904640032947, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.29585587354965814, "eval_wikibio_runtime": 10.0232, "eval_wikibio_samples_per_second": 49.884, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.2848981071731547, "eval_wikibio_token_set_f1_sem": 0.005397483888067579, "eval_wikibio_token_set_precision": 0.28393067120324256, "eval_wikibio_token_set_recall": 0.30555215692346377, "eval_wikibio_true_num_tokens": 61.1328125, "step": 18750 }, { "epoch": 3.6, "eval_nq_accuracy": 0.457375, "eval_nq_bleu_score": 7.63792443550545, "eval_nq_bleu_score_sem": 0.3387526478308195, "eval_nq_emb_cos_sim": 0.7198264598846436, "eval_nq_emb_cos_sim_sem": 0.009967450449955053, "eval_nq_emb_top1_equal": 0.1796875, "eval_nq_emb_top1_equal_sem": 0.034068008879424266, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.742191791534424, "eval_nq_n_ngrams_match_1": 19.086, "eval_nq_n_ngrams_match_2": 5.89, "eval_nq_n_ngrams_match_3": 2.254, "eval_nq_num_pred_words": 48.316, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 15.520966565712516, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.36122749924215347, "eval_nq_runtime": 10.5748, "eval_nq_samples_per_second": 47.282, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.3885224715429432, "eval_nq_token_set_f1_sem": 0.004676462222611935, "eval_nq_token_set_precision": 0.3311734521359436, "eval_nq_token_set_recall": 0.4873702363118083, "eval_nq_true_num_tokens": 64.0, "step": 18750 }, { "epoch": 3.6, "learning_rate": 0.001, "loss": 3.0942, "step": 18756 }, { "epoch": 3.6, "learning_rate": 0.001, "loss": 3.1, "step": 18768 }, { "epoch": 3.61, "learning_rate": 0.001, "loss": 3.0996, "step": 18780 }, { "epoch": 3.61, "learning_rate": 0.001, "loss": 3.1098, "step": 18792 }, { "epoch": 3.61, "learning_rate": 0.001, "loss": 3.1113, "step": 18804 }, { "epoch": 3.61, "learning_rate": 0.001, "loss": 3.0989, "step": 18816 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 3.1004, "step": 18828 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 3.0931, "step": 18840 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 3.0959, "step": 18852 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 3.1116, "step": 18864 }, { "epoch": 3.62, "learning_rate": 0.001, "loss": 3.0983, "step": 18876 }, { "epoch": 3.63, "learning_rate": 0.001, "loss": 3.1, "step": 18888 }, { "epoch": 3.63, "learning_rate": 0.001, "loss": 3.1089, "step": 18900 }, { "epoch": 3.63, "learning_rate": 0.001, "loss": 3.0966, "step": 18912 }, { "epoch": 3.63, "learning_rate": 0.001, "loss": 3.1052, "step": 18924 }, { "epoch": 3.64, "learning_rate": 0.001, "loss": 3.0952, "step": 18936 }, { "epoch": 3.64, "learning_rate": 0.001, "loss": 3.1044, "step": 18948 }, { "epoch": 3.64, "learning_rate": 0.001, "loss": 3.0945, "step": 18960 }, { "epoch": 3.64, "learning_rate": 0.001, "loss": 3.096, "step": 18972 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 3.1093, "step": 18984 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 3.1089, "step": 18996 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 3.0969, "step": 19008 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 3.0936, "step": 19020 }, { "epoch": 3.65, "learning_rate": 0.001, "loss": 3.0923, "step": 19032 }, { "epoch": 3.66, "learning_rate": 0.001, "loss": 3.0923, "step": 19044 }, { "epoch": 3.66, "learning_rate": 0.001, "loss": 3.0878, "step": 19056 }, { "epoch": 3.66, "learning_rate": 0.001, "loss": 3.0953, "step": 19068 }, { "epoch": 3.66, "learning_rate": 0.001, "loss": 3.0806, "step": 19080 }, { "epoch": 3.67, "learning_rate": 0.001, "loss": 3.0987, "step": 19092 }, { "epoch": 3.67, "learning_rate": 0.001, "loss": 3.0967, "step": 19104 }, { "epoch": 3.67, "learning_rate": 0.001, "loss": 3.1029, "step": 19116 }, { "epoch": 3.67, "learning_rate": 0.001, "loss": 3.0916, "step": 19128 }, { "epoch": 3.68, "learning_rate": 0.001, "loss": 3.1063, "step": 19140 }, { "epoch": 3.68, "learning_rate": 0.001, "loss": 3.089, "step": 19152 }, { "epoch": 3.68, "learning_rate": 0.001, "loss": 3.0841, "step": 19164 }, { "epoch": 3.68, "learning_rate": 0.001, "loss": 3.1062, "step": 19176 }, { "epoch": 3.68, "learning_rate": 0.001, "loss": 3.102, "step": 19188 }, { "epoch": 3.69, "learning_rate": 0.001, "loss": 3.0927, "step": 19200 }, { "epoch": 3.69, "learning_rate": 0.001, "loss": 3.0947, "step": 19212 }, { "epoch": 3.69, "learning_rate": 0.001, "loss": 3.0963, "step": 19224 }, { "epoch": 3.69, "learning_rate": 0.001, "loss": 3.0898, "step": 19236 }, { "epoch": 3.7, "learning_rate": 0.001, "loss": 3.0965, "step": 19248 }, { "epoch": 3.7, "learning_rate": 0.001, "loss": 3.0831, "step": 19260 }, { "epoch": 3.7, "learning_rate": 0.001, "loss": 3.089, "step": 19272 }, { "epoch": 3.7, "learning_rate": 0.001, "loss": 3.0967, "step": 19284 }, { "epoch": 3.71, "learning_rate": 0.001, "loss": 3.0961, "step": 19296 }, { "epoch": 3.71, "learning_rate": 0.001, "loss": 3.0953, "step": 19308 }, { "epoch": 3.71, "learning_rate": 0.001, "loss": 3.0798, "step": 19320 }, { "epoch": 3.71, "learning_rate": 0.001, "loss": 3.0965, "step": 19332 }, { "epoch": 3.71, "learning_rate": 0.001, "loss": 3.0928, "step": 19344 }, { "epoch": 3.72, "learning_rate": 0.001, "loss": 3.0933, "step": 19356 }, { "epoch": 3.72, "learning_rate": 0.001, "loss": 3.0874, "step": 19368 }, { "epoch": 3.72, "eval_ag_news_accuracy": 0.27, "eval_ag_news_bleu_score": 3.472754228337938, "eval_ag_news_bleu_score_sem": 0.12049368215361671, "eval_ag_news_emb_cos_sim": 0.6796576976776123, "eval_ag_news_emb_cos_sim_sem": 0.010592004183158244, "eval_ag_news_emb_top1_equal": 0.1328125, "eval_ag_news_emb_top1_equal_sem": 0.030114394778901498, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.175224781036377, "eval_ag_news_n_ngrams_match_1": 10.728, "eval_ag_news_n_ngrams_match_2": 1.962, "eval_ag_news_n_ngrams_match_3": 0.46, "eval_ag_news_num_pred_words": 44.62, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 65.05446102682308, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.25913545264538174, "eval_ag_news_runtime": 17.3813, "eval_ag_news_samples_per_second": 28.766, "eval_ag_news_steps_per_second": 0.058, "eval_ag_news_token_set_f1": 0.2834739402798457, "eval_ag_news_token_set_f1_sem": 0.00435891938822785, "eval_ag_news_token_set_precision": 0.2487709706036216, "eval_ag_news_token_set_recall": 0.35297619122472856, "eval_ag_news_true_num_tokens": 56.09375, "step": 19375 }, { "epoch": 3.72, "eval_anthropic_toxic_prompts_accuracy": 0.0891875, "eval_anthropic_toxic_prompts_bleu_score": 2.0192076639250716, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07216344694835221, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5528053641319275, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.012621590283011902, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.8718910217285156, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.992, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.28, "eval_anthropic_toxic_prompts_num_pred_words": 46.138, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 48.03313194877702, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.15187383470242657, "eval_anthropic_toxic_prompts_runtime": 9.9967, "eval_anthropic_toxic_prompts_samples_per_second": 50.016, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.26705965718786157, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005534154285558849, "eval_anthropic_toxic_prompts_token_set_precision": 0.296262196589119, "eval_anthropic_toxic_prompts_token_set_recall": 0.28415050166917083, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 19375 }, { "epoch": 3.72, "eval_arxiv_accuracy": 0.2963125, "eval_arxiv_bleu_score": 2.972708753679175, "eval_arxiv_bleu_score_sem": 0.0993640178338775, "eval_arxiv_emb_cos_sim": 0.5856300592422485, "eval_arxiv_emb_cos_sim_sem": 0.009791579553653187, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.035984516143799, "eval_arxiv_n_ngrams_match_1": 10.79, "eval_arxiv_n_ngrams_match_2": 1.808, "eval_arxiv_n_ngrams_match_3": 0.338, "eval_arxiv_num_pred_words": 37.514, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 56.59861506784591, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2537807955642004, "eval_arxiv_runtime": 14.7005, "eval_arxiv_samples_per_second": 34.012, "eval_arxiv_steps_per_second": 0.068, "eval_arxiv_token_set_f1": 0.25880794735075296, "eval_arxiv_token_set_f1_sem": 0.0043315438403489295, "eval_arxiv_token_set_precision": 0.1972017494362223, "eval_arxiv_token_set_recall": 0.40974570893421125, "eval_arxiv_true_num_tokens": 64.0, "step": 19375 }, { "epoch": 3.72, "eval_python_code_alpaca_accuracy": 0.1211875, "eval_python_code_alpaca_bleu_score": 2.908157101103175, "eval_python_code_alpaca_bleu_score_sem": 0.09723201187971059, "eval_python_code_alpaca_emb_cos_sim": 0.5246033668518066, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01038970721284005, "eval_python_code_alpaca_emb_top1_equal": 0.03125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.015439349450344106, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.6962592601776123, "eval_python_code_alpaca_n_ngrams_match_1": 6.342, "eval_python_code_alpaca_n_ngrams_match_2": 1.296, "eval_python_code_alpaca_n_ngrams_match_3": 0.358, "eval_python_code_alpaca_num_pred_words": 38.294, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 40.29628415768576, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2123891805818689, "eval_python_code_alpaca_runtime": 11.0532, "eval_python_code_alpaca_samples_per_second": 45.236, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.3382246492097026, "eval_python_code_alpaca_token_set_f1_sem": 0.005487002634017304, "eval_python_code_alpaca_token_set_precision": 0.3245677435905774, "eval_python_code_alpaca_token_set_recall": 0.39618136763514733, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 19375 }, { "epoch": 3.72, "eval_wikibio_accuracy": 0.28075, "eval_wikibio_bleu_score": 4.855825708587543, "eval_wikibio_bleu_score_sem": 0.1795522937569043, "eval_wikibio_emb_cos_sim": 0.6441939473152161, "eval_wikibio_emb_cos_sim_sem": 0.011822260813023622, "eval_wikibio_emb_top1_equal": 0.09375, "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.401398658752441, "eval_wikibio_n_ngrams_match_1": 8.954, "eval_wikibio_n_ngrams_match_2": 2.746, "eval_wikibio_n_ngrams_match_3": 0.91, "eval_wikibio_num_pred_words": 36.108, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 81.56487034145702, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.296123596479039, "eval_wikibio_runtime": 10.173, "eval_wikibio_samples_per_second": 49.15, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.28835538818950723, "eval_wikibio_token_set_f1_sem": 0.0057115422911795475, "eval_wikibio_token_set_precision": 0.2871581305250753, "eval_wikibio_token_set_recall": 0.304197334168239, "eval_wikibio_true_num_tokens": 61.1328125, "step": 19375 }, { "epoch": 3.72, "eval_nq_accuracy": 0.4594375, "eval_nq_bleu_score": 8.039485433057248, "eval_nq_bleu_score_sem": 0.35879975248025403, "eval_nq_emb_cos_sim": 0.7219055891036987, "eval_nq_emb_cos_sim_sem": 0.010759631419309182, "eval_nq_emb_top1_equal": 0.1875, "eval_nq_emb_top1_equal_sem": 0.034634623208270626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.72248911857605, "eval_nq_n_ngrams_match_1": 19.16, "eval_nq_n_ngrams_match_2": 5.964, "eval_nq_n_ngrams_match_3": 2.44, "eval_nq_num_pred_words": 48.076, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 15.218154932533585, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.36304378926611514, "eval_nq_runtime": 10.4358, "eval_nq_samples_per_second": 47.912, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.39067846873807627, "eval_nq_token_set_f1_sem": 0.00483756581787002, "eval_nq_token_set_precision": 0.3321573251141496, "eval_nq_token_set_recall": 0.4914988579958725, "eval_nq_true_num_tokens": 64.0, "step": 19375 }, { "epoch": 3.72, "learning_rate": 0.001, "loss": 3.0911, "step": 19380 }, { "epoch": 3.72, "learning_rate": 0.001, "loss": 3.0954, "step": 19392 }, { "epoch": 3.73, "learning_rate": 0.001, "loss": 3.0698, "step": 19404 }, { "epoch": 3.73, "learning_rate": 0.001, "loss": 3.0931, "step": 19416 }, { "epoch": 3.73, "learning_rate": 0.001, "loss": 3.0916, "step": 19428 }, { "epoch": 3.73, "learning_rate": 0.001, "loss": 3.0962, "step": 19440 }, { "epoch": 3.74, "learning_rate": 0.001, "loss": 3.0803, "step": 19452 }, { "epoch": 3.74, "learning_rate": 0.001, "loss": 3.0783, "step": 19464 }, { "epoch": 3.74, "learning_rate": 0.001, "loss": 3.09, "step": 19476 }, { "epoch": 3.74, "learning_rate": 0.001, "loss": 3.0775, "step": 19488 }, { "epoch": 3.74, "learning_rate": 0.001, "loss": 3.0802, "step": 19500 }, { "epoch": 3.75, "learning_rate": 0.001, "loss": 3.0825, "step": 19512 }, { "epoch": 3.75, "learning_rate": 0.001, "loss": 3.0802, "step": 19524 }, { "epoch": 3.75, "learning_rate": 0.001, "loss": 3.0916, "step": 19536 }, { "epoch": 3.75, "learning_rate": 0.001, "loss": 3.0741, "step": 19548 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 3.0918, "step": 19560 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 3.0813, "step": 19572 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 3.0862, "step": 19584 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 3.0845, "step": 19596 }, { "epoch": 3.76, "learning_rate": 0.001, "loss": 3.0903, "step": 19608 }, { "epoch": 3.77, "learning_rate": 0.001, "loss": 3.0822, "step": 19620 }, { "epoch": 3.77, "learning_rate": 0.001, "loss": 3.0755, "step": 19632 }, { "epoch": 3.77, "learning_rate": 0.001, "loss": 3.0845, "step": 19644 }, { "epoch": 3.77, "learning_rate": 0.001, "loss": 3.0773, "step": 19656 }, { "epoch": 3.78, "learning_rate": 0.001, "loss": 3.0736, "step": 19668 }, { "epoch": 3.78, "learning_rate": 0.001, "loss": 3.0945, "step": 19680 }, { "epoch": 3.78, "learning_rate": 0.001, "loss": 3.0816, "step": 19692 }, { "epoch": 3.78, "learning_rate": 0.001, "loss": 3.0707, "step": 19704 }, { "epoch": 3.79, "learning_rate": 0.001, "loss": 3.0762, "step": 19716 }, { "epoch": 3.79, "learning_rate": 0.001, "loss": 3.0924, "step": 19728 }, { "epoch": 3.79, "learning_rate": 0.001, "loss": 3.0716, "step": 19740 }, { "epoch": 3.79, "learning_rate": 0.001, "loss": 3.0803, "step": 19752 }, { "epoch": 3.79, "learning_rate": 0.001, "loss": 3.0682, "step": 19764 }, { "epoch": 3.8, "learning_rate": 0.001, "loss": 3.0855, "step": 19776 }, { "epoch": 3.8, "learning_rate": 0.001, "loss": 3.0878, "step": 19788 }, { "epoch": 3.8, "learning_rate": 0.001, "loss": 3.0884, "step": 19800 }, { "epoch": 3.8, "learning_rate": 0.001, "loss": 3.0944, "step": 19812 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 3.0813, "step": 19824 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 3.0714, "step": 19836 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 3.0757, "step": 19848 }, { "epoch": 3.81, "learning_rate": 0.001, "loss": 3.0772, "step": 19860 }, { "epoch": 3.82, "learning_rate": 0.001, "loss": 3.0861, "step": 19872 }, { "epoch": 3.82, "learning_rate": 0.001, "loss": 3.0902, "step": 19884 }, { "epoch": 3.82, "learning_rate": 0.001, "loss": 3.0751, "step": 19896 }, { "epoch": 3.82, "learning_rate": 0.001, "loss": 3.0858, "step": 19908 }, { "epoch": 3.82, "learning_rate": 0.001, "loss": 3.0863, "step": 19920 }, { "epoch": 3.83, "learning_rate": 0.001, "loss": 3.0842, "step": 19932 }, { "epoch": 3.83, "learning_rate": 0.001, "loss": 3.0762, "step": 19944 }, { "epoch": 3.83, "learning_rate": 0.001, "loss": 3.0898, "step": 19956 }, { "epoch": 3.83, "learning_rate": 0.001, "loss": 3.0837, "step": 19968 }, { "epoch": 3.84, "learning_rate": 0.001, "loss": 3.0808, "step": 19980 }, { "epoch": 3.84, "learning_rate": 0.001, "loss": 3.0948, "step": 19992 }, { "epoch": 3.84, "eval_ag_news_accuracy": 0.269125, "eval_ag_news_bleu_score": 3.5205719450799524, "eval_ag_news_bleu_score_sem": 0.13084593860321023, "eval_ag_news_emb_cos_sim": 0.6838691830635071, "eval_ag_news_emb_cos_sim_sem": 0.011532360300986517, "eval_ag_news_emb_top1_equal": 0.1484375, "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.1688456535339355, "eval_ag_news_n_ngrams_match_1": 10.702, "eval_ag_news_n_ngrams_match_2": 2.006, "eval_ag_news_n_ngrams_match_3": 0.534, "eval_ag_news_num_pred_words": 45.61, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 64.6407911545572, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2573117956030651, "eval_ag_news_runtime": 10.481, "eval_ag_news_samples_per_second": 47.705, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.2833631714930698, "eval_ag_news_token_set_f1_sem": 0.004298555133459661, "eval_ag_news_token_set_precision": 0.25019726274739207, "eval_ag_news_token_set_recall": 0.34880132857872753, "eval_ag_news_true_num_tokens": 56.09375, "step": 20000 }, { "epoch": 3.84, "eval_anthropic_toxic_prompts_accuracy": 0.0874375, "eval_anthropic_toxic_prompts_bleu_score": 2.082524207080151, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07931001353876281, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5490853190422058, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011788019132786551, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.8917834758758545, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.336, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.032, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.292, "eval_anthropic_toxic_prompts_num_pred_words": 45.46, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 48.998195743714746, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1545424205150261, "eval_anthropic_toxic_prompts_runtime": 10.2736, "eval_anthropic_toxic_prompts_samples_per_second": 48.668, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.26496288095514636, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005541832229746886, "eval_anthropic_toxic_prompts_token_set_precision": 0.2988874207066744, "eval_anthropic_toxic_prompts_token_set_recall": 0.27338358910120303, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 20000 }, { "epoch": 3.84, "eval_arxiv_accuracy": 0.29815625, "eval_arxiv_bleu_score": 2.983439874000031, "eval_arxiv_bleu_score_sem": 0.08992601673457186, "eval_arxiv_emb_cos_sim": 0.5861841440200806, "eval_arxiv_emb_cos_sim_sem": 0.008722870477041051, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.020208835601807, "eval_arxiv_n_ngrams_match_1": 11.05, "eval_arxiv_n_ngrams_match_2": 1.818, "eval_arxiv_n_ngrams_match_3": 0.316, "eval_arxiv_num_pred_words": 38.414, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 55.712739415463716, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.26075130061568297, "eval_arxiv_runtime": 10.1994, "eval_arxiv_samples_per_second": 49.022, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.26344510050212405, "eval_arxiv_token_set_f1_sem": 0.004076579819875716, "eval_arxiv_token_set_precision": 0.2033983566688797, "eval_arxiv_token_set_recall": 0.4034139858147007, "eval_arxiv_true_num_tokens": 64.0, "step": 20000 }, { "epoch": 3.84, "eval_python_code_alpaca_accuracy": 0.1219375, "eval_python_code_alpaca_bleu_score": 2.788880747633829, "eval_python_code_alpaca_bleu_score_sem": 0.0865622969943088, "eval_python_code_alpaca_emb_cos_sim": 0.49001336097717285, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01068028470863136, "eval_python_code_alpaca_emb_top1_equal": 0.0546875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.738424777984619, "eval_python_code_alpaca_n_ngrams_match_1": 6.16, "eval_python_code_alpaca_n_ngrams_match_2": 1.172, "eval_python_code_alpaca_n_ngrams_match_3": 0.31, "eval_python_code_alpaca_num_pred_words": 37.482, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 42.031728686057264, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2096404841062586, "eval_python_code_alpaca_runtime": 10.1415, "eval_python_code_alpaca_samples_per_second": 49.303, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.3266004332652199, "eval_python_code_alpaca_token_set_f1_sem": 0.005286487401659509, "eval_python_code_alpaca_token_set_precision": 0.3146256880859559, "eval_python_code_alpaca_token_set_recall": 0.3794747305124554, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 20000 }, { "epoch": 3.84, "eval_wikibio_accuracy": 0.27809375, "eval_wikibio_bleu_score": 4.852052903618418, "eval_wikibio_bleu_score_sem": 0.17969183085346155, "eval_wikibio_emb_cos_sim": 0.6578832268714905, "eval_wikibio_emb_cos_sim_sem": 0.010880642923918054, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.403044700622559, "eval_wikibio_n_ngrams_match_1": 9.276, "eval_wikibio_n_ngrams_match_2": 2.822, "eval_wikibio_n_ngrams_match_3": 0.9, "eval_wikibio_num_pred_words": 37.478, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 81.69924009194838, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3046706090166992, "eval_wikibio_runtime": 14.0655, "eval_wikibio_samples_per_second": 35.548, "eval_wikibio_steps_per_second": 0.071, "eval_wikibio_token_set_f1": 0.29629518121620035, "eval_wikibio_token_set_f1_sem": 0.005372514249732754, "eval_wikibio_token_set_precision": 0.30051227063985675, "eval_wikibio_token_set_recall": 0.30685222842469106, "eval_wikibio_true_num_tokens": 61.1328125, "step": 20000 }, { "epoch": 3.84, "eval_nq_accuracy": 0.45965625, "eval_nq_bleu_score": 8.207424029190614, "eval_nq_bleu_score_sem": 0.3686875235712443, "eval_nq_emb_cos_sim": 0.7135041952133179, "eval_nq_emb_cos_sim_sem": 0.011079902028420926, "eval_nq_emb_top1_equal": 0.15625, "eval_nq_emb_top1_equal_sem": 0.03221922156442571, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.712360143661499, "eval_nq_n_ngrams_match_1": 19.526, "eval_nq_n_ngrams_match_2": 6.134, "eval_nq_n_ngrams_match_3": 2.472, "eval_nq_num_pred_words": 47.752, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 15.064788655791137, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3689419126746766, "eval_nq_runtime": 10.6154, "eval_nq_samples_per_second": 47.101, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.39564260143520746, "eval_nq_token_set_f1_sem": 0.004959118859659147, "eval_nq_token_set_precision": 0.33987234351582124, "eval_nq_token_set_recall": 0.49062797710110184, "eval_nq_true_num_tokens": 64.0, "step": 20000 }, { "epoch": 3.84, "learning_rate": 0.001, "loss": 3.0772, "step": 20004 }, { "epoch": 3.84, "learning_rate": 0.001, "loss": 3.0713, "step": 20016 }, { "epoch": 3.85, "learning_rate": 0.001, "loss": 3.0829, "step": 20028 }, { "epoch": 3.85, "learning_rate": 0.001, "loss": 3.0653, "step": 20040 }, { "epoch": 3.85, "learning_rate": 0.001, "loss": 3.0761, "step": 20052 }, { "epoch": 3.85, "learning_rate": 0.001, "loss": 3.0795, "step": 20064 }, { "epoch": 3.85, "learning_rate": 0.001, "loss": 3.0683, "step": 20076 }, { "epoch": 3.86, "learning_rate": 0.001, "loss": 3.0669, "step": 20088 }, { "epoch": 3.86, "learning_rate": 0.001, "loss": 3.0715, "step": 20100 }, { "epoch": 3.86, "learning_rate": 0.001, "loss": 3.0731, "step": 20112 }, { "epoch": 3.86, "learning_rate": 0.001, "loss": 3.0724, "step": 20124 }, { "epoch": 3.87, "learning_rate": 0.001, "loss": 3.0855, "step": 20136 }, { "epoch": 3.87, "learning_rate": 0.001, "loss": 3.0827, "step": 20148 }, { "epoch": 3.87, "learning_rate": 0.001, "loss": 3.0745, "step": 20160 }, { "epoch": 3.87, "learning_rate": 0.001, "loss": 3.0708, "step": 20172 }, { "epoch": 3.88, "learning_rate": 0.001, "loss": 3.0722, "step": 20184 }, { "epoch": 3.88, "learning_rate": 0.001, "loss": 3.0702, "step": 20196 }, { "epoch": 3.88, "learning_rate": 0.001, "loss": 3.0823, "step": 20208 }, { "epoch": 3.88, "learning_rate": 0.001, "loss": 3.0812, "step": 20220 }, { "epoch": 3.88, "learning_rate": 0.001, "loss": 3.0858, "step": 20232 }, { "epoch": 3.89, "learning_rate": 0.001, "loss": 3.0743, "step": 20244 }, { "epoch": 3.89, "learning_rate": 0.001, "loss": 3.08, "step": 20256 }, { "epoch": 3.89, "learning_rate": 0.001, "loss": 3.0741, "step": 20268 }, { "epoch": 3.89, "learning_rate": 0.001, "loss": 3.0641, "step": 20280 }, { "epoch": 3.9, "learning_rate": 0.001, "loss": 3.0639, "step": 20292 }, { "epoch": 3.9, "learning_rate": 0.001, "loss": 3.0685, "step": 20304 }, { "epoch": 3.9, "learning_rate": 0.001, "loss": 3.0775, "step": 20316 }, { "epoch": 3.9, "learning_rate": 0.001, "loss": 3.0615, "step": 20328 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 3.0698, "step": 20340 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 3.0754, "step": 20352 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 3.0719, "step": 20364 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 3.0746, "step": 20376 }, { "epoch": 3.91, "learning_rate": 0.001, "loss": 3.0733, "step": 20388 }, { "epoch": 3.92, "learning_rate": 0.001, "loss": 3.0721, "step": 20400 }, { "epoch": 3.92, "learning_rate": 0.001, "loss": 3.0735, "step": 20412 }, { "epoch": 3.92, "learning_rate": 0.001, "loss": 3.0711, "step": 20424 }, { "epoch": 3.92, "learning_rate": 0.001, "loss": 3.0725, "step": 20436 }, { "epoch": 3.93, "learning_rate": 0.001, "loss": 3.0776, "step": 20448 }, { "epoch": 3.93, "learning_rate": 0.001, "loss": 3.0676, "step": 20460 }, { "epoch": 3.93, "learning_rate": 0.001, "loss": 3.0667, "step": 20472 }, { "epoch": 3.93, "learning_rate": 0.001, "loss": 3.0786, "step": 20484 }, { "epoch": 3.94, "learning_rate": 0.001, "loss": 3.0771, "step": 20496 }, { "epoch": 3.94, "learning_rate": 0.001, "loss": 3.0751, "step": 20508 }, { "epoch": 3.94, "learning_rate": 0.001, "loss": 3.0708, "step": 20520 }, { "epoch": 3.94, "learning_rate": 0.001, "loss": 3.0639, "step": 20532 }, { "epoch": 3.94, "learning_rate": 0.001, "loss": 3.0663, "step": 20544 }, { "epoch": 3.95, "learning_rate": 0.001, "loss": 3.0689, "step": 20556 }, { "epoch": 3.95, "learning_rate": 0.001, "loss": 3.0638, "step": 20568 }, { "epoch": 3.95, "learning_rate": 0.001, "loss": 3.0769, "step": 20580 }, { "epoch": 3.95, "learning_rate": 0.001, "loss": 3.0643, "step": 20592 }, { "epoch": 3.96, "learning_rate": 0.001, "loss": 3.0708, "step": 20604 }, { "epoch": 3.96, "learning_rate": 0.001, "loss": 3.0661, "step": 20616 }, { "epoch": 3.96, "eval_ag_news_accuracy": 0.2705625, "eval_ag_news_bleu_score": 3.497625776562606, "eval_ag_news_bleu_score_sem": 0.1243416779453758, "eval_ag_news_emb_cos_sim": 0.6854905486106873, "eval_ag_news_emb_cos_sim_sem": 0.011165756475926632, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.142734527587891, "eval_ag_news_n_ngrams_match_1": 10.81, "eval_ag_news_n_ngrams_match_2": 2.016, "eval_ag_news_n_ngrams_match_3": 0.508, "eval_ag_news_num_pred_words": 45.486, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 62.974792519708714, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.25981317909377943, "eval_ag_news_runtime": 11.3131, "eval_ag_news_samples_per_second": 44.196, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.28457443987735725, "eval_ag_news_token_set_f1_sem": 0.004563032961620619, "eval_ag_news_token_set_precision": 0.2500429050994227, "eval_ag_news_token_set_recall": 0.3547567739726648, "eval_ag_news_true_num_tokens": 56.09375, "step": 20625 }, { "epoch": 3.96, "eval_anthropic_toxic_prompts_accuracy": 0.0889375, "eval_anthropic_toxic_prompts_bleu_score": 1.940812447781033, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07052932065667471, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5502721071243286, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010820657421774925, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.8554270267486572, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.244, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.97, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.276, "eval_anthropic_toxic_prompts_num_pred_words": 47.462, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 47.24878913073644, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.14700750479400432, "eval_anthropic_toxic_prompts_runtime": 9.7977, "eval_anthropic_toxic_prompts_samples_per_second": 51.032, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.2650050236180496, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006014103829789257, "eval_anthropic_toxic_prompts_token_set_precision": 0.29226082959480526, "eval_anthropic_toxic_prompts_token_set_recall": 0.28099879207721473, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 20625 }, { "epoch": 3.96, "eval_arxiv_accuracy": 0.2989375, "eval_arxiv_bleu_score": 3.0788719675519833, "eval_arxiv_bleu_score_sem": 0.08817320989733096, "eval_arxiv_emb_cos_sim": 0.6014991998672485, "eval_arxiv_emb_cos_sim_sem": 0.0078075713191970144, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 4.004692077636719, "eval_arxiv_n_ngrams_match_1": 11.292, "eval_arxiv_n_ngrams_match_2": 1.916, "eval_arxiv_n_ngrams_match_3": 0.336, "eval_arxiv_num_pred_words": 38.952, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 54.854930738325024, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.26215141159546584, "eval_arxiv_runtime": 10.1275, "eval_arxiv_samples_per_second": 49.371, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.26786712182741457, "eval_arxiv_token_set_f1_sem": 0.004083838112782947, "eval_arxiv_token_set_precision": 0.20782117795380609, "eval_arxiv_token_set_recall": 0.40398350550246026, "eval_arxiv_true_num_tokens": 64.0, "step": 20625 }, { "epoch": 3.96, "eval_python_code_alpaca_accuracy": 0.1240625, "eval_python_code_alpaca_bleu_score": 2.9143636046107377, "eval_python_code_alpaca_bleu_score_sem": 0.08848970243259717, "eval_python_code_alpaca_emb_cos_sim": 0.5072420239448547, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01087854545966013, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.7139439582824707, "eval_python_code_alpaca_n_ngrams_match_1": 6.358, "eval_python_code_alpaca_n_ngrams_match_2": 1.294, "eval_python_code_alpaca_n_ngrams_match_3": 0.318, "eval_python_code_alpaca_num_pred_words": 37.594, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 41.015250390274765, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.21857121612457775, "eval_python_code_alpaca_runtime": 9.9235, "eval_python_code_alpaca_samples_per_second": 50.385, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.3449749959334582, "eval_python_code_alpaca_token_set_f1_sem": 0.005456353292137309, "eval_python_code_alpaca_token_set_precision": 0.3258423302136996, "eval_python_code_alpaca_token_set_recall": 0.4124755843303544, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 20625 }, { "epoch": 3.96, "eval_wikibio_accuracy": 0.27915625, "eval_wikibio_bleu_score": 4.900959285731773, "eval_wikibio_bleu_score_sem": 0.17996779787220374, "eval_wikibio_emb_cos_sim": 0.6767225861549377, "eval_wikibio_emb_cos_sim_sem": 0.010583971257249872, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.434642314910889, "eval_wikibio_n_ngrams_match_1": 9.508, "eval_wikibio_n_ngrams_match_2": 2.93, "eval_wikibio_n_ngrams_match_3": 0.972, "eval_wikibio_num_pred_words": 38.054, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 84.3219587862873, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3075396036762009, "eval_wikibio_runtime": 10.0767, "eval_wikibio_samples_per_second": 49.62, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.29780042871675627, "eval_wikibio_token_set_f1_sem": 0.005478605226638085, "eval_wikibio_token_set_precision": 0.3042194005019633, "eval_wikibio_token_set_recall": 0.30874025411839556, "eval_wikibio_true_num_tokens": 61.1328125, "step": 20625 }, { "epoch": 3.96, "eval_nq_accuracy": 0.46215625, "eval_nq_bleu_score": 8.28533813478615, "eval_nq_bleu_score_sem": 0.3660077465844488, "eval_nq_emb_cos_sim": 0.7317532896995544, "eval_nq_emb_cos_sim_sem": 0.009859092878067069, "eval_nq_emb_top1_equal": 0.1796875, "eval_nq_emb_top1_equal_sem": 0.034068008879424266, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6991474628448486, "eval_nq_n_ngrams_match_1": 19.582, "eval_nq_n_ngrams_match_2": 6.286, "eval_nq_n_ngrams_match_3": 2.528, "eval_nq_num_pred_words": 48.442, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.86705160662268, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3683165596709803, "eval_nq_runtime": 10.9532, "eval_nq_samples_per_second": 45.649, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.39445822839836675, "eval_nq_token_set_f1_sem": 0.004759952568872269, "eval_nq_token_set_precision": 0.3384423186274052, "eval_nq_token_set_recall": 0.4880352088176547, "eval_nq_true_num_tokens": 64.0, "step": 20625 }, { "epoch": 3.96, "learning_rate": 0.001, "loss": 3.0717, "step": 20628 }, { "epoch": 3.96, "learning_rate": 0.001, "loss": 3.0771, "step": 20640 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 3.0727, "step": 20652 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 3.0653, "step": 20664 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 3.0626, "step": 20676 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 3.0562, "step": 20688 }, { "epoch": 3.97, "learning_rate": 0.001, "loss": 3.0685, "step": 20700 }, { "epoch": 3.98, "learning_rate": 0.001, "loss": 3.066, "step": 20712 }, { "epoch": 3.98, "learning_rate": 0.001, "loss": 3.073, "step": 20724 }, { "epoch": 3.98, "learning_rate": 0.001, "loss": 3.0674, "step": 20736 }, { "epoch": 3.98, "learning_rate": 0.001, "loss": 3.0645, "step": 20748 }, { "epoch": 3.99, "learning_rate": 0.001, "loss": 3.066, "step": 20760 }, { "epoch": 3.99, "learning_rate": 0.001, "loss": 3.0712, "step": 20772 }, { "epoch": 3.99, "learning_rate": 0.001, "loss": 3.0664, "step": 20784 }, { "epoch": 3.99, "learning_rate": 0.001, "loss": 3.0487, "step": 20796 }, { "epoch": 4.0, "learning_rate": 0.001, "loss": 3.0565, "step": 20808 }, { "epoch": 4.0, "learning_rate": 0.001, "loss": 3.07, "step": 20820 }, { "epoch": 4.0, "learning_rate": 0.001, "loss": 3.065, "step": 20832 }, { "epoch": 4.0, "learning_rate": 0.001, "loss": 3.0418, "step": 20844 }, { "epoch": 4.0, "learning_rate": 0.001, "loss": 3.0483, "step": 20856 }, { "epoch": 4.01, "learning_rate": 0.001, "loss": 3.0469, "step": 20868 }, { "epoch": 4.01, "learning_rate": 0.001, "loss": 3.0389, "step": 20880 }, { "epoch": 4.01, "learning_rate": 0.001, "loss": 3.0522, "step": 20892 }, { "epoch": 4.01, "learning_rate": 0.001, "loss": 3.0505, "step": 20904 }, { "epoch": 4.02, "learning_rate": 0.001, "loss": 3.0453, "step": 20916 }, { "epoch": 4.02, "learning_rate": 0.001, "loss": 3.039, "step": 20928 }, { "epoch": 4.02, "learning_rate": 0.001, "loss": 3.0363, "step": 20940 }, { "epoch": 4.02, "learning_rate": 0.001, "loss": 3.0476, "step": 20952 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 3.0522, "step": 20964 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 3.0349, "step": 20976 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 3.045, "step": 20988 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 3.0515, "step": 21000 }, { "epoch": 4.03, "learning_rate": 0.001, "loss": 3.0399, "step": 21012 }, { "epoch": 4.04, "learning_rate": 0.001, "loss": 3.0417, "step": 21024 }, { "epoch": 4.04, "learning_rate": 0.001, "loss": 3.0497, "step": 21036 }, { "epoch": 4.04, "learning_rate": 0.001, "loss": 3.038, "step": 21048 }, { "epoch": 4.04, "learning_rate": 0.001, "loss": 3.0283, "step": 21060 }, { "epoch": 4.05, "learning_rate": 0.001, "loss": 3.0494, "step": 21072 }, { "epoch": 4.05, "learning_rate": 0.001, "loss": 3.0447, "step": 21084 }, { "epoch": 4.05, "learning_rate": 0.001, "loss": 3.0374, "step": 21096 }, { "epoch": 4.05, "learning_rate": 0.001, "loss": 3.0452, "step": 21108 }, { "epoch": 4.06, "learning_rate": 0.001, "loss": 3.0368, "step": 21120 }, { "epoch": 4.06, "learning_rate": 0.001, "loss": 3.0386, "step": 21132 }, { "epoch": 4.06, "learning_rate": 0.001, "loss": 3.0436, "step": 21144 }, { "epoch": 4.06, "learning_rate": 0.001, "loss": 3.0521, "step": 21156 }, { "epoch": 4.06, "learning_rate": 0.001, "loss": 3.0326, "step": 21168 }, { "epoch": 4.07, "learning_rate": 0.001, "loss": 3.0423, "step": 21180 }, { "epoch": 4.07, "learning_rate": 0.001, "loss": 3.0463, "step": 21192 }, { "epoch": 4.07, "learning_rate": 0.001, "loss": 3.0457, "step": 21204 }, { "epoch": 4.07, "learning_rate": 0.001, "loss": 3.036, "step": 21216 }, { "epoch": 4.08, "learning_rate": 0.001, "loss": 3.0324, "step": 21228 }, { "epoch": 4.08, "learning_rate": 0.001, "loss": 3.0401, "step": 21240 }, { "epoch": 4.08, "eval_ag_news_accuracy": 0.27059375, "eval_ag_news_bleu_score": 3.4759116114975583, "eval_ag_news_bleu_score_sem": 0.12353977729368248, "eval_ag_news_emb_cos_sim": 0.6965416669845581, "eval_ag_news_emb_cos_sim_sem": 0.010154137939800535, "eval_ag_news_emb_top1_equal": 0.125, "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.144755840301514, "eval_ag_news_n_ngrams_match_1": 10.956, "eval_ag_news_n_ngrams_match_2": 2.052, "eval_ag_news_n_ngrams_match_3": 0.496, "eval_ag_news_num_pred_words": 45.744, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 63.10221300340487, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2660516703895089, "eval_ag_news_runtime": 10.3868, "eval_ag_news_samples_per_second": 48.138, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.2865905061593258, "eval_ag_news_token_set_f1_sem": 0.004343457012985784, "eval_ag_news_token_set_precision": 0.25481817659442135, "eval_ag_news_token_set_recall": 0.3522420521165349, "eval_ag_news_true_num_tokens": 56.09375, "step": 21250 }, { "epoch": 4.08, "eval_anthropic_toxic_prompts_accuracy": 0.08903125, "eval_anthropic_toxic_prompts_bleu_score": 1.9651381662392637, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07138236548140361, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.565031886100769, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011376758087551344, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.8321046829223633, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.342, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 0.992, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.268, "eval_anthropic_toxic_prompts_num_pred_words": 46.912, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 46.15958736479582, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.15272833744161227, "eval_anthropic_toxic_prompts_runtime": 9.9557, "eval_anthropic_toxic_prompts_samples_per_second": 50.222, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.273758563134554, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005968473785356642, "eval_anthropic_toxic_prompts_token_set_precision": 0.29467451304701114, "eval_anthropic_toxic_prompts_token_set_recall": 0.29285949299857483, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 21250 }, { "epoch": 4.08, "eval_arxiv_accuracy": 0.29784375, "eval_arxiv_bleu_score": 2.98095021356775, "eval_arxiv_bleu_score_sem": 0.08693911428074183, "eval_arxiv_emb_cos_sim": 0.6017891764640808, "eval_arxiv_emb_cos_sim_sem": 0.008590137538734595, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.9887657165527344, "eval_arxiv_n_ngrams_match_1": 11.018, "eval_arxiv_n_ngrams_match_2": 1.814, "eval_arxiv_n_ngrams_match_3": 0.312, "eval_arxiv_num_pred_words": 38.388, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 53.988211466319015, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2594239905479414, "eval_arxiv_runtime": 10.2082, "eval_arxiv_samples_per_second": 48.98, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.2642451292254428, "eval_arxiv_token_set_f1_sem": 0.0038802082355823574, "eval_arxiv_token_set_precision": 0.2039543512501798, "eval_arxiv_token_set_recall": 0.408623030396844, "eval_arxiv_true_num_tokens": 64.0, "step": 21250 }, { "epoch": 4.08, "eval_python_code_alpaca_accuracy": 0.125, "eval_python_code_alpaca_bleu_score": 2.8776236589230293, "eval_python_code_alpaca_bleu_score_sem": 0.0943106397693027, "eval_python_code_alpaca_emb_cos_sim": 0.5189061164855957, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009752914429956, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.6420161724090576, "eval_python_code_alpaca_n_ngrams_match_1": 6.198, "eval_python_code_alpaca_n_ngrams_match_2": 1.266, "eval_python_code_alpaca_n_ngrams_match_3": 0.334, "eval_python_code_alpaca_num_pred_words": 37.466, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 38.16871390840089, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.21139995276143378, "eval_python_code_alpaca_runtime": 10.5356, "eval_python_code_alpaca_samples_per_second": 47.458, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.3317504191173044, "eval_python_code_alpaca_token_set_f1_sem": 0.0054573483567321, "eval_python_code_alpaca_token_set_precision": 0.31876378129823757, "eval_python_code_alpaca_token_set_recall": 0.3905314766738178, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 21250 }, { "epoch": 4.08, "eval_wikibio_accuracy": 0.27596875, "eval_wikibio_bleu_score": 4.723717154191338, "eval_wikibio_bleu_score_sem": 0.1826744504726285, "eval_wikibio_emb_cos_sim": 0.6692360639572144, "eval_wikibio_emb_cos_sim_sem": 0.011303136762825924, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.416079521179199, "eval_wikibio_n_ngrams_match_1": 8.93, "eval_wikibio_n_ngrams_match_2": 2.736, "eval_wikibio_n_ngrams_match_3": 0.892, "eval_wikibio_num_pred_words": 37.176, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 82.77114589914449, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2953368064200208, "eval_wikibio_runtime": 10.2446, "eval_wikibio_samples_per_second": 48.806, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.2822498487406837, "eval_wikibio_token_set_f1_sem": 0.005807926081839543, "eval_wikibio_token_set_precision": 0.28459061256356083, "eval_wikibio_token_set_recall": 0.29633119494668225, "eval_wikibio_true_num_tokens": 61.1328125, "step": 21250 }, { "epoch": 4.08, "eval_nq_accuracy": 0.464875, "eval_nq_bleu_score": 8.341417445829022, "eval_nq_bleu_score_sem": 0.3837464849029816, "eval_nq_emb_cos_sim": 0.7356172800064087, "eval_nq_emb_cos_sim_sem": 0.010324239327667963, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.688774347305298, "eval_nq_n_ngrams_match_1": 19.506, "eval_nq_n_ngrams_match_2": 6.232, "eval_nq_n_ngrams_match_3": 2.568, "eval_nq_num_pred_words": 48.698, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.713631062776367, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3659405393239539, "eval_nq_runtime": 10.5198, "eval_nq_samples_per_second": 47.53, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.39202691823958985, "eval_nq_token_set_f1_sem": 0.005032378170226777, "eval_nq_token_set_precision": 0.33906263724145974, "eval_nq_token_set_recall": 0.48259763210815115, "eval_nq_true_num_tokens": 64.0, "step": 21250 }, { "epoch": 4.08, "learning_rate": 0.001, "loss": 3.0532, "step": 21252 }, { "epoch": 4.08, "learning_rate": 0.001, "loss": 3.0389, "step": 21264 }, { "epoch": 4.09, "learning_rate": 0.001, "loss": 3.036, "step": 21276 }, { "epoch": 4.09, "learning_rate": 0.001, "loss": 3.0452, "step": 21288 }, { "epoch": 4.09, "learning_rate": 0.001, "loss": 3.0483, "step": 21300 }, { "epoch": 4.09, "learning_rate": 0.001, "loss": 3.0275, "step": 21312 }, { "epoch": 4.09, "learning_rate": 0.001, "loss": 3.0406, "step": 21324 }, { "epoch": 4.1, "learning_rate": 0.001, "loss": 3.0345, "step": 21336 }, { "epoch": 4.1, "learning_rate": 0.001, "loss": 3.0377, "step": 21348 }, { "epoch": 4.1, "learning_rate": 0.001, "loss": 3.0301, "step": 21360 }, { "epoch": 4.1, "learning_rate": 0.001, "loss": 3.0438, "step": 21372 }, { "epoch": 4.11, "learning_rate": 0.001, "loss": 3.0442, "step": 21384 }, { "epoch": 4.11, "learning_rate": 0.001, "loss": 3.0437, "step": 21396 }, { "epoch": 4.11, "learning_rate": 0.001, "loss": 3.0391, "step": 21408 }, { "epoch": 4.11, "learning_rate": 0.001, "loss": 3.0375, "step": 21420 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 3.0438, "step": 21432 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 3.0337, "step": 21444 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 3.0285, "step": 21456 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 3.0454, "step": 21468 }, { "epoch": 4.12, "learning_rate": 0.001, "loss": 3.0475, "step": 21480 }, { "epoch": 4.13, "learning_rate": 0.001, "loss": 3.0364, "step": 21492 }, { "epoch": 4.13, "learning_rate": 0.001, "loss": 3.0506, "step": 21504 }, { "epoch": 4.13, "learning_rate": 0.001, "loss": 3.0315, "step": 21516 }, { "epoch": 4.13, "learning_rate": 0.001, "loss": 3.0295, "step": 21528 }, { "epoch": 4.14, "learning_rate": 0.001, "loss": 3.0315, "step": 21540 }, { "epoch": 4.14, "learning_rate": 0.001, "loss": 3.0351, "step": 21552 }, { "epoch": 4.14, "learning_rate": 0.001, "loss": 3.0351, "step": 21564 }, { "epoch": 4.14, "learning_rate": 0.001, "loss": 3.0465, "step": 21576 }, { "epoch": 4.15, "learning_rate": 0.001, "loss": 3.0398, "step": 21588 }, { "epoch": 4.15, "learning_rate": 0.001, "loss": 3.028, "step": 21600 }, { "epoch": 4.15, "learning_rate": 0.001, "loss": 3.0413, "step": 21612 }, { "epoch": 4.15, "learning_rate": 0.001, "loss": 3.0389, "step": 21624 }, { "epoch": 4.15, "learning_rate": 0.001, "loss": 3.0374, "step": 21636 }, { "epoch": 4.16, "learning_rate": 0.001, "loss": 3.0321, "step": 21648 }, { "epoch": 4.16, "learning_rate": 0.001, "loss": 3.0385, "step": 21660 }, { "epoch": 4.16, "learning_rate": 0.001, "loss": 3.0372, "step": 21672 }, { "epoch": 4.16, "learning_rate": 0.001, "loss": 3.0316, "step": 21684 }, { "epoch": 4.17, "learning_rate": 0.001, "loss": 3.0331, "step": 21696 }, { "epoch": 4.17, "learning_rate": 0.001, "loss": 3.0242, "step": 21708 }, { "epoch": 4.17, "learning_rate": 0.001, "loss": 3.0321, "step": 21720 }, { "epoch": 4.17, "learning_rate": 0.001, "loss": 3.0334, "step": 21732 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 3.0271, "step": 21744 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 3.036, "step": 21756 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 3.0285, "step": 21768 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 3.0339, "step": 21780 }, { "epoch": 4.18, "learning_rate": 0.001, "loss": 3.0349, "step": 21792 }, { "epoch": 4.19, "learning_rate": 0.001, "loss": 3.0377, "step": 21804 }, { "epoch": 4.19, "learning_rate": 0.001, "loss": 3.0341, "step": 21816 }, { "epoch": 4.19, "learning_rate": 0.001, "loss": 3.0324, "step": 21828 }, { "epoch": 4.19, "learning_rate": 0.001, "loss": 3.0356, "step": 21840 }, { "epoch": 4.2, "learning_rate": 0.001, "loss": 3.0243, "step": 21852 }, { "epoch": 4.2, "learning_rate": 0.001, "loss": 3.0221, "step": 21864 }, { "epoch": 4.2, "eval_ag_news_accuracy": 0.27284375, "eval_ag_news_bleu_score": 3.4996654310486885, "eval_ag_news_bleu_score_sem": 0.12056780449144557, "eval_ag_news_emb_cos_sim": 0.7136399149894714, "eval_ag_news_emb_cos_sim_sem": 0.008702499061231115, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.116077899932861, "eval_ag_news_n_ngrams_match_1": 11.198, "eval_ag_news_n_ngrams_match_2": 2.082, "eval_ag_news_n_ngrams_match_3": 0.51, "eval_ag_news_num_pred_words": 46.234, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 61.31827361668396, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.26791186121739585, "eval_ag_news_runtime": 11.0522, "eval_ag_news_samples_per_second": 45.24, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.2911938956083088, "eval_ag_news_token_set_f1_sem": 0.004323212925036001, "eval_ag_news_token_set_precision": 0.25939685552279124, "eval_ag_news_token_set_recall": 0.3509315539008144, "eval_ag_news_true_num_tokens": 56.09375, "step": 21875 }, { "epoch": 4.2, "eval_anthropic_toxic_prompts_accuracy": 0.09128125, "eval_anthropic_toxic_prompts_bleu_score": 1.9570257695194715, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07294093462571513, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.561536431312561, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011565628652440787, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.8141582012176514, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.43, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.272, "eval_anthropic_toxic_prompts_num_pred_words": 46.624, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 45.338574357723054, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.15432672773448186, "eval_anthropic_toxic_prompts_runtime": 10.1394, "eval_anthropic_toxic_prompts_samples_per_second": 49.313, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.2672176719490324, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005943945345494677, "eval_anthropic_toxic_prompts_token_set_precision": 0.29800353577576116, "eval_anthropic_toxic_prompts_token_set_recall": 0.28088067934522803, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 21875 }, { "epoch": 4.2, "eval_arxiv_accuracy": 0.301, "eval_arxiv_bleu_score": 3.1479586906083488, "eval_arxiv_bleu_score_sem": 0.0944101131232911, "eval_arxiv_emb_cos_sim": 0.6080456376075745, "eval_arxiv_emb_cos_sim_sem": 0.008475128008771477, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.970264196395874, "eval_arxiv_n_ngrams_match_1": 11.418, "eval_arxiv_n_ngrams_match_2": 1.94, "eval_arxiv_n_ngrams_match_3": 0.346, "eval_arxiv_num_pred_words": 39.672, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 52.998531011077056, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.26787391631613255, "eval_arxiv_runtime": 10.4506, "eval_arxiv_samples_per_second": 47.844, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.2688418724035355, "eval_arxiv_token_set_f1_sem": 0.003939178956185688, "eval_arxiv_token_set_precision": 0.21069150806535167, "eval_arxiv_token_set_recall": 0.39311801139098385, "eval_arxiv_true_num_tokens": 64.0, "step": 21875 }, { "epoch": 4.2, "eval_python_code_alpaca_accuracy": 0.12534375, "eval_python_code_alpaca_bleu_score": 2.896494337010263, "eval_python_code_alpaca_bleu_score_sem": 0.09235661841466093, "eval_python_code_alpaca_emb_cos_sim": 0.540421187877655, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01026512968972045, "eval_python_code_alpaca_emb_top1_equal": 0.0234375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.013424676090873717, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.6180341243743896, "eval_python_code_alpaca_n_ngrams_match_1": 6.56, "eval_python_code_alpaca_n_ngrams_match_2": 1.306, "eval_python_code_alpaca_n_ngrams_match_3": 0.336, "eval_python_code_alpaca_num_pred_words": 38.536, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 37.26423890893017, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.22576526748087372, "eval_python_code_alpaca_runtime": 11.6631, "eval_python_code_alpaca_samples_per_second": 42.87, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.3462301535756837, "eval_python_code_alpaca_token_set_f1_sem": 0.005242461543095605, "eval_python_code_alpaca_token_set_precision": 0.3430515376847243, "eval_python_code_alpaca_token_set_recall": 0.3867008615388679, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 21875 }, { "epoch": 4.2, "eval_wikibio_accuracy": 0.27775, "eval_wikibio_bleu_score": 4.848883945765031, "eval_wikibio_bleu_score_sem": 0.17057304405972518, "eval_wikibio_emb_cos_sim": 0.687816321849823, "eval_wikibio_emb_cos_sim_sem": 0.009560151867024377, "eval_wikibio_emb_top1_equal": 0.109375, "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.434406757354736, "eval_wikibio_n_ngrams_match_1": 9.464, "eval_wikibio_n_ngrams_match_2": 2.87, "eval_wikibio_n_ngrams_match_3": 0.928, "eval_wikibio_num_pred_words": 38.342, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 84.30209845096348, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31197704945570526, "eval_wikibio_runtime": 10.0717, "eval_wikibio_samples_per_second": 49.644, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.2942061068802919, "eval_wikibio_token_set_f1_sem": 0.0053270369418450285, "eval_wikibio_token_set_precision": 0.30313770407388707, "eval_wikibio_token_set_recall": 0.2981083597781698, "eval_wikibio_true_num_tokens": 61.1328125, "step": 21875 }, { "epoch": 4.2, "eval_nq_accuracy": 0.46346875, "eval_nq_bleu_score": 8.24216455551241, "eval_nq_bleu_score_sem": 0.3651753215587689, "eval_nq_emb_cos_sim": 0.7345483303070068, "eval_nq_emb_cos_sim_sem": 0.011004716660489047, "eval_nq_emb_top1_equal": 0.171875, "eval_nq_emb_top1_equal_sem": 0.03347745514062371, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.680889129638672, "eval_nq_n_ngrams_match_1": 19.792, "eval_nq_n_ngrams_match_2": 6.292, "eval_nq_n_ngrams_match_3": 2.508, "eval_nq_num_pred_words": 48.768, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.598067101455777, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3747689920413043, "eval_nq_runtime": 10.5887, "eval_nq_samples_per_second": 47.22, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.398663761084407, "eval_nq_token_set_f1_sem": 0.004841490243342143, "eval_nq_token_set_precision": 0.34388651679642374, "eval_nq_token_set_recall": 0.48906836026594014, "eval_nq_true_num_tokens": 64.0, "step": 21875 }, { "epoch": 4.2, "learning_rate": 0.001, "loss": 3.0364, "step": 21876 }, { "epoch": 4.2, "learning_rate": 0.001, "loss": 3.0372, "step": 21888 }, { "epoch": 4.21, "learning_rate": 0.001, "loss": 3.0341, "step": 21900 }, { "epoch": 4.21, "learning_rate": 0.001, "loss": 3.038, "step": 21912 }, { "epoch": 4.21, "learning_rate": 0.001, "loss": 3.0426, "step": 21924 }, { "epoch": 4.21, "learning_rate": 0.001, "loss": 3.0261, "step": 21936 }, { "epoch": 4.21, "learning_rate": 0.001, "loss": 3.0327, "step": 21948 }, { "epoch": 4.22, "learning_rate": 0.001, "loss": 3.0332, "step": 21960 }, { "epoch": 4.22, "learning_rate": 0.001, "loss": 3.023, "step": 21972 }, { "epoch": 4.22, "learning_rate": 0.001, "loss": 3.0322, "step": 21984 }, { "epoch": 4.22, "learning_rate": 0.001, "loss": 3.0373, "step": 21996 }, { "epoch": 4.23, "learning_rate": 0.001, "loss": 3.0294, "step": 22008 }, { "epoch": 4.23, "learning_rate": 0.001, "loss": 3.042, "step": 22020 }, { "epoch": 4.23, "learning_rate": 0.001, "loss": 3.0404, "step": 22032 }, { "epoch": 4.23, "learning_rate": 0.001, "loss": 3.0186, "step": 22044 }, { "epoch": 4.24, "learning_rate": 0.001, "loss": 3.0258, "step": 22056 }, { "epoch": 4.24, "learning_rate": 0.001, "loss": 3.0196, "step": 22068 }, { "epoch": 4.24, "learning_rate": 0.001, "loss": 3.0265, "step": 22080 }, { "epoch": 4.24, "learning_rate": 0.001, "loss": 3.0331, "step": 22092 }, { "epoch": 4.24, "learning_rate": 0.001, "loss": 3.017, "step": 22104 }, { "epoch": 4.25, "learning_rate": 0.001, "loss": 3.0206, "step": 22116 }, { "epoch": 4.25, "learning_rate": 0.001, "loss": 3.0167, "step": 22128 }, { "epoch": 4.25, "learning_rate": 0.001, "loss": 3.0315, "step": 22140 }, { "epoch": 4.25, "learning_rate": 0.001, "loss": 3.0302, "step": 22152 }, { "epoch": 4.26, "learning_rate": 0.001, "loss": 3.0257, "step": 22164 }, { "epoch": 4.26, "learning_rate": 0.001, "loss": 3.0379, "step": 22176 }, { "epoch": 4.26, "learning_rate": 0.001, "loss": 3.0312, "step": 22188 }, { "epoch": 4.26, "learning_rate": 0.001, "loss": 3.0286, "step": 22200 }, { "epoch": 4.26, "learning_rate": 0.001, "loss": 3.0219, "step": 22212 }, { "epoch": 4.27, "learning_rate": 0.001, "loss": 3.0249, "step": 22224 }, { "epoch": 4.27, "learning_rate": 0.001, "loss": 3.0273, "step": 22236 }, { "epoch": 4.27, "learning_rate": 0.001, "loss": 3.0318, "step": 22248 }, { "epoch": 4.27, "learning_rate": 0.001, "loss": 3.0245, "step": 22260 }, { "epoch": 4.28, "learning_rate": 0.001, "loss": 3.0199, "step": 22272 }, { "epoch": 4.28, "learning_rate": 0.001, "loss": 3.0305, "step": 22284 }, { "epoch": 4.28, "learning_rate": 0.001, "loss": 3.0338, "step": 22296 }, { "epoch": 4.28, "learning_rate": 0.001, "loss": 3.0244, "step": 22308 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 3.0136, "step": 22320 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 3.0282, "step": 22332 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 3.0258, "step": 22344 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 3.028, "step": 22356 }, { "epoch": 4.29, "learning_rate": 0.001, "loss": 3.0239, "step": 22368 }, { "epoch": 4.3, "learning_rate": 0.001, "loss": 3.0295, "step": 22380 }, { "epoch": 4.3, "learning_rate": 0.001, "loss": 3.0172, "step": 22392 }, { "epoch": 4.3, "learning_rate": 0.001, "loss": 3.0118, "step": 22404 }, { "epoch": 4.3, "learning_rate": 0.001, "loss": 3.0217, "step": 22416 }, { "epoch": 4.31, "learning_rate": 0.001, "loss": 3.0167, "step": 22428 }, { "epoch": 4.31, "learning_rate": 0.001, "loss": 3.0256, "step": 22440 }, { "epoch": 4.31, "learning_rate": 0.001, "loss": 3.0411, "step": 22452 }, { "epoch": 4.31, "learning_rate": 0.001, "loss": 3.0303, "step": 22464 }, { "epoch": 4.32, "learning_rate": 0.001, "loss": 3.0146, "step": 22476 }, { "epoch": 4.32, "learning_rate": 0.001, "loss": 3.0345, "step": 22488 }, { "epoch": 4.32, "learning_rate": 0.001, "loss": 3.0236, "step": 22500 }, { "epoch": 4.32, "eval_ag_news_accuracy": 0.27275, "eval_ag_news_bleu_score": 3.479657594119364, "eval_ag_news_bleu_score_sem": 0.10953250121045172, "eval_ag_news_emb_cos_sim": 0.6979045867919922, "eval_ag_news_emb_cos_sim_sem": 0.010230112244052977, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.114940166473389, "eval_ag_news_n_ngrams_match_1": 11.128, "eval_ag_news_n_ngrams_match_2": 2.028, "eval_ag_news_n_ngrams_match_3": 0.48, "eval_ag_news_num_pred_words": 45.164, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 61.24854943640071, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.26991540463157826, "eval_ag_news_runtime": 10.5735, "eval_ag_news_samples_per_second": 47.288, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.29149963746302726, "eval_ag_news_token_set_f1_sem": 0.004084346860474833, "eval_ag_news_token_set_precision": 0.2607596166810238, "eval_ag_news_token_set_recall": 0.35559723609736654, "eval_ag_news_true_num_tokens": 56.09375, "step": 22500 }, { "epoch": 4.32, "eval_anthropic_toxic_prompts_accuracy": 0.091, "eval_anthropic_toxic_prompts_bleu_score": 2.1467354899679276, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08375725211606826, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5755442380905151, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0104705165508701, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.811479091644287, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.412, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.066, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.318, "eval_anthropic_toxic_prompts_num_pred_words": 46.702, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 45.217269915621166, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.15658175970509802, "eval_anthropic_toxic_prompts_runtime": 10.7134, "eval_anthropic_toxic_prompts_samples_per_second": 46.671, "eval_anthropic_toxic_prompts_steps_per_second": 0.093, "eval_anthropic_toxic_prompts_token_set_f1": 0.2766725885325113, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005507780084356059, "eval_anthropic_toxic_prompts_token_set_precision": 0.3088017380334575, "eval_anthropic_toxic_prompts_token_set_recall": 0.29196960139901595, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 22500 }, { "epoch": 4.32, "eval_arxiv_accuracy": 0.29853125, "eval_arxiv_bleu_score": 3.0582062513732997, "eval_arxiv_bleu_score_sem": 0.09108078413523835, "eval_arxiv_emb_cos_sim": 0.6037775278091431, "eval_arxiv_emb_cos_sim_sem": 0.008646464826135881, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.9810023307800293, "eval_arxiv_n_ngrams_match_1": 11.232, "eval_arxiv_n_ngrams_match_2": 1.84, "eval_arxiv_n_ngrams_match_3": 0.334, "eval_arxiv_num_pred_words": 37.932, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 53.570702890536445, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2680710291792464, "eval_arxiv_runtime": 10.3676, "eval_arxiv_samples_per_second": 48.227, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.26446141481814944, "eval_arxiv_token_set_f1_sem": 0.003932504418393081, "eval_arxiv_token_set_precision": 0.20663165098964145, "eval_arxiv_token_set_recall": 0.39558942508157624, "eval_arxiv_true_num_tokens": 64.0, "step": 22500 }, { "epoch": 4.32, "eval_python_code_alpaca_accuracy": 0.12871875, "eval_python_code_alpaca_bleu_score": 3.0923369481222225, "eval_python_code_alpaca_bleu_score_sem": 0.10502875370452475, "eval_python_code_alpaca_emb_cos_sim": 0.5569471120834351, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010260473121504503, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.567882776260376, "eval_python_code_alpaca_n_ngrams_match_1": 6.786, "eval_python_code_alpaca_n_ngrams_match_2": 1.412, "eval_python_code_alpaca_n_ngrams_match_3": 0.396, "eval_python_code_alpaca_num_pred_words": 38.674, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 35.44147612529317, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.232692482621829, "eval_python_code_alpaca_runtime": 10.2045, "eval_python_code_alpaca_samples_per_second": 48.998, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.3519745599321546, "eval_python_code_alpaca_token_set_f1_sem": 0.005175825417515168, "eval_python_code_alpaca_token_set_precision": 0.35034291983209737, "eval_python_code_alpaca_token_set_recall": 0.38106866854812355, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 22500 }, { "epoch": 4.32, "eval_wikibio_accuracy": 0.2784375, "eval_wikibio_bleu_score": 4.851032126013448, "eval_wikibio_bleu_score_sem": 0.16954693795931916, "eval_wikibio_emb_cos_sim": 0.6652008891105652, "eval_wikibio_emb_cos_sim_sem": 0.010656725278858277, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.453147888183594, "eval_wikibio_n_ngrams_match_1": 9.05, "eval_wikibio_n_ngrams_match_2": 2.796, "eval_wikibio_n_ngrams_match_3": 0.932, "eval_wikibio_num_pred_words": 36.74, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 85.89691274019133, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3030333520574503, "eval_wikibio_runtime": 10.2186, "eval_wikibio_samples_per_second": 48.93, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.2890650687520406, "eval_wikibio_token_set_f1_sem": 0.005533575726017881, "eval_wikibio_token_set_precision": 0.2931095583767006, "eval_wikibio_token_set_recall": 0.30019058243190055, "eval_wikibio_true_num_tokens": 61.1328125, "step": 22500 }, { "epoch": 4.32, "eval_nq_accuracy": 0.46415625, "eval_nq_bleu_score": 8.012690074462503, "eval_nq_bleu_score_sem": 0.36003050024183963, "eval_nq_emb_cos_sim": 0.7436613440513611, "eval_nq_emb_cos_sim_sem": 0.009756727402657218, "eval_nq_emb_top1_equal": 0.140625, "eval_nq_emb_top1_equal_sem": 0.030847557647994725, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6690025329589844, "eval_nq_n_ngrams_match_1": 19.476, "eval_nq_n_ngrams_match_2": 6.082, "eval_nq_n_ngrams_match_3": 2.396, "eval_nq_num_pred_words": 48.134, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.425572980527024, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3703957760453091, "eval_nq_runtime": 10.4749, "eval_nq_samples_per_second": 47.733, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.39134056474466017, "eval_nq_token_set_f1_sem": 0.00496001197015542, "eval_nq_token_set_precision": 0.33770094633683134, "eval_nq_token_set_recall": 0.4800431626741018, "eval_nq_true_num_tokens": 64.0, "step": 22500 }, { "epoch": 4.32, "learning_rate": 0.001, "loss": 3.0399, "step": 22512 }, { "epoch": 4.32, "learning_rate": 0.001, "loss": 3.0207, "step": 22524 }, { "epoch": 4.33, "learning_rate": 0.001, "loss": 3.0294, "step": 22536 }, { "epoch": 4.33, "learning_rate": 0.001, "loss": 3.0149, "step": 22548 }, { "epoch": 4.33, "learning_rate": 0.001, "loss": 3.032, "step": 22560 }, { "epoch": 4.33, "learning_rate": 0.001, "loss": 3.0277, "step": 22572 }, { "epoch": 4.34, "learning_rate": 0.001, "loss": 3.0223, "step": 22584 }, { "epoch": 4.34, "learning_rate": 0.001, "loss": 3.026, "step": 22596 }, { "epoch": 4.34, "learning_rate": 0.001, "loss": 3.0252, "step": 22608 }, { "epoch": 4.34, "learning_rate": 0.001, "loss": 3.0221, "step": 22620 }, { "epoch": 4.35, "learning_rate": 0.001, "loss": 3.0218, "step": 22632 }, { "epoch": 4.35, "learning_rate": 0.001, "loss": 3.029, "step": 22644 }, { "epoch": 4.35, "learning_rate": 0.001, "loss": 3.0148, "step": 22656 }, { "epoch": 4.35, "learning_rate": 0.001, "loss": 3.0192, "step": 22668 }, { "epoch": 4.35, "learning_rate": 0.001, "loss": 3.0275, "step": 22680 }, { "epoch": 4.36, "learning_rate": 0.001, "loss": 3.0207, "step": 22692 }, { "epoch": 4.36, "learning_rate": 0.001, "loss": 3.0241, "step": 22704 }, { "epoch": 4.36, "learning_rate": 0.001, "loss": 3.0322, "step": 22716 }, { "epoch": 4.36, "learning_rate": 0.001, "loss": 3.0211, "step": 22728 }, { "epoch": 4.37, "learning_rate": 0.001, "loss": 3.0285, "step": 22740 }, { "epoch": 4.37, "learning_rate": 0.001, "loss": 3.0168, "step": 22752 }, { "epoch": 4.37, "learning_rate": 0.001, "loss": 3.0208, "step": 22764 }, { "epoch": 4.37, "learning_rate": 0.001, "loss": 3.0253, "step": 22776 }, { "epoch": 4.38, "learning_rate": 0.001, "loss": 3.0054, "step": 22788 }, { "epoch": 4.38, "learning_rate": 0.001, "loss": 3.0273, "step": 22800 }, { "epoch": 4.38, "learning_rate": 0.001, "loss": 3.0118, "step": 22812 }, { "epoch": 4.38, "learning_rate": 0.001, "loss": 3.0144, "step": 22824 }, { "epoch": 4.38, "learning_rate": 0.001, "loss": 3.0147, "step": 22836 }, { "epoch": 4.39, "learning_rate": 0.001, "loss": 3.0187, "step": 22848 }, { "epoch": 4.39, "learning_rate": 0.001, "loss": 3.0337, "step": 22860 }, { "epoch": 4.39, "learning_rate": 0.001, "loss": 3.0271, "step": 22872 }, { "epoch": 4.39, "learning_rate": 0.001, "loss": 3.0217, "step": 22884 }, { "epoch": 4.4, "learning_rate": 0.001, "loss": 3.0146, "step": 22896 }, { "epoch": 4.4, "learning_rate": 0.001, "loss": 3.0197, "step": 22908 }, { "epoch": 4.4, "learning_rate": 0.001, "loss": 3.0147, "step": 22920 }, { "epoch": 4.4, "learning_rate": 0.001, "loss": 3.0297, "step": 22932 }, { "epoch": 4.41, "learning_rate": 0.001, "loss": 3.0136, "step": 22944 }, { "epoch": 4.41, "learning_rate": 0.001, "loss": 3.0322, "step": 22956 }, { "epoch": 4.41, "learning_rate": 0.001, "loss": 3.0137, "step": 22968 }, { "epoch": 4.41, "learning_rate": 0.001, "loss": 3.0146, "step": 22980 }, { "epoch": 4.41, "learning_rate": 0.001, "loss": 3.0311, "step": 22992 }, { "epoch": 4.42, "learning_rate": 0.001, "loss": 3.0246, "step": 23004 }, { "epoch": 4.42, "learning_rate": 0.001, "loss": 3.0264, "step": 23016 }, { "epoch": 4.42, "learning_rate": 0.001, "loss": 3.0253, "step": 23028 }, { "epoch": 4.42, "learning_rate": 0.001, "loss": 3.0061, "step": 23040 }, { "epoch": 4.43, "learning_rate": 0.001, "loss": 3.0181, "step": 23052 }, { "epoch": 4.43, "learning_rate": 0.001, "loss": 3.0234, "step": 23064 }, { "epoch": 4.43, "learning_rate": 0.001, "loss": 3.0178, "step": 23076 }, { "epoch": 4.43, "learning_rate": 0.001, "loss": 3.0152, "step": 23088 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 3.0074, "step": 23100 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 3.0077, "step": 23112 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 3.0109, "step": 23124 }, { "epoch": 4.44, "eval_ag_news_accuracy": 0.27584375, "eval_ag_news_bleu_score": 3.5716758476163055, "eval_ag_news_bleu_score_sem": 0.11865045689571037, "eval_ag_news_emb_cos_sim": 0.7046859860420227, "eval_ag_news_emb_cos_sim_sem": 0.010416290678993786, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.088851451873779, "eval_ag_news_n_ngrams_match_1": 11.364, "eval_ag_news_n_ngrams_match_2": 2.094, "eval_ag_news_n_ngrams_match_3": 0.508, "eval_ag_news_num_pred_words": 45.782, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 59.67131695170793, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2746066976798046, "eval_ag_news_runtime": 10.4302, "eval_ag_news_samples_per_second": 47.938, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.2950589386254385, "eval_ag_news_token_set_f1_sem": 0.004184535708399377, "eval_ag_news_token_set_precision": 0.2623980523801081, "eval_ag_news_token_set_recall": 0.3553461423306421, "eval_ag_news_true_num_tokens": 56.09375, "step": 23125 }, { "epoch": 4.44, "eval_anthropic_toxic_prompts_accuracy": 0.09171875, "eval_anthropic_toxic_prompts_bleu_score": 2.041813598444441, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07717302756597874, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5595167875289917, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011354339915410596, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.78218936920166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.55, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.098, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.296, "eval_anthropic_toxic_prompts_num_pred_words": 47.714, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 43.91207631710553, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1570886418900885, "eval_anthropic_toxic_prompts_runtime": 10.181, "eval_anthropic_toxic_prompts_samples_per_second": 49.111, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.28102924922991435, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005959128838424839, "eval_anthropic_toxic_prompts_token_set_precision": 0.31281680572408405, "eval_anthropic_toxic_prompts_token_set_recall": 0.29645718755402817, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 23125 }, { "epoch": 4.44, "eval_arxiv_accuracy": 0.30075, "eval_arxiv_bleu_score": 3.040229451995313, "eval_arxiv_bleu_score_sem": 0.0907469743646014, "eval_arxiv_emb_cos_sim": 0.5961027145385742, "eval_arxiv_emb_cos_sim_sem": 0.00931088223404079, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.9455220699310303, "eval_arxiv_n_ngrams_match_1": 11.344, "eval_arxiv_n_ngrams_match_2": 1.888, "eval_arxiv_n_ngrams_match_3": 0.322, "eval_arxiv_num_pred_words": 38.826, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 51.703323817980994, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2663048317626505, "eval_arxiv_runtime": 13.7247, "eval_arxiv_samples_per_second": 36.431, "eval_arxiv_steps_per_second": 0.073, "eval_arxiv_token_set_f1": 0.27179114807822197, "eval_arxiv_token_set_f1_sem": 0.004030174693569544, "eval_arxiv_token_set_precision": 0.21050964701086558, "eval_arxiv_token_set_recall": 0.4164400239696585, "eval_arxiv_true_num_tokens": 64.0, "step": 23125 }, { "epoch": 4.44, "eval_python_code_alpaca_accuracy": 0.126625, "eval_python_code_alpaca_bleu_score": 3.100755500958702, "eval_python_code_alpaca_bleu_score_sem": 0.09742404613250283, "eval_python_code_alpaca_emb_cos_sim": 0.5470049381256104, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009650884153700712, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.6031405925750732, "eval_python_code_alpaca_n_ngrams_match_1": 6.7, "eval_python_code_alpaca_n_ngrams_match_2": 1.444, "eval_python_code_alpaca_n_ngrams_match_3": 0.382, "eval_python_code_alpaca_num_pred_words": 38.786, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 36.71335526621004, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2311987525560811, "eval_python_code_alpaca_runtime": 9.9326, "eval_python_code_alpaca_samples_per_second": 50.339, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.35653511157681234, "eval_python_code_alpaca_token_set_f1_sem": 0.005007514582943103, "eval_python_code_alpaca_token_set_precision": 0.3500110018012393, "eval_python_code_alpaca_token_set_recall": 0.3983983015540674, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 23125 }, { "epoch": 4.44, "eval_wikibio_accuracy": 0.2800625, "eval_wikibio_bleu_score": 5.066281975791866, "eval_wikibio_bleu_score_sem": 0.19559487236539871, "eval_wikibio_emb_cos_sim": 0.6641095280647278, "eval_wikibio_emb_cos_sim_sem": 0.011152282655293307, "eval_wikibio_emb_top1_equal": 0.0703125, "eval_wikibio_emb_top1_equal_sem": 0.022687306110270106, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.403389930725098, "eval_wikibio_n_ngrams_match_1": 9.242, "eval_wikibio_n_ngrams_match_2": 2.836, "eval_wikibio_n_ngrams_match_3": 0.962, "eval_wikibio_num_pred_words": 36.508, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 81.72744999815691, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31267778558798526, "eval_wikibio_runtime": 10.3121, "eval_wikibio_samples_per_second": 48.487, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.29345869824051796, "eval_wikibio_token_set_f1_sem": 0.005511322827209962, "eval_wikibio_token_set_precision": 0.29730328149697105, "eval_wikibio_token_set_recall": 0.305075295519826, "eval_wikibio_true_num_tokens": 61.1328125, "step": 23125 }, { "epoch": 4.44, "eval_nq_accuracy": 0.46690625, "eval_nq_bleu_score": 8.289383059373723, "eval_nq_bleu_score_sem": 0.3546366116227681, "eval_nq_emb_cos_sim": 0.7408619523048401, "eval_nq_emb_cos_sim_sem": 0.01016624396349078, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6497559547424316, "eval_nq_n_ngrams_match_1": 19.884, "eval_nq_n_ngrams_match_2": 6.282, "eval_nq_n_ngrams_match_3": 2.506, "eval_nq_num_pred_words": 48.782, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.150584840828412, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.37610081675690465, "eval_nq_runtime": 10.4976, "eval_nq_samples_per_second": 47.63, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4011719137086193, "eval_nq_token_set_f1_sem": 0.0048304294110281375, "eval_nq_token_set_precision": 0.3474386134157962, "eval_nq_token_set_recall": 0.4929625483803897, "eval_nq_true_num_tokens": 64.0, "step": 23125 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 3.0158, "step": 23136 }, { "epoch": 4.44, "learning_rate": 0.001, "loss": 3.0107, "step": 23148 }, { "epoch": 4.45, "learning_rate": 0.001, "loss": 3.0186, "step": 23160 }, { "epoch": 4.45, "learning_rate": 0.001, "loss": 3.018, "step": 23172 }, { "epoch": 4.45, "learning_rate": 0.001, "loss": 3.0184, "step": 23184 }, { "epoch": 4.45, "learning_rate": 0.001, "loss": 3.0122, "step": 23196 }, { "epoch": 4.46, "learning_rate": 0.001, "loss": 3.0046, "step": 23208 }, { "epoch": 4.46, "learning_rate": 0.001, "loss": 3.0081, "step": 23220 }, { "epoch": 4.46, "learning_rate": 0.001, "loss": 3.0036, "step": 23232 }, { "epoch": 4.46, "learning_rate": 0.001, "loss": 2.9979, "step": 23244 }, { "epoch": 4.47, "learning_rate": 0.001, "loss": 3.0157, "step": 23256 }, { "epoch": 4.47, "learning_rate": 0.001, "loss": 2.995, "step": 23268 }, { "epoch": 4.47, "learning_rate": 0.001, "loss": 3.0125, "step": 23280 }, { "epoch": 4.47, "learning_rate": 0.001, "loss": 3.0147, "step": 23292 }, { "epoch": 4.47, "learning_rate": 0.001, "loss": 3.0167, "step": 23304 }, { "epoch": 4.48, "learning_rate": 0.001, "loss": 3.0188, "step": 23316 }, { "epoch": 4.48, "learning_rate": 0.001, "loss": 3.024, "step": 23328 }, { "epoch": 4.48, "learning_rate": 0.001, "loss": 3.0122, "step": 23340 }, { "epoch": 4.48, "learning_rate": 0.001, "loss": 3.0071, "step": 23352 }, { "epoch": 4.49, "learning_rate": 0.001, "loss": 3.0132, "step": 23364 }, { "epoch": 4.49, "learning_rate": 0.001, "loss": 3.013, "step": 23376 }, { "epoch": 4.49, "learning_rate": 0.001, "loss": 3.0137, "step": 23388 }, { "epoch": 4.49, "learning_rate": 0.001, "loss": 3.0051, "step": 23400 }, { "epoch": 4.5, "learning_rate": 0.001, "loss": 3.0003, "step": 23412 }, { "epoch": 4.5, "learning_rate": 0.001, "loss": 3.0071, "step": 23424 }, { "epoch": 4.5, "learning_rate": 0.001, "loss": 3.0141, "step": 23436 }, { "epoch": 4.5, "learning_rate": 0.001, "loss": 3.0149, "step": 23448 }, { "epoch": 4.5, "learning_rate": 0.001, "loss": 3.0186, "step": 23460 }, { "epoch": 4.51, "learning_rate": 0.001, "loss": 3.0047, "step": 23472 }, { "epoch": 4.51, "learning_rate": 0.001, "loss": 3.012, "step": 23484 }, { "epoch": 4.51, "learning_rate": 0.001, "loss": 3.0037, "step": 23496 }, { "epoch": 4.51, "learning_rate": 0.001, "loss": 3.0127, "step": 23508 }, { "epoch": 4.52, "learning_rate": 0.001, "loss": 3.0153, "step": 23520 }, { "epoch": 4.52, "learning_rate": 0.001, "loss": 2.9896, "step": 23532 }, { "epoch": 4.52, "learning_rate": 0.001, "loss": 3.0096, "step": 23544 }, { "epoch": 4.52, "learning_rate": 0.001, "loss": 3.0086, "step": 23556 }, { "epoch": 4.53, "learning_rate": 0.001, "loss": 3.0142, "step": 23568 }, { "epoch": 4.53, "learning_rate": 0.001, "loss": 3.0054, "step": 23580 }, { "epoch": 4.53, "learning_rate": 0.001, "loss": 3.0078, "step": 23592 }, { "epoch": 4.53, "learning_rate": 0.001, "loss": 3.0056, "step": 23604 }, { "epoch": 4.53, "learning_rate": 0.001, "loss": 3.0137, "step": 23616 }, { "epoch": 4.54, "learning_rate": 0.001, "loss": 3.01, "step": 23628 }, { "epoch": 4.54, "learning_rate": 0.001, "loss": 3.0035, "step": 23640 }, { "epoch": 4.54, "learning_rate": 0.001, "loss": 3.0047, "step": 23652 }, { "epoch": 4.54, "learning_rate": 0.001, "loss": 2.9867, "step": 23664 }, { "epoch": 4.55, "learning_rate": 0.001, "loss": 3.0107, "step": 23676 }, { "epoch": 4.55, "learning_rate": 0.001, "loss": 3.0122, "step": 23688 }, { "epoch": 4.55, "learning_rate": 0.001, "loss": 3.0137, "step": 23700 }, { "epoch": 4.55, "learning_rate": 0.001, "loss": 3.0156, "step": 23712 }, { "epoch": 4.56, "learning_rate": 0.001, "loss": 3.0039, "step": 23724 }, { "epoch": 4.56, "learning_rate": 0.001, "loss": 3.0087, "step": 23736 }, { "epoch": 4.56, "learning_rate": 0.001, "loss": 3.0042, "step": 23748 }, { "epoch": 4.56, "eval_ag_news_accuracy": 0.275875, "eval_ag_news_bleu_score": 3.5397518539873793, "eval_ag_news_bleu_score_sem": 0.12067641621710735, "eval_ag_news_emb_cos_sim": 0.6891865134239197, "eval_ag_news_emb_cos_sim_sem": 0.012085329587627478, "eval_ag_news_emb_top1_equal": 0.15625, "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.0836076736450195, "eval_ag_news_n_ngrams_match_1": 11.06, "eval_ag_news_n_ngrams_match_2": 2.022, "eval_ag_news_n_ngrams_match_3": 0.504, "eval_ag_news_num_pred_words": 45.242, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 59.35923276424905, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.26684539389394224, "eval_ag_news_runtime": 10.4582, "eval_ag_news_samples_per_second": 47.809, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.2902814941135996, "eval_ag_news_token_set_f1_sem": 0.0043703756993541, "eval_ag_news_token_set_precision": 0.25647585702989034, "eval_ag_news_token_set_recall": 0.3601458055025252, "eval_ag_news_true_num_tokens": 56.09375, "step": 23750 }, { "epoch": 4.56, "eval_anthropic_toxic_prompts_accuracy": 0.094, "eval_anthropic_toxic_prompts_bleu_score": 2.193721829876199, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08145666558247794, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5685354471206665, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011214567794858311, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.7131049633026123, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.572, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.132, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.344, "eval_anthropic_toxic_prompts_num_pred_words": 45.96, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 40.980853232638694, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.16141705535505432, "eval_anthropic_toxic_prompts_runtime": 9.789, "eval_anthropic_toxic_prompts_samples_per_second": 51.078, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.28932568749256143, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00625910267907374, "eval_anthropic_toxic_prompts_token_set_precision": 0.314695707907464, "eval_anthropic_toxic_prompts_token_set_recall": 0.30503461660418046, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 23750 }, { "epoch": 4.56, "eval_arxiv_accuracy": 0.3011875, "eval_arxiv_bleu_score": 3.011845975896167, "eval_arxiv_bleu_score_sem": 0.08987892206460524, "eval_arxiv_emb_cos_sim": 0.613021969795227, "eval_arxiv_emb_cos_sim_sem": 0.008598133586150521, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.9512102603912354, "eval_arxiv_n_ngrams_match_1": 11.304, "eval_arxiv_n_ngrams_match_2": 1.814, "eval_arxiv_n_ngrams_match_3": 0.324, "eval_arxiv_num_pred_words": 38.038, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 51.9982602032154, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2663286013534136, "eval_arxiv_runtime": 10.6499, "eval_arxiv_samples_per_second": 46.949, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.2689519282328701, "eval_arxiv_token_set_f1_sem": 0.00411081549883377, "eval_arxiv_token_set_precision": 0.21111625711114063, "eval_arxiv_token_set_recall": 0.39833791665110174, "eval_arxiv_true_num_tokens": 64.0, "step": 23750 }, { "epoch": 4.56, "eval_python_code_alpaca_accuracy": 0.12703125, "eval_python_code_alpaca_bleu_score": 3.0343890236566384, "eval_python_code_alpaca_bleu_score_sem": 0.09849310413209293, "eval_python_code_alpaca_emb_cos_sim": 0.54156094789505, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010395470794373914, "eval_python_code_alpaca_emb_top1_equal": 0.0390625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017191973462108996, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.565164089202881, "eval_python_code_alpaca_n_ngrams_match_1": 6.524, "eval_python_code_alpaca_n_ngrams_match_2": 1.414, "eval_python_code_alpaca_n_ngrams_match_3": 0.394, "eval_python_code_alpaca_num_pred_words": 38.316, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 35.345252702807166, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.22337682857220353, "eval_python_code_alpaca_runtime": 9.6829, "eval_python_code_alpaca_samples_per_second": 51.638, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.3470054609207911, "eval_python_code_alpaca_token_set_f1_sem": 0.005621856247773359, "eval_python_code_alpaca_token_set_precision": 0.33591384926898127, "eval_python_code_alpaca_token_set_recall": 0.3999205686455455, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 23750 }, { "epoch": 4.56, "eval_wikibio_accuracy": 0.279125, "eval_wikibio_bleu_score": 4.8945065719545875, "eval_wikibio_bleu_score_sem": 0.19082108832708544, "eval_wikibio_emb_cos_sim": 0.6529830694198608, "eval_wikibio_emb_cos_sim_sem": 0.01165056377181335, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.377610206604004, "eval_wikibio_n_ngrams_match_1": 8.778, "eval_wikibio_n_ngrams_match_2": 2.776, "eval_wikibio_n_ngrams_match_3": 0.95, "eval_wikibio_num_pred_words": 35.324, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 79.64746480059652, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.2988186803748457, "eval_wikibio_runtime": 9.9426, "eval_wikibio_samples_per_second": 50.289, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.28439968007820415, "eval_wikibio_token_set_f1_sem": 0.005827573427971639, "eval_wikibio_token_set_precision": 0.28420933076874544, "eval_wikibio_token_set_recall": 0.3034326327683142, "eval_wikibio_true_num_tokens": 61.1328125, "step": 23750 }, { "epoch": 4.56, "eval_nq_accuracy": 0.46890625, "eval_nq_bleu_score": 8.609746563204862, "eval_nq_bleu_score_sem": 0.38085086835226667, "eval_nq_emb_cos_sim": 0.739669919013977, "eval_nq_emb_cos_sim_sem": 0.00985908234135494, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.642179250717163, "eval_nq_n_ngrams_match_1": 19.79, "eval_nq_n_ngrams_match_2": 6.48, "eval_nq_n_ngrams_match_3": 2.642, "eval_nq_num_pred_words": 48.38, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 14.043775191219092, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.37494113202813445, "eval_nq_runtime": 10.3068, "eval_nq_samples_per_second": 48.512, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.40122709431653997, "eval_nq_token_set_f1_sem": 0.0048887649521433645, "eval_nq_token_set_precision": 0.3444154276079394, "eval_nq_token_set_recall": 0.4984478763313491, "eval_nq_true_num_tokens": 64.0, "step": 23750 }, { "epoch": 4.56, "learning_rate": 0.001, "loss": 3.0077, "step": 23760 }, { "epoch": 4.56, "learning_rate": 0.001, "loss": 3.0059, "step": 23772 }, { "epoch": 4.57, "learning_rate": 0.001, "loss": 3.0079, "step": 23784 }, { "epoch": 4.57, "learning_rate": 0.001, "loss": 2.998, "step": 23796 }, { "epoch": 4.57, "learning_rate": 0.001, "loss": 2.9974, "step": 23808 }, { "epoch": 4.57, "learning_rate": 0.001, "loss": 2.9964, "step": 23820 }, { "epoch": 4.58, "learning_rate": 0.001, "loss": 3.0073, "step": 23832 }, { "epoch": 4.58, "learning_rate": 0.001, "loss": 3.0082, "step": 23844 }, { "epoch": 4.58, "learning_rate": 0.001, "loss": 3.0082, "step": 23856 }, { "epoch": 4.58, "learning_rate": 0.001, "loss": 3.0003, "step": 23868 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 2.9925, "step": 23880 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 2.9918, "step": 23892 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 2.9889, "step": 23904 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 3.0067, "step": 23916 }, { "epoch": 4.59, "learning_rate": 0.001, "loss": 2.9942, "step": 23928 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 3.0069, "step": 23940 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 3.0039, "step": 23952 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 2.9926, "step": 23964 }, { "epoch": 4.6, "learning_rate": 0.001, "loss": 3.0109, "step": 23976 }, { "epoch": 4.61, "learning_rate": 0.001, "loss": 3.0086, "step": 23988 }, { "epoch": 4.61, "learning_rate": 0.001, "loss": 2.994, "step": 24000 }, { "epoch": 4.61, "learning_rate": 0.001, "loss": 3.0047, "step": 24012 }, { "epoch": 4.61, "learning_rate": 0.001, "loss": 2.9932, "step": 24024 }, { "epoch": 4.62, "learning_rate": 0.001, "loss": 3.0083, "step": 24036 }, { "epoch": 4.62, "learning_rate": 0.001, "loss": 3.0119, "step": 24048 }, { "epoch": 4.62, "learning_rate": 0.001, "loss": 3.0039, "step": 24060 }, { "epoch": 4.62, "learning_rate": 0.001, "loss": 3.0, "step": 24072 }, { "epoch": 4.62, "learning_rate": 0.001, "loss": 3.0063, "step": 24084 }, { "epoch": 4.63, "learning_rate": 0.001, "loss": 2.9986, "step": 24096 }, { "epoch": 4.63, "learning_rate": 0.001, "loss": 3.003, "step": 24108 }, { "epoch": 4.63, "learning_rate": 0.001, "loss": 2.9984, "step": 24120 }, { "epoch": 4.63, "learning_rate": 0.001, "loss": 2.9971, "step": 24132 }, { "epoch": 4.64, "learning_rate": 0.001, "loss": 2.9948, "step": 24144 }, { "epoch": 4.64, "learning_rate": 0.001, "loss": 2.988, "step": 24156 }, { "epoch": 4.64, "learning_rate": 0.001, "loss": 2.9984, "step": 24168 }, { "epoch": 4.64, "learning_rate": 0.001, "loss": 3.0062, "step": 24180 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 3.0138, "step": 24192 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 3.0036, "step": 24204 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 2.9957, "step": 24216 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 2.9934, "step": 24228 }, { "epoch": 4.65, "learning_rate": 0.001, "loss": 2.99, "step": 24240 }, { "epoch": 4.66, "learning_rate": 0.001, "loss": 3.0084, "step": 24252 }, { "epoch": 4.66, "learning_rate": 0.001, "loss": 2.9995, "step": 24264 }, { "epoch": 4.66, "learning_rate": 0.001, "loss": 2.9944, "step": 24276 }, { "epoch": 4.66, "learning_rate": 0.001, "loss": 3.003, "step": 24288 }, { "epoch": 4.67, "learning_rate": 0.001, "loss": 2.9999, "step": 24300 }, { "epoch": 4.67, "learning_rate": 0.001, "loss": 2.9963, "step": 24312 }, { "epoch": 4.67, "learning_rate": 0.001, "loss": 2.9861, "step": 24324 }, { "epoch": 4.67, "learning_rate": 0.001, "loss": 2.9948, "step": 24336 }, { "epoch": 4.68, "learning_rate": 0.001, "loss": 2.9936, "step": 24348 }, { "epoch": 4.68, "learning_rate": 0.001, "loss": 3.0048, "step": 24360 }, { "epoch": 4.68, "learning_rate": 0.001, "loss": 2.9855, "step": 24372 }, { "epoch": 4.68, "eval_ag_news_accuracy": 0.27665625, "eval_ag_news_bleu_score": 3.644798243689116, "eval_ag_news_bleu_score_sem": 0.1299041355323708, "eval_ag_news_emb_cos_sim": 0.6989701986312866, "eval_ag_news_emb_cos_sim_sem": 0.010000469213040798, "eval_ag_news_emb_top1_equal": 0.1171875, "eval_ag_news_emb_top1_equal_sem": 0.02854125312152025, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.0787811279296875, "eval_ag_news_n_ngrams_match_1": 11.234, "eval_ag_news_n_ngrams_match_2": 2.106, "eval_ag_news_n_ngrams_match_3": 0.546, "eval_ag_news_num_pred_words": 44.924, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 59.07342300545964, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.27277121295924545, "eval_ag_news_runtime": 10.424, "eval_ag_news_samples_per_second": 47.966, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.29059431810262737, "eval_ag_news_token_set_f1_sem": 0.004478048907414098, "eval_ag_news_token_set_precision": 0.2588923037101447, "eval_ag_news_token_set_recall": 0.35042289837613494, "eval_ag_news_true_num_tokens": 56.09375, "step": 24375 }, { "epoch": 4.68, "eval_anthropic_toxic_prompts_accuracy": 0.09340625, "eval_anthropic_toxic_prompts_bleu_score": 2.1054566808337136, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08245272589551196, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5708047747612, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011311717597764939, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.742683172225952, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.5, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.12, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.326, "eval_anthropic_toxic_prompts_num_pred_words": 46.772, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 42.21109799901625, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1562143835309157, "eval_anthropic_toxic_prompts_runtime": 10.9041, "eval_anthropic_toxic_prompts_samples_per_second": 45.854, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.2862036345018115, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005856753564749602, "eval_anthropic_toxic_prompts_token_set_precision": 0.3087704328041896, "eval_anthropic_toxic_prompts_token_set_recall": 0.30892651621646117, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 24375 }, { "epoch": 4.68, "eval_arxiv_accuracy": 0.30275, "eval_arxiv_bleu_score": 3.1402423639370554, "eval_arxiv_bleu_score_sem": 0.09006107100832386, "eval_arxiv_emb_cos_sim": 0.619462251663208, "eval_arxiv_emb_cos_sim_sem": 0.008330594611426477, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.937114953994751, "eval_arxiv_n_ngrams_match_1": 11.604, "eval_arxiv_n_ngrams_match_2": 1.886, "eval_arxiv_n_ngrams_match_3": 0.356, "eval_arxiv_num_pred_words": 39.162, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 51.27047005572478, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2718459092513821, "eval_arxiv_runtime": 10.1367, "eval_arxiv_samples_per_second": 49.326, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.2743544716040028, "eval_arxiv_token_set_f1_sem": 0.003996136267723946, "eval_arxiv_token_set_precision": 0.21419747038897358, "eval_arxiv_token_set_recall": 0.40853309748221917, "eval_arxiv_true_num_tokens": 64.0, "step": 24375 }, { "epoch": 4.68, "eval_python_code_alpaca_accuracy": 0.12659375, "eval_python_code_alpaca_bleu_score": 3.072292444778623, "eval_python_code_alpaca_bleu_score_sem": 0.1001348948944544, "eval_python_code_alpaca_emb_cos_sim": 0.5532311201095581, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010581854036656707, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.5399537086486816, "eval_python_code_alpaca_n_ngrams_match_1": 6.662, "eval_python_code_alpaca_n_ngrams_match_2": 1.446, "eval_python_code_alpaca_n_ngrams_match_3": 0.408, "eval_python_code_alpaca_num_pred_words": 38.43, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 34.46532370752109, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.22826250412062343, "eval_python_code_alpaca_runtime": 9.9313, "eval_python_code_alpaca_samples_per_second": 50.346, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.35498431158216953, "eval_python_code_alpaca_token_set_f1_sem": 0.005443706258915265, "eval_python_code_alpaca_token_set_precision": 0.3443360342008259, "eval_python_code_alpaca_token_set_recall": 0.40491970019595763, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 24375 }, { "epoch": 4.68, "eval_wikibio_accuracy": 0.284375, "eval_wikibio_bleu_score": 5.154139234359491, "eval_wikibio_bleu_score_sem": 0.1850111456141721, "eval_wikibio_emb_cos_sim": 0.6732481718063354, "eval_wikibio_emb_cos_sim_sem": 0.010341499779222189, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.316174030303955, "eval_wikibio_n_ngrams_match_1": 9.688, "eval_wikibio_n_ngrams_match_2": 3.034, "eval_wikibio_n_ngrams_match_3": 1.022, "eval_wikibio_num_pred_words": 37.874, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 74.90150848477074, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31958321186210603, "eval_wikibio_runtime": 10.0591, "eval_wikibio_samples_per_second": 49.706, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.30491646176297693, "eval_wikibio_token_set_f1_sem": 0.005066329455645657, "eval_wikibio_token_set_precision": 0.3104134109672024, "eval_wikibio_token_set_recall": 0.31529480944426247, "eval_wikibio_true_num_tokens": 61.1328125, "step": 24375 }, { "epoch": 4.68, "eval_nq_accuracy": 0.4685625, "eval_nq_bleu_score": 8.24652177095543, "eval_nq_bleu_score_sem": 0.3709734844268983, "eval_nq_emb_cos_sim": 0.7405206561088562, "eval_nq_emb_cos_sim_sem": 0.010417774379770272, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6306822299957275, "eval_nq_n_ngrams_match_1": 19.568, "eval_nq_n_ngrams_match_2": 6.206, "eval_nq_n_ngrams_match_3": 2.514, "eval_nq_num_pred_words": 47.938, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.883238233533564, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3706542074447209, "eval_nq_runtime": 10.4574, "eval_nq_samples_per_second": 47.813, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.39825322857668644, "eval_nq_token_set_f1_sem": 0.004999985940615028, "eval_nq_token_set_precision": 0.3407341508509416, "eval_nq_token_set_recall": 0.4980007119901405, "eval_nq_true_num_tokens": 64.0, "step": 24375 }, { "epoch": 4.68, "learning_rate": 0.001, "loss": 2.9834, "step": 24384 }, { "epoch": 4.68, "learning_rate": 0.001, "loss": 2.9895, "step": 24396 }, { "epoch": 4.69, "learning_rate": 0.001, "loss": 3.003, "step": 24408 }, { "epoch": 4.69, "learning_rate": 0.001, "loss": 2.9974, "step": 24420 }, { "epoch": 4.69, "learning_rate": 0.001, "loss": 3.0005, "step": 24432 }, { "epoch": 4.69, "learning_rate": 0.001, "loss": 2.9975, "step": 24444 }, { "epoch": 4.7, "learning_rate": 0.001, "loss": 2.9831, "step": 24456 }, { "epoch": 4.7, "learning_rate": 0.001, "loss": 2.9955, "step": 24468 }, { "epoch": 4.7, "learning_rate": 0.001, "loss": 2.9911, "step": 24480 }, { "epoch": 4.7, "learning_rate": 0.001, "loss": 2.9894, "step": 24492 }, { "epoch": 4.71, "learning_rate": 0.001, "loss": 2.9917, "step": 24504 }, { "epoch": 4.71, "learning_rate": 0.001, "loss": 2.9945, "step": 24516 }, { "epoch": 4.71, "learning_rate": 0.001, "loss": 2.9969, "step": 24528 }, { "epoch": 4.71, "learning_rate": 0.001, "loss": 2.9967, "step": 24540 }, { "epoch": 4.71, "learning_rate": 0.001, "loss": 2.9911, "step": 24552 }, { "epoch": 4.72, "learning_rate": 0.001, "loss": 3.0025, "step": 24564 }, { "epoch": 4.72, "learning_rate": 0.001, "loss": 2.9916, "step": 24576 }, { "epoch": 4.72, "learning_rate": 0.001, "loss": 2.9874, "step": 24588 }, { "epoch": 4.72, "learning_rate": 0.001, "loss": 3.0062, "step": 24600 }, { "epoch": 4.73, "learning_rate": 0.001, "loss": 2.9975, "step": 24612 }, { "epoch": 4.73, "learning_rate": 0.001, "loss": 2.996, "step": 24624 }, { "epoch": 4.73, "learning_rate": 0.001, "loss": 2.9879, "step": 24636 }, { "epoch": 4.73, "learning_rate": 0.001, "loss": 2.9809, "step": 24648 }, { "epoch": 4.74, "learning_rate": 0.001, "loss": 2.9995, "step": 24660 }, { "epoch": 4.74, "learning_rate": 0.001, "loss": 2.993, "step": 24672 }, { "epoch": 4.74, "learning_rate": 0.001, "loss": 2.9959, "step": 24684 }, { "epoch": 4.74, "learning_rate": 0.001, "loss": 2.9916, "step": 24696 }, { "epoch": 4.74, "learning_rate": 0.001, "loss": 2.9916, "step": 24708 }, { "epoch": 4.75, "learning_rate": 0.001, "loss": 2.9848, "step": 24720 }, { "epoch": 4.75, "learning_rate": 0.001, "loss": 2.988, "step": 24732 }, { "epoch": 4.75, "learning_rate": 0.001, "loss": 2.9829, "step": 24744 }, { "epoch": 4.75, "learning_rate": 0.001, "loss": 2.9838, "step": 24756 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 2.9878, "step": 24768 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 3.0004, "step": 24780 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 2.9915, "step": 24792 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 2.9997, "step": 24804 }, { "epoch": 4.76, "learning_rate": 0.001, "loss": 2.9891, "step": 24816 }, { "epoch": 4.77, "learning_rate": 0.001, "loss": 2.988, "step": 24828 }, { "epoch": 4.77, "learning_rate": 0.001, "loss": 2.9948, "step": 24840 }, { "epoch": 4.77, "learning_rate": 0.001, "loss": 2.9882, "step": 24852 }, { "epoch": 4.77, "learning_rate": 0.001, "loss": 2.9821, "step": 24864 }, { "epoch": 4.78, "learning_rate": 0.001, "loss": 2.9825, "step": 24876 }, { "epoch": 4.78, "learning_rate": 0.001, "loss": 2.9981, "step": 24888 }, { "epoch": 4.78, "learning_rate": 0.001, "loss": 2.9728, "step": 24900 }, { "epoch": 4.78, "learning_rate": 0.001, "loss": 2.981, "step": 24912 }, { "epoch": 4.79, "learning_rate": 0.001, "loss": 2.9841, "step": 24924 }, { "epoch": 4.79, "learning_rate": 0.001, "loss": 2.9882, "step": 24936 }, { "epoch": 4.79, "learning_rate": 0.001, "loss": 2.9811, "step": 24948 }, { "epoch": 4.79, "learning_rate": 0.001, "loss": 2.9894, "step": 24960 }, { "epoch": 4.79, "learning_rate": 0.001, "loss": 2.981, "step": 24972 }, { "epoch": 4.8, "learning_rate": 0.001, "loss": 2.9852, "step": 24984 }, { "epoch": 4.8, "learning_rate": 0.001, "loss": 2.9903, "step": 24996 }, { "epoch": 4.8, "eval_ag_news_accuracy": 0.27865625, "eval_ag_news_bleu_score": 3.747678119171919, "eval_ag_news_bleu_score_sem": 0.13037092337861464, "eval_ag_news_emb_cos_sim": 0.7191460132598877, "eval_ag_news_emb_cos_sim_sem": 0.009588980969950336, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.050067901611328, "eval_ag_news_n_ngrams_match_1": 11.504, "eval_ag_news_n_ngrams_match_2": 2.24, "eval_ag_news_n_ngrams_match_3": 0.59, "eval_ag_news_num_pred_words": 45.704, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 57.4013545575879, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2807045959733674, "eval_ag_news_runtime": 10.3313, "eval_ag_news_samples_per_second": 48.397, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.30009010921783813, "eval_ag_news_token_set_f1_sem": 0.004246243981141805, "eval_ag_news_token_set_precision": 0.26924921208357205, "eval_ag_news_token_set_recall": 0.3620972837105612, "eval_ag_news_true_num_tokens": 56.09375, "step": 25000 }, { "epoch": 4.8, "eval_anthropic_toxic_prompts_accuracy": 0.093375, "eval_anthropic_toxic_prompts_bleu_score": 2.0508207428638174, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07412518839495048, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.570429801940918, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011918807389160936, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.737182378768921, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.434, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.064, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.292, "eval_anthropic_toxic_prompts_num_pred_words": 46.018, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 41.979540925032836, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.15830046017744343, "eval_anthropic_toxic_prompts_runtime": 9.9591, "eval_anthropic_toxic_prompts_samples_per_second": 50.206, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.27491261021400826, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005592796108221736, "eval_anthropic_toxic_prompts_token_set_precision": 0.3121017692273368, "eval_anthropic_toxic_prompts_token_set_recall": 0.28282212773580107, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 25000 }, { "epoch": 4.8, "eval_arxiv_accuracy": 0.3051875, "eval_arxiv_bleu_score": 3.1548723493531314, "eval_arxiv_bleu_score_sem": 0.09704557643732598, "eval_arxiv_emb_cos_sim": 0.6206685304641724, "eval_arxiv_emb_cos_sim_sem": 0.009327608605999044, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.902249574661255, "eval_arxiv_n_ngrams_match_1": 11.626, "eval_arxiv_n_ngrams_match_2": 1.958, "eval_arxiv_n_ngrams_match_3": 0.356, "eval_arxiv_num_pred_words": 39.038, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 49.513708699703955, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.27366911104957703, "eval_arxiv_runtime": 10.4192, "eval_arxiv_samples_per_second": 47.988, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.27777530300356346, "eval_arxiv_token_set_f1_sem": 0.004012616347432226, "eval_arxiv_token_set_precision": 0.21788463919753517, "eval_arxiv_token_set_recall": 0.41003887398809874, "eval_arxiv_true_num_tokens": 64.0, "step": 25000 }, { "epoch": 4.8, "eval_python_code_alpaca_accuracy": 0.12925, "eval_python_code_alpaca_bleu_score": 3.2897174079593485, "eval_python_code_alpaca_bleu_score_sem": 0.11101639920129172, "eval_python_code_alpaca_emb_cos_sim": 0.5781171917915344, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011459346154297457, "eval_python_code_alpaca_emb_top1_equal": 0.0390625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.017191973462108996, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.478679895401001, "eval_python_code_alpaca_n_ngrams_match_1": 6.912, "eval_python_code_alpaca_n_ngrams_match_2": 1.528, "eval_python_code_alpaca_n_ngrams_match_3": 0.434, "eval_python_code_alpaca_num_pred_words": 38.444, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 32.416900118419434, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.24115608277986825, "eval_python_code_alpaca_runtime": 10.0311, "eval_python_code_alpaca_samples_per_second": 49.845, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.364292714766359, "eval_python_code_alpaca_token_set_f1_sem": 0.005946908469769408, "eval_python_code_alpaca_token_set_precision": 0.35964730621929325, "eval_python_code_alpaca_token_set_recall": 0.40374706398445964, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 25000 }, { "epoch": 4.8, "eval_wikibio_accuracy": 0.2849375, "eval_wikibio_bleu_score": 5.055257940902527, "eval_wikibio_bleu_score_sem": 0.18018427425628075, "eval_wikibio_emb_cos_sim": 0.6791350245475769, "eval_wikibio_emb_cos_sim_sem": 0.010853030811242847, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.277873992919922, "eval_wikibio_n_ngrams_match_1": 9.44, "eval_wikibio_n_ngrams_match_2": 3.014, "eval_wikibio_n_ngrams_match_3": 0.97, "eval_wikibio_num_pred_words": 37.816, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.08701946476889, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3152798645478886, "eval_wikibio_runtime": 10.3973, "eval_wikibio_samples_per_second": 48.089, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.29810819329785637, "eval_wikibio_token_set_f1_sem": 0.005348598253871104, "eval_wikibio_token_set_precision": 0.3038744686734838, "eval_wikibio_token_set_recall": 0.3066718056248075, "eval_wikibio_true_num_tokens": 61.1328125, "step": 25000 }, { "epoch": 4.8, "eval_nq_accuracy": 0.4716875, "eval_nq_bleu_score": 8.662555415990717, "eval_nq_bleu_score_sem": 0.38224303326992015, "eval_nq_emb_cos_sim": 0.7517282962799072, "eval_nq_emb_cos_sim_sem": 0.010072708252843962, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6186509132385254, "eval_nq_n_ngrams_match_1": 20.014, "eval_nq_n_ngrams_match_2": 6.522, "eval_nq_n_ngrams_match_3": 2.694, "eval_nq_num_pred_words": 47.998, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.71720539636029, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3819900413593659, "eval_nq_runtime": 11.0052, "eval_nq_samples_per_second": 45.433, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4037235175729149, "eval_nq_token_set_f1_sem": 0.004969695235582259, "eval_nq_token_set_precision": 0.3498406145879189, "eval_nq_token_set_recall": 0.49303426319603655, "eval_nq_true_num_tokens": 64.0, "step": 25000 }, { "epoch": 4.8, "learning_rate": 0.001, "loss": 2.9872, "step": 25008 }, { "epoch": 4.8, "learning_rate": 0.001, "loss": 2.9775, "step": 25020 }, { "epoch": 4.81, "learning_rate": 0.001, "loss": 2.9838, "step": 25032 }, { "epoch": 4.81, "learning_rate": 0.001, "loss": 2.9922, "step": 25044 }, { "epoch": 4.81, "learning_rate": 0.001, "loss": 2.9837, "step": 25056 }, { "epoch": 4.81, "learning_rate": 0.001, "loss": 2.9925, "step": 25068 }, { "epoch": 4.82, "learning_rate": 0.001, "loss": 2.9751, "step": 25080 }, { "epoch": 4.82, "learning_rate": 0.001, "loss": 2.9843, "step": 25092 }, { "epoch": 4.82, "learning_rate": 0.001, "loss": 2.97, "step": 25104 }, { "epoch": 4.82, "learning_rate": 0.001, "loss": 2.9744, "step": 25116 }, { "epoch": 4.82, "learning_rate": 0.001, "loss": 2.9828, "step": 25128 }, { "epoch": 4.83, "learning_rate": 0.001, "loss": 2.9771, "step": 25140 }, { "epoch": 4.83, "learning_rate": 0.001, "loss": 2.9746, "step": 25152 }, { "epoch": 4.83, "learning_rate": 0.001, "loss": 2.9837, "step": 25164 }, { "epoch": 4.83, "learning_rate": 0.001, "loss": 2.9931, "step": 25176 }, { "epoch": 4.84, "learning_rate": 0.001, "loss": 2.9874, "step": 25188 }, { "epoch": 4.84, "learning_rate": 0.001, "loss": 2.9817, "step": 25200 }, { "epoch": 4.84, "learning_rate": 0.001, "loss": 2.9874, "step": 25212 }, { "epoch": 4.84, "learning_rate": 0.001, "loss": 2.9911, "step": 25224 }, { "epoch": 4.85, "learning_rate": 0.001, "loss": 2.9712, "step": 25236 }, { "epoch": 4.85, "learning_rate": 0.001, "loss": 2.9869, "step": 25248 }, { "epoch": 4.85, "learning_rate": 0.001, "loss": 2.9764, "step": 25260 }, { "epoch": 4.85, "learning_rate": 0.001, "loss": 2.9789, "step": 25272 }, { "epoch": 4.85, "learning_rate": 0.001, "loss": 2.9787, "step": 25284 }, { "epoch": 4.86, "learning_rate": 0.001, "loss": 2.9921, "step": 25296 }, { "epoch": 4.86, "learning_rate": 0.001, "loss": 2.9915, "step": 25308 }, { "epoch": 4.86, "learning_rate": 0.001, "loss": 2.9751, "step": 25320 }, { "epoch": 4.86, "learning_rate": 0.001, "loss": 2.975, "step": 25332 }, { "epoch": 4.87, "learning_rate": 0.001, "loss": 2.9876, "step": 25344 }, { "epoch": 4.87, "learning_rate": 0.001, "loss": 2.9802, "step": 25356 }, { "epoch": 4.87, "learning_rate": 0.001, "loss": 2.9779, "step": 25368 }, { "epoch": 4.87, "learning_rate": 0.001, "loss": 2.9722, "step": 25380 }, { "epoch": 4.88, "learning_rate": 0.001, "loss": 2.9733, "step": 25392 }, { "epoch": 4.88, "learning_rate": 0.001, "loss": 2.9747, "step": 25404 }, { "epoch": 4.88, "learning_rate": 0.001, "loss": 2.9793, "step": 25416 }, { "epoch": 4.88, "learning_rate": 0.001, "loss": 2.9686, "step": 25428 }, { "epoch": 4.88, "learning_rate": 0.001, "loss": 2.9787, "step": 25440 }, { "epoch": 4.89, "learning_rate": 0.001, "loss": 2.9734, "step": 25452 }, { "epoch": 4.89, "learning_rate": 0.001, "loss": 2.9644, "step": 25464 }, { "epoch": 4.89, "learning_rate": 0.001, "loss": 2.9722, "step": 25476 }, { "epoch": 4.89, "learning_rate": 0.001, "loss": 2.9885, "step": 25488 }, { "epoch": 4.9, "learning_rate": 0.001, "loss": 2.9745, "step": 25500 }, { "epoch": 4.9, "learning_rate": 0.001, "loss": 2.9847, "step": 25512 }, { "epoch": 4.9, "learning_rate": 0.001, "loss": 2.9844, "step": 25524 }, { "epoch": 4.9, "learning_rate": 0.001, "loss": 2.9834, "step": 25536 }, { "epoch": 4.91, "learning_rate": 0.001, "loss": 2.9788, "step": 25548 }, { "epoch": 4.91, "learning_rate": 0.001, "loss": 2.9791, "step": 25560 }, { "epoch": 4.91, "learning_rate": 0.001, "loss": 2.9729, "step": 25572 }, { "epoch": 4.91, "learning_rate": 0.001, "loss": 2.9709, "step": 25584 }, { "epoch": 4.91, "learning_rate": 0.001, "loss": 2.9661, "step": 25596 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 2.9617, "step": 25608 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 2.9788, "step": 25620 }, { "epoch": 4.92, "eval_ag_news_accuracy": 0.27896875, "eval_ag_news_bleu_score": 3.7022313908477926, "eval_ag_news_bleu_score_sem": 0.12702515343757315, "eval_ag_news_emb_cos_sim": 0.7072823643684387, "eval_ag_news_emb_cos_sim_sem": 0.01164344753986009, "eval_ag_news_emb_top1_equal": 0.1640625, "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.034488201141357, "eval_ag_news_n_ngrams_match_1": 11.304, "eval_ag_news_n_ngrams_match_2": 2.178, "eval_ag_news_n_ngrams_match_3": 0.58, "eval_ag_news_num_pred_words": 45.416, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 56.51398904038924, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2743784357785968, "eval_ag_news_runtime": 11.0411, "eval_ag_news_samples_per_second": 45.285, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.2949216520780335, "eval_ag_news_token_set_f1_sem": 0.004517282991213544, "eval_ag_news_token_set_precision": 0.26115752691664934, "eval_ag_news_token_set_recall": 0.36121393688133735, "eval_ag_news_true_num_tokens": 56.09375, "step": 25625 }, { "epoch": 4.92, "eval_anthropic_toxic_prompts_accuracy": 0.0935625, "eval_anthropic_toxic_prompts_bleu_score": 2.1606053533585676, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07841458245842338, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5771173238754272, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010939913905270023, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.7294044494628906, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.628, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.184, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.344, "eval_anthropic_toxic_prompts_num_pred_words": 46.612, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 41.65429353869113, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.16121912497112084, "eval_anthropic_toxic_prompts_runtime": 10.0604, "eval_anthropic_toxic_prompts_samples_per_second": 49.7, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.2870803417654264, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006006123260005836, "eval_anthropic_toxic_prompts_token_set_precision": 0.3190048185939165, "eval_anthropic_toxic_prompts_token_set_recall": 0.298282717306593, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 25625 }, { "epoch": 4.92, "eval_arxiv_accuracy": 0.3046875, "eval_arxiv_bleu_score": 3.2052067354864637, "eval_arxiv_bleu_score_sem": 0.09327914875363522, "eval_arxiv_emb_cos_sim": 0.6312928795814514, "eval_arxiv_emb_cos_sim_sem": 0.008048950930430461, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.9027645587921143, "eval_arxiv_n_ngrams_match_1": 11.724, "eval_arxiv_n_ngrams_match_2": 1.978, "eval_arxiv_n_ngrams_match_3": 0.37, "eval_arxiv_num_pred_words": 38.498, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 49.539214040803564, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.27526463518161226, "eval_arxiv_runtime": 10.4375, "eval_arxiv_samples_per_second": 47.904, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.2777970846073231, "eval_arxiv_token_set_f1_sem": 0.0041335483794326805, "eval_arxiv_token_set_precision": 0.218081399165317, "eval_arxiv_token_set_recall": 0.40698031821118064, "eval_arxiv_true_num_tokens": 64.0, "step": 25625 }, { "epoch": 4.92, "eval_python_code_alpaca_accuracy": 0.13034375, "eval_python_code_alpaca_bleu_score": 3.1045425556691137, "eval_python_code_alpaca_bleu_score_sem": 0.0982175518651635, "eval_python_code_alpaca_emb_cos_sim": 0.5631523132324219, "eval_python_code_alpaca_emb_cos_sim_sem": 0.012085411247146467, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.4732348918914795, "eval_python_code_alpaca_n_ngrams_match_1": 6.852, "eval_python_code_alpaca_n_ngrams_match_2": 1.516, "eval_python_code_alpaca_n_ngrams_match_3": 0.398, "eval_python_code_alpaca_num_pred_words": 38.776, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 32.24086966164758, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.23800618869223422, "eval_python_code_alpaca_runtime": 10.3898, "eval_python_code_alpaca_samples_per_second": 48.124, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.36076042420434024, "eval_python_code_alpaca_token_set_f1_sem": 0.005528680813819971, "eval_python_code_alpaca_token_set_precision": 0.35556375357612696, "eval_python_code_alpaca_token_set_recall": 0.39794591627232334, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 25625 }, { "epoch": 4.92, "eval_wikibio_accuracy": 0.2818125, "eval_wikibio_bleu_score": 5.2276160731931, "eval_wikibio_bleu_score_sem": 0.17925560898671766, "eval_wikibio_emb_cos_sim": 0.7021856307983398, "eval_wikibio_emb_cos_sim_sem": 0.008435994659929112, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.346080303192139, "eval_wikibio_n_ngrams_match_1": 9.65, "eval_wikibio_n_ngrams_match_2": 3.01, "eval_wikibio_n_ngrams_match_3": 1.014, "eval_wikibio_num_pred_words": 37.714, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 77.17536525500653, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.324683110435965, "eval_wikibio_runtime": 9.9746, "eval_wikibio_samples_per_second": 50.127, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3040230656977922, "eval_wikibio_token_set_f1_sem": 0.005016679036750332, "eval_wikibio_token_set_precision": 0.3101032576976257, "eval_wikibio_token_set_recall": 0.3109929570823593, "eval_wikibio_true_num_tokens": 61.1328125, "step": 25625 }, { "epoch": 4.92, "eval_nq_accuracy": 0.472125, "eval_nq_bleu_score": 8.57318964466987, "eval_nq_bleu_score_sem": 0.3817501462323276, "eval_nq_emb_cos_sim": 0.7569206357002258, "eval_nq_emb_cos_sim_sem": 0.009972043797898233, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.6137852668762207, "eval_nq_n_ngrams_match_1": 19.866, "eval_nq_n_ngrams_match_2": 6.468, "eval_nq_n_ngrams_match_3": 2.628, "eval_nq_num_pred_words": 48.162, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.65062443688009, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.37813734112369435, "eval_nq_runtime": 10.4245, "eval_nq_samples_per_second": 47.964, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4038984316695101, "eval_nq_token_set_f1_sem": 0.0048201729970428365, "eval_nq_token_set_precision": 0.34842539672564526, "eval_nq_token_set_recall": 0.4960380513072947, "eval_nq_true_num_tokens": 64.0, "step": 25625 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 2.9764, "step": 25632 }, { "epoch": 4.92, "learning_rate": 0.001, "loss": 2.9652, "step": 25644 }, { "epoch": 4.93, "learning_rate": 0.001, "loss": 2.9755, "step": 25656 }, { "epoch": 4.93, "learning_rate": 0.001, "loss": 2.9691, "step": 25668 }, { "epoch": 4.93, "learning_rate": 0.001, "loss": 2.9727, "step": 25680 }, { "epoch": 4.93, "learning_rate": 0.001, "loss": 2.9732, "step": 25692 }, { "epoch": 4.94, "learning_rate": 0.001, "loss": 2.9714, "step": 25704 }, { "epoch": 4.94, "learning_rate": 0.001, "loss": 2.9798, "step": 25716 }, { "epoch": 4.94, "learning_rate": 0.001, "loss": 2.9718, "step": 25728 }, { "epoch": 4.94, "learning_rate": 0.001, "loss": 2.9744, "step": 25740 }, { "epoch": 4.94, "learning_rate": 0.001, "loss": 2.9746, "step": 25752 }, { "epoch": 4.95, "learning_rate": 0.001, "loss": 2.9751, "step": 25764 }, { "epoch": 4.95, "learning_rate": 0.001, "loss": 2.9773, "step": 25776 }, { "epoch": 4.95, "learning_rate": 0.001, "loss": 2.9768, "step": 25788 }, { "epoch": 4.95, "learning_rate": 0.001, "loss": 2.9673, "step": 25800 }, { "epoch": 4.96, "learning_rate": 0.001, "loss": 2.9802, "step": 25812 }, { "epoch": 4.96, "learning_rate": 0.001, "loss": 2.9793, "step": 25824 }, { "epoch": 4.96, "learning_rate": 0.001, "loss": 2.9696, "step": 25836 }, { "epoch": 4.96, "learning_rate": 0.001, "loss": 2.9765, "step": 25848 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 2.9648, "step": 25860 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 2.9728, "step": 25872 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 2.9693, "step": 25884 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 2.9768, "step": 25896 }, { "epoch": 4.97, "learning_rate": 0.001, "loss": 2.9594, "step": 25908 }, { "epoch": 4.98, "learning_rate": 0.001, "loss": 2.9719, "step": 25920 }, { "epoch": 4.98, "learning_rate": 0.001, "loss": 2.9769, "step": 25932 }, { "epoch": 4.98, "learning_rate": 0.001, "loss": 2.9644, "step": 25944 }, { "epoch": 4.98, "learning_rate": 0.001, "loss": 2.9695, "step": 25956 }, { "epoch": 4.99, "learning_rate": 0.001, "loss": 2.9589, "step": 25968 }, { "epoch": 4.99, "learning_rate": 0.001, "loss": 2.9766, "step": 25980 }, { "epoch": 4.99, "learning_rate": 0.001, "loss": 2.9715, "step": 25992 }, { "epoch": 4.99, "learning_rate": 0.001, "loss": 2.9821, "step": 26004 }, { "epoch": 5.0, "learning_rate": 0.001, "loss": 2.9693, "step": 26016 }, { "epoch": 5.0, "learning_rate": 0.001, "loss": 2.9704, "step": 26028 }, { "epoch": 5.0, "learning_rate": 0.001, "loss": 2.987, "step": 26040 }, { "epoch": 5.0, "learning_rate": 0.001, "loss": 2.9567, "step": 26052 }, { "epoch": 5.0, "learning_rate": 0.001, "loss": 2.9465, "step": 26064 }, { "epoch": 5.01, "learning_rate": 0.001, "loss": 2.9494, "step": 26076 }, { "epoch": 5.01, "learning_rate": 0.001, "loss": 2.9596, "step": 26088 }, { "epoch": 5.01, "learning_rate": 0.001, "loss": 2.9562, "step": 26100 }, { "epoch": 5.01, "learning_rate": 0.001, "loss": 2.9507, "step": 26112 }, { "epoch": 5.02, "learning_rate": 0.001, "loss": 2.9448, "step": 26124 }, { "epoch": 5.02, "learning_rate": 0.001, "loss": 2.9441, "step": 26136 }, { "epoch": 5.02, "learning_rate": 0.001, "loss": 2.9503, "step": 26148 }, { "epoch": 5.02, "learning_rate": 0.001, "loss": 2.9529, "step": 26160 }, { "epoch": 5.03, "learning_rate": 0.001, "loss": 2.9468, "step": 26172 }, { "epoch": 5.03, "learning_rate": 0.001, "loss": 2.9545, "step": 26184 }, { "epoch": 5.03, "learning_rate": 0.001, "loss": 2.9493, "step": 26196 }, { "epoch": 5.03, "learning_rate": 0.001, "loss": 2.9541, "step": 26208 }, { "epoch": 5.03, "learning_rate": 0.001, "loss": 2.9392, "step": 26220 }, { "epoch": 5.04, "learning_rate": 0.001, "loss": 2.943, "step": 26232 }, { "epoch": 5.04, "learning_rate": 0.001, "loss": 2.9556, "step": 26244 }, { "epoch": 5.04, "eval_ag_news_accuracy": 0.28015625, "eval_ag_news_bleu_score": 3.736718599536026, "eval_ag_news_bleu_score_sem": 0.1368179259557509, "eval_ag_news_emb_cos_sim": 0.7063626050949097, "eval_ag_news_emb_cos_sim_sem": 0.01123980322040421, "eval_ag_news_emb_top1_equal": 0.1640625, "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.027685642242432, "eval_ag_news_n_ngrams_match_1": 11.524, "eval_ag_news_n_ngrams_match_2": 2.202, "eval_ag_news_n_ngrams_match_3": 0.56, "eval_ag_news_num_pred_words": 45.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 56.13085392836912, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.27783993693430153, "eval_ag_news_runtime": 10.1232, "eval_ag_news_samples_per_second": 49.392, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.29647039914663714, "eval_ag_news_token_set_f1_sem": 0.004571673438081544, "eval_ag_news_token_set_precision": 0.2680941037969084, "eval_ag_news_token_set_recall": 0.35446756968113474, "eval_ag_news_true_num_tokens": 56.09375, "step": 26250 }, { "epoch": 5.04, "eval_anthropic_toxic_prompts_accuracy": 0.0965, "eval_anthropic_toxic_prompts_bleu_score": 2.147582886640655, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07966992712992176, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5788730382919312, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010722127309579567, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.7207133769989014, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.706, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.176, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.328, "eval_anthropic_toxic_prompts_num_pred_words": 46.68, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 41.29384168060291, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1654803392623658, "eval_anthropic_toxic_prompts_runtime": 9.2523, "eval_anthropic_toxic_prompts_samples_per_second": 54.041, "eval_anthropic_toxic_prompts_steps_per_second": 0.108, "eval_anthropic_toxic_prompts_token_set_f1": 0.2937395978643475, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0061159346555026035, "eval_anthropic_toxic_prompts_token_set_precision": 0.3280652517015705, "eval_anthropic_toxic_prompts_token_set_recall": 0.3076924694441835, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 26250 }, { "epoch": 5.04, "eval_arxiv_accuracy": 0.3036875, "eval_arxiv_bleu_score": 3.2569561892362464, "eval_arxiv_bleu_score_sem": 0.0994423042261703, "eval_arxiv_emb_cos_sim": 0.6460261344909668, "eval_arxiv_emb_cos_sim_sem": 0.008178726345351568, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8972249031066895, "eval_arxiv_n_ngrams_match_1": 11.862, "eval_arxiv_n_ngrams_match_2": 2.13, "eval_arxiv_n_ngrams_match_3": 0.39, "eval_arxiv_num_pred_words": 39.218, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 49.265542574801145, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2814910108717771, "eval_arxiv_runtime": 9.74, "eval_arxiv_samples_per_second": 51.335, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.2803151791516185, "eval_arxiv_token_set_f1_sem": 0.004112967583742836, "eval_arxiv_token_set_precision": 0.22161137838009956, "eval_arxiv_token_set_recall": 0.4078977490691022, "eval_arxiv_true_num_tokens": 64.0, "step": 26250 }, { "epoch": 5.04, "eval_python_code_alpaca_accuracy": 0.12959375, "eval_python_code_alpaca_bleu_score": 3.0913667214150835, "eval_python_code_alpaca_bleu_score_sem": 0.10517243681692368, "eval_python_code_alpaca_emb_cos_sim": 0.5842911005020142, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011832732987789562, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.462963342666626, "eval_python_code_alpaca_n_ngrams_match_1": 6.734, "eval_python_code_alpaca_n_ngrams_match_2": 1.478, "eval_python_code_alpaca_n_ngrams_match_3": 0.408, "eval_python_code_alpaca_num_pred_words": 38.17, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 31.911400955585695, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.23211424000125105, "eval_python_code_alpaca_runtime": 10.5042, "eval_python_code_alpaca_samples_per_second": 47.6, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.3534046559038142, "eval_python_code_alpaca_token_set_f1_sem": 0.005877893579494761, "eval_python_code_alpaca_token_set_precision": 0.3486447101786397, "eval_python_code_alpaca_token_set_recall": 0.39743856447173775, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 26250 }, { "epoch": 5.04, "eval_wikibio_accuracy": 0.28428125, "eval_wikibio_bleu_score": 5.136802617005691, "eval_wikibio_bleu_score_sem": 0.1775725642513426, "eval_wikibio_emb_cos_sim": 0.6929802894592285, "eval_wikibio_emb_cos_sim_sem": 0.008306888326228116, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.312255859375, "eval_wikibio_n_ngrams_match_1": 9.482, "eval_wikibio_n_ngrams_match_2": 3.032, "eval_wikibio_n_ngrams_match_3": 1.018, "eval_wikibio_num_pred_words": 37.272, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 74.60860576786769, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3142983480408808, "eval_wikibio_runtime": 9.4538, "eval_wikibio_samples_per_second": 52.889, "eval_wikibio_steps_per_second": 0.106, "eval_wikibio_token_set_f1": 0.29996201287412866, "eval_wikibio_token_set_f1_sem": 0.005286970493581309, "eval_wikibio_token_set_precision": 0.30351962058207854, "eval_wikibio_token_set_recall": 0.3143049085469827, "eval_wikibio_true_num_tokens": 61.1328125, "step": 26250 }, { "epoch": 5.04, "eval_nq_accuracy": 0.4734375, "eval_nq_bleu_score": 8.626090223326957, "eval_nq_bleu_score_sem": 0.3803047162378017, "eval_nq_emb_cos_sim": 0.7519087195396423, "eval_nq_emb_cos_sim_sem": 0.009378460095816454, "eval_nq_emb_top1_equal": 0.15625, "eval_nq_emb_top1_equal_sem": 0.03221922156442571, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5983827114105225, "eval_nq_n_ngrams_match_1": 20.114, "eval_nq_n_ngrams_match_2": 6.486, "eval_nq_n_ngrams_match_3": 2.66, "eval_nq_num_pred_words": 48.74, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.441980883644657, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3812398567184436, "eval_nq_runtime": 9.9152, "eval_nq_samples_per_second": 50.428, "eval_nq_steps_per_second": 0.101, "eval_nq_token_set_f1": 0.4052964394282942, "eval_nq_token_set_f1_sem": 0.0048446113674483204, "eval_nq_token_set_precision": 0.3521700946763356, "eval_nq_token_set_recall": 0.493707980858585, "eval_nq_true_num_tokens": 64.0, "step": 26250 }, { "epoch": 5.04, "learning_rate": 0.001, "loss": 2.9539, "step": 26256 }, { "epoch": 5.04, "learning_rate": 0.001, "loss": 2.9453, "step": 26268 }, { "epoch": 5.05, "learning_rate": 0.001, "loss": 2.9525, "step": 26280 }, { "epoch": 5.05, "learning_rate": 0.001, "loss": 2.9562, "step": 26292 }, { "epoch": 5.05, "learning_rate": 0.001, "loss": 2.9446, "step": 26304 }, { "epoch": 5.05, "learning_rate": 0.001, "loss": 2.9547, "step": 26316 }, { "epoch": 5.06, "learning_rate": 0.001, "loss": 2.9443, "step": 26328 }, { "epoch": 5.06, "learning_rate": 0.001, "loss": 2.9527, "step": 26340 }, { "epoch": 5.06, "learning_rate": 0.001, "loss": 2.9498, "step": 26352 }, { "epoch": 5.06, "learning_rate": 0.001, "loss": 2.9438, "step": 26364 }, { "epoch": 5.06, "learning_rate": 0.001, "loss": 2.9558, "step": 26376 }, { "epoch": 5.07, "learning_rate": 0.001, "loss": 2.9402, "step": 26388 }, { "epoch": 5.07, "learning_rate": 0.001, "loss": 2.9529, "step": 26400 }, { "epoch": 5.07, "learning_rate": 0.001, "loss": 2.9486, "step": 26412 }, { "epoch": 5.07, "learning_rate": 0.001, "loss": 2.9571, "step": 26424 }, { "epoch": 5.08, "learning_rate": 0.001, "loss": 2.9421, "step": 26436 }, { "epoch": 5.08, "learning_rate": 0.001, "loss": 2.9374, "step": 26448 }, { "epoch": 5.08, "learning_rate": 0.001, "loss": 2.9444, "step": 26460 }, { "epoch": 5.08, "learning_rate": 0.001, "loss": 2.9479, "step": 26472 }, { "epoch": 5.09, "learning_rate": 0.001, "loss": 2.9473, "step": 26484 }, { "epoch": 5.09, "learning_rate": 0.001, "loss": 2.9517, "step": 26496 }, { "epoch": 5.09, "learning_rate": 0.001, "loss": 2.9535, "step": 26508 }, { "epoch": 5.09, "learning_rate": 0.001, "loss": 2.9437, "step": 26520 }, { "epoch": 5.09, "learning_rate": 0.001, "loss": 2.942, "step": 26532 }, { "epoch": 5.1, "learning_rate": 0.001, "loss": 2.9387, "step": 26544 }, { "epoch": 5.1, "learning_rate": 0.001, "loss": 2.95, "step": 26556 }, { "epoch": 5.1, "learning_rate": 0.001, "loss": 2.9463, "step": 26568 }, { "epoch": 5.1, "learning_rate": 0.001, "loss": 2.9495, "step": 26580 }, { "epoch": 5.11, "learning_rate": 0.001, "loss": 2.9408, "step": 26592 }, { "epoch": 5.11, "learning_rate": 0.001, "loss": 2.9454, "step": 26604 }, { "epoch": 5.11, "learning_rate": 0.001, "loss": 2.9522, "step": 26616 }, { "epoch": 5.11, "learning_rate": 0.001, "loss": 2.9466, "step": 26628 }, { "epoch": 5.12, "learning_rate": 0.001, "loss": 2.9369, "step": 26640 }, { "epoch": 5.12, "learning_rate": 0.001, "loss": 2.9425, "step": 26652 }, { "epoch": 5.12, "learning_rate": 0.001, "loss": 2.9396, "step": 26664 }, { "epoch": 5.12, "learning_rate": 0.001, "loss": 2.9392, "step": 26676 }, { "epoch": 5.12, "learning_rate": 0.001, "loss": 2.9503, "step": 26688 }, { "epoch": 5.13, "learning_rate": 0.001, "loss": 2.9481, "step": 26700 }, { "epoch": 5.13, "learning_rate": 0.001, "loss": 2.93, "step": 26712 }, { "epoch": 5.13, "learning_rate": 0.001, "loss": 2.949, "step": 26724 }, { "epoch": 5.13, "learning_rate": 0.001, "loss": 2.9597, "step": 26736 }, { "epoch": 5.14, "learning_rate": 0.001, "loss": 2.9528, "step": 26748 }, { "epoch": 5.14, "learning_rate": 0.001, "loss": 2.9501, "step": 26760 }, { "epoch": 5.14, "learning_rate": 0.001, "loss": 2.9405, "step": 26772 }, { "epoch": 5.14, "learning_rate": 0.001, "loss": 2.9482, "step": 26784 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 2.9616, "step": 26796 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 2.946, "step": 26808 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 2.9434, "step": 26820 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 2.9392, "step": 26832 }, { "epoch": 5.15, "learning_rate": 0.001, "loss": 2.9417, "step": 26844 }, { "epoch": 5.16, "learning_rate": 0.001, "loss": 2.94, "step": 26856 }, { "epoch": 5.16, "learning_rate": 0.001, "loss": 2.9375, "step": 26868 }, { "epoch": 5.16, "eval_ag_news_accuracy": 0.282875, "eval_ag_news_bleu_score": 3.7854512005663534, "eval_ag_news_bleu_score_sem": 0.13142873040472827, "eval_ag_news_emb_cos_sim": 0.7158944010734558, "eval_ag_news_emb_cos_sim_sem": 0.009918688521861473, "eval_ag_news_emb_top1_equal": 0.1484375, "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.006141662597656, "eval_ag_news_n_ngrams_match_1": 11.738, "eval_ag_news_n_ngrams_match_2": 2.338, "eval_ag_news_n_ngrams_match_3": 0.622, "eval_ag_news_num_pred_words": 46.208, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 54.93450528205335, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.28279509556964266, "eval_ag_news_runtime": 11.4196, "eval_ag_news_samples_per_second": 43.785, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.3045864210040348, "eval_ag_news_token_set_f1_sem": 0.004338228889314767, "eval_ag_news_token_set_precision": 0.2740803309127095, "eval_ag_news_token_set_recall": 0.3651097856435764, "eval_ag_news_true_num_tokens": 56.09375, "step": 26875 }, { "epoch": 5.16, "eval_anthropic_toxic_prompts_accuracy": 0.0950625, "eval_anthropic_toxic_prompts_bleu_score": 2.241674797912559, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0798695328560625, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5853935480117798, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010477638051179426, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.7078540325164795, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.76, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.228, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.356, "eval_anthropic_toxic_prompts_num_pred_words": 47.1, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 40.76622958884503, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1654613509662934, "eval_anthropic_toxic_prompts_runtime": 11.2034, "eval_anthropic_toxic_prompts_samples_per_second": 44.629, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.295427675294669, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005867996618701394, "eval_anthropic_toxic_prompts_token_set_precision": 0.32898140873572207, "eval_anthropic_toxic_prompts_token_set_recall": 0.3047311345214721, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 26875 }, { "epoch": 5.16, "eval_arxiv_accuracy": 0.30940625, "eval_arxiv_bleu_score": 3.5055841830513903, "eval_arxiv_bleu_score_sem": 0.11004740666993268, "eval_arxiv_emb_cos_sim": 0.6475298404693604, "eval_arxiv_emb_cos_sim_sem": 0.00764625030525452, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8715243339538574, "eval_arxiv_n_ngrams_match_1": 12.398, "eval_arxiv_n_ngrams_match_2": 2.192, "eval_arxiv_n_ngrams_match_3": 0.466, "eval_arxiv_num_pred_words": 40.926, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 48.015522015383816, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2873588664592228, "eval_arxiv_runtime": 20.6966, "eval_arxiv_samples_per_second": 24.159, "eval_arxiv_steps_per_second": 0.048, "eval_arxiv_token_set_f1": 0.29173504762864994, "eval_arxiv_token_set_f1_sem": 0.00396961779486314, "eval_arxiv_token_set_precision": 0.23397454634483217, "eval_arxiv_token_set_recall": 0.40993250286747596, "eval_arxiv_true_num_tokens": 64.0, "step": 26875 }, { "epoch": 5.16, "eval_python_code_alpaca_accuracy": 0.13178125, "eval_python_code_alpaca_bleu_score": 3.2753286120583023, "eval_python_code_alpaca_bleu_score_sem": 0.12241459065912691, "eval_python_code_alpaca_emb_cos_sim": 0.5975684523582458, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009792527199200176, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.4491353034973145, "eval_python_code_alpaca_n_ngrams_match_1": 7.064, "eval_python_code_alpaca_n_ngrams_match_2": 1.632, "eval_python_code_alpaca_n_ngrams_match_3": 0.466, "eval_python_code_alpaca_num_pred_words": 40.74, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 31.473165802714046, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.24194486437238982, "eval_python_code_alpaca_runtime": 11.0812, "eval_python_code_alpaca_samples_per_second": 45.122, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.37393461236903536, "eval_python_code_alpaca_token_set_f1_sem": 0.005537714648176672, "eval_python_code_alpaca_token_set_precision": 0.37571808813384855, "eval_python_code_alpaca_token_set_recall": 0.4066883117045732, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 26875 }, { "epoch": 5.16, "eval_wikibio_accuracy": 0.28740625, "eval_wikibio_bleu_score": 5.197628431999734, "eval_wikibio_bleu_score_sem": 0.19350876333660505, "eval_wikibio_emb_cos_sim": 0.678260087966919, "eval_wikibio_emb_cos_sim_sem": 0.010448878753971312, "eval_wikibio_emb_top1_equal": 0.0625, "eval_wikibio_emb_top1_equal_sem": 0.02147948148198014, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.2829084396362305, "eval_wikibio_n_ngrams_match_1": 9.4, "eval_wikibio_n_ngrams_match_2": 3.022, "eval_wikibio_n_ngrams_match_3": 1.024, "eval_wikibio_num_pred_words": 37.286, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.45085280451723, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31443435401668496, "eval_wikibio_runtime": 9.7185, "eval_wikibio_samples_per_second": 51.448, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.29930695997135986, "eval_wikibio_token_set_f1_sem": 0.0054978596287111586, "eval_wikibio_token_set_precision": 0.3017414676425142, "eval_wikibio_token_set_recall": 0.3126191063497086, "eval_wikibio_true_num_tokens": 61.1328125, "step": 26875 }, { "epoch": 5.16, "eval_nq_accuracy": 0.4753125, "eval_nq_bleu_score": 8.761326230204498, "eval_nq_bleu_score_sem": 0.3924102676552468, "eval_nq_emb_cos_sim": 0.7646273374557495, "eval_nq_emb_cos_sim_sem": 0.009928361223594723, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5821120738983154, "eval_nq_n_ngrams_match_1": 20.428, "eval_nq_n_ngrams_match_2": 6.58, "eval_nq_n_ngrams_match_3": 2.728, "eval_nq_num_pred_words": 48.832, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.225040946699409, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3861617532351824, "eval_nq_runtime": 10.9479, "eval_nq_samples_per_second": 45.671, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4097102558091227, "eval_nq_token_set_f1_sem": 0.00494036544186437, "eval_nq_token_set_precision": 0.35675489572426566, "eval_nq_token_set_recall": 0.49628606792205227, "eval_nq_true_num_tokens": 64.0, "step": 26875 }, { "epoch": 5.16, "learning_rate": 0.001, "loss": 2.9374, "step": 26880 }, { "epoch": 5.16, "learning_rate": 0.001, "loss": 2.9462, "step": 26892 }, { "epoch": 5.17, "learning_rate": 0.001, "loss": 2.9399, "step": 26904 }, { "epoch": 5.17, "learning_rate": 0.001, "loss": 2.9268, "step": 26916 }, { "epoch": 5.17, "learning_rate": 0.001, "loss": 2.9466, "step": 26928 }, { "epoch": 5.17, "learning_rate": 0.001, "loss": 2.9406, "step": 26940 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 2.9349, "step": 26952 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 2.9415, "step": 26964 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 2.9368, "step": 26976 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 2.9476, "step": 26988 }, { "epoch": 5.18, "learning_rate": 0.001, "loss": 2.9419, "step": 27000 }, { "epoch": 5.19, "learning_rate": 0.001, "loss": 2.9713, "step": 27012 }, { "epoch": 5.19, "learning_rate": 0.001, "loss": 2.9454, "step": 27024 }, { "epoch": 5.19, "learning_rate": 0.001, "loss": 2.9415, "step": 27036 }, { "epoch": 5.19, "learning_rate": 0.001, "loss": 2.9344, "step": 27048 }, { "epoch": 5.2, "learning_rate": 0.001, "loss": 2.9366, "step": 27060 }, { "epoch": 5.2, "learning_rate": 0.001, "loss": 2.9298, "step": 27072 }, { "epoch": 5.2, "learning_rate": 0.001, "loss": 2.9431, "step": 27084 }, { "epoch": 5.2, "learning_rate": 0.001, "loss": 2.9377, "step": 27096 }, { "epoch": 5.21, "learning_rate": 0.001, "loss": 2.9373, "step": 27108 }, { "epoch": 5.21, "learning_rate": 0.001, "loss": 2.937, "step": 27120 }, { "epoch": 5.21, "learning_rate": 0.001, "loss": 2.926, "step": 27132 }, { "epoch": 5.21, "learning_rate": 0.001, "loss": 2.9423, "step": 27144 }, { "epoch": 5.21, "learning_rate": 0.001, "loss": 2.9383, "step": 27156 }, { "epoch": 5.22, "learning_rate": 0.001, "loss": 2.9441, "step": 27168 }, { "epoch": 5.22, "learning_rate": 0.001, "loss": 2.9487, "step": 27180 }, { "epoch": 5.22, "learning_rate": 0.001, "loss": 2.9374, "step": 27192 }, { "epoch": 5.22, "learning_rate": 0.001, "loss": 2.9409, "step": 27204 }, { "epoch": 5.23, "learning_rate": 0.001, "loss": 2.9464, "step": 27216 }, { "epoch": 5.23, "learning_rate": 0.001, "loss": 2.9389, "step": 27228 }, { "epoch": 5.23, "learning_rate": 0.001, "loss": 2.9467, "step": 27240 }, { "epoch": 5.23, "learning_rate": 0.001, "loss": 2.9469, "step": 27252 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 2.9504, "step": 27264 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 2.945, "step": 27276 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 2.9417, "step": 27288 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 2.9406, "step": 27300 }, { "epoch": 5.24, "learning_rate": 0.001, "loss": 2.9359, "step": 27312 }, { "epoch": 5.25, "learning_rate": 0.001, "loss": 2.9336, "step": 27324 }, { "epoch": 5.25, "learning_rate": 0.001, "loss": 2.9484, "step": 27336 }, { "epoch": 5.25, "learning_rate": 0.001, "loss": 2.9372, "step": 27348 }, { "epoch": 5.25, "learning_rate": 0.001, "loss": 2.9401, "step": 27360 }, { "epoch": 5.26, "learning_rate": 0.001, "loss": 2.9363, "step": 27372 }, { "epoch": 5.26, "learning_rate": 0.001, "loss": 2.9451, "step": 27384 }, { "epoch": 5.26, "learning_rate": 0.001, "loss": 2.939, "step": 27396 }, { "epoch": 5.26, "learning_rate": 0.001, "loss": 2.9267, "step": 27408 }, { "epoch": 5.26, "learning_rate": 0.001, "loss": 2.9365, "step": 27420 }, { "epoch": 5.27, "learning_rate": 0.001, "loss": 2.9334, "step": 27432 }, { "epoch": 5.27, "learning_rate": 0.001, "loss": 2.9472, "step": 27444 }, { "epoch": 5.27, "learning_rate": 0.001, "loss": 2.9322, "step": 27456 }, { "epoch": 5.27, "learning_rate": 0.001, "loss": 2.9478, "step": 27468 }, { "epoch": 5.28, "learning_rate": 0.001, "loss": 2.9494, "step": 27480 }, { "epoch": 5.28, "learning_rate": 0.001, "loss": 2.9527, "step": 27492 }, { "epoch": 5.28, "eval_ag_news_accuracy": 0.28053125, "eval_ag_news_bleu_score": 3.7578612855779228, "eval_ag_news_bleu_score_sem": 0.13059503646846266, "eval_ag_news_emb_cos_sim": 0.7259774804115295, "eval_ag_news_emb_cos_sim_sem": 0.009987638131847264, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 4.006907939910889, "eval_ag_news_n_ngrams_match_1": 11.738, "eval_ag_news_n_ngrams_match_2": 2.22, "eval_ag_news_n_ngrams_match_3": 0.59, "eval_ag_news_num_pred_words": 46.208, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 54.97661647953169, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2849091899999439, "eval_ag_news_runtime": 9.5209, "eval_ag_news_samples_per_second": 52.516, "eval_ag_news_steps_per_second": 0.105, "eval_ag_news_token_set_f1": 0.3037773517598862, "eval_ag_news_token_set_f1_sem": 0.004323399708916598, "eval_ag_news_token_set_precision": 0.27500233176900535, "eval_ag_news_token_set_recall": 0.3585740470239835, "eval_ag_news_true_num_tokens": 56.09375, "step": 27500 }, { "epoch": 5.28, "eval_anthropic_toxic_prompts_accuracy": 0.095875, "eval_anthropic_toxic_prompts_bleu_score": 2.3345396780786647, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09228593706829934, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5953093767166138, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010548440146866171, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.7097744941711426, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.944, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.296, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.4, "eval_anthropic_toxic_prompts_num_pred_words": 46.87, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 40.844594794176174, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1721896052541996, "eval_anthropic_toxic_prompts_runtime": 9.4444, "eval_anthropic_toxic_prompts_samples_per_second": 52.942, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.3030983429661299, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006125489978078192, "eval_anthropic_toxic_prompts_token_set_precision": 0.3411348033054161, "eval_anthropic_toxic_prompts_token_set_recall": 0.305753402274649, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 27500 }, { "epoch": 5.28, "eval_arxiv_accuracy": 0.3085625, "eval_arxiv_bleu_score": 3.296061585608656, "eval_arxiv_bleu_score_sem": 0.10416942048217424, "eval_arxiv_emb_cos_sim": 0.6408599615097046, "eval_arxiv_emb_cos_sim_sem": 0.008817862888139528, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8604612350463867, "eval_arxiv_n_ngrams_match_1": 11.928, "eval_arxiv_n_ngrams_match_2": 2.114, "eval_arxiv_n_ngrams_match_3": 0.392, "eval_arxiv_num_pred_words": 38.774, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 47.48724910200668, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.279966132452408, "eval_arxiv_runtime": 10.0851, "eval_arxiv_samples_per_second": 49.578, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.28102966511696453, "eval_arxiv_token_set_f1_sem": 0.004155880176126974, "eval_arxiv_token_set_precision": 0.22268511062260807, "eval_arxiv_token_set_recall": 0.4107942109840077, "eval_arxiv_true_num_tokens": 64.0, "step": 27500 }, { "epoch": 5.28, "eval_python_code_alpaca_accuracy": 0.13365625, "eval_python_code_alpaca_bleu_score": 3.2589135948120718, "eval_python_code_alpaca_bleu_score_sem": 0.11047396042084712, "eval_python_code_alpaca_emb_cos_sim": 0.6059686541557312, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010050184713581939, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.441763162612915, "eval_python_code_alpaca_n_ngrams_match_1": 7.272, "eval_python_code_alpaca_n_ngrams_match_2": 1.61, "eval_python_code_alpaca_n_ngrams_match_3": 0.454, "eval_python_code_alpaca_num_pred_words": 39.792, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 31.24199435157494, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.25177699078380644, "eval_python_code_alpaca_runtime": 9.2088, "eval_python_code_alpaca_samples_per_second": 54.296, "eval_python_code_alpaca_steps_per_second": 0.109, "eval_python_code_alpaca_token_set_f1": 0.3723886277441521, "eval_python_code_alpaca_token_set_f1_sem": 0.005290559946533025, "eval_python_code_alpaca_token_set_precision": 0.3768749211123057, "eval_python_code_alpaca_token_set_recall": 0.39575625410252074, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 27500 }, { "epoch": 5.28, "eval_wikibio_accuracy": 0.2888125, "eval_wikibio_bleu_score": 5.139496459021352, "eval_wikibio_bleu_score_sem": 0.1920110596782717, "eval_wikibio_emb_cos_sim": 0.6766979694366455, "eval_wikibio_emb_cos_sim_sem": 0.011211206583689569, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.278515815734863, "eval_wikibio_n_ngrams_match_1": 9.132, "eval_wikibio_n_ngrams_match_2": 2.938, "eval_wikibio_n_ngrams_match_3": 1.04, "eval_wikibio_num_pred_words": 36.41, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.1333014093377, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.30869399813293014, "eval_wikibio_runtime": 11.6577, "eval_wikibio_samples_per_second": 42.89, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.29523833946066613, "eval_wikibio_token_set_f1_sem": 0.005688691490455236, "eval_wikibio_token_set_precision": 0.29625627495775986, "eval_wikibio_token_set_recall": 0.31488517577506525, "eval_wikibio_true_num_tokens": 61.1328125, "step": 27500 }, { "epoch": 5.28, "eval_nq_accuracy": 0.47734375, "eval_nq_bleu_score": 8.98719360816301, "eval_nq_bleu_score_sem": 0.3950689751367678, "eval_nq_emb_cos_sim": 0.7688852548599243, "eval_nq_emb_cos_sim_sem": 0.009140463427720513, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.576362371444702, "eval_nq_n_ngrams_match_1": 20.522, "eval_nq_n_ngrams_match_2": 6.728, "eval_nq_n_ngrams_match_3": 2.844, "eval_nq_num_pred_words": 48.832, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.149219081783706, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3879656762261471, "eval_nq_runtime": 9.8247, "eval_nq_samples_per_second": 50.892, "eval_nq_steps_per_second": 0.102, "eval_nq_token_set_f1": 0.4120591427760328, "eval_nq_token_set_f1_sem": 0.004981493214156046, "eval_nq_token_set_precision": 0.3583314366492254, "eval_nq_token_set_recall": 0.49836094384856133, "eval_nq_true_num_tokens": 64.0, "step": 27500 }, { "epoch": 5.28, "learning_rate": 0.001, "loss": 2.9319, "step": 27504 }, { "epoch": 5.28, "learning_rate": 0.001, "loss": 2.9308, "step": 27516 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.9335, "step": 27528 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.9443, "step": 27540 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.9353, "step": 27552 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.9288, "step": 27564 }, { "epoch": 5.29, "learning_rate": 0.001, "loss": 2.9284, "step": 27576 }, { "epoch": 5.3, "learning_rate": 0.001, "loss": 2.9443, "step": 27588 }, { "epoch": 5.3, "learning_rate": 0.001, "loss": 2.9342, "step": 27600 }, { "epoch": 5.3, "learning_rate": 0.001, "loss": 2.9305, "step": 27612 }, { "epoch": 5.3, "learning_rate": 0.001, "loss": 2.9397, "step": 27624 }, { "epoch": 5.31, "learning_rate": 0.001, "loss": 2.9419, "step": 27636 }, { "epoch": 5.31, "learning_rate": 0.001, "loss": 2.9427, "step": 27648 }, { "epoch": 5.31, "learning_rate": 0.001, "loss": 2.9476, "step": 27660 }, { "epoch": 5.31, "learning_rate": 0.001, "loss": 2.9398, "step": 27672 }, { "epoch": 5.32, "learning_rate": 0.001, "loss": 2.941, "step": 27684 }, { "epoch": 5.32, "learning_rate": 0.001, "loss": 2.9441, "step": 27696 }, { "epoch": 5.32, "learning_rate": 0.001, "loss": 2.9441, "step": 27708 }, { "epoch": 5.32, "learning_rate": 0.001, "loss": 2.943, "step": 27720 }, { "epoch": 5.32, "learning_rate": 0.001, "loss": 2.9356, "step": 27732 }, { "epoch": 5.33, "learning_rate": 0.001, "loss": 2.937, "step": 27744 }, { "epoch": 5.33, "learning_rate": 0.001, "loss": 2.931, "step": 27756 }, { "epoch": 5.33, "learning_rate": 0.001, "loss": 2.9459, "step": 27768 }, { "epoch": 5.33, "learning_rate": 0.001, "loss": 2.9297, "step": 27780 }, { "epoch": 5.34, "learning_rate": 0.001, "loss": 2.9302, "step": 27792 }, { "epoch": 5.34, "learning_rate": 0.001, "loss": 2.931, "step": 27804 }, { "epoch": 5.34, "learning_rate": 0.001, "loss": 2.9434, "step": 27816 }, { "epoch": 5.34, "learning_rate": 0.001, "loss": 2.9378, "step": 27828 }, { "epoch": 5.35, "learning_rate": 0.001, "loss": 2.9262, "step": 27840 }, { "epoch": 5.35, "learning_rate": 0.001, "loss": 2.9415, "step": 27852 }, { "epoch": 5.35, "learning_rate": 0.001, "loss": 2.9476, "step": 27864 }, { "epoch": 5.35, "learning_rate": 0.001, "loss": 2.94, "step": 27876 }, { "epoch": 5.35, "learning_rate": 0.001, "loss": 2.9443, "step": 27888 }, { "epoch": 5.36, "learning_rate": 0.001, "loss": 2.9338, "step": 27900 }, { "epoch": 5.36, "learning_rate": 0.001, "loss": 2.9523, "step": 27912 }, { "epoch": 5.36, "learning_rate": 0.001, "loss": 2.9416, "step": 27924 }, { "epoch": 5.36, "learning_rate": 0.001, "loss": 2.9283, "step": 27936 }, { "epoch": 5.37, "learning_rate": 0.001, "loss": 2.9286, "step": 27948 }, { "epoch": 5.37, "learning_rate": 0.001, "loss": 2.9437, "step": 27960 }, { "epoch": 5.37, "learning_rate": 0.001, "loss": 2.9341, "step": 27972 }, { "epoch": 5.37, "learning_rate": 0.001, "loss": 2.9415, "step": 27984 }, { "epoch": 5.38, "learning_rate": 0.001, "loss": 2.9261, "step": 27996 }, { "epoch": 5.38, "learning_rate": 0.001, "loss": 2.9347, "step": 28008 }, { "epoch": 5.38, "learning_rate": 0.001, "loss": 2.9229, "step": 28020 }, { "epoch": 5.38, "learning_rate": 0.001, "loss": 2.9346, "step": 28032 }, { "epoch": 5.38, "learning_rate": 0.001, "loss": 2.9342, "step": 28044 }, { "epoch": 5.39, "learning_rate": 0.001, "loss": 2.931, "step": 28056 }, { "epoch": 5.39, "learning_rate": 0.001, "loss": 2.9315, "step": 28068 }, { "epoch": 5.39, "learning_rate": 0.001, "loss": 2.9283, "step": 28080 }, { "epoch": 5.39, "learning_rate": 0.001, "loss": 2.9333, "step": 28092 }, { "epoch": 5.4, "learning_rate": 0.001, "loss": 2.9353, "step": 28104 }, { "epoch": 5.4, "learning_rate": 0.001, "loss": 2.9419, "step": 28116 }, { "epoch": 5.4, "eval_ag_news_accuracy": 0.28425, "eval_ag_news_bleu_score": 3.8629165277375987, "eval_ag_news_bleu_score_sem": 0.135544131175173, "eval_ag_news_emb_cos_sim": 0.7188982963562012, "eval_ag_news_emb_cos_sim_sem": 0.010305849472282545, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.989623546600342, "eval_ag_news_n_ngrams_match_1": 11.71, "eval_ag_news_n_ngrams_match_2": 2.3, "eval_ag_news_n_ngrams_match_3": 0.602, "eval_ag_news_num_pred_words": 45.456, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 54.03454404622978, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2857413940456188, "eval_ag_news_runtime": 9.5872, "eval_ag_news_samples_per_second": 52.153, "eval_ag_news_steps_per_second": 0.104, "eval_ag_news_token_set_f1": 0.30337213456634515, "eval_ag_news_token_set_f1_sem": 0.004370975610579565, "eval_ag_news_token_set_precision": 0.2725525747100057, "eval_ag_news_token_set_recall": 0.3613898275473111, "eval_ag_news_true_num_tokens": 56.09375, "step": 28125 }, { "epoch": 5.4, "eval_anthropic_toxic_prompts_accuracy": 0.0959375, "eval_anthropic_toxic_prompts_bleu_score": 2.272102842149566, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08801381308167477, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.594087541103363, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01074972098300855, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.6989173889160156, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.822, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.218, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.368, "eval_anthropic_toxic_prompts_num_pred_words": 46.744, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 40.403539354564465, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.16950995038576921, "eval_anthropic_toxic_prompts_runtime": 9.5821, "eval_anthropic_toxic_prompts_samples_per_second": 52.181, "eval_anthropic_toxic_prompts_steps_per_second": 0.104, "eval_anthropic_toxic_prompts_token_set_f1": 0.29779781201804956, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005960438905574659, "eval_anthropic_toxic_prompts_token_set_precision": 0.33424624499404704, "eval_anthropic_toxic_prompts_token_set_recall": 0.3051423563684294, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 28125 }, { "epoch": 5.4, "eval_arxiv_accuracy": 0.30684375, "eval_arxiv_bleu_score": 3.457059980301291, "eval_arxiv_bleu_score_sem": 0.11260979517869693, "eval_arxiv_emb_cos_sim": 0.6441875100135803, "eval_arxiv_emb_cos_sim_sem": 0.007731428427550528, "eval_arxiv_emb_top1_equal": 0.1796875, "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8626210689544678, "eval_arxiv_n_ngrams_match_1": 12.296, "eval_arxiv_n_ngrams_match_2": 2.186, "eval_arxiv_n_ngrams_match_3": 0.424, "eval_arxiv_num_pred_words": 39.466, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 47.58992451382261, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.28924850025282833, "eval_arxiv_runtime": 10.2322, "eval_arxiv_samples_per_second": 48.865, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.29142292421585997, "eval_arxiv_token_set_f1_sem": 0.004075060979354117, "eval_arxiv_token_set_precision": 0.23226119987236554, "eval_arxiv_token_set_recall": 0.41309108685453116, "eval_arxiv_true_num_tokens": 64.0, "step": 28125 }, { "epoch": 5.4, "eval_python_code_alpaca_accuracy": 0.1331875, "eval_python_code_alpaca_bleu_score": 3.276823387450839, "eval_python_code_alpaca_bleu_score_sem": 0.1289571953592232, "eval_python_code_alpaca_emb_cos_sim": 0.5843392610549927, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010902956387482033, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.4225106239318848, "eval_python_code_alpaca_n_ngrams_match_1": 6.978, "eval_python_code_alpaca_n_ngrams_match_2": 1.594, "eval_python_code_alpaca_n_ngrams_match_3": 0.48, "eval_python_code_alpaca_num_pred_words": 39.45, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 30.64625974972903, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2434943104821939, "eval_python_code_alpaca_runtime": 10.6502, "eval_python_code_alpaca_samples_per_second": 46.947, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.36531147315177526, "eval_python_code_alpaca_token_set_f1_sem": 0.005839595746561747, "eval_python_code_alpaca_token_set_precision": 0.36269220036771904, "eval_python_code_alpaca_token_set_recall": 0.40091619267996637, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 28125 }, { "epoch": 5.4, "eval_wikibio_accuracy": 0.28459375, "eval_wikibio_bleu_score": 5.13638619529026, "eval_wikibio_bleu_score_sem": 0.18125277923423516, "eval_wikibio_emb_cos_sim": 0.697567880153656, "eval_wikibio_emb_cos_sim_sem": 0.010435320639640963, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.319770812988281, "eval_wikibio_n_ngrams_match_1": 9.674, "eval_wikibio_n_ngrams_match_2": 3.024, "eval_wikibio_n_ngrams_match_3": 1.014, "eval_wikibio_num_pred_words": 38.196, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 75.17139800954351, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.323949620156584, "eval_wikibio_runtime": 9.7845, "eval_wikibio_samples_per_second": 51.101, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3041517611296937, "eval_wikibio_token_set_f1_sem": 0.0048940121649151186, "eval_wikibio_token_set_precision": 0.3106342098196182, "eval_wikibio_token_set_recall": 0.31110706844695096, "eval_wikibio_true_num_tokens": 61.1328125, "step": 28125 }, { "epoch": 5.4, "eval_nq_accuracy": 0.477125, "eval_nq_bleu_score": 8.996300433262643, "eval_nq_bleu_score_sem": 0.39888254366499315, "eval_nq_emb_cos_sim": 0.763920783996582, "eval_nq_emb_cos_sim_sem": 0.009703978646112311, "eval_nq_emb_top1_equal": 0.2109375, "eval_nq_emb_top1_equal_sem": 0.03620184850179216, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5666260719299316, "eval_nq_n_ngrams_match_1": 20.432, "eval_nq_n_ngrams_match_2": 6.794, "eval_nq_n_ngrams_match_3": 2.856, "eval_nq_num_pred_words": 48.59, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 13.02181557222095, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3883234856283586, "eval_nq_runtime": 11.6579, "eval_nq_samples_per_second": 42.889, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.40987566302541584, "eval_nq_token_set_f1_sem": 0.005033940333738851, "eval_nq_token_set_precision": 0.3554135430985488, "eval_nq_token_set_recall": 0.5004185330412072, "eval_nq_true_num_tokens": 64.0, "step": 28125 }, { "epoch": 5.4, "learning_rate": 0.001, "loss": 2.9311, "step": 28128 }, { "epoch": 5.4, "learning_rate": 0.001, "loss": 2.9289, "step": 28140 }, { "epoch": 5.41, "learning_rate": 0.001, "loss": 2.9335, "step": 28152 }, { "epoch": 5.41, "learning_rate": 0.001, "loss": 2.9314, "step": 28164 }, { "epoch": 5.41, "learning_rate": 0.001, "loss": 2.9216, "step": 28176 }, { "epoch": 5.41, "learning_rate": 0.001, "loss": 2.9297, "step": 28188 }, { "epoch": 5.41, "learning_rate": 0.001, "loss": 2.9393, "step": 28200 }, { "epoch": 5.42, "learning_rate": 0.001, "loss": 2.9256, "step": 28212 }, { "epoch": 5.42, "learning_rate": 0.001, "loss": 2.9268, "step": 28224 }, { "epoch": 5.42, "learning_rate": 0.001, "loss": 2.9334, "step": 28236 }, { "epoch": 5.42, "learning_rate": 0.001, "loss": 2.9322, "step": 28248 }, { "epoch": 5.43, "learning_rate": 0.001, "loss": 2.9424, "step": 28260 }, { "epoch": 5.43, "learning_rate": 0.001, "loss": 2.9244, "step": 28272 }, { "epoch": 5.43, "learning_rate": 0.001, "loss": 2.9343, "step": 28284 }, { "epoch": 5.43, "learning_rate": 0.001, "loss": 2.9333, "step": 28296 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 2.9402, "step": 28308 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 2.931, "step": 28320 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 2.931, "step": 28332 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 2.929, "step": 28344 }, { "epoch": 5.44, "learning_rate": 0.001, "loss": 2.9327, "step": 28356 }, { "epoch": 5.45, "learning_rate": 0.001, "loss": 2.9416, "step": 28368 }, { "epoch": 5.45, "learning_rate": 0.001, "loss": 2.9364, "step": 28380 }, { "epoch": 5.45, "learning_rate": 0.001, "loss": 2.922, "step": 28392 }, { "epoch": 5.45, "learning_rate": 0.001, "loss": 2.9244, "step": 28404 }, { "epoch": 5.46, "learning_rate": 0.001, "loss": 2.9267, "step": 28416 }, { "epoch": 5.46, "learning_rate": 0.001, "loss": 2.939, "step": 28428 }, { "epoch": 5.46, "learning_rate": 0.001, "loss": 2.9327, "step": 28440 }, { "epoch": 5.46, "learning_rate": 0.001, "loss": 2.9235, "step": 28452 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 2.9168, "step": 28464 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 2.9222, "step": 28476 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 2.9095, "step": 28488 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 2.923, "step": 28500 }, { "epoch": 5.47, "learning_rate": 0.001, "loss": 2.9406, "step": 28512 }, { "epoch": 5.48, "learning_rate": 0.001, "loss": 2.9307, "step": 28524 }, { "epoch": 5.48, "learning_rate": 0.001, "loss": 2.9327, "step": 28536 }, { "epoch": 5.48, "learning_rate": 0.001, "loss": 2.9293, "step": 28548 }, { "epoch": 5.48, "learning_rate": 0.001, "loss": 2.927, "step": 28560 }, { "epoch": 5.49, "learning_rate": 0.001, "loss": 2.9261, "step": 28572 }, { "epoch": 5.49, "learning_rate": 0.001, "loss": 2.9292, "step": 28584 }, { "epoch": 5.49, "learning_rate": 0.001, "loss": 2.9231, "step": 28596 }, { "epoch": 5.49, "learning_rate": 0.001, "loss": 2.9258, "step": 28608 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.9275, "step": 28620 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.9292, "step": 28632 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.9178, "step": 28644 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.9305, "step": 28656 }, { "epoch": 5.5, "learning_rate": 0.001, "loss": 2.9237, "step": 28668 }, { "epoch": 5.51, "learning_rate": 0.001, "loss": 2.9202, "step": 28680 }, { "epoch": 5.51, "learning_rate": 0.001, "loss": 2.9267, "step": 28692 }, { "epoch": 5.51, "learning_rate": 0.001, "loss": 2.9315, "step": 28704 }, { "epoch": 5.51, "learning_rate": 0.001, "loss": 2.9206, "step": 28716 }, { "epoch": 5.52, "learning_rate": 0.001, "loss": 2.9317, "step": 28728 }, { "epoch": 5.52, "learning_rate": 0.001, "loss": 2.9168, "step": 28740 }, { "epoch": 5.52, "eval_ag_news_accuracy": 0.28525, "eval_ag_news_bleu_score": 3.683319881216632, "eval_ag_news_bleu_score_sem": 0.11641768842346403, "eval_ag_news_emb_cos_sim": 0.7261709570884705, "eval_ag_news_emb_cos_sim_sem": 0.009650001045515508, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9693117141723633, "eval_ag_news_n_ngrams_match_1": 11.778, "eval_ag_news_n_ngrams_match_2": 2.164, "eval_ag_news_n_ngrams_match_3": 0.538, "eval_ag_news_num_pred_words": 46.07, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 52.94807488551273, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.28490252400450233, "eval_ag_news_runtime": 10.2211, "eval_ag_news_samples_per_second": 48.919, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3019860572832703, "eval_ag_news_token_set_f1_sem": 0.00422515325440724, "eval_ag_news_token_set_precision": 0.27553396990373064, "eval_ag_news_token_set_recall": 0.3536215365054163, "eval_ag_news_true_num_tokens": 56.09375, "step": 28750 }, { "epoch": 5.52, "eval_anthropic_toxic_prompts_accuracy": 0.09525, "eval_anthropic_toxic_prompts_bleu_score": 2.160806075862903, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.07906417799213382, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5775938034057617, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01143050980738191, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.673187017440796, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.756, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.144, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.326, "eval_anthropic_toxic_prompts_num_pred_words": 46.534, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 39.3772019231811, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.16762825525763436, "eval_anthropic_toxic_prompts_runtime": 9.6452, "eval_anthropic_toxic_prompts_samples_per_second": 51.84, "eval_anthropic_toxic_prompts_steps_per_second": 0.104, "eval_anthropic_toxic_prompts_token_set_f1": 0.2896135354073873, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005875932793963504, "eval_anthropic_toxic_prompts_token_set_precision": 0.32533301138436854, "eval_anthropic_toxic_prompts_token_set_recall": 0.2946068753251194, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 28750 }, { "epoch": 5.52, "eval_arxiv_accuracy": 0.3083125, "eval_arxiv_bleu_score": 3.268154741507437, "eval_arxiv_bleu_score_sem": 0.09972210165521798, "eval_arxiv_emb_cos_sim": 0.6481302976608276, "eval_arxiv_emb_cos_sim_sem": 0.007643326367639076, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8440616130828857, "eval_arxiv_n_ngrams_match_1": 11.992, "eval_arxiv_n_ngrams_match_2": 2.088, "eval_arxiv_n_ngrams_match_3": 0.396, "eval_arxiv_num_pred_words": 39.278, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 46.714827193965235, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.281924942172997, "eval_arxiv_runtime": 9.6802, "eval_arxiv_samples_per_second": 51.652, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.286338715088182, "eval_arxiv_token_set_f1_sem": 0.003952879797160271, "eval_arxiv_token_set_precision": 0.22698345522828195, "eval_arxiv_token_set_recall": 0.4138662418310259, "eval_arxiv_true_num_tokens": 64.0, "step": 28750 }, { "epoch": 5.52, "eval_python_code_alpaca_accuracy": 0.132, "eval_python_code_alpaca_bleu_score": 3.0746062585888287, "eval_python_code_alpaca_bleu_score_sem": 0.09891558940506559, "eval_python_code_alpaca_emb_cos_sim": 0.5892761945724487, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009927602580321528, "eval_python_code_alpaca_emb_top1_equal": 0.0546875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.415736198425293, "eval_python_code_alpaca_n_ngrams_match_1": 6.894, "eval_python_code_alpaca_n_ngrams_match_2": 1.476, "eval_python_code_alpaca_n_ngrams_match_3": 0.412, "eval_python_code_alpaca_num_pred_words": 39.508, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 30.439350582671775, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.23782295399497383, "eval_python_code_alpaca_runtime": 10.2993, "eval_python_code_alpaca_samples_per_second": 48.547, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.36354813563273164, "eval_python_code_alpaca_token_set_f1_sem": 0.005754133766519511, "eval_python_code_alpaca_token_set_precision": 0.357568283734813, "eval_python_code_alpaca_token_set_recall": 0.4071109379454599, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 28750 }, { "epoch": 5.52, "eval_wikibio_accuracy": 0.2851875, "eval_wikibio_bleu_score": 5.14171844989698, "eval_wikibio_bleu_score_sem": 0.1790106479977739, "eval_wikibio_emb_cos_sim": 0.6940916776657104, "eval_wikibio_emb_cos_sim_sem": 0.009865538711711204, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.292008876800537, "eval_wikibio_n_ngrams_match_1": 9.63, "eval_wikibio_n_ngrams_match_2": 2.984, "eval_wikibio_n_ngrams_match_3": 1.006, "eval_wikibio_num_pred_words": 37.524, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 73.11319647528754, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3223504903817959, "eval_wikibio_runtime": 9.6048, "eval_wikibio_samples_per_second": 52.057, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.304034879370388, "eval_wikibio_token_set_f1_sem": 0.005097293072236239, "eval_wikibio_token_set_precision": 0.3097706740080534, "eval_wikibio_token_set_recall": 0.3143658159191039, "eval_wikibio_true_num_tokens": 61.1328125, "step": 28750 }, { "epoch": 5.52, "eval_nq_accuracy": 0.47884375, "eval_nq_bleu_score": 9.092205620452065, "eval_nq_bleu_score_sem": 0.3947123207494208, "eval_nq_emb_cos_sim": 0.7664515972137451, "eval_nq_emb_cos_sim_sem": 0.00981405831188867, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5566017627716064, "eval_nq_n_ngrams_match_1": 20.568, "eval_nq_n_ngrams_match_2": 6.798, "eval_nq_n_ngrams_match_3": 2.828, "eval_nq_num_pred_words": 48.934, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.891932946541385, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3882656244447178, "eval_nq_runtime": 10.0125, "eval_nq_samples_per_second": 49.938, "eval_nq_steps_per_second": 0.1, "eval_nq_token_set_f1": 0.41441957291226117, "eval_nq_token_set_f1_sem": 0.004839709257070892, "eval_nq_token_set_precision": 0.3624421831091439, "eval_nq_token_set_recall": 0.4976973300610982, "eval_nq_true_num_tokens": 64.0, "step": 28750 }, { "epoch": 5.52, "learning_rate": 0.001, "loss": 2.9225, "step": 28752 }, { "epoch": 5.52, "learning_rate": 0.001, "loss": 2.9271, "step": 28764 }, { "epoch": 5.53, "learning_rate": 0.001, "loss": 2.9154, "step": 28776 }, { "epoch": 5.53, "learning_rate": 0.001, "loss": 2.925, "step": 28788 }, { "epoch": 5.53, "learning_rate": 0.001, "loss": 2.924, "step": 28800 }, { "epoch": 5.53, "learning_rate": 0.001, "loss": 2.9294, "step": 28812 }, { "epoch": 5.53, "learning_rate": 0.001, "loss": 2.9207, "step": 28824 }, { "epoch": 5.54, "learning_rate": 0.001, "loss": 2.9265, "step": 28836 }, { "epoch": 5.54, "learning_rate": 0.001, "loss": 2.93, "step": 28848 }, { "epoch": 5.54, "learning_rate": 0.001, "loss": 2.9272, "step": 28860 }, { "epoch": 5.54, "learning_rate": 0.001, "loss": 2.9112, "step": 28872 }, { "epoch": 5.55, "learning_rate": 0.001, "loss": 2.9346, "step": 28884 }, { "epoch": 5.55, "learning_rate": 0.001, "loss": 2.9242, "step": 28896 }, { "epoch": 5.55, "learning_rate": 0.001, "loss": 2.9212, "step": 28908 }, { "epoch": 5.55, "learning_rate": 0.001, "loss": 2.9067, "step": 28920 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 2.9176, "step": 28932 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 2.9117, "step": 28944 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 2.9186, "step": 28956 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 2.9141, "step": 28968 }, { "epoch": 5.56, "learning_rate": 0.001, "loss": 2.9255, "step": 28980 }, { "epoch": 5.57, "learning_rate": 0.001, "loss": 2.9271, "step": 28992 }, { "epoch": 5.57, "learning_rate": 0.001, "loss": 2.9274, "step": 29004 }, { "epoch": 5.57, "learning_rate": 0.001, "loss": 2.9314, "step": 29016 }, { "epoch": 5.57, "learning_rate": 0.001, "loss": 2.9287, "step": 29028 }, { "epoch": 5.58, "learning_rate": 0.001, "loss": 2.9224, "step": 29040 }, { "epoch": 5.58, "learning_rate": 0.001, "loss": 2.9253, "step": 29052 }, { "epoch": 5.58, "learning_rate": 0.001, "loss": 2.9302, "step": 29064 }, { "epoch": 5.58, "learning_rate": 0.001, "loss": 2.9212, "step": 29076 }, { "epoch": 5.59, "learning_rate": 0.001, "loss": 2.9161, "step": 29088 }, { "epoch": 5.59, "learning_rate": 0.001, "loss": 2.9241, "step": 29100 }, { "epoch": 5.59, "learning_rate": 0.001, "loss": 2.9111, "step": 29112 }, { "epoch": 5.59, "learning_rate": 0.001, "loss": 2.9269, "step": 29124 }, { "epoch": 5.59, "learning_rate": 0.001, "loss": 2.9249, "step": 29136 }, { "epoch": 5.6, "learning_rate": 0.001, "loss": 2.9227, "step": 29148 }, { "epoch": 5.6, "learning_rate": 0.001, "loss": 2.9246, "step": 29160 }, { "epoch": 5.6, "learning_rate": 0.001, "loss": 2.9241, "step": 29172 }, { "epoch": 5.6, "learning_rate": 0.001, "loss": 2.9187, "step": 29184 }, { "epoch": 5.61, "learning_rate": 0.001, "loss": 2.9121, "step": 29196 }, { "epoch": 5.61, "learning_rate": 0.001, "loss": 2.9212, "step": 29208 }, { "epoch": 5.61, "learning_rate": 0.001, "loss": 2.9234, "step": 29220 }, { "epoch": 5.61, "learning_rate": 0.001, "loss": 2.9263, "step": 29232 }, { "epoch": 5.62, "learning_rate": 0.001, "loss": 2.9193, "step": 29244 }, { "epoch": 5.62, "learning_rate": 0.001, "loss": 2.9246, "step": 29256 }, { "epoch": 5.62, "learning_rate": 0.001, "loss": 2.9231, "step": 29268 }, { "epoch": 5.62, "learning_rate": 0.001, "loss": 2.9165, "step": 29280 }, { "epoch": 5.62, "learning_rate": 0.001, "loss": 2.9112, "step": 29292 }, { "epoch": 5.63, "learning_rate": 0.001, "loss": 2.913, "step": 29304 }, { "epoch": 5.63, "learning_rate": 0.001, "loss": 2.9225, "step": 29316 }, { "epoch": 5.63, "learning_rate": 0.001, "loss": 2.9135, "step": 29328 }, { "epoch": 5.63, "learning_rate": 0.001, "loss": 2.9195, "step": 29340 }, { "epoch": 5.64, "learning_rate": 0.001, "loss": 2.919, "step": 29352 }, { "epoch": 5.64, "learning_rate": 0.001, "loss": 2.9097, "step": 29364 }, { "epoch": 5.64, "eval_ag_news_accuracy": 0.2815625, "eval_ag_news_bleu_score": 3.9579431453791933, "eval_ag_news_bleu_score_sem": 0.13389890050609876, "eval_ag_news_emb_cos_sim": 0.7305126190185547, "eval_ag_news_emb_cos_sim_sem": 0.010065528142073428, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.980177402496338, "eval_ag_news_n_ngrams_match_1": 11.824, "eval_ag_news_n_ngrams_match_2": 2.32, "eval_ag_news_n_ngrams_match_3": 0.654, "eval_ag_news_num_pred_words": 46.228, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 53.52652912514409, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2884148738540912, "eval_ag_news_runtime": 10.1631, "eval_ag_news_samples_per_second": 49.197, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3045808051396171, "eval_ag_news_token_set_f1_sem": 0.004275929817397152, "eval_ag_news_token_set_precision": 0.2761385247378778, "eval_ag_news_token_set_recall": 0.3577790649932207, "eval_ag_news_true_num_tokens": 56.09375, "step": 29375 }, { "epoch": 5.64, "eval_anthropic_toxic_prompts_accuracy": 0.0966875, "eval_anthropic_toxic_prompts_bleu_score": 2.3350248656909875, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09176338011601559, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5957716703414917, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010789873100204765, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.6640102863311768, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.94, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.266, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.392, "eval_anthropic_toxic_prompts_num_pred_words": 46.534, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 39.0175008933683, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1730207063385469, "eval_anthropic_toxic_prompts_runtime": 9.8831, "eval_anthropic_toxic_prompts_samples_per_second": 50.592, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3000346491804135, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006012415945277402, "eval_anthropic_toxic_prompts_token_set_precision": 0.3420610200801772, "eval_anthropic_toxic_prompts_token_set_recall": 0.30261754564838533, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 29375 }, { "epoch": 5.64, "eval_arxiv_accuracy": 0.3090625, "eval_arxiv_bleu_score": 3.3241359711659437, "eval_arxiv_bleu_score_sem": 0.09375160570800184, "eval_arxiv_emb_cos_sim": 0.6541392207145691, "eval_arxiv_emb_cos_sim_sem": 0.007641927619104121, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8257458209991455, "eval_arxiv_n_ngrams_match_1": 12.044, "eval_arxiv_n_ngrams_match_2": 2.128, "eval_arxiv_n_ngrams_match_3": 0.404, "eval_arxiv_num_pred_words": 39.116, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 45.86699618168979, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2852597968181938, "eval_arxiv_runtime": 22.9602, "eval_arxiv_samples_per_second": 21.777, "eval_arxiv_steps_per_second": 0.044, "eval_arxiv_token_set_f1": 0.2857373098850007, "eval_arxiv_token_set_f1_sem": 0.003933038607794231, "eval_arxiv_token_set_precision": 0.22609456688552154, "eval_arxiv_token_set_recall": 0.41236478347392447, "eval_arxiv_true_num_tokens": 64.0, "step": 29375 }, { "epoch": 5.64, "eval_python_code_alpaca_accuracy": 0.13390625, "eval_python_code_alpaca_bleu_score": 3.185916377392192, "eval_python_code_alpaca_bleu_score_sem": 0.11558272745725323, "eval_python_code_alpaca_emb_cos_sim": 0.5900557637214661, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011087127578762512, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.3717613220214844, "eval_python_code_alpaca_n_ngrams_match_1": 6.822, "eval_python_code_alpaca_n_ngrams_match_2": 1.536, "eval_python_code_alpaca_n_ngrams_match_3": 0.42, "eval_python_code_alpaca_num_pred_words": 39.574, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 29.129788838855962, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.23834039010553257, "eval_python_code_alpaca_runtime": 12.6156, "eval_python_code_alpaca_samples_per_second": 39.633, "eval_python_code_alpaca_steps_per_second": 0.079, "eval_python_code_alpaca_token_set_f1": 0.36887512618713925, "eval_python_code_alpaca_token_set_f1_sem": 0.005777866137227637, "eval_python_code_alpaca_token_set_precision": 0.35829297488772005, "eval_python_code_alpaca_token_set_recall": 0.42257935245215933, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 29375 }, { "epoch": 5.64, "eval_wikibio_accuracy": 0.28859375, "eval_wikibio_bleu_score": 5.232629256479571, "eval_wikibio_bleu_score_sem": 0.19667584747879158, "eval_wikibio_emb_cos_sim": 0.69016432762146, "eval_wikibio_emb_cos_sim_sem": 0.01163198227997611, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.284221172332764, "eval_wikibio_n_ngrams_match_1": 9.578, "eval_wikibio_n_ngrams_match_2": 3.032, "eval_wikibio_n_ngrams_match_3": 1.038, "eval_wikibio_num_pred_words": 37.286, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.54602386129733, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3205587357177153, "eval_wikibio_runtime": 14.9378, "eval_wikibio_samples_per_second": 33.472, "eval_wikibio_steps_per_second": 0.067, "eval_wikibio_token_set_f1": 0.2997864642925253, "eval_wikibio_token_set_f1_sem": 0.005517867437531372, "eval_wikibio_token_set_precision": 0.30791202680347074, "eval_wikibio_token_set_recall": 0.3069112164459001, "eval_wikibio_true_num_tokens": 61.1328125, "step": 29375 }, { "epoch": 5.64, "eval_nq_accuracy": 0.47859375, "eval_nq_bleu_score": 9.07752341736059, "eval_nq_bleu_score_sem": 0.41199354295420804, "eval_nq_emb_cos_sim": 0.7646245956420898, "eval_nq_emb_cos_sim_sem": 0.009229337301795322, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5586891174316406, "eval_nq_n_ngrams_match_1": 20.232, "eval_nq_n_ngrams_match_2": 6.75, "eval_nq_n_ngrams_match_3": 2.904, "eval_nq_num_pred_words": 48.412, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.91887108780064, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3843453631454201, "eval_nq_runtime": 9.7747, "eval_nq_samples_per_second": 51.152, "eval_nq_steps_per_second": 0.102, "eval_nq_token_set_f1": 0.4079419735728884, "eval_nq_token_set_f1_sem": 0.004967891975882009, "eval_nq_token_set_precision": 0.35603870422141043, "eval_nq_token_set_recall": 0.49267351774513696, "eval_nq_true_num_tokens": 64.0, "step": 29375 }, { "epoch": 5.64, "learning_rate": 0.001, "loss": 2.9148, "step": 29376 }, { "epoch": 5.64, "learning_rate": 0.001, "loss": 2.9313, "step": 29388 }, { "epoch": 5.65, "learning_rate": 0.001, "loss": 2.9231, "step": 29400 }, { "epoch": 5.65, "learning_rate": 0.001, "loss": 2.9201, "step": 29412 }, { "epoch": 5.65, "learning_rate": 0.001, "loss": 2.9194, "step": 29424 }, { "epoch": 5.65, "learning_rate": 0.001, "loss": 2.9141, "step": 29436 }, { "epoch": 5.65, "learning_rate": 0.001, "loss": 2.9279, "step": 29448 }, { "epoch": 5.66, "learning_rate": 0.001, "loss": 2.9105, "step": 29460 }, { "epoch": 5.66, "learning_rate": 0.001, "loss": 2.9191, "step": 29472 }, { "epoch": 5.66, "learning_rate": 0.001, "loss": 2.9161, "step": 29484 }, { "epoch": 5.66, "learning_rate": 0.001, "loss": 2.9192, "step": 29496 }, { "epoch": 5.67, "learning_rate": 0.001, "loss": 2.915, "step": 29508 }, { "epoch": 5.67, "learning_rate": 0.001, "loss": 2.9162, "step": 29520 }, { "epoch": 5.67, "learning_rate": 0.001, "loss": 2.9178, "step": 29532 }, { "epoch": 5.67, "learning_rate": 0.001, "loss": 2.922, "step": 29544 }, { "epoch": 5.68, "learning_rate": 0.001, "loss": 2.9292, "step": 29556 }, { "epoch": 5.68, "learning_rate": 0.001, "loss": 2.9285, "step": 29568 }, { "epoch": 5.68, "learning_rate": 0.001, "loss": 2.9153, "step": 29580 }, { "epoch": 5.68, "learning_rate": 0.001, "loss": 2.9175, "step": 29592 }, { "epoch": 5.68, "learning_rate": 0.001, "loss": 2.9274, "step": 29604 }, { "epoch": 5.69, "learning_rate": 0.001, "loss": 2.92, "step": 29616 }, { "epoch": 5.69, "learning_rate": 0.001, "loss": 2.9218, "step": 29628 }, { "epoch": 5.69, "learning_rate": 0.001, "loss": 2.9184, "step": 29640 }, { "epoch": 5.69, "learning_rate": 0.001, "loss": 2.9218, "step": 29652 }, { "epoch": 5.7, "learning_rate": 0.001, "loss": 2.9128, "step": 29664 }, { "epoch": 5.7, "learning_rate": 0.001, "loss": 2.9165, "step": 29676 }, { "epoch": 5.7, "learning_rate": 0.001, "loss": 2.919, "step": 29688 }, { "epoch": 5.7, "learning_rate": 0.001, "loss": 2.9148, "step": 29700 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.9225, "step": 29712 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.9328, "step": 29724 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.9174, "step": 29736 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.9262, "step": 29748 }, { "epoch": 5.71, "learning_rate": 0.001, "loss": 2.91, "step": 29760 }, { "epoch": 5.72, "learning_rate": 0.001, "loss": 2.9191, "step": 29772 }, { "epoch": 5.72, "learning_rate": 0.001, "loss": 2.9118, "step": 29784 }, { "epoch": 5.72, "learning_rate": 0.001, "loss": 2.9152, "step": 29796 }, { "epoch": 5.72, "learning_rate": 0.001, "loss": 2.9168, "step": 29808 }, { "epoch": 5.73, "learning_rate": 0.001, "loss": 2.9185, "step": 29820 }, { "epoch": 5.73, "learning_rate": 0.001, "loss": 2.9227, "step": 29832 }, { "epoch": 5.73, "learning_rate": 0.001, "loss": 2.9149, "step": 29844 }, { "epoch": 5.73, "learning_rate": 0.001, "loss": 2.9134, "step": 29856 }, { "epoch": 5.74, "learning_rate": 0.001, "loss": 2.9189, "step": 29868 }, { "epoch": 5.74, "learning_rate": 0.001, "loss": 2.9203, "step": 29880 }, { "epoch": 5.74, "learning_rate": 0.001, "loss": 2.9142, "step": 29892 }, { "epoch": 5.74, "learning_rate": 0.001, "loss": 2.9146, "step": 29904 }, { "epoch": 5.74, "learning_rate": 0.001, "loss": 2.9058, "step": 29916 }, { "epoch": 5.75, "learning_rate": 0.001, "loss": 2.9221, "step": 29928 }, { "epoch": 5.75, "learning_rate": 0.001, "loss": 2.9152, "step": 29940 }, { "epoch": 5.75, "learning_rate": 0.001, "loss": 2.9109, "step": 29952 }, { "epoch": 5.75, "learning_rate": 0.001, "loss": 2.9112, "step": 29964 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 2.9201, "step": 29976 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 2.9164, "step": 29988 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 2.9188, "step": 30000 }, { "epoch": 5.76, "eval_ag_news_accuracy": 0.285875, "eval_ag_news_bleu_score": 3.850783328454288, "eval_ag_news_bleu_score_sem": 0.1372927895962863, "eval_ag_news_emb_cos_sim": 0.7276172637939453, "eval_ag_news_emb_cos_sim_sem": 0.009851425444360625, "eval_ag_news_emb_top1_equal": 0.15625, "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.966392755508423, "eval_ag_news_n_ngrams_match_1": 11.838, "eval_ag_news_n_ngrams_match_2": 2.302, "eval_ag_news_n_ngrams_match_3": 0.596, "eval_ag_news_num_pred_words": 46.148, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 52.79374699153521, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2835993467716257, "eval_ag_news_runtime": 15.8892, "eval_ag_news_samples_per_second": 31.468, "eval_ag_news_steps_per_second": 0.063, "eval_ag_news_token_set_f1": 0.30344832396101123, "eval_ag_news_token_set_f1_sem": 0.004250525713913587, "eval_ag_news_token_set_precision": 0.27299063247100686, "eval_ag_news_token_set_recall": 0.361079217786275, "eval_ag_news_true_num_tokens": 56.09375, "step": 30000 }, { "epoch": 5.76, "eval_anthropic_toxic_prompts_accuracy": 0.095625, "eval_anthropic_toxic_prompts_bleu_score": 2.2058590011185744, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08472565620356692, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.595095694065094, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010127160005486514, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.011004959004867984, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.6490745544433594, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.784, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.194, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.352, "eval_anthropic_toxic_prompts_num_pred_words": 47.61, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 38.439076311016336, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.16773877446726976, "eval_anthropic_toxic_prompts_runtime": 10.4948, "eval_anthropic_toxic_prompts_samples_per_second": 47.643, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.3010833694682913, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006084173852136998, "eval_anthropic_toxic_prompts_token_set_precision": 0.32939871473377075, "eval_anthropic_toxic_prompts_token_set_recall": 0.318107955362561, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 30000 }, { "epoch": 5.76, "eval_arxiv_accuracy": 0.310375, "eval_arxiv_bleu_score": 3.3752250293476336, "eval_arxiv_bleu_score_sem": 0.09475203164372994, "eval_arxiv_emb_cos_sim": 0.6450604200363159, "eval_arxiv_emb_cos_sim_sem": 0.008361617326108527, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.827465295791626, "eval_arxiv_n_ngrams_match_1": 12.068, "eval_arxiv_n_ngrams_match_2": 2.108, "eval_arxiv_n_ngrams_match_3": 0.416, "eval_arxiv_num_pred_words": 38.628, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 45.94593116934354, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2873292248246817, "eval_arxiv_runtime": 15.7135, "eval_arxiv_samples_per_second": 31.82, "eval_arxiv_steps_per_second": 0.064, "eval_arxiv_token_set_f1": 0.2873084298308554, "eval_arxiv_token_set_f1_sem": 0.0037490863754161387, "eval_arxiv_token_set_precision": 0.22817390153959008, "eval_arxiv_token_set_recall": 0.41772290533826184, "eval_arxiv_true_num_tokens": 64.0, "step": 30000 }, { "epoch": 5.76, "eval_python_code_alpaca_accuracy": 0.1338125, "eval_python_code_alpaca_bleu_score": 3.073501627024064, "eval_python_code_alpaca_bleu_score_sem": 0.1004260312872625, "eval_python_code_alpaca_emb_cos_sim": 0.6021091938018799, "eval_python_code_alpaca_emb_cos_sim_sem": 0.012731274822083471, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.3815906047821045, "eval_python_code_alpaca_n_ngrams_match_1": 6.786, "eval_python_code_alpaca_n_ngrams_match_2": 1.546, "eval_python_code_alpaca_n_ngrams_match_3": 0.428, "eval_python_code_alpaca_num_pred_words": 38.752, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 29.417525576355107, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.23933823569072102, "eval_python_code_alpaca_runtime": 12.0021, "eval_python_code_alpaca_samples_per_second": 41.659, "eval_python_code_alpaca_steps_per_second": 0.083, "eval_python_code_alpaca_token_set_f1": 0.3658373813590061, "eval_python_code_alpaca_token_set_f1_sem": 0.0062407786743706685, "eval_python_code_alpaca_token_set_precision": 0.3516124488730524, "eval_python_code_alpaca_token_set_recall": 0.423038221603183, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 30000 }, { "epoch": 5.76, "eval_wikibio_accuracy": 0.29065625, "eval_wikibio_bleu_score": 5.053948308197059, "eval_wikibio_bleu_score_sem": 0.18603968757630915, "eval_wikibio_emb_cos_sim": 0.6840104460716248, "eval_wikibio_emb_cos_sim_sem": 0.01135399615517743, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.2231764793396, "eval_wikibio_n_ngrams_match_1": 9.4, "eval_wikibio_n_ngrams_match_2": 2.918, "eval_wikibio_n_ngrams_match_3": 0.982, "eval_wikibio_num_pred_words": 36.798, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 68.24993484001781, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3166582288314724, "eval_wikibio_runtime": 15.6901, "eval_wikibio_samples_per_second": 31.867, "eval_wikibio_steps_per_second": 0.064, "eval_wikibio_token_set_f1": 0.29972557652174886, "eval_wikibio_token_set_f1_sem": 0.005375044090514448, "eval_wikibio_token_set_precision": 0.3021887502153504, "eval_wikibio_token_set_recall": 0.3136608932700702, "eval_wikibio_true_num_tokens": 61.1328125, "step": 30000 }, { "epoch": 5.76, "eval_nq_accuracy": 0.48146875, "eval_nq_bleu_score": 8.78585342678472, "eval_nq_bleu_score_sem": 0.3673154230053644, "eval_nq_emb_cos_sim": 0.7678723335266113, "eval_nq_emb_cos_sim_sem": 0.010203222554703026, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5455660820007324, "eval_nq_n_ngrams_match_1": 20.458, "eval_nq_n_ngrams_match_2": 6.692, "eval_nq_n_ngrams_match_3": 2.742, "eval_nq_num_pred_words": 48.502, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.750443840277873, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3889194328154036, "eval_nq_runtime": 20.9512, "eval_nq_samples_per_second": 23.865, "eval_nq_steps_per_second": 0.048, "eval_nq_token_set_f1": 0.40984881096717857, "eval_nq_token_set_f1_sem": 0.004945079188842102, "eval_nq_token_set_precision": 0.3578919668831182, "eval_nq_token_set_recall": 0.49188674622319023, "eval_nq_true_num_tokens": 64.0, "step": 30000 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 2.9134, "step": 30012 }, { "epoch": 5.76, "learning_rate": 0.001, "loss": 2.9031, "step": 30024 }, { "epoch": 5.77, "learning_rate": 0.001, "loss": 2.9133, "step": 30036 }, { "epoch": 5.77, "learning_rate": 0.001, "loss": 2.8988, "step": 30048 }, { "epoch": 5.77, "learning_rate": 0.001, "loss": 2.9129, "step": 30060 }, { "epoch": 5.77, "learning_rate": 0.001, "loss": 2.9092, "step": 30072 }, { "epoch": 5.78, "learning_rate": 0.001, "loss": 2.9179, "step": 30084 }, { "epoch": 5.78, "learning_rate": 0.001, "loss": 2.9137, "step": 30096 }, { "epoch": 5.78, "learning_rate": 0.001, "loss": 2.9185, "step": 30108 }, { "epoch": 5.78, "learning_rate": 0.001, "loss": 2.9126, "step": 30120 }, { "epoch": 5.79, "learning_rate": 0.001, "loss": 2.9077, "step": 30132 }, { "epoch": 5.79, "learning_rate": 0.001, "loss": 2.9208, "step": 30144 }, { "epoch": 5.79, "learning_rate": 0.001, "loss": 2.9097, "step": 30156 }, { "epoch": 5.79, "learning_rate": 0.001, "loss": 2.9093, "step": 30168 }, { "epoch": 5.79, "learning_rate": 0.001, "loss": 2.9049, "step": 30180 }, { "epoch": 5.8, "learning_rate": 0.001, "loss": 2.9035, "step": 30192 }, { "epoch": 5.8, "learning_rate": 0.001, "loss": 2.904, "step": 30204 }, { "epoch": 5.8, "learning_rate": 0.001, "loss": 2.9195, "step": 30216 }, { "epoch": 5.8, "learning_rate": 0.001, "loss": 2.913, "step": 30228 }, { "epoch": 5.81, "learning_rate": 0.001, "loss": 2.9113, "step": 30240 }, { "epoch": 5.81, "learning_rate": 0.001, "loss": 2.913, "step": 30252 }, { "epoch": 5.81, "learning_rate": 0.001, "loss": 2.9228, "step": 30264 }, { "epoch": 5.81, "learning_rate": 0.001, "loss": 2.9073, "step": 30276 }, { "epoch": 5.82, "learning_rate": 0.001, "loss": 2.9143, "step": 30288 }, { "epoch": 5.82, "learning_rate": 0.001, "loss": 2.9178, "step": 30300 }, { "epoch": 5.82, "learning_rate": 0.001, "loss": 2.9136, "step": 30312 }, { "epoch": 5.82, "learning_rate": 0.001, "loss": 2.9236, "step": 30324 }, { "epoch": 5.82, "learning_rate": 0.001, "loss": 2.9106, "step": 30336 }, { "epoch": 5.83, "learning_rate": 0.001, "loss": 2.9091, "step": 30348 }, { "epoch": 5.83, "learning_rate": 0.001, "loss": 2.9061, "step": 30360 }, { "epoch": 5.83, "learning_rate": 0.001, "loss": 2.9258, "step": 30372 }, { "epoch": 5.83, "learning_rate": 0.001, "loss": 2.9205, "step": 30384 }, { "epoch": 5.84, "learning_rate": 0.001, "loss": 2.9164, "step": 30396 }, { "epoch": 5.84, "learning_rate": 0.001, "loss": 2.9256, "step": 30408 }, { "epoch": 5.84, "learning_rate": 0.001, "loss": 2.9129, "step": 30420 }, { "epoch": 5.84, "learning_rate": 0.001, "loss": 2.9132, "step": 30432 }, { "epoch": 5.85, "learning_rate": 0.001, "loss": 2.9119, "step": 30444 }, { "epoch": 5.85, "learning_rate": 0.001, "loss": 2.9142, "step": 30456 }, { "epoch": 5.85, "learning_rate": 0.001, "loss": 2.9021, "step": 30468 }, { "epoch": 5.85, "learning_rate": 0.001, "loss": 2.9111, "step": 30480 }, { "epoch": 5.85, "learning_rate": 0.001, "loss": 2.9121, "step": 30492 }, { "epoch": 5.86, "learning_rate": 0.001, "loss": 2.914, "step": 30504 }, { "epoch": 5.86, "learning_rate": 0.001, "loss": 2.9219, "step": 30516 }, { "epoch": 5.86, "learning_rate": 0.001, "loss": 2.9169, "step": 30528 }, { "epoch": 5.86, "learning_rate": 0.001, "loss": 2.9133, "step": 30540 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 2.9096, "step": 30552 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 2.9112, "step": 30564 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 2.9115, "step": 30576 }, { "epoch": 5.87, "learning_rate": 0.001, "loss": 2.9114, "step": 30588 }, { "epoch": 5.88, "learning_rate": 0.001, "loss": 2.9059, "step": 30600 }, { "epoch": 5.88, "learning_rate": 0.001, "loss": 2.909, "step": 30612 }, { "epoch": 5.88, "learning_rate": 0.001, "loss": 2.9161, "step": 30624 }, { "epoch": 5.88, "eval_ag_news_accuracy": 0.28559375, "eval_ag_news_bleu_score": 3.92340353353268, "eval_ag_news_bleu_score_sem": 0.13422552780712915, "eval_ag_news_emb_cos_sim": 0.7396842241287231, "eval_ag_news_emb_cos_sim_sem": 0.00864020997039906, "eval_ag_news_emb_top1_equal": 0.140625, "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9616646766662598, "eval_ag_news_n_ngrams_match_1": 11.986, "eval_ag_news_n_ngrams_match_2": 2.32, "eval_ag_news_n_ngrams_match_3": 0.646, "eval_ag_news_num_pred_words": 45.83, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 52.544723159446725, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.29149405296365205, "eval_ag_news_runtime": 16.0074, "eval_ag_news_samples_per_second": 31.236, "eval_ag_news_steps_per_second": 0.062, "eval_ag_news_token_set_f1": 0.3067500767208229, "eval_ag_news_token_set_f1_sem": 0.004254976760109095, "eval_ag_news_token_set_precision": 0.27931568804659523, "eval_ag_news_token_set_recall": 0.35896683671270074, "eval_ag_news_true_num_tokens": 56.09375, "step": 30625 }, { "epoch": 5.88, "eval_anthropic_toxic_prompts_accuracy": 0.09690625, "eval_anthropic_toxic_prompts_bleu_score": 2.317182031116432, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08863940473307765, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5967903137207031, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010474420402713422, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.641948938369751, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.986, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.29, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.404, "eval_anthropic_toxic_prompts_num_pred_words": 47.492, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 38.16614775785699, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.17395441823891908, "eval_anthropic_toxic_prompts_runtime": 21.7978, "eval_anthropic_toxic_prompts_samples_per_second": 22.938, "eval_anthropic_toxic_prompts_steps_per_second": 0.046, "eval_anthropic_toxic_prompts_token_set_f1": 0.3030149576520227, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006170249046331457, "eval_anthropic_toxic_prompts_token_set_precision": 0.34521743389186765, "eval_anthropic_toxic_prompts_token_set_recall": 0.3060803273150024, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 30625 }, { "epoch": 5.88, "eval_arxiv_accuracy": 0.30934375, "eval_arxiv_bleu_score": 3.382515072277503, "eval_arxiv_bleu_score_sem": 0.09941382190161986, "eval_arxiv_emb_cos_sim": 0.6513580083847046, "eval_arxiv_emb_cos_sim_sem": 0.00900234952223933, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8381197452545166, "eval_arxiv_n_ngrams_match_1": 12.278, "eval_arxiv_n_ngrams_match_2": 2.202, "eval_arxiv_n_ngrams_match_3": 0.412, "eval_arxiv_num_pred_words": 39.182, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 46.438076886268775, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.2883738238974642, "eval_arxiv_runtime": 17.7678, "eval_arxiv_samples_per_second": 28.141, "eval_arxiv_steps_per_second": 0.056, "eval_arxiv_token_set_f1": 0.28930532566938244, "eval_arxiv_token_set_f1_sem": 0.0039930677042840125, "eval_arxiv_token_set_precision": 0.2311305139145135, "eval_arxiv_token_set_recall": 0.416346233017776, "eval_arxiv_true_num_tokens": 64.0, "step": 30625 }, { "epoch": 5.88, "eval_python_code_alpaca_accuracy": 0.13021875, "eval_python_code_alpaca_bleu_score": 3.0414240146457576, "eval_python_code_alpaca_bleu_score_sem": 0.0941243423484182, "eval_python_code_alpaca_emb_cos_sim": 0.6037212014198303, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01196555351251558, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.404144048690796, "eval_python_code_alpaca_n_ngrams_match_1": 7.038, "eval_python_code_alpaca_n_ngrams_match_2": 1.604, "eval_python_code_alpaca_n_ngrams_match_3": 0.4, "eval_python_code_alpaca_num_pred_words": 40.216, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 30.088530381579464, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2401659567842701, "eval_python_code_alpaca_runtime": 15.2337, "eval_python_code_alpaca_samples_per_second": 32.822, "eval_python_code_alpaca_steps_per_second": 0.066, "eval_python_code_alpaca_token_set_f1": 0.36518958850939326, "eval_python_code_alpaca_token_set_f1_sem": 0.005729664256662593, "eval_python_code_alpaca_token_set_precision": 0.36651890213393223, "eval_python_code_alpaca_token_set_recall": 0.40220446678496957, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 30625 }, { "epoch": 5.88, "eval_wikibio_accuracy": 0.29025, "eval_wikibio_bleu_score": 5.304119152730881, "eval_wikibio_bleu_score_sem": 0.1965021409657196, "eval_wikibio_emb_cos_sim": 0.687311053276062, "eval_wikibio_emb_cos_sim_sem": 0.010802350542997512, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.245105743408203, "eval_wikibio_n_ngrams_match_1": 9.61, "eval_wikibio_n_ngrams_match_2": 3.176, "eval_wikibio_n_ngrams_match_3": 1.132, "eval_wikibio_num_pred_words": 37.752, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 69.76313674572921, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3214556022208892, "eval_wikibio_runtime": 18.8061, "eval_wikibio_samples_per_second": 26.587, "eval_wikibio_steps_per_second": 0.053, "eval_wikibio_token_set_f1": 0.29773290452739015, "eval_wikibio_token_set_f1_sem": 0.005731029723955615, "eval_wikibio_token_set_precision": 0.3073059385941136, "eval_wikibio_token_set_recall": 0.29928461317683497, "eval_wikibio_true_num_tokens": 61.1328125, "step": 30625 }, { "epoch": 5.88, "eval_nq_accuracy": 0.48153125, "eval_nq_bleu_score": 9.108636317570715, "eval_nq_bleu_score_sem": 0.41082468716068044, "eval_nq_emb_cos_sim": 0.77602618932724, "eval_nq_emb_cos_sim_sem": 0.008655139832939538, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5364458560943604, "eval_nq_n_ngrams_match_1": 20.486, "eval_nq_n_ngrams_match_2": 6.876, "eval_nq_n_ngrams_match_3": 2.902, "eval_nq_num_pred_words": 48.736, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.634685585148935, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3876309017159371, "eval_nq_runtime": 20.303, "eval_nq_samples_per_second": 24.627, "eval_nq_steps_per_second": 0.049, "eval_nq_token_set_f1": 0.4124639328938928, "eval_nq_token_set_f1_sem": 0.005092074732061953, "eval_nq_token_set_precision": 0.36078636749553916, "eval_nq_token_set_recall": 0.4959744441039201, "eval_nq_true_num_tokens": 64.0, "step": 30625 }, { "epoch": 5.88, "learning_rate": 0.001, "loss": 2.9095, "step": 30636 }, { "epoch": 5.88, "learning_rate": 0.001, "loss": 2.9053, "step": 30648 }, { "epoch": 5.89, "learning_rate": 0.001, "loss": 2.916, "step": 30660 }, { "epoch": 5.89, "learning_rate": 0.001, "loss": 2.9189, "step": 30672 }, { "epoch": 5.89, "learning_rate": 0.001, "loss": 2.9144, "step": 30684 }, { "epoch": 5.89, "learning_rate": 0.001, "loss": 2.9069, "step": 30696 }, { "epoch": 5.9, "learning_rate": 0.001, "loss": 2.9071, "step": 30708 }, { "epoch": 5.9, "learning_rate": 0.001, "loss": 2.9133, "step": 30720 }, { "epoch": 5.9, "learning_rate": 0.001, "loss": 2.8953, "step": 30732 }, { "epoch": 5.9, "learning_rate": 0.001, "loss": 2.9122, "step": 30744 }, { "epoch": 5.91, "learning_rate": 0.001, "loss": 2.9116, "step": 30756 }, { "epoch": 5.91, "learning_rate": 0.001, "loss": 2.9072, "step": 30768 }, { "epoch": 5.91, "learning_rate": 0.001, "loss": 2.913, "step": 30780 }, { "epoch": 5.91, "learning_rate": 0.001, "loss": 2.8922, "step": 30792 }, { "epoch": 5.91, "learning_rate": 0.001, "loss": 2.914, "step": 30804 }, { "epoch": 5.92, "learning_rate": 0.001, "loss": 2.9051, "step": 30816 }, { "epoch": 5.92, "learning_rate": 0.001, "loss": 2.9131, "step": 30828 }, { "epoch": 5.92, "learning_rate": 0.001, "loss": 2.9065, "step": 30840 }, { "epoch": 5.92, "learning_rate": 0.001, "loss": 2.8955, "step": 30852 }, { "epoch": 5.93, "learning_rate": 0.001, "loss": 2.9023, "step": 30864 }, { "epoch": 5.93, "learning_rate": 0.001, "loss": 2.9079, "step": 30876 }, { "epoch": 5.93, "learning_rate": 0.001, "loss": 2.9058, "step": 30888 }, { "epoch": 5.93, "learning_rate": 0.001, "loss": 2.8973, "step": 30900 }, { "epoch": 5.94, "learning_rate": 0.001, "loss": 2.9036, "step": 30912 }, { "epoch": 5.94, "learning_rate": 0.001, "loss": 2.9067, "step": 30924 }, { "epoch": 5.94, "learning_rate": 0.001, "loss": 2.9141, "step": 30936 }, { "epoch": 5.94, "learning_rate": 0.001, "loss": 2.9116, "step": 30948 }, { "epoch": 5.94, "learning_rate": 0.001, "loss": 2.8932, "step": 30960 }, { "epoch": 5.95, "learning_rate": 0.001, "loss": 2.8937, "step": 30972 }, { "epoch": 5.95, "learning_rate": 0.001, "loss": 2.8956, "step": 30984 }, { "epoch": 5.95, "learning_rate": 0.001, "loss": 2.911, "step": 30996 }, { "epoch": 5.95, "learning_rate": 0.001, "loss": 2.9071, "step": 31008 }, { "epoch": 5.96, "learning_rate": 0.001, "loss": 2.9092, "step": 31020 }, { "epoch": 5.96, "learning_rate": 0.001, "loss": 2.8976, "step": 31032 }, { "epoch": 5.96, "learning_rate": 0.001, "loss": 2.9127, "step": 31044 }, { "epoch": 5.96, "learning_rate": 0.001, "loss": 2.9024, "step": 31056 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 2.9064, "step": 31068 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 2.9061, "step": 31080 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 2.895, "step": 31092 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 2.9064, "step": 31104 }, { "epoch": 5.97, "learning_rate": 0.001, "loss": 2.9021, "step": 31116 }, { "epoch": 5.98, "learning_rate": 0.001, "loss": 2.9177, "step": 31128 }, { "epoch": 5.98, "learning_rate": 0.001, "loss": 2.9069, "step": 31140 }, { "epoch": 5.98, "learning_rate": 0.001, "loss": 2.9006, "step": 31152 }, { "epoch": 5.98, "learning_rate": 0.001, "loss": 2.9117, "step": 31164 }, { "epoch": 5.99, "learning_rate": 0.001, "loss": 2.9087, "step": 31176 }, { "epoch": 5.99, "learning_rate": 0.001, "loss": 2.9145, "step": 31188 }, { "epoch": 5.99, "learning_rate": 0.001, "loss": 2.9111, "step": 31200 }, { "epoch": 5.99, "learning_rate": 0.001, "loss": 2.9092, "step": 31212 }, { "epoch": 6.0, "learning_rate": 0.001, "loss": 2.9017, "step": 31224 }, { "epoch": 6.0, "learning_rate": 0.001, "loss": 2.902, "step": 31236 }, { "epoch": 6.0, "learning_rate": 0.001, "loss": 2.8876, "step": 31248 }, { "epoch": 6.0, "eval_ag_news_accuracy": 0.287375, "eval_ag_news_bleu_score": 4.015214203939312, "eval_ag_news_bleu_score_sem": 0.1424946105711094, "eval_ag_news_emb_cos_sim": 0.7467055916786194, "eval_ag_news_emb_cos_sim_sem": 0.00897354346837115, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.961060047149658, "eval_ag_news_n_ngrams_match_1": 12.14, "eval_ag_news_n_ngrams_match_2": 2.428, "eval_ag_news_n_ngrams_match_3": 0.664, "eval_ag_news_num_pred_words": 46.146, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 52.51296267151468, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.291699403650167, "eval_ag_news_runtime": 26.2348, "eval_ag_news_samples_per_second": 19.059, "eval_ag_news_steps_per_second": 0.038, "eval_ag_news_token_set_f1": 0.308974160282909, "eval_ag_news_token_set_f1_sem": 0.004320376120830538, "eval_ag_news_token_set_precision": 0.2823875616511164, "eval_ag_news_token_set_recall": 0.35966361874241987, "eval_ag_news_true_num_tokens": 56.09375, "step": 31250 }, { "epoch": 6.0, "eval_anthropic_toxic_prompts_accuracy": 0.096, "eval_anthropic_toxic_prompts_bleu_score": 2.3739915493739954, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10047672394127616, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6032194495201111, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009325521678453247, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.6662943363189697, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.04, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.306, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.43, "eval_anthropic_toxic_prompts_num_pred_words": 47.864, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 39.106720668232924, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.17207862529830276, "eval_anthropic_toxic_prompts_runtime": 15.1592, "eval_anthropic_toxic_prompts_samples_per_second": 32.983, "eval_anthropic_toxic_prompts_steps_per_second": 0.066, "eval_anthropic_toxic_prompts_token_set_f1": 0.31302152092834934, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006202876001572878, "eval_anthropic_toxic_prompts_token_set_precision": 0.3572100630860854, "eval_anthropic_toxic_prompts_token_set_recall": 0.3188142744818477, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 31250 }, { "epoch": 6.0, "eval_arxiv_accuracy": 0.31265625, "eval_arxiv_bleu_score": 3.3270061688436345, "eval_arxiv_bleu_score_sem": 0.09410719973132245, "eval_arxiv_emb_cos_sim": 0.6667373180389404, "eval_arxiv_emb_cos_sim_sem": 0.007759442910920113, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8215150833129883, "eval_arxiv_n_ngrams_match_1": 12.286, "eval_arxiv_n_ngrams_match_2": 2.14, "eval_arxiv_n_ngrams_match_3": 0.404, "eval_arxiv_num_pred_words": 39.652, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 45.673354864037535, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.28890175618743996, "eval_arxiv_runtime": 13.9317, "eval_arxiv_samples_per_second": 35.889, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.29112317015796885, "eval_arxiv_token_set_f1_sem": 0.00383495199730816, "eval_arxiv_token_set_precision": 0.23451290562972754, "eval_arxiv_token_set_recall": 0.410144685003836, "eval_arxiv_true_num_tokens": 64.0, "step": 31250 }, { "epoch": 6.0, "eval_python_code_alpaca_accuracy": 0.13575, "eval_python_code_alpaca_bleu_score": 3.12962272204674, "eval_python_code_alpaca_bleu_score_sem": 0.09627211251719704, "eval_python_code_alpaca_emb_cos_sim": 0.629444420337677, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009285553953719267, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.366333484649658, "eval_python_code_alpaca_n_ngrams_match_1": 7.464, "eval_python_code_alpaca_n_ngrams_match_2": 1.708, "eval_python_code_alpaca_n_ngrams_match_3": 0.394, "eval_python_code_alpaca_num_pred_words": 40.52, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 28.972105409499672, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.25402462240221274, "eval_python_code_alpaca_runtime": 12.9798, "eval_python_code_alpaca_samples_per_second": 38.521, "eval_python_code_alpaca_steps_per_second": 0.077, "eval_python_code_alpaca_token_set_f1": 0.3870647442475046, "eval_python_code_alpaca_token_set_f1_sem": 0.005686533325171371, "eval_python_code_alpaca_token_set_precision": 0.3932321531850331, "eval_python_code_alpaca_token_set_recall": 0.41167386461257605, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 31250 }, { "epoch": 6.0, "eval_wikibio_accuracy": 0.28753125, "eval_wikibio_bleu_score": 5.232131776967655, "eval_wikibio_bleu_score_sem": 0.1854721115056108, "eval_wikibio_emb_cos_sim": 0.7211657166481018, "eval_wikibio_emb_cos_sim_sem": 0.009413412345577635, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.320174694061279, "eval_wikibio_n_ngrams_match_1": 10.026, "eval_wikibio_n_ngrams_match_2": 3.172, "eval_wikibio_n_ngrams_match_3": 1.074, "eval_wikibio_num_pred_words": 39.128, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 75.20176444623209, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33436423551524785, "eval_wikibio_runtime": 17.6294, "eval_wikibio_samples_per_second": 28.362, "eval_wikibio_steps_per_second": 0.057, "eval_wikibio_token_set_f1": 0.3076759294899552, "eval_wikibio_token_set_f1_sem": 0.004869346963903943, "eval_wikibio_token_set_precision": 0.321417208075223, "eval_wikibio_token_set_recall": 0.30604146929154535, "eval_wikibio_true_num_tokens": 61.1328125, "step": 31250 }, { "epoch": 6.0, "eval_nq_accuracy": 0.480625, "eval_nq_bleu_score": 9.321623981279211, "eval_nq_bleu_score_sem": 0.4011978620674311, "eval_nq_emb_cos_sim": 0.7748517990112305, "eval_nq_emb_cos_sim_sem": 0.009179478897095951, "eval_nq_emb_top1_equal": 0.1796875, "eval_nq_emb_top1_equal_sem": 0.034068008879424266, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5365450382232666, "eval_nq_n_ngrams_match_1": 20.56, "eval_nq_n_ngrams_match_2": 6.96, "eval_nq_n_ngrams_match_3": 2.94, "eval_nq_num_pred_words": 49.014, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.635938782309685, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3921534682185354, "eval_nq_runtime": 25.3882, "eval_nq_samples_per_second": 19.694, "eval_nq_steps_per_second": 0.039, "eval_nq_token_set_f1": 0.4133887882062554, "eval_nq_token_set_f1_sem": 0.004906133716717383, "eval_nq_token_set_precision": 0.3629658953597597, "eval_nq_token_set_recall": 0.49369864023304144, "eval_nq_true_num_tokens": 64.0, "step": 31250 }, { "epoch": 6.0, "learning_rate": 0.001, "loss": 2.8757, "step": 31260 }, { "epoch": 6.0, "learning_rate": 0.001, "loss": 2.8896, "step": 31272 }, { "epoch": 6.01, "learning_rate": 0.001, "loss": 2.8965, "step": 31284 }, { "epoch": 6.01, "learning_rate": 0.001, "loss": 2.8779, "step": 31296 }, { "epoch": 6.01, "learning_rate": 0.001, "loss": 2.8833, "step": 31308 }, { "epoch": 6.01, "learning_rate": 0.001, "loss": 2.8868, "step": 31320 }, { "epoch": 6.02, "learning_rate": 0.001, "loss": 2.8871, "step": 31332 }, { "epoch": 6.02, "learning_rate": 0.001, "loss": 2.8783, "step": 31344 }, { "epoch": 6.02, "learning_rate": 0.001, "loss": 2.8899, "step": 31356 }, { "epoch": 6.02, "learning_rate": 0.001, "loss": 2.886, "step": 31368 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 2.8927, "step": 31380 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 2.8869, "step": 31392 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 2.8895, "step": 31404 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 2.8845, "step": 31416 }, { "epoch": 6.03, "learning_rate": 0.001, "loss": 2.8887, "step": 31428 }, { "epoch": 6.04, "learning_rate": 0.001, "loss": 2.8739, "step": 31440 }, { "epoch": 6.04, "learning_rate": 0.001, "loss": 2.8859, "step": 31452 }, { "epoch": 6.04, "learning_rate": 0.001, "loss": 2.8845, "step": 31464 }, { "epoch": 6.04, "learning_rate": 0.001, "loss": 2.8819, "step": 31476 }, { "epoch": 6.05, "learning_rate": 0.001, "loss": 2.8898, "step": 31488 }, { "epoch": 6.05, "learning_rate": 0.001, "loss": 2.8784, "step": 31500 }, { "epoch": 6.05, "learning_rate": 0.001, "loss": 2.8811, "step": 31512 }, { "epoch": 6.05, "learning_rate": 0.001, "loss": 2.8754, "step": 31524 }, { "epoch": 6.06, "learning_rate": 0.001, "loss": 2.8832, "step": 31536 }, { "epoch": 6.06, "learning_rate": 0.001, "loss": 2.8815, "step": 31548 }, { "epoch": 6.06, "learning_rate": 0.001, "loss": 2.8804, "step": 31560 }, { "epoch": 6.06, "learning_rate": 0.001, "loss": 2.8885, "step": 31572 }, { "epoch": 6.06, "learning_rate": 0.001, "loss": 2.8886, "step": 31584 }, { "epoch": 6.07, "learning_rate": 0.001, "loss": 2.8744, "step": 31596 }, { "epoch": 6.07, "learning_rate": 0.001, "loss": 2.8763, "step": 31608 }, { "epoch": 6.07, "learning_rate": 0.001, "loss": 2.8784, "step": 31620 }, { "epoch": 6.07, "learning_rate": 0.001, "loss": 2.8879, "step": 31632 }, { "epoch": 6.08, "learning_rate": 0.001, "loss": 2.8826, "step": 31644 }, { "epoch": 6.08, "learning_rate": 0.001, "loss": 2.8887, "step": 31656 }, { "epoch": 6.08, "learning_rate": 0.001, "loss": 2.8835, "step": 31668 }, { "epoch": 6.08, "learning_rate": 0.001, "loss": 2.8862, "step": 31680 }, { "epoch": 6.09, "learning_rate": 0.001, "loss": 2.8757, "step": 31692 }, { "epoch": 6.09, "learning_rate": 0.001, "loss": 2.8834, "step": 31704 }, { "epoch": 6.09, "learning_rate": 0.001, "loss": 2.8781, "step": 31716 }, { "epoch": 6.09, "learning_rate": 0.001, "loss": 2.8809, "step": 31728 }, { "epoch": 6.09, "learning_rate": 0.001, "loss": 2.8739, "step": 31740 }, { "epoch": 6.1, "learning_rate": 0.001, "loss": 2.8733, "step": 31752 }, { "epoch": 6.1, "learning_rate": 0.001, "loss": 2.8898, "step": 31764 }, { "epoch": 6.1, "learning_rate": 0.001, "loss": 2.8733, "step": 31776 }, { "epoch": 6.1, "learning_rate": 0.001, "loss": 2.8903, "step": 31788 }, { "epoch": 6.11, "learning_rate": 0.001, "loss": 2.8926, "step": 31800 }, { "epoch": 6.11, "learning_rate": 0.001, "loss": 2.8678, "step": 31812 }, { "epoch": 6.11, "learning_rate": 0.001, "loss": 2.8809, "step": 31824 }, { "epoch": 6.11, "learning_rate": 0.001, "loss": 2.8914, "step": 31836 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 2.8796, "step": 31848 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 2.88, "step": 31860 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 2.8807, "step": 31872 }, { "epoch": 6.12, "eval_ag_news_accuracy": 0.28703125, "eval_ag_news_bleu_score": 3.9932676838373107, "eval_ag_news_bleu_score_sem": 0.13873922496043958, "eval_ag_news_emb_cos_sim": 0.7425938844680786, "eval_ag_news_emb_cos_sim_sem": 0.009404906584712531, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9517323970794678, "eval_ag_news_n_ngrams_match_1": 12.048, "eval_ag_news_n_ngrams_match_2": 2.434, "eval_ag_news_n_ngrams_match_3": 0.642, "eval_ag_news_num_pred_words": 45.244, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 52.02541749188316, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.29268099682497567, "eval_ag_news_runtime": 15.554, "eval_ag_news_samples_per_second": 32.146, "eval_ag_news_steps_per_second": 0.064, "eval_ag_news_token_set_f1": 0.3065164175911375, "eval_ag_news_token_set_f1_sem": 0.004548383426737349, "eval_ag_news_token_set_precision": 0.28022749987454626, "eval_ag_news_token_set_recall": 0.35378235643225114, "eval_ag_news_true_num_tokens": 56.09375, "step": 31875 }, { "epoch": 6.12, "eval_anthropic_toxic_prompts_accuracy": 0.09775, "eval_anthropic_toxic_prompts_bleu_score": 2.3144127335439033, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.08886186451074533, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5984156131744385, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010762831945617978, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0390625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.017191973462108996, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5901401042938232, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.006, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.284, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.398, "eval_anthropic_toxic_prompts_num_pred_words": 47.342, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 36.23915283173629, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1761338177152958, "eval_anthropic_toxic_prompts_runtime": 9.8608, "eval_anthropic_toxic_prompts_samples_per_second": 50.706, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3076996036298104, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006039570351417131, "eval_anthropic_toxic_prompts_token_set_precision": 0.35492655821637065, "eval_anthropic_toxic_prompts_token_set_recall": 0.3071782174691777, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 31875 }, { "epoch": 6.12, "eval_arxiv_accuracy": 0.31171875, "eval_arxiv_bleu_score": 3.6209685211712053, "eval_arxiv_bleu_score_sem": 0.10203255348629121, "eval_arxiv_emb_cos_sim": 0.6666562557220459, "eval_arxiv_emb_cos_sim_sem": 0.007851389553669168, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8036117553710938, "eval_arxiv_n_ngrams_match_1": 12.728, "eval_arxiv_n_ngrams_match_2": 2.322, "eval_arxiv_n_ngrams_match_3": 0.48, "eval_arxiv_num_pred_words": 39.948, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 44.86292614630713, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29874082080978565, "eval_arxiv_runtime": 11.4394, "eval_arxiv_samples_per_second": 43.709, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.3005493610398855, "eval_arxiv_token_set_f1_sem": 0.004011125521915269, "eval_arxiv_token_set_precision": 0.23983913848504776, "eval_arxiv_token_set_recall": 0.42529561998233933, "eval_arxiv_true_num_tokens": 64.0, "step": 31875 }, { "epoch": 6.12, "eval_python_code_alpaca_accuracy": 0.13559375, "eval_python_code_alpaca_bleu_score": 3.2798934394768655, "eval_python_code_alpaca_bleu_score_sem": 0.11962363733603859, "eval_python_code_alpaca_emb_cos_sim": 0.6181508302688599, "eval_python_code_alpaca_emb_cos_sim_sem": 0.012074139599347836, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.3517544269561768, "eval_python_code_alpaca_n_ngrams_match_1": 7.464, "eval_python_code_alpaca_n_ngrams_match_2": 1.746, "eval_python_code_alpaca_n_ngrams_match_3": 0.496, "eval_python_code_alpaca_num_pred_words": 40.594, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 28.552783499566868, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.25366845011818695, "eval_python_code_alpaca_runtime": 27.361, "eval_python_code_alpaca_samples_per_second": 18.274, "eval_python_code_alpaca_steps_per_second": 0.037, "eval_python_code_alpaca_token_set_f1": 0.382675939671801, "eval_python_code_alpaca_token_set_f1_sem": 0.00593786920103333, "eval_python_code_alpaca_token_set_precision": 0.38861998928986086, "eval_python_code_alpaca_token_set_recall": 0.4088522752550497, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 31875 }, { "epoch": 6.12, "eval_wikibio_accuracy": 0.287, "eval_wikibio_bleu_score": 5.426647334274031, "eval_wikibio_bleu_score_sem": 0.20950147700261437, "eval_wikibio_emb_cos_sim": 0.6828176975250244, "eval_wikibio_emb_cos_sim_sem": 0.01142746733175503, "eval_wikibio_emb_top1_equal": 0.109375, "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.277820110321045, "eval_wikibio_n_ngrams_match_1": 9.624, "eval_wikibio_n_ngrams_match_2": 3.098, "eval_wikibio_n_ngrams_match_3": 1.1, "eval_wikibio_num_pred_words": 37.146, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.08313533345931, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.323363437138928, "eval_wikibio_runtime": 18.8164, "eval_wikibio_samples_per_second": 26.573, "eval_wikibio_steps_per_second": 0.053, "eval_wikibio_token_set_f1": 0.301290889736359, "eval_wikibio_token_set_f1_sem": 0.005463492256677466, "eval_wikibio_token_set_precision": 0.30933469186489587, "eval_wikibio_token_set_recall": 0.31016110244566325, "eval_wikibio_true_num_tokens": 61.1328125, "step": 31875 }, { "epoch": 6.12, "eval_nq_accuracy": 0.484375, "eval_nq_bleu_score": 9.371793599953639, "eval_nq_bleu_score_sem": 0.41320039465612174, "eval_nq_emb_cos_sim": 0.7752724885940552, "eval_nq_emb_cos_sim_sem": 0.008654330481739227, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5239036083221436, "eval_nq_n_ngrams_match_1": 20.84, "eval_nq_n_ngrams_match_2": 6.944, "eval_nq_n_ngrams_match_3": 2.992, "eval_nq_num_pred_words": 48.744, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.477207854742506, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3976274855456138, "eval_nq_runtime": 16.8623, "eval_nq_samples_per_second": 29.652, "eval_nq_steps_per_second": 0.059, "eval_nq_token_set_f1": 0.4178742295278702, "eval_nq_token_set_f1_sem": 0.005004144786754618, "eval_nq_token_set_precision": 0.3687919901897754, "eval_nq_token_set_recall": 0.49331111295269586, "eval_nq_true_num_tokens": 64.0, "step": 31875 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 2.8861, "step": 31884 }, { "epoch": 6.12, "learning_rate": 0.001, "loss": 2.8842, "step": 31896 }, { "epoch": 6.13, "learning_rate": 0.001, "loss": 2.8994, "step": 31908 }, { "epoch": 6.13, "learning_rate": 0.001, "loss": 2.8784, "step": 31920 }, { "epoch": 6.13, "learning_rate": 0.001, "loss": 2.8736, "step": 31932 }, { "epoch": 6.13, "learning_rate": 0.001, "loss": 2.8889, "step": 31944 }, { "epoch": 6.14, "learning_rate": 0.001, "loss": 2.8885, "step": 31956 }, { "epoch": 6.14, "learning_rate": 0.001, "loss": 2.8855, "step": 31968 }, { "epoch": 6.14, "learning_rate": 0.001, "loss": 2.8923, "step": 31980 }, { "epoch": 6.14, "learning_rate": 0.001, "loss": 2.885, "step": 31992 }, { "epoch": 6.15, "learning_rate": 0.001, "loss": 2.8806, "step": 32004 }, { "epoch": 6.15, "learning_rate": 0.001, "loss": 2.8674, "step": 32016 }, { "epoch": 6.15, "learning_rate": 0.001, "loss": 2.8798, "step": 32028 }, { "epoch": 6.15, "learning_rate": 0.001, "loss": 2.8716, "step": 32040 }, { "epoch": 6.15, "learning_rate": 0.001, "loss": 2.894, "step": 32052 }, { "epoch": 6.16, "learning_rate": 0.001, "loss": 2.8951, "step": 32064 }, { "epoch": 6.16, "learning_rate": 0.001, "loss": 2.8926, "step": 32076 }, { "epoch": 6.16, "learning_rate": 0.001, "loss": 2.8789, "step": 32088 }, { "epoch": 6.16, "learning_rate": 0.001, "loss": 2.8742, "step": 32100 }, { "epoch": 6.17, "learning_rate": 0.001, "loss": 2.8793, "step": 32112 }, { "epoch": 6.17, "learning_rate": 0.001, "loss": 2.8785, "step": 32124 }, { "epoch": 6.17, "learning_rate": 0.001, "loss": 2.8787, "step": 32136 }, { "epoch": 6.17, "learning_rate": 0.001, "loss": 2.8778, "step": 32148 }, { "epoch": 6.18, "learning_rate": 0.001, "loss": 2.8796, "step": 32160 }, { "epoch": 6.18, "learning_rate": 0.001, "loss": 2.8815, "step": 32172 }, { "epoch": 6.18, "learning_rate": 0.001, "loss": 2.8788, "step": 32184 }, { "epoch": 6.18, "learning_rate": 0.001, "loss": 2.8923, "step": 32196 }, { "epoch": 6.18, "learning_rate": 0.001, "loss": 2.8701, "step": 32208 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 2.8825, "step": 32220 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 2.8752, "step": 32232 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 2.8884, "step": 32244 }, { "epoch": 6.19, "learning_rate": 0.001, "loss": 2.8802, "step": 32256 }, { "epoch": 6.2, "learning_rate": 0.001, "loss": 2.8843, "step": 32268 }, { "epoch": 6.2, "learning_rate": 0.001, "loss": 2.8851, "step": 32280 }, { "epoch": 6.2, "learning_rate": 0.001, "loss": 2.8824, "step": 32292 }, { "epoch": 6.2, "learning_rate": 0.001, "loss": 2.8841, "step": 32304 }, { "epoch": 6.21, "learning_rate": 0.001, "loss": 2.8817, "step": 32316 }, { "epoch": 6.21, "learning_rate": 0.001, "loss": 2.8835, "step": 32328 }, { "epoch": 6.21, "learning_rate": 0.001, "loss": 2.8795, "step": 32340 }, { "epoch": 6.21, "learning_rate": 0.001, "loss": 2.8823, "step": 32352 }, { "epoch": 6.21, "learning_rate": 0.001, "loss": 2.8797, "step": 32364 }, { "epoch": 6.22, "learning_rate": 0.001, "loss": 2.8828, "step": 32376 }, { "epoch": 6.22, "learning_rate": 0.001, "loss": 2.865, "step": 32388 }, { "epoch": 6.22, "learning_rate": 0.001, "loss": 2.8777, "step": 32400 }, { "epoch": 6.22, "learning_rate": 0.001, "loss": 2.8828, "step": 32412 }, { "epoch": 6.23, "learning_rate": 0.001, "loss": 2.8739, "step": 32424 }, { "epoch": 6.23, "learning_rate": 0.001, "loss": 2.8788, "step": 32436 }, { "epoch": 6.23, "learning_rate": 0.001, "loss": 2.8715, "step": 32448 }, { "epoch": 6.23, "learning_rate": 0.001, "loss": 2.8719, "step": 32460 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 2.8832, "step": 32472 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 2.8856, "step": 32484 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 2.8796, "step": 32496 }, { "epoch": 6.24, "eval_ag_news_accuracy": 0.28634375, "eval_ag_news_bleu_score": 3.959102074807527, "eval_ag_news_bleu_score_sem": 0.14156623777629107, "eval_ag_news_emb_cos_sim": 0.7416963577270508, "eval_ag_news_emb_cos_sim_sem": 0.00971437969807139, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.943134069442749, "eval_ag_news_n_ngrams_match_1": 12.116, "eval_ag_news_n_ngrams_match_2": 2.384, "eval_ag_news_n_ngrams_match_3": 0.62, "eval_ag_news_num_pred_words": 45.854, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 51.580003558481124, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2956122570199592, "eval_ag_news_runtime": 13.9485, "eval_ag_news_samples_per_second": 35.846, "eval_ag_news_steps_per_second": 0.072, "eval_ag_news_token_set_f1": 0.3102278457633787, "eval_ag_news_token_set_f1_sem": 0.0042975261292153245, "eval_ag_news_token_set_precision": 0.28516797454872583, "eval_ag_news_token_set_recall": 0.3568302725876157, "eval_ag_news_true_num_tokens": 56.09375, "step": 32500 }, { "epoch": 6.24, "eval_anthropic_toxic_prompts_accuracy": 0.097375, "eval_anthropic_toxic_prompts_bleu_score": 2.346702232100953, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10244771733148358, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.5916671752929688, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010358631156052853, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.61450457572937, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 4.842, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.244, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.396, "eval_anthropic_toxic_prompts_num_pred_words": 46.092, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 37.13294480574547, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.17110643622776062, "eval_anthropic_toxic_prompts_runtime": 14.0638, "eval_anthropic_toxic_prompts_samples_per_second": 35.552, "eval_anthropic_toxic_prompts_steps_per_second": 0.071, "eval_anthropic_toxic_prompts_token_set_f1": 0.2987623579083474, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006121050598138175, "eval_anthropic_toxic_prompts_token_set_precision": 0.33639435889346403, "eval_anthropic_toxic_prompts_token_set_recall": 0.304703249539111, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 32500 }, { "epoch": 6.24, "eval_arxiv_accuracy": 0.31234375, "eval_arxiv_bleu_score": 3.6087078264334114, "eval_arxiv_bleu_score_sem": 0.10859388044335394, "eval_arxiv_emb_cos_sim": 0.6700974106788635, "eval_arxiv_emb_cos_sim_sem": 0.008436426006581841, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.803508758544922, "eval_arxiv_n_ngrams_match_1": 12.746, "eval_arxiv_n_ngrams_match_2": 2.37, "eval_arxiv_n_ngrams_match_3": 0.5, "eval_arxiv_num_pred_words": 39.796, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 44.858305645253836, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29815410898570505, "eval_arxiv_runtime": 16.1807, "eval_arxiv_samples_per_second": 30.901, "eval_arxiv_steps_per_second": 0.062, "eval_arxiv_token_set_f1": 0.2996501984122197, "eval_arxiv_token_set_f1_sem": 0.004016272096315681, "eval_arxiv_token_set_precision": 0.24099347320557093, "eval_arxiv_token_set_recall": 0.4225381584185373, "eval_arxiv_true_num_tokens": 64.0, "step": 32500 }, { "epoch": 6.24, "eval_python_code_alpaca_accuracy": 0.13284375, "eval_python_code_alpaca_bleu_score": 3.3482002984629546, "eval_python_code_alpaca_bleu_score_sem": 0.10628220002759486, "eval_python_code_alpaca_emb_cos_sim": 0.6326918601989746, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010186780015427714, "eval_python_code_alpaca_emb_top1_equal": 0.0546875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.338104248046875, "eval_python_code_alpaca_n_ngrams_match_1": 7.432, "eval_python_code_alpaca_n_ngrams_match_2": 1.736, "eval_python_code_alpaca_n_ngrams_match_3": 0.478, "eval_python_code_alpaca_num_pred_words": 40.242, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 28.165680916826854, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2606050391084044, "eval_python_code_alpaca_runtime": 13.5828, "eval_python_code_alpaca_samples_per_second": 36.811, "eval_python_code_alpaca_steps_per_second": 0.074, "eval_python_code_alpaca_token_set_f1": 0.385296779102028, "eval_python_code_alpaca_token_set_f1_sem": 0.005639627371203578, "eval_python_code_alpaca_token_set_precision": 0.3927205218850797, "eval_python_code_alpaca_token_set_recall": 0.41302414576091084, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 32500 }, { "epoch": 6.24, "eval_wikibio_accuracy": 0.2883125, "eval_wikibio_bleu_score": 5.321655166915569, "eval_wikibio_bleu_score_sem": 0.1951197750528287, "eval_wikibio_emb_cos_sim": 0.6970815062522888, "eval_wikibio_emb_cos_sim_sem": 0.010530089145606726, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.244655132293701, "eval_wikibio_n_ngrams_match_1": 9.666, "eval_wikibio_n_ngrams_match_2": 3.104, "eval_wikibio_n_ngrams_match_3": 1.1, "eval_wikibio_num_pred_words": 36.992, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 69.73170778258094, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32495916898779176, "eval_wikibio_runtime": 20.358, "eval_wikibio_samples_per_second": 24.56, "eval_wikibio_steps_per_second": 0.049, "eval_wikibio_token_set_f1": 0.30214246471603373, "eval_wikibio_token_set_f1_sem": 0.0056686680788928245, "eval_wikibio_token_set_precision": 0.3095661244852683, "eval_wikibio_token_set_recall": 0.309299235578218, "eval_wikibio_true_num_tokens": 61.1328125, "step": 32500 }, { "epoch": 6.24, "eval_nq_accuracy": 0.483125, "eval_nq_bleu_score": 9.238648708247194, "eval_nq_bleu_score_sem": 0.3943277651760115, "eval_nq_emb_cos_sim": 0.7835253477096558, "eval_nq_emb_cos_sim_sem": 0.008719733171005023, "eval_nq_emb_top1_equal": 0.2109375, "eval_nq_emb_top1_equal_sem": 0.03620184850179216, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.520721197128296, "eval_nq_n_ngrams_match_1": 20.984, "eval_nq_n_ngrams_match_2": 6.988, "eval_nq_n_ngrams_match_3": 2.914, "eval_nq_num_pred_words": 49.11, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.437563364790996, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3980849746181533, "eval_nq_runtime": 15.8495, "eval_nq_samples_per_second": 31.547, "eval_nq_steps_per_second": 0.063, "eval_nq_token_set_f1": 0.42203291575286733, "eval_nq_token_set_f1_sem": 0.004843009113480223, "eval_nq_token_set_precision": 0.3713153285202159, "eval_nq_token_set_recall": 0.5011721417165745, "eval_nq_true_num_tokens": 64.0, "step": 32500 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 2.8826, "step": 32508 }, { "epoch": 6.24, "learning_rate": 0.001, "loss": 2.874, "step": 32520 }, { "epoch": 6.25, "learning_rate": 0.001, "loss": 2.874, "step": 32532 }, { "epoch": 6.25, "learning_rate": 0.001, "loss": 2.8716, "step": 32544 }, { "epoch": 6.25, "learning_rate": 0.001, "loss": 2.8831, "step": 32556 }, { "epoch": 6.25, "learning_rate": 0.001, "loss": 2.8823, "step": 32568 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 2.8761, "step": 32580 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 2.8757, "step": 32592 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 2.8733, "step": 32604 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 2.8778, "step": 32616 }, { "epoch": 6.26, "learning_rate": 0.001, "loss": 2.8672, "step": 32628 }, { "epoch": 6.27, "learning_rate": 0.001, "loss": 2.8822, "step": 32640 }, { "epoch": 6.27, "learning_rate": 0.001, "loss": 2.8829, "step": 32652 }, { "epoch": 6.27, "learning_rate": 0.001, "loss": 2.8925, "step": 32664 }, { "epoch": 6.27, "learning_rate": 0.001, "loss": 2.8867, "step": 32676 }, { "epoch": 6.28, "learning_rate": 0.001, "loss": 2.8827, "step": 32688 }, { "epoch": 6.28, "learning_rate": 0.001, "loss": 2.8769, "step": 32700 }, { "epoch": 6.28, "learning_rate": 0.001, "loss": 2.8743, "step": 32712 }, { "epoch": 6.28, "learning_rate": 0.001, "loss": 2.8834, "step": 32724 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.8744, "step": 32736 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.8808, "step": 32748 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.8799, "step": 32760 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.8685, "step": 32772 }, { "epoch": 6.29, "learning_rate": 0.001, "loss": 2.8736, "step": 32784 }, { "epoch": 6.3, "learning_rate": 0.001, "loss": 2.8736, "step": 32796 }, { "epoch": 6.3, "learning_rate": 0.001, "loss": 2.8609, "step": 32808 }, { "epoch": 6.3, "learning_rate": 0.001, "loss": 2.8627, "step": 32820 }, { "epoch": 6.3, "learning_rate": 0.001, "loss": 2.8806, "step": 32832 }, { "epoch": 6.31, "learning_rate": 0.001, "loss": 2.8693, "step": 32844 }, { "epoch": 6.31, "learning_rate": 0.001, "loss": 2.8781, "step": 32856 }, { "epoch": 6.31, "learning_rate": 0.001, "loss": 2.8852, "step": 32868 }, { "epoch": 6.31, "learning_rate": 0.001, "loss": 2.8699, "step": 32880 }, { "epoch": 6.32, "learning_rate": 0.001, "loss": 2.8728, "step": 32892 }, { "epoch": 6.32, "learning_rate": 0.001, "loss": 2.8764, "step": 32904 }, { "epoch": 6.32, "learning_rate": 0.001, "loss": 2.8722, "step": 32916 }, { "epoch": 6.32, "learning_rate": 0.001, "loss": 2.8667, "step": 32928 }, { "epoch": 6.32, "learning_rate": 0.001, "loss": 2.8789, "step": 32940 }, { "epoch": 6.33, "learning_rate": 0.001, "loss": 2.8793, "step": 32952 }, { "epoch": 6.33, "learning_rate": 0.001, "loss": 2.8865, "step": 32964 }, { "epoch": 6.33, "learning_rate": 0.001, "loss": 2.8736, "step": 32976 }, { "epoch": 6.33, "learning_rate": 0.001, "loss": 2.8707, "step": 32988 }, { "epoch": 6.34, "learning_rate": 0.001, "loss": 2.8649, "step": 33000 }, { "epoch": 6.34, "learning_rate": 0.001, "loss": 2.8785, "step": 33012 }, { "epoch": 6.34, "learning_rate": 0.001, "loss": 2.8749, "step": 33024 }, { "epoch": 6.34, "learning_rate": 0.001, "loss": 2.8682, "step": 33036 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 2.8829, "step": 33048 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 2.8848, "step": 33060 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 2.8784, "step": 33072 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 2.8633, "step": 33084 }, { "epoch": 6.35, "learning_rate": 0.001, "loss": 2.8725, "step": 33096 }, { "epoch": 6.36, "learning_rate": 0.001, "loss": 2.89, "step": 33108 }, { "epoch": 6.36, "learning_rate": 0.001, "loss": 2.8774, "step": 33120 }, { "epoch": 6.36, "eval_ag_news_accuracy": 0.287, "eval_ag_news_bleu_score": 3.950032735542012, "eval_ag_news_bleu_score_sem": 0.14019400129715076, "eval_ag_news_emb_cos_sim": 0.7462149858474731, "eval_ag_news_emb_cos_sim_sem": 0.008832162523585865, "eval_ag_news_emb_top1_equal": 0.1640625, "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9389305114746094, "eval_ag_news_n_ngrams_match_1": 12.298, "eval_ag_news_n_ngrams_match_2": 2.374, "eval_ag_news_n_ngrams_match_3": 0.602, "eval_ag_news_num_pred_words": 46.454, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 51.36363909240867, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2955832942289821, "eval_ag_news_runtime": 17.5499, "eval_ag_news_samples_per_second": 28.49, "eval_ag_news_steps_per_second": 0.057, "eval_ag_news_token_set_f1": 0.3136387395423549, "eval_ag_news_token_set_f1_sem": 0.00412849699900138, "eval_ag_news_token_set_precision": 0.2888232867182405, "eval_ag_news_token_set_recall": 0.36097510299699265, "eval_ag_news_true_num_tokens": 56.09375, "step": 33125 }, { "epoch": 6.36, "eval_anthropic_toxic_prompts_accuracy": 0.09821875, "eval_anthropic_toxic_prompts_bleu_score": 2.371140635721898, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09505355554870674, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6085380911827087, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009689355665312555, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.61575984954834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.106, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.328, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.404, "eval_anthropic_toxic_prompts_num_pred_words": 47.336, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 37.179586086846456, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1765119750915582, "eval_anthropic_toxic_prompts_runtime": 10.3987, "eval_anthropic_toxic_prompts_samples_per_second": 48.083, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.31001100730730663, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006122938187370318, "eval_anthropic_toxic_prompts_token_set_precision": 0.35626542884031015, "eval_anthropic_toxic_prompts_token_set_recall": 0.3126130971859074, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 33125 }, { "epoch": 6.36, "eval_arxiv_accuracy": 0.311375, "eval_arxiv_bleu_score": 3.6810036229380168, "eval_arxiv_bleu_score_sem": 0.10860359271281318, "eval_arxiv_emb_cos_sim": 0.6774710416793823, "eval_arxiv_emb_cos_sim_sem": 0.0073367804846182, "eval_arxiv_emb_top1_equal": 0.15625, "eval_arxiv_emb_top1_equal_sem": 0.03221922156442571, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.8185007572174072, "eval_arxiv_n_ngrams_match_1": 12.88, "eval_arxiv_n_ngrams_match_2": 2.358, "eval_arxiv_n_ngrams_match_3": 0.51, "eval_arxiv_num_pred_words": 40.502, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 45.53588776801289, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29958586833396983, "eval_arxiv_runtime": 14.4801, "eval_arxiv_samples_per_second": 34.53, "eval_arxiv_steps_per_second": 0.069, "eval_arxiv_token_set_f1": 0.3002830128301934, "eval_arxiv_token_set_f1_sem": 0.004005425068673722, "eval_arxiv_token_set_precision": 0.24487552387867292, "eval_arxiv_token_set_recall": 0.41109997736572546, "eval_arxiv_true_num_tokens": 64.0, "step": 33125 }, { "epoch": 6.36, "eval_python_code_alpaca_accuracy": 0.13428125, "eval_python_code_alpaca_bleu_score": 3.228763862148937, "eval_python_code_alpaca_bleu_score_sem": 0.1005497722214129, "eval_python_code_alpaca_emb_cos_sim": 0.6419084072113037, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010345804026126365, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.3029255867004395, "eval_python_code_alpaca_n_ngrams_match_1": 7.472, "eval_python_code_alpaca_n_ngrams_match_2": 1.722, "eval_python_code_alpaca_n_ngrams_match_3": 0.456, "eval_python_code_alpaca_num_pred_words": 41.152, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 27.19207543905038, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.25525504929475035, "eval_python_code_alpaca_runtime": 13.8904, "eval_python_code_alpaca_samples_per_second": 35.996, "eval_python_code_alpaca_steps_per_second": 0.072, "eval_python_code_alpaca_token_set_f1": 0.3894889916093848, "eval_python_code_alpaca_token_set_f1_sem": 0.005579658653125345, "eval_python_code_alpaca_token_set_precision": 0.3956317457741966, "eval_python_code_alpaca_token_set_recall": 0.4152763301239062, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 33125 }, { "epoch": 6.36, "eval_wikibio_accuracy": 0.28784375, "eval_wikibio_bleu_score": 5.389823621646703, "eval_wikibio_bleu_score_sem": 0.18975478186988518, "eval_wikibio_emb_cos_sim": 0.7103409767150879, "eval_wikibio_emb_cos_sim_sem": 0.008903025863911852, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.252353668212891, "eval_wikibio_n_ngrams_match_1": 10.014, "eval_wikibio_n_ngrams_match_2": 3.18, "eval_wikibio_n_ngrams_match_3": 1.116, "eval_wikibio_num_pred_words": 38.466, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 70.2706115630885, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33469833254930753, "eval_wikibio_runtime": 15.3005, "eval_wikibio_samples_per_second": 32.679, "eval_wikibio_steps_per_second": 0.065, "eval_wikibio_token_set_f1": 0.3123403663021789, "eval_wikibio_token_set_f1_sem": 0.004990513917775753, "eval_wikibio_token_set_precision": 0.3243231676402658, "eval_wikibio_token_set_recall": 0.31240476052988475, "eval_wikibio_true_num_tokens": 61.1328125, "step": 33125 }, { "epoch": 6.36, "eval_nq_accuracy": 0.48215625, "eval_nq_bleu_score": 9.278246429298965, "eval_nq_bleu_score_sem": 0.3980576074068516, "eval_nq_emb_cos_sim": 0.7856423854827881, "eval_nq_emb_cos_sim_sem": 0.007734071825205571, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5158660411834717, "eval_nq_n_ngrams_match_1": 20.962, "eval_nq_n_ngrams_match_2": 6.952, "eval_nq_n_ngrams_match_3": 2.928, "eval_nq_num_pred_words": 49.082, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.377323410601164, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.39737245069653515, "eval_nq_runtime": 21.5111, "eval_nq_samples_per_second": 23.244, "eval_nq_steps_per_second": 0.046, "eval_nq_token_set_f1": 0.421570094993196, "eval_nq_token_set_f1_sem": 0.004781149110041133, "eval_nq_token_set_precision": 0.37095400610548196, "eval_nq_token_set_recall": 0.49802642997450525, "eval_nq_true_num_tokens": 64.0, "step": 33125 }, { "epoch": 6.36, "learning_rate": 0.001, "loss": 2.8703, "step": 33132 }, { "epoch": 6.36, "learning_rate": 0.001, "loss": 2.8646, "step": 33144 }, { "epoch": 6.37, "learning_rate": 0.001, "loss": 2.8706, "step": 33156 }, { "epoch": 6.37, "learning_rate": 0.001, "loss": 2.8871, "step": 33168 }, { "epoch": 6.37, "learning_rate": 0.001, "loss": 2.8742, "step": 33180 }, { "epoch": 6.37, "learning_rate": 0.001, "loss": 2.8735, "step": 33192 }, { "epoch": 6.38, "learning_rate": 0.001, "loss": 2.8754, "step": 33204 }, { "epoch": 6.38, "learning_rate": 0.001, "loss": 2.8793, "step": 33216 }, { "epoch": 6.38, "learning_rate": 0.001, "loss": 2.8842, "step": 33228 }, { "epoch": 6.38, "learning_rate": 0.001, "loss": 2.8782, "step": 33240 }, { "epoch": 6.38, "learning_rate": 0.001, "loss": 2.8842, "step": 33252 }, { "epoch": 6.39, "learning_rate": 0.001, "loss": 2.8792, "step": 33264 }, { "epoch": 6.39, "learning_rate": 0.001, "loss": 2.8858, "step": 33276 }, { "epoch": 6.39, "learning_rate": 0.001, "loss": 2.8721, "step": 33288 }, { "epoch": 6.39, "learning_rate": 0.001, "loss": 2.8655, "step": 33300 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 2.8731, "step": 33312 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 2.8652, "step": 33324 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 2.8749, "step": 33336 }, { "epoch": 6.4, "learning_rate": 0.001, "loss": 2.8709, "step": 33348 }, { "epoch": 6.41, "learning_rate": 0.001, "loss": 2.8792, "step": 33360 }, { "epoch": 6.41, "learning_rate": 0.001, "loss": 2.8707, "step": 33372 }, { "epoch": 6.41, "learning_rate": 0.001, "loss": 2.8802, "step": 33384 }, { "epoch": 6.41, "learning_rate": 0.001, "loss": 2.8873, "step": 33396 }, { "epoch": 6.41, "learning_rate": 0.001, "loss": 2.8721, "step": 33408 }, { "epoch": 6.42, "learning_rate": 0.001, "loss": 2.8604, "step": 33420 }, { "epoch": 6.42, "learning_rate": 0.001, "loss": 2.8753, "step": 33432 }, { "epoch": 6.42, "learning_rate": 0.001, "loss": 2.8706, "step": 33444 }, { "epoch": 6.42, "learning_rate": 0.001, "loss": 2.8678, "step": 33456 }, { "epoch": 6.43, "learning_rate": 0.001, "loss": 2.8655, "step": 33468 }, { "epoch": 6.43, "learning_rate": 0.001, "loss": 2.8677, "step": 33480 }, { "epoch": 6.43, "learning_rate": 0.001, "loss": 2.8738, "step": 33492 }, { "epoch": 6.43, "learning_rate": 0.001, "loss": 2.879, "step": 33504 }, { "epoch": 6.44, "learning_rate": 0.001, "loss": 2.8717, "step": 33516 }, { "epoch": 6.44, "learning_rate": 0.001, "loss": 2.8657, "step": 33528 }, { "epoch": 6.44, "learning_rate": 0.001, "loss": 2.8666, "step": 33540 }, { "epoch": 6.44, "learning_rate": 0.001, "loss": 2.8687, "step": 33552 }, { "epoch": 6.44, "learning_rate": 0.001, "loss": 2.8776, "step": 33564 }, { "epoch": 6.45, "learning_rate": 0.001, "loss": 2.8732, "step": 33576 }, { "epoch": 6.45, "learning_rate": 0.001, "loss": 2.8661, "step": 33588 }, { "epoch": 6.45, "learning_rate": 0.001, "loss": 2.8723, "step": 33600 }, { "epoch": 6.45, "learning_rate": 0.001, "loss": 2.8683, "step": 33612 }, { "epoch": 6.46, "learning_rate": 0.001, "loss": 2.8685, "step": 33624 }, { "epoch": 6.46, "learning_rate": 0.001, "loss": 2.8665, "step": 33636 }, { "epoch": 6.46, "learning_rate": 0.001, "loss": 2.8733, "step": 33648 }, { "epoch": 6.46, "learning_rate": 0.001, "loss": 2.868, "step": 33660 }, { "epoch": 6.47, "learning_rate": 0.001, "loss": 2.8701, "step": 33672 }, { "epoch": 6.47, "learning_rate": 0.001, "loss": 2.8698, "step": 33684 }, { "epoch": 6.47, "learning_rate": 0.001, "loss": 2.8715, "step": 33696 }, { "epoch": 6.47, "learning_rate": 0.001, "loss": 2.8769, "step": 33708 }, { "epoch": 6.47, "learning_rate": 0.001, "loss": 2.8681, "step": 33720 }, { "epoch": 6.48, "learning_rate": 0.001, "loss": 2.8633, "step": 33732 }, { "epoch": 6.48, "learning_rate": 0.001, "loss": 2.8674, "step": 33744 }, { "epoch": 6.48, "eval_ag_news_accuracy": 0.2890625, "eval_ag_news_bleu_score": 3.932410982518147, "eval_ag_news_bleu_score_sem": 0.1291998117660071, "eval_ag_news_emb_cos_sim": 0.7504019737243652, "eval_ag_news_emb_cos_sim_sem": 0.008021780701120109, "eval_ag_news_emb_top1_equal": 0.15625, "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9291231632232666, "eval_ag_news_n_ngrams_match_1": 12.074, "eval_ag_news_n_ngrams_match_2": 2.352, "eval_ag_news_n_ngrams_match_3": 0.648, "eval_ag_news_num_pred_words": 46.18, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 50.86236012300009, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.29408939111489535, "eval_ag_news_runtime": 15.9685, "eval_ag_news_samples_per_second": 31.312, "eval_ag_news_steps_per_second": 0.063, "eval_ag_news_token_set_f1": 0.3085081850473356, "eval_ag_news_token_set_f1_sem": 0.004399674595433429, "eval_ag_news_token_set_precision": 0.2835485122791629, "eval_ag_news_token_set_recall": 0.3551056267540652, "eval_ag_news_true_num_tokens": 56.09375, "step": 33750 }, { "epoch": 6.48, "eval_anthropic_toxic_prompts_accuracy": 0.09928125, "eval_anthropic_toxic_prompts_bleu_score": 2.5553378948791043, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09940717692837908, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6188018321990967, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009696636533392812, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.610792875289917, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.254, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.43, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.468, "eval_anthropic_toxic_prompts_num_pred_words": 47.198, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 36.99537390711386, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18254919545813222, "eval_anthropic_toxic_prompts_runtime": 10.184, "eval_anthropic_toxic_prompts_samples_per_second": 49.097, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.31396583357836516, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006127741967123549, "eval_anthropic_toxic_prompts_token_set_precision": 0.36607387202302716, "eval_anthropic_toxic_prompts_token_set_recall": 0.31002572874258566, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 33750 }, { "epoch": 6.48, "eval_arxiv_accuracy": 0.312875, "eval_arxiv_bleu_score": 3.548344708985615, "eval_arxiv_bleu_score_sem": 0.11083964357254245, "eval_arxiv_emb_cos_sim": 0.6740222573280334, "eval_arxiv_emb_cos_sim_sem": 0.007520927309751884, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.783830165863037, "eval_arxiv_n_ngrams_match_1": 12.43, "eval_arxiv_n_ngrams_match_2": 2.204, "eval_arxiv_n_ngrams_match_3": 0.472, "eval_arxiv_num_pred_words": 38.772, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 43.98418624800976, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29352644525619054, "eval_arxiv_runtime": 10.6071, "eval_arxiv_samples_per_second": 47.138, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.29642046247294596, "eval_arxiv_token_set_f1_sem": 0.0040327139143596525, "eval_arxiv_token_set_precision": 0.2370876540307842, "eval_arxiv_token_set_recall": 0.4261550629059122, "eval_arxiv_true_num_tokens": 64.0, "step": 33750 }, { "epoch": 6.48, "eval_python_code_alpaca_accuracy": 0.13721875, "eval_python_code_alpaca_bleu_score": 3.2779011790289307, "eval_python_code_alpaca_bleu_score_sem": 0.11043417760361648, "eval_python_code_alpaca_emb_cos_sim": 0.642796516418457, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011234746915671919, "eval_python_code_alpaca_emb_top1_equal": 0.078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.312835931777954, "eval_python_code_alpaca_n_ngrams_match_1": 7.644, "eval_python_code_alpaca_n_ngrams_match_2": 1.794, "eval_python_code_alpaca_n_ngrams_match_3": 0.476, "eval_python_code_alpaca_num_pred_words": 41.902, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 27.46289804620583, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2591009943649206, "eval_python_code_alpaca_runtime": 10.5335, "eval_python_code_alpaca_samples_per_second": 47.468, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.3986237679436292, "eval_python_code_alpaca_token_set_f1_sem": 0.005568005774957073, "eval_python_code_alpaca_token_set_precision": 0.407473030960325, "eval_python_code_alpaca_token_set_recall": 0.42484056543566645, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 33750 }, { "epoch": 6.48, "eval_wikibio_accuracy": 0.28815625, "eval_wikibio_bleu_score": 5.297948565492029, "eval_wikibio_bleu_score_sem": 0.1816595475722539, "eval_wikibio_emb_cos_sim": 0.6929820775985718, "eval_wikibio_emb_cos_sim_sem": 0.010644598181743776, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.286059856414795, "eval_wikibio_n_ngrams_match_1": 9.828, "eval_wikibio_n_ngrams_match_2": 3.126, "eval_wikibio_n_ngrams_match_3": 1.08, "eval_wikibio_num_pred_words": 37.772, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 72.67953578609689, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32688126627313485, "eval_wikibio_runtime": 19.7744, "eval_wikibio_samples_per_second": 25.285, "eval_wikibio_steps_per_second": 0.051, "eval_wikibio_token_set_f1": 0.30820207438733105, "eval_wikibio_token_set_f1_sem": 0.0051921793765689825, "eval_wikibio_token_set_precision": 0.3166989989819676, "eval_wikibio_token_set_recall": 0.3150842318918094, "eval_wikibio_true_num_tokens": 61.1328125, "step": 33750 }, { "epoch": 6.48, "eval_nq_accuracy": 0.48315625, "eval_nq_bleu_score": 9.2809600316322, "eval_nq_bleu_score_sem": 0.4064657603452962, "eval_nq_emb_cos_sim": 0.7859359979629517, "eval_nq_emb_cos_sim_sem": 0.008534873142169193, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.5102248191833496, "eval_nq_n_ngrams_match_1": 20.94, "eval_nq_n_ngrams_match_2": 6.91, "eval_nq_n_ngrams_match_3": 2.92, "eval_nq_num_pred_words": 48.784, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.307696755829227, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.3991825878678902, "eval_nq_runtime": 15.1079, "eval_nq_samples_per_second": 33.095, "eval_nq_steps_per_second": 0.066, "eval_nq_token_set_f1": 0.4207198722805135, "eval_nq_token_set_f1_sem": 0.004964994288404282, "eval_nq_token_set_precision": 0.37042648453248295, "eval_nq_token_set_recall": 0.4984827871399623, "eval_nq_true_num_tokens": 64.0, "step": 33750 }, { "epoch": 6.48, "learning_rate": 0.001, "loss": 2.8675, "step": 33756 }, { "epoch": 6.48, "learning_rate": 0.001, "loss": 2.8792, "step": 33768 }, { "epoch": 6.49, "learning_rate": 0.001, "loss": 2.8704, "step": 33780 }, { "epoch": 6.49, "learning_rate": 0.001, "loss": 2.8732, "step": 33792 }, { "epoch": 6.49, "learning_rate": 0.001, "loss": 2.8837, "step": 33804 }, { "epoch": 6.49, "learning_rate": 0.001, "loss": 2.8697, "step": 33816 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.8661, "step": 33828 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.8688, "step": 33840 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.8496, "step": 33852 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.8764, "step": 33864 }, { "epoch": 6.5, "learning_rate": 0.001, "loss": 2.8662, "step": 33876 }, { "epoch": 6.51, "learning_rate": 0.001, "loss": 2.8684, "step": 33888 }, { "epoch": 6.51, "learning_rate": 0.001, "loss": 2.856, "step": 33900 }, { "epoch": 6.51, "learning_rate": 0.001, "loss": 2.8704, "step": 33912 }, { "epoch": 6.51, "learning_rate": 0.001, "loss": 2.8734, "step": 33924 }, { "epoch": 6.52, "learning_rate": 0.001, "loss": 2.8691, "step": 33936 }, { "epoch": 6.52, "learning_rate": 0.001, "loss": 2.8683, "step": 33948 }, { "epoch": 6.52, "learning_rate": 0.001, "loss": 2.8743, "step": 33960 }, { "epoch": 6.52, "learning_rate": 0.001, "loss": 2.8647, "step": 33972 }, { "epoch": 6.53, "learning_rate": 0.001, "loss": 2.8595, "step": 33984 }, { "epoch": 6.53, "learning_rate": 0.001, "loss": 2.8642, "step": 33996 }, { "epoch": 6.53, "learning_rate": 0.001, "loss": 2.8719, "step": 34008 }, { "epoch": 6.53, "learning_rate": 0.001, "loss": 2.8643, "step": 34020 }, { "epoch": 6.53, "learning_rate": 0.001, "loss": 2.866, "step": 34032 }, { "epoch": 6.54, "learning_rate": 0.001, "loss": 2.8662, "step": 34044 }, { "epoch": 6.54, "learning_rate": 0.001, "loss": 2.8591, "step": 34056 }, { "epoch": 6.54, "learning_rate": 0.001, "loss": 2.8704, "step": 34068 }, { "epoch": 6.54, "learning_rate": 0.001, "loss": 2.8533, "step": 34080 }, { "epoch": 6.55, "learning_rate": 0.001, "loss": 2.8709, "step": 34092 }, { "epoch": 6.55, "learning_rate": 0.001, "loss": 2.8714, "step": 34104 }, { "epoch": 6.55, "learning_rate": 0.001, "loss": 2.871, "step": 34116 }, { "epoch": 6.55, "learning_rate": 0.001, "loss": 2.8706, "step": 34128 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 2.8688, "step": 34140 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 2.8647, "step": 34152 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 2.8655, "step": 34164 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 2.8585, "step": 34176 }, { "epoch": 6.56, "learning_rate": 0.001, "loss": 2.8598, "step": 34188 }, { "epoch": 6.57, "learning_rate": 0.001, "loss": 2.8537, "step": 34200 }, { "epoch": 6.57, "learning_rate": 0.001, "loss": 2.8604, "step": 34212 }, { "epoch": 6.57, "learning_rate": 0.001, "loss": 2.8705, "step": 34224 }, { "epoch": 6.57, "learning_rate": 0.001, "loss": 2.8551, "step": 34236 }, { "epoch": 6.58, "learning_rate": 0.001, "loss": 2.8538, "step": 34248 }, { "epoch": 6.58, "learning_rate": 0.001, "loss": 2.8747, "step": 34260 }, { "epoch": 6.58, "learning_rate": 0.001, "loss": 2.8553, "step": 34272 }, { "epoch": 6.58, "learning_rate": 0.001, "loss": 2.8694, "step": 34284 }, { "epoch": 6.59, "learning_rate": 0.001, "loss": 2.8557, "step": 34296 }, { "epoch": 6.59, "learning_rate": 0.001, "loss": 2.8744, "step": 34308 }, { "epoch": 6.59, "learning_rate": 0.001, "loss": 2.8597, "step": 34320 }, { "epoch": 6.59, "learning_rate": 0.001, "loss": 2.8696, "step": 34332 }, { "epoch": 6.59, "learning_rate": 0.001, "loss": 2.866, "step": 34344 }, { "epoch": 6.6, "learning_rate": 0.001, "loss": 2.8548, "step": 34356 }, { "epoch": 6.6, "learning_rate": 0.001, "loss": 2.8526, "step": 34368 }, { "epoch": 6.6, "eval_ag_news_accuracy": 0.28865625, "eval_ag_news_bleu_score": 3.9808220742861793, "eval_ag_news_bleu_score_sem": 0.1251884902370004, "eval_ag_news_emb_cos_sim": 0.7487285137176514, "eval_ag_news_emb_cos_sim_sem": 0.008285175455255417, "eval_ag_news_emb_top1_equal": 0.140625, "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9099693298339844, "eval_ag_news_n_ngrams_match_1": 12.098, "eval_ag_news_n_ngrams_match_2": 2.398, "eval_ag_news_n_ngrams_match_3": 0.638, "eval_ag_news_num_pred_words": 45.91, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 49.89742158773554, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2956530095620596, "eval_ag_news_runtime": 12.8691, "eval_ag_news_samples_per_second": 38.853, "eval_ag_news_steps_per_second": 0.078, "eval_ag_news_token_set_f1": 0.3104337388231065, "eval_ag_news_token_set_f1_sem": 0.004452844806865152, "eval_ag_news_token_set_precision": 0.2843858954523405, "eval_ag_news_token_set_recall": 0.36018926161695275, "eval_ag_news_true_num_tokens": 56.09375, "step": 34375 }, { "epoch": 6.6, "eval_anthropic_toxic_prompts_accuracy": 0.09878125, "eval_anthropic_toxic_prompts_bleu_score": 2.570596415059724, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10578402473129396, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6138656735420227, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009576375110978532, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5956509113311768, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482, "eval_anthropic_toxic_prompts_num_pred_words": 46.518, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 36.43941109571111, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1817760515076232, "eval_anthropic_toxic_prompts_runtime": 10.4761, "eval_anthropic_toxic_prompts_samples_per_second": 47.728, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.31408359049780926, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006057114708069728, "eval_anthropic_toxic_prompts_token_set_precision": 0.3648893419076783, "eval_anthropic_toxic_prompts_token_set_recall": 0.30846587214679694, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 34375 }, { "epoch": 6.6, "eval_arxiv_accuracy": 0.31528125, "eval_arxiv_bleu_score": 3.4961232713119115, "eval_arxiv_bleu_score_sem": 0.10449275208560148, "eval_arxiv_emb_cos_sim": 0.6694149374961853, "eval_arxiv_emb_cos_sim_sem": 0.008328695369065456, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.768296480178833, "eval_arxiv_n_ngrams_match_1": 12.516, "eval_arxiv_n_ngrams_match_2": 2.214, "eval_arxiv_n_ngrams_match_3": 0.458, "eval_arxiv_num_pred_words": 39.366, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 43.306228943868845, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29682351028940945, "eval_arxiv_runtime": 10.9782, "eval_arxiv_samples_per_second": 45.545, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.29969631609345126, "eval_arxiv_token_set_f1_sem": 0.0040482923164340354, "eval_arxiv_token_set_precision": 0.2407923349090727, "eval_arxiv_token_set_recall": 0.4186512239012958, "eval_arxiv_true_num_tokens": 64.0, "step": 34375 }, { "epoch": 6.6, "eval_python_code_alpaca_accuracy": 0.1371875, "eval_python_code_alpaca_bleu_score": 3.538106681301775, "eval_python_code_alpaca_bleu_score_sem": 0.12999780892390916, "eval_python_code_alpaca_emb_cos_sim": 0.6586180925369263, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009541149565246536, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2873189449310303, "eval_python_code_alpaca_n_ngrams_match_1": 7.834, "eval_python_code_alpaca_n_ngrams_match_2": 1.87, "eval_python_code_alpaca_n_ngrams_match_3": 0.552, "eval_python_code_alpaca_num_pred_words": 40.97, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 26.770992848076013, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2664782288377152, "eval_python_code_alpaca_runtime": 11.5815, "eval_python_code_alpaca_samples_per_second": 43.172, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.39915023487651846, "eval_python_code_alpaca_token_set_f1_sem": 0.005585884508409135, "eval_python_code_alpaca_token_set_precision": 0.4136092999715175, "eval_python_code_alpaca_token_set_recall": 0.4118775048746247, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 34375 }, { "epoch": 6.6, "eval_wikibio_accuracy": 0.29109375, "eval_wikibio_bleu_score": 5.100301677590534, "eval_wikibio_bleu_score_sem": 0.18325907342749737, "eval_wikibio_emb_cos_sim": 0.7025485038757324, "eval_wikibio_emb_cos_sim_sem": 0.009842035916765808, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.213316440582275, "eval_wikibio_n_ngrams_match_1": 9.51, "eval_wikibio_n_ngrams_match_2": 2.996, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 37.558, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 67.5802946018747, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32257824975748883, "eval_wikibio_runtime": 10.9509, "eval_wikibio_samples_per_second": 45.659, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.30043513728860616, "eval_wikibio_token_set_f1_sem": 0.005366059131199708, "eval_wikibio_token_set_precision": 0.30797517960411785, "eval_wikibio_token_set_recall": 0.3090812801699436, "eval_wikibio_true_num_tokens": 61.1328125, "step": 34375 }, { "epoch": 6.6, "eval_nq_accuracy": 0.484875, "eval_nq_bleu_score": 9.238379832253832, "eval_nq_bleu_score_sem": 0.40565419091997434, "eval_nq_emb_cos_sim": 0.7774143218994141, "eval_nq_emb_cos_sim_sem": 0.009179519726855444, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.499204158782959, "eval_nq_n_ngrams_match_1": 20.678, "eval_nq_n_ngrams_match_2": 6.872, "eval_nq_n_ngrams_match_3": 2.89, "eval_nq_num_pred_words": 48.342, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.172802486831891, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.39509830356612174, "eval_nq_runtime": 11.4948, "eval_nq_samples_per_second": 43.498, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.4153722602516122, "eval_nq_token_set_f1_sem": 0.004972620376333147, "eval_nq_token_set_precision": 0.3643574149174973, "eval_nq_token_set_recall": 0.49514487767926674, "eval_nq_true_num_tokens": 64.0, "step": 34375 }, { "epoch": 6.6, "learning_rate": 0.001, "loss": 2.8702, "step": 34380 }, { "epoch": 6.6, "learning_rate": 0.001, "loss": 2.8599, "step": 34392 }, { "epoch": 6.61, "learning_rate": 0.001, "loss": 2.866, "step": 34404 }, { "epoch": 6.61, "learning_rate": 0.001, "loss": 2.8627, "step": 34416 }, { "epoch": 6.61, "learning_rate": 0.001, "loss": 2.8713, "step": 34428 }, { "epoch": 6.61, "learning_rate": 0.001, "loss": 2.8665, "step": 34440 }, { "epoch": 6.62, "learning_rate": 0.001, "loss": 2.8668, "step": 34452 }, { "epoch": 6.62, "learning_rate": 0.001, "loss": 2.8657, "step": 34464 }, { "epoch": 6.62, "learning_rate": 0.001, "loss": 2.8609, "step": 34476 }, { "epoch": 6.62, "learning_rate": 0.001, "loss": 2.8525, "step": 34488 }, { "epoch": 6.62, "learning_rate": 0.001, "loss": 2.8596, "step": 34500 }, { "epoch": 6.63, "learning_rate": 0.001, "loss": 2.874, "step": 34512 }, { "epoch": 6.63, "learning_rate": 0.001, "loss": 2.8531, "step": 34524 }, { "epoch": 6.63, "learning_rate": 0.001, "loss": 2.8666, "step": 34536 }, { "epoch": 6.63, "learning_rate": 0.001, "loss": 2.8631, "step": 34548 }, { "epoch": 6.64, "learning_rate": 0.001, "loss": 2.8582, "step": 34560 }, { "epoch": 6.64, "learning_rate": 0.001, "loss": 2.845, "step": 34572 }, { "epoch": 6.64, "learning_rate": 0.001, "loss": 2.8664, "step": 34584 }, { "epoch": 6.64, "learning_rate": 0.001, "loss": 2.8569, "step": 34596 }, { "epoch": 6.65, "learning_rate": 0.001, "loss": 2.8645, "step": 34608 }, { "epoch": 6.65, "learning_rate": 0.001, "loss": 2.8643, "step": 34620 }, { "epoch": 6.65, "learning_rate": 0.001, "loss": 2.862, "step": 34632 }, { "epoch": 6.65, "learning_rate": 0.001, "loss": 2.8655, "step": 34644 }, { "epoch": 6.65, "learning_rate": 0.001, "loss": 2.8637, "step": 34656 }, { "epoch": 6.66, "learning_rate": 0.001, "loss": 2.8467, "step": 34668 }, { "epoch": 6.66, "learning_rate": 0.001, "loss": 2.8719, "step": 34680 }, { "epoch": 6.66, "learning_rate": 0.001, "loss": 2.8639, "step": 34692 }, { "epoch": 6.66, "learning_rate": 0.001, "loss": 2.8483, "step": 34704 }, { "epoch": 6.67, "learning_rate": 0.001, "loss": 2.8562, "step": 34716 }, { "epoch": 6.67, "learning_rate": 0.001, "loss": 2.865, "step": 34728 }, { "epoch": 6.67, "learning_rate": 0.001, "loss": 2.8623, "step": 34740 }, { "epoch": 6.67, "learning_rate": 0.001, "loss": 2.84, "step": 34752 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 2.8599, "step": 34764 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 2.8637, "step": 34776 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 2.8508, "step": 34788 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 2.8584, "step": 34800 }, { "epoch": 6.68, "learning_rate": 0.001, "loss": 2.8557, "step": 34812 }, { "epoch": 6.69, "learning_rate": 0.001, "loss": 2.868, "step": 34824 }, { "epoch": 6.69, "learning_rate": 0.001, "loss": 2.8637, "step": 34836 }, { "epoch": 6.69, "learning_rate": 0.001, "loss": 2.8637, "step": 34848 }, { "epoch": 6.69, "learning_rate": 0.001, "loss": 2.8508, "step": 34860 }, { "epoch": 6.7, "learning_rate": 0.001, "loss": 2.8701, "step": 34872 }, { "epoch": 6.7, "learning_rate": 0.001, "loss": 2.8561, "step": 34884 }, { "epoch": 6.7, "learning_rate": 0.001, "loss": 2.8635, "step": 34896 }, { "epoch": 6.7, "learning_rate": 0.001, "loss": 2.865, "step": 34908 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.8588, "step": 34920 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.8595, "step": 34932 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.8503, "step": 34944 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.8504, "step": 34956 }, { "epoch": 6.71, "learning_rate": 0.001, "loss": 2.8673, "step": 34968 }, { "epoch": 6.72, "learning_rate": 0.001, "loss": 2.8522, "step": 34980 }, { "epoch": 6.72, "learning_rate": 0.001, "loss": 2.861, "step": 34992 }, { "epoch": 6.72, "eval_ag_news_accuracy": 0.29171875, "eval_ag_news_bleu_score": 3.9790343658795435, "eval_ag_news_bleu_score_sem": 0.12816532315134122, "eval_ag_news_emb_cos_sim": 0.7471901774406433, "eval_ag_news_emb_cos_sim_sem": 0.007859010889260052, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9064903259277344, "eval_ag_news_n_ngrams_match_1": 12.22, "eval_ag_news_n_ngrams_match_2": 2.448, "eval_ag_news_n_ngrams_match_3": 0.65, "eval_ag_news_num_pred_words": 45.856, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 49.72412987917137, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30035677940403466, "eval_ag_news_runtime": 11.6855, "eval_ag_news_samples_per_second": 42.788, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3132406257938226, "eval_ag_news_token_set_f1_sem": 0.004412036118865149, "eval_ag_news_token_set_precision": 0.28730491043904277, "eval_ag_news_token_set_recall": 0.3611314712369899, "eval_ag_news_true_num_tokens": 56.09375, "step": 35000 }, { "epoch": 6.72, "eval_anthropic_toxic_prompts_accuracy": 0.09875, "eval_anthropic_toxic_prompts_bleu_score": 2.497886972594568, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.101341392859124, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6152929663658142, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01059685699763757, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5900990962982178, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.208, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.382, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.47, "eval_anthropic_toxic_prompts_num_pred_words": 47.38, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 36.237666767186695, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.17834744464066038, "eval_anthropic_toxic_prompts_runtime": 11.1501, "eval_anthropic_toxic_prompts_samples_per_second": 44.843, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.31645419333667907, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005970010312402791, "eval_anthropic_toxic_prompts_token_set_precision": 0.3619720257387812, "eval_anthropic_toxic_prompts_token_set_recall": 0.31606308323792415, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 35000 }, { "epoch": 6.72, "eval_arxiv_accuracy": 0.31521875, "eval_arxiv_bleu_score": 3.486571756532496, "eval_arxiv_bleu_score_sem": 0.11443993388116293, "eval_arxiv_emb_cos_sim": 0.6798828840255737, "eval_arxiv_emb_cos_sim_sem": 0.008565536950094392, "eval_arxiv_emb_top1_equal": 0.1484375, "eval_arxiv_emb_top1_equal_sem": 0.031548465007086954, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7692196369171143, "eval_arxiv_n_ngrams_match_1": 12.686, "eval_arxiv_n_ngrams_match_2": 2.23, "eval_arxiv_n_ngrams_match_3": 0.458, "eval_arxiv_num_pred_words": 39.274, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 43.346225839789405, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.29977012288826155, "eval_arxiv_runtime": 11.1014, "eval_arxiv_samples_per_second": 45.039, "eval_arxiv_steps_per_second": 0.09, "eval_arxiv_token_set_f1": 0.2976885077159939, "eval_arxiv_token_set_f1_sem": 0.00418356139217434, "eval_arxiv_token_set_precision": 0.24099664930764575, "eval_arxiv_token_set_recall": 0.418809256534706, "eval_arxiv_true_num_tokens": 64.0, "step": 35000 }, { "epoch": 6.72, "eval_python_code_alpaca_accuracy": 0.13825, "eval_python_code_alpaca_bleu_score": 3.212306300204904, "eval_python_code_alpaca_bleu_score_sem": 0.09695396669486495, "eval_python_code_alpaca_emb_cos_sim": 0.64945387840271, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010687332451955791, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2819371223449707, "eval_python_code_alpaca_n_ngrams_match_1": 7.844, "eval_python_code_alpaca_n_ngrams_match_2": 1.864, "eval_python_code_alpaca_n_ngrams_match_3": 0.484, "eval_python_code_alpaca_num_pred_words": 42.6, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 26.62730311725348, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2625065191240496, "eval_python_code_alpaca_runtime": 11.0188, "eval_python_code_alpaca_samples_per_second": 45.377, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.40376819021253124, "eval_python_code_alpaca_token_set_f1_sem": 0.005833305827812379, "eval_python_code_alpaca_token_set_precision": 0.41775042920541255, "eval_python_code_alpaca_token_set_recall": 0.41977667646916167, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 35000 }, { "epoch": 6.72, "eval_wikibio_accuracy": 0.29409375, "eval_wikibio_bleu_score": 4.97081038203576, "eval_wikibio_bleu_score_sem": 0.18702653628909408, "eval_wikibio_emb_cos_sim": 0.6821502447128296, "eval_wikibio_emb_cos_sim_sem": 0.011951348707478394, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.175293922424316, "eval_wikibio_n_ngrams_match_1": 9.322, "eval_wikibio_n_ngrams_match_2": 2.962, "eval_wikibio_n_ngrams_match_3": 0.976, "eval_wikibio_num_pred_words": 36.47, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 65.05895913805116, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31270342671269835, "eval_wikibio_runtime": 10.2647, "eval_wikibio_samples_per_second": 48.711, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.29586921961166124, "eval_wikibio_token_set_f1_sem": 0.005922116862736919, "eval_wikibio_token_set_precision": 0.3007645948794808, "eval_wikibio_token_set_recall": 0.3076653750410545, "eval_wikibio_true_num_tokens": 61.1328125, "step": 35000 }, { "epoch": 6.72, "eval_nq_accuracy": 0.486125, "eval_nq_bleu_score": 9.535454820459188, "eval_nq_bleu_score_sem": 0.41691831562754833, "eval_nq_emb_cos_sim": 0.7831763029098511, "eval_nq_emb_cos_sim_sem": 0.008866690012139399, "eval_nq_emb_top1_equal": 0.2109375, "eval_nq_emb_top1_equal_sem": 0.03620184850179216, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4910407066345215, "eval_nq_n_ngrams_match_1": 21.108, "eval_nq_n_ngrams_match_2": 7.026, "eval_nq_n_ngrams_match_3": 3.03, "eval_nq_num_pred_words": 48.928, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 12.073834904395445, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.40223704983995545, "eval_nq_runtime": 10.7933, "eval_nq_samples_per_second": 46.325, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4235553063558491, "eval_nq_token_set_f1_sem": 0.004986664205900059, "eval_nq_token_set_precision": 0.3738815575146614, "eval_nq_token_set_recall": 0.4991581075893894, "eval_nq_true_num_tokens": 64.0, "step": 35000 }, { "epoch": 6.72, "learning_rate": 0.001, "loss": 2.8634, "step": 35004 }, { "epoch": 6.72, "learning_rate": 0.001, "loss": 2.8536, "step": 35016 }, { "epoch": 6.73, "learning_rate": 0.001, "loss": 2.8584, "step": 35028 }, { "epoch": 6.73, "learning_rate": 0.001, "loss": 2.8567, "step": 35040 }, { "epoch": 6.73, "learning_rate": 0.001, "loss": 2.8584, "step": 35052 }, { "epoch": 6.73, "learning_rate": 0.001, "loss": 2.8626, "step": 35064 }, { "epoch": 6.74, "learning_rate": 0.001, "loss": 2.8454, "step": 35076 }, { "epoch": 6.74, "learning_rate": 0.001, "loss": 2.8607, "step": 35088 }, { "epoch": 6.74, "learning_rate": 0.001, "loss": 2.8514, "step": 35100 }, { "epoch": 6.74, "learning_rate": 0.001, "loss": 2.8495, "step": 35112 }, { "epoch": 6.74, "learning_rate": 0.001, "loss": 2.8601, "step": 35124 }, { "epoch": 6.75, "learning_rate": 0.001, "loss": 2.8479, "step": 35136 }, { "epoch": 6.75, "learning_rate": 0.001, "loss": 2.8581, "step": 35148 }, { "epoch": 6.75, "learning_rate": 0.001, "loss": 2.8582, "step": 35160 }, { "epoch": 6.75, "learning_rate": 0.001, "loss": 2.85, "step": 35172 }, { "epoch": 6.76, "learning_rate": 0.001, "loss": 2.8455, "step": 35184 }, { "epoch": 6.76, "learning_rate": 0.001, "loss": 2.8609, "step": 35196 }, { "epoch": 6.76, "learning_rate": 0.001, "loss": 2.8598, "step": 35208 }, { "epoch": 6.76, "learning_rate": 0.001, "loss": 2.8527, "step": 35220 }, { "epoch": 6.76, "learning_rate": 0.001, "loss": 2.847, "step": 35232 }, { "epoch": 6.77, "learning_rate": 0.001, "loss": 2.8607, "step": 35244 }, { "epoch": 6.77, "learning_rate": 0.001, "loss": 2.8562, "step": 35256 }, { "epoch": 6.77, "learning_rate": 0.001, "loss": 2.8587, "step": 35268 }, { "epoch": 6.77, "learning_rate": 0.001, "loss": 2.8676, "step": 35280 }, { "epoch": 6.78, "learning_rate": 0.001, "loss": 2.8592, "step": 35292 }, { "epoch": 6.78, "learning_rate": 0.001, "loss": 2.8564, "step": 35304 }, { "epoch": 6.78, "learning_rate": 0.001, "loss": 2.846, "step": 35316 }, { "epoch": 6.78, "learning_rate": 0.001, "loss": 2.8488, "step": 35328 }, { "epoch": 6.79, "learning_rate": 0.001, "loss": 2.8617, "step": 35340 }, { "epoch": 6.79, "learning_rate": 0.001, "loss": 2.8433, "step": 35352 }, { "epoch": 6.79, "learning_rate": 0.001, "loss": 2.8579, "step": 35364 }, { "epoch": 6.79, "learning_rate": 0.001, "loss": 2.8581, "step": 35376 }, { "epoch": 6.79, "learning_rate": 0.001, "loss": 2.8591, "step": 35388 }, { "epoch": 6.8, "learning_rate": 0.001, "loss": 2.8593, "step": 35400 }, { "epoch": 6.8, "learning_rate": 0.001, "loss": 2.8516, "step": 35412 }, { "epoch": 6.8, "learning_rate": 0.001, "loss": 2.8501, "step": 35424 }, { "epoch": 6.8, "learning_rate": 0.001, "loss": 2.8415, "step": 35436 }, { "epoch": 6.81, "learning_rate": 0.001, "loss": 2.8656, "step": 35448 }, { "epoch": 6.81, "learning_rate": 0.001, "loss": 2.8542, "step": 35460 }, { "epoch": 6.81, "learning_rate": 0.001, "loss": 2.8538, "step": 35472 }, { "epoch": 6.81, "learning_rate": 0.001, "loss": 2.8559, "step": 35484 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 2.8585, "step": 35496 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 2.8572, "step": 35508 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 2.8563, "step": 35520 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 2.8456, "step": 35532 }, { "epoch": 6.82, "learning_rate": 0.001, "loss": 2.8541, "step": 35544 }, { "epoch": 6.83, "learning_rate": 0.001, "loss": 2.8548, "step": 35556 }, { "epoch": 6.83, "learning_rate": 0.001, "loss": 2.8595, "step": 35568 }, { "epoch": 6.83, "learning_rate": 0.001, "loss": 2.8608, "step": 35580 }, { "epoch": 6.83, "learning_rate": 0.001, "loss": 2.86, "step": 35592 }, { "epoch": 6.84, "learning_rate": 0.001, "loss": 2.8533, "step": 35604 }, { "epoch": 6.84, "learning_rate": 0.001, "loss": 2.8521, "step": 35616 }, { "epoch": 6.84, "eval_ag_news_accuracy": 0.29221875, "eval_ag_news_bleu_score": 4.033302907273738, "eval_ag_news_bleu_score_sem": 0.1416210678430828, "eval_ag_news_emb_cos_sim": 0.7565293312072754, "eval_ag_news_emb_cos_sim_sem": 0.007986794865589019, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.9059700965881348, "eval_ag_news_n_ngrams_match_1": 12.378, "eval_ag_news_n_ngrams_match_2": 2.482, "eval_ag_news_n_ngrams_match_3": 0.668, "eval_ag_news_num_pred_words": 45.898, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 49.6982686553891, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30169908200188245, "eval_ag_news_runtime": 11.7446, "eval_ag_news_samples_per_second": 42.573, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.3151272655574294, "eval_ag_news_token_set_f1_sem": 0.0043363109390232865, "eval_ag_news_token_set_precision": 0.2887459213716017, "eval_ag_news_token_set_recall": 0.3646203924574114, "eval_ag_news_true_num_tokens": 56.09375, "step": 35625 }, { "epoch": 6.84, "eval_anthropic_toxic_prompts_accuracy": 0.0999375, "eval_anthropic_toxic_prompts_bleu_score": 2.4457907647733035, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09071905044115455, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6070419549942017, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010261396400904696, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5999157428741455, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.152, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.382, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.45, "eval_anthropic_toxic_prompts_num_pred_words": 47.064, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 36.595150911539015, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.17732715009504396, "eval_anthropic_toxic_prompts_runtime": 10.355, "eval_anthropic_toxic_prompts_samples_per_second": 48.286, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3167116575512957, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006645970120400522, "eval_anthropic_toxic_prompts_token_set_precision": 0.3589571931969173, "eval_anthropic_toxic_prompts_token_set_recall": 0.31623793770486097, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 35625 }, { "epoch": 6.84, "eval_arxiv_accuracy": 0.31759375, "eval_arxiv_bleu_score": 3.596547915167506, "eval_arxiv_bleu_score_sem": 0.10501735834026857, "eval_arxiv_emb_cos_sim": 0.6719647645950317, "eval_arxiv_emb_cos_sim_sem": 0.008454620274248387, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7631771564483643, "eval_arxiv_n_ngrams_match_1": 12.754, "eval_arxiv_n_ngrams_match_2": 2.27, "eval_arxiv_n_ngrams_match_3": 0.46, "eval_arxiv_num_pred_words": 39.976, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 43.08509684470342, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.30090602886694595, "eval_arxiv_runtime": 10.1914, "eval_arxiv_samples_per_second": 49.061, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3017335648735391, "eval_arxiv_token_set_f1_sem": 0.004063672816348415, "eval_arxiv_token_set_precision": 0.24524005137144733, "eval_arxiv_token_set_recall": 0.41690086063031406, "eval_arxiv_true_num_tokens": 64.0, "step": 35625 }, { "epoch": 6.84, "eval_python_code_alpaca_accuracy": 0.1419375, "eval_python_code_alpaca_bleu_score": 3.3131736778955037, "eval_python_code_alpaca_bleu_score_sem": 0.10066290300174355, "eval_python_code_alpaca_emb_cos_sim": 0.6628965139389038, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008923437450936777, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.243400812149048, "eval_python_code_alpaca_n_ngrams_match_1": 7.802, "eval_python_code_alpaca_n_ngrams_match_2": 1.872, "eval_python_code_alpaca_n_ngrams_match_3": 0.474, "eval_python_code_alpaca_num_pred_words": 41.076, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.62070496143208, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2667085180924852, "eval_python_code_alpaca_runtime": 9.733, "eval_python_code_alpaca_samples_per_second": 51.372, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.40651029567760116, "eval_python_code_alpaca_token_set_f1_sem": 0.005650780074059194, "eval_python_code_alpaca_token_set_precision": 0.418461687800907, "eval_python_code_alpaca_token_set_recall": 0.426684185014457, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 35625 }, { "epoch": 6.84, "eval_wikibio_accuracy": 0.2973125, "eval_wikibio_bleu_score": 5.0371287381319, "eval_wikibio_bleu_score_sem": 0.19139549386612043, "eval_wikibio_emb_cos_sim": 0.6902219653129578, "eval_wikibio_emb_cos_sim_sem": 0.010707008444765298, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.148153781890869, "eval_wikibio_n_ngrams_match_1": 9.06, "eval_wikibio_n_ngrams_match_2": 2.838, "eval_wikibio_n_ngrams_match_3": 0.96, "eval_wikibio_num_pred_words": 35.634, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 63.31699533960944, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3129187764131648, "eval_wikibio_runtime": 10.2798, "eval_wikibio_samples_per_second": 48.639, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.29125349625234354, "eval_wikibio_token_set_f1_sem": 0.005700134087778099, "eval_wikibio_token_set_precision": 0.29512971044700426, "eval_wikibio_token_set_recall": 0.30567532670449404, "eval_wikibio_true_num_tokens": 61.1328125, "step": 35625 }, { "epoch": 6.84, "eval_nq_accuracy": 0.48709375, "eval_nq_bleu_score": 9.50862347290992, "eval_nq_bleu_score_sem": 0.3929691996343114, "eval_nq_emb_cos_sim": 0.7866525053977966, "eval_nq_emb_cos_sim_sem": 0.008427224164171798, "eval_nq_emb_top1_equal": 0.1484375, "eval_nq_emb_top1_equal_sem": 0.031548465007086954, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4843738079071045, "eval_nq_n_ngrams_match_1": 21.074, "eval_nq_n_ngrams_match_2": 7.102, "eval_nq_n_ngrams_match_3": 3.042, "eval_nq_num_pred_words": 49.09, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.993607600649542, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4013508693416742, "eval_nq_runtime": 10.8869, "eval_nq_samples_per_second": 45.927, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.42014996183504355, "eval_nq_token_set_f1_sem": 0.0048032256108574, "eval_nq_token_set_precision": 0.37189658457401875, "eval_nq_token_set_recall": 0.4938735890737705, "eval_nq_true_num_tokens": 64.0, "step": 35625 }, { "epoch": 6.84, "learning_rate": 0.001, "loss": 2.8578, "step": 35628 }, { "epoch": 6.84, "learning_rate": 0.001, "loss": 2.8646, "step": 35640 }, { "epoch": 6.85, "learning_rate": 0.001, "loss": 2.8592, "step": 35652 }, { "epoch": 6.85, "learning_rate": 0.001, "loss": 2.8525, "step": 35664 }, { "epoch": 6.85, "learning_rate": 0.001, "loss": 2.8558, "step": 35676 }, { "epoch": 6.85, "learning_rate": 0.001, "loss": 2.8457, "step": 35688 }, { "epoch": 6.85, "learning_rate": 0.001, "loss": 2.8419, "step": 35700 }, { "epoch": 6.86, "learning_rate": 0.001, "loss": 2.8466, "step": 35712 }, { "epoch": 6.86, "learning_rate": 0.001, "loss": 2.856, "step": 35724 }, { "epoch": 6.86, "learning_rate": 0.001, "loss": 2.854, "step": 35736 }, { "epoch": 6.86, "learning_rate": 0.001, "loss": 2.8529, "step": 35748 }, { "epoch": 6.87, "learning_rate": 0.001, "loss": 2.8482, "step": 35760 }, { "epoch": 6.87, "learning_rate": 0.001, "loss": 2.858, "step": 35772 }, { "epoch": 6.87, "learning_rate": 0.001, "loss": 2.8507, "step": 35784 }, { "epoch": 6.87, "learning_rate": 0.001, "loss": 2.8616, "step": 35796 }, { "epoch": 6.88, "learning_rate": 0.001, "loss": 2.8568, "step": 35808 }, { "epoch": 6.88, "learning_rate": 0.001, "loss": 2.8616, "step": 35820 }, { "epoch": 6.88, "learning_rate": 0.001, "loss": 2.8503, "step": 35832 }, { "epoch": 6.88, "learning_rate": 0.001, "loss": 2.8434, "step": 35844 }, { "epoch": 6.88, "learning_rate": 0.001, "loss": 2.8444, "step": 35856 }, { "epoch": 6.89, "learning_rate": 0.001, "loss": 2.8429, "step": 35868 }, { "epoch": 6.89, "learning_rate": 0.001, "loss": 2.8538, "step": 35880 }, { "epoch": 6.89, "learning_rate": 0.001, "loss": 2.8436, "step": 35892 }, { "epoch": 6.89, "learning_rate": 0.001, "loss": 2.8491, "step": 35904 }, { "epoch": 6.9, "learning_rate": 0.001, "loss": 2.8533, "step": 35916 }, { "epoch": 6.9, "learning_rate": 0.001, "loss": 2.8437, "step": 35928 }, { "epoch": 6.9, "learning_rate": 0.001, "loss": 2.8423, "step": 35940 }, { "epoch": 6.9, "learning_rate": 0.001, "loss": 2.8602, "step": 35952 }, { "epoch": 6.91, "learning_rate": 0.001, "loss": 2.8524, "step": 35964 }, { "epoch": 6.91, "learning_rate": 0.001, "loss": 2.852, "step": 35976 }, { "epoch": 6.91, "learning_rate": 0.001, "loss": 2.8552, "step": 35988 }, { "epoch": 6.91, "learning_rate": 0.001, "loss": 2.8545, "step": 36000 }, { "epoch": 6.91, "learning_rate": 0.001, "loss": 2.8462, "step": 36012 }, { "epoch": 6.92, "learning_rate": 0.001, "loss": 2.8455, "step": 36024 }, { "epoch": 6.92, "learning_rate": 0.001, "loss": 2.8533, "step": 36036 }, { "epoch": 6.92, "learning_rate": 0.001, "loss": 2.8509, "step": 36048 }, { "epoch": 6.92, "learning_rate": 0.001, "loss": 2.8451, "step": 36060 }, { "epoch": 6.93, "learning_rate": 0.001, "loss": 2.8382, "step": 36072 }, { "epoch": 6.93, "learning_rate": 0.001, "loss": 2.8433, "step": 36084 }, { "epoch": 6.93, "learning_rate": 0.001, "loss": 2.8458, "step": 36096 }, { "epoch": 6.93, "learning_rate": 0.001, "loss": 2.8464, "step": 36108 }, { "epoch": 6.94, "learning_rate": 0.001, "loss": 2.85, "step": 36120 }, { "epoch": 6.94, "learning_rate": 0.001, "loss": 2.8432, "step": 36132 }, { "epoch": 6.94, "learning_rate": 0.001, "loss": 2.8462, "step": 36144 }, { "epoch": 6.94, "learning_rate": 0.001, "loss": 2.85, "step": 36156 }, { "epoch": 6.94, "learning_rate": 0.001, "loss": 2.8524, "step": 36168 }, { "epoch": 6.95, "learning_rate": 0.001, "loss": 2.843, "step": 36180 }, { "epoch": 6.95, "learning_rate": 0.001, "loss": 2.8532, "step": 36192 }, { "epoch": 6.95, "learning_rate": 0.001, "loss": 2.8448, "step": 36204 }, { "epoch": 6.95, "learning_rate": 0.001, "loss": 2.8499, "step": 36216 }, { "epoch": 6.96, "learning_rate": 0.001, "loss": 2.8455, "step": 36228 }, { "epoch": 6.96, "learning_rate": 0.001, "loss": 2.8371, "step": 36240 }, { "epoch": 6.96, "eval_ag_news_accuracy": 0.29059375, "eval_ag_news_bleu_score": 4.001013091157865, "eval_ag_news_bleu_score_sem": 0.14201708960718484, "eval_ag_news_emb_cos_sim": 0.7538927793502808, "eval_ag_news_emb_cos_sim_sem": 0.008352503728662555, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.900416612625122, "eval_ag_news_n_ngrams_match_1": 12.128, "eval_ag_news_n_ngrams_match_2": 2.406, "eval_ag_news_n_ngrams_match_3": 0.624, "eval_ag_news_num_pred_words": 46.148, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 49.42303507742963, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.2973890165050359, "eval_ag_news_runtime": 10.7502, "eval_ag_news_samples_per_second": 46.511, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3108292272235678, "eval_ag_news_token_set_f1_sem": 0.004354173725518134, "eval_ag_news_token_set_precision": 0.28546916876106476, "eval_ag_news_token_set_recall": 0.35694876421234406, "eval_ag_news_true_num_tokens": 56.09375, "step": 36250 }, { "epoch": 6.96, "eval_anthropic_toxic_prompts_accuracy": 0.099625, "eval_anthropic_toxic_prompts_bleu_score": 2.4159594631973844, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10111036844234905, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6089364886283875, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010003519591201774, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.583393096923828, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.14, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.338, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.414, "eval_anthropic_toxic_prompts_num_pred_words": 47.616, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 35.99546998987554, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1792649419521139, "eval_anthropic_toxic_prompts_runtime": 10.3329, "eval_anthropic_toxic_prompts_samples_per_second": 48.389, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3109371902762712, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006244863566339072, "eval_anthropic_toxic_prompts_token_set_precision": 0.3576372670219784, "eval_anthropic_toxic_prompts_token_set_recall": 0.31091232640093097, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 36250 }, { "epoch": 6.96, "eval_arxiv_accuracy": 0.3146875, "eval_arxiv_bleu_score": 3.6703718121870126, "eval_arxiv_bleu_score_sem": 0.10700140531860355, "eval_arxiv_emb_cos_sim": 0.6836249828338623, "eval_arxiv_emb_cos_sim_sem": 0.008382395722424399, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7564890384674072, "eval_arxiv_n_ngrams_match_1": 12.984, "eval_arxiv_n_ngrams_match_2": 2.346, "eval_arxiv_n_ngrams_match_3": 0.5, "eval_arxiv_num_pred_words": 40.41, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 42.7979001071644, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.30671241646693637, "eval_arxiv_runtime": 10.5603, "eval_arxiv_samples_per_second": 47.347, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.30646650884920795, "eval_arxiv_token_set_f1_sem": 0.003758961329871487, "eval_arxiv_token_set_precision": 0.24882269285300268, "eval_arxiv_token_set_recall": 0.4172174684794033, "eval_arxiv_true_num_tokens": 64.0, "step": 36250 }, { "epoch": 6.96, "eval_python_code_alpaca_accuracy": 0.14065625, "eval_python_code_alpaca_bleu_score": 3.2848233900059314, "eval_python_code_alpaca_bleu_score_sem": 0.10176746688337283, "eval_python_code_alpaca_emb_cos_sim": 0.6695069074630737, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00981511593436849, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2280962467193604, "eval_python_code_alpaca_n_ngrams_match_1": 7.768, "eval_python_code_alpaca_n_ngrams_match_2": 1.9, "eval_python_code_alpaca_n_ngrams_match_3": 0.502, "eval_python_code_alpaca_num_pred_words": 42.712, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.231576522248368, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2609131432789731, "eval_python_code_alpaca_runtime": 9.9834, "eval_python_code_alpaca_samples_per_second": 50.083, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.40384864133353643, "eval_python_code_alpaca_token_set_f1_sem": 0.005792776192029661, "eval_python_code_alpaca_token_set_precision": 0.4170913553167784, "eval_python_code_alpaca_token_set_recall": 0.4216073861025638, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 36250 }, { "epoch": 6.96, "eval_wikibio_accuracy": 0.29371875, "eval_wikibio_bleu_score": 5.271636939003589, "eval_wikibio_bleu_score_sem": 0.19323567576436282, "eval_wikibio_emb_cos_sim": 0.7105895280838013, "eval_wikibio_emb_cos_sim_sem": 0.008956441067498839, "eval_wikibio_emb_top1_equal": 0.109375, "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.201203346252441, "eval_wikibio_n_ngrams_match_1": 9.732, "eval_wikibio_n_ngrams_match_2": 3.082, "eval_wikibio_n_ngrams_match_3": 1.07, "eval_wikibio_num_pred_words": 37.694, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 66.76662608915578, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32668907550985005, "eval_wikibio_runtime": 10.9455, "eval_wikibio_samples_per_second": 45.681, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3046040829945229, "eval_wikibio_token_set_f1_sem": 0.0052864212939238405, "eval_wikibio_token_set_precision": 0.3145774256119936, "eval_wikibio_token_set_recall": 0.3093503197085187, "eval_wikibio_true_num_tokens": 61.1328125, "step": 36250 }, { "epoch": 6.96, "eval_nq_accuracy": 0.4876875, "eval_nq_bleu_score": 9.48210526660597, "eval_nq_bleu_score_sem": 0.40051845469570535, "eval_nq_emb_cos_sim": 0.7906562089920044, "eval_nq_emb_cos_sim_sem": 0.00784313667389513, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4820315837860107, "eval_nq_n_ngrams_match_1": 21.25, "eval_nq_n_ngrams_match_2": 7.096, "eval_nq_n_ngrams_match_3": 3.04, "eval_nq_num_pred_words": 48.962, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.965548756506747, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4038071437912738, "eval_nq_runtime": 11.5124, "eval_nq_samples_per_second": 43.432, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.42580213402058825, "eval_nq_token_set_f1_sem": 0.00475173811494621, "eval_nq_token_set_precision": 0.37531346390125303, "eval_nq_token_set_recall": 0.5037030093758051, "eval_nq_true_num_tokens": 64.0, "step": 36250 }, { "epoch": 6.96, "learning_rate": 0.001, "loss": 2.8489, "step": 36252 }, { "epoch": 6.96, "learning_rate": 0.001, "loss": 2.8478, "step": 36264 }, { "epoch": 6.97, "learning_rate": 0.001, "loss": 2.8597, "step": 36276 }, { "epoch": 6.97, "learning_rate": 0.001, "loss": 2.8406, "step": 36288 }, { "epoch": 6.97, "learning_rate": 0.001, "loss": 2.8589, "step": 36300 }, { "epoch": 6.97, "learning_rate": 0.001, "loss": 2.8369, "step": 36312 }, { "epoch": 6.97, "learning_rate": 0.001, "loss": 2.8513, "step": 36324 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 2.8321, "step": 36336 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 2.8498, "step": 36348 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 2.8382, "step": 36360 }, { "epoch": 6.98, "learning_rate": 0.001, "loss": 2.8383, "step": 36372 }, { "epoch": 6.99, "learning_rate": 0.001, "loss": 2.8456, "step": 36384 }, { "epoch": 6.99, "learning_rate": 0.001, "loss": 2.8407, "step": 36396 }, { "epoch": 6.99, "learning_rate": 0.001, "loss": 2.8485, "step": 36408 }, { "epoch": 6.99, "learning_rate": 0.001, "loss": 2.8449, "step": 36420 }, { "epoch": 7.0, "learning_rate": 0.001, "loss": 2.853, "step": 36432 }, { "epoch": 7.0, "learning_rate": 0.001, "loss": 2.8466, "step": 36444 }, { "epoch": 7.0, "learning_rate": 0.001, "loss": 2.8484, "step": 36456 }, { "epoch": 7.0, "learning_rate": 0.001, "loss": 2.843, "step": 36468 }, { "epoch": 7.0, "learning_rate": 0.001, "loss": 2.8334, "step": 36480 }, { "epoch": 7.01, "learning_rate": 0.001, "loss": 2.835, "step": 36492 }, { "epoch": 7.01, "learning_rate": 0.001, "loss": 2.8219, "step": 36504 }, { "epoch": 7.01, "learning_rate": 0.001, "loss": 2.8247, "step": 36516 }, { "epoch": 7.01, "learning_rate": 0.001, "loss": 2.8225, "step": 36528 }, { "epoch": 7.02, "learning_rate": 0.001, "loss": 2.817, "step": 36540 }, { "epoch": 7.02, "learning_rate": 0.001, "loss": 2.8337, "step": 36552 }, { "epoch": 7.02, "learning_rate": 0.001, "loss": 2.8255, "step": 36564 }, { "epoch": 7.02, "learning_rate": 0.001, "loss": 2.8304, "step": 36576 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 2.816, "step": 36588 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 2.8233, "step": 36600 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 2.8291, "step": 36612 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 2.83, "step": 36624 }, { "epoch": 7.03, "learning_rate": 0.001, "loss": 2.8223, "step": 36636 }, { "epoch": 7.04, "learning_rate": 0.001, "loss": 2.8189, "step": 36648 }, { "epoch": 7.04, "learning_rate": 0.001, "loss": 2.8093, "step": 36660 }, { "epoch": 7.04, "learning_rate": 0.001, "loss": 2.8338, "step": 36672 }, { "epoch": 7.04, "learning_rate": 0.001, "loss": 2.8249, "step": 36684 }, { "epoch": 7.05, "learning_rate": 0.001, "loss": 2.8358, "step": 36696 }, { "epoch": 7.05, "learning_rate": 0.001, "loss": 2.8367, "step": 36708 }, { "epoch": 7.05, "learning_rate": 0.001, "loss": 2.8302, "step": 36720 }, { "epoch": 7.05, "learning_rate": 0.001, "loss": 2.8186, "step": 36732 }, { "epoch": 7.06, "learning_rate": 0.001, "loss": 2.832, "step": 36744 }, { "epoch": 7.06, "learning_rate": 0.001, "loss": 2.8113, "step": 36756 }, { "epoch": 7.06, "learning_rate": 0.001, "loss": 2.8182, "step": 36768 }, { "epoch": 7.06, "learning_rate": 0.001, "loss": 2.8249, "step": 36780 }, { "epoch": 7.06, "learning_rate": 0.001, "loss": 2.8294, "step": 36792 }, { "epoch": 7.07, "learning_rate": 0.001, "loss": 2.8356, "step": 36804 }, { "epoch": 7.07, "learning_rate": 0.001, "loss": 2.8292, "step": 36816 }, { "epoch": 7.07, "learning_rate": 0.001, "loss": 2.8312, "step": 36828 }, { "epoch": 7.07, "learning_rate": 0.001, "loss": 2.8321, "step": 36840 }, { "epoch": 7.08, "learning_rate": 0.001, "loss": 2.8241, "step": 36852 }, { "epoch": 7.08, "learning_rate": 0.001, "loss": 2.8207, "step": 36864 }, { "epoch": 7.08, "eval_ag_news_accuracy": 0.29071875, "eval_ag_news_bleu_score": 4.141965842411604, "eval_ag_news_bleu_score_sem": 0.14642807861497975, "eval_ag_news_emb_cos_sim": 0.7600589990615845, "eval_ag_news_emb_cos_sim_sem": 0.008231787910537766, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.883575677871704, "eval_ag_news_n_ngrams_match_1": 12.31, "eval_ag_news_n_ngrams_match_2": 2.478, "eval_ag_news_n_ngrams_match_3": 0.67, "eval_ag_news_num_pred_words": 45.432, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 48.59767439815544, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3025313278235161, "eval_ag_news_runtime": 10.9462, "eval_ag_news_samples_per_second": 45.678, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.311197292571738, "eval_ag_news_token_set_f1_sem": 0.004346622013970811, "eval_ag_news_token_set_precision": 0.2888890686972827, "eval_ag_news_token_set_recall": 0.3517108184800994, "eval_ag_news_true_num_tokens": 56.09375, "step": 36875 }, { "epoch": 7.08, "eval_anthropic_toxic_prompts_accuracy": 0.1015, "eval_anthropic_toxic_prompts_bleu_score": 2.5673783394611256, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09869631468894487, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6269517540931702, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00992886501014333, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.581580877304077, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.342, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.474, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482, "eval_anthropic_toxic_prompts_num_pred_words": 46.636, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 35.930297364329405, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1844880856392122, "eval_anthropic_toxic_prompts_runtime": 10.6424, "eval_anthropic_toxic_prompts_samples_per_second": 46.982, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.31599435498013273, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006232520767646726, "eval_anthropic_toxic_prompts_token_set_precision": 0.37148702586973925, "eval_anthropic_toxic_prompts_token_set_recall": 0.30336107624244457, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 36875 }, { "epoch": 7.08, "eval_arxiv_accuracy": 0.31634375, "eval_arxiv_bleu_score": 3.61060866537974, "eval_arxiv_bleu_score_sem": 0.10466893275717792, "eval_arxiv_emb_cos_sim": 0.6754040718078613, "eval_arxiv_emb_cos_sim_sem": 0.00744851836546566, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7616000175476074, "eval_arxiv_n_ngrams_match_1": 12.778, "eval_arxiv_n_ngrams_match_2": 2.378, "eval_arxiv_n_ngrams_match_3": 0.464, "eval_arxiv_num_pred_words": 39.126, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 43.017199218494405, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.30578230002592977, "eval_arxiv_runtime": 10.2476, "eval_arxiv_samples_per_second": 48.792, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.30199824803188574, "eval_arxiv_token_set_f1_sem": 0.0037763345145071857, "eval_arxiv_token_set_precision": 0.24375933952164122, "eval_arxiv_token_set_recall": 0.41768886025886237, "eval_arxiv_true_num_tokens": 64.0, "step": 36875 }, { "epoch": 7.08, "eval_python_code_alpaca_accuracy": 0.1393125, "eval_python_code_alpaca_bleu_score": 3.4882156948923555, "eval_python_code_alpaca_bleu_score_sem": 0.11563514206491479, "eval_python_code_alpaca_emb_cos_sim": 0.6675550937652588, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010578084527893014, "eval_python_code_alpaca_emb_top1_equal": 0.0546875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.020175758285348722, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.244743585586548, "eval_python_code_alpaca_n_ngrams_match_1": 7.986, "eval_python_code_alpaca_n_ngrams_match_2": 1.942, "eval_python_code_alpaca_n_ngrams_match_3": 0.514, "eval_python_code_alpaca_num_pred_words": 41.336, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.65513087143046, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.27403475195271904, "eval_python_code_alpaca_runtime": 10.9427, "eval_python_code_alpaca_samples_per_second": 45.692, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.40519768568179015, "eval_python_code_alpaca_token_set_f1_sem": 0.0057966975821193995, "eval_python_code_alpaca_token_set_precision": 0.4252213515985516, "eval_python_code_alpaca_token_set_recall": 0.41795260782813726, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 36875 }, { "epoch": 7.08, "eval_wikibio_accuracy": 0.29296875, "eval_wikibio_bleu_score": 5.199053149502148, "eval_wikibio_bleu_score_sem": 0.18898659375276783, "eval_wikibio_emb_cos_sim": 0.7024262547492981, "eval_wikibio_emb_cos_sim_sem": 0.010916673211038804, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.163374423980713, "eval_wikibio_n_ngrams_match_1": 9.754, "eval_wikibio_n_ngrams_match_2": 3.068, "eval_wikibio_n_ngrams_match_3": 1.046, "eval_wikibio_num_pred_words": 37.276, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 64.28809227573059, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3276041095901913, "eval_wikibio_runtime": 10.0714, "eval_wikibio_samples_per_second": 49.645, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.30448842046652186, "eval_wikibio_token_set_f1_sem": 0.005315955675238802, "eval_wikibio_token_set_precision": 0.31493274930481996, "eval_wikibio_token_set_recall": 0.30684122258675334, "eval_wikibio_true_num_tokens": 61.1328125, "step": 36875 }, { "epoch": 7.08, "eval_nq_accuracy": 0.48971875, "eval_nq_bleu_score": 9.495678587851165, "eval_nq_bleu_score_sem": 0.4259901616668921, "eval_nq_emb_cos_sim": 0.7842116951942444, "eval_nq_emb_cos_sim_sem": 0.008655918232547972, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.469813823699951, "eval_nq_n_ngrams_match_1": 20.948, "eval_nq_n_ngrams_match_2": 7.142, "eval_nq_n_ngrams_match_3": 3.06, "eval_nq_num_pred_words": 47.976, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.820245997113831, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4023191493701264, "eval_nq_runtime": 10.9667, "eval_nq_samples_per_second": 45.593, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4214398649034764, "eval_nq_token_set_f1_sem": 0.0049624740970446975, "eval_nq_token_set_precision": 0.3724223657562727, "eval_nq_token_set_recall": 0.5000541047100108, "eval_nq_true_num_tokens": 64.0, "step": 36875 }, { "epoch": 7.08, "learning_rate": 0.001, "loss": 2.8229, "step": 36876 }, { "epoch": 7.08, "learning_rate": 0.001, "loss": 2.8334, "step": 36888 }, { "epoch": 7.09, "learning_rate": 0.001, "loss": 2.8293, "step": 36900 }, { "epoch": 7.09, "learning_rate": 0.001, "loss": 2.8382, "step": 36912 }, { "epoch": 7.09, "learning_rate": 0.001, "loss": 2.8266, "step": 36924 }, { "epoch": 7.09, "learning_rate": 0.001, "loss": 2.8239, "step": 36936 }, { "epoch": 7.09, "learning_rate": 0.001, "loss": 2.8294, "step": 36948 }, { "epoch": 7.1, "learning_rate": 0.001, "loss": 2.8258, "step": 36960 }, { "epoch": 7.1, "learning_rate": 0.001, "loss": 2.8331, "step": 36972 }, { "epoch": 7.1, "learning_rate": 0.001, "loss": 2.8309, "step": 36984 }, { "epoch": 7.1, "learning_rate": 0.001, "loss": 2.8184, "step": 36996 }, { "epoch": 7.11, "learning_rate": 0.001, "loss": 2.8206, "step": 37008 }, { "epoch": 7.11, "learning_rate": 0.001, "loss": 2.8378, "step": 37020 }, { "epoch": 7.11, "learning_rate": 0.001, "loss": 2.8309, "step": 37032 }, { "epoch": 7.11, "learning_rate": 0.001, "loss": 2.8212, "step": 37044 }, { "epoch": 7.12, "learning_rate": 0.001, "loss": 2.8287, "step": 37056 }, { "epoch": 7.12, "learning_rate": 0.001, "loss": 2.8362, "step": 37068 }, { "epoch": 7.12, "learning_rate": 0.001, "loss": 2.8264, "step": 37080 }, { "epoch": 7.12, "learning_rate": 0.001, "loss": 2.8358, "step": 37092 }, { "epoch": 7.12, "learning_rate": 0.001, "loss": 2.8278, "step": 37104 }, { "epoch": 7.13, "learning_rate": 0.001, "loss": 2.8257, "step": 37116 }, { "epoch": 7.13, "learning_rate": 0.001, "loss": 2.8351, "step": 37128 }, { "epoch": 7.13, "learning_rate": 0.001, "loss": 2.8303, "step": 37140 }, { "epoch": 7.13, "learning_rate": 0.001, "loss": 2.8269, "step": 37152 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 2.8345, "step": 37164 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 2.8335, "step": 37176 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 2.834, "step": 37188 }, { "epoch": 7.14, "learning_rate": 0.001, "loss": 2.824, "step": 37200 }, { "epoch": 7.15, "learning_rate": 0.001, "loss": 2.8216, "step": 37212 }, { "epoch": 7.15, "learning_rate": 0.001, "loss": 2.8312, "step": 37224 }, { "epoch": 7.15, "learning_rate": 0.001, "loss": 2.8302, "step": 37236 }, { "epoch": 7.15, "learning_rate": 0.001, "loss": 2.83, "step": 37248 }, { "epoch": 7.15, "learning_rate": 0.001, "loss": 2.8333, "step": 37260 }, { "epoch": 7.16, "learning_rate": 0.001, "loss": 2.8228, "step": 37272 }, { "epoch": 7.16, "learning_rate": 0.001, "loss": 2.8243, "step": 37284 }, { "epoch": 7.16, "learning_rate": 0.001, "loss": 2.8237, "step": 37296 }, { "epoch": 7.16, "learning_rate": 0.001, "loss": 2.8381, "step": 37308 }, { "epoch": 7.17, "learning_rate": 0.001, "loss": 2.8253, "step": 37320 }, { "epoch": 7.17, "learning_rate": 0.001, "loss": 2.8203, "step": 37332 }, { "epoch": 7.17, "learning_rate": 0.001, "loss": 2.8171, "step": 37344 }, { "epoch": 7.17, "learning_rate": 0.001, "loss": 2.8216, "step": 37356 }, { "epoch": 7.18, "learning_rate": 0.001, "loss": 2.8243, "step": 37368 }, { "epoch": 7.18, "learning_rate": 0.001, "loss": 2.8259, "step": 37380 }, { "epoch": 7.18, "learning_rate": 0.001, "loss": 2.8358, "step": 37392 }, { "epoch": 7.18, "learning_rate": 0.001, "loss": 2.8213, "step": 37404 }, { "epoch": 7.18, "learning_rate": 0.001, "loss": 2.8266, "step": 37416 }, { "epoch": 7.19, "learning_rate": 0.001, "loss": 2.8401, "step": 37428 }, { "epoch": 7.19, "learning_rate": 0.001, "loss": 2.8282, "step": 37440 }, { "epoch": 7.19, "learning_rate": 0.001, "loss": 2.8371, "step": 37452 }, { "epoch": 7.19, "learning_rate": 0.001, "loss": 2.836, "step": 37464 }, { "epoch": 7.2, "learning_rate": 0.001, "loss": 2.823, "step": 37476 }, { "epoch": 7.2, "learning_rate": 0.001, "loss": 2.8384, "step": 37488 }, { "epoch": 7.2, "learning_rate": 0.001, "loss": 2.8208, "step": 37500 }, { "epoch": 7.2, "eval_ag_news_accuracy": 0.291875, "eval_ag_news_bleu_score": 4.075560197578808, "eval_ag_news_bleu_score_sem": 0.14055279167100063, "eval_ag_news_emb_cos_sim": 0.7409658432006836, "eval_ag_news_emb_cos_sim_sem": 0.009640743226822287, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.881091833114624, "eval_ag_news_n_ngrams_match_1": 12.298, "eval_ag_news_n_ngrams_match_2": 2.484, "eval_ag_news_n_ngrams_match_3": 0.682, "eval_ag_news_num_pred_words": 45.786, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 48.47711510665973, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30378170198314647, "eval_ag_news_runtime": 12.221, "eval_ag_news_samples_per_second": 40.913, "eval_ag_news_steps_per_second": 0.082, "eval_ag_news_token_set_f1": 0.3154912392593799, "eval_ag_news_token_set_f1_sem": 0.004561078227661068, "eval_ag_news_token_set_precision": 0.2903076069345732, "eval_ag_news_token_set_recall": 0.3611364481283025, "eval_ag_news_true_num_tokens": 56.09375, "step": 37500 }, { "epoch": 7.2, "eval_anthropic_toxic_prompts_accuracy": 0.10059375, "eval_anthropic_toxic_prompts_bleu_score": 2.58843635674327, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10779527682021862, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6182723045349121, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009823784355727068, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.567587375640869, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.392, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.464, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.51, "eval_anthropic_toxic_prompts_num_pred_words": 48.234, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 35.431008237475915, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1856435062955619, "eval_anthropic_toxic_prompts_runtime": 10.3726, "eval_anthropic_toxic_prompts_samples_per_second": 48.204, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.3244347198169251, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006269069725487458, "eval_anthropic_toxic_prompts_token_set_precision": 0.37710394785701207, "eval_anthropic_toxic_prompts_token_set_recall": 0.3156356428491323, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 37500 }, { "epoch": 7.2, "eval_arxiv_accuracy": 0.31871875, "eval_arxiv_bleu_score": 3.564613700077253, "eval_arxiv_bleu_score_sem": 0.09531050104188801, "eval_arxiv_emb_cos_sim": 0.6837230324745178, "eval_arxiv_emb_cos_sim_sem": 0.00811848796211737, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7428550720214844, "eval_arxiv_n_ngrams_match_1": 12.93, "eval_arxiv_n_ngrams_match_2": 2.36, "eval_arxiv_n_ngrams_match_3": 0.45, "eval_arxiv_num_pred_words": 39.578, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 42.21835470182647, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.30794959133345395, "eval_arxiv_runtime": 10.6342, "eval_arxiv_samples_per_second": 47.018, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3062157910336005, "eval_arxiv_token_set_f1_sem": 0.003950539903797717, "eval_arxiv_token_set_precision": 0.24876342332733564, "eval_arxiv_token_set_recall": 0.4193676679084893, "eval_arxiv_true_num_tokens": 64.0, "step": 37500 }, { "epoch": 7.2, "eval_python_code_alpaca_accuracy": 0.14075, "eval_python_code_alpaca_bleu_score": 3.419494453444551, "eval_python_code_alpaca_bleu_score_sem": 0.10507328139052009, "eval_python_code_alpaca_emb_cos_sim": 0.6705363988876343, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010630884650909543, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2194786071777344, "eval_python_code_alpaca_n_ngrams_match_1": 8.27, "eval_python_code_alpaca_n_ngrams_match_2": 2.044, "eval_python_code_alpaca_n_ngrams_match_3": 0.564, "eval_python_code_alpaca_num_pred_words": 43.53, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.015074100482465, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2729404149874883, "eval_python_code_alpaca_runtime": 10.1377, "eval_python_code_alpaca_samples_per_second": 49.321, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.41605554277387596, "eval_python_code_alpaca_token_set_f1_sem": 0.005467879657901665, "eval_python_code_alpaca_token_set_precision": 0.4406255824141954, "eval_python_code_alpaca_token_set_recall": 0.41881482072072057, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 37500 }, { "epoch": 7.2, "eval_wikibio_accuracy": 0.29240625, "eval_wikibio_bleu_score": 5.189507654660742, "eval_wikibio_bleu_score_sem": 0.1837294166051759, "eval_wikibio_emb_cos_sim": 0.7011621594429016, "eval_wikibio_emb_cos_sim_sem": 0.010774044324410892, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.191478729248047, "eval_wikibio_n_ngrams_match_1": 9.692, "eval_wikibio_n_ngrams_match_2": 3.042, "eval_wikibio_n_ngrams_match_3": 1.028, "eval_wikibio_num_pred_words": 37.5, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 66.1204930120782, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32669898414904897, "eval_wikibio_runtime": 10.4597, "eval_wikibio_samples_per_second": 47.802, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3080096988415167, "eval_wikibio_token_set_f1_sem": 0.005330793027963762, "eval_wikibio_token_set_precision": 0.3151688113428011, "eval_wikibio_token_set_recall": 0.3171309092433026, "eval_wikibio_true_num_tokens": 61.1328125, "step": 37500 }, { "epoch": 7.2, "eval_nq_accuracy": 0.49121875, "eval_nq_bleu_score": 9.849262161468808, "eval_nq_bleu_score_sem": 0.40282802508063553, "eval_nq_emb_cos_sim": 0.7896238565444946, "eval_nq_emb_cos_sim_sem": 0.008385892593761787, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4660420417785645, "eval_nq_n_ngrams_match_1": 21.44, "eval_nq_n_ngrams_match_2": 7.312, "eval_nq_n_ngrams_match_3": 3.166, "eval_nq_num_pred_words": 48.942, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.775746580757938, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.40923723675163604, "eval_nq_runtime": 10.8654, "eval_nq_samples_per_second": 46.018, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4318693510646818, "eval_nq_token_set_f1_sem": 0.004863742989369321, "eval_nq_token_set_precision": 0.3807189451573697, "eval_nq_token_set_recall": 0.5099779210025704, "eval_nq_true_num_tokens": 64.0, "step": 37500 }, { "epoch": 7.2, "learning_rate": 0.001, "loss": 2.8273, "step": 37512 }, { "epoch": 7.21, "learning_rate": 0.001, "loss": 2.8239, "step": 37524 }, { "epoch": 7.21, "learning_rate": 0.001, "loss": 2.8333, "step": 37536 }, { "epoch": 7.21, "learning_rate": 0.001, "loss": 2.8263, "step": 37548 }, { "epoch": 7.21, "learning_rate": 0.001, "loss": 2.829, "step": 37560 }, { "epoch": 7.21, "learning_rate": 0.001, "loss": 2.8368, "step": 37572 }, { "epoch": 7.22, "learning_rate": 0.001, "loss": 2.8205, "step": 37584 }, { "epoch": 7.22, "learning_rate": 0.001, "loss": 2.8289, "step": 37596 }, { "epoch": 7.22, "learning_rate": 0.001, "loss": 2.8197, "step": 37608 }, { "epoch": 7.22, "learning_rate": 0.001, "loss": 2.8303, "step": 37620 }, { "epoch": 7.23, "learning_rate": 0.001, "loss": 2.8327, "step": 37632 }, { "epoch": 7.23, "learning_rate": 0.001, "loss": 2.8223, "step": 37644 }, { "epoch": 7.23, "learning_rate": 0.001, "loss": 2.8316, "step": 37656 }, { "epoch": 7.23, "learning_rate": 0.001, "loss": 2.8262, "step": 37668 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 2.8349, "step": 37680 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 2.8337, "step": 37692 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 2.8323, "step": 37704 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 2.823, "step": 37716 }, { "epoch": 7.24, "learning_rate": 0.001, "loss": 2.8305, "step": 37728 }, { "epoch": 7.25, "learning_rate": 0.001, "loss": 2.8321, "step": 37740 }, { "epoch": 7.25, "learning_rate": 0.001, "loss": 2.8383, "step": 37752 }, { "epoch": 7.25, "learning_rate": 0.001, "loss": 2.8262, "step": 37764 }, { "epoch": 7.25, "learning_rate": 0.001, "loss": 2.8321, "step": 37776 }, { "epoch": 7.26, "learning_rate": 0.001, "loss": 2.828, "step": 37788 }, { "epoch": 7.26, "learning_rate": 0.001, "loss": 2.8239, "step": 37800 }, { "epoch": 7.26, "learning_rate": 0.001, "loss": 2.8257, "step": 37812 }, { "epoch": 7.26, "learning_rate": 0.001, "loss": 2.8249, "step": 37824 }, { "epoch": 7.26, "learning_rate": 0.001, "loss": 2.8192, "step": 37836 }, { "epoch": 7.27, "learning_rate": 0.001, "loss": 2.8276, "step": 37848 }, { "epoch": 7.27, "learning_rate": 0.001, "loss": 2.8194, "step": 37860 }, { "epoch": 7.27, "learning_rate": 0.001, "loss": 2.8247, "step": 37872 }, { "epoch": 7.27, "learning_rate": 0.001, "loss": 2.8227, "step": 37884 }, { "epoch": 7.28, "learning_rate": 0.001, "loss": 2.8239, "step": 37896 }, { "epoch": 7.28, "learning_rate": 0.001, "loss": 2.8291, "step": 37908 }, { "epoch": 7.28, "learning_rate": 0.001, "loss": 2.8142, "step": 37920 }, { "epoch": 7.28, "learning_rate": 0.001, "loss": 2.8255, "step": 37932 }, { "epoch": 7.29, "learning_rate": 0.001, "loss": 2.8271, "step": 37944 }, { "epoch": 7.29, "learning_rate": 0.001, "loss": 2.8185, "step": 37956 }, { "epoch": 7.29, "learning_rate": 0.001, "loss": 2.833, "step": 37968 }, { "epoch": 7.29, "learning_rate": 0.001, "loss": 2.8274, "step": 37980 }, { "epoch": 7.29, "learning_rate": 0.001, "loss": 2.8212, "step": 37992 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 2.8138, "step": 38004 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 2.8294, "step": 38016 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 2.8235, "step": 38028 }, { "epoch": 7.3, "learning_rate": 0.001, "loss": 2.8288, "step": 38040 }, { "epoch": 7.31, "learning_rate": 0.001, "loss": 2.8333, "step": 38052 }, { "epoch": 7.31, "learning_rate": 0.001, "loss": 2.8315, "step": 38064 }, { "epoch": 7.31, "learning_rate": 0.001, "loss": 2.8288, "step": 38076 }, { "epoch": 7.31, "learning_rate": 0.001, "loss": 2.8248, "step": 38088 }, { "epoch": 7.32, "learning_rate": 0.001, "loss": 2.8288, "step": 38100 }, { "epoch": 7.32, "learning_rate": 0.001, "loss": 2.8197, "step": 38112 }, { "epoch": 7.32, "learning_rate": 0.001, "loss": 2.8248, "step": 38124 }, { "epoch": 7.32, "eval_ag_news_accuracy": 0.291375, "eval_ag_news_bleu_score": 4.111927591650485, "eval_ag_news_bleu_score_sem": 0.13942959236006863, "eval_ag_news_emb_cos_sim": 0.7579980492591858, "eval_ag_news_emb_cos_sim_sem": 0.0078110662149008785, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8864200115203857, "eval_ag_news_n_ngrams_match_1": 12.37, "eval_ag_news_n_ngrams_match_2": 2.552, "eval_ag_news_n_ngrams_match_3": 0.724, "eval_ag_news_num_pred_words": 46.098, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 48.736099168485765, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.29970997289982537, "eval_ag_news_runtime": 10.6196, "eval_ag_news_samples_per_second": 47.083, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3134195474069055, "eval_ag_news_token_set_f1_sem": 0.004421768378840445, "eval_ag_news_token_set_precision": 0.28808458414132176, "eval_ag_news_token_set_recall": 0.3613300297957331, "eval_ag_news_true_num_tokens": 56.09375, "step": 38125 }, { "epoch": 7.32, "eval_anthropic_toxic_prompts_accuracy": 0.10078125, "eval_anthropic_toxic_prompts_bleu_score": 2.5285294528171263, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10437604083084251, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6197388768196106, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009800552222574452, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.56309175491333, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.286, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.432, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.488, "eval_anthropic_toxic_prompts_num_pred_words": 47.19, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 35.27208136757842, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1833968358569168, "eval_anthropic_toxic_prompts_runtime": 10.2566, "eval_anthropic_toxic_prompts_samples_per_second": 48.749, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3225245430978992, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063695929910607945, "eval_anthropic_toxic_prompts_token_set_precision": 0.3682159403156869, "eval_anthropic_toxic_prompts_token_set_recall": 0.31861268834352285, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 38125 }, { "epoch": 7.32, "eval_arxiv_accuracy": 0.31896875, "eval_arxiv_bleu_score": 3.6042015729327757, "eval_arxiv_bleu_score_sem": 0.10093453348680773, "eval_arxiv_emb_cos_sim": 0.684664249420166, "eval_arxiv_emb_cos_sim_sem": 0.008271490900379536, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.73223614692688, "eval_arxiv_n_ngrams_match_1": 13.178, "eval_arxiv_n_ngrams_match_2": 2.372, "eval_arxiv_n_ngrams_match_3": 0.458, "eval_arxiv_num_pred_words": 40.458, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 41.77241305650989, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3081740223146453, "eval_arxiv_runtime": 10.3119, "eval_arxiv_samples_per_second": 48.488, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.30799579401648186, "eval_arxiv_token_set_f1_sem": 0.00398755371803012, "eval_arxiv_token_set_precision": 0.25273382676480266, "eval_arxiv_token_set_recall": 0.4171221620812675, "eval_arxiv_true_num_tokens": 64.0, "step": 38125 }, { "epoch": 7.32, "eval_python_code_alpaca_accuracy": 0.1390625, "eval_python_code_alpaca_bleu_score": 3.1865279393231867, "eval_python_code_alpaca_bleu_score_sem": 0.09784036125422817, "eval_python_code_alpaca_emb_cos_sim": 0.6805405020713806, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009657885798909584, "eval_python_code_alpaca_emb_top1_equal": 0.046875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.01875615101164758, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.25486159324646, "eval_python_code_alpaca_n_ngrams_match_1": 8.106, "eval_python_code_alpaca_n_ngrams_match_2": 1.866, "eval_python_code_alpaca_n_ngrams_match_3": 0.428, "eval_python_code_alpaca_num_pred_words": 44.13, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.916027332550556, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.271618347647289, "eval_python_code_alpaca_runtime": 10.7784, "eval_python_code_alpaca_samples_per_second": 46.389, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.4130636735073163, "eval_python_code_alpaca_token_set_f1_sem": 0.005273878767899933, "eval_python_code_alpaca_token_set_precision": 0.43130803287374025, "eval_python_code_alpaca_token_set_recall": 0.4222864183185088, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 38125 }, { "epoch": 7.32, "eval_wikibio_accuracy": 0.29465625, "eval_wikibio_bleu_score": 5.271262261283782, "eval_wikibio_bleu_score_sem": 0.19646156731593353, "eval_wikibio_emb_cos_sim": 0.6982460021972656, "eval_wikibio_emb_cos_sim_sem": 0.010816347247970178, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.171474456787109, "eval_wikibio_n_ngrams_match_1": 9.568, "eval_wikibio_n_ngrams_match_2": 3.022, "eval_wikibio_n_ngrams_match_3": 1.098, "eval_wikibio_num_pred_words": 37.092, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 64.8109426260154, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3197709786753735, "eval_wikibio_runtime": 10.0987, "eval_wikibio_samples_per_second": 49.511, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.29769667041718517, "eval_wikibio_token_set_f1_sem": 0.00563261630225679, "eval_wikibio_token_set_precision": 0.30834025800554304, "eval_wikibio_token_set_recall": 0.3052756926770025, "eval_wikibio_true_num_tokens": 61.1328125, "step": 38125 }, { "epoch": 7.32, "eval_nq_accuracy": 0.49228125, "eval_nq_bleu_score": 9.778886937357722, "eval_nq_bleu_score_sem": 0.4131434524416253, "eval_nq_emb_cos_sim": 0.7817389965057373, "eval_nq_emb_cos_sim_sem": 0.008876008416927353, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4626686573028564, "eval_nq_n_ngrams_match_1": 21.194, "eval_nq_n_ngrams_match_2": 7.186, "eval_nq_n_ngrams_match_3": 3.146, "eval_nq_num_pred_words": 48.8, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.736089387140503, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.403009996745112, "eval_nq_runtime": 14.774, "eval_nq_samples_per_second": 33.843, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.4231008441320113, "eval_nq_token_set_f1_sem": 0.0050237176397528475, "eval_nq_token_set_precision": 0.3735959931421409, "eval_nq_token_set_recall": 0.5010985353875919, "eval_nq_true_num_tokens": 64.0, "step": 38125 }, { "epoch": 7.32, "learning_rate": 0.001, "loss": 2.8287, "step": 38136 }, { "epoch": 7.32, "learning_rate": 0.001, "loss": 2.832, "step": 38148 }, { "epoch": 7.33, "learning_rate": 0.001, "loss": 2.8315, "step": 38160 }, { "epoch": 7.33, "learning_rate": 0.001, "loss": 2.825, "step": 38172 }, { "epoch": 7.33, "learning_rate": 0.001, "loss": 2.8255, "step": 38184 }, { "epoch": 7.33, "learning_rate": 0.001, "loss": 2.8236, "step": 38196 }, { "epoch": 7.34, "learning_rate": 0.001, "loss": 2.8253, "step": 38208 }, { "epoch": 7.34, "learning_rate": 0.001, "loss": 2.8243, "step": 38220 }, { "epoch": 7.34, "learning_rate": 0.001, "loss": 2.8307, "step": 38232 }, { "epoch": 7.34, "learning_rate": 0.001, "loss": 2.8263, "step": 38244 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 2.832, "step": 38256 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 2.829, "step": 38268 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 2.807, "step": 38280 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 2.8258, "step": 38292 }, { "epoch": 7.35, "learning_rate": 0.001, "loss": 2.8217, "step": 38304 }, { "epoch": 7.36, "learning_rate": 0.001, "loss": 2.8187, "step": 38316 }, { "epoch": 7.36, "learning_rate": 0.001, "loss": 2.8341, "step": 38328 }, { "epoch": 7.36, "learning_rate": 0.001, "loss": 2.8234, "step": 38340 }, { "epoch": 7.36, "learning_rate": 0.001, "loss": 2.8202, "step": 38352 }, { "epoch": 7.37, "learning_rate": 0.001, "loss": 2.8275, "step": 38364 }, { "epoch": 7.37, "learning_rate": 0.001, "loss": 2.8347, "step": 38376 }, { "epoch": 7.37, "learning_rate": 0.001, "loss": 2.817, "step": 38388 }, { "epoch": 7.37, "learning_rate": 0.001, "loss": 2.8151, "step": 38400 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 2.826, "step": 38412 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 2.8238, "step": 38424 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 2.8125, "step": 38436 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 2.8216, "step": 38448 }, { "epoch": 7.38, "learning_rate": 0.001, "loss": 2.815, "step": 38460 }, { "epoch": 7.39, "learning_rate": 0.001, "loss": 2.825, "step": 38472 }, { "epoch": 7.39, "learning_rate": 0.001, "loss": 2.8262, "step": 38484 }, { "epoch": 7.39, "learning_rate": 0.001, "loss": 2.8249, "step": 38496 }, { "epoch": 7.39, "learning_rate": 0.001, "loss": 2.8236, "step": 38508 }, { "epoch": 7.4, "learning_rate": 0.001, "loss": 2.8132, "step": 38520 }, { "epoch": 7.4, "learning_rate": 0.001, "loss": 2.8217, "step": 38532 }, { "epoch": 7.4, "learning_rate": 0.001, "loss": 2.8201, "step": 38544 }, { "epoch": 7.4, "learning_rate": 0.001, "loss": 2.8233, "step": 38556 }, { "epoch": 7.41, "learning_rate": 0.001, "loss": 2.8216, "step": 38568 }, { "epoch": 7.41, "learning_rate": 0.001, "loss": 2.8185, "step": 38580 }, { "epoch": 7.41, "learning_rate": 0.001, "loss": 2.8185, "step": 38592 }, { "epoch": 7.41, "learning_rate": 0.001, "loss": 2.8179, "step": 38604 }, { "epoch": 7.41, "learning_rate": 0.001, "loss": 2.8314, "step": 38616 }, { "epoch": 7.42, "learning_rate": 0.001, "loss": 2.8191, "step": 38628 }, { "epoch": 7.42, "learning_rate": 0.001, "loss": 2.8225, "step": 38640 }, { "epoch": 7.42, "learning_rate": 0.001, "loss": 2.8214, "step": 38652 }, { "epoch": 7.42, "learning_rate": 0.001, "loss": 2.8172, "step": 38664 }, { "epoch": 7.43, "learning_rate": 0.001, "loss": 2.8142, "step": 38676 }, { "epoch": 7.43, "learning_rate": 0.001, "loss": 2.8199, "step": 38688 }, { "epoch": 7.43, "learning_rate": 0.001, "loss": 2.8232, "step": 38700 }, { "epoch": 7.43, "learning_rate": 0.001, "loss": 2.8322, "step": 38712 }, { "epoch": 7.44, "learning_rate": 0.001, "loss": 2.8291, "step": 38724 }, { "epoch": 7.44, "learning_rate": 0.001, "loss": 2.8145, "step": 38736 }, { "epoch": 7.44, "learning_rate": 0.001, "loss": 2.8183, "step": 38748 }, { "epoch": 7.44, "eval_ag_news_accuracy": 0.29221875, "eval_ag_news_bleu_score": 4.092322841649292, "eval_ag_news_bleu_score_sem": 0.13873710704880135, "eval_ag_news_emb_cos_sim": 0.7574640512466431, "eval_ag_news_emb_cos_sim_sem": 0.007962351010541716, "eval_ag_news_emb_top1_equal": 0.140625, "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8607640266418457, "eval_ag_news_n_ngrams_match_1": 12.348, "eval_ag_news_n_ngrams_match_2": 2.478, "eval_ag_news_n_ngrams_match_3": 0.676, "eval_ag_news_num_pred_words": 45.734, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 47.50163001902676, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3018411152657613, "eval_ag_news_runtime": 11.495, "eval_ag_news_samples_per_second": 43.497, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.3163764829339234, "eval_ag_news_token_set_f1_sem": 0.004349552993178556, "eval_ag_news_token_set_precision": 0.29036863620803255, "eval_ag_news_token_set_recall": 0.36311352940842073, "eval_ag_news_true_num_tokens": 56.09375, "step": 38750 }, { "epoch": 7.44, "eval_anthropic_toxic_prompts_accuracy": 0.10159375, "eval_anthropic_toxic_prompts_bleu_score": 2.503779413713762, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09726843972739116, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6251869201660156, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009606774184011522, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5444931983947754, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.452, "eval_anthropic_toxic_prompts_num_pred_words": 47.476, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 34.62213434193815, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18346216916206595, "eval_anthropic_toxic_prompts_runtime": 9.9524, "eval_anthropic_toxic_prompts_samples_per_second": 50.239, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3175617659110513, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005847210547486618, "eval_anthropic_toxic_prompts_token_set_precision": 0.37391020862000574, "eval_anthropic_toxic_prompts_token_set_recall": 0.3082319678316208, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 38750 }, { "epoch": 7.44, "eval_arxiv_accuracy": 0.31990625, "eval_arxiv_bleu_score": 3.6723109283666857, "eval_arxiv_bleu_score_sem": 0.10926994837007345, "eval_arxiv_emb_cos_sim": 0.683717668056488, "eval_arxiv_emb_cos_sim_sem": 0.008375941986246168, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.735908269882202, "eval_arxiv_n_ngrams_match_1": 13.108, "eval_arxiv_n_ngrams_match_2": 2.36, "eval_arxiv_n_ngrams_match_3": 0.466, "eval_arxiv_num_pred_words": 39.274, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 41.926088478229644, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31073900886607725, "eval_arxiv_runtime": 10.6028, "eval_arxiv_samples_per_second": 47.158, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.31002259454764525, "eval_arxiv_token_set_f1_sem": 0.0040055636315141354, "eval_arxiv_token_set_precision": 0.253605436479157, "eval_arxiv_token_set_recall": 0.41870470996861786, "eval_arxiv_true_num_tokens": 64.0, "step": 38750 }, { "epoch": 7.44, "eval_python_code_alpaca_accuracy": 0.1410625, "eval_python_code_alpaca_bleu_score": 3.6544305731855826, "eval_python_code_alpaca_bleu_score_sem": 0.1204447519376356, "eval_python_code_alpaca_emb_cos_sim": 0.6821444034576416, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00996385084567442, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.227888822555542, "eval_python_code_alpaca_n_ngrams_match_1": 8.356, "eval_python_code_alpaca_n_ngrams_match_2": 2.092, "eval_python_code_alpaca_n_ngrams_match_3": 0.558, "eval_python_code_alpaca_num_pred_words": 42.0, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 25.226343426340456, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2830427325951065, "eval_python_code_alpaca_runtime": 10.2567, "eval_python_code_alpaca_samples_per_second": 48.749, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.41815996751662404, "eval_python_code_alpaca_token_set_f1_sem": 0.005375304110234739, "eval_python_code_alpaca_token_set_precision": 0.4452742185040244, "eval_python_code_alpaca_token_set_recall": 0.4220512123802343, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 38750 }, { "epoch": 7.44, "eval_wikibio_accuracy": 0.29215625, "eval_wikibio_bleu_score": 5.254225044681138, "eval_wikibio_bleu_score_sem": 0.179976243470723, "eval_wikibio_emb_cos_sim": 0.6872029304504395, "eval_wikibio_emb_cos_sim_sem": 0.010441273881993127, "eval_wikibio_emb_top1_equal": 0.09375, "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.18137264251709, "eval_wikibio_n_ngrams_match_1": 9.648, "eval_wikibio_n_ngrams_match_2": 3.068, "eval_wikibio_n_ngrams_match_3": 1.054, "eval_wikibio_num_pred_words": 36.922, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 65.45563877093632, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.325080923640121, "eval_wikibio_runtime": 10.5243, "eval_wikibio_samples_per_second": 47.509, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.3024434052551784, "eval_wikibio_token_set_f1_sem": 0.005322398063439081, "eval_wikibio_token_set_precision": 0.30908104158307004, "eval_wikibio_token_set_recall": 0.31218924436671847, "eval_wikibio_true_num_tokens": 61.1328125, "step": 38750 }, { "epoch": 7.44, "eval_nq_accuracy": 0.49053125, "eval_nq_bleu_score": 9.805626824020768, "eval_nq_bleu_score_sem": 0.4191953099360163, "eval_nq_emb_cos_sim": 0.7905921936035156, "eval_nq_emb_cos_sim_sem": 0.008200836977207087, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4598374366760254, "eval_nq_n_ngrams_match_1": 21.226, "eval_nq_n_ngrams_match_2": 7.172, "eval_nq_n_ngrams_match_3": 3.116, "eval_nq_num_pred_words": 48.53, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.702908921562596, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.40809499842489294, "eval_nq_runtime": 10.5915, "eval_nq_samples_per_second": 47.208, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.42651127835422487, "eval_nq_token_set_f1_sem": 0.004904623713934699, "eval_nq_token_set_precision": 0.3773730221591363, "eval_nq_token_set_recall": 0.5004069504882075, "eval_nq_true_num_tokens": 64.0, "step": 38750 }, { "epoch": 7.44, "learning_rate": 0.001, "loss": 2.8275, "step": 38760 }, { "epoch": 7.44, "learning_rate": 0.001, "loss": 2.8203, "step": 38772 }, { "epoch": 7.45, "learning_rate": 0.001, "loss": 2.8069, "step": 38784 }, { "epoch": 7.45, "learning_rate": 0.001, "loss": 2.8236, "step": 38796 }, { "epoch": 7.45, "learning_rate": 0.001, "loss": 2.8185, "step": 38808 }, { "epoch": 7.45, "learning_rate": 0.001, "loss": 2.8194, "step": 38820 }, { "epoch": 7.46, "learning_rate": 0.001, "loss": 2.8139, "step": 38832 }, { "epoch": 7.46, "learning_rate": 0.001, "loss": 2.8256, "step": 38844 }, { "epoch": 7.46, "learning_rate": 0.001, "loss": 2.813, "step": 38856 }, { "epoch": 7.46, "learning_rate": 0.001, "loss": 2.8113, "step": 38868 }, { "epoch": 7.47, "learning_rate": 0.001, "loss": 2.8201, "step": 38880 }, { "epoch": 7.47, "learning_rate": 0.001, "loss": 2.8146, "step": 38892 }, { "epoch": 7.47, "learning_rate": 0.001, "loss": 2.8161, "step": 38904 }, { "epoch": 7.47, "learning_rate": 0.001, "loss": 2.8254, "step": 38916 }, { "epoch": 7.47, "learning_rate": 0.001, "loss": 2.8223, "step": 38928 }, { "epoch": 7.48, "learning_rate": 0.001, "loss": 2.8317, "step": 38940 }, { "epoch": 7.48, "learning_rate": 0.001, "loss": 2.8283, "step": 38952 }, { "epoch": 7.48, "learning_rate": 0.001, "loss": 2.8192, "step": 38964 }, { "epoch": 7.48, "learning_rate": 0.001, "loss": 2.8155, "step": 38976 }, { "epoch": 7.49, "learning_rate": 0.001, "loss": 2.8107, "step": 38988 }, { "epoch": 7.49, "learning_rate": 0.001, "loss": 2.8153, "step": 39000 }, { "epoch": 7.49, "learning_rate": 0.001, "loss": 2.8124, "step": 39012 }, { "epoch": 7.49, "learning_rate": 0.001, "loss": 2.8276, "step": 39024 }, { "epoch": 7.5, "learning_rate": 0.001, "loss": 2.8281, "step": 39036 }, { "epoch": 7.5, "learning_rate": 0.001, "loss": 2.8193, "step": 39048 }, { "epoch": 7.5, "learning_rate": 0.001, "loss": 2.8259, "step": 39060 }, { "epoch": 7.5, "learning_rate": 0.001, "loss": 2.8145, "step": 39072 }, { "epoch": 7.5, "learning_rate": 0.001, "loss": 2.8225, "step": 39084 }, { "epoch": 7.51, "learning_rate": 0.001, "loss": 2.8141, "step": 39096 }, { "epoch": 7.51, "learning_rate": 0.001, "loss": 2.8303, "step": 39108 }, { "epoch": 7.51, "learning_rate": 0.001, "loss": 2.8158, "step": 39120 }, { "epoch": 7.51, "learning_rate": 0.001, "loss": 2.8204, "step": 39132 }, { "epoch": 7.52, "learning_rate": 0.001, "loss": 2.8237, "step": 39144 }, { "epoch": 7.52, "learning_rate": 0.001, "loss": 2.8171, "step": 39156 }, { "epoch": 7.52, "learning_rate": 0.001, "loss": 2.8067, "step": 39168 }, { "epoch": 7.52, "learning_rate": 0.001, "loss": 2.815, "step": 39180 }, { "epoch": 7.53, "learning_rate": 0.001, "loss": 2.8203, "step": 39192 }, { "epoch": 7.53, "learning_rate": 0.001, "loss": 2.813, "step": 39204 }, { "epoch": 7.53, "learning_rate": 0.001, "loss": 2.8213, "step": 39216 }, { "epoch": 7.53, "learning_rate": 0.001, "loss": 2.8127, "step": 39228 }, { "epoch": 7.53, "learning_rate": 0.001, "loss": 2.8122, "step": 39240 }, { "epoch": 7.54, "learning_rate": 0.001, "loss": 2.8169, "step": 39252 }, { "epoch": 7.54, "learning_rate": 0.001, "loss": 2.8127, "step": 39264 }, { "epoch": 7.54, "learning_rate": 0.001, "loss": 2.8191, "step": 39276 }, { "epoch": 7.54, "learning_rate": 0.001, "loss": 2.8175, "step": 39288 }, { "epoch": 7.55, "learning_rate": 0.001, "loss": 2.8263, "step": 39300 }, { "epoch": 7.55, "learning_rate": 0.001, "loss": 2.8166, "step": 39312 }, { "epoch": 7.55, "learning_rate": 0.001, "loss": 2.8157, "step": 39324 }, { "epoch": 7.55, "learning_rate": 0.001, "loss": 2.8152, "step": 39336 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 2.8113, "step": 39348 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 2.8274, "step": 39360 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 2.8222, "step": 39372 }, { "epoch": 7.56, "eval_ag_news_accuracy": 0.2934375, "eval_ag_news_bleu_score": 4.232245651322968, "eval_ag_news_bleu_score_sem": 0.13472187155546678, "eval_ag_news_emb_cos_sim": 0.7605491280555725, "eval_ag_news_emb_cos_sim_sem": 0.008212281822211318, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8588790893554688, "eval_ag_news_n_ngrams_match_1": 12.464, "eval_ag_news_n_ngrams_match_2": 2.56, "eval_ag_news_n_ngrams_match_3": 0.708, "eval_ag_news_num_pred_words": 45.63, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 47.41217675881853, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30793974285537384, "eval_ag_news_runtime": 11.0359, "eval_ag_news_samples_per_second": 45.307, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.31833152832966594, "eval_ag_news_token_set_f1_sem": 0.004331108932723995, "eval_ag_news_token_set_precision": 0.29271179587824764, "eval_ag_news_token_set_recall": 0.3666719128884171, "eval_ag_news_true_num_tokens": 56.09375, "step": 39375 }, { "epoch": 7.56, "eval_anthropic_toxic_prompts_accuracy": 0.10053125, "eval_anthropic_toxic_prompts_bleu_score": 2.501564855564238, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09554543720920272, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6118472814559937, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01040809575113645, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.561389684677124, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.24, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.414, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.462, "eval_anthropic_toxic_prompts_num_pred_words": 46.39, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 35.21209687110814, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1838394200757527, "eval_anthropic_toxic_prompts_runtime": 10.0901, "eval_anthropic_toxic_prompts_samples_per_second": 49.553, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3112454745607655, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006341604969589165, "eval_anthropic_toxic_prompts_token_set_precision": 0.3638612525653445, "eval_anthropic_toxic_prompts_token_set_recall": 0.3025721379702232, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 39375 }, { "epoch": 7.56, "eval_arxiv_accuracy": 0.32, "eval_arxiv_bleu_score": 3.6777920054093856, "eval_arxiv_bleu_score_sem": 0.11388419298267789, "eval_arxiv_emb_cos_sim": 0.6902914643287659, "eval_arxiv_emb_cos_sim_sem": 0.008033634502263798, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7115771770477295, "eval_arxiv_n_ngrams_match_1": 13.036, "eval_arxiv_n_ngrams_match_2": 2.39, "eval_arxiv_n_ngrams_match_3": 0.506, "eval_arxiv_num_pred_words": 39.628, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.918291051345854, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3072787419538987, "eval_arxiv_runtime": 10.6262, "eval_arxiv_samples_per_second": 47.054, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.31042993335090185, "eval_arxiv_token_set_f1_sem": 0.004080640908553339, "eval_arxiv_token_set_precision": 0.25353127484434584, "eval_arxiv_token_set_recall": 0.4239575036633763, "eval_arxiv_true_num_tokens": 64.0, "step": 39375 }, { "epoch": 7.56, "eval_python_code_alpaca_accuracy": 0.141625, "eval_python_code_alpaca_bleu_score": 3.5105260376012732, "eval_python_code_alpaca_bleu_score_sem": 0.10893562223743992, "eval_python_code_alpaca_emb_cos_sim": 0.6751098036766052, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009235796306329617, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2059597969055176, "eval_python_code_alpaca_n_ngrams_match_1": 8.188, "eval_python_code_alpaca_n_ngrams_match_2": 2.02, "eval_python_code_alpaca_n_ngrams_match_3": 0.576, "eval_python_code_alpaca_num_pred_words": 43.474, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 24.67917564915801, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.27569497009935495, "eval_python_code_alpaca_runtime": 10.1835, "eval_python_code_alpaca_samples_per_second": 49.099, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4221533717590738, "eval_python_code_alpaca_token_set_f1_sem": 0.005083677924871681, "eval_python_code_alpaca_token_set_precision": 0.44041471237088187, "eval_python_code_alpaca_token_set_recall": 0.43207969115662226, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 39375 }, { "epoch": 7.56, "eval_wikibio_accuracy": 0.29253125, "eval_wikibio_bleu_score": 5.460954327502383, "eval_wikibio_bleu_score_sem": 0.18422578950178453, "eval_wikibio_emb_cos_sim": 0.7087117433547974, "eval_wikibio_emb_cos_sim_sem": 0.010381389137159719, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.178219318389893, "eval_wikibio_n_ngrams_match_1": 9.854, "eval_wikibio_n_ngrams_match_2": 3.244, "eval_wikibio_n_ngrams_match_3": 1.11, "eval_wikibio_num_pred_words": 37.582, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 65.24956101168432, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3318237601867632, "eval_wikibio_runtime": 11.0453, "eval_wikibio_samples_per_second": 45.268, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3089731518117434, "eval_wikibio_token_set_f1_sem": 0.0049584876825302625, "eval_wikibio_token_set_precision": 0.31891424363255483, "eval_wikibio_token_set_recall": 0.31357305002437286, "eval_wikibio_true_num_tokens": 61.1328125, "step": 39375 }, { "epoch": 7.56, "eval_nq_accuracy": 0.49253125, "eval_nq_bleu_score": 9.833208098369386, "eval_nq_bleu_score_sem": 0.42513112227800615, "eval_nq_emb_cos_sim": 0.7943933010101318, "eval_nq_emb_cos_sim_sem": 0.007816172569015777, "eval_nq_emb_top1_equal": 0.1953125, "eval_nq_emb_top1_equal_sem": 0.035178457165496856, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.453270196914673, "eval_nq_n_ngrams_match_1": 21.134, "eval_nq_n_ngrams_match_2": 7.224, "eval_nq_n_ngrams_match_3": 3.188, "eval_nq_num_pred_words": 48.602, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.626304926489219, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4037706010589045, "eval_nq_runtime": 11.1033, "eval_nq_samples_per_second": 45.032, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4258722839505337, "eval_nq_token_set_f1_sem": 0.0050227033138624125, "eval_nq_token_set_precision": 0.37511438114790796, "eval_nq_token_set_recall": 0.5048118445670626, "eval_nq_true_num_tokens": 64.0, "step": 39375 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 2.831, "step": 39384 }, { "epoch": 7.56, "learning_rate": 0.001, "loss": 2.8308, "step": 39396 }, { "epoch": 7.57, "learning_rate": 0.001, "loss": 2.8288, "step": 39408 }, { "epoch": 7.57, "learning_rate": 0.001, "loss": 2.8161, "step": 39420 }, { "epoch": 7.57, "learning_rate": 0.001, "loss": 2.8104, "step": 39432 }, { "epoch": 7.57, "learning_rate": 0.001, "loss": 2.8054, "step": 39444 }, { "epoch": 7.58, "learning_rate": 0.001, "loss": 2.8279, "step": 39456 }, { "epoch": 7.58, "learning_rate": 0.001, "loss": 2.82, "step": 39468 }, { "epoch": 7.58, "learning_rate": 0.001, "loss": 2.8221, "step": 39480 }, { "epoch": 7.58, "learning_rate": 0.001, "loss": 2.8232, "step": 39492 }, { "epoch": 7.59, "learning_rate": 0.001, "loss": 2.8194, "step": 39504 }, { "epoch": 7.59, "learning_rate": 0.001, "loss": 2.838, "step": 39516 }, { "epoch": 7.59, "learning_rate": 0.001, "loss": 2.8254, "step": 39528 }, { "epoch": 7.59, "learning_rate": 0.001, "loss": 2.8267, "step": 39540 }, { "epoch": 7.59, "learning_rate": 0.001, "loss": 2.8236, "step": 39552 }, { "epoch": 7.6, "learning_rate": 0.001, "loss": 2.8241, "step": 39564 }, { "epoch": 7.6, "learning_rate": 0.001, "loss": 2.8128, "step": 39576 }, { "epoch": 7.6, "learning_rate": 0.001, "loss": 2.8207, "step": 39588 }, { "epoch": 7.6, "learning_rate": 0.001, "loss": 2.8099, "step": 39600 }, { "epoch": 7.61, "learning_rate": 0.001, "loss": 2.8089, "step": 39612 }, { "epoch": 7.61, "learning_rate": 0.001, "loss": 2.8153, "step": 39624 }, { "epoch": 7.61, "learning_rate": 0.001, "loss": 2.8131, "step": 39636 }, { "epoch": 7.61, "learning_rate": 0.001, "loss": 2.8234, "step": 39648 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 2.8111, "step": 39660 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 2.8163, "step": 39672 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 2.8207, "step": 39684 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 2.8124, "step": 39696 }, { "epoch": 7.62, "learning_rate": 0.001, "loss": 2.8207, "step": 39708 }, { "epoch": 7.63, "learning_rate": 0.001, "loss": 2.817, "step": 39720 }, { "epoch": 7.63, "learning_rate": 0.001, "loss": 2.814, "step": 39732 }, { "epoch": 7.63, "learning_rate": 0.001, "loss": 2.815, "step": 39744 }, { "epoch": 7.63, "learning_rate": 0.001, "loss": 2.8181, "step": 39756 }, { "epoch": 7.64, "learning_rate": 0.001, "loss": 2.8105, "step": 39768 }, { "epoch": 7.64, "learning_rate": 0.001, "loss": 2.8187, "step": 39780 }, { "epoch": 7.64, "learning_rate": 0.001, "loss": 2.8116, "step": 39792 }, { "epoch": 7.64, "learning_rate": 0.001, "loss": 2.8081, "step": 39804 }, { "epoch": 7.65, "learning_rate": 0.001, "loss": 2.8137, "step": 39816 }, { "epoch": 7.65, "learning_rate": 0.001, "loss": 2.8222, "step": 39828 }, { "epoch": 7.65, "learning_rate": 0.001, "loss": 2.8052, "step": 39840 }, { "epoch": 7.65, "learning_rate": 0.001, "loss": 2.8164, "step": 39852 }, { "epoch": 7.65, "learning_rate": 0.001, "loss": 2.8154, "step": 39864 }, { "epoch": 7.66, "learning_rate": 0.001, "loss": 2.8179, "step": 39876 }, { "epoch": 7.66, "learning_rate": 0.001, "loss": 2.8073, "step": 39888 }, { "epoch": 7.66, "learning_rate": 0.001, "loss": 2.8167, "step": 39900 }, { "epoch": 7.66, "learning_rate": 0.001, "loss": 2.8165, "step": 39912 }, { "epoch": 7.67, "learning_rate": 0.001, "loss": 2.8188, "step": 39924 }, { "epoch": 7.67, "learning_rate": 0.001, "loss": 2.8268, "step": 39936 }, { "epoch": 7.67, "learning_rate": 0.001, "loss": 2.8147, "step": 39948 }, { "epoch": 7.67, "learning_rate": 0.001, "loss": 2.8206, "step": 39960 }, { "epoch": 7.68, "learning_rate": 0.001, "loss": 2.8176, "step": 39972 }, { "epoch": 7.68, "learning_rate": 0.001, "loss": 2.8258, "step": 39984 }, { "epoch": 7.68, "learning_rate": 0.001, "loss": 2.8287, "step": 39996 }, { "epoch": 7.68, "eval_ag_news_accuracy": 0.2924375, "eval_ag_news_bleu_score": 4.140216952249, "eval_ag_news_bleu_score_sem": 0.1383443335620197, "eval_ag_news_emb_cos_sim": 0.7485324144363403, "eval_ag_news_emb_cos_sim_sem": 0.009648313854486055, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8584718704223633, "eval_ag_news_n_ngrams_match_1": 12.436, "eval_ag_news_n_ngrams_match_2": 2.546, "eval_ag_news_n_ngrams_match_3": 0.684, "eval_ag_news_num_pred_words": 46.256, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 47.39287355336471, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30593732841335797, "eval_ag_news_runtime": 10.8668, "eval_ag_news_samples_per_second": 46.012, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.31779052000531055, "eval_ag_news_token_set_f1_sem": 0.004517178711662994, "eval_ag_news_token_set_precision": 0.292657434325159, "eval_ag_news_token_set_recall": 0.3648653117395539, "eval_ag_news_true_num_tokens": 56.09375, "step": 40000 }, { "epoch": 7.68, "eval_anthropic_toxic_prompts_accuracy": 0.1010625, "eval_anthropic_toxic_prompts_bleu_score": 2.4924053251186677, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.0958977419730683, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6073935031890869, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010615059826382719, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5465736389160156, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.186, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.418, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.484, "eval_anthropic_toxic_prompts_num_pred_words": 46.86, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 34.69423861146998, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1808282606779184, "eval_anthropic_toxic_prompts_runtime": 10.2638, "eval_anthropic_toxic_prompts_samples_per_second": 48.715, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3196874132490534, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006190038015658034, "eval_anthropic_toxic_prompts_token_set_precision": 0.3633774501719923, "eval_anthropic_toxic_prompts_token_set_recall": 0.3225252960679559, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 40000 }, { "epoch": 7.68, "eval_arxiv_accuracy": 0.3200625, "eval_arxiv_bleu_score": 3.621264815878394, "eval_arxiv_bleu_score_sem": 0.1081188396662897, "eval_arxiv_emb_cos_sim": 0.6864925622940063, "eval_arxiv_emb_cos_sim_sem": 0.007601289496061015, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.707294225692749, "eval_arxiv_n_ngrams_match_1": 12.866, "eval_arxiv_n_ngrams_match_2": 2.33, "eval_arxiv_n_ngrams_match_3": 0.48, "eval_arxiv_num_pred_words": 38.898, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.74341476188715, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3067538649257645, "eval_arxiv_runtime": 10.7068, "eval_arxiv_samples_per_second": 46.7, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.30269986008952854, "eval_arxiv_token_set_f1_sem": 0.004115864401618083, "eval_arxiv_token_set_precision": 0.24789558209074794, "eval_arxiv_token_set_recall": 0.4130067723409454, "eval_arxiv_true_num_tokens": 64.0, "step": 40000 }, { "epoch": 7.68, "eval_python_code_alpaca_accuracy": 0.14084375, "eval_python_code_alpaca_bleu_score": 3.431169638393987, "eval_python_code_alpaca_bleu_score_sem": 0.11242873401818722, "eval_python_code_alpaca_emb_cos_sim": 0.672085165977478, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009780764277198588, "eval_python_code_alpaca_emb_top1_equal": 0.015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.011004959004867984, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.2109286785125732, "eval_python_code_alpaca_n_ngrams_match_1": 8.304, "eval_python_code_alpaca_n_ngrams_match_2": 2.006, "eval_python_code_alpaca_n_ngrams_match_3": 0.552, "eval_python_code_alpaca_num_pred_words": 43.478, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 24.80210871811856, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.27806485172692175, "eval_python_code_alpaca_runtime": 11.0289, "eval_python_code_alpaca_samples_per_second": 45.335, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4180941118980831, "eval_python_code_alpaca_token_set_f1_sem": 0.005268215952449477, "eval_python_code_alpaca_token_set_precision": 0.44559717967585744, "eval_python_code_alpaca_token_set_recall": 0.4197696143230586, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 40000 }, { "epoch": 7.68, "eval_wikibio_accuracy": 0.29596875, "eval_wikibio_bleu_score": 5.171943944801153, "eval_wikibio_bleu_score_sem": 0.17957982544618623, "eval_wikibio_emb_cos_sim": 0.7065252661705017, "eval_wikibio_emb_cos_sim_sem": 0.010924187203874888, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.171440601348877, "eval_wikibio_n_ngrams_match_1": 9.714, "eval_wikibio_n_ngrams_match_2": 3.114, "eval_wikibio_n_ngrams_match_3": 1.032, "eval_wikibio_num_pred_words": 37.366, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 64.80874846029297, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3284593122552165, "eval_wikibio_runtime": 10.2532, "eval_wikibio_samples_per_second": 48.765, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.30357482221895987, "eval_wikibio_token_set_f1_sem": 0.0053931187158150744, "eval_wikibio_token_set_precision": 0.3138596259243643, "eval_wikibio_token_set_recall": 0.30868501911837587, "eval_wikibio_true_num_tokens": 61.1328125, "step": 40000 }, { "epoch": 7.68, "eval_nq_accuracy": 0.49109375, "eval_nq_bleu_score": 9.820259624837865, "eval_nq_bleu_score_sem": 0.4250234602086885, "eval_nq_emb_cos_sim": 0.7942675352096558, "eval_nq_emb_cos_sim_sem": 0.007867145231022655, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4483070373535156, "eval_nq_n_ngrams_match_1": 21.216, "eval_nq_n_ngrams_match_2": 7.218, "eval_nq_n_ngrams_match_3": 3.176, "eval_nq_num_pred_words": 48.662, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.568744678536394, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.404875463760787, "eval_nq_runtime": 11.6787, "eval_nq_samples_per_second": 42.813, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.42698738512552853, "eval_nq_token_set_f1_sem": 0.00503677341066409, "eval_nq_token_set_precision": 0.3771447610985095, "eval_nq_token_set_recall": 0.5050118819720776, "eval_nq_true_num_tokens": 64.0, "step": 40000 }, { "epoch": 7.68, "learning_rate": 0.001, "loss": 2.8281, "step": 40008 }, { "epoch": 7.68, "learning_rate": 0.001, "loss": 2.8157, "step": 40020 }, { "epoch": 7.69, "learning_rate": 0.001, "loss": 2.8209, "step": 40032 }, { "epoch": 7.69, "learning_rate": 0.001, "loss": 2.8125, "step": 40044 }, { "epoch": 7.69, "learning_rate": 0.001, "loss": 2.8116, "step": 40056 }, { "epoch": 7.69, "learning_rate": 0.001, "loss": 2.8113, "step": 40068 }, { "epoch": 7.7, "learning_rate": 0.001, "loss": 2.818, "step": 40080 }, { "epoch": 7.7, "learning_rate": 0.001, "loss": 2.8147, "step": 40092 }, { "epoch": 7.7, "learning_rate": 0.001, "loss": 2.8093, "step": 40104 }, { "epoch": 7.7, "learning_rate": 0.001, "loss": 2.8197, "step": 40116 }, { "epoch": 7.71, "learning_rate": 0.001, "loss": 2.8151, "step": 40128 }, { "epoch": 7.71, "learning_rate": 0.001, "loss": 2.8225, "step": 40140 }, { "epoch": 7.71, "learning_rate": 0.001, "loss": 2.8144, "step": 40152 }, { "epoch": 7.71, "learning_rate": 0.001, "loss": 2.8209, "step": 40164 }, { "epoch": 7.71, "learning_rate": 0.001, "loss": 2.8147, "step": 40176 }, { "epoch": 7.72, "learning_rate": 0.001, "loss": 2.8059, "step": 40188 }, { "epoch": 7.72, "learning_rate": 0.001, "loss": 2.8107, "step": 40200 }, { "epoch": 7.72, "learning_rate": 0.001, "loss": 2.8189, "step": 40212 }, { "epoch": 7.72, "learning_rate": 0.001, "loss": 2.8034, "step": 40224 }, { "epoch": 7.73, "learning_rate": 0.001, "loss": 2.81, "step": 40236 }, { "epoch": 7.73, "learning_rate": 0.001, "loss": 2.8135, "step": 40248 }, { "epoch": 7.73, "learning_rate": 0.001, "loss": 2.8087, "step": 40260 }, { "epoch": 7.73, "learning_rate": 0.001, "loss": 2.8111, "step": 40272 }, { "epoch": 7.74, "learning_rate": 0.001, "loss": 2.8172, "step": 40284 }, { "epoch": 7.74, "learning_rate": 0.001, "loss": 2.814, "step": 40296 }, { "epoch": 7.74, "learning_rate": 0.001, "loss": 2.8064, "step": 40308 }, { "epoch": 7.74, "learning_rate": 0.001, "loss": 2.8208, "step": 40320 }, { "epoch": 7.74, "learning_rate": 0.001, "loss": 2.821, "step": 40332 }, { "epoch": 7.75, "learning_rate": 0.001, "loss": 2.8185, "step": 40344 }, { "epoch": 7.75, "learning_rate": 0.001, "loss": 2.817, "step": 40356 }, { "epoch": 7.75, "learning_rate": 0.001, "loss": 2.8162, "step": 40368 }, { "epoch": 7.75, "learning_rate": 0.001, "loss": 2.819, "step": 40380 }, { "epoch": 7.76, "learning_rate": 0.001, "loss": 2.8049, "step": 40392 }, { "epoch": 7.76, "learning_rate": 0.001, "loss": 2.8036, "step": 40404 }, { "epoch": 7.76, "learning_rate": 0.001, "loss": 2.8131, "step": 40416 }, { "epoch": 7.76, "learning_rate": 0.001, "loss": 2.8061, "step": 40428 }, { "epoch": 7.76, "learning_rate": 0.001, "loss": 2.8096, "step": 40440 }, { "epoch": 7.77, "learning_rate": 0.001, "loss": 2.8056, "step": 40452 }, { "epoch": 7.77, "learning_rate": 0.001, "loss": 2.812, "step": 40464 }, { "epoch": 7.77, "learning_rate": 0.001, "loss": 2.8169, "step": 40476 }, { "epoch": 7.77, "learning_rate": 0.001, "loss": 2.8156, "step": 40488 }, { "epoch": 7.78, "learning_rate": 0.001, "loss": 2.8143, "step": 40500 }, { "epoch": 7.78, "learning_rate": 0.001, "loss": 2.7972, "step": 40512 }, { "epoch": 7.78, "learning_rate": 0.001, "loss": 2.8215, "step": 40524 }, { "epoch": 7.78, "learning_rate": 0.001, "loss": 2.8117, "step": 40536 }, { "epoch": 7.79, "learning_rate": 0.001, "loss": 2.8097, "step": 40548 }, { "epoch": 7.79, "learning_rate": 0.001, "loss": 2.8149, "step": 40560 }, { "epoch": 7.79, "learning_rate": 0.001, "loss": 2.8134, "step": 40572 }, { "epoch": 7.79, "learning_rate": 0.001, "loss": 2.8077, "step": 40584 }, { "epoch": 7.79, "learning_rate": 0.001, "loss": 2.8043, "step": 40596 }, { "epoch": 7.8, "learning_rate": 0.001, "loss": 2.8064, "step": 40608 }, { "epoch": 7.8, "learning_rate": 0.001, "loss": 2.8065, "step": 40620 }, { "epoch": 7.8, "eval_ag_news_accuracy": 0.29590625, "eval_ag_news_bleu_score": 4.187945534360631, "eval_ag_news_bleu_score_sem": 0.1398010890099884, "eval_ag_news_emb_cos_sim": 0.7519018054008484, "eval_ag_news_emb_cos_sim_sem": 0.008621114155301085, "eval_ag_news_emb_top1_equal": 0.140625, "eval_ag_news_emb_top1_equal_sem": 0.030847557647994725, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8446338176727295, "eval_ag_news_n_ngrams_match_1": 12.398, "eval_ag_news_n_ngrams_match_2": 2.512, "eval_ag_news_n_ngrams_match_3": 0.666, "eval_ag_news_num_pred_words": 45.26, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 46.74156528159807, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3055589003690906, "eval_ag_news_runtime": 10.5921, "eval_ag_news_samples_per_second": 47.205, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3209573739907631, "eval_ag_news_token_set_f1_sem": 0.004328376611851938, "eval_ag_news_token_set_precision": 0.29215007611101823, "eval_ag_news_token_set_recall": 0.37500439803465396, "eval_ag_news_true_num_tokens": 56.09375, "step": 40625 }, { "epoch": 7.8, "eval_anthropic_toxic_prompts_accuracy": 0.102375, "eval_anthropic_toxic_prompts_bleu_score": 2.4227166833591163, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09118447815273842, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6076961755752563, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010887983719548537, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5038912296295166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.226, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.388, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.442, "eval_anthropic_toxic_prompts_num_pred_words": 47.72, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.244562823647634, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1809839020715016, "eval_anthropic_toxic_prompts_runtime": 10.1648, "eval_anthropic_toxic_prompts_samples_per_second": 49.19, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3221772628366725, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006381706208999953, "eval_anthropic_toxic_prompts_token_set_precision": 0.3665683970802171, "eval_anthropic_toxic_prompts_token_set_recall": 0.3203213777646176, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 40625 }, { "epoch": 7.8, "eval_arxiv_accuracy": 0.3235, "eval_arxiv_bleu_score": 3.616187518736761, "eval_arxiv_bleu_score_sem": 0.09964012874421592, "eval_arxiv_emb_cos_sim": 0.6798217296600342, "eval_arxiv_emb_cos_sim_sem": 0.008391075997584119, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6890108585357666, "eval_arxiv_n_ngrams_match_1": 12.916, "eval_arxiv_n_ngrams_match_2": 2.334, "eval_arxiv_n_ngrams_match_3": 0.468, "eval_arxiv_num_pred_words": 38.716, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.00525652223078, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.308544018687944, "eval_arxiv_runtime": 10.3625, "eval_arxiv_samples_per_second": 48.251, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3081009332619882, "eval_arxiv_token_set_f1_sem": 0.004253189007534848, "eval_arxiv_token_set_precision": 0.24966233377607916, "eval_arxiv_token_set_recall": 0.42339492294018294, "eval_arxiv_true_num_tokens": 64.0, "step": 40625 }, { "epoch": 7.8, "eval_python_code_alpaca_accuracy": 0.14378125, "eval_python_code_alpaca_bleu_score": 3.4505165418448582, "eval_python_code_alpaca_bleu_score_sem": 0.10040792060601474, "eval_python_code_alpaca_emb_cos_sim": 0.6895774006843567, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009768596350324591, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.199202060699463, "eval_python_code_alpaca_n_ngrams_match_1": 8.442, "eval_python_code_alpaca_n_ngrams_match_2": 2.018, "eval_python_code_alpaca_n_ngrams_match_3": 0.528, "eval_python_code_alpaca_num_pred_words": 43.73, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 24.512962535064982, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.28123435590066515, "eval_python_code_alpaca_runtime": 13.0735, "eval_python_code_alpaca_samples_per_second": 38.245, "eval_python_code_alpaca_steps_per_second": 0.076, "eval_python_code_alpaca_token_set_f1": 0.4257524777173994, "eval_python_code_alpaca_token_set_f1_sem": 0.005134369614680399, "eval_python_code_alpaca_token_set_precision": 0.4522889066162074, "eval_python_code_alpaca_token_set_recall": 0.42402667478777445, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 40625 }, { "epoch": 7.8, "eval_wikibio_accuracy": 0.2949375, "eval_wikibio_bleu_score": 5.144805388924942, "eval_wikibio_bleu_score_sem": 0.19115575748875538, "eval_wikibio_emb_cos_sim": 0.6942859292030334, "eval_wikibio_emb_cos_sim_sem": 0.010799363385109303, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.108237266540527, "eval_wikibio_n_ngrams_match_1": 9.454, "eval_wikibio_n_ngrams_match_2": 3.01, "eval_wikibio_n_ngrams_match_3": 1.024, "eval_wikibio_num_pred_words": 36.324, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.839379383417054, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32059685715014324, "eval_wikibio_runtime": 10.2524, "eval_wikibio_samples_per_second": 48.769, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.2979910034566148, "eval_wikibio_token_set_f1_sem": 0.005759204884132401, "eval_wikibio_token_set_precision": 0.30381430918912883, "eval_wikibio_token_set_recall": 0.3086597426916664, "eval_wikibio_true_num_tokens": 61.1328125, "step": 40625 }, { "epoch": 7.8, "eval_nq_accuracy": 0.4915, "eval_nq_bleu_score": 10.038028004828684, "eval_nq_bleu_score_sem": 0.43433816005689, "eval_nq_emb_cos_sim": 0.7980464696884155, "eval_nq_emb_cos_sim_sem": 0.007813135361744961, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.442344903945923, "eval_nq_n_ngrams_match_1": 21.238, "eval_nq_n_ngrams_match_2": 7.314, "eval_nq_n_ngrams_match_3": 3.238, "eval_nq_num_pred_words": 48.518, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.499975488658384, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.40468805185802637, "eval_nq_runtime": 10.5702, "eval_nq_samples_per_second": 47.303, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4289723217509848, "eval_nq_token_set_f1_sem": 0.005111068929936343, "eval_nq_token_set_precision": 0.37697849387241883, "eval_nq_token_set_recall": 0.5090523696248492, "eval_nq_true_num_tokens": 64.0, "step": 40625 }, { "epoch": 7.8, "learning_rate": 0.001, "loss": 2.8153, "step": 40632 }, { "epoch": 7.8, "learning_rate": 0.001, "loss": 2.8023, "step": 40644 }, { "epoch": 7.81, "learning_rate": 0.001, "loss": 2.8064, "step": 40656 }, { "epoch": 7.81, "learning_rate": 0.001, "loss": 2.8067, "step": 40668 }, { "epoch": 7.81, "learning_rate": 0.001, "loss": 2.8056, "step": 40680 }, { "epoch": 7.81, "learning_rate": 0.001, "loss": 2.8091, "step": 40692 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 2.8008, "step": 40704 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 2.8087, "step": 40716 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 2.8108, "step": 40728 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 2.8141, "step": 40740 }, { "epoch": 7.82, "learning_rate": 0.001, "loss": 2.8075, "step": 40752 }, { "epoch": 7.83, "learning_rate": 0.001, "loss": 2.7981, "step": 40764 }, { "epoch": 7.83, "learning_rate": 0.001, "loss": 2.8175, "step": 40776 }, { "epoch": 7.83, "learning_rate": 0.001, "loss": 2.8087, "step": 40788 }, { "epoch": 7.83, "learning_rate": 0.001, "loss": 2.8076, "step": 40800 }, { "epoch": 7.84, "learning_rate": 0.001, "loss": 2.8103, "step": 40812 }, { "epoch": 7.84, "learning_rate": 0.001, "loss": 2.7975, "step": 40824 }, { "epoch": 7.84, "learning_rate": 0.001, "loss": 2.8146, "step": 40836 }, { "epoch": 7.84, "learning_rate": 0.001, "loss": 2.8108, "step": 40848 }, { "epoch": 7.85, "learning_rate": 0.001, "loss": 2.8077, "step": 40860 }, { "epoch": 7.85, "learning_rate": 0.001, "loss": 2.814, "step": 40872 }, { "epoch": 7.85, "learning_rate": 0.001, "loss": 2.8142, "step": 40884 }, { "epoch": 7.85, "learning_rate": 0.001, "loss": 2.8021, "step": 40896 }, { "epoch": 7.85, "learning_rate": 0.001, "loss": 2.7992, "step": 40908 }, { "epoch": 7.86, "learning_rate": 0.001, "loss": 2.8041, "step": 40920 }, { "epoch": 7.86, "learning_rate": 0.001, "loss": 2.8072, "step": 40932 }, { "epoch": 7.86, "learning_rate": 0.001, "loss": 2.8022, "step": 40944 }, { "epoch": 7.86, "learning_rate": 0.001, "loss": 2.8137, "step": 40956 }, { "epoch": 7.87, "learning_rate": 0.001, "loss": 2.8154, "step": 40968 }, { "epoch": 7.87, "learning_rate": 0.001, "loss": 2.8197, "step": 40980 }, { "epoch": 7.87, "learning_rate": 0.001, "loss": 2.826, "step": 40992 }, { "epoch": 7.87, "learning_rate": 0.001, "loss": 2.8034, "step": 41004 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 2.8072, "step": 41016 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 2.8039, "step": 41028 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 2.8114, "step": 41040 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 2.8191, "step": 41052 }, { "epoch": 7.88, "learning_rate": 0.001, "loss": 2.8163, "step": 41064 }, { "epoch": 7.89, "learning_rate": 0.001, "loss": 2.8037, "step": 41076 }, { "epoch": 7.89, "learning_rate": 0.001, "loss": 2.8145, "step": 41088 }, { "epoch": 7.89, "learning_rate": 0.001, "loss": 2.8048, "step": 41100 }, { "epoch": 7.89, "learning_rate": 0.001, "loss": 2.8228, "step": 41112 }, { "epoch": 7.9, "learning_rate": 0.001, "loss": 2.8036, "step": 41124 }, { "epoch": 7.9, "learning_rate": 0.001, "loss": 2.8107, "step": 41136 }, { "epoch": 7.9, "learning_rate": 0.001, "loss": 2.8136, "step": 41148 }, { "epoch": 7.9, "learning_rate": 0.001, "loss": 2.8043, "step": 41160 }, { "epoch": 7.91, "learning_rate": 0.001, "loss": 2.8011, "step": 41172 }, { "epoch": 7.91, "learning_rate": 0.001, "loss": 2.8096, "step": 41184 }, { "epoch": 7.91, "learning_rate": 0.001, "loss": 2.8014, "step": 41196 }, { "epoch": 7.91, "learning_rate": 0.001, "loss": 2.8209, "step": 41208 }, { "epoch": 7.91, "learning_rate": 0.001, "loss": 2.8052, "step": 41220 }, { "epoch": 7.92, "learning_rate": 0.001, "loss": 2.8069, "step": 41232 }, { "epoch": 7.92, "learning_rate": 0.001, "loss": 2.8139, "step": 41244 }, { "epoch": 7.92, "eval_ag_news_accuracy": 0.295375, "eval_ag_news_bleu_score": 4.177243248663384, "eval_ag_news_bleu_score_sem": 0.14355102147081386, "eval_ag_news_emb_cos_sim": 0.7614268064498901, "eval_ag_news_emb_cos_sim_sem": 0.008624405560751985, "eval_ag_news_emb_top1_equal": 0.109375, "eval_ag_news_emb_top1_equal_sem": 0.027695207821224692, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8472166061401367, "eval_ag_news_n_ngrams_match_1": 12.584, "eval_ag_news_n_ngrams_match_2": 2.516, "eval_ag_news_n_ngrams_match_3": 0.656, "eval_ag_news_num_pred_words": 45.628, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 46.862444893392656, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3127816446024006, "eval_ag_news_runtime": 11.8325, "eval_ag_news_samples_per_second": 42.256, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.3203641725089709, "eval_ag_news_token_set_f1_sem": 0.004295153381906319, "eval_ag_news_token_set_precision": 0.29743015779836435, "eval_ag_news_token_set_recall": 0.363793417571401, "eval_ag_news_true_num_tokens": 56.09375, "step": 41250 }, { "epoch": 7.92, "eval_anthropic_toxic_prompts_accuracy": 0.1014375, "eval_anthropic_toxic_prompts_bleu_score": 2.5142923835005013, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09967327239409667, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6296899318695068, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009661943750167774, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.528766393661499, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.29, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.456, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.482, "eval_anthropic_toxic_prompts_num_pred_words": 47.448, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 34.081898026000744, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1844820827318796, "eval_anthropic_toxic_prompts_runtime": 10.0815, "eval_anthropic_toxic_prompts_samples_per_second": 49.596, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.31619125754705546, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006245202508929617, "eval_anthropic_toxic_prompts_token_set_precision": 0.36581050128141396, "eval_anthropic_toxic_prompts_token_set_recall": 0.3068939653457516, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 41250 }, { "epoch": 7.92, "eval_arxiv_accuracy": 0.319375, "eval_arxiv_bleu_score": 3.761050652950992, "eval_arxiv_bleu_score_sem": 0.10826957781035133, "eval_arxiv_emb_cos_sim": 0.6977087259292603, "eval_arxiv_emb_cos_sim_sem": 0.007638775166544408, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.7040607929229736, "eval_arxiv_n_ngrams_match_1": 13.176, "eval_arxiv_n_ngrams_match_2": 2.45, "eval_arxiv_n_ngrams_match_3": 0.498, "eval_arxiv_num_pred_words": 39.362, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.6118864280509, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31268868526372107, "eval_arxiv_runtime": 10.5471, "eval_arxiv_samples_per_second": 47.406, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3121361393205705, "eval_arxiv_token_set_f1_sem": 0.004010391958789305, "eval_arxiv_token_set_precision": 0.2563655983833098, "eval_arxiv_token_set_recall": 0.421282631478863, "eval_arxiv_true_num_tokens": 64.0, "step": 41250 }, { "epoch": 7.92, "eval_python_code_alpaca_accuracy": 0.14034375, "eval_python_code_alpaca_bleu_score": 3.41548439262854, "eval_python_code_alpaca_bleu_score_sem": 0.11493549322295714, "eval_python_code_alpaca_emb_cos_sim": 0.6701538562774658, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009477135087803568, "eval_python_code_alpaca_emb_top1_equal": 0.0625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02147948148198014, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.21610426902771, "eval_python_code_alpaca_n_ngrams_match_1": 8.056, "eval_python_code_alpaca_n_ngrams_match_2": 1.93, "eval_python_code_alpaca_n_ngrams_match_3": 0.48, "eval_python_code_alpaca_num_pred_words": 42.104, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 24.930807034364012, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.27747833785570963, "eval_python_code_alpaca_runtime": 9.8767, "eval_python_code_alpaca_samples_per_second": 50.624, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.41409101157084693, "eval_python_code_alpaca_token_set_f1_sem": 0.005415821955688519, "eval_python_code_alpaca_token_set_precision": 0.4317412697423453, "eval_python_code_alpaca_token_set_recall": 0.4210692909568659, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 41250 }, { "epoch": 7.92, "eval_wikibio_accuracy": 0.29575, "eval_wikibio_bleu_score": 5.240046314179688, "eval_wikibio_bleu_score_sem": 0.19925473711020505, "eval_wikibio_emb_cos_sim": 0.6854864358901978, "eval_wikibio_emb_cos_sim_sem": 0.012112673672687936, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.137763977050781, "eval_wikibio_n_ngrams_match_1": 9.428, "eval_wikibio_n_ngrams_match_2": 3.016, "eval_wikibio_n_ngrams_match_3": 1.034, "eval_wikibio_num_pred_words": 36.42, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 62.66254978294218, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32025842546471583, "eval_wikibio_runtime": 10.5785, "eval_wikibio_samples_per_second": 47.265, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.29862073475428635, "eval_wikibio_token_set_f1_sem": 0.005543650424542434, "eval_wikibio_token_set_precision": 0.3045874857049161, "eval_wikibio_token_set_recall": 0.31166296994765863, "eval_wikibio_true_num_tokens": 61.1328125, "step": 41250 }, { "epoch": 7.92, "eval_nq_accuracy": 0.49271875, "eval_nq_bleu_score": 9.950861057961662, "eval_nq_bleu_score_sem": 0.4292405034709257, "eval_nq_emb_cos_sim": 0.7908686399459839, "eval_nq_emb_cos_sim_sem": 0.008533793787720609, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4374239444732666, "eval_nq_n_ngrams_match_1": 21.256, "eval_nq_n_ngrams_match_2": 7.31, "eval_nq_n_ngrams_match_3": 3.186, "eval_nq_num_pred_words": 48.534, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.443523588018904, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4075687612438058, "eval_nq_runtime": 12.0139, "eval_nq_samples_per_second": 41.618, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.4253036128874263, "eval_nq_token_set_f1_sem": 0.004909166781233974, "eval_nq_token_set_precision": 0.3760358083916659, "eval_nq_token_set_recall": 0.4997178555960679, "eval_nq_true_num_tokens": 64.0, "step": 41250 }, { "epoch": 7.92, "learning_rate": 0.001, "loss": 2.8079, "step": 41256 }, { "epoch": 7.92, "learning_rate": 0.001, "loss": 2.8034, "step": 41268 }, { "epoch": 7.93, "learning_rate": 0.001, "loss": 2.8039, "step": 41280 }, { "epoch": 7.93, "learning_rate": 0.001, "loss": 2.805, "step": 41292 }, { "epoch": 7.93, "learning_rate": 0.001, "loss": 2.803, "step": 41304 }, { "epoch": 7.93, "learning_rate": 0.001, "loss": 2.8036, "step": 41316 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 2.8032, "step": 41328 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 2.802, "step": 41340 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 2.8106, "step": 41352 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 2.8119, "step": 41364 }, { "epoch": 7.94, "learning_rate": 0.001, "loss": 2.8121, "step": 41376 }, { "epoch": 7.95, "learning_rate": 0.001, "loss": 2.8116, "step": 41388 }, { "epoch": 7.95, "learning_rate": 0.001, "loss": 2.7999, "step": 41400 }, { "epoch": 7.95, "learning_rate": 0.001, "loss": 2.7944, "step": 41412 }, { "epoch": 7.95, "learning_rate": 0.001, "loss": 2.808, "step": 41424 }, { "epoch": 7.96, "learning_rate": 0.001, "loss": 2.8095, "step": 41436 }, { "epoch": 7.96, "learning_rate": 0.001, "loss": 2.8134, "step": 41448 }, { "epoch": 7.96, "learning_rate": 0.001, "loss": 2.8068, "step": 41460 }, { "epoch": 7.96, "learning_rate": 0.001, "loss": 2.8013, "step": 41472 }, { "epoch": 7.97, "learning_rate": 0.001, "loss": 2.8034, "step": 41484 }, { "epoch": 7.97, "learning_rate": 0.001, "loss": 2.7999, "step": 41496 }, { "epoch": 7.97, "learning_rate": 0.001, "loss": 2.8067, "step": 41508 }, { "epoch": 7.97, "learning_rate": 0.001, "loss": 2.8093, "step": 41520 }, { "epoch": 7.97, "learning_rate": 0.001, "loss": 2.8152, "step": 41532 }, { "epoch": 7.98, "learning_rate": 0.001, "loss": 2.8127, "step": 41544 }, { "epoch": 7.98, "learning_rate": 0.001, "loss": 2.8125, "step": 41556 }, { "epoch": 7.98, "learning_rate": 0.001, "loss": 2.8059, "step": 41568 }, { "epoch": 7.98, "learning_rate": 0.001, "loss": 2.8018, "step": 41580 }, { "epoch": 7.99, "learning_rate": 0.001, "loss": 2.7959, "step": 41592 }, { "epoch": 7.99, "learning_rate": 0.001, "loss": 2.8037, "step": 41604 }, { "epoch": 7.99, "learning_rate": 0.001, "loss": 2.8089, "step": 41616 }, { "epoch": 7.99, "learning_rate": 0.001, "loss": 2.8035, "step": 41628 }, { "epoch": 8.0, "learning_rate": 0.001, "loss": 2.8035, "step": 41640 }, { "epoch": 8.0, "learning_rate": 0.001, "loss": 2.8021, "step": 41652 }, { "epoch": 8.0, "learning_rate": 0.001, "loss": 2.7977, "step": 41664 }, { "epoch": 8.0, "learning_rate": 0.001, "loss": 2.7911, "step": 41676 }, { "epoch": 8.0, "learning_rate": 0.001, "loss": 2.7865, "step": 41688 }, { "epoch": 8.01, "learning_rate": 0.001, "loss": 2.789, "step": 41700 }, { "epoch": 8.01, "learning_rate": 0.001, "loss": 2.7829, "step": 41712 }, { "epoch": 8.01, "learning_rate": 0.001, "loss": 2.7769, "step": 41724 }, { "epoch": 8.01, "learning_rate": 0.001, "loss": 2.7864, "step": 41736 }, { "epoch": 8.02, "learning_rate": 0.001, "loss": 2.7816, "step": 41748 }, { "epoch": 8.02, "learning_rate": 0.001, "loss": 2.7839, "step": 41760 }, { "epoch": 8.02, "learning_rate": 0.001, "loss": 2.7808, "step": 41772 }, { "epoch": 8.02, "learning_rate": 0.001, "loss": 2.7745, "step": 41784 }, { "epoch": 8.03, "learning_rate": 0.001, "loss": 2.7806, "step": 41796 }, { "epoch": 8.03, "learning_rate": 0.001, "loss": 2.7783, "step": 41808 }, { "epoch": 8.03, "learning_rate": 0.001, "loss": 2.7971, "step": 41820 }, { "epoch": 8.03, "learning_rate": 0.001, "loss": 2.7891, "step": 41832 }, { "epoch": 8.03, "learning_rate": 0.001, "loss": 2.7824, "step": 41844 }, { "epoch": 8.04, "learning_rate": 0.001, "loss": 2.7817, "step": 41856 }, { "epoch": 8.04, "learning_rate": 0.001, "loss": 2.7798, "step": 41868 }, { "epoch": 8.04, "eval_ag_news_accuracy": 0.298875, "eval_ag_news_bleu_score": 4.216803539825259, "eval_ag_news_bleu_score_sem": 0.1391509284087185, "eval_ag_news_emb_cos_sim": 0.7642207145690918, "eval_ag_news_emb_cos_sim_sem": 0.008319944629643382, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8362133502960205, "eval_ag_news_n_ngrams_match_1": 12.586, "eval_ag_news_n_ngrams_match_2": 2.526, "eval_ag_news_n_ngrams_match_3": 0.702, "eval_ag_news_num_pred_words": 46.136, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 46.34963190293231, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3104282891347031, "eval_ag_news_runtime": 11.7509, "eval_ag_news_samples_per_second": 42.55, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.31949873768250103, "eval_ag_news_token_set_f1_sem": 0.0044022580996051636, "eval_ag_news_token_set_precision": 0.29650399861574117, "eval_ag_news_token_set_recall": 0.359295249581459, "eval_ag_news_true_num_tokens": 56.09375, "step": 41875 }, { "epoch": 8.04, "eval_anthropic_toxic_prompts_accuracy": 0.103, "eval_anthropic_toxic_prompts_bleu_score": 2.692852577293663, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11015006852059467, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6287031173706055, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010656534300950963, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.488476037979126, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.47, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.546, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.524, "eval_anthropic_toxic_prompts_num_pred_words": 46.588, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.73602121980649, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1890378390716641, "eval_anthropic_toxic_prompts_runtime": 10.095, "eval_anthropic_toxic_prompts_samples_per_second": 49.53, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.324569476821007, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0060201748353898195, "eval_anthropic_toxic_prompts_token_set_precision": 0.3810019545048906, "eval_anthropic_toxic_prompts_token_set_recall": 0.3139210790656579, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 41875 }, { "epoch": 8.04, "eval_arxiv_accuracy": 0.32178125, "eval_arxiv_bleu_score": 3.9026349411821912, "eval_arxiv_bleu_score_sem": 0.10981482156059526, "eval_arxiv_emb_cos_sim": 0.6997609734535217, "eval_arxiv_emb_cos_sim_sem": 0.00893799589437475, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.685302495956421, "eval_arxiv_n_ngrams_match_1": 13.678, "eval_arxiv_n_ngrams_match_2": 2.644, "eval_arxiv_n_ngrams_match_3": 0.574, "eval_arxiv_num_pred_words": 40.272, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 39.85717726145954, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31819758297255984, "eval_arxiv_runtime": 10.2214, "eval_arxiv_samples_per_second": 48.917, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3179237768339998, "eval_arxiv_token_set_f1_sem": 0.0042574798222937155, "eval_arxiv_token_set_precision": 0.2635666782952128, "eval_arxiv_token_set_recall": 0.4224262657217067, "eval_arxiv_true_num_tokens": 64.0, "step": 41875 }, { "epoch": 8.04, "eval_python_code_alpaca_accuracy": 0.14471875, "eval_python_code_alpaca_bleu_score": 3.6295983478892175, "eval_python_code_alpaca_bleu_score_sem": 0.11931880643535772, "eval_python_code_alpaca_emb_cos_sim": 0.7095414400100708, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009777772509498822, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1657066345214844, "eval_python_code_alpaca_n_ngrams_match_1": 8.67, "eval_python_code_alpaca_n_ngrams_match_2": 2.13, "eval_python_code_alpaca_n_ngrams_match_3": 0.604, "eval_python_code_alpaca_num_pred_words": 43.742, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.7054892328067, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2863440812496213, "eval_python_code_alpaca_runtime": 10.1339, "eval_python_code_alpaca_samples_per_second": 49.339, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4300042660492909, "eval_python_code_alpaca_token_set_f1_sem": 0.005728997846493136, "eval_python_code_alpaca_token_set_precision": 0.46406024475119134, "eval_python_code_alpaca_token_set_recall": 0.4226458078759465, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 41875 }, { "epoch": 8.04, "eval_wikibio_accuracy": 0.29603125, "eval_wikibio_bleu_score": 5.460197619821154, "eval_wikibio_bleu_score_sem": 0.21460976470417648, "eval_wikibio_emb_cos_sim": 0.6786633729934692, "eval_wikibio_emb_cos_sim_sem": 0.01287330838447617, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.167712688446045, "eval_wikibio_n_ngrams_match_1": 9.56, "eval_wikibio_n_ngrams_match_2": 3.084, "eval_wikibio_n_ngrams_match_3": 1.102, "eval_wikibio_num_pred_words": 36.522, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 64.56759686604164, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3259823976842989, "eval_wikibio_runtime": 10.2692, "eval_wikibio_samples_per_second": 48.689, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3011836760564445, "eval_wikibio_token_set_f1_sem": 0.005640154974613203, "eval_wikibio_token_set_precision": 0.30988461485318836, "eval_wikibio_token_set_recall": 0.30740702169879425, "eval_wikibio_true_num_tokens": 61.1328125, "step": 41875 }, { "epoch": 8.04, "eval_nq_accuracy": 0.49340625, "eval_nq_bleu_score": 9.898326171736961, "eval_nq_bleu_score_sem": 0.41346381222116096, "eval_nq_emb_cos_sim": 0.797620952129364, "eval_nq_emb_cos_sim_sem": 0.007820779746393624, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.432384967803955, "eval_nq_n_ngrams_match_1": 21.38, "eval_nq_n_ngrams_match_2": 7.334, "eval_nq_n_ngrams_match_3": 3.188, "eval_nq_num_pred_words": 48.608, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.386004978814977, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4102706025229157, "eval_nq_runtime": 10.9539, "eval_nq_samples_per_second": 45.646, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.43081662352813604, "eval_nq_token_set_f1_sem": 0.004871615340993273, "eval_nq_token_set_precision": 0.3823319508357155, "eval_nq_token_set_recall": 0.5045832181826024, "eval_nq_true_num_tokens": 64.0, "step": 41875 }, { "epoch": 8.04, "learning_rate": 0.001, "loss": 2.7836, "step": 41880 }, { "epoch": 8.04, "learning_rate": 0.001, "loss": 2.7917, "step": 41892 }, { "epoch": 8.05, "learning_rate": 0.001, "loss": 2.7972, "step": 41904 }, { "epoch": 8.05, "learning_rate": 0.001, "loss": 2.786, "step": 41916 }, { "epoch": 8.05, "learning_rate": 0.001, "loss": 2.781, "step": 41928 }, { "epoch": 8.05, "learning_rate": 0.001, "loss": 2.7773, "step": 41940 }, { "epoch": 8.06, "learning_rate": 0.001, "loss": 2.7867, "step": 41952 }, { "epoch": 8.06, "learning_rate": 0.001, "loss": 2.7793, "step": 41964 }, { "epoch": 8.06, "learning_rate": 0.001, "loss": 2.7882, "step": 41976 }, { "epoch": 8.06, "learning_rate": 0.001, "loss": 2.7921, "step": 41988 }, { "epoch": 8.06, "learning_rate": 0.001, "loss": 2.7739, "step": 42000 }, { "epoch": 8.07, "learning_rate": 0.001, "loss": 2.776, "step": 42012 }, { "epoch": 8.07, "learning_rate": 0.001, "loss": 2.7826, "step": 42024 }, { "epoch": 8.07, "learning_rate": 0.001, "loss": 2.7925, "step": 42036 }, { "epoch": 8.07, "learning_rate": 0.001, "loss": 2.7859, "step": 42048 }, { "epoch": 8.08, "learning_rate": 0.001, "loss": 2.7863, "step": 42060 }, { "epoch": 8.08, "learning_rate": 0.001, "loss": 2.7818, "step": 42072 }, { "epoch": 8.08, "learning_rate": 0.001, "loss": 2.7965, "step": 42084 }, { "epoch": 8.08, "learning_rate": 0.001, "loss": 2.7978, "step": 42096 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 2.7906, "step": 42108 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 2.7857, "step": 42120 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 2.7746, "step": 42132 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 2.7751, "step": 42144 }, { "epoch": 8.09, "learning_rate": 0.001, "loss": 2.7789, "step": 42156 }, { "epoch": 8.1, "learning_rate": 0.001, "loss": 2.7903, "step": 42168 }, { "epoch": 8.1, "learning_rate": 0.001, "loss": 2.7946, "step": 42180 }, { "epoch": 8.1, "learning_rate": 0.001, "loss": 2.7783, "step": 42192 }, { "epoch": 8.1, "learning_rate": 0.001, "loss": 2.7893, "step": 42204 }, { "epoch": 8.11, "learning_rate": 0.001, "loss": 2.7776, "step": 42216 }, { "epoch": 8.11, "learning_rate": 0.001, "loss": 2.784, "step": 42228 }, { "epoch": 8.11, "learning_rate": 0.001, "loss": 2.7696, "step": 42240 }, { "epoch": 8.11, "learning_rate": 0.001, "loss": 2.7725, "step": 42252 }, { "epoch": 8.12, "learning_rate": 0.001, "loss": 2.8001, "step": 42264 }, { "epoch": 8.12, "learning_rate": 0.001, "loss": 2.788, "step": 42276 }, { "epoch": 8.12, "learning_rate": 0.001, "loss": 2.7794, "step": 42288 }, { "epoch": 8.12, "learning_rate": 0.001, "loss": 2.789, "step": 42300 }, { "epoch": 8.12, "learning_rate": 0.001, "loss": 2.78, "step": 42312 }, { "epoch": 8.13, "learning_rate": 0.001, "loss": 2.7789, "step": 42324 }, { "epoch": 8.13, "learning_rate": 0.001, "loss": 2.7854, "step": 42336 }, { "epoch": 8.13, "learning_rate": 0.001, "loss": 2.7901, "step": 42348 }, { "epoch": 8.13, "learning_rate": 0.001, "loss": 2.7913, "step": 42360 }, { "epoch": 8.14, "learning_rate": 0.001, "loss": 2.7893, "step": 42372 }, { "epoch": 8.14, "learning_rate": 0.001, "loss": 2.7824, "step": 42384 }, { "epoch": 8.14, "learning_rate": 0.001, "loss": 2.7815, "step": 42396 }, { "epoch": 8.14, "learning_rate": 0.001, "loss": 2.7916, "step": 42408 }, { "epoch": 8.15, "learning_rate": 0.001, "loss": 2.7818, "step": 42420 }, { "epoch": 8.15, "learning_rate": 0.001, "loss": 2.7879, "step": 42432 }, { "epoch": 8.15, "learning_rate": 0.001, "loss": 2.793, "step": 42444 }, { "epoch": 8.15, "learning_rate": 0.001, "loss": 2.7963, "step": 42456 }, { "epoch": 8.15, "learning_rate": 0.001, "loss": 2.7811, "step": 42468 }, { "epoch": 8.16, "learning_rate": 0.001, "loss": 2.7965, "step": 42480 }, { "epoch": 8.16, "learning_rate": 0.001, "loss": 2.7928, "step": 42492 }, { "epoch": 8.16, "eval_ag_news_accuracy": 0.297375, "eval_ag_news_bleu_score": 4.077741056324943, "eval_ag_news_bleu_score_sem": 0.1350246057891333, "eval_ag_news_emb_cos_sim": 0.7508853673934937, "eval_ag_news_emb_cos_sim_sem": 0.00949579099517021, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.833709955215454, "eval_ag_news_n_ngrams_match_1": 12.524, "eval_ag_news_n_ngrams_match_2": 2.474, "eval_ag_news_n_ngrams_match_3": 0.656, "eval_ag_news_num_pred_words": 45.466, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 46.23374557759033, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3063146823454444, "eval_ag_news_runtime": 10.6701, "eval_ag_news_samples_per_second": 46.86, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3184797730117816, "eval_ag_news_token_set_f1_sem": 0.004624105725727724, "eval_ag_news_token_set_precision": 0.2937868065160725, "eval_ag_news_token_set_recall": 0.36516289536224156, "eval_ag_news_true_num_tokens": 56.09375, "step": 42500 }, { "epoch": 8.16, "eval_anthropic_toxic_prompts_accuracy": 0.10253125, "eval_anthropic_toxic_prompts_bleu_score": 2.6640877193255896, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10581202263670453, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6246901750564575, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010404943298576738, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5179667472839355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.426, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.554, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.53, "eval_anthropic_toxic_prompts_num_pred_words": 46.976, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.71580596999187, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18708885265436348, "eval_anthropic_toxic_prompts_runtime": 10.203, "eval_anthropic_toxic_prompts_samples_per_second": 49.005, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3264954087635187, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006192588117650118, "eval_anthropic_toxic_prompts_token_set_precision": 0.3819508454617556, "eval_anthropic_toxic_prompts_token_set_recall": 0.3202988365945977, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 42500 }, { "epoch": 8.16, "eval_arxiv_accuracy": 0.3184375, "eval_arxiv_bleu_score": 3.580929209552589, "eval_arxiv_bleu_score_sem": 0.10186227418187782, "eval_arxiv_emb_cos_sim": 0.6993412971496582, "eval_arxiv_emb_cos_sim_sem": 0.009551301687381598, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.711540460586548, "eval_arxiv_n_ngrams_match_1": 13.08, "eval_arxiv_n_ngrams_match_2": 2.39, "eval_arxiv_n_ngrams_match_3": 0.456, "eval_arxiv_num_pred_words": 39.25, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.916788704081455, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3121365437937691, "eval_arxiv_runtime": 10.9533, "eval_arxiv_samples_per_second": 45.648, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.30865046215223163, "eval_arxiv_token_set_f1_sem": 0.004286575269854986, "eval_arxiv_token_set_precision": 0.2526619170063179, "eval_arxiv_token_set_recall": 0.4225832185125916, "eval_arxiv_true_num_tokens": 64.0, "step": 42500 }, { "epoch": 8.16, "eval_python_code_alpaca_accuracy": 0.14125, "eval_python_code_alpaca_bleu_score": 3.5710510886767723, "eval_python_code_alpaca_bleu_score_sem": 0.11153592195455794, "eval_python_code_alpaca_emb_cos_sim": 0.7040446996688843, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008902066364563721, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.198880434036255, "eval_python_code_alpaca_n_ngrams_match_1": 8.64, "eval_python_code_alpaca_n_ngrams_match_2": 2.144, "eval_python_code_alpaca_n_ngrams_match_3": 0.568, "eval_python_code_alpaca_num_pred_words": 43.582, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 24.505079780439473, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.28752798008359337, "eval_python_code_alpaca_runtime": 10.9883, "eval_python_code_alpaca_samples_per_second": 45.503, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.43116652732053623, "eval_python_code_alpaca_token_set_f1_sem": 0.005447268684502152, "eval_python_code_alpaca_token_set_precision": 0.46050557861543545, "eval_python_code_alpaca_token_set_recall": 0.429471640728056, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 42500 }, { "epoch": 8.16, "eval_wikibio_accuracy": 0.2949375, "eval_wikibio_bleu_score": 5.27167073038927, "eval_wikibio_bleu_score_sem": 0.1909683163189286, "eval_wikibio_emb_cos_sim": 0.6939498782157898, "eval_wikibio_emb_cos_sim_sem": 0.011721736628058092, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.1994404792785645, "eval_wikibio_n_ngrams_match_1": 9.294, "eval_wikibio_n_ngrams_match_2": 2.986, "eval_wikibio_n_ngrams_match_3": 0.996, "eval_wikibio_num_pred_words": 35.432, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 66.64902909345066, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3216035112591869, "eval_wikibio_runtime": 10.5796, "eval_wikibio_samples_per_second": 47.261, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.29555198082348944, "eval_wikibio_token_set_f1_sem": 0.005628781659182481, "eval_wikibio_token_set_precision": 0.30057251601266827, "eval_wikibio_token_set_recall": 0.3074309043198922, "eval_wikibio_true_num_tokens": 61.1328125, "step": 42500 }, { "epoch": 8.16, "eval_nq_accuracy": 0.4941875, "eval_nq_bleu_score": 9.86346669547191, "eval_nq_bleu_score_sem": 0.41970100155541007, "eval_nq_emb_cos_sim": 0.7970328330993652, "eval_nq_emb_cos_sim_sem": 0.008574894867552828, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.434400796890259, "eval_nq_n_ngrams_match_1": 21.284, "eval_nq_n_ngrams_match_2": 7.254, "eval_nq_n_ngrams_match_3": 3.158, "eval_nq_num_pred_words": 48.788, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.408980368277076, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.40653414259813897, "eval_nq_runtime": 10.8612, "eval_nq_samples_per_second": 46.035, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.424985100738458, "eval_nq_token_set_f1_sem": 0.0050258083679207526, "eval_nq_token_set_precision": 0.3769450608963914, "eval_nq_token_set_recall": 0.49888037346179454, "eval_nq_true_num_tokens": 64.0, "step": 42500 }, { "epoch": 8.16, "learning_rate": 0.001, "loss": 2.7843, "step": 42504 }, { "epoch": 8.16, "learning_rate": 0.001, "loss": 2.7901, "step": 42516 }, { "epoch": 8.17, "learning_rate": 0.001, "loss": 2.7809, "step": 42528 }, { "epoch": 8.17, "learning_rate": 0.001, "loss": 2.7845, "step": 42540 }, { "epoch": 8.17, "learning_rate": 0.001, "loss": 2.7916, "step": 42552 }, { "epoch": 8.17, "learning_rate": 0.001, "loss": 2.7911, "step": 42564 }, { "epoch": 8.18, "learning_rate": 0.001, "loss": 2.7982, "step": 42576 }, { "epoch": 8.18, "learning_rate": 0.001, "loss": 2.7913, "step": 42588 }, { "epoch": 8.18, "learning_rate": 0.001, "loss": 2.7861, "step": 42600 }, { "epoch": 8.18, "learning_rate": 0.001, "loss": 2.7847, "step": 42612 }, { "epoch": 8.18, "learning_rate": 0.001, "loss": 2.7814, "step": 42624 }, { "epoch": 8.19, "learning_rate": 0.001, "loss": 2.7878, "step": 42636 }, { "epoch": 8.19, "learning_rate": 0.001, "loss": 2.7898, "step": 42648 }, { "epoch": 8.19, "learning_rate": 0.001, "loss": 2.7916, "step": 42660 }, { "epoch": 8.19, "learning_rate": 0.001, "loss": 2.7857, "step": 42672 }, { "epoch": 8.2, "learning_rate": 0.001, "loss": 2.7969, "step": 42684 }, { "epoch": 8.2, "learning_rate": 0.001, "loss": 2.7868, "step": 42696 }, { "epoch": 8.2, "learning_rate": 0.001, "loss": 2.7846, "step": 42708 }, { "epoch": 8.2, "learning_rate": 0.001, "loss": 2.7865, "step": 42720 }, { "epoch": 8.21, "learning_rate": 0.001, "loss": 2.7857, "step": 42732 }, { "epoch": 8.21, "learning_rate": 0.001, "loss": 2.7814, "step": 42744 }, { "epoch": 8.21, "learning_rate": 0.001, "loss": 2.7769, "step": 42756 }, { "epoch": 8.21, "learning_rate": 0.001, "loss": 2.7811, "step": 42768 }, { "epoch": 8.21, "learning_rate": 0.001, "loss": 2.7775, "step": 42780 }, { "epoch": 8.22, "learning_rate": 0.001, "loss": 2.7836, "step": 42792 }, { "epoch": 8.22, "learning_rate": 0.001, "loss": 2.7802, "step": 42804 }, { "epoch": 8.22, "learning_rate": 0.001, "loss": 2.7857, "step": 42816 }, { "epoch": 8.22, "learning_rate": 0.001, "loss": 2.7701, "step": 42828 }, { "epoch": 8.23, "learning_rate": 0.001, "loss": 2.7836, "step": 42840 }, { "epoch": 8.23, "learning_rate": 0.001, "loss": 2.7806, "step": 42852 }, { "epoch": 8.23, "learning_rate": 0.001, "loss": 2.7904, "step": 42864 }, { "epoch": 8.23, "learning_rate": 0.001, "loss": 2.7826, "step": 42876 }, { "epoch": 8.24, "learning_rate": 0.001, "loss": 2.7906, "step": 42888 }, { "epoch": 8.24, "learning_rate": 0.001, "loss": 2.7784, "step": 42900 }, { "epoch": 8.24, "learning_rate": 0.001, "loss": 2.787, "step": 42912 }, { "epoch": 8.24, "learning_rate": 0.001, "loss": 2.77, "step": 42924 }, { "epoch": 8.24, "learning_rate": 0.001, "loss": 2.7838, "step": 42936 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 2.7824, "step": 42948 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 2.7813, "step": 42960 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 2.7774, "step": 42972 }, { "epoch": 8.25, "learning_rate": 0.001, "loss": 2.7897, "step": 42984 }, { "epoch": 8.26, "learning_rate": 0.001, "loss": 2.7716, "step": 42996 }, { "epoch": 8.26, "learning_rate": 0.001, "loss": 2.7899, "step": 43008 }, { "epoch": 8.26, "learning_rate": 0.001, "loss": 2.7809, "step": 43020 }, { "epoch": 8.26, "learning_rate": 0.001, "loss": 2.7779, "step": 43032 }, { "epoch": 8.26, "learning_rate": 0.001, "loss": 2.7801, "step": 43044 }, { "epoch": 8.27, "learning_rate": 0.001, "loss": 2.7778, "step": 43056 }, { "epoch": 8.27, "learning_rate": 0.001, "loss": 2.8035, "step": 43068 }, { "epoch": 8.27, "learning_rate": 0.001, "loss": 2.7826, "step": 43080 }, { "epoch": 8.27, "learning_rate": 0.001, "loss": 2.784, "step": 43092 }, { "epoch": 8.28, "learning_rate": 0.001, "loss": 2.7824, "step": 43104 }, { "epoch": 8.28, "learning_rate": 0.001, "loss": 2.7747, "step": 43116 }, { "epoch": 8.28, "eval_ag_news_accuracy": 0.29796875, "eval_ag_news_bleu_score": 4.289186033478596, "eval_ag_news_bleu_score_sem": 0.1472273291851427, "eval_ag_news_emb_cos_sim": 0.7602236270904541, "eval_ag_news_emb_cos_sim_sem": 0.008700125008279836, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8286235332489014, "eval_ag_news_n_ngrams_match_1": 12.692, "eval_ag_news_n_ngrams_match_2": 2.608, "eval_ag_news_n_ngrams_match_3": 0.724, "eval_ag_news_num_pred_words": 46.834, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.99917829829024, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.30951333819984755, "eval_ag_news_runtime": 11.2096, "eval_ag_news_samples_per_second": 44.605, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.32048263955705764, "eval_ag_news_token_set_f1_sem": 0.004396185704757408, "eval_ag_news_token_set_precision": 0.2992848077245601, "eval_ag_news_token_set_recall": 0.35936025524546333, "eval_ag_news_true_num_tokens": 56.09375, "step": 43125 }, { "epoch": 8.28, "eval_anthropic_toxic_prompts_accuracy": 0.10271875, "eval_anthropic_toxic_prompts_bleu_score": 2.475494185401394, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09886072913482825, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6274254322052002, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009971455059108096, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4859540462493896, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.4, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.426, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.452, "eval_anthropic_toxic_prompts_num_pred_words": 47.574, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.653565265347964, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18590882169211093, "eval_anthropic_toxic_prompts_runtime": 11.2239, "eval_anthropic_toxic_prompts_samples_per_second": 44.548, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.3254527599800401, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006217887336521121, "eval_anthropic_toxic_prompts_token_set_precision": 0.37812043057603234, "eval_anthropic_toxic_prompts_token_set_recall": 0.31746609671449844, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 43125 }, { "epoch": 8.28, "eval_arxiv_accuracy": 0.320625, "eval_arxiv_bleu_score": 3.7092596609580166, "eval_arxiv_bleu_score_sem": 0.10988733229751972, "eval_arxiv_emb_cos_sim": 0.70307457447052, "eval_arxiv_emb_cos_sim_sem": 0.008716305446840973, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6934268474578857, "eval_arxiv_n_ngrams_match_1": 13.586, "eval_arxiv_n_ngrams_match_2": 2.464, "eval_arxiv_n_ngrams_match_3": 0.466, "eval_arxiv_num_pred_words": 40.584, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.182309937092974, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3200854283346377, "eval_arxiv_runtime": 10.5425, "eval_arxiv_samples_per_second": 47.427, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3161266797735679, "eval_arxiv_token_set_f1_sem": 0.00410605037504695, "eval_arxiv_token_set_precision": 0.26409995385144985, "eval_arxiv_token_set_recall": 0.42019213761650775, "eval_arxiv_true_num_tokens": 64.0, "step": 43125 }, { "epoch": 8.28, "eval_python_code_alpaca_accuracy": 0.14328125, "eval_python_code_alpaca_bleu_score": 3.6580480867216787, "eval_python_code_alpaca_bleu_score_sem": 0.12125716275977651, "eval_python_code_alpaca_emb_cos_sim": 0.7039538025856018, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009341827240470899, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.166282892227173, "eval_python_code_alpaca_n_ngrams_match_1": 8.592, "eval_python_code_alpaca_n_ngrams_match_2": 2.182, "eval_python_code_alpaca_n_ngrams_match_3": 0.65, "eval_python_code_alpaca_num_pred_words": 43.54, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.719153640376163, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2852001742178809, "eval_python_code_alpaca_runtime": 10.3977, "eval_python_code_alpaca_samples_per_second": 48.087, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.43869896485484505, "eval_python_code_alpaca_token_set_f1_sem": 0.00560460810782458, "eval_python_code_alpaca_token_set_precision": 0.46536812284666607, "eval_python_code_alpaca_token_set_recall": 0.4419565581122088, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 43125 }, { "epoch": 8.28, "eval_wikibio_accuracy": 0.29796875, "eval_wikibio_bleu_score": 5.451557623292682, "eval_wikibio_bleu_score_sem": 0.1839750438125555, "eval_wikibio_emb_cos_sim": 0.7180080413818359, "eval_wikibio_emb_cos_sim_sem": 0.009667340522410693, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.098819732666016, "eval_wikibio_n_ngrams_match_1": 9.702, "eval_wikibio_n_ngrams_match_2": 3.142, "eval_wikibio_n_ngrams_match_3": 1.104, "eval_wikibio_num_pred_words": 36.826, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.269111938402695, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3337485597739648, "eval_wikibio_runtime": 9.9718, "eval_wikibio_samples_per_second": 50.141, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3083721893814285, "eval_wikibio_token_set_f1_sem": 0.005254341766396219, "eval_wikibio_token_set_precision": 0.313680133888289, "eval_wikibio_token_set_recall": 0.3167676217626774, "eval_wikibio_true_num_tokens": 61.1328125, "step": 43125 }, { "epoch": 8.28, "eval_nq_accuracy": 0.49509375, "eval_nq_bleu_score": 9.962303375764531, "eval_nq_bleu_score_sem": 0.4212394240939915, "eval_nq_emb_cos_sim": 0.8032087087631226, "eval_nq_emb_cos_sim_sem": 0.007502148913106757, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.424966812133789, "eval_nq_n_ngrams_match_1": 21.508, "eval_nq_n_ngrams_match_2": 7.358, "eval_nq_n_ngrams_match_3": 3.202, "eval_nq_num_pred_words": 49.312, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.301854328625993, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4108295881109665, "eval_nq_runtime": 11.256, "eval_nq_samples_per_second": 44.421, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.43293763220999165, "eval_nq_token_set_f1_sem": 0.004937672230892428, "eval_nq_token_set_precision": 0.38471865967487573, "eval_nq_token_set_recall": 0.506869447852444, "eval_nq_true_num_tokens": 64.0, "step": 43125 }, { "epoch": 8.28, "learning_rate": 0.001, "loss": 2.7911, "step": 43128 }, { "epoch": 8.28, "learning_rate": 0.001, "loss": 2.7842, "step": 43140 }, { "epoch": 8.29, "learning_rate": 0.001, "loss": 2.7887, "step": 43152 }, { "epoch": 8.29, "learning_rate": 0.001, "loss": 2.7756, "step": 43164 }, { "epoch": 8.29, "learning_rate": 0.001, "loss": 2.7861, "step": 43176 }, { "epoch": 8.29, "learning_rate": 0.001, "loss": 2.7899, "step": 43188 }, { "epoch": 8.29, "learning_rate": 0.001, "loss": 2.7839, "step": 43200 }, { "epoch": 8.3, "learning_rate": 0.001, "loss": 2.7764, "step": 43212 }, { "epoch": 8.3, "learning_rate": 0.001, "loss": 2.7805, "step": 43224 }, { "epoch": 8.3, "learning_rate": 0.001, "loss": 2.7846, "step": 43236 }, { "epoch": 8.3, "learning_rate": 0.001, "loss": 2.7854, "step": 43248 }, { "epoch": 8.31, "learning_rate": 0.001, "loss": 2.7817, "step": 43260 }, { "epoch": 8.31, "learning_rate": 0.001, "loss": 2.7878, "step": 43272 }, { "epoch": 8.31, "learning_rate": 0.001, "loss": 2.783, "step": 43284 }, { "epoch": 8.31, "learning_rate": 0.001, "loss": 2.7924, "step": 43296 }, { "epoch": 8.32, "learning_rate": 0.001, "loss": 2.7858, "step": 43308 }, { "epoch": 8.32, "learning_rate": 0.001, "loss": 2.7888, "step": 43320 }, { "epoch": 8.32, "learning_rate": 0.001, "loss": 2.7817, "step": 43332 }, { "epoch": 8.32, "learning_rate": 0.001, "loss": 2.7796, "step": 43344 }, { "epoch": 8.32, "learning_rate": 0.001, "loss": 2.7882, "step": 43356 }, { "epoch": 8.33, "learning_rate": 0.001, "loss": 2.789, "step": 43368 }, { "epoch": 8.33, "learning_rate": 0.001, "loss": 2.7892, "step": 43380 }, { "epoch": 8.33, "learning_rate": 0.001, "loss": 2.7771, "step": 43392 }, { "epoch": 8.33, "learning_rate": 0.001, "loss": 2.7921, "step": 43404 }, { "epoch": 8.34, "learning_rate": 0.001, "loss": 2.7865, "step": 43416 }, { "epoch": 8.34, "learning_rate": 0.001, "loss": 2.7811, "step": 43428 }, { "epoch": 8.34, "learning_rate": 0.001, "loss": 2.7865, "step": 43440 }, { "epoch": 8.34, "learning_rate": 0.001, "loss": 2.7826, "step": 43452 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 2.7732, "step": 43464 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 2.7793, "step": 43476 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 2.7883, "step": 43488 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 2.7863, "step": 43500 }, { "epoch": 8.35, "learning_rate": 0.001, "loss": 2.7837, "step": 43512 }, { "epoch": 8.36, "learning_rate": 0.001, "loss": 2.7856, "step": 43524 }, { "epoch": 8.36, "learning_rate": 0.001, "loss": 2.7796, "step": 43536 }, { "epoch": 8.36, "learning_rate": 0.001, "loss": 2.7888, "step": 43548 }, { "epoch": 8.36, "learning_rate": 0.001, "loss": 2.782, "step": 43560 }, { "epoch": 8.37, "learning_rate": 0.001, "loss": 2.7827, "step": 43572 }, { "epoch": 8.37, "learning_rate": 0.001, "loss": 2.785, "step": 43584 }, { "epoch": 8.37, "learning_rate": 0.001, "loss": 2.7844, "step": 43596 }, { "epoch": 8.37, "learning_rate": 0.001, "loss": 2.7806, "step": 43608 }, { "epoch": 8.38, "learning_rate": 0.001, "loss": 2.781, "step": 43620 }, { "epoch": 8.38, "learning_rate": 0.001, "loss": 2.7777, "step": 43632 }, { "epoch": 8.38, "learning_rate": 0.001, "loss": 2.7923, "step": 43644 }, { "epoch": 8.38, "learning_rate": 0.001, "loss": 2.7845, "step": 43656 }, { "epoch": 8.38, "learning_rate": 0.001, "loss": 2.7809, "step": 43668 }, { "epoch": 8.39, "learning_rate": 0.001, "loss": 2.7774, "step": 43680 }, { "epoch": 8.39, "learning_rate": 0.001, "loss": 2.7963, "step": 43692 }, { "epoch": 8.39, "learning_rate": 0.001, "loss": 2.7849, "step": 43704 }, { "epoch": 8.39, "learning_rate": 0.001, "loss": 2.7833, "step": 43716 }, { "epoch": 8.4, "learning_rate": 0.001, "loss": 2.7779, "step": 43728 }, { "epoch": 8.4, "learning_rate": 0.001, "loss": 2.7859, "step": 43740 }, { "epoch": 8.4, "eval_ag_news_accuracy": 0.296375, "eval_ag_news_bleu_score": 4.127681050947515, "eval_ag_news_bleu_score_sem": 0.1394653004526821, "eval_ag_news_emb_cos_sim": 0.7563239336013794, "eval_ag_news_emb_cos_sim_sem": 0.008866142103108756, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.838296413421631, "eval_ag_news_n_ngrams_match_1": 12.674, "eval_ag_news_n_ngrams_match_2": 2.552, "eval_ag_news_n_ngrams_match_3": 0.654, "eval_ag_news_num_pred_words": 47.252, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 46.446281740943554, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3095242437772054, "eval_ag_news_runtime": 12.4816, "eval_ag_news_samples_per_second": 40.059, "eval_ag_news_steps_per_second": 0.08, "eval_ag_news_token_set_f1": 0.31812557604777497, "eval_ag_news_token_set_f1_sem": 0.004140194139602665, "eval_ag_news_token_set_precision": 0.30030903788951346, "eval_ag_news_token_set_recall": 0.35390381441230057, "eval_ag_news_true_num_tokens": 56.09375, "step": 43750 }, { "epoch": 8.4, "eval_anthropic_toxic_prompts_accuracy": 0.10359375, "eval_anthropic_toxic_prompts_bleu_score": 2.6464367895337446, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10675059715711638, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6354289054870605, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00967367769621101, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4955379962921143, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.542, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.562, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.52, "eval_anthropic_toxic_prompts_num_pred_words": 48.818, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.968019855168535, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.186614958033128, "eval_anthropic_toxic_prompts_runtime": 12.1145, "eval_anthropic_toxic_prompts_samples_per_second": 41.273, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3232138201309163, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.005916100528997787, "eval_anthropic_toxic_prompts_token_set_precision": 0.38813685524287767, "eval_anthropic_toxic_prompts_token_set_recall": 0.3055174839589522, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 43750 }, { "epoch": 8.4, "eval_arxiv_accuracy": 0.32078125, "eval_arxiv_bleu_score": 3.769695008452207, "eval_arxiv_bleu_score_sem": 0.11125574381752283, "eval_arxiv_emb_cos_sim": 0.7036663293838501, "eval_arxiv_emb_cos_sim_sem": 0.008145491579667203, "eval_arxiv_emb_top1_equal": 0.171875, "eval_arxiv_emb_top1_equal_sem": 0.03347745514062371, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.698707342147827, "eval_arxiv_n_ngrams_match_1": 13.406, "eval_arxiv_n_ngrams_match_2": 2.462, "eval_arxiv_n_ngrams_match_3": 0.52, "eval_arxiv_num_pred_words": 40.34, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.395053612932166, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31429062003146413, "eval_arxiv_runtime": 13.2845, "eval_arxiv_samples_per_second": 37.638, "eval_arxiv_steps_per_second": 0.075, "eval_arxiv_token_set_f1": 0.31195369477363527, "eval_arxiv_token_set_f1_sem": 0.004173339921038991, "eval_arxiv_token_set_precision": 0.26016474718564564, "eval_arxiv_token_set_recall": 0.4105074224751023, "eval_arxiv_true_num_tokens": 64.0, "step": 43750 }, { "epoch": 8.4, "eval_python_code_alpaca_accuracy": 0.1455625, "eval_python_code_alpaca_bleu_score": 3.6777738058875826, "eval_python_code_alpaca_bleu_score_sem": 0.11551648618370353, "eval_python_code_alpaca_emb_cos_sim": 0.7076461315155029, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008949351177325897, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1563408374786377, "eval_python_code_alpaca_n_ngrams_match_1": 8.81, "eval_python_code_alpaca_n_ngrams_match_2": 2.23, "eval_python_code_alpaca_n_ngrams_match_3": 0.628, "eval_python_code_alpaca_num_pred_words": 44.006, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.484504894441393, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29306394825778914, "eval_python_code_alpaca_runtime": 11.5314, "eval_python_code_alpaca_samples_per_second": 43.36, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.43354780343817956, "eval_python_code_alpaca_token_set_f1_sem": 0.005401078005187498, "eval_python_code_alpaca_token_set_precision": 0.4809611521464254, "eval_python_code_alpaca_token_set_recall": 0.4182455721870356, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 43750 }, { "epoch": 8.4, "eval_wikibio_accuracy": 0.29396875, "eval_wikibio_bleu_score": 5.452889389272654, "eval_wikibio_bleu_score_sem": 0.18067991348148654, "eval_wikibio_emb_cos_sim": 0.7179189920425415, "eval_wikibio_emb_cos_sim_sem": 0.010318616674658805, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.226851463317871, "eval_wikibio_n_ngrams_match_1": 10.146, "eval_wikibio_n_ngrams_match_2": 3.27, "eval_wikibio_n_ngrams_match_3": 1.132, "eval_wikibio_num_pred_words": 38.906, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 68.501213697156, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3433434774041647, "eval_wikibio_runtime": 11.5794, "eval_wikibio_samples_per_second": 43.18, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.3132033298013379, "eval_wikibio_token_set_f1_sem": 0.0050542039627546895, "eval_wikibio_token_set_precision": 0.3283312891350125, "eval_wikibio_token_set_recall": 0.3106371168149282, "eval_wikibio_true_num_tokens": 61.1328125, "step": 43750 }, { "epoch": 8.4, "eval_nq_accuracy": 0.496625, "eval_nq_bleu_score": 10.234519340711536, "eval_nq_bleu_score_sem": 0.41931663053445006, "eval_nq_emb_cos_sim": 0.7993790507316589, "eval_nq_emb_cos_sim_sem": 0.008028297657571105, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4206948280334473, "eval_nq_n_ngrams_match_1": 21.614, "eval_nq_n_ngrams_match_2": 7.53, "eval_nq_n_ngrams_match_3": 3.316, "eval_nq_num_pred_words": 49.404, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.253675968494578, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4116959281410013, "eval_nq_runtime": 12.5266, "eval_nq_samples_per_second": 39.915, "eval_nq_steps_per_second": 0.08, "eval_nq_token_set_f1": 0.43264911018815577, "eval_nq_token_set_f1_sem": 0.004965873225720561, "eval_nq_token_set_precision": 0.38849705980467175, "eval_nq_token_set_recall": 0.49799832891518625, "eval_nq_true_num_tokens": 64.0, "step": 43750 }, { "epoch": 8.4, "learning_rate": 0.001, "loss": 2.7913, "step": 43752 }, { "epoch": 8.4, "learning_rate": 0.001, "loss": 2.7918, "step": 43764 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 2.784, "step": 43776 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 2.7799, "step": 43788 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 2.7774, "step": 43800 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 2.7851, "step": 43812 }, { "epoch": 8.41, "learning_rate": 0.001, "loss": 2.7882, "step": 43824 }, { "epoch": 8.42, "learning_rate": 0.001, "loss": 2.7819, "step": 43836 }, { "epoch": 8.42, "learning_rate": 0.001, "loss": 2.7885, "step": 43848 }, { "epoch": 8.42, "learning_rate": 0.001, "loss": 2.7754, "step": 43860 }, { "epoch": 8.42, "learning_rate": 0.001, "loss": 2.7903, "step": 43872 }, { "epoch": 8.43, "learning_rate": 0.001, "loss": 2.7701, "step": 43884 }, { "epoch": 8.43, "learning_rate": 0.001, "loss": 2.7852, "step": 43896 }, { "epoch": 8.43, "learning_rate": 0.001, "loss": 2.7846, "step": 43908 }, { "epoch": 8.43, "learning_rate": 0.001, "loss": 2.7845, "step": 43920 }, { "epoch": 8.44, "learning_rate": 0.001, "loss": 2.7871, "step": 43932 }, { "epoch": 8.44, "learning_rate": 0.001, "loss": 2.7864, "step": 43944 }, { "epoch": 8.44, "learning_rate": 0.001, "loss": 2.781, "step": 43956 }, { "epoch": 8.44, "learning_rate": 0.001, "loss": 2.7906, "step": 43968 }, { "epoch": 8.44, "learning_rate": 0.001, "loss": 2.7856, "step": 43980 }, { "epoch": 8.45, "learning_rate": 0.001, "loss": 2.7854, "step": 43992 }, { "epoch": 8.45, "learning_rate": 0.001, "loss": 2.781, "step": 44004 }, { "epoch": 8.45, "learning_rate": 0.001, "loss": 2.7868, "step": 44016 }, { "epoch": 8.45, "learning_rate": 0.001, "loss": 2.7853, "step": 44028 }, { "epoch": 8.46, "learning_rate": 0.001, "loss": 2.7719, "step": 44040 }, { "epoch": 8.46, "learning_rate": 0.001, "loss": 2.7774, "step": 44052 }, { "epoch": 8.46, "learning_rate": 0.001, "loss": 2.7786, "step": 44064 }, { "epoch": 8.46, "learning_rate": 0.001, "loss": 2.7846, "step": 44076 }, { "epoch": 8.47, "learning_rate": 0.001, "loss": 2.7842, "step": 44088 }, { "epoch": 8.47, "learning_rate": 0.001, "loss": 2.7916, "step": 44100 }, { "epoch": 8.47, "learning_rate": 0.001, "loss": 2.7861, "step": 44112 }, { "epoch": 8.47, "learning_rate": 0.001, "loss": 2.7797, "step": 44124 }, { "epoch": 8.47, "learning_rate": 0.001, "loss": 2.7787, "step": 44136 }, { "epoch": 8.48, "learning_rate": 0.001, "loss": 2.7876, "step": 44148 }, { "epoch": 8.48, "learning_rate": 0.001, "loss": 2.7783, "step": 44160 }, { "epoch": 8.48, "learning_rate": 0.001, "loss": 2.7773, "step": 44172 }, { "epoch": 8.48, "learning_rate": 0.001, "loss": 2.7775, "step": 44184 }, { "epoch": 8.49, "learning_rate": 0.001, "loss": 2.7783, "step": 44196 }, { "epoch": 8.49, "learning_rate": 0.001, "loss": 2.7855, "step": 44208 }, { "epoch": 8.49, "learning_rate": 0.001, "loss": 2.781, "step": 44220 }, { "epoch": 8.49, "learning_rate": 0.001, "loss": 2.7758, "step": 44232 }, { "epoch": 8.5, "learning_rate": 0.001, "loss": 2.7756, "step": 44244 }, { "epoch": 8.5, "learning_rate": 0.001, "loss": 2.7865, "step": 44256 }, { "epoch": 8.5, "learning_rate": 0.001, "loss": 2.7716, "step": 44268 }, { "epoch": 8.5, "learning_rate": 0.001, "loss": 2.7787, "step": 44280 }, { "epoch": 8.5, "learning_rate": 0.001, "loss": 2.7825, "step": 44292 }, { "epoch": 8.51, "learning_rate": 0.001, "loss": 2.7846, "step": 44304 }, { "epoch": 8.51, "learning_rate": 0.001, "loss": 2.7797, "step": 44316 }, { "epoch": 8.51, "learning_rate": 0.001, "loss": 2.7888, "step": 44328 }, { "epoch": 8.51, "learning_rate": 0.001, "loss": 2.7689, "step": 44340 }, { "epoch": 8.52, "learning_rate": 0.001, "loss": 2.7922, "step": 44352 }, { "epoch": 8.52, "learning_rate": 0.001, "loss": 2.77, "step": 44364 }, { "epoch": 8.52, "eval_ag_news_accuracy": 0.2985625, "eval_ag_news_bleu_score": 4.291809719559645, "eval_ag_news_bleu_score_sem": 0.14202437651325187, "eval_ag_news_emb_cos_sim": 0.7695584297180176, "eval_ag_news_emb_cos_sim_sem": 0.00787921240058693, "eval_ag_news_emb_top1_equal": 0.1484375, "eval_ag_news_emb_top1_equal_sem": 0.031548465007086954, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.81347393989563, "eval_ag_news_n_ngrams_match_1": 12.722, "eval_ag_news_n_ngrams_match_2": 2.614, "eval_ag_news_n_ngrams_match_3": 0.71, "eval_ag_news_num_pred_words": 46.164, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.307561536541535, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31273392151992785, "eval_ag_news_runtime": 10.801, "eval_ag_news_samples_per_second": 46.292, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3229267044627019, "eval_ag_news_token_set_f1_sem": 0.004233260819430299, "eval_ag_news_token_set_precision": 0.2997766880442096, "eval_ag_news_token_set_recall": 0.36729325100895005, "eval_ag_news_true_num_tokens": 56.09375, "step": 44375 }, { "epoch": 8.52, "eval_anthropic_toxic_prompts_accuracy": 0.10415625, "eval_anthropic_toxic_prompts_bleu_score": 2.659425178421464, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10760492846382186, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6349513530731201, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010008689824133946, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4967308044433594, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.478, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.554, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.528, "eval_anthropic_toxic_prompts_num_pred_words": 47.546, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.00736784061575, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18674929383812144, "eval_anthropic_toxic_prompts_runtime": 10.3269, "eval_anthropic_toxic_prompts_samples_per_second": 48.417, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3274998293274113, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006351060424454388, "eval_anthropic_toxic_prompts_token_set_precision": 0.3814174997872746, "eval_anthropic_toxic_prompts_token_set_recall": 0.32270478698508404, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 44375 }, { "epoch": 8.52, "eval_arxiv_accuracy": 0.32190625, "eval_arxiv_bleu_score": 3.8319262827744947, "eval_arxiv_bleu_score_sem": 0.1095366128052342, "eval_arxiv_emb_cos_sim": 0.7129121422767639, "eval_arxiv_emb_cos_sim_sem": 0.007783770862134011, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6637754440307617, "eval_arxiv_n_ngrams_match_1": 13.664, "eval_arxiv_n_ngrams_match_2": 2.506, "eval_arxiv_n_ngrams_match_3": 0.538, "eval_arxiv_num_pred_words": 40.39, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 39.0083390095431, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32292598975976605, "eval_arxiv_runtime": 10.6021, "eval_arxiv_samples_per_second": 47.16, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3208577635863141, "eval_arxiv_token_set_f1_sem": 0.0040449914744743855, "eval_arxiv_token_set_precision": 0.26562525757372335, "eval_arxiv_token_set_recall": 0.42531107879627794, "eval_arxiv_true_num_tokens": 64.0, "step": 44375 }, { "epoch": 8.52, "eval_python_code_alpaca_accuracy": 0.1444375, "eval_python_code_alpaca_bleu_score": 3.561220880741698, "eval_python_code_alpaca_bleu_score_sem": 0.1016785175749165, "eval_python_code_alpaca_emb_cos_sim": 0.7064552903175354, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008590842181358136, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.161862850189209, "eval_python_code_alpaca_n_ngrams_match_1": 8.456, "eval_python_code_alpaca_n_ngrams_match_2": 2.076, "eval_python_code_alpaca_n_ngrams_match_3": 0.542, "eval_python_code_alpaca_num_pred_words": 42.658, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.614545341030077, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2855763855082162, "eval_python_code_alpaca_runtime": 11.0184, "eval_python_code_alpaca_samples_per_second": 45.379, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4337527075136103, "eval_python_code_alpaca_token_set_f1_sem": 0.005524804980645295, "eval_python_code_alpaca_token_set_precision": 0.456940664183077, "eval_python_code_alpaca_token_set_recall": 0.441517397443801, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 44375 }, { "epoch": 8.52, "eval_wikibio_accuracy": 0.2968125, "eval_wikibio_bleu_score": 5.070998927409954, "eval_wikibio_bleu_score_sem": 0.18104906205014448, "eval_wikibio_emb_cos_sim": 0.7035855054855347, "eval_wikibio_emb_cos_sim_sem": 0.011877870945455714, "eval_wikibio_emb_top1_equal": 0.109375, "eval_wikibio_emb_top1_equal_sem": 0.027695207821224692, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.074985027313232, "eval_wikibio_n_ngrams_match_1": 9.364, "eval_wikibio_n_ngrams_match_2": 2.972, "eval_wikibio_n_ngrams_match_3": 1.03, "eval_wikibio_num_pred_words": 36.584, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 58.84959944513198, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.31476794312823275, "eval_wikibio_runtime": 10.1544, "eval_wikibio_samples_per_second": 49.24, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.2935711021463623, "eval_wikibio_token_set_f1_sem": 0.005645602466548096, "eval_wikibio_token_set_precision": 0.3020569331321563, "eval_wikibio_token_set_recall": 0.3023993335384537, "eval_wikibio_true_num_tokens": 61.1328125, "step": 44375 }, { "epoch": 8.52, "eval_nq_accuracy": 0.49346875, "eval_nq_bleu_score": 10.24725951532769, "eval_nq_bleu_score_sem": 0.42530713635991835, "eval_nq_emb_cos_sim": 0.7986217141151428, "eval_nq_emb_cos_sim_sem": 0.007920463628566968, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4164085388183594, "eval_nq_n_ngrams_match_1": 21.634, "eval_nq_n_ngrams_match_2": 7.536, "eval_nq_n_ngrams_match_3": 3.352, "eval_nq_num_pred_words": 49.11, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.20554268883261, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41185383705597056, "eval_nq_runtime": 10.7717, "eval_nq_samples_per_second": 46.418, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.43351899242381453, "eval_nq_token_set_f1_sem": 0.00510057992256785, "eval_nq_token_set_precision": 0.38465580979919245, "eval_nq_token_set_recall": 0.5083314649684932, "eval_nq_true_num_tokens": 64.0, "step": 44375 }, { "epoch": 8.52, "learning_rate": 0.001, "loss": 2.7874, "step": 44376 }, { "epoch": 8.52, "learning_rate": 0.001, "loss": 2.7874, "step": 44388 }, { "epoch": 8.53, "learning_rate": 0.001, "loss": 2.7789, "step": 44400 }, { "epoch": 8.53, "learning_rate": 0.001, "loss": 2.7753, "step": 44412 }, { "epoch": 8.53, "learning_rate": 0.001, "loss": 2.7847, "step": 44424 }, { "epoch": 8.53, "learning_rate": 0.001, "loss": 2.7791, "step": 44436 }, { "epoch": 8.53, "learning_rate": 0.001, "loss": 2.7838, "step": 44448 }, { "epoch": 8.54, "learning_rate": 0.001, "loss": 2.7905, "step": 44460 }, { "epoch": 8.54, "learning_rate": 0.001, "loss": 2.7815, "step": 44472 }, { "epoch": 8.54, "learning_rate": 0.001, "loss": 2.786, "step": 44484 }, { "epoch": 8.54, "learning_rate": 0.001, "loss": 2.7862, "step": 44496 }, { "epoch": 8.55, "learning_rate": 0.001, "loss": 2.7911, "step": 44508 }, { "epoch": 8.55, "learning_rate": 0.001, "loss": 2.7829, "step": 44520 }, { "epoch": 8.55, "learning_rate": 0.001, "loss": 2.7816, "step": 44532 }, { "epoch": 8.55, "learning_rate": 0.001, "loss": 2.7829, "step": 44544 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 2.7761, "step": 44556 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 2.7737, "step": 44568 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 2.7638, "step": 44580 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 2.776, "step": 44592 }, { "epoch": 8.56, "learning_rate": 0.001, "loss": 2.7793, "step": 44604 }, { "epoch": 8.57, "learning_rate": 0.001, "loss": 2.7748, "step": 44616 }, { "epoch": 8.57, "learning_rate": 0.001, "loss": 2.7686, "step": 44628 }, { "epoch": 8.57, "learning_rate": 0.001, "loss": 2.7773, "step": 44640 }, { "epoch": 8.57, "learning_rate": 0.001, "loss": 2.7829, "step": 44652 }, { "epoch": 8.58, "learning_rate": 0.001, "loss": 2.7753, "step": 44664 }, { "epoch": 8.58, "learning_rate": 0.001, "loss": 2.7822, "step": 44676 }, { "epoch": 8.58, "learning_rate": 0.001, "loss": 2.773, "step": 44688 }, { "epoch": 8.58, "learning_rate": 0.001, "loss": 2.7747, "step": 44700 }, { "epoch": 8.59, "learning_rate": 0.001, "loss": 2.7878, "step": 44712 }, { "epoch": 8.59, "learning_rate": 0.001, "loss": 2.7755, "step": 44724 }, { "epoch": 8.59, "learning_rate": 0.001, "loss": 2.7809, "step": 44736 }, { "epoch": 8.59, "learning_rate": 0.001, "loss": 2.772, "step": 44748 }, { "epoch": 8.59, "learning_rate": 0.001, "loss": 2.7662, "step": 44760 }, { "epoch": 8.6, "learning_rate": 0.001, "loss": 2.779, "step": 44772 }, { "epoch": 8.6, "learning_rate": 0.001, "loss": 2.7859, "step": 44784 }, { "epoch": 8.6, "learning_rate": 0.001, "loss": 2.7768, "step": 44796 }, { "epoch": 8.6, "learning_rate": 0.001, "loss": 2.7768, "step": 44808 }, { "epoch": 8.61, "learning_rate": 0.001, "loss": 2.7768, "step": 44820 }, { "epoch": 8.61, "learning_rate": 0.001, "loss": 2.7815, "step": 44832 }, { "epoch": 8.61, "learning_rate": 0.001, "loss": 2.7897, "step": 44844 }, { "epoch": 8.61, "learning_rate": 0.001, "loss": 2.7753, "step": 44856 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 2.772, "step": 44868 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 2.7694, "step": 44880 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 2.7731, "step": 44892 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 2.7706, "step": 44904 }, { "epoch": 8.62, "learning_rate": 0.001, "loss": 2.7756, "step": 44916 }, { "epoch": 8.63, "learning_rate": 0.001, "loss": 2.7916, "step": 44928 }, { "epoch": 8.63, "learning_rate": 0.001, "loss": 2.7768, "step": 44940 }, { "epoch": 8.63, "learning_rate": 0.001, "loss": 2.7844, "step": 44952 }, { "epoch": 8.63, "learning_rate": 0.001, "loss": 2.7745, "step": 44964 }, { "epoch": 8.64, "learning_rate": 0.001, "loss": 2.7755, "step": 44976 }, { "epoch": 8.64, "learning_rate": 0.001, "loss": 2.7729, "step": 44988 }, { "epoch": 8.64, "learning_rate": 0.001, "loss": 2.7853, "step": 45000 }, { "epoch": 8.64, "eval_ag_news_accuracy": 0.29778125, "eval_ag_news_bleu_score": 4.252840525792193, "eval_ag_news_bleu_score_sem": 0.14399064432957762, "eval_ag_news_emb_cos_sim": 0.761573076248169, "eval_ag_news_emb_cos_sim_sem": 0.009271340587603476, "eval_ag_news_emb_top1_equal": 0.1640625, "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8181345462799072, "eval_ag_news_n_ngrams_match_1": 12.546, "eval_ag_news_n_ngrams_match_2": 2.49, "eval_ag_news_n_ngrams_match_3": 0.654, "eval_ag_news_num_pred_words": 45.39, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.519215080909994, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3099763610330088, "eval_ag_news_runtime": 10.9598, "eval_ag_news_samples_per_second": 45.621, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.31806577721666496, "eval_ag_news_token_set_f1_sem": 0.004402298022498817, "eval_ag_news_token_set_precision": 0.2957539883176623, "eval_ag_news_token_set_recall": 0.3604130285861673, "eval_ag_news_true_num_tokens": 56.09375, "step": 45000 }, { "epoch": 8.64, "eval_anthropic_toxic_prompts_accuracy": 0.1025, "eval_anthropic_toxic_prompts_bleu_score": 2.5315821913123773, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10042717556961148, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6284630298614502, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01012618865233724, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.520474672317505, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.494, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.466, "eval_anthropic_toxic_prompts_num_pred_words": 46.468, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.80046880345548, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18441072889578983, "eval_anthropic_toxic_prompts_runtime": 10.6014, "eval_anthropic_toxic_prompts_samples_per_second": 47.163, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.3225545158462773, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006462191919774411, "eval_anthropic_toxic_prompts_token_set_precision": 0.38040121224395923, "eval_anthropic_toxic_prompts_token_set_recall": 0.31025760395916663, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 45000 }, { "epoch": 8.64, "eval_arxiv_accuracy": 0.321375, "eval_arxiv_bleu_score": 3.6814723018023328, "eval_arxiv_bleu_score_sem": 0.1039049498476154, "eval_arxiv_emb_cos_sim": 0.7027502059936523, "eval_arxiv_emb_cos_sim_sem": 0.007261334991605668, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6967861652374268, "eval_arxiv_n_ngrams_match_1": 13.286, "eval_arxiv_n_ngrams_match_2": 2.462, "eval_arxiv_n_ngrams_match_3": 0.474, "eval_arxiv_num_pred_words": 39.318, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 40.31752206839012, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31524613343789953, "eval_arxiv_runtime": 10.9257, "eval_arxiv_samples_per_second": 45.764, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.3151740392038086, "eval_arxiv_token_set_f1_sem": 0.004050397018570638, "eval_arxiv_token_set_precision": 0.2599937548131332, "eval_arxiv_token_set_recall": 0.42169620401308466, "eval_arxiv_true_num_tokens": 64.0, "step": 45000 }, { "epoch": 8.64, "eval_python_code_alpaca_accuracy": 0.14265625, "eval_python_code_alpaca_bleu_score": 3.8009433394205474, "eval_python_code_alpaca_bleu_score_sem": 0.12128623274109651, "eval_python_code_alpaca_emb_cos_sim": 0.6998119950294495, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009761663193744548, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.148423671722412, "eval_python_code_alpaca_n_ngrams_match_1": 8.656, "eval_python_code_alpaca_n_ngrams_match_2": 2.17, "eval_python_code_alpaca_n_ngrams_match_3": 0.6, "eval_python_code_alpaca_num_pred_words": 42.156, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.299308260081176, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2961311542876535, "eval_python_code_alpaca_runtime": 10.8895, "eval_python_code_alpaca_samples_per_second": 45.916, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.4358760336980557, "eval_python_code_alpaca_token_set_f1_sem": 0.005442843303239688, "eval_python_code_alpaca_token_set_precision": 0.46578876224594257, "eval_python_code_alpaca_token_set_recall": 0.4349771077123037, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 45000 }, { "epoch": 8.64, "eval_wikibio_accuracy": 0.294125, "eval_wikibio_bleu_score": 5.662584307024523, "eval_wikibio_bleu_score_sem": 0.20226142434278857, "eval_wikibio_emb_cos_sim": 0.6998697519302368, "eval_wikibio_emb_cos_sim_sem": 0.010777700563519212, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.150162220001221, "eval_wikibio_n_ngrams_match_1": 9.824, "eval_wikibio_n_ngrams_match_2": 3.28, "eval_wikibio_n_ngrams_match_3": 1.194, "eval_wikibio_num_pred_words": 37.482, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 63.44429139641754, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33701230416048844, "eval_wikibio_runtime": 12.8761, "eval_wikibio_samples_per_second": 38.832, "eval_wikibio_steps_per_second": 0.078, "eval_wikibio_token_set_f1": 0.30976859066711854, "eval_wikibio_token_set_f1_sem": 0.005179778718883501, "eval_wikibio_token_set_precision": 0.31861442959538344, "eval_wikibio_token_set_recall": 0.3155990255051859, "eval_wikibio_true_num_tokens": 61.1328125, "step": 45000 }, { "epoch": 8.64, "eval_nq_accuracy": 0.4961875, "eval_nq_bleu_score": 10.13577607583043, "eval_nq_bleu_score_sem": 0.4370365677875096, "eval_nq_emb_cos_sim": 0.798082172870636, "eval_nq_emb_cos_sim_sem": 0.007582819956790132, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.411942958831787, "eval_nq_n_ngrams_match_1": 21.49, "eval_nq_n_ngrams_match_2": 7.38, "eval_nq_n_ngrams_match_3": 3.282, "eval_nq_num_pred_words": 48.64, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.155615002669718, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41128913931593736, "eval_nq_runtime": 11.0077, "eval_nq_samples_per_second": 45.423, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4320660897067614, "eval_nq_token_set_f1_sem": 0.005146433859068285, "eval_nq_token_set_precision": 0.3832323142103081, "eval_nq_token_set_recall": 0.5062301708201041, "eval_nq_true_num_tokens": 64.0, "step": 45000 }, { "epoch": 8.64, "learning_rate": 0.001, "loss": 2.7753, "step": 45012 }, { "epoch": 8.65, "learning_rate": 0.001, "loss": 2.7798, "step": 45024 }, { "epoch": 8.65, "learning_rate": 0.001, "loss": 2.7824, "step": 45036 }, { "epoch": 8.65, "learning_rate": 0.001, "loss": 2.7792, "step": 45048 }, { "epoch": 8.65, "learning_rate": 0.001, "loss": 2.7785, "step": 45060 }, { "epoch": 8.65, "learning_rate": 0.001, "loss": 2.7735, "step": 45072 }, { "epoch": 8.66, "learning_rate": 0.001, "loss": 2.7846, "step": 45084 }, { "epoch": 8.66, "learning_rate": 0.001, "loss": 2.7724, "step": 45096 }, { "epoch": 8.66, "learning_rate": 0.001, "loss": 2.7724, "step": 45108 }, { "epoch": 8.66, "learning_rate": 0.001, "loss": 2.7705, "step": 45120 }, { "epoch": 8.67, "learning_rate": 0.001, "loss": 2.7803, "step": 45132 }, { "epoch": 8.67, "learning_rate": 0.001, "loss": 2.7714, "step": 45144 }, { "epoch": 8.67, "learning_rate": 0.001, "loss": 2.782, "step": 45156 }, { "epoch": 8.67, "learning_rate": 0.001, "loss": 2.7797, "step": 45168 }, { "epoch": 8.68, "learning_rate": 0.001, "loss": 2.7732, "step": 45180 }, { "epoch": 8.68, "learning_rate": 0.001, "loss": 2.7729, "step": 45192 }, { "epoch": 8.68, "learning_rate": 0.001, "loss": 2.7803, "step": 45204 }, { "epoch": 8.68, "learning_rate": 0.001, "loss": 2.7669, "step": 45216 }, { "epoch": 8.68, "learning_rate": 0.001, "loss": 2.7791, "step": 45228 }, { "epoch": 8.69, "learning_rate": 0.001, "loss": 2.7834, "step": 45240 }, { "epoch": 8.69, "learning_rate": 0.001, "loss": 2.7811, "step": 45252 }, { "epoch": 8.69, "learning_rate": 0.001, "loss": 2.7766, "step": 45264 }, { "epoch": 8.69, "learning_rate": 0.001, "loss": 2.7703, "step": 45276 }, { "epoch": 8.7, "learning_rate": 0.001, "loss": 2.7712, "step": 45288 }, { "epoch": 8.7, "learning_rate": 0.001, "loss": 2.7797, "step": 45300 }, { "epoch": 8.7, "learning_rate": 0.001, "loss": 2.7919, "step": 45312 }, { "epoch": 8.7, "learning_rate": 0.001, "loss": 2.7725, "step": 45324 }, { "epoch": 8.71, "learning_rate": 0.001, "loss": 2.7795, "step": 45336 }, { "epoch": 8.71, "learning_rate": 0.001, "loss": 2.7747, "step": 45348 }, { "epoch": 8.71, "learning_rate": 0.001, "loss": 2.7695, "step": 45360 }, { "epoch": 8.71, "learning_rate": 0.001, "loss": 2.7867, "step": 45372 }, { "epoch": 8.71, "learning_rate": 0.001, "loss": 2.7756, "step": 45384 }, { "epoch": 8.72, "learning_rate": 0.001, "loss": 2.7844, "step": 45396 }, { "epoch": 8.72, "learning_rate": 0.001, "loss": 2.7747, "step": 45408 }, { "epoch": 8.72, "learning_rate": 0.001, "loss": 2.7765, "step": 45420 }, { "epoch": 8.72, "learning_rate": 0.001, "loss": 2.7729, "step": 45432 }, { "epoch": 8.73, "learning_rate": 0.001, "loss": 2.78, "step": 45444 }, { "epoch": 8.73, "learning_rate": 0.001, "loss": 2.779, "step": 45456 }, { "epoch": 8.73, "learning_rate": 0.001, "loss": 2.7784, "step": 45468 }, { "epoch": 8.73, "learning_rate": 0.001, "loss": 2.7881, "step": 45480 }, { "epoch": 8.74, "learning_rate": 0.001, "loss": 2.776, "step": 45492 }, { "epoch": 8.74, "learning_rate": 0.001, "loss": 2.775, "step": 45504 }, { "epoch": 8.74, "learning_rate": 0.001, "loss": 2.7809, "step": 45516 }, { "epoch": 8.74, "learning_rate": 0.001, "loss": 2.7689, "step": 45528 }, { "epoch": 8.74, "learning_rate": 0.001, "loss": 2.7737, "step": 45540 }, { "epoch": 8.75, "learning_rate": 0.001, "loss": 2.7702, "step": 45552 }, { "epoch": 8.75, "learning_rate": 0.001, "loss": 2.7819, "step": 45564 }, { "epoch": 8.75, "learning_rate": 0.001, "loss": 2.7718, "step": 45576 }, { "epoch": 8.75, "learning_rate": 0.001, "loss": 2.7724, "step": 45588 }, { "epoch": 8.76, "learning_rate": 0.001, "loss": 2.7775, "step": 45600 }, { "epoch": 8.76, "learning_rate": 0.001, "loss": 2.7866, "step": 45612 }, { "epoch": 8.76, "learning_rate": 0.001, "loss": 2.7763, "step": 45624 }, { "epoch": 8.76, "eval_ag_news_accuracy": 0.2981875, "eval_ag_news_bleu_score": 4.3578400637933274, "eval_ag_news_bleu_score_sem": 0.14883707926161324, "eval_ag_news_emb_cos_sim": 0.763740062713623, "eval_ag_news_emb_cos_sim_sem": 0.008077497517762496, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.824906826019287, "eval_ag_news_n_ngrams_match_1": 12.81, "eval_ag_news_n_ngrams_match_2": 2.642, "eval_ag_news_n_ngrams_match_3": 0.728, "eval_ag_news_num_pred_words": 46.29, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.828530140816156, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3138468381859568, "eval_ag_news_runtime": 10.7475, "eval_ag_news_samples_per_second": 46.522, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3250934128970296, "eval_ag_news_token_set_f1_sem": 0.004294252512354449, "eval_ag_news_token_set_precision": 0.30407289679426225, "eval_ag_news_token_set_recall": 0.3649210660592528, "eval_ag_news_true_num_tokens": 56.09375, "step": 45625 }, { "epoch": 8.76, "eval_anthropic_toxic_prompts_accuracy": 0.103875, "eval_anthropic_toxic_prompts_bleu_score": 2.6399002406813015, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10639424604383535, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6314331293106079, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008894787472116989, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4990384578704834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.474, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.53, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.538, "eval_anthropic_toxic_prompts_num_pred_words": 47.836, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.08362536025581, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18654160059443092, "eval_anthropic_toxic_prompts_runtime": 11.0286, "eval_anthropic_toxic_prompts_samples_per_second": 45.337, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3312069635362678, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006540513237655385, "eval_anthropic_toxic_prompts_token_set_precision": 0.38125365875602346, "eval_anthropic_toxic_prompts_token_set_recall": 0.32555335433824906, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 45625 }, { "epoch": 8.76, "eval_arxiv_accuracy": 0.32359375, "eval_arxiv_bleu_score": 3.701269023552277, "eval_arxiv_bleu_score_sem": 0.10371974574933927, "eval_arxiv_emb_cos_sim": 0.7032526135444641, "eval_arxiv_emb_cos_sim_sem": 0.007770770535001924, "eval_arxiv_emb_top1_equal": 0.1640625, "eval_arxiv_emb_top1_equal_sem": 0.03286167651298939, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6717491149902344, "eval_arxiv_n_ngrams_match_1": 13.438, "eval_arxiv_n_ngrams_match_2": 2.416, "eval_arxiv_n_ngrams_match_3": 0.476, "eval_arxiv_num_pred_words": 40.228, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 39.32062203596728, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3178484965802355, "eval_arxiv_runtime": 10.3312, "eval_arxiv_samples_per_second": 48.397, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.314820727511316, "eval_arxiv_token_set_f1_sem": 0.004035596099446558, "eval_arxiv_token_set_precision": 0.2603014215915019, "eval_arxiv_token_set_recall": 0.41557391776458413, "eval_arxiv_true_num_tokens": 64.0, "step": 45625 }, { "epoch": 8.76, "eval_python_code_alpaca_accuracy": 0.1435, "eval_python_code_alpaca_bleu_score": 3.7175162554215166, "eval_python_code_alpaca_bleu_score_sem": 0.11682191147286755, "eval_python_code_alpaca_emb_cos_sim": 0.709485650062561, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00816893181288434, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.166703701019287, "eval_python_code_alpaca_n_ngrams_match_1": 8.694, "eval_python_code_alpaca_n_ngrams_match_2": 2.252, "eval_python_code_alpaca_n_ngrams_match_3": 0.686, "eval_python_code_alpaca_num_pred_words": 44.552, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.729136969158485, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2842620228140523, "eval_python_code_alpaca_runtime": 10.8712, "eval_python_code_alpaca_samples_per_second": 45.993, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.43770505860819836, "eval_python_code_alpaca_token_set_f1_sem": 0.005227562385291587, "eval_python_code_alpaca_token_set_precision": 0.46721530110310105, "eval_python_code_alpaca_token_set_recall": 0.43838408230338305, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 45625 }, { "epoch": 8.76, "eval_wikibio_accuracy": 0.2973125, "eval_wikibio_bleu_score": 5.505173689369643, "eval_wikibio_bleu_score_sem": 0.18336472313143126, "eval_wikibio_emb_cos_sim": 0.7184184193611145, "eval_wikibio_emb_cos_sim_sem": 0.01003264701478985, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.102202892303467, "eval_wikibio_n_ngrams_match_1": 10.128, "eval_wikibio_n_ngrams_match_2": 3.286, "eval_wikibio_n_ngrams_match_3": 1.16, "eval_wikibio_num_pred_words": 38.672, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.4733572677629, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.341123537531227, "eval_wikibio_runtime": 10.3262, "eval_wikibio_samples_per_second": 48.421, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3140838242134466, "eval_wikibio_token_set_f1_sem": 0.0048917191914225186, "eval_wikibio_token_set_precision": 0.3275089197620919, "eval_wikibio_token_set_recall": 0.3139662622455534, "eval_wikibio_true_num_tokens": 61.1328125, "step": 45625 }, { "epoch": 8.76, "eval_nq_accuracy": 0.4973125, "eval_nq_bleu_score": 9.879743998732101, "eval_nq_bleu_score_sem": 0.4298679560786954, "eval_nq_emb_cos_sim": 0.7971436977386475, "eval_nq_emb_cos_sim_sem": 0.008095892641503977, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.4060308933258057, "eval_nq_n_ngrams_match_1": 21.548, "eval_nq_n_ngrams_match_2": 7.334, "eval_nq_n_ngrams_match_3": 3.186, "eval_nq_num_pred_words": 48.726, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.089856850799437, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4125605925813294, "eval_nq_runtime": 10.3239, "eval_nq_samples_per_second": 48.431, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.43067224105822605, "eval_nq_token_set_f1_sem": 0.005032645768006537, "eval_nq_token_set_precision": 0.38256301616897675, "eval_nq_token_set_recall": 0.5040065194179415, "eval_nq_true_num_tokens": 64.0, "step": 45625 }, { "epoch": 8.76, "learning_rate": 0.001, "loss": 2.7735, "step": 45636 }, { "epoch": 8.76, "learning_rate": 0.001, "loss": 2.7899, "step": 45648 }, { "epoch": 8.77, "learning_rate": 0.001, "loss": 2.763, "step": 45660 }, { "epoch": 8.77, "learning_rate": 0.001, "loss": 2.7674, "step": 45672 }, { "epoch": 8.77, "learning_rate": 0.001, "loss": 2.7747, "step": 45684 }, { "epoch": 8.77, "learning_rate": 0.001, "loss": 2.7745, "step": 45696 }, { "epoch": 8.78, "learning_rate": 0.001, "loss": 2.7774, "step": 45708 }, { "epoch": 8.78, "learning_rate": 0.001, "loss": 2.7728, "step": 45720 }, { "epoch": 8.78, "learning_rate": 0.001, "loss": 2.7789, "step": 45732 }, { "epoch": 8.78, "learning_rate": 0.001, "loss": 2.7671, "step": 45744 }, { "epoch": 8.79, "learning_rate": 0.001, "loss": 2.7653, "step": 45756 }, { "epoch": 8.79, "learning_rate": 0.001, "loss": 2.7749, "step": 45768 }, { "epoch": 8.79, "learning_rate": 0.001, "loss": 2.769, "step": 45780 }, { "epoch": 8.79, "learning_rate": 0.001, "loss": 2.7781, "step": 45792 }, { "epoch": 8.79, "learning_rate": 0.001, "loss": 2.7702, "step": 45804 }, { "epoch": 8.8, "learning_rate": 0.001, "loss": 2.7756, "step": 45816 }, { "epoch": 8.8, "learning_rate": 0.001, "loss": 2.7747, "step": 45828 }, { "epoch": 8.8, "learning_rate": 0.001, "loss": 2.7691, "step": 45840 }, { "epoch": 8.8, "learning_rate": 0.001, "loss": 2.7792, "step": 45852 }, { "epoch": 8.81, "learning_rate": 0.001, "loss": 2.7765, "step": 45864 }, { "epoch": 8.81, "learning_rate": 0.001, "loss": 2.7764, "step": 45876 }, { "epoch": 8.81, "learning_rate": 0.001, "loss": 2.7789, "step": 45888 }, { "epoch": 8.81, "learning_rate": 0.001, "loss": 2.7811, "step": 45900 }, { "epoch": 8.82, "learning_rate": 0.001, "loss": 2.7754, "step": 45912 }, { "epoch": 8.82, "learning_rate": 0.001, "loss": 2.7782, "step": 45924 }, { "epoch": 8.82, "learning_rate": 0.001, "loss": 2.7747, "step": 45936 }, { "epoch": 8.82, "learning_rate": 0.001, "loss": 2.777, "step": 45948 }, { "epoch": 8.82, "learning_rate": 0.001, "loss": 2.7843, "step": 45960 }, { "epoch": 8.83, "learning_rate": 0.001, "loss": 2.7663, "step": 45972 }, { "epoch": 8.83, "learning_rate": 0.001, "loss": 2.7744, "step": 45984 }, { "epoch": 8.83, "learning_rate": 0.001, "loss": 2.7813, "step": 45996 }, { "epoch": 8.83, "learning_rate": 0.001, "loss": 2.7748, "step": 46008 }, { "epoch": 8.84, "learning_rate": 0.001, "loss": 2.7655, "step": 46020 }, { "epoch": 8.84, "learning_rate": 0.001, "loss": 2.781, "step": 46032 }, { "epoch": 8.84, "learning_rate": 0.001, "loss": 2.7822, "step": 46044 }, { "epoch": 8.84, "learning_rate": 0.001, "loss": 2.7846, "step": 46056 }, { "epoch": 8.85, "learning_rate": 0.001, "loss": 2.7745, "step": 46068 }, { "epoch": 8.85, "learning_rate": 0.001, "loss": 2.7844, "step": 46080 }, { "epoch": 8.85, "learning_rate": 0.001, "loss": 2.7663, "step": 46092 }, { "epoch": 8.85, "learning_rate": 0.001, "loss": 2.7775, "step": 46104 }, { "epoch": 8.85, "learning_rate": 0.001, "loss": 2.7611, "step": 46116 }, { "epoch": 8.86, "learning_rate": 0.001, "loss": 2.7722, "step": 46128 }, { "epoch": 8.86, "learning_rate": 0.001, "loss": 2.7753, "step": 46140 }, { "epoch": 8.86, "learning_rate": 0.001, "loss": 2.7818, "step": 46152 }, { "epoch": 8.86, "learning_rate": 0.001, "loss": 2.7633, "step": 46164 }, { "epoch": 8.87, "learning_rate": 0.001, "loss": 2.7788, "step": 46176 }, { "epoch": 8.87, "learning_rate": 0.001, "loss": 2.789, "step": 46188 }, { "epoch": 8.87, "learning_rate": 0.001, "loss": 2.7722, "step": 46200 }, { "epoch": 8.87, "learning_rate": 0.001, "loss": 2.7755, "step": 46212 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 2.7746, "step": 46224 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 2.7727, "step": 46236 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 2.766, "step": 46248 }, { "epoch": 8.88, "eval_ag_news_accuracy": 0.29840625, "eval_ag_news_bleu_score": 4.31373880770984, "eval_ag_news_bleu_score_sem": 0.14891728801673904, "eval_ag_news_emb_cos_sim": 0.7668585777282715, "eval_ag_news_emb_cos_sim_sem": 0.008202250872265726, "eval_ag_news_emb_top1_equal": 0.125, "eval_ag_news_emb_top1_equal_sem": 0.02934655822437397, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8158342838287354, "eval_ag_news_n_ngrams_match_1": 12.81, "eval_ag_news_n_ngrams_match_2": 2.57, "eval_ag_news_n_ngrams_match_3": 0.706, "eval_ag_news_num_pred_words": 46.0, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.41462927317122, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3122905712875661, "eval_ag_news_runtime": 13.3588, "eval_ag_news_samples_per_second": 37.428, "eval_ag_news_steps_per_second": 0.075, "eval_ag_news_token_set_f1": 0.32392258441957117, "eval_ag_news_token_set_f1_sem": 0.004329541468647039, "eval_ag_news_token_set_precision": 0.30184441470228995, "eval_ag_news_token_set_recall": 0.36679101991713964, "eval_ag_news_true_num_tokens": 56.09375, "step": 46250 }, { "epoch": 8.88, "eval_anthropic_toxic_prompts_accuracy": 0.1038125, "eval_anthropic_toxic_prompts_bleu_score": 2.6833036236164887, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10954931079641665, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6263077259063721, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010921873737018344, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.5129191875457764, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.434, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.558, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.508, "eval_anthropic_toxic_prompts_num_pred_words": 46.23, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 33.54605220678183, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19023205204409382, "eval_anthropic_toxic_prompts_runtime": 11.8708, "eval_anthropic_toxic_prompts_samples_per_second": 42.12, "eval_anthropic_toxic_prompts_steps_per_second": 0.084, "eval_anthropic_toxic_prompts_token_set_f1": 0.3229738713485022, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006354645238200712, "eval_anthropic_toxic_prompts_token_set_precision": 0.38190129987442994, "eval_anthropic_toxic_prompts_token_set_recall": 0.3117707201913881, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 46250 }, { "epoch": 8.88, "eval_arxiv_accuracy": 0.32334375, "eval_arxiv_bleu_score": 3.8999461751598536, "eval_arxiv_bleu_score_sem": 0.11473406671058033, "eval_arxiv_emb_cos_sim": 0.7049464583396912, "eval_arxiv_emb_cos_sim_sem": 0.008832316623000732, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6608574390411377, "eval_arxiv_n_ngrams_match_1": 13.764, "eval_arxiv_n_ngrams_match_2": 2.548, "eval_arxiv_n_ngrams_match_3": 0.548, "eval_arxiv_num_pred_words": 40.022, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 38.89467839344808, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3231329056106329, "eval_arxiv_runtime": 12.1071, "eval_arxiv_samples_per_second": 41.298, "eval_arxiv_steps_per_second": 0.083, "eval_arxiv_token_set_f1": 0.31954391318957837, "eval_arxiv_token_set_f1_sem": 0.004254608967652773, "eval_arxiv_token_set_precision": 0.266894297936754, "eval_arxiv_token_set_recall": 0.4180085983595963, "eval_arxiv_true_num_tokens": 64.0, "step": 46250 }, { "epoch": 8.88, "eval_python_code_alpaca_accuracy": 0.14528125, "eval_python_code_alpaca_bleu_score": 3.6883015490260704, "eval_python_code_alpaca_bleu_score_sem": 0.11907441131673664, "eval_python_code_alpaca_emb_cos_sim": 0.7158737182617188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008345321642258496, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.148151159286499, "eval_python_code_alpaca_n_ngrams_match_1": 8.682, "eval_python_code_alpaca_n_ngrams_match_2": 2.24, "eval_python_code_alpaca_n_ngrams_match_3": 0.664, "eval_python_code_alpaca_num_pred_words": 45.278, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.292959773892136, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2826779664379918, "eval_python_code_alpaca_runtime": 10.3254, "eval_python_code_alpaca_samples_per_second": 48.424, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.4362769880595835, "eval_python_code_alpaca_token_set_f1_sem": 0.005545972938048647, "eval_python_code_alpaca_token_set_precision": 0.4676311072069239, "eval_python_code_alpaca_token_set_recall": 0.4350531625377758, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 46250 }, { "epoch": 8.88, "eval_wikibio_accuracy": 0.30046875, "eval_wikibio_bleu_score": 5.282017018894081, "eval_wikibio_bleu_score_sem": 0.19720415492134732, "eval_wikibio_emb_cos_sim": 0.6897943615913391, "eval_wikibio_emb_cos_sim_sem": 0.012623049617641591, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.131740093231201, "eval_wikibio_n_ngrams_match_1": 9.526, "eval_wikibio_n_ngrams_match_2": 3.098, "eval_wikibio_n_ngrams_match_3": 1.082, "eval_wikibio_num_pred_words": 36.246, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 62.28621250724138, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3235842453627053, "eval_wikibio_runtime": 12.5273, "eval_wikibio_samples_per_second": 39.913, "eval_wikibio_steps_per_second": 0.08, "eval_wikibio_token_set_f1": 0.30051406466350117, "eval_wikibio_token_set_f1_sem": 0.005713289104704219, "eval_wikibio_token_set_precision": 0.3073511396358744, "eval_wikibio_token_set_recall": 0.31177012278177396, "eval_wikibio_true_num_tokens": 61.1328125, "step": 46250 }, { "epoch": 8.88, "eval_nq_accuracy": 0.49709375, "eval_nq_bleu_score": 10.638222654418671, "eval_nq_bleu_score_sem": 0.45024079081057866, "eval_nq_emb_cos_sim": 0.8051466941833496, "eval_nq_emb_cos_sim_sem": 0.007682704036493903, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.403503179550171, "eval_nq_n_ngrams_match_1": 21.642, "eval_nq_n_ngrams_match_2": 7.668, "eval_nq_n_ngrams_match_3": 3.518, "eval_nq_num_pred_words": 48.744, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.061860265451713, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4145017684330846, "eval_nq_runtime": 11.0429, "eval_nq_samples_per_second": 45.278, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4342943505567684, "eval_nq_token_set_f1_sem": 0.005005554288096789, "eval_nq_token_set_precision": 0.3870788850096803, "eval_nq_token_set_recall": 0.5057862840241787, "eval_nq_true_num_tokens": 64.0, "step": 46250 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 2.774, "step": 46260 }, { "epoch": 8.88, "learning_rate": 0.001, "loss": 2.7667, "step": 46272 }, { "epoch": 8.89, "learning_rate": 0.001, "loss": 2.7829, "step": 46284 }, { "epoch": 8.89, "learning_rate": 0.001, "loss": 2.7785, "step": 46296 }, { "epoch": 8.89, "learning_rate": 0.001, "loss": 2.7654, "step": 46308 }, { "epoch": 8.89, "learning_rate": 0.001, "loss": 2.7785, "step": 46320 }, { "epoch": 8.9, "learning_rate": 0.001, "loss": 2.7762, "step": 46332 }, { "epoch": 8.9, "learning_rate": 0.001, "loss": 2.7741, "step": 46344 }, { "epoch": 8.9, "learning_rate": 0.001, "loss": 2.7732, "step": 46356 }, { "epoch": 8.9, "learning_rate": 0.001, "loss": 2.785, "step": 46368 }, { "epoch": 8.91, "learning_rate": 0.001, "loss": 2.7871, "step": 46380 }, { "epoch": 8.91, "learning_rate": 0.001, "loss": 2.7701, "step": 46392 }, { "epoch": 8.91, "learning_rate": 0.001, "loss": 2.7816, "step": 46404 }, { "epoch": 8.91, "learning_rate": 0.001, "loss": 2.7728, "step": 46416 }, { "epoch": 8.91, "learning_rate": 0.001, "loss": 2.7675, "step": 46428 }, { "epoch": 8.92, "learning_rate": 0.001, "loss": 2.7648, "step": 46440 }, { "epoch": 8.92, "learning_rate": 0.001, "loss": 2.7681, "step": 46452 }, { "epoch": 8.92, "learning_rate": 0.001, "loss": 2.773, "step": 46464 }, { "epoch": 8.92, "learning_rate": 0.001, "loss": 2.7703, "step": 46476 }, { "epoch": 8.93, "learning_rate": 0.001, "loss": 2.7712, "step": 46488 }, { "epoch": 8.93, "learning_rate": 0.001, "loss": 2.7682, "step": 46500 }, { "epoch": 8.93, "learning_rate": 0.001, "loss": 2.7594, "step": 46512 }, { "epoch": 8.93, "learning_rate": 0.001, "loss": 2.7809, "step": 46524 }, { "epoch": 8.94, "learning_rate": 0.001, "loss": 2.7777, "step": 46536 }, { "epoch": 8.94, "learning_rate": 0.001, "loss": 2.7752, "step": 46548 }, { "epoch": 8.94, "learning_rate": 0.001, "loss": 2.7684, "step": 46560 }, { "epoch": 8.94, "learning_rate": 0.001, "loss": 2.7754, "step": 46572 }, { "epoch": 8.94, "learning_rate": 0.001, "loss": 2.763, "step": 46584 }, { "epoch": 8.95, "learning_rate": 0.001, "loss": 2.7762, "step": 46596 }, { "epoch": 8.95, "learning_rate": 0.001, "loss": 2.773, "step": 46608 }, { "epoch": 8.95, "learning_rate": 0.001, "loss": 2.7737, "step": 46620 }, { "epoch": 8.95, "learning_rate": 0.001, "loss": 2.7605, "step": 46632 }, { "epoch": 8.96, "learning_rate": 0.001, "loss": 2.7769, "step": 46644 }, { "epoch": 8.96, "learning_rate": 0.001, "loss": 2.7697, "step": 46656 }, { "epoch": 8.96, "learning_rate": 0.001, "loss": 2.7778, "step": 46668 }, { "epoch": 8.96, "learning_rate": 0.001, "loss": 2.7639, "step": 46680 }, { "epoch": 8.97, "learning_rate": 0.001, "loss": 2.7688, "step": 46692 }, { "epoch": 8.97, "learning_rate": 0.001, "loss": 2.7701, "step": 46704 }, { "epoch": 8.97, "learning_rate": 0.001, "loss": 2.7646, "step": 46716 }, { "epoch": 8.97, "learning_rate": 0.001, "loss": 2.7634, "step": 46728 }, { "epoch": 8.97, "learning_rate": 0.001, "loss": 2.778, "step": 46740 }, { "epoch": 8.98, "learning_rate": 0.001, "loss": 2.7694, "step": 46752 }, { "epoch": 8.98, "learning_rate": 0.001, "loss": 2.7716, "step": 46764 }, { "epoch": 8.98, "learning_rate": 0.001, "loss": 2.773, "step": 46776 }, { "epoch": 8.98, "learning_rate": 0.001, "loss": 2.7676, "step": 46788 }, { "epoch": 8.99, "learning_rate": 0.001, "loss": 2.7777, "step": 46800 }, { "epoch": 8.99, "learning_rate": 0.001, "loss": 2.7714, "step": 46812 }, { "epoch": 8.99, "learning_rate": 0.001, "loss": 2.7716, "step": 46824 }, { "epoch": 8.99, "learning_rate": 0.001, "loss": 2.7684, "step": 46836 }, { "epoch": 9.0, "learning_rate": 0.001, "loss": 2.7678, "step": 46848 }, { "epoch": 9.0, "learning_rate": 0.001, "loss": 2.7729, "step": 46860 }, { "epoch": 9.0, "learning_rate": 0.001, "loss": 2.7707, "step": 46872 }, { "epoch": 9.0, "eval_ag_news_accuracy": 0.29903125, "eval_ag_news_bleu_score": 4.288653808307046, "eval_ag_news_bleu_score_sem": 0.14459158741794412, "eval_ag_news_emb_cos_sim": 0.7732715606689453, "eval_ag_news_emb_cos_sim_sem": 0.00808783139818176, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8124728202819824, "eval_ag_news_n_ngrams_match_1": 12.812, "eval_ag_news_n_ngrams_match_2": 2.654, "eval_ag_news_n_ngrams_match_3": 0.746, "eval_ag_news_num_pred_words": 46.692, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.262225945002136, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3134112998323353, "eval_ag_news_runtime": 10.5205, "eval_ag_news_samples_per_second": 47.526, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.32487076987979246, "eval_ag_news_token_set_f1_sem": 0.004333593802880955, "eval_ag_news_token_set_precision": 0.30193949526342484, "eval_ag_news_token_set_recall": 0.3704356691823834, "eval_ag_news_true_num_tokens": 56.09375, "step": 46875 }, { "epoch": 9.0, "eval_anthropic_toxic_prompts_accuracy": 0.1038125, "eval_anthropic_toxic_prompts_bleu_score": 2.6723761364822427, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10595742970731784, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6198536157608032, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010522718056928875, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4786829948425293, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.214, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.51, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.556, "eval_anthropic_toxic_prompts_num_pred_words": 47.322, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.417000592861584, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1824072993406963, "eval_anthropic_toxic_prompts_runtime": 9.7625, "eval_anthropic_toxic_prompts_samples_per_second": 51.216, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3239000278615702, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006445313758389343, "eval_anthropic_toxic_prompts_token_set_precision": 0.36797836907206205, "eval_anthropic_toxic_prompts_token_set_recall": 0.32676964757090793, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 46875 }, { "epoch": 9.0, "eval_arxiv_accuracy": 0.32184375, "eval_arxiv_bleu_score": 3.641969790826636, "eval_arxiv_bleu_score_sem": 0.10476167297881656, "eval_arxiv_emb_cos_sim": 0.6878165006637573, "eval_arxiv_emb_cos_sim_sem": 0.00910235477413257, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6736721992492676, "eval_arxiv_n_ngrams_match_1": 13.046, "eval_arxiv_n_ngrams_match_2": 2.392, "eval_arxiv_n_ngrams_match_3": 0.482, "eval_arxiv_num_pred_words": 38.78, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 39.39631166069638, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3104139493609104, "eval_arxiv_runtime": 10.9606, "eval_arxiv_samples_per_second": 45.618, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.31033714250777356, "eval_arxiv_token_set_f1_sem": 0.00431074284094741, "eval_arxiv_token_set_precision": 0.2536412884278891, "eval_arxiv_token_set_recall": 0.42613121192327474, "eval_arxiv_true_num_tokens": 64.0, "step": 46875 }, { "epoch": 9.0, "eval_python_code_alpaca_accuracy": 0.1441875, "eval_python_code_alpaca_bleu_score": 3.592983674218678, "eval_python_code_alpaca_bleu_score_sem": 0.10707368222914622, "eval_python_code_alpaca_emb_cos_sim": 0.7041624784469604, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01002766512558696, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1552317142486572, "eval_python_code_alpaca_n_ngrams_match_1": 8.356, "eval_python_code_alpaca_n_ngrams_match_2": 2.122, "eval_python_code_alpaca_n_ngrams_match_3": 0.586, "eval_python_code_alpaca_num_pred_words": 42.308, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.458472123962313, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.28172292893895556, "eval_python_code_alpaca_runtime": 10.184, "eval_python_code_alpaca_samples_per_second": 49.097, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4299302781801553, "eval_python_code_alpaca_token_set_f1_sem": 0.005656002653046869, "eval_python_code_alpaca_token_set_precision": 0.44608991108380747, "eval_python_code_alpaca_token_set_recall": 0.4457493382957042, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 46875 }, { "epoch": 9.0, "eval_wikibio_accuracy": 0.2995625, "eval_wikibio_bleu_score": 5.255827214641384, "eval_wikibio_bleu_score_sem": 0.18697395818757404, "eval_wikibio_emb_cos_sim": 0.6974311470985413, "eval_wikibio_emb_cos_sim_sem": 0.011648033643813682, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.123207092285156, "eval_wikibio_n_ngrams_match_1": 9.766, "eval_wikibio_n_ngrams_match_2": 3.122, "eval_wikibio_n_ngrams_match_3": 1.054, "eval_wikibio_num_pred_words": 37.344, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 61.756985356057086, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32980918775204293, "eval_wikibio_runtime": 10.2956, "eval_wikibio_samples_per_second": 48.564, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3070015958413056, "eval_wikibio_token_set_f1_sem": 0.0055645537462260955, "eval_wikibio_token_set_precision": 0.31397095048371115, "eval_wikibio_token_set_recall": 0.3187160784834163, "eval_wikibio_true_num_tokens": 61.1328125, "step": 46875 }, { "epoch": 9.0, "eval_nq_accuracy": 0.497375, "eval_nq_bleu_score": 10.043926883687972, "eval_nq_bleu_score_sem": 0.4276172846833347, "eval_nq_emb_cos_sim": 0.8026154041290283, "eval_nq_emb_cos_sim_sem": 0.00783238659334682, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.406917095184326, "eval_nq_n_ngrams_match_1": 21.462, "eval_nq_n_ngrams_match_2": 7.45, "eval_nq_n_ngrams_match_3": 3.236, "eval_nq_num_pred_words": 48.618, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 11.099689058568261, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41010460940472193, "eval_nq_runtime": 10.3816, "eval_nq_samples_per_second": 48.162, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.43221719653866686, "eval_nq_token_set_f1_sem": 0.005051430922410576, "eval_nq_token_set_precision": 0.38060409292633585, "eval_nq_token_set_recall": 0.513445150778036, "eval_nq_true_num_tokens": 64.0, "step": 46875 }, { "epoch": 9.0, "learning_rate": 0.001, "loss": 2.7587, "step": 46884 }, { "epoch": 9.0, "learning_rate": 0.001, "loss": 2.7557, "step": 46896 }, { "epoch": 9.01, "learning_rate": 0.001, "loss": 2.7481, "step": 46908 }, { "epoch": 9.01, "learning_rate": 0.001, "loss": 2.7496, "step": 46920 }, { "epoch": 9.01, "learning_rate": 0.001, "loss": 2.7439, "step": 46932 }, { "epoch": 9.01, "learning_rate": 0.001, "loss": 2.7462, "step": 46944 }, { "epoch": 9.02, "learning_rate": 0.001, "loss": 2.7593, "step": 46956 }, { "epoch": 9.02, "learning_rate": 0.001, "loss": 2.7543, "step": 46968 }, { "epoch": 9.02, "learning_rate": 0.001, "loss": 2.7362, "step": 46980 }, { "epoch": 9.02, "learning_rate": 0.001, "loss": 2.7432, "step": 46992 }, { "epoch": 9.03, "learning_rate": 0.001, "loss": 2.7518, "step": 47004 }, { "epoch": 9.03, "learning_rate": 0.001, "loss": 2.7525, "step": 47016 }, { "epoch": 9.03, "learning_rate": 0.001, "loss": 2.7502, "step": 47028 }, { "epoch": 9.03, "learning_rate": 0.001, "loss": 2.7586, "step": 47040 }, { "epoch": 9.03, "learning_rate": 0.001, "loss": 2.7568, "step": 47052 }, { "epoch": 9.04, "learning_rate": 0.001, "loss": 2.763, "step": 47064 }, { "epoch": 9.04, "learning_rate": 0.001, "loss": 2.7491, "step": 47076 }, { "epoch": 9.04, "learning_rate": 0.001, "loss": 2.7499, "step": 47088 }, { "epoch": 9.04, "learning_rate": 0.001, "loss": 2.7512, "step": 47100 }, { "epoch": 9.05, "learning_rate": 0.001, "loss": 2.758, "step": 47112 }, { "epoch": 9.05, "learning_rate": 0.001, "loss": 2.7539, "step": 47124 }, { "epoch": 9.05, "learning_rate": 0.001, "loss": 2.7612, "step": 47136 }, { "epoch": 9.05, "learning_rate": 0.001, "loss": 2.7435, "step": 47148 }, { "epoch": 9.06, "learning_rate": 0.001, "loss": 2.7508, "step": 47160 }, { "epoch": 9.06, "learning_rate": 0.001, "loss": 2.752, "step": 47172 }, { "epoch": 9.06, "learning_rate": 0.001, "loss": 2.758, "step": 47184 }, { "epoch": 9.06, "learning_rate": 0.001, "loss": 2.7456, "step": 47196 }, { "epoch": 9.06, "learning_rate": 0.001, "loss": 2.7555, "step": 47208 }, { "epoch": 9.07, "learning_rate": 0.001, "loss": 2.7513, "step": 47220 }, { "epoch": 9.07, "learning_rate": 0.001, "loss": 2.7564, "step": 47232 }, { "epoch": 9.07, "learning_rate": 0.001, "loss": 2.7463, "step": 47244 }, { "epoch": 9.07, "learning_rate": 0.001, "loss": 2.7569, "step": 47256 }, { "epoch": 9.08, "learning_rate": 0.001, "loss": 2.7583, "step": 47268 }, { "epoch": 9.08, "learning_rate": 0.001, "loss": 2.7581, "step": 47280 }, { "epoch": 9.08, "learning_rate": 0.001, "loss": 2.7566, "step": 47292 }, { "epoch": 9.08, "learning_rate": 0.001, "loss": 2.7626, "step": 47304 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 2.7437, "step": 47316 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 2.7656, "step": 47328 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 2.7414, "step": 47340 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 2.7427, "step": 47352 }, { "epoch": 9.09, "learning_rate": 0.001, "loss": 2.7606, "step": 47364 }, { "epoch": 9.1, "learning_rate": 0.001, "loss": 2.7566, "step": 47376 }, { "epoch": 9.1, "learning_rate": 0.001, "loss": 2.7488, "step": 47388 }, { "epoch": 9.1, "learning_rate": 0.001, "loss": 2.7566, "step": 47400 }, { "epoch": 9.1, "learning_rate": 0.001, "loss": 2.7555, "step": 47412 }, { "epoch": 9.11, "learning_rate": 0.001, "loss": 2.7526, "step": 47424 }, { "epoch": 9.11, "learning_rate": 0.001, "loss": 2.7527, "step": 47436 }, { "epoch": 9.11, "learning_rate": 0.001, "loss": 2.7515, "step": 47448 }, { "epoch": 9.11, "learning_rate": 0.001, "loss": 2.7468, "step": 47460 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 2.7412, "step": 47472 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 2.7546, "step": 47484 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 2.7521, "step": 47496 }, { "epoch": 9.12, "eval_ag_news_accuracy": 0.29959375, "eval_ag_news_bleu_score": 4.22949652749428, "eval_ag_news_bleu_score_sem": 0.13746238198174335, "eval_ag_news_emb_cos_sim": 0.7705955505371094, "eval_ag_news_emb_cos_sim_sem": 0.008421604145340673, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.8094165325164795, "eval_ag_news_n_ngrams_match_1": 12.772, "eval_ag_news_n_ngrams_match_2": 2.596, "eval_ag_news_n_ngrams_match_3": 0.712, "eval_ag_news_num_pred_words": 46.474, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 45.124102737258156, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31430772925392514, "eval_ag_news_runtime": 12.0799, "eval_ag_news_samples_per_second": 41.391, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3221694479981205, "eval_ag_news_token_set_f1_sem": 0.004173123746570576, "eval_ag_news_token_set_precision": 0.2997731322387018, "eval_ag_news_token_set_recall": 0.36404314770694457, "eval_ag_news_true_num_tokens": 56.09375, "step": 47500 }, { "epoch": 9.12, "eval_anthropic_toxic_prompts_accuracy": 0.105, "eval_anthropic_toxic_prompts_bleu_score": 2.6824998451745734, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09735606462677479, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6408157348632812, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009020674181718457, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.456223726272583, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.638, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.548, "eval_anthropic_toxic_prompts_num_pred_words": 48.08, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.697053475790963, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.193398076506703, "eval_anthropic_toxic_prompts_runtime": 10.4325, "eval_anthropic_toxic_prompts_samples_per_second": 47.927, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.33546083030101165, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248965843310193, "eval_anthropic_toxic_prompts_token_set_precision": 0.3985590046768791, "eval_anthropic_toxic_prompts_token_set_recall": 0.32071277025793865, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 47500 }, { "epoch": 9.12, "eval_arxiv_accuracy": 0.32346875, "eval_arxiv_bleu_score": 3.83866917627666, "eval_arxiv_bleu_score_sem": 0.11142097098331777, "eval_arxiv_emb_cos_sim": 0.7052998542785645, "eval_arxiv_emb_cos_sim_sem": 0.008977505930675683, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.654217004776001, "eval_arxiv_n_ngrams_match_1": 13.54, "eval_arxiv_n_ngrams_match_2": 2.512, "eval_arxiv_n_ngrams_match_3": 0.534, "eval_arxiv_num_pred_words": 40.57, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 38.637256480882606, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31631203107164363, "eval_arxiv_runtime": 10.9376, "eval_arxiv_samples_per_second": 45.714, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.3159684513390401, "eval_arxiv_token_set_f1_sem": 0.004085429987867343, "eval_arxiv_token_set_precision": 0.2614633675206258, "eval_arxiv_token_set_recall": 0.4207364699353341, "eval_arxiv_true_num_tokens": 64.0, "step": 47500 }, { "epoch": 9.12, "eval_python_code_alpaca_accuracy": 0.14646875, "eval_python_code_alpaca_bleu_score": 3.8212681127184696, "eval_python_code_alpaca_bleu_score_sem": 0.12041746269843684, "eval_python_code_alpaca_emb_cos_sim": 0.7284071445465088, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008505652205260984, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1190266609191895, "eval_python_code_alpaca_n_ngrams_match_1": 8.992, "eval_python_code_alpaca_n_ngrams_match_2": 2.388, "eval_python_code_alpaca_n_ngrams_match_3": 0.696, "eval_python_code_alpaca_num_pred_words": 44.718, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.62434776081528, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2945015354590723, "eval_python_code_alpaca_runtime": 10.6084, "eval_python_code_alpaca_samples_per_second": 47.133, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.44566224977647795, "eval_python_code_alpaca_token_set_f1_sem": 0.005389132628955182, "eval_python_code_alpaca_token_set_precision": 0.48580211359744807, "eval_python_code_alpaca_token_set_recall": 0.4351788171634512, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 47500 }, { "epoch": 9.12, "eval_wikibio_accuracy": 0.2994375, "eval_wikibio_bleu_score": 5.3457438873014596, "eval_wikibio_bleu_score_sem": 0.18799731893879146, "eval_wikibio_emb_cos_sim": 0.7072981595993042, "eval_wikibio_emb_cos_sim_sem": 0.01047188764053572, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.10506534576416, "eval_wikibio_n_ngrams_match_1": 9.658, "eval_wikibio_n_ngrams_match_2": 3.094, "eval_wikibio_n_ngrams_match_3": 1.072, "eval_wikibio_num_pred_words": 36.484, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.646707423566404, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32754757004868085, "eval_wikibio_runtime": 10.4914, "eval_wikibio_samples_per_second": 47.658, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.3056928148803203, "eval_wikibio_token_set_f1_sem": 0.005388830498402596, "eval_wikibio_token_set_precision": 0.3142341671727403, "eval_wikibio_token_set_recall": 0.31170697687950055, "eval_wikibio_true_num_tokens": 61.1328125, "step": 47500 }, { "epoch": 9.12, "eval_nq_accuracy": 0.5001875, "eval_nq_bleu_score": 10.247925520087861, "eval_nq_bleu_score_sem": 0.43562072258183493, "eval_nq_emb_cos_sim": 0.8029464483261108, "eval_nq_emb_cos_sim_sem": 0.00777242348171696, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.391540050506592, "eval_nq_n_ngrams_match_1": 21.606, "eval_nq_n_ngrams_match_2": 7.576, "eval_nq_n_ngrams_match_3": 3.34, "eval_nq_num_pred_words": 49.124, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.930314223634, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4132165959253411, "eval_nq_runtime": 10.4544, "eval_nq_samples_per_second": 47.827, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.43017255422591094, "eval_nq_token_set_f1_sem": 0.005125741734852646, "eval_nq_token_set_precision": 0.38525483841614144, "eval_nq_token_set_recall": 0.49745545675268665, "eval_nq_true_num_tokens": 64.0, "step": 47500 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 2.7467, "step": 47508 }, { "epoch": 9.12, "learning_rate": 0.001, "loss": 2.756, "step": 47520 }, { "epoch": 9.13, "learning_rate": 0.001, "loss": 2.749, "step": 47532 }, { "epoch": 9.13, "learning_rate": 0.001, "loss": 2.7496, "step": 47544 }, { "epoch": 9.13, "learning_rate": 0.001, "loss": 2.7415, "step": 47556 }, { "epoch": 9.13, "learning_rate": 0.001, "loss": 2.7536, "step": 47568 }, { "epoch": 9.14, "learning_rate": 0.001, "loss": 2.7458, "step": 47580 }, { "epoch": 9.14, "learning_rate": 0.001, "loss": 2.754, "step": 47592 }, { "epoch": 9.14, "learning_rate": 0.001, "loss": 2.7511, "step": 47604 }, { "epoch": 9.14, "learning_rate": 0.001, "loss": 2.7557, "step": 47616 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 2.7573, "step": 47628 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 2.7593, "step": 47640 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 2.7505, "step": 47652 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 2.7482, "step": 47664 }, { "epoch": 9.15, "learning_rate": 0.001, "loss": 2.7605, "step": 47676 }, { "epoch": 9.16, "learning_rate": 0.001, "loss": 2.759, "step": 47688 }, { "epoch": 9.16, "learning_rate": 0.001, "loss": 2.7598, "step": 47700 }, { "epoch": 9.16, "learning_rate": 0.001, "loss": 2.7586, "step": 47712 }, { "epoch": 9.16, "learning_rate": 0.001, "loss": 2.7666, "step": 47724 }, { "epoch": 9.17, "learning_rate": 0.001, "loss": 2.7557, "step": 47736 }, { "epoch": 9.17, "learning_rate": 0.001, "loss": 2.7563, "step": 47748 }, { "epoch": 9.17, "learning_rate": 0.001, "loss": 2.7472, "step": 47760 }, { "epoch": 9.17, "learning_rate": 0.001, "loss": 2.7543, "step": 47772 }, { "epoch": 9.18, "learning_rate": 0.001, "loss": 2.756, "step": 47784 }, { "epoch": 9.18, "learning_rate": 0.001, "loss": 2.7686, "step": 47796 }, { "epoch": 9.18, "learning_rate": 0.001, "loss": 2.7496, "step": 47808 }, { "epoch": 9.18, "learning_rate": 0.001, "loss": 2.7612, "step": 47820 }, { "epoch": 9.18, "learning_rate": 0.001, "loss": 2.7648, "step": 47832 }, { "epoch": 9.19, "learning_rate": 0.001, "loss": 2.7478, "step": 47844 }, { "epoch": 9.19, "learning_rate": 0.001, "loss": 2.7375, "step": 47856 }, { "epoch": 9.19, "learning_rate": 0.001, "loss": 2.7452, "step": 47868 }, { "epoch": 9.19, "learning_rate": 0.001, "loss": 2.7489, "step": 47880 }, { "epoch": 9.2, "learning_rate": 0.001, "loss": 2.7521, "step": 47892 }, { "epoch": 9.2, "learning_rate": 0.001, "loss": 2.7564, "step": 47904 }, { "epoch": 9.2, "learning_rate": 0.001, "loss": 2.7504, "step": 47916 }, { "epoch": 9.2, "learning_rate": 0.001, "loss": 2.755, "step": 47928 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 2.7465, "step": 47940 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 2.7471, "step": 47952 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 2.749, "step": 47964 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 2.7607, "step": 47976 }, { "epoch": 9.21, "learning_rate": 0.001, "loss": 2.7441, "step": 47988 }, { "epoch": 9.22, "learning_rate": 0.001, "loss": 2.7466, "step": 48000 }, { "epoch": 9.22, "learning_rate": 0.001, "loss": 2.7465, "step": 48012 }, { "epoch": 9.22, "learning_rate": 0.001, "loss": 2.7501, "step": 48024 }, { "epoch": 9.22, "learning_rate": 0.001, "loss": 2.7498, "step": 48036 }, { "epoch": 9.23, "learning_rate": 0.001, "loss": 2.7591, "step": 48048 }, { "epoch": 9.23, "learning_rate": 0.001, "loss": 2.755, "step": 48060 }, { "epoch": 9.23, "learning_rate": 0.001, "loss": 2.7467, "step": 48072 }, { "epoch": 9.23, "learning_rate": 0.001, "loss": 2.7501, "step": 48084 }, { "epoch": 9.24, "learning_rate": 0.001, "loss": 2.7473, "step": 48096 }, { "epoch": 9.24, "learning_rate": 0.001, "loss": 2.7489, "step": 48108 }, { "epoch": 9.24, "learning_rate": 0.001, "loss": 2.7483, "step": 48120 }, { "epoch": 9.24, "eval_ag_news_accuracy": 0.30184375, "eval_ag_news_bleu_score": 4.351701686434694, "eval_ag_news_bleu_score_sem": 0.1420752862709685, "eval_ag_news_emb_cos_sim": 0.7652785778045654, "eval_ag_news_emb_cos_sim_sem": 0.009476753131988938, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7872631549835205, "eval_ag_news_n_ngrams_match_1": 12.978, "eval_ag_news_n_ngrams_match_2": 2.668, "eval_ag_news_n_ngrams_match_3": 0.734, "eval_ag_news_num_pred_words": 46.406, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 44.1354429636502, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3174233587706966, "eval_ag_news_runtime": 10.3954, "eval_ag_news_samples_per_second": 48.098, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.327430124189282, "eval_ag_news_token_set_f1_sem": 0.004446779222228204, "eval_ag_news_token_set_precision": 0.30576685643142293, "eval_ag_news_token_set_recall": 0.3660499994773382, "eval_ag_news_true_num_tokens": 56.09375, "step": 48125 }, { "epoch": 9.24, "eval_anthropic_toxic_prompts_accuracy": 0.1050625, "eval_anthropic_toxic_prompts_bleu_score": 2.699989161794298, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11689087010030202, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.640740156173706, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010851065714431027, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4292941093444824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.57, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.578, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.54, "eval_anthropic_toxic_prompts_num_pred_words": 47.834, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.854854906908304, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1909183661228704, "eval_anthropic_toxic_prompts_runtime": 10.1024, "eval_anthropic_toxic_prompts_samples_per_second": 49.493, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3300152090117992, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063356743567588554, "eval_anthropic_toxic_prompts_token_set_precision": 0.3875014729189655, "eval_anthropic_toxic_prompts_token_set_recall": 0.31639474196486, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 48125 }, { "epoch": 9.24, "eval_arxiv_accuracy": 0.32525, "eval_arxiv_bleu_score": 3.854995038599177, "eval_arxiv_bleu_score_sem": 0.10864260153314576, "eval_arxiv_emb_cos_sim": 0.722553551197052, "eval_arxiv_emb_cos_sim_sem": 0.00705173607978296, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6563189029693604, "eval_arxiv_n_ngrams_match_1": 13.468, "eval_arxiv_n_ngrams_match_2": 2.532, "eval_arxiv_n_ngrams_match_3": 0.562, "eval_arxiv_num_pred_words": 40.548, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 38.71855346954226, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.31788153585586487, "eval_arxiv_runtime": 10.528, "eval_arxiv_samples_per_second": 47.493, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.31475247216705377, "eval_arxiv_token_set_f1_sem": 0.004326166434345451, "eval_arxiv_token_set_precision": 0.26123899148043805, "eval_arxiv_token_set_recall": 0.4153508682192829, "eval_arxiv_true_num_tokens": 64.0, "step": 48125 }, { "epoch": 9.24, "eval_python_code_alpaca_accuracy": 0.14628125, "eval_python_code_alpaca_bleu_score": 3.669469749310094, "eval_python_code_alpaca_bleu_score_sem": 0.11159693307673213, "eval_python_code_alpaca_emb_cos_sim": 0.711249589920044, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008566130957240592, "eval_python_code_alpaca_emb_top1_equal": 0.0703125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.022687306110270106, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.11362624168396, "eval_python_code_alpaca_n_ngrams_match_1": 8.55, "eval_python_code_alpaca_n_ngrams_match_2": 2.212, "eval_python_code_alpaca_n_ngrams_match_3": 0.676, "eval_python_code_alpaca_num_pred_words": 43.384, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.502496119103448, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.28401764689301384, "eval_python_code_alpaca_runtime": 11.2649, "eval_python_code_alpaca_samples_per_second": 44.386, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.43633529654027564, "eval_python_code_alpaca_token_set_f1_sem": 0.005892912011820268, "eval_python_code_alpaca_token_set_precision": 0.46512658426664066, "eval_python_code_alpaca_token_set_recall": 0.43621480267366586, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 48125 }, { "epoch": 9.24, "eval_wikibio_accuracy": 0.297625, "eval_wikibio_bleu_score": 5.310316157940524, "eval_wikibio_bleu_score_sem": 0.1770462429434233, "eval_wikibio_emb_cos_sim": 0.7053753137588501, "eval_wikibio_emb_cos_sim_sem": 0.010619001215262996, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.077270030975342, "eval_wikibio_n_ngrams_match_1": 9.948, "eval_wikibio_n_ngrams_match_2": 3.258, "eval_wikibio_n_ngrams_match_3": 1.124, "eval_wikibio_num_pred_words": 38.33, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 58.98422474645511, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3346791863394518, "eval_wikibio_runtime": 11.2154, "eval_wikibio_samples_per_second": 44.582, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3108615880404229, "eval_wikibio_token_set_f1_sem": 0.005273087001684077, "eval_wikibio_token_set_precision": 0.32113380387226925, "eval_wikibio_token_set_recall": 0.316084648908643, "eval_wikibio_true_num_tokens": 61.1328125, "step": 48125 }, { "epoch": 9.24, "eval_nq_accuracy": 0.5020625, "eval_nq_bleu_score": 10.132491349184725, "eval_nq_bleu_score_sem": 0.4566772896043115, "eval_nq_emb_cos_sim": 0.8031895160675049, "eval_nq_emb_cos_sim_sem": 0.007887558135136596, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.38545298576355, "eval_nq_n_ngrams_match_1": 21.718, "eval_nq_n_ngrams_match_2": 7.534, "eval_nq_n_ngrams_match_3": 3.314, "eval_nq_num_pred_words": 48.938, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.863982780000132, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4145385300040625, "eval_nq_runtime": 10.8793, "eval_nq_samples_per_second": 45.959, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4305506371636064, "eval_nq_token_set_f1_sem": 0.005198653767924212, "eval_nq_token_set_precision": 0.384813827332616, "eval_nq_token_set_recall": 0.4992643605675793, "eval_nq_true_num_tokens": 64.0, "step": 48125 }, { "epoch": 9.24, "learning_rate": 0.001, "loss": 2.7471, "step": 48132 }, { "epoch": 9.24, "learning_rate": 0.001, "loss": 2.7484, "step": 48144 }, { "epoch": 9.25, "learning_rate": 0.001, "loss": 2.7563, "step": 48156 }, { "epoch": 9.25, "learning_rate": 0.001, "loss": 2.7557, "step": 48168 }, { "epoch": 9.25, "learning_rate": 0.001, "loss": 2.752, "step": 48180 }, { "epoch": 9.25, "learning_rate": 0.001, "loss": 2.7522, "step": 48192 }, { "epoch": 9.26, "learning_rate": 0.001, "loss": 2.7558, "step": 48204 }, { "epoch": 9.26, "learning_rate": 0.001, "loss": 2.7547, "step": 48216 }, { "epoch": 9.26, "learning_rate": 0.001, "loss": 2.7492, "step": 48228 }, { "epoch": 9.26, "learning_rate": 0.001, "loss": 2.7484, "step": 48240 }, { "epoch": 9.26, "learning_rate": 0.001, "loss": 2.7612, "step": 48252 }, { "epoch": 9.27, "learning_rate": 0.001, "loss": 2.7669, "step": 48264 }, { "epoch": 9.27, "learning_rate": 0.001, "loss": 2.7553, "step": 48276 }, { "epoch": 9.27, "learning_rate": 0.001, "loss": 2.7476, "step": 48288 }, { "epoch": 9.27, "learning_rate": 0.001, "loss": 2.7514, "step": 48300 }, { "epoch": 9.28, "learning_rate": 0.001, "loss": 2.7576, "step": 48312 }, { "epoch": 9.28, "learning_rate": 0.001, "loss": 2.7538, "step": 48324 }, { "epoch": 9.28, "learning_rate": 0.001, "loss": 2.7554, "step": 48336 }, { "epoch": 9.28, "learning_rate": 0.001, "loss": 2.7491, "step": 48348 }, { "epoch": 9.29, "learning_rate": 0.001, "loss": 2.7403, "step": 48360 }, { "epoch": 9.29, "learning_rate": 0.001, "loss": 2.7501, "step": 48372 }, { "epoch": 9.29, "learning_rate": 0.001, "loss": 2.7492, "step": 48384 }, { "epoch": 9.29, "learning_rate": 0.001, "loss": 2.7498, "step": 48396 }, { "epoch": 9.29, "learning_rate": 0.001, "loss": 2.7402, "step": 48408 }, { "epoch": 9.3, "learning_rate": 0.001, "loss": 2.7572, "step": 48420 }, { "epoch": 9.3, "learning_rate": 0.001, "loss": 2.744, "step": 48432 }, { "epoch": 9.3, "learning_rate": 0.001, "loss": 2.7585, "step": 48444 }, { "epoch": 9.3, "learning_rate": 0.001, "loss": 2.7455, "step": 48456 }, { "epoch": 9.31, "learning_rate": 0.001, "loss": 2.7497, "step": 48468 }, { "epoch": 9.31, "learning_rate": 0.001, "loss": 2.7569, "step": 48480 }, { "epoch": 9.31, "learning_rate": 0.001, "loss": 2.753, "step": 48492 }, { "epoch": 9.31, "learning_rate": 0.001, "loss": 2.765, "step": 48504 }, { "epoch": 9.32, "learning_rate": 0.001, "loss": 2.7542, "step": 48516 }, { "epoch": 9.32, "learning_rate": 0.001, "loss": 2.7562, "step": 48528 }, { "epoch": 9.32, "learning_rate": 0.001, "loss": 2.752, "step": 48540 }, { "epoch": 9.32, "learning_rate": 0.001, "loss": 2.7527, "step": 48552 }, { "epoch": 9.32, "learning_rate": 0.001, "loss": 2.7635, "step": 48564 }, { "epoch": 9.33, "learning_rate": 0.001, "loss": 2.7518, "step": 48576 }, { "epoch": 9.33, "learning_rate": 0.001, "loss": 2.7575, "step": 48588 }, { "epoch": 9.33, "learning_rate": 0.001, "loss": 2.7459, "step": 48600 }, { "epoch": 9.33, "learning_rate": 0.001, "loss": 2.7645, "step": 48612 }, { "epoch": 9.34, "learning_rate": 0.001, "loss": 2.7505, "step": 48624 }, { "epoch": 9.34, "learning_rate": 0.001, "loss": 2.7481, "step": 48636 }, { "epoch": 9.34, "learning_rate": 0.001, "loss": 2.7618, "step": 48648 }, { "epoch": 9.34, "learning_rate": 0.001, "loss": 2.7423, "step": 48660 }, { "epoch": 9.35, "learning_rate": 0.001, "loss": 2.7547, "step": 48672 }, { "epoch": 9.35, "learning_rate": 0.001, "loss": 2.7459, "step": 48684 }, { "epoch": 9.35, "learning_rate": 0.001, "loss": 2.7513, "step": 48696 }, { "epoch": 9.35, "learning_rate": 0.001, "loss": 2.7506, "step": 48708 }, { "epoch": 9.35, "learning_rate": 0.001, "loss": 2.7547, "step": 48720 }, { "epoch": 9.36, "learning_rate": 0.001, "loss": 2.7411, "step": 48732 }, { "epoch": 9.36, "learning_rate": 0.001, "loss": 2.752, "step": 48744 }, { "epoch": 9.36, "eval_ag_news_accuracy": 0.30134375, "eval_ag_news_bleu_score": 4.396749226674127, "eval_ag_news_bleu_score_sem": 0.14041484471418364, "eval_ag_news_emb_cos_sim": 0.7777345180511475, "eval_ag_news_emb_cos_sim_sem": 0.007292612563012307, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.79011869430542, "eval_ag_news_n_ngrams_match_1": 13.072, "eval_ag_news_n_ngrams_match_2": 2.704, "eval_ag_news_n_ngrams_match_3": 0.744, "eval_ag_news_num_pred_words": 46.302, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 44.261653570436415, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3226398517360535, "eval_ag_news_runtime": 10.412, "eval_ag_news_samples_per_second": 48.022, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.32864575946343094, "eval_ag_news_token_set_f1_sem": 0.004257841407606742, "eval_ag_news_token_set_precision": 0.30865923718689464, "eval_ag_news_token_set_recall": 0.36671814008622605, "eval_ag_news_true_num_tokens": 56.09375, "step": 48750 }, { "epoch": 9.36, "eval_anthropic_toxic_prompts_accuracy": 0.10453125, "eval_anthropic_toxic_prompts_bleu_score": 2.8131528566844612, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11057926695678694, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6412379741668701, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00939366391287225, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.473548650741577, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.714, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59, "eval_anthropic_toxic_prompts_num_pred_words": 47.042, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.25098710697477, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1974110960979811, "eval_anthropic_toxic_prompts_runtime": 10.7744, "eval_anthropic_toxic_prompts_samples_per_second": 46.406, "eval_anthropic_toxic_prompts_steps_per_second": 0.093, "eval_anthropic_toxic_prompts_token_set_f1": 0.3313660028572389, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006179051291841012, "eval_anthropic_toxic_prompts_token_set_precision": 0.3974568862134835, "eval_anthropic_toxic_prompts_token_set_recall": 0.3114973119014473, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 48750 }, { "epoch": 9.36, "eval_arxiv_accuracy": 0.32503125, "eval_arxiv_bleu_score": 3.8107220012480023, "eval_arxiv_bleu_score_sem": 0.10939840891064047, "eval_arxiv_emb_cos_sim": 0.7042320370674133, "eval_arxiv_emb_cos_sim_sem": 0.008122784965031958, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6602439880371094, "eval_arxiv_n_ngrams_match_1": 13.64, "eval_arxiv_n_ngrams_match_2": 2.492, "eval_arxiv_n_ngrams_match_3": 0.502, "eval_arxiv_num_pred_words": 39.824, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 38.87082573090418, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32390275596990414, "eval_arxiv_runtime": 10.2748, "eval_arxiv_samples_per_second": 48.663, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3177359400014232, "eval_arxiv_token_set_f1_sem": 0.00394593972515579, "eval_arxiv_token_set_precision": 0.2644396575022485, "eval_arxiv_token_set_recall": 0.41493651747201754, "eval_arxiv_true_num_tokens": 64.0, "step": 48750 }, { "epoch": 9.36, "eval_python_code_alpaca_accuracy": 0.1439375, "eval_python_code_alpaca_bleu_score": 3.653396042760056, "eval_python_code_alpaca_bleu_score_sem": 0.11215840486870118, "eval_python_code_alpaca_emb_cos_sim": 0.6965563297271729, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009391043564274986, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1472861766815186, "eval_python_code_alpaca_n_ngrams_match_1": 8.22, "eval_python_code_alpaca_n_ngrams_match_2": 2.028, "eval_python_code_alpaca_n_ngrams_match_3": 0.53, "eval_python_code_alpaca_num_pred_words": 40.438, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.272820480194248, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2893894594645693, "eval_python_code_alpaca_runtime": 9.9787, "eval_python_code_alpaca_samples_per_second": 50.107, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.42216432224459405, "eval_python_code_alpaca_token_set_f1_sem": 0.005704082989878473, "eval_python_code_alpaca_token_set_precision": 0.441826665075511, "eval_python_code_alpaca_token_set_recall": 0.4339781565612325, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 48750 }, { "epoch": 9.36, "eval_wikibio_accuracy": 0.29796875, "eval_wikibio_bleu_score": 5.128538995564363, "eval_wikibio_bleu_score_sem": 0.18220165220355836, "eval_wikibio_emb_cos_sim": 0.7084054350852966, "eval_wikibio_emb_cos_sim_sem": 0.011019566180599549, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.093959331512451, "eval_wikibio_n_ngrams_match_1": 9.522, "eval_wikibio_n_ngrams_match_2": 2.992, "eval_wikibio_n_ngrams_match_3": 1.01, "eval_wikibio_num_pred_words": 36.41, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 59.97689060893038, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3251450937570389, "eval_wikibio_runtime": 10.2684, "eval_wikibio_samples_per_second": 48.693, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.30141272476178205, "eval_wikibio_token_set_f1_sem": 0.0056895415323902476, "eval_wikibio_token_set_precision": 0.30743642302499147, "eval_wikibio_token_set_recall": 0.31338408345119867, "eval_wikibio_true_num_tokens": 61.1328125, "step": 48750 }, { "epoch": 9.36, "eval_nq_accuracy": 0.500375, "eval_nq_bleu_score": 10.53056712935079, "eval_nq_bleu_score_sem": 0.4490580378016007, "eval_nq_emb_cos_sim": 0.8015233874320984, "eval_nq_emb_cos_sim_sem": 0.00786959830931489, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.385740280151367, "eval_nq_n_ngrams_match_1": 21.868, "eval_nq_n_ngrams_match_2": 7.642, "eval_nq_n_ngrams_match_3": 3.478, "eval_nq_num_pred_words": 48.982, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.86710438967117, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.418429712006624, "eval_nq_runtime": 11.0633, "eval_nq_samples_per_second": 45.195, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4365160381662473, "eval_nq_token_set_f1_sem": 0.005191955086409502, "eval_nq_token_set_precision": 0.38999808114205353, "eval_nq_token_set_recall": 0.5080066668214502, "eval_nq_true_num_tokens": 64.0, "step": 48750 }, { "epoch": 9.36, "learning_rate": 0.001, "loss": 2.7599, "step": 48756 }, { "epoch": 9.36, "learning_rate": 0.001, "loss": 2.755, "step": 48768 }, { "epoch": 9.37, "learning_rate": 0.001, "loss": 2.7586, "step": 48780 }, { "epoch": 9.37, "learning_rate": 0.001, "loss": 2.7593, "step": 48792 }, { "epoch": 9.37, "learning_rate": 0.001, "loss": 2.7457, "step": 48804 }, { "epoch": 9.37, "learning_rate": 0.001, "loss": 2.7441, "step": 48816 }, { "epoch": 9.38, "learning_rate": 0.001, "loss": 2.7489, "step": 48828 }, { "epoch": 9.38, "learning_rate": 0.001, "loss": 2.7499, "step": 48840 }, { "epoch": 9.38, "learning_rate": 0.001, "loss": 2.7431, "step": 48852 }, { "epoch": 9.38, "learning_rate": 0.001, "loss": 2.7586, "step": 48864 }, { "epoch": 9.38, "learning_rate": 0.001, "loss": 2.7487, "step": 48876 }, { "epoch": 9.39, "learning_rate": 0.001, "loss": 2.7443, "step": 48888 }, { "epoch": 9.39, "learning_rate": 0.001, "loss": 2.747, "step": 48900 }, { "epoch": 9.39, "learning_rate": 0.001, "loss": 2.7525, "step": 48912 }, { "epoch": 9.39, "learning_rate": 0.001, "loss": 2.754, "step": 48924 }, { "epoch": 9.4, "learning_rate": 0.001, "loss": 2.7516, "step": 48936 }, { "epoch": 9.4, "learning_rate": 0.001, "loss": 2.7499, "step": 48948 }, { "epoch": 9.4, "learning_rate": 0.001, "loss": 2.7482, "step": 48960 }, { "epoch": 9.4, "learning_rate": 0.001, "loss": 2.7515, "step": 48972 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 2.744, "step": 48984 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 2.7539, "step": 48996 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 2.7483, "step": 49008 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 2.7514, "step": 49020 }, { "epoch": 9.41, "learning_rate": 0.001, "loss": 2.7509, "step": 49032 }, { "epoch": 9.42, "learning_rate": 0.001, "loss": 2.7539, "step": 49044 }, { "epoch": 9.42, "learning_rate": 0.001, "loss": 2.7379, "step": 49056 }, { "epoch": 9.42, "learning_rate": 0.001, "loss": 2.7464, "step": 49068 }, { "epoch": 9.42, "learning_rate": 0.001, "loss": 2.7512, "step": 49080 }, { "epoch": 9.43, "learning_rate": 0.001, "loss": 2.7504, "step": 49092 }, { "epoch": 9.43, "learning_rate": 0.001, "loss": 2.7508, "step": 49104 }, { "epoch": 9.43, "learning_rate": 0.001, "loss": 2.7522, "step": 49116 }, { "epoch": 9.43, "learning_rate": 0.001, "loss": 2.7509, "step": 49128 }, { "epoch": 9.44, "learning_rate": 0.001, "loss": 2.7486, "step": 49140 }, { "epoch": 9.44, "learning_rate": 0.001, "loss": 2.7575, "step": 49152 }, { "epoch": 9.44, "learning_rate": 0.001, "loss": 2.7398, "step": 49164 }, { "epoch": 9.44, "learning_rate": 0.001, "loss": 2.761, "step": 49176 }, { "epoch": 9.44, "learning_rate": 0.001, "loss": 2.7516, "step": 49188 }, { "epoch": 9.45, "learning_rate": 0.001, "loss": 2.7523, "step": 49200 }, { "epoch": 9.45, "learning_rate": 0.001, "loss": 2.7451, "step": 49212 }, { "epoch": 9.45, "learning_rate": 0.001, "loss": 2.7512, "step": 49224 }, { "epoch": 9.45, "learning_rate": 0.001, "loss": 2.7526, "step": 49236 }, { "epoch": 9.46, "learning_rate": 0.001, "loss": 2.7538, "step": 49248 }, { "epoch": 9.46, "learning_rate": 0.001, "loss": 2.7435, "step": 49260 }, { "epoch": 9.46, "learning_rate": 0.001, "loss": 2.7519, "step": 49272 }, { "epoch": 9.46, "learning_rate": 0.001, "loss": 2.7502, "step": 49284 }, { "epoch": 9.47, "learning_rate": 0.001, "loss": 2.7587, "step": 49296 }, { "epoch": 9.47, "learning_rate": 0.001, "loss": 2.7557, "step": 49308 }, { "epoch": 9.47, "learning_rate": 0.001, "loss": 2.7477, "step": 49320 }, { "epoch": 9.47, "learning_rate": 0.001, "loss": 2.7637, "step": 49332 }, { "epoch": 9.47, "learning_rate": 0.001, "loss": 2.7625, "step": 49344 }, { "epoch": 9.48, "learning_rate": 0.001, "loss": 2.7565, "step": 49356 }, { "epoch": 9.48, "learning_rate": 0.001, "loss": 2.7475, "step": 49368 }, { "epoch": 9.48, "eval_ag_news_accuracy": 0.3011875, "eval_ag_news_bleu_score": 4.357760725215447, "eval_ag_news_bleu_score_sem": 0.14459949443749961, "eval_ag_news_emb_cos_sim": 0.776526927947998, "eval_ag_news_emb_cos_sim_sem": 0.006940873404586609, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7870981693267822, "eval_ag_news_n_ngrams_match_1": 12.952, "eval_ag_news_n_ngrams_match_2": 2.692, "eval_ag_news_n_ngrams_match_3": 0.726, "eval_ag_news_num_pred_words": 46.32, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 44.12816184926364, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31948391192609604, "eval_ag_news_runtime": 11.3224, "eval_ag_news_samples_per_second": 44.16, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.3277880997004768, "eval_ag_news_token_set_f1_sem": 0.004296509623992959, "eval_ag_news_token_set_precision": 0.306916125249126, "eval_ag_news_token_set_recall": 0.36826670582017923, "eval_ag_news_true_num_tokens": 56.09375, "step": 49375 }, { "epoch": 9.48, "eval_anthropic_toxic_prompts_accuracy": 0.10715625, "eval_anthropic_toxic_prompts_bleu_score": 2.8659356446403557, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11802020499974405, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6357718706130981, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00998766513217209, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4657299518585205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.614, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.614, "eval_anthropic_toxic_prompts_num_pred_words": 47.482, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.999809570448026, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19273474312050431, "eval_anthropic_toxic_prompts_runtime": 12.8921, "eval_anthropic_toxic_prompts_samples_per_second": 38.783, "eval_anthropic_toxic_prompts_steps_per_second": 0.078, "eval_anthropic_toxic_prompts_token_set_f1": 0.33540217317294596, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00617058560226162, "eval_anthropic_toxic_prompts_token_set_precision": 0.3952195862486079, "eval_anthropic_toxic_prompts_token_set_recall": 0.325109361773473, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 49375 }, { "epoch": 9.48, "eval_arxiv_accuracy": 0.32528125, "eval_arxiv_bleu_score": 3.8059983785055898, "eval_arxiv_bleu_score_sem": 0.11226245781831974, "eval_arxiv_emb_cos_sim": 0.7064568996429443, "eval_arxiv_emb_cos_sim_sem": 0.007243781487745389, "eval_arxiv_emb_top1_equal": 0.1796875, "eval_arxiv_emb_top1_equal_sem": 0.034068008879424266, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.636347770690918, "eval_arxiv_n_ngrams_match_1": 13.794, "eval_arxiv_n_ngrams_match_2": 2.508, "eval_arxiv_n_ngrams_match_3": 0.51, "eval_arxiv_num_pred_words": 40.514, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.952970343168325, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32522123159039995, "eval_arxiv_runtime": 12.0727, "eval_arxiv_samples_per_second": 41.416, "eval_arxiv_steps_per_second": 0.083, "eval_arxiv_token_set_f1": 0.3223919267568179, "eval_arxiv_token_set_f1_sem": 0.003921306275013076, "eval_arxiv_token_set_precision": 0.2682886994738338, "eval_arxiv_token_set_recall": 0.42271750291043814, "eval_arxiv_true_num_tokens": 64.0, "step": 49375 }, { "epoch": 9.48, "eval_python_code_alpaca_accuracy": 0.14565625, "eval_python_code_alpaca_bleu_score": 3.672034122069554, "eval_python_code_alpaca_bleu_score_sem": 0.1187501943966979, "eval_python_code_alpaca_emb_cos_sim": 0.7004073858261108, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010131539985009107, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1419548988342285, "eval_python_code_alpaca_n_ngrams_match_1": 8.712, "eval_python_code_alpaca_n_ngrams_match_2": 2.202, "eval_python_code_alpaca_n_ngrams_match_3": 0.618, "eval_python_code_alpaca_num_pred_words": 43.778, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 23.149076757101334, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2891343836772943, "eval_python_code_alpaca_runtime": 11.0329, "eval_python_code_alpaca_samples_per_second": 45.319, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.43220825848463384, "eval_python_code_alpaca_token_set_f1_sem": 0.005445043835031553, "eval_python_code_alpaca_token_set_precision": 0.46672497782899824, "eval_python_code_alpaca_token_set_recall": 0.4240971527630146, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 49375 }, { "epoch": 9.48, "eval_wikibio_accuracy": 0.29778125, "eval_wikibio_bleu_score": 5.119697203143855, "eval_wikibio_bleu_score_sem": 0.18412999052839607, "eval_wikibio_emb_cos_sim": 0.6921262145042419, "eval_wikibio_emb_cos_sim_sem": 0.011651315829641467, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.101794719696045, "eval_wikibio_n_ngrams_match_1": 9.74, "eval_wikibio_n_ngrams_match_2": 3.076, "eval_wikibio_n_ngrams_match_3": 1.02, "eval_wikibio_num_pred_words": 38.008, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.44867873674017, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32660946377484895, "eval_wikibio_runtime": 10.0789, "eval_wikibio_samples_per_second": 49.609, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3051208709511552, "eval_wikibio_token_set_f1_sem": 0.005336039690711099, "eval_wikibio_token_set_precision": 0.31516698974788926, "eval_wikibio_token_set_recall": 0.3107078789718283, "eval_wikibio_true_num_tokens": 61.1328125, "step": 49375 }, { "epoch": 9.48, "eval_nq_accuracy": 0.50165625, "eval_nq_bleu_score": 10.328163708855888, "eval_nq_bleu_score_sem": 0.4448969767087939, "eval_nq_emb_cos_sim": 0.8024446964263916, "eval_nq_emb_cos_sim_sem": 0.0077750859771627346, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3828232288360596, "eval_nq_n_ngrams_match_1": 21.804, "eval_nq_n_ngrams_match_2": 7.586, "eval_nq_n_ngrams_match_3": 3.342, "eval_nq_num_pred_words": 49.382, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.835450678712899, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41610867838610055, "eval_nq_runtime": 10.7151, "eval_nq_samples_per_second": 46.663, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.43548373784904204, "eval_nq_token_set_f1_sem": 0.005039144897405993, "eval_nq_token_set_precision": 0.3877664544571668, "eval_nq_token_set_recall": 0.5082956347348438, "eval_nq_true_num_tokens": 64.0, "step": 49375 }, { "epoch": 9.48, "learning_rate": 0.001, "loss": 2.7451, "step": 49380 }, { "epoch": 9.48, "learning_rate": 0.001, "loss": 2.7433, "step": 49392 }, { "epoch": 9.49, "learning_rate": 0.001, "loss": 2.7543, "step": 49404 }, { "epoch": 9.49, "learning_rate": 0.001, "loss": 2.745, "step": 49416 }, { "epoch": 9.49, "learning_rate": 0.001, "loss": 2.733, "step": 49428 }, { "epoch": 9.49, "learning_rate": 0.001, "loss": 2.7444, "step": 49440 }, { "epoch": 9.5, "learning_rate": 0.001, "loss": 2.7471, "step": 49452 }, { "epoch": 9.5, "learning_rate": 0.001, "loss": 2.7535, "step": 49464 }, { "epoch": 9.5, "learning_rate": 0.001, "loss": 2.7492, "step": 49476 }, { "epoch": 9.5, "learning_rate": 0.001, "loss": 2.7472, "step": 49488 }, { "epoch": 9.5, "learning_rate": 0.001, "loss": 2.7531, "step": 49500 }, { "epoch": 9.51, "learning_rate": 0.001, "loss": 2.758, "step": 49512 }, { "epoch": 9.51, "learning_rate": 0.001, "loss": 2.7482, "step": 49524 }, { "epoch": 9.51, "learning_rate": 0.001, "loss": 2.7561, "step": 49536 }, { "epoch": 9.51, "learning_rate": 0.001, "loss": 2.7579, "step": 49548 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 2.7521, "step": 49560 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 2.755, "step": 49572 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 2.7384, "step": 49584 }, { "epoch": 9.52, "learning_rate": 0.001, "loss": 2.7443, "step": 49596 }, { "epoch": 9.53, "learning_rate": 0.001, "loss": 2.7304, "step": 49608 }, { "epoch": 9.53, "learning_rate": 0.001, "loss": 2.7553, "step": 49620 }, { "epoch": 9.53, "learning_rate": 0.001, "loss": 2.7575, "step": 49632 }, { "epoch": 9.53, "learning_rate": 0.001, "loss": 2.7403, "step": 49644 }, { "epoch": 9.53, "learning_rate": 0.001, "loss": 2.7566, "step": 49656 }, { "epoch": 9.54, "learning_rate": 0.001, "loss": 2.742, "step": 49668 }, { "epoch": 9.54, "learning_rate": 0.001, "loss": 2.7477, "step": 49680 }, { "epoch": 9.54, "learning_rate": 0.001, "loss": 2.7456, "step": 49692 }, { "epoch": 9.54, "learning_rate": 0.001, "loss": 2.7419, "step": 49704 }, { "epoch": 9.55, "learning_rate": 0.001, "loss": 2.7433, "step": 49716 }, { "epoch": 9.55, "learning_rate": 0.001, "loss": 2.7371, "step": 49728 }, { "epoch": 9.55, "learning_rate": 0.001, "loss": 2.7544, "step": 49740 }, { "epoch": 9.55, "learning_rate": 0.001, "loss": 2.747, "step": 49752 }, { "epoch": 9.56, "learning_rate": 0.001, "loss": 2.7396, "step": 49764 }, { "epoch": 9.56, "learning_rate": 0.001, "loss": 2.7428, "step": 49776 }, { "epoch": 9.56, "learning_rate": 0.001, "loss": 2.7432, "step": 49788 }, { "epoch": 9.56, "learning_rate": 0.001, "loss": 2.748, "step": 49800 }, { "epoch": 9.56, "learning_rate": 0.001, "loss": 2.7556, "step": 49812 }, { "epoch": 9.57, "learning_rate": 0.001, "loss": 2.742, "step": 49824 }, { "epoch": 9.57, "learning_rate": 0.001, "loss": 2.7481, "step": 49836 }, { "epoch": 9.57, "learning_rate": 0.001, "loss": 2.7448, "step": 49848 }, { "epoch": 9.57, "learning_rate": 0.001, "loss": 2.7375, "step": 49860 }, { "epoch": 9.58, "learning_rate": 0.001, "loss": 2.7477, "step": 49872 }, { "epoch": 9.58, "learning_rate": 0.001, "loss": 2.7526, "step": 49884 }, { "epoch": 9.58, "learning_rate": 0.001, "loss": 2.7439, "step": 49896 }, { "epoch": 9.58, "learning_rate": 0.001, "loss": 2.7462, "step": 49908 }, { "epoch": 9.59, "learning_rate": 0.001, "loss": 2.7546, "step": 49920 }, { "epoch": 9.59, "learning_rate": 0.001, "loss": 2.7428, "step": 49932 }, { "epoch": 9.59, "learning_rate": 0.001, "loss": 2.7475, "step": 49944 }, { "epoch": 9.59, "learning_rate": 0.001, "loss": 2.7541, "step": 49956 }, { "epoch": 9.59, "learning_rate": 0.001, "loss": 2.7523, "step": 49968 }, { "epoch": 9.6, "learning_rate": 0.001, "loss": 2.7482, "step": 49980 }, { "epoch": 9.6, "learning_rate": 0.001, "loss": 2.7436, "step": 49992 }, { "epoch": 9.6, "eval_ag_news_accuracy": 0.30128125, "eval_ag_news_bleu_score": 4.401626065564337, "eval_ag_news_bleu_score_sem": 0.1445457507032042, "eval_ag_news_emb_cos_sim": 0.7778183221817017, "eval_ag_news_emb_cos_sim_sem": 0.008032925908373208, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.772658109664917, "eval_ag_news_n_ngrams_match_1": 13.148, "eval_ag_news_n_ngrams_match_2": 2.694, "eval_ag_news_n_ngrams_match_3": 0.75, "eval_ag_news_num_pred_words": 46.634, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.49552719327691, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32072590060030487, "eval_ag_news_runtime": 10.4683, "eval_ag_news_samples_per_second": 47.763, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.33126265519725334, "eval_ag_news_token_set_f1_sem": 0.004283752556446434, "eval_ag_news_token_set_precision": 0.3102873960943213, "eval_ag_news_token_set_recall": 0.371257938336186, "eval_ag_news_true_num_tokens": 56.09375, "step": 50000 }, { "epoch": 9.6, "eval_anthropic_toxic_prompts_accuracy": 0.10528125, "eval_anthropic_toxic_prompts_bleu_score": 2.646041919126248, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10067921182804163, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6425341963768005, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009844991464517635, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.45570969581604, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.612, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.612, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.524, "eval_anthropic_toxic_prompts_num_pred_words": 47.558, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.680764411817897, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19188037681201425, "eval_anthropic_toxic_prompts_runtime": 9.9395, "eval_anthropic_toxic_prompts_samples_per_second": 50.304, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3319364934553117, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006188616059526924, "eval_anthropic_toxic_prompts_token_set_precision": 0.39262972766636134, "eval_anthropic_toxic_prompts_token_set_recall": 0.3185882702626803, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 50000 }, { "epoch": 9.6, "eval_arxiv_accuracy": 0.32771875, "eval_arxiv_bleu_score": 3.7701257023666095, "eval_arxiv_bleu_score_sem": 0.11082359700177555, "eval_arxiv_emb_cos_sim": 0.7135499715805054, "eval_arxiv_emb_cos_sim_sem": 0.008002433980564593, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.63419246673584, "eval_arxiv_n_ngrams_match_1": 13.586, "eval_arxiv_n_ngrams_match_2": 2.446, "eval_arxiv_n_ngrams_match_3": 0.502, "eval_arxiv_num_pred_words": 39.776, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.87125824491667, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32475878093368343, "eval_arxiv_runtime": 10.5645, "eval_arxiv_samples_per_second": 47.328, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.32280740944661535, "eval_arxiv_token_set_f1_sem": 0.004062348152172144, "eval_arxiv_token_set_precision": 0.26694253424269354, "eval_arxiv_token_set_recall": 0.42598548702306865, "eval_arxiv_true_num_tokens": 64.0, "step": 50000 }, { "epoch": 9.6, "eval_python_code_alpaca_accuracy": 0.14696875, "eval_python_code_alpaca_bleu_score": 3.6439864258715815, "eval_python_code_alpaca_bleu_score_sem": 0.11382405744825667, "eval_python_code_alpaca_emb_cos_sim": 0.7217139005661011, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007808518964744001, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1127827167510986, "eval_python_code_alpaca_n_ngrams_match_1": 8.926, "eval_python_code_alpaca_n_ngrams_match_2": 2.31, "eval_python_code_alpaca_n_ngrams_match_3": 0.616, "eval_python_code_alpaca_num_pred_words": 44.906, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.483522705973908, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29347859700919776, "eval_python_code_alpaca_runtime": 9.8308, "eval_python_code_alpaca_samples_per_second": 50.86, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.44862252730393704, "eval_python_code_alpaca_token_set_f1_sem": 0.005301960274074061, "eval_python_code_alpaca_token_set_precision": 0.48279054465557647, "eval_python_code_alpaca_token_set_recall": 0.4414768495568832, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 50000 }, { "epoch": 9.6, "eval_wikibio_accuracy": 0.30303125, "eval_wikibio_bleu_score": 5.343358124895723, "eval_wikibio_bleu_score_sem": 0.19038480264481653, "eval_wikibio_emb_cos_sim": 0.7051657438278198, "eval_wikibio_emb_cos_sim_sem": 0.011177690619500295, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.021156311035156, "eval_wikibio_n_ngrams_match_1": 9.58, "eval_wikibio_n_ngrams_match_2": 3.068, "eval_wikibio_n_ngrams_match_3": 1.038, "eval_wikibio_num_pred_words": 36.482, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.76555088221737, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3313609812011307, "eval_wikibio_runtime": 10.4864, "eval_wikibio_samples_per_second": 47.681, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.3034471501935343, "eval_wikibio_token_set_f1_sem": 0.005550524035739132, "eval_wikibio_token_set_precision": 0.3104102153043557, "eval_wikibio_token_set_recall": 0.3109466731385039, "eval_wikibio_true_num_tokens": 61.1328125, "step": 50000 }, { "epoch": 9.6, "eval_nq_accuracy": 0.501, "eval_nq_bleu_score": 10.158360347363876, "eval_nq_bleu_score_sem": 0.4273361005121561, "eval_nq_emb_cos_sim": 0.7996821403503418, "eval_nq_emb_cos_sim_sem": 0.009176689302560135, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3760902881622314, "eval_nq_n_ngrams_match_1": 21.654, "eval_nq_n_ngrams_match_2": 7.46, "eval_nq_n_ngrams_match_3": 3.276, "eval_nq_num_pred_words": 48.66, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.76274128082477, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4169310345619456, "eval_nq_runtime": 10.4174, "eval_nq_samples_per_second": 47.996, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4342935261692336, "eval_nq_token_set_f1_sem": 0.005008949251638485, "eval_nq_token_set_precision": 0.38577029760949005, "eval_nq_token_set_recall": 0.5080714527538779, "eval_nq_true_num_tokens": 64.0, "step": 50000 }, { "epoch": 9.6, "learning_rate": 0.001, "loss": 2.7346, "step": 50004 }, { "epoch": 9.6, "learning_rate": 0.001, "loss": 2.7357, "step": 50016 }, { "epoch": 9.61, "learning_rate": 0.001, "loss": 2.7565, "step": 50028 }, { "epoch": 9.61, "learning_rate": 0.001, "loss": 2.7423, "step": 50040 }, { "epoch": 9.61, "learning_rate": 0.001, "loss": 2.7457, "step": 50052 }, { "epoch": 9.61, "learning_rate": 0.001, "loss": 2.7506, "step": 50064 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 2.7459, "step": 50076 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 2.7411, "step": 50088 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 2.7355, "step": 50100 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 2.7392, "step": 50112 }, { "epoch": 9.62, "learning_rate": 0.001, "loss": 2.7471, "step": 50124 }, { "epoch": 9.63, "learning_rate": 0.001, "loss": 2.7457, "step": 50136 }, { "epoch": 9.63, "learning_rate": 0.001, "loss": 2.7471, "step": 50148 }, { "epoch": 9.63, "learning_rate": 0.001, "loss": 2.7486, "step": 50160 }, { "epoch": 9.63, "learning_rate": 0.001, "loss": 2.7439, "step": 50172 }, { "epoch": 9.64, "learning_rate": 0.001, "loss": 2.7514, "step": 50184 }, { "epoch": 9.64, "learning_rate": 0.001, "loss": 2.7474, "step": 50196 }, { "epoch": 9.64, "learning_rate": 0.001, "loss": 2.7514, "step": 50208 }, { "epoch": 9.64, "learning_rate": 0.001, "loss": 2.7509, "step": 50220 }, { "epoch": 9.65, "learning_rate": 0.001, "loss": 2.7359, "step": 50232 }, { "epoch": 9.65, "learning_rate": 0.001, "loss": 2.7599, "step": 50244 }, { "epoch": 9.65, "learning_rate": 0.001, "loss": 2.7559, "step": 50256 }, { "epoch": 9.65, "learning_rate": 0.001, "loss": 2.7367, "step": 50268 }, { "epoch": 9.65, "learning_rate": 0.001, "loss": 2.7404, "step": 50280 }, { "epoch": 9.66, "learning_rate": 0.001, "loss": 2.7505, "step": 50292 }, { "epoch": 9.66, "learning_rate": 0.001, "loss": 2.7478, "step": 50304 }, { "epoch": 9.66, "learning_rate": 0.001, "loss": 2.7466, "step": 50316 }, { "epoch": 9.66, "learning_rate": 0.001, "loss": 2.7385, "step": 50328 }, { "epoch": 9.67, "learning_rate": 0.001, "loss": 2.727, "step": 50340 }, { "epoch": 9.67, "learning_rate": 0.001, "loss": 2.7526, "step": 50352 }, { "epoch": 9.67, "learning_rate": 0.001, "loss": 2.7375, "step": 50364 }, { "epoch": 9.67, "learning_rate": 0.001, "loss": 2.7461, "step": 50376 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 2.7515, "step": 50388 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 2.7417, "step": 50400 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 2.752, "step": 50412 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 2.7438, "step": 50424 }, { "epoch": 9.68, "learning_rate": 0.001, "loss": 2.7407, "step": 50436 }, { "epoch": 9.69, "learning_rate": 0.001, "loss": 2.7504, "step": 50448 }, { "epoch": 9.69, "learning_rate": 0.001, "loss": 2.7544, "step": 50460 }, { "epoch": 9.69, "learning_rate": 0.001, "loss": 2.739, "step": 50472 }, { "epoch": 9.69, "learning_rate": 0.001, "loss": 2.744, "step": 50484 }, { "epoch": 9.7, "learning_rate": 0.001, "loss": 2.747, "step": 50496 }, { "epoch": 9.7, "learning_rate": 0.001, "loss": 2.7388, "step": 50508 }, { "epoch": 9.7, "learning_rate": 0.001, "loss": 2.7464, "step": 50520 }, { "epoch": 9.7, "learning_rate": 0.001, "loss": 2.7439, "step": 50532 }, { "epoch": 9.71, "learning_rate": 0.001, "loss": 2.7371, "step": 50544 }, { "epoch": 9.71, "learning_rate": 0.001, "loss": 2.7383, "step": 50556 }, { "epoch": 9.71, "learning_rate": 0.001, "loss": 2.7515, "step": 50568 }, { "epoch": 9.71, "learning_rate": 0.001, "loss": 2.7446, "step": 50580 }, { "epoch": 9.71, "learning_rate": 0.001, "loss": 2.7498, "step": 50592 }, { "epoch": 9.72, "learning_rate": 0.001, "loss": 2.7567, "step": 50604 }, { "epoch": 9.72, "learning_rate": 0.001, "loss": 2.7446, "step": 50616 }, { "epoch": 9.72, "eval_ag_news_accuracy": 0.30315625, "eval_ag_news_bleu_score": 4.229310166403921, "eval_ag_news_bleu_score_sem": 0.13653759383921774, "eval_ag_news_emb_cos_sim": 0.7814064025878906, "eval_ag_news_emb_cos_sim_sem": 0.0076000323345952765, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.773273229598999, "eval_ag_news_n_ngrams_match_1": 12.932, "eval_ag_news_n_ngrams_match_2": 2.582, "eval_ag_news_n_ngrams_match_3": 0.678, "eval_ag_news_num_pred_words": 46.276, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.522290389540785, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31875568997996345, "eval_ag_news_runtime": 10.7756, "eval_ag_news_samples_per_second": 46.401, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.32706445291604713, "eval_ag_news_token_set_f1_sem": 0.004265483933419019, "eval_ag_news_token_set_precision": 0.30472529299794976, "eval_ag_news_token_set_recall": 0.3699273734078367, "eval_ag_news_true_num_tokens": 56.09375, "step": 50625 }, { "epoch": 9.72, "eval_anthropic_toxic_prompts_accuracy": 0.103875, "eval_anthropic_toxic_prompts_bleu_score": 2.75102727221293, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11179479286695834, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6350839734077454, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010060457349361961, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4477925300598145, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.534, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.584, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.544, "eval_anthropic_toxic_prompts_num_pred_words": 46.946, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.43093283268095, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1924028521684284, "eval_anthropic_toxic_prompts_runtime": 11.0808, "eval_anthropic_toxic_prompts_samples_per_second": 45.123, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.327072750899434, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006390408494524602, "eval_anthropic_toxic_prompts_token_set_precision": 0.3914232039580596, "eval_anthropic_toxic_prompts_token_set_recall": 0.3124122785138938, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 50625 }, { "epoch": 9.72, "eval_arxiv_accuracy": 0.32596875, "eval_arxiv_bleu_score": 3.7227792440478975, "eval_arxiv_bleu_score_sem": 0.10689662456957807, "eval_arxiv_emb_cos_sim": 0.7148361802101135, "eval_arxiv_emb_cos_sim_sem": 0.008313577821340205, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6310336589813232, "eval_arxiv_n_ngrams_match_1": 13.58, "eval_arxiv_n_ngrams_match_2": 2.41, "eval_arxiv_n_ngrams_match_3": 0.502, "eval_arxiv_num_pred_words": 39.048, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.75181896287888, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3252440222441543, "eval_arxiv_runtime": 9.9786, "eval_arxiv_samples_per_second": 50.107, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.3204262909242834, "eval_arxiv_token_set_f1_sem": 0.004326924854411187, "eval_arxiv_token_set_precision": 0.2664551550336275, "eval_arxiv_token_set_recall": 0.42331837328961114, "eval_arxiv_true_num_tokens": 64.0, "step": 50625 }, { "epoch": 9.72, "eval_python_code_alpaca_accuracy": 0.1475, "eval_python_code_alpaca_bleu_score": 3.7156771148972076, "eval_python_code_alpaca_bleu_score_sem": 0.11075926433811897, "eval_python_code_alpaca_emb_cos_sim": 0.7210425138473511, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009259779180221332, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.127523899078369, "eval_python_code_alpaca_n_ngrams_match_1": 8.692, "eval_python_code_alpaca_n_ngrams_match_2": 2.25, "eval_python_code_alpaca_n_ngrams_match_3": 0.618, "eval_python_code_alpaca_num_pred_words": 42.658, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.81741132382553, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2960598726055982, "eval_python_code_alpaca_runtime": 11.2128, "eval_python_code_alpaca_samples_per_second": 44.592, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4352501196633042, "eval_python_code_alpaca_token_set_f1_sem": 0.00573300760460424, "eval_python_code_alpaca_token_set_precision": 0.47193294741093567, "eval_python_code_alpaca_token_set_recall": 0.42933250784160426, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 50625 }, { "epoch": 9.72, "eval_wikibio_accuracy": 0.30403125, "eval_wikibio_bleu_score": 5.09087360581446, "eval_wikibio_bleu_score_sem": 0.17910898241923462, "eval_wikibio_emb_cos_sim": 0.6993159651756287, "eval_wikibio_emb_cos_sim_sem": 0.013285735053490507, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.0744476318359375, "eval_wikibio_n_ngrams_match_1": 9.562, "eval_wikibio_n_ngrams_match_2": 3.03, "eval_wikibio_n_ngrams_match_3": 0.972, "eval_wikibio_num_pred_words": 36.228, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 58.81798243273018, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3304839476884093, "eval_wikibio_runtime": 10.36, "eval_wikibio_samples_per_second": 48.263, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.2993221571546114, "eval_wikibio_token_set_f1_sem": 0.00557430686192778, "eval_wikibio_token_set_precision": 0.3083853823966142, "eval_wikibio_token_set_recall": 0.30611439348632996, "eval_wikibio_true_num_tokens": 61.1328125, "step": 50625 }, { "epoch": 9.72, "eval_nq_accuracy": 0.50015625, "eval_nq_bleu_score": 10.113071648166923, "eval_nq_bleu_score_sem": 0.4386009372837779, "eval_nq_emb_cos_sim": 0.8107307553291321, "eval_nq_emb_cos_sim_sem": 0.007513519342581586, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3781211376190186, "eval_nq_n_ngrams_match_1": 21.788, "eval_nq_n_ngrams_match_2": 7.482, "eval_nq_n_ngrams_match_3": 3.31, "eval_nq_num_pred_words": 48.79, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.784620997794168, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4199325728624945, "eval_nq_runtime": 10.4835, "eval_nq_samples_per_second": 47.694, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.436172696277002, "eval_nq_token_set_f1_sem": 0.00493042015432282, "eval_nq_token_set_precision": 0.38975861990561755, "eval_nq_token_set_recall": 0.5055145741123724, "eval_nq_true_num_tokens": 64.0, "step": 50625 }, { "epoch": 9.72, "learning_rate": 0.001, "loss": 2.7509, "step": 50628 }, { "epoch": 9.72, "learning_rate": 0.001, "loss": 2.7581, "step": 50640 }, { "epoch": 9.73, "learning_rate": 0.001, "loss": 2.7493, "step": 50652 }, { "epoch": 9.73, "learning_rate": 0.001, "loss": 2.7315, "step": 50664 }, { "epoch": 9.73, "learning_rate": 0.001, "loss": 2.7589, "step": 50676 }, { "epoch": 9.73, "learning_rate": 0.001, "loss": 2.7444, "step": 50688 }, { "epoch": 9.74, "learning_rate": 0.001, "loss": 2.7568, "step": 50700 }, { "epoch": 9.74, "learning_rate": 0.001, "loss": 2.747, "step": 50712 }, { "epoch": 9.74, "learning_rate": 0.001, "loss": 2.7505, "step": 50724 }, { "epoch": 9.74, "learning_rate": 0.001, "loss": 2.7448, "step": 50736 }, { "epoch": 9.74, "learning_rate": 0.001, "loss": 2.7422, "step": 50748 }, { "epoch": 9.75, "learning_rate": 0.001, "loss": 2.7449, "step": 50760 }, { "epoch": 9.75, "learning_rate": 0.001, "loss": 2.7501, "step": 50772 }, { "epoch": 9.75, "learning_rate": 0.001, "loss": 2.7481, "step": 50784 }, { "epoch": 9.75, "learning_rate": 0.001, "loss": 2.7487, "step": 50796 }, { "epoch": 9.76, "learning_rate": 0.001, "loss": 2.7426, "step": 50808 }, { "epoch": 9.76, "learning_rate": 0.001, "loss": 2.7384, "step": 50820 }, { "epoch": 9.76, "learning_rate": 0.001, "loss": 2.7328, "step": 50832 }, { "epoch": 9.76, "learning_rate": 0.001, "loss": 2.7513, "step": 50844 }, { "epoch": 9.76, "learning_rate": 0.001, "loss": 2.7395, "step": 50856 }, { "epoch": 9.77, "learning_rate": 0.001, "loss": 2.7481, "step": 50868 }, { "epoch": 9.77, "learning_rate": 0.001, "loss": 2.7432, "step": 50880 }, { "epoch": 9.77, "learning_rate": 0.001, "loss": 2.7414, "step": 50892 }, { "epoch": 9.77, "learning_rate": 0.001, "loss": 2.7488, "step": 50904 }, { "epoch": 9.78, "learning_rate": 0.001, "loss": 2.7486, "step": 50916 }, { "epoch": 9.78, "learning_rate": 0.001, "loss": 2.7469, "step": 50928 }, { "epoch": 9.78, "learning_rate": 0.001, "loss": 2.7547, "step": 50940 }, { "epoch": 9.78, "learning_rate": 0.001, "loss": 2.7426, "step": 50952 }, { "epoch": 9.79, "learning_rate": 0.001, "loss": 2.7471, "step": 50964 }, { "epoch": 9.79, "learning_rate": 0.001, "loss": 2.7456, "step": 50976 }, { "epoch": 9.79, "learning_rate": 0.001, "loss": 2.7451, "step": 50988 }, { "epoch": 9.79, "learning_rate": 0.001, "loss": 2.7454, "step": 51000 }, { "epoch": 9.79, "learning_rate": 0.001, "loss": 2.7453, "step": 51012 }, { "epoch": 9.8, "learning_rate": 0.001, "loss": 2.7555, "step": 51024 }, { "epoch": 9.8, "learning_rate": 0.001, "loss": 2.7391, "step": 51036 }, { "epoch": 9.8, "learning_rate": 0.001, "loss": 2.7417, "step": 51048 }, { "epoch": 9.8, "learning_rate": 0.001, "loss": 2.7468, "step": 51060 }, { "epoch": 9.81, "learning_rate": 0.001, "loss": 2.7449, "step": 51072 }, { "epoch": 9.81, "learning_rate": 0.001, "loss": 2.744, "step": 51084 }, { "epoch": 9.81, "learning_rate": 0.001, "loss": 2.749, "step": 51096 }, { "epoch": 9.81, "learning_rate": 0.001, "loss": 2.741, "step": 51108 }, { "epoch": 9.82, "learning_rate": 0.001, "loss": 2.7452, "step": 51120 }, { "epoch": 9.82, "learning_rate": 0.001, "loss": 2.7428, "step": 51132 }, { "epoch": 9.82, "learning_rate": 0.001, "loss": 2.7408, "step": 51144 }, { "epoch": 9.82, "learning_rate": 0.001, "loss": 2.7421, "step": 51156 }, { "epoch": 9.82, "learning_rate": 0.001, "loss": 2.7405, "step": 51168 }, { "epoch": 9.83, "learning_rate": 0.001, "loss": 2.7512, "step": 51180 }, { "epoch": 9.83, "learning_rate": 0.001, "loss": 2.7448, "step": 51192 }, { "epoch": 9.83, "learning_rate": 0.001, "loss": 2.7458, "step": 51204 }, { "epoch": 9.83, "learning_rate": 0.001, "loss": 2.7465, "step": 51216 }, { "epoch": 9.84, "learning_rate": 0.001, "loss": 2.7432, "step": 51228 }, { "epoch": 9.84, "learning_rate": 0.001, "loss": 2.7322, "step": 51240 }, { "epoch": 9.84, "eval_ag_news_accuracy": 0.30459375, "eval_ag_news_bleu_score": 4.3222048084947815, "eval_ag_news_bleu_score_sem": 0.14534535334293044, "eval_ag_news_emb_cos_sim": 0.7752635478973389, "eval_ag_news_emb_cos_sim_sem": 0.00852406576824869, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.773444890975952, "eval_ag_news_n_ngrams_match_1": 12.952, "eval_ag_news_n_ngrams_match_2": 2.626, "eval_ag_news_n_ngrams_match_3": 0.744, "eval_ag_news_num_pred_words": 46.588, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.52976212712324, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3192163543966634, "eval_ag_news_runtime": 11.1317, "eval_ag_news_samples_per_second": 44.917, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.32389958076784425, "eval_ag_news_token_set_f1_sem": 0.004484999262216496, "eval_ag_news_token_set_precision": 0.30534438193470376, "eval_ag_news_token_set_recall": 0.3600787088344293, "eval_ag_news_true_num_tokens": 56.09375, "step": 51250 }, { "epoch": 9.84, "eval_anthropic_toxic_prompts_accuracy": 0.10434375, "eval_anthropic_toxic_prompts_bleu_score": 2.6621837360314653, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1079963812776486, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6306837797164917, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010964805570582754, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4620285034179688, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.432, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.528, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.522, "eval_anthropic_toxic_prompts_num_pred_words": 47.518, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.88158286522493, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.18792841198615146, "eval_anthropic_toxic_prompts_runtime": 10.985, "eval_anthropic_toxic_prompts_samples_per_second": 45.517, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3277939466939331, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064747906918940674, "eval_anthropic_toxic_prompts_token_set_precision": 0.38528915315719225, "eval_anthropic_toxic_prompts_token_set_recall": 0.32018547174095824, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 51250 }, { "epoch": 9.84, "eval_arxiv_accuracy": 0.32771875, "eval_arxiv_bleu_score": 3.912059097140368, "eval_arxiv_bleu_score_sem": 0.11646477476420701, "eval_arxiv_emb_cos_sim": 0.7122216820716858, "eval_arxiv_emb_cos_sim_sem": 0.007527273044630805, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6349117755889893, "eval_arxiv_n_ngrams_match_1": 13.87, "eval_arxiv_n_ngrams_match_2": 2.586, "eval_arxiv_n_ngrams_match_3": 0.558, "eval_arxiv_num_pred_words": 40.31, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.89850917599516, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32614259780896493, "eval_arxiv_runtime": 11.6154, "eval_arxiv_samples_per_second": 43.046, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.32419261198933813, "eval_arxiv_token_set_f1_sem": 0.003989121926248758, "eval_arxiv_token_set_precision": 0.2711646476675902, "eval_arxiv_token_set_recall": 0.42298350778555, "eval_arxiv_true_num_tokens": 64.0, "step": 51250 }, { "epoch": 9.84, "eval_python_code_alpaca_accuracy": 0.14734375, "eval_python_code_alpaca_bleu_score": 3.7333977969059293, "eval_python_code_alpaca_bleu_score_sem": 0.11868449068358425, "eval_python_code_alpaca_emb_cos_sim": 0.7127091884613037, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009569808763689435, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1019539833068848, "eval_python_code_alpaca_n_ngrams_match_1": 8.934, "eval_python_code_alpaca_n_ngrams_match_2": 2.362, "eval_python_code_alpaca_n_ngrams_match_3": 0.648, "eval_python_code_alpaca_num_pred_words": 43.612, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.24136811175948, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29718029935615586, "eval_python_code_alpaca_runtime": 10.7989, "eval_python_code_alpaca_samples_per_second": 46.301, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.44729144453998904, "eval_python_code_alpaca_token_set_f1_sem": 0.00574181211629621, "eval_python_code_alpaca_token_set_precision": 0.48234530927173214, "eval_python_code_alpaca_token_set_recall": 0.4456196199353644, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 51250 }, { "epoch": 9.84, "eval_wikibio_accuracy": 0.298, "eval_wikibio_bleu_score": 5.330900691913495, "eval_wikibio_bleu_score_sem": 0.17906310250588, "eval_wikibio_emb_cos_sim": 0.7194701433181763, "eval_wikibio_emb_cos_sim_sem": 0.011347857203274013, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.049874305725098, "eval_wikibio_n_ngrams_match_1": 9.856, "eval_wikibio_n_ngrams_match_2": 3.172, "eval_wikibio_n_ngrams_match_3": 1.062, "eval_wikibio_num_pred_words": 37.386, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 57.390242967095304, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33583352303407343, "eval_wikibio_runtime": 10.9786, "eval_wikibio_samples_per_second": 45.543, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.30827291045695915, "eval_wikibio_token_set_f1_sem": 0.005191835126986878, "eval_wikibio_token_set_precision": 0.31814739759521976, "eval_wikibio_token_set_recall": 0.3125368036028271, "eval_wikibio_true_num_tokens": 61.1328125, "step": 51250 }, { "epoch": 9.84, "eval_nq_accuracy": 0.5, "eval_nq_bleu_score": 10.506865128841985, "eval_nq_bleu_score_sem": 0.43942487607256203, "eval_nq_emb_cos_sim": 0.8112521171569824, "eval_nq_emb_cos_sim_sem": 0.007949700370543369, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.374133825302124, "eval_nq_n_ngrams_match_1": 22.0, "eval_nq_n_ngrams_match_2": 7.784, "eval_nq_n_ngrams_match_3": 3.446, "eval_nq_num_pred_words": 49.368, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.741704962333953, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41845153063581353, "eval_nq_runtime": 11.0452, "eval_nq_samples_per_second": 45.268, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4382135816677673, "eval_nq_token_set_f1_sem": 0.005028347832057513, "eval_nq_token_set_precision": 0.3926721666486054, "eval_nq_token_set_recall": 0.5058197006359141, "eval_nq_true_num_tokens": 64.0, "step": 51250 }, { "epoch": 9.84, "learning_rate": 0.001, "loss": 2.7445, "step": 51252 }, { "epoch": 9.84, "learning_rate": 0.001, "loss": 2.7517, "step": 51264 }, { "epoch": 9.85, "learning_rate": 0.001, "loss": 2.7501, "step": 51276 }, { "epoch": 9.85, "learning_rate": 0.001, "loss": 2.7602, "step": 51288 }, { "epoch": 9.85, "learning_rate": 0.001, "loss": 2.7477, "step": 51300 }, { "epoch": 9.85, "learning_rate": 0.001, "loss": 2.7403, "step": 51312 }, { "epoch": 9.85, "learning_rate": 0.001, "loss": 2.753, "step": 51324 }, { "epoch": 9.86, "learning_rate": 0.001, "loss": 2.751, "step": 51336 }, { "epoch": 9.86, "learning_rate": 0.001, "loss": 2.7526, "step": 51348 }, { "epoch": 9.86, "learning_rate": 0.001, "loss": 2.7449, "step": 51360 }, { "epoch": 9.86, "learning_rate": 0.001, "loss": 2.7452, "step": 51372 }, { "epoch": 9.87, "learning_rate": 0.001, "loss": 2.7451, "step": 51384 }, { "epoch": 9.87, "learning_rate": 0.001, "loss": 2.7486, "step": 51396 }, { "epoch": 9.87, "learning_rate": 0.001, "loss": 2.7473, "step": 51408 }, { "epoch": 9.87, "learning_rate": 0.001, "loss": 2.7471, "step": 51420 }, { "epoch": 9.88, "learning_rate": 0.001, "loss": 2.745, "step": 51432 }, { "epoch": 9.88, "learning_rate": 0.001, "loss": 2.7584, "step": 51444 }, { "epoch": 9.88, "learning_rate": 0.001, "loss": 2.7455, "step": 51456 }, { "epoch": 9.88, "learning_rate": 0.001, "loss": 2.756, "step": 51468 }, { "epoch": 9.88, "learning_rate": 0.001, "loss": 2.7517, "step": 51480 }, { "epoch": 9.89, "learning_rate": 0.001, "loss": 2.7384, "step": 51492 }, { "epoch": 9.89, "learning_rate": 0.001, "loss": 2.7426, "step": 51504 }, { "epoch": 9.89, "learning_rate": 0.001, "loss": 2.7402, "step": 51516 }, { "epoch": 9.89, "learning_rate": 0.001, "loss": 2.7514, "step": 51528 }, { "epoch": 9.9, "learning_rate": 0.001, "loss": 2.7391, "step": 51540 }, { "epoch": 9.9, "learning_rate": 0.001, "loss": 2.7452, "step": 51552 }, { "epoch": 9.9, "learning_rate": 0.001, "loss": 2.7423, "step": 51564 }, { "epoch": 9.9, "learning_rate": 0.001, "loss": 2.7506, "step": 51576 }, { "epoch": 9.91, "learning_rate": 0.001, "loss": 2.7407, "step": 51588 }, { "epoch": 9.91, "learning_rate": 0.001, "loss": 2.7451, "step": 51600 }, { "epoch": 9.91, "learning_rate": 0.001, "loss": 2.7469, "step": 51612 }, { "epoch": 9.91, "learning_rate": 0.001, "loss": 2.7408, "step": 51624 }, { "epoch": 9.91, "learning_rate": 0.001, "loss": 2.7512, "step": 51636 }, { "epoch": 9.92, "learning_rate": 0.001, "loss": 2.7501, "step": 51648 }, { "epoch": 9.92, "learning_rate": 0.001, "loss": 2.7529, "step": 51660 }, { "epoch": 9.92, "learning_rate": 0.001, "loss": 2.7455, "step": 51672 }, { "epoch": 9.92, "learning_rate": 0.001, "loss": 2.7414, "step": 51684 }, { "epoch": 9.93, "learning_rate": 0.001, "loss": 2.748, "step": 51696 }, { "epoch": 9.93, "learning_rate": 0.001, "loss": 2.7483, "step": 51708 }, { "epoch": 9.93, "learning_rate": 0.001, "loss": 2.7284, "step": 51720 }, { "epoch": 9.93, "learning_rate": 0.001, "loss": 2.7468, "step": 51732 }, { "epoch": 9.94, "learning_rate": 0.001, "loss": 2.7437, "step": 51744 }, { "epoch": 9.94, "learning_rate": 0.001, "loss": 2.7439, "step": 51756 }, { "epoch": 9.94, "learning_rate": 0.001, "loss": 2.7327, "step": 51768 }, { "epoch": 9.94, "learning_rate": 0.001, "loss": 2.7537, "step": 51780 }, { "epoch": 9.94, "learning_rate": 0.001, "loss": 2.7393, "step": 51792 }, { "epoch": 9.95, "learning_rate": 0.001, "loss": 2.7467, "step": 51804 }, { "epoch": 9.95, "learning_rate": 0.001, "loss": 2.7483, "step": 51816 }, { "epoch": 9.95, "learning_rate": 0.001, "loss": 2.7492, "step": 51828 }, { "epoch": 9.95, "learning_rate": 0.001, "loss": 2.7394, "step": 51840 }, { "epoch": 9.96, "learning_rate": 0.001, "loss": 2.7398, "step": 51852 }, { "epoch": 9.96, "learning_rate": 0.001, "loss": 2.7456, "step": 51864 }, { "epoch": 9.96, "eval_ag_news_accuracy": 0.30215625, "eval_ag_news_bleu_score": 4.271787666162469, "eval_ag_news_bleu_score_sem": 0.14182126944335566, "eval_ag_news_emb_cos_sim": 0.7758315801620483, "eval_ag_news_emb_cos_sim_sem": 0.008193741818678081, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7651760578155518, "eval_ag_news_n_ngrams_match_1": 12.88, "eval_ag_news_n_ngrams_match_2": 2.596, "eval_ag_news_n_ngrams_match_3": 0.706, "eval_ag_news_num_pred_words": 45.916, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.17130583662284, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.317430982857297, "eval_ag_news_runtime": 13.6479, "eval_ag_news_samples_per_second": 36.636, "eval_ag_news_steps_per_second": 0.073, "eval_ag_news_token_set_f1": 0.32270702357586584, "eval_ag_news_token_set_f1_sem": 0.004427138295392929, "eval_ag_news_token_set_precision": 0.30369645832459413, "eval_ag_news_token_set_recall": 0.36000126454866765, "eval_ag_news_true_num_tokens": 56.09375, "step": 51875 }, { "epoch": 9.96, "eval_anthropic_toxic_prompts_accuracy": 0.10621875, "eval_anthropic_toxic_prompts_bleu_score": 2.7063884695025613, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10063318095502631, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6409755945205688, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01030565520165269, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4423282146453857, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.574, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.618, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.56, "eval_anthropic_toxic_prompts_num_pred_words": 47.81, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.2596526924526, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19119575819917942, "eval_anthropic_toxic_prompts_runtime": 10.1386, "eval_anthropic_toxic_prompts_samples_per_second": 49.317, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.33219675504993484, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006387573394778598, "eval_anthropic_toxic_prompts_token_set_precision": 0.39673599179717645, "eval_anthropic_toxic_prompts_token_set_recall": 0.3154570124504101, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 51875 }, { "epoch": 9.96, "eval_arxiv_accuracy": 0.325625, "eval_arxiv_bleu_score": 3.6680001865650946, "eval_arxiv_bleu_score_sem": 0.10656478426341325, "eval_arxiv_emb_cos_sim": 0.7077811360359192, "eval_arxiv_emb_cos_sim_sem": 0.00885183588221734, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.630890130996704, "eval_arxiv_n_ngrams_match_1": 13.38, "eval_arxiv_n_ngrams_match_2": 2.44, "eval_arxiv_n_ngrams_match_3": 0.47, "eval_arxiv_num_pred_words": 38.496, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.7464009092179, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32117969694031323, "eval_arxiv_runtime": 10.7734, "eval_arxiv_samples_per_second": 46.41, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.31729348974676336, "eval_arxiv_token_set_f1_sem": 0.00419676339136666, "eval_arxiv_token_set_precision": 0.2613812140416253, "eval_arxiv_token_set_recall": 0.42835992297909065, "eval_arxiv_true_num_tokens": 64.0, "step": 51875 }, { "epoch": 9.96, "eval_python_code_alpaca_accuracy": 0.146375, "eval_python_code_alpaca_bleu_score": 3.7233119554006526, "eval_python_code_alpaca_bleu_score_sem": 0.12112002098702178, "eval_python_code_alpaca_emb_cos_sim": 0.7056361436843872, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010300964389122329, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.119572639465332, "eval_python_code_alpaca_n_ngrams_match_1": 8.638, "eval_python_code_alpaca_n_ngrams_match_2": 2.206, "eval_python_code_alpaca_n_ngrams_match_3": 0.636, "eval_python_code_alpaca_num_pred_words": 43.086, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.636703542001946, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29043748771936967, "eval_python_code_alpaca_runtime": 10.0391, "eval_python_code_alpaca_samples_per_second": 49.805, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4345619470500771, "eval_python_code_alpaca_token_set_f1_sem": 0.005317728832814514, "eval_python_code_alpaca_token_set_precision": 0.46534873501829915, "eval_python_code_alpaca_token_set_recall": 0.4354722309610675, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 51875 }, { "epoch": 9.96, "eval_wikibio_accuracy": 0.3028125, "eval_wikibio_bleu_score": 5.088166038314854, "eval_wikibio_bleu_score_sem": 0.1935509374253117, "eval_wikibio_emb_cos_sim": 0.7037593126296997, "eval_wikibio_emb_cos_sim_sem": 0.01229855707968917, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.015735149383545, "eval_wikibio_n_ngrams_match_1": 9.208, "eval_wikibio_n_ngrams_match_2": 2.918, "eval_wikibio_n_ngrams_match_3": 1.006, "eval_wikibio_num_pred_words": 35.48, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.46405478422453, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3185929641889421, "eval_wikibio_runtime": 10.2873, "eval_wikibio_samples_per_second": 48.604, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.2955220152254472, "eval_wikibio_token_set_f1_sem": 0.0056635203579463965, "eval_wikibio_token_set_precision": 0.2983115466329409, "eval_wikibio_token_set_recall": 0.31063656795951516, "eval_wikibio_true_num_tokens": 61.1328125, "step": 51875 }, { "epoch": 9.96, "eval_nq_accuracy": 0.50109375, "eval_nq_bleu_score": 10.260100703238406, "eval_nq_bleu_score_sem": 0.4403658884810294, "eval_nq_emb_cos_sim": 0.8059619665145874, "eval_nq_emb_cos_sim_sem": 0.00778136849176889, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3714003562927246, "eval_nq_n_ngrams_match_1": 21.762, "eval_nq_n_ngrams_match_2": 7.568, "eval_nq_n_ngrams_match_3": 3.362, "eval_nq_num_pred_words": 48.766, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.712382938390657, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41838994266378327, "eval_nq_runtime": 10.3401, "eval_nq_samples_per_second": 48.356, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4369980999074413, "eval_nq_token_set_f1_sem": 0.00503054054533651, "eval_nq_token_set_precision": 0.38943443456707744, "eval_nq_token_set_recall": 0.5073206099411495, "eval_nq_true_num_tokens": 64.0, "step": 51875 }, { "epoch": 9.96, "learning_rate": 0.001, "loss": 2.7398, "step": 51876 }, { "epoch": 9.96, "learning_rate": 0.001, "loss": 2.7386, "step": 51888 }, { "epoch": 9.97, "learning_rate": 0.001, "loss": 2.7403, "step": 51900 }, { "epoch": 9.97, "learning_rate": 0.001, "loss": 2.7472, "step": 51912 }, { "epoch": 9.97, "learning_rate": 0.001, "loss": 2.7395, "step": 51924 }, { "epoch": 9.97, "learning_rate": 0.001, "loss": 2.7367, "step": 51936 }, { "epoch": 9.97, "learning_rate": 0.001, "loss": 2.7407, "step": 51948 }, { "epoch": 9.98, "learning_rate": 0.001, "loss": 2.7453, "step": 51960 }, { "epoch": 9.98, "learning_rate": 0.001, "loss": 2.7436, "step": 51972 }, { "epoch": 9.98, "learning_rate": 0.001, "loss": 2.7393, "step": 51984 }, { "epoch": 9.98, "learning_rate": 0.001, "loss": 2.7339, "step": 51996 }, { "epoch": 9.99, "learning_rate": 0.001, "loss": 2.743, "step": 52008 }, { "epoch": 9.99, "learning_rate": 0.001, "loss": 2.7382, "step": 52020 }, { "epoch": 9.99, "learning_rate": 0.001, "loss": 2.7365, "step": 52032 }, { "epoch": 9.99, "learning_rate": 0.001, "loss": 2.7432, "step": 52044 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 2.7373, "step": 52056 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 2.7395, "step": 52068 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 2.7453, "step": 52080 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 2.7269, "step": 52092 }, { "epoch": 10.0, "learning_rate": 0.001, "loss": 2.7214, "step": 52104 }, { "epoch": 10.01, "learning_rate": 0.001, "loss": 2.7286, "step": 52116 }, { "epoch": 10.01, "learning_rate": 0.001, "loss": 2.7339, "step": 52128 }, { "epoch": 10.01, "learning_rate": 0.001, "loss": 2.7257, "step": 52140 }, { "epoch": 10.01, "learning_rate": 0.001, "loss": 2.7264, "step": 52152 }, { "epoch": 10.02, "learning_rate": 0.001, "loss": 2.7313, "step": 52164 }, { "epoch": 10.02, "learning_rate": 0.001, "loss": 2.7306, "step": 52176 }, { "epoch": 10.02, "learning_rate": 0.001, "loss": 2.7184, "step": 52188 }, { "epoch": 10.02, "learning_rate": 0.001, "loss": 2.7267, "step": 52200 }, { "epoch": 10.03, "learning_rate": 0.001, "loss": 2.7126, "step": 52212 }, { "epoch": 10.03, "learning_rate": 0.001, "loss": 2.7269, "step": 52224 }, { "epoch": 10.03, "learning_rate": 0.001, "loss": 2.7272, "step": 52236 }, { "epoch": 10.03, "learning_rate": 0.001, "loss": 2.7314, "step": 52248 }, { "epoch": 10.03, "learning_rate": 0.001, "loss": 2.7298, "step": 52260 }, { "epoch": 10.04, "learning_rate": 0.001, "loss": 2.7256, "step": 52272 }, { "epoch": 10.04, "learning_rate": 0.001, "loss": 2.7284, "step": 52284 }, { "epoch": 10.04, "learning_rate": 0.001, "loss": 2.7299, "step": 52296 }, { "epoch": 10.04, "learning_rate": 0.001, "loss": 2.7293, "step": 52308 }, { "epoch": 10.05, "learning_rate": 0.001, "loss": 2.7133, "step": 52320 }, { "epoch": 10.05, "learning_rate": 0.001, "loss": 2.717, "step": 52332 }, { "epoch": 10.05, "learning_rate": 0.001, "loss": 2.7295, "step": 52344 }, { "epoch": 10.05, "learning_rate": 0.001, "loss": 2.7225, "step": 52356 }, { "epoch": 10.06, "learning_rate": 0.001, "loss": 2.726, "step": 52368 }, { "epoch": 10.06, "learning_rate": 0.001, "loss": 2.7339, "step": 52380 }, { "epoch": 10.06, "learning_rate": 0.001, "loss": 2.7193, "step": 52392 }, { "epoch": 10.06, "learning_rate": 0.001, "loss": 2.7301, "step": 52404 }, { "epoch": 10.06, "learning_rate": 0.001, "loss": 2.7269, "step": 52416 }, { "epoch": 10.07, "learning_rate": 0.001, "loss": 2.7165, "step": 52428 }, { "epoch": 10.07, "learning_rate": 0.001, "loss": 2.7266, "step": 52440 }, { "epoch": 10.07, "learning_rate": 0.001, "loss": 2.7213, "step": 52452 }, { "epoch": 10.07, "learning_rate": 0.001, "loss": 2.7249, "step": 52464 }, { "epoch": 10.08, "learning_rate": 0.001, "loss": 2.7216, "step": 52476 }, { "epoch": 10.08, "learning_rate": 0.001, "loss": 2.7294, "step": 52488 }, { "epoch": 10.08, "learning_rate": 0.001, "loss": 2.7208, "step": 52500 }, { "epoch": 10.08, "eval_ag_news_accuracy": 0.30303125, "eval_ag_news_bleu_score": 4.228255478546149, "eval_ag_news_bleu_score_sem": 0.13346445109529087, "eval_ag_news_emb_cos_sim": 0.7734034657478333, "eval_ag_news_emb_cos_sim_sem": 0.00898802815482423, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7726082801818848, "eval_ag_news_n_ngrams_match_1": 12.894, "eval_ag_news_n_ngrams_match_2": 2.572, "eval_ag_news_n_ngrams_match_3": 0.69, "eval_ag_news_num_pred_words": 45.734, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.493359887640956, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3178587687880139, "eval_ag_news_runtime": 10.7644, "eval_ag_news_samples_per_second": 46.449, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3262322760940814, "eval_ag_news_token_set_f1_sem": 0.004454255222716337, "eval_ag_news_token_set_precision": 0.3038468689901446, "eval_ag_news_token_set_recall": 0.3698536799096918, "eval_ag_news_true_num_tokens": 56.09375, "step": 52500 }, { "epoch": 10.08, "eval_anthropic_toxic_prompts_accuracy": 0.106125, "eval_anthropic_toxic_prompts_bleu_score": 2.7343252729613194, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11746437774251844, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511576771736145, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009511287205987552, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.440284490585327, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.55, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.586, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.538, "eval_anthropic_toxic_prompts_num_pred_words": 47.634, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.19583182646959, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.190566117399866, "eval_anthropic_toxic_prompts_runtime": 10.2776, "eval_anthropic_toxic_prompts_samples_per_second": 48.65, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.33482702497117933, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006264567064827086, "eval_anthropic_toxic_prompts_token_set_precision": 0.3930853406408358, "eval_anthropic_toxic_prompts_token_set_recall": 0.3228208386996512, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 52500 }, { "epoch": 10.08, "eval_arxiv_accuracy": 0.32690625, "eval_arxiv_bleu_score": 3.9603489245072288, "eval_arxiv_bleu_score_sem": 0.11415442334670534, "eval_arxiv_emb_cos_sim": 0.7231601476669312, "eval_arxiv_emb_cos_sim_sem": 0.007601504181570617, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6353352069854736, "eval_arxiv_n_ngrams_match_1": 14.048, "eval_arxiv_n_ngrams_match_2": 2.61, "eval_arxiv_n_ngrams_match_3": 0.566, "eval_arxiv_num_pred_words": 40.07, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.91455999263026, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33180693633247804, "eval_arxiv_runtime": 10.5025, "eval_arxiv_samples_per_second": 47.608, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3277091009883879, "eval_arxiv_token_set_f1_sem": 0.003975888031076143, "eval_arxiv_token_set_precision": 0.2749666758619215, "eval_arxiv_token_set_recall": 0.4250449361622123, "eval_arxiv_true_num_tokens": 64.0, "step": 52500 }, { "epoch": 10.08, "eval_python_code_alpaca_accuracy": 0.1483125, "eval_python_code_alpaca_bleu_score": 3.781720619656825, "eval_python_code_alpaca_bleu_score_sem": 0.11532019879356221, "eval_python_code_alpaca_emb_cos_sim": 0.7247380018234253, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008669058829660261, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0973434448242188, "eval_python_code_alpaca_n_ngrams_match_1": 8.96, "eval_python_code_alpaca_n_ngrams_match_2": 2.308, "eval_python_code_alpaca_n_ngrams_match_3": 0.67, "eval_python_code_alpaca_num_pred_words": 44.014, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.139059458396538, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2968480787843321, "eval_python_code_alpaca_runtime": 11.1174, "eval_python_code_alpaca_samples_per_second": 44.974, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.44470050077138495, "eval_python_code_alpaca_token_set_f1_sem": 0.005326313761115086, "eval_python_code_alpaca_token_set_precision": 0.48224593242693986, "eval_python_code_alpaca_token_set_recall": 0.43425996236665465, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 52500 }, { "epoch": 10.08, "eval_wikibio_accuracy": 0.30234375, "eval_wikibio_bleu_score": 5.503111313402614, "eval_wikibio_bleu_score_sem": 0.2092912315387504, "eval_wikibio_emb_cos_sim": 0.7226361632347107, "eval_wikibio_emb_cos_sim_sem": 0.009754967771731888, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.101844787597656, "eval_wikibio_n_ngrams_match_1": 9.926, "eval_wikibio_n_ngrams_match_2": 3.202, "eval_wikibio_n_ngrams_match_3": 1.12, "eval_wikibio_num_pred_words": 36.796, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 60.45170535100718, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3379599294616351, "eval_wikibio_runtime": 10.9875, "eval_wikibio_samples_per_second": 45.506, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.311808223722134, "eval_wikibio_token_set_f1_sem": 0.005279325026010822, "eval_wikibio_token_set_precision": 0.3206006788993924, "eval_wikibio_token_set_recall": 0.31908003551725816, "eval_wikibio_true_num_tokens": 61.1328125, "step": 52500 }, { "epoch": 10.08, "eval_nq_accuracy": 0.50184375, "eval_nq_bleu_score": 10.447551941160691, "eval_nq_bleu_score_sem": 0.45678269801432014, "eval_nq_emb_cos_sim": 0.8081899881362915, "eval_nq_emb_cos_sim_sem": 0.00814857751923161, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.367133855819702, "eval_nq_n_ngrams_match_1": 21.864, "eval_nq_n_ngrams_match_2": 7.624, "eval_nq_n_ngrams_match_3": 3.404, "eval_nq_num_pred_words": 49.164, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.666775911899004, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4188996555677107, "eval_nq_runtime": 10.5209, "eval_nq_samples_per_second": 47.525, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.441238484716742, "eval_nq_token_set_f1_sem": 0.004935353082230968, "eval_nq_token_set_precision": 0.3933015622393701, "eval_nq_token_set_recall": 0.5134531003887985, "eval_nq_true_num_tokens": 64.0, "step": 52500 }, { "epoch": 10.08, "learning_rate": 0.001, "loss": 2.7165, "step": 52512 }, { "epoch": 10.09, "learning_rate": 0.001, "loss": 2.7325, "step": 52524 }, { "epoch": 10.09, "learning_rate": 0.001, "loss": 2.7278, "step": 52536 }, { "epoch": 10.09, "learning_rate": 0.001, "loss": 2.7407, "step": 52548 }, { "epoch": 10.09, "learning_rate": 0.001, "loss": 2.7295, "step": 52560 }, { "epoch": 10.09, "learning_rate": 0.001, "loss": 2.7285, "step": 52572 }, { "epoch": 10.1, "learning_rate": 0.001, "loss": 2.733, "step": 52584 }, { "epoch": 10.1, "learning_rate": 0.001, "loss": 2.7146, "step": 52596 }, { "epoch": 10.1, "learning_rate": 0.001, "loss": 2.7211, "step": 52608 }, { "epoch": 10.1, "learning_rate": 0.001, "loss": 2.7314, "step": 52620 }, { "epoch": 10.11, "learning_rate": 0.001, "loss": 2.7417, "step": 52632 }, { "epoch": 10.11, "learning_rate": 0.001, "loss": 2.7356, "step": 52644 }, { "epoch": 10.11, "learning_rate": 0.001, "loss": 2.7328, "step": 52656 }, { "epoch": 10.11, "learning_rate": 0.001, "loss": 2.7277, "step": 52668 }, { "epoch": 10.12, "learning_rate": 0.001, "loss": 2.7206, "step": 52680 }, { "epoch": 10.12, "learning_rate": 0.001, "loss": 2.7138, "step": 52692 }, { "epoch": 10.12, "learning_rate": 0.001, "loss": 2.7233, "step": 52704 }, { "epoch": 10.12, "learning_rate": 0.001, "loss": 2.7224, "step": 52716 }, { "epoch": 10.12, "learning_rate": 0.001, "loss": 2.7177, "step": 52728 }, { "epoch": 10.13, "learning_rate": 0.001, "loss": 2.7225, "step": 52740 }, { "epoch": 10.13, "learning_rate": 0.001, "loss": 2.7316, "step": 52752 }, { "epoch": 10.13, "learning_rate": 0.001, "loss": 2.7254, "step": 52764 }, { "epoch": 10.13, "learning_rate": 0.001, "loss": 2.7318, "step": 52776 }, { "epoch": 10.14, "learning_rate": 0.001, "loss": 2.7228, "step": 52788 }, { "epoch": 10.14, "learning_rate": 0.001, "loss": 2.7278, "step": 52800 }, { "epoch": 10.14, "learning_rate": 0.001, "loss": 2.7185, "step": 52812 }, { "epoch": 10.14, "learning_rate": 0.001, "loss": 2.7318, "step": 52824 }, { "epoch": 10.15, "learning_rate": 0.001, "loss": 2.7412, "step": 52836 }, { "epoch": 10.15, "learning_rate": 0.001, "loss": 2.738, "step": 52848 }, { "epoch": 10.15, "learning_rate": 0.001, "loss": 2.7196, "step": 52860 }, { "epoch": 10.15, "learning_rate": 0.001, "loss": 2.7326, "step": 52872 }, { "epoch": 10.15, "learning_rate": 0.001, "loss": 2.7239, "step": 52884 }, { "epoch": 10.16, "learning_rate": 0.001, "loss": 2.7201, "step": 52896 }, { "epoch": 10.16, "learning_rate": 0.001, "loss": 2.717, "step": 52908 }, { "epoch": 10.16, "learning_rate": 0.001, "loss": 2.7335, "step": 52920 }, { "epoch": 10.16, "learning_rate": 0.001, "loss": 2.7274, "step": 52932 }, { "epoch": 10.17, "learning_rate": 0.001, "loss": 2.7241, "step": 52944 }, { "epoch": 10.17, "learning_rate": 0.001, "loss": 2.733, "step": 52956 }, { "epoch": 10.17, "learning_rate": 0.001, "loss": 2.7249, "step": 52968 }, { "epoch": 10.17, "learning_rate": 0.001, "loss": 2.726, "step": 52980 }, { "epoch": 10.18, "learning_rate": 0.001, "loss": 2.721, "step": 52992 }, { "epoch": 10.18, "learning_rate": 0.001, "loss": 2.7322, "step": 53004 }, { "epoch": 10.18, "learning_rate": 0.001, "loss": 2.726, "step": 53016 }, { "epoch": 10.18, "learning_rate": 0.001, "loss": 2.7226, "step": 53028 }, { "epoch": 10.18, "learning_rate": 0.001, "loss": 2.7308, "step": 53040 }, { "epoch": 10.19, "learning_rate": 0.001, "loss": 2.7189, "step": 53052 }, { "epoch": 10.19, "learning_rate": 0.001, "loss": 2.7168, "step": 53064 }, { "epoch": 10.19, "learning_rate": 0.001, "loss": 2.719, "step": 53076 }, { "epoch": 10.19, "learning_rate": 0.001, "loss": 2.7289, "step": 53088 }, { "epoch": 10.2, "learning_rate": 0.001, "loss": 2.7239, "step": 53100 }, { "epoch": 10.2, "learning_rate": 0.001, "loss": 2.7317, "step": 53112 }, { "epoch": 10.2, "learning_rate": 0.001, "loss": 2.725, "step": 53124 }, { "epoch": 10.2, "eval_ag_news_accuracy": 0.303, "eval_ag_news_bleu_score": 4.32066133713104, "eval_ag_news_bleu_score_sem": 0.14242850889569467, "eval_ag_news_emb_cos_sim": 0.7658836245536804, "eval_ag_news_emb_cos_sim_sem": 0.009419168683121912, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7735326290130615, "eval_ag_news_n_ngrams_match_1": 13.0, "eval_ag_news_n_ngrams_match_2": 2.644, "eval_ag_news_n_ngrams_match_3": 0.678, "eval_ag_news_num_pred_words": 46.192, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.533581510558264, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3195773277143591, "eval_ag_news_runtime": 11.5613, "eval_ag_news_samples_per_second": 43.248, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3280599121254573, "eval_ag_news_token_set_f1_sem": 0.00437620625410379, "eval_ag_news_token_set_precision": 0.30763405403202343, "eval_ag_news_token_set_recall": 0.368383430379265, "eval_ag_news_true_num_tokens": 56.09375, "step": 53125 }, { "epoch": 10.2, "eval_anthropic_toxic_prompts_accuracy": 0.10525, "eval_anthropic_toxic_prompts_bleu_score": 2.9140187609364174, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14611536976309447, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6332710981369019, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010925239557998644, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4764328002929688, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.718, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.642, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.562, "eval_anthropic_toxic_prompts_num_pred_words": 46.748, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 32.344138043014894, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19679542366114136, "eval_anthropic_toxic_prompts_runtime": 12.0322, "eval_anthropic_toxic_prompts_samples_per_second": 41.555, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.33705735741638665, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006431252472007019, "eval_anthropic_toxic_prompts_token_set_precision": 0.4005826486388677, "eval_anthropic_toxic_prompts_token_set_recall": 0.32101468312490955, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 53125 }, { "epoch": 10.2, "eval_arxiv_accuracy": 0.3265, "eval_arxiv_bleu_score": 3.846087266620141, "eval_arxiv_bleu_score_sem": 0.10120441909318453, "eval_arxiv_emb_cos_sim": 0.711337685585022, "eval_arxiv_emb_cos_sim_sem": 0.007915942061975158, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.626399040222168, "eval_arxiv_n_ngrams_match_1": 13.794, "eval_arxiv_n_ngrams_match_2": 2.516, "eval_arxiv_n_ngrams_match_3": 0.528, "eval_arxiv_num_pred_words": 40.156, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.57725849758345, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.326970941475356, "eval_arxiv_runtime": 10.9574, "eval_arxiv_samples_per_second": 45.631, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.3217325130845331, "eval_arxiv_token_set_f1_sem": 0.003980271053771427, "eval_arxiv_token_set_precision": 0.2674409880644995, "eval_arxiv_token_set_recall": 0.4259264503258127, "eval_arxiv_true_num_tokens": 64.0, "step": 53125 }, { "epoch": 10.2, "eval_python_code_alpaca_accuracy": 0.14871875, "eval_python_code_alpaca_bleu_score": 3.6781379074422604, "eval_python_code_alpaca_bleu_score_sem": 0.1146349535839436, "eval_python_code_alpaca_emb_cos_sim": 0.7062475681304932, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010283644668562384, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.112185001373291, "eval_python_code_alpaca_n_ngrams_match_1": 8.638, "eval_python_code_alpaca_n_ngrams_match_2": 2.234, "eval_python_code_alpaca_n_ngrams_match_3": 0.626, "eval_python_code_alpaca_num_pred_words": 42.462, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.47008797417813, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29047458800549775, "eval_python_code_alpaca_runtime": 10.1621, "eval_python_code_alpaca_samples_per_second": 49.203, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.442423761741765, "eval_python_code_alpaca_token_set_f1_sem": 0.0059070106684365135, "eval_python_code_alpaca_token_set_precision": 0.46515917016020897, "eval_python_code_alpaca_token_set_recall": 0.44947245285610093, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 53125 }, { "epoch": 10.2, "eval_wikibio_accuracy": 0.30303125, "eval_wikibio_bleu_score": 5.358139248751093, "eval_wikibio_bleu_score_sem": 0.19996863304455187, "eval_wikibio_emb_cos_sim": 0.7063294053077698, "eval_wikibio_emb_cos_sim_sem": 0.011659479147362421, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.05332612991333, "eval_wikibio_n_ngrams_match_1": 9.318, "eval_wikibio_n_ngrams_match_2": 3.08, "eval_wikibio_n_ngrams_match_3": 1.094, "eval_wikibio_num_pred_words": 35.7, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 57.58868629463752, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32126469204064945, "eval_wikibio_runtime": 9.9068, "eval_wikibio_samples_per_second": 50.47, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.29740553581817, "eval_wikibio_token_set_f1_sem": 0.005631365512427164, "eval_wikibio_token_set_precision": 0.30314015268978517, "eval_wikibio_token_set_recall": 0.3113002605458895, "eval_wikibio_true_num_tokens": 61.1328125, "step": 53125 }, { "epoch": 10.2, "eval_nq_accuracy": 0.50228125, "eval_nq_bleu_score": 10.442141514660827, "eval_nq_bleu_score_sem": 0.43914761813133124, "eval_nq_emb_cos_sim": 0.8095685243606567, "eval_nq_emb_cos_sim_sem": 0.008172592661804217, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3626022338867188, "eval_nq_n_ngrams_match_1": 21.91, "eval_nq_n_ngrams_match_2": 7.606, "eval_nq_n_ngrams_match_3": 3.422, "eval_nq_num_pred_words": 49.21, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.618547475276303, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42038613630426913, "eval_nq_runtime": 11.242, "eval_nq_samples_per_second": 44.476, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.439474187141627, "eval_nq_token_set_f1_sem": 0.004896226509865437, "eval_nq_token_set_precision": 0.39184903021724926, "eval_nq_token_set_recall": 0.5109066236302392, "eval_nq_true_num_tokens": 64.0, "step": 53125 }, { "epoch": 10.2, "learning_rate": 0.001, "loss": 2.733, "step": 53136 }, { "epoch": 10.21, "learning_rate": 0.001, "loss": 2.7309, "step": 53148 }, { "epoch": 10.21, "learning_rate": 0.001, "loss": 2.7305, "step": 53160 }, { "epoch": 10.21, "learning_rate": 0.001, "loss": 2.7289, "step": 53172 }, { "epoch": 10.21, "learning_rate": 0.001, "loss": 2.7369, "step": 53184 }, { "epoch": 10.21, "learning_rate": 0.001, "loss": 2.7297, "step": 53196 }, { "epoch": 10.22, "learning_rate": 0.001, "loss": 2.7275, "step": 53208 }, { "epoch": 10.22, "learning_rate": 0.001, "loss": 2.7331, "step": 53220 }, { "epoch": 10.22, "learning_rate": 0.001, "loss": 2.7264, "step": 53232 }, { "epoch": 10.22, "learning_rate": 0.001, "loss": 2.7259, "step": 53244 }, { "epoch": 10.23, "learning_rate": 0.001, "loss": 2.7298, "step": 53256 }, { "epoch": 10.23, "learning_rate": 0.001, "loss": 2.7327, "step": 53268 }, { "epoch": 10.23, "learning_rate": 0.001, "loss": 2.7277, "step": 53280 }, { "epoch": 10.23, "learning_rate": 0.001, "loss": 2.7331, "step": 53292 }, { "epoch": 10.24, "learning_rate": 0.001, "loss": 2.7362, "step": 53304 }, { "epoch": 10.24, "learning_rate": 0.001, "loss": 2.7162, "step": 53316 }, { "epoch": 10.24, "learning_rate": 0.001, "loss": 2.7294, "step": 53328 }, { "epoch": 10.24, "learning_rate": 0.001, "loss": 2.7228, "step": 53340 }, { "epoch": 10.24, "learning_rate": 0.001, "loss": 2.713, "step": 53352 }, { "epoch": 10.25, "learning_rate": 0.001, "loss": 2.721, "step": 53364 }, { "epoch": 10.25, "learning_rate": 0.001, "loss": 2.7177, "step": 53376 }, { "epoch": 10.25, "learning_rate": 0.001, "loss": 2.725, "step": 53388 }, { "epoch": 10.25, "learning_rate": 0.001, "loss": 2.7309, "step": 53400 }, { "epoch": 10.26, "learning_rate": 0.001, "loss": 2.727, "step": 53412 }, { "epoch": 10.26, "learning_rate": 0.001, "loss": 2.7242, "step": 53424 }, { "epoch": 10.26, "learning_rate": 0.001, "loss": 2.7319, "step": 53436 }, { "epoch": 10.26, "learning_rate": 0.001, "loss": 2.7211, "step": 53448 }, { "epoch": 10.26, "learning_rate": 0.001, "loss": 2.7268, "step": 53460 }, { "epoch": 10.27, "learning_rate": 0.001, "loss": 2.7193, "step": 53472 }, { "epoch": 10.27, "learning_rate": 0.001, "loss": 2.7338, "step": 53484 }, { "epoch": 10.27, "learning_rate": 0.001, "loss": 2.7304, "step": 53496 }, { "epoch": 10.27, "learning_rate": 0.001, "loss": 2.7238, "step": 53508 }, { "epoch": 10.28, "learning_rate": 0.001, "loss": 2.7245, "step": 53520 }, { "epoch": 10.28, "learning_rate": 0.001, "loss": 2.7304, "step": 53532 }, { "epoch": 10.28, "learning_rate": 0.001, "loss": 2.7279, "step": 53544 }, { "epoch": 10.28, "learning_rate": 0.001, "loss": 2.7422, "step": 53556 }, { "epoch": 10.29, "learning_rate": 0.001, "loss": 2.7271, "step": 53568 }, { "epoch": 10.29, "learning_rate": 0.001, "loss": 2.7232, "step": 53580 }, { "epoch": 10.29, "learning_rate": 0.001, "loss": 2.7283, "step": 53592 }, { "epoch": 10.29, "learning_rate": 0.001, "loss": 2.7264, "step": 53604 }, { "epoch": 10.29, "learning_rate": 0.001, "loss": 2.7294, "step": 53616 }, { "epoch": 10.3, "learning_rate": 0.001, "loss": 2.733, "step": 53628 }, { "epoch": 10.3, "learning_rate": 0.001, "loss": 2.737, "step": 53640 }, { "epoch": 10.3, "learning_rate": 0.001, "loss": 2.7252, "step": 53652 }, { "epoch": 10.3, "learning_rate": 0.001, "loss": 2.7191, "step": 53664 }, { "epoch": 10.31, "learning_rate": 0.001, "loss": 2.734, "step": 53676 }, { "epoch": 10.31, "learning_rate": 0.001, "loss": 2.7352, "step": 53688 }, { "epoch": 10.31, "learning_rate": 0.001, "loss": 2.7268, "step": 53700 }, { "epoch": 10.31, "learning_rate": 0.001, "loss": 2.7347, "step": 53712 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 2.7352, "step": 53724 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 2.7311, "step": 53736 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 2.7269, "step": 53748 }, { "epoch": 10.32, "eval_ag_news_accuracy": 0.30275, "eval_ag_news_bleu_score": 4.421029722095021, "eval_ag_news_bleu_score_sem": 0.14740592281223455, "eval_ag_news_emb_cos_sim": 0.7629153728485107, "eval_ag_news_emb_cos_sim_sem": 0.009400203918381027, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7627270221710205, "eval_ag_news_n_ngrams_match_1": 12.954, "eval_ag_news_n_ngrams_match_2": 2.662, "eval_ag_news_n_ngrams_match_3": 0.742, "eval_ag_news_num_pred_words": 46.282, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 43.06570713008593, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31879234611824636, "eval_ag_news_runtime": 10.8897, "eval_ag_news_samples_per_second": 45.915, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.32501083971866607, "eval_ag_news_token_set_f1_sem": 0.004351318499970537, "eval_ag_news_token_set_precision": 0.30575492319276987, "eval_ag_news_token_set_recall": 0.3632878636514925, "eval_ag_news_true_num_tokens": 56.09375, "step": 53750 }, { "epoch": 10.32, "eval_anthropic_toxic_prompts_accuracy": 0.10525, "eval_anthropic_toxic_prompts_bleu_score": 2.7905245158869563, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12349844543442566, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6437270641326904, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009195974119931902, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.449458122253418, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.514, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.586, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.514, "eval_anthropic_toxic_prompts_num_pred_words": 46.612, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.48332757106558, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19282892270001412, "eval_anthropic_toxic_prompts_runtime": 9.8181, "eval_anthropic_toxic_prompts_samples_per_second": 50.926, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.32824551154878184, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006253782341111984, "eval_anthropic_toxic_prompts_token_set_precision": 0.39086054907139633, "eval_anthropic_toxic_prompts_token_set_recall": 0.3122785192835911, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 53750 }, { "epoch": 10.32, "eval_arxiv_accuracy": 0.32775, "eval_arxiv_bleu_score": 3.889417166811716, "eval_arxiv_bleu_score_sem": 0.11068272603824485, "eval_arxiv_emb_cos_sim": 0.7147364616394043, "eval_arxiv_emb_cos_sim_sem": 0.0077809727065195925, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6153247356414795, "eval_arxiv_n_ngrams_match_1": 13.902, "eval_arxiv_n_ngrams_match_2": 2.578, "eval_arxiv_n_ngrams_match_3": 0.516, "eval_arxiv_num_pred_words": 41.028, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.16341225087441, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3262191461958489, "eval_arxiv_runtime": 14.3532, "eval_arxiv_samples_per_second": 34.836, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.3220345568752402, "eval_arxiv_token_set_f1_sem": 0.004156493584493791, "eval_arxiv_token_set_precision": 0.2710933886626981, "eval_arxiv_token_set_recall": 0.4130692034550771, "eval_arxiv_true_num_tokens": 64.0, "step": 53750 }, { "epoch": 10.32, "eval_python_code_alpaca_accuracy": 0.1473125, "eval_python_code_alpaca_bleu_score": 3.819787974852944, "eval_python_code_alpaca_bleu_score_sem": 0.11353915389351618, "eval_python_code_alpaca_emb_cos_sim": 0.7258760929107666, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00836633052915216, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.1069111824035645, "eval_python_code_alpaca_n_ngrams_match_1": 8.964, "eval_python_code_alpaca_n_ngrams_match_2": 2.438, "eval_python_code_alpaca_n_ngrams_match_3": 0.712, "eval_python_code_alpaca_num_pred_words": 45.472, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.351896731516554, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29383534490871266, "eval_python_code_alpaca_runtime": 10.9168, "eval_python_code_alpaca_samples_per_second": 45.801, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.449207088132276, "eval_python_code_alpaca_token_set_f1_sem": 0.0051724742569146664, "eval_python_code_alpaca_token_set_precision": 0.4855544528671801, "eval_python_code_alpaca_token_set_recall": 0.44060734647652167, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 53750 }, { "epoch": 10.32, "eval_wikibio_accuracy": 0.30290625, "eval_wikibio_bleu_score": 5.383839361078247, "eval_wikibio_bleu_score_sem": 0.17734073960355903, "eval_wikibio_emb_cos_sim": 0.7250243425369263, "eval_wikibio_emb_cos_sim_sem": 0.00935694412965164, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.004042148590088, "eval_wikibio_n_ngrams_match_1": 9.812, "eval_wikibio_n_ngrams_match_2": 3.104, "eval_wikibio_n_ngrams_match_3": 1.02, "eval_wikibio_num_pred_words": 36.934, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 54.8192905085521, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3401770317967601, "eval_wikibio_runtime": 9.6245, "eval_wikibio_samples_per_second": 51.951, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.31065285304992185, "eval_wikibio_token_set_f1_sem": 0.005027158760670689, "eval_wikibio_token_set_precision": 0.31826837412991577, "eval_wikibio_token_set_recall": 0.31702885307558276, "eval_wikibio_true_num_tokens": 61.1328125, "step": 53750 }, { "epoch": 10.32, "eval_nq_accuracy": 0.50265625, "eval_nq_bleu_score": 10.488527768048552, "eval_nq_bleu_score_sem": 0.44455737401582057, "eval_nq_emb_cos_sim": 0.8087708950042725, "eval_nq_emb_cos_sim_sem": 0.007787056340684336, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.359281539916992, "eval_nq_n_ngrams_match_1": 21.896, "eval_nq_n_ngrams_match_2": 7.718, "eval_nq_n_ngrams_match_3": 3.472, "eval_nq_num_pred_words": 49.138, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.583345009364187, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41968451307728977, "eval_nq_runtime": 10.5456, "eval_nq_samples_per_second": 47.413, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4398026509375775, "eval_nq_token_set_f1_sem": 0.004946425865949454, "eval_nq_token_set_precision": 0.3917341367311386, "eval_nq_token_set_recall": 0.5138229726091218, "eval_nq_true_num_tokens": 64.0, "step": 53750 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 2.7475, "step": 53760 }, { "epoch": 10.32, "learning_rate": 0.001, "loss": 2.7249, "step": 53772 }, { "epoch": 10.33, "learning_rate": 0.001, "loss": 2.7186, "step": 53784 }, { "epoch": 10.33, "learning_rate": 0.001, "loss": 2.7221, "step": 53796 }, { "epoch": 10.33, "learning_rate": 0.001, "loss": 2.726, "step": 53808 }, { "epoch": 10.33, "learning_rate": 0.001, "loss": 2.7207, "step": 53820 }, { "epoch": 10.34, "learning_rate": 0.001, "loss": 2.7253, "step": 53832 }, { "epoch": 10.34, "learning_rate": 0.001, "loss": 2.7173, "step": 53844 }, { "epoch": 10.34, "learning_rate": 0.001, "loss": 2.7344, "step": 53856 }, { "epoch": 10.34, "learning_rate": 0.001, "loss": 2.7242, "step": 53868 }, { "epoch": 10.35, "learning_rate": 0.001, "loss": 2.7199, "step": 53880 }, { "epoch": 10.35, "learning_rate": 0.001, "loss": 2.7364, "step": 53892 }, { "epoch": 10.35, "learning_rate": 0.001, "loss": 2.7114, "step": 53904 }, { "epoch": 10.35, "learning_rate": 0.001, "loss": 2.7308, "step": 53916 }, { "epoch": 10.35, "learning_rate": 0.001, "loss": 2.7228, "step": 53928 }, { "epoch": 10.36, "learning_rate": 0.001, "loss": 2.7207, "step": 53940 }, { "epoch": 10.36, "learning_rate": 0.001, "loss": 2.7316, "step": 53952 }, { "epoch": 10.36, "learning_rate": 0.001, "loss": 2.7201, "step": 53964 }, { "epoch": 10.36, "learning_rate": 0.001, "loss": 2.731, "step": 53976 }, { "epoch": 10.37, "learning_rate": 0.001, "loss": 2.7243, "step": 53988 }, { "epoch": 10.37, "learning_rate": 0.001, "loss": 2.7281, "step": 54000 }, { "epoch": 10.37, "learning_rate": 0.001, "loss": 2.7357, "step": 54012 }, { "epoch": 10.37, "learning_rate": 0.001, "loss": 2.7329, "step": 54024 }, { "epoch": 10.38, "learning_rate": 0.001, "loss": 2.7318, "step": 54036 }, { "epoch": 10.38, "learning_rate": 0.001, "loss": 2.7176, "step": 54048 }, { "epoch": 10.38, "learning_rate": 0.001, "loss": 2.73, "step": 54060 }, { "epoch": 10.38, "learning_rate": 0.001, "loss": 2.7264, "step": 54072 }, { "epoch": 10.38, "learning_rate": 0.001, "loss": 2.7296, "step": 54084 }, { "epoch": 10.39, "learning_rate": 0.001, "loss": 2.7206, "step": 54096 }, { "epoch": 10.39, "learning_rate": 0.001, "loss": 2.7371, "step": 54108 }, { "epoch": 10.39, "learning_rate": 0.001, "loss": 2.7239, "step": 54120 }, { "epoch": 10.39, "learning_rate": 0.001, "loss": 2.7242, "step": 54132 }, { "epoch": 10.4, "learning_rate": 0.001, "loss": 2.7245, "step": 54144 }, { "epoch": 10.4, "learning_rate": 0.001, "loss": 2.7278, "step": 54156 }, { "epoch": 10.4, "learning_rate": 0.001, "loss": 2.7218, "step": 54168 }, { "epoch": 10.4, "learning_rate": 0.001, "loss": 2.7105, "step": 54180 }, { "epoch": 10.41, "learning_rate": 0.001, "loss": 2.7216, "step": 54192 }, { "epoch": 10.41, "learning_rate": 0.001, "loss": 2.7307, "step": 54204 }, { "epoch": 10.41, "learning_rate": 0.001, "loss": 2.7292, "step": 54216 }, { "epoch": 10.41, "learning_rate": 0.001, "loss": 2.7262, "step": 54228 }, { "epoch": 10.41, "learning_rate": 0.001, "loss": 2.7245, "step": 54240 }, { "epoch": 10.42, "learning_rate": 0.001, "loss": 2.7194, "step": 54252 }, { "epoch": 10.42, "learning_rate": 0.001, "loss": 2.7265, "step": 54264 }, { "epoch": 10.42, "learning_rate": 0.001, "loss": 2.7261, "step": 54276 }, { "epoch": 10.42, "learning_rate": 0.001, "loss": 2.7255, "step": 54288 }, { "epoch": 10.43, "learning_rate": 0.001, "loss": 2.727, "step": 54300 }, { "epoch": 10.43, "learning_rate": 0.001, "loss": 2.7382, "step": 54312 }, { "epoch": 10.43, "learning_rate": 0.001, "loss": 2.7307, "step": 54324 }, { "epoch": 10.43, "learning_rate": 0.001, "loss": 2.7222, "step": 54336 }, { "epoch": 10.44, "learning_rate": 0.001, "loss": 2.7217, "step": 54348 }, { "epoch": 10.44, "learning_rate": 0.001, "loss": 2.7219, "step": 54360 }, { "epoch": 10.44, "learning_rate": 0.001, "loss": 2.7282, "step": 54372 }, { "epoch": 10.44, "eval_ag_news_accuracy": 0.30525, "eval_ag_news_bleu_score": 4.287151852265607, "eval_ag_news_bleu_score_sem": 0.13770535603089282, "eval_ag_news_emb_cos_sim": 0.7802896499633789, "eval_ag_news_emb_cos_sim_sem": 0.007051109145411361, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7535603046417236, "eval_ag_news_n_ngrams_match_1": 13.038, "eval_ag_news_n_ngrams_match_2": 2.666, "eval_ag_news_n_ngrams_match_3": 0.716, "eval_ag_news_num_pred_words": 46.402, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.672739819499704, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3200070490912197, "eval_ag_news_runtime": 14.716, "eval_ag_news_samples_per_second": 33.977, "eval_ag_news_steps_per_second": 0.068, "eval_ag_news_token_set_f1": 0.32769701431828646, "eval_ag_news_token_set_f1_sem": 0.004279940614437022, "eval_ag_news_token_set_precision": 0.30821433798282394, "eval_ag_news_token_set_recall": 0.36442300756783574, "eval_ag_news_true_num_tokens": 56.09375, "step": 54375 }, { "epoch": 10.44, "eval_anthropic_toxic_prompts_accuracy": 0.10690625, "eval_anthropic_toxic_prompts_bleu_score": 2.765943346689468, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10928871291275943, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6335175037384033, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010171045411498483, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4407944679260254, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.602, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.554, "eval_anthropic_toxic_prompts_num_pred_words": 46.668, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.211745051172443, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19533282020513973, "eval_anthropic_toxic_prompts_runtime": 16.5998, "eval_anthropic_toxic_prompts_samples_per_second": 30.121, "eval_anthropic_toxic_prompts_steps_per_second": 0.06, "eval_anthropic_toxic_prompts_token_set_f1": 0.3347748405465396, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066718463553948355, "eval_anthropic_toxic_prompts_token_set_precision": 0.3944160218442546, "eval_anthropic_toxic_prompts_token_set_recall": 0.3202924111931561, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 54375 }, { "epoch": 10.44, "eval_arxiv_accuracy": 0.32890625, "eval_arxiv_bleu_score": 3.7970648527332895, "eval_arxiv_bleu_score_sem": 0.11049710479893754, "eval_arxiv_emb_cos_sim": 0.7082982659339905, "eval_arxiv_emb_cos_sim_sem": 0.00937611040901197, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.605374574661255, "eval_arxiv_n_ngrams_match_1": 13.652, "eval_arxiv_n_ngrams_match_2": 2.482, "eval_arxiv_n_ngrams_match_3": 0.508, "eval_arxiv_num_pred_words": 39.622, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.79546392468632, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3226470446286839, "eval_arxiv_runtime": 14.4784, "eval_arxiv_samples_per_second": 34.534, "eval_arxiv_steps_per_second": 0.069, "eval_arxiv_token_set_f1": 0.3201853071894984, "eval_arxiv_token_set_f1_sem": 0.004213754791068077, "eval_arxiv_token_set_precision": 0.2668160522356607, "eval_arxiv_token_set_recall": 0.42337221545985365, "eval_arxiv_true_num_tokens": 64.0, "step": 54375 }, { "epoch": 10.44, "eval_python_code_alpaca_accuracy": 0.1485625, "eval_python_code_alpaca_bleu_score": 3.941212779791558, "eval_python_code_alpaca_bleu_score_sem": 0.129502782958863, "eval_python_code_alpaca_emb_cos_sim": 0.7207451462745667, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008943700206902896, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.109691858291626, "eval_python_code_alpaca_n_ngrams_match_1": 8.948, "eval_python_code_alpaca_n_ngrams_match_2": 2.36, "eval_python_code_alpaca_n_ngrams_match_3": 0.69, "eval_python_code_alpaca_num_pred_words": 43.292, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.414136606165638, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29916473399058563, "eval_python_code_alpaca_runtime": 19.5106, "eval_python_code_alpaca_samples_per_second": 25.627, "eval_python_code_alpaca_steps_per_second": 0.051, "eval_python_code_alpaca_token_set_f1": 0.4485665602284746, "eval_python_code_alpaca_token_set_f1_sem": 0.005507560376558197, "eval_python_code_alpaca_token_set_precision": 0.4817700228563396, "eval_python_code_alpaca_token_set_recall": 0.44614202823697596, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 54375 }, { "epoch": 10.44, "eval_wikibio_accuracy": 0.30446875, "eval_wikibio_bleu_score": 5.420102411962581, "eval_wikibio_bleu_score_sem": 0.19665541423216823, "eval_wikibio_emb_cos_sim": 0.6960967183113098, "eval_wikibio_emb_cos_sim_sem": 0.011985603558605623, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.005090236663818, "eval_wikibio_n_ngrams_match_1": 9.61, "eval_wikibio_n_ngrams_match_2": 3.126, "eval_wikibio_n_ngrams_match_3": 1.116, "eval_wikibio_num_pred_words": 36.826, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 54.87677607284938, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3240339619092947, "eval_wikibio_runtime": 13.1997, "eval_wikibio_samples_per_second": 37.88, "eval_wikibio_steps_per_second": 0.076, "eval_wikibio_token_set_f1": 0.30322508384429797, "eval_wikibio_token_set_f1_sem": 0.005553227425495123, "eval_wikibio_token_set_precision": 0.3115620569302409, "eval_wikibio_token_set_recall": 0.3123327368097843, "eval_wikibio_true_num_tokens": 61.1328125, "step": 54375 }, { "epoch": 10.44, "eval_nq_accuracy": 0.50334375, "eval_nq_bleu_score": 10.3531879718149, "eval_nq_bleu_score_sem": 0.43948659727354084, "eval_nq_emb_cos_sim": 0.8138612508773804, "eval_nq_emb_cos_sim_sem": 0.007374245740221845, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3566269874572754, "eval_nq_n_ngrams_match_1": 21.62, "eval_nq_n_ngrams_match_2": 7.582, "eval_nq_n_ngrams_match_3": 3.396, "eval_nq_num_pred_words": 48.902, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.555288220422122, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4159296706956379, "eval_nq_runtime": 16.7176, "eval_nq_samples_per_second": 29.909, "eval_nq_steps_per_second": 0.06, "eval_nq_token_set_f1": 0.43214432415741205, "eval_nq_token_set_f1_sem": 0.005086063762030811, "eval_nq_token_set_precision": 0.38538407778102635, "eval_nq_token_set_recall": 0.5030186085206476, "eval_nq_true_num_tokens": 64.0, "step": 54375 }, { "epoch": 10.44, "learning_rate": 0.001, "loss": 2.7361, "step": 54384 }, { "epoch": 10.44, "learning_rate": 0.001, "loss": 2.7173, "step": 54396 }, { "epoch": 10.45, "learning_rate": 0.001, "loss": 2.7239, "step": 54408 }, { "epoch": 10.45, "learning_rate": 0.001, "loss": 2.7308, "step": 54420 }, { "epoch": 10.45, "learning_rate": 0.001, "loss": 2.7387, "step": 54432 }, { "epoch": 10.45, "learning_rate": 0.001, "loss": 2.7256, "step": 54444 }, { "epoch": 10.46, "learning_rate": 0.001, "loss": 2.7304, "step": 54456 }, { "epoch": 10.46, "learning_rate": 0.001, "loss": 2.7151, "step": 54468 }, { "epoch": 10.46, "learning_rate": 0.001, "loss": 2.7263, "step": 54480 }, { "epoch": 10.46, "learning_rate": 0.001, "loss": 2.7198, "step": 54492 }, { "epoch": 10.47, "learning_rate": 0.001, "loss": 2.7233, "step": 54504 }, { "epoch": 10.47, "learning_rate": 0.001, "loss": 2.7281, "step": 54516 }, { "epoch": 10.47, "learning_rate": 0.001, "loss": 2.7307, "step": 54528 }, { "epoch": 10.47, "learning_rate": 0.001, "loss": 2.7301, "step": 54540 }, { "epoch": 10.47, "learning_rate": 0.001, "loss": 2.7309, "step": 54552 }, { "epoch": 10.48, "learning_rate": 0.001, "loss": 2.7339, "step": 54564 }, { "epoch": 10.48, "learning_rate": 0.001, "loss": 2.7172, "step": 54576 }, { "epoch": 10.48, "learning_rate": 0.001, "loss": 2.7176, "step": 54588 }, { "epoch": 10.48, "learning_rate": 0.001, "loss": 2.7293, "step": 54600 }, { "epoch": 10.49, "learning_rate": 0.001, "loss": 2.7239, "step": 54612 }, { "epoch": 10.49, "learning_rate": 0.001, "loss": 2.7284, "step": 54624 }, { "epoch": 10.49, "learning_rate": 0.001, "loss": 2.7347, "step": 54636 }, { "epoch": 10.49, "learning_rate": 0.001, "loss": 2.7322, "step": 54648 }, { "epoch": 10.5, "learning_rate": 0.001, "loss": 2.7157, "step": 54660 }, { "epoch": 10.5, "learning_rate": 0.001, "loss": 2.7291, "step": 54672 }, { "epoch": 10.5, "learning_rate": 0.001, "loss": 2.7272, "step": 54684 }, { "epoch": 10.5, "learning_rate": 0.001, "loss": 2.7334, "step": 54696 }, { "epoch": 10.5, "learning_rate": 0.001, "loss": 2.7293, "step": 54708 }, { "epoch": 10.51, "learning_rate": 0.001, "loss": 2.7238, "step": 54720 }, { "epoch": 10.51, "learning_rate": 0.001, "loss": 2.7248, "step": 54732 }, { "epoch": 10.51, "learning_rate": 0.001, "loss": 2.721, "step": 54744 }, { "epoch": 10.51, "learning_rate": 0.001, "loss": 2.7241, "step": 54756 }, { "epoch": 10.52, "learning_rate": 0.001, "loss": 2.7341, "step": 54768 }, { "epoch": 10.52, "learning_rate": 0.001, "loss": 2.7279, "step": 54780 }, { "epoch": 10.52, "learning_rate": 0.001, "loss": 2.7219, "step": 54792 }, { "epoch": 10.52, "learning_rate": 0.001, "loss": 2.7162, "step": 54804 }, { "epoch": 10.53, "learning_rate": 0.001, "loss": 2.7217, "step": 54816 }, { "epoch": 10.53, "learning_rate": 0.001, "loss": 2.7176, "step": 54828 }, { "epoch": 10.53, "learning_rate": 0.001, "loss": 2.7223, "step": 54840 }, { "epoch": 10.53, "learning_rate": 0.001, "loss": 2.7306, "step": 54852 }, { "epoch": 10.53, "learning_rate": 0.001, "loss": 2.7219, "step": 54864 }, { "epoch": 10.54, "learning_rate": 0.001, "loss": 2.7307, "step": 54876 }, { "epoch": 10.54, "learning_rate": 0.001, "loss": 2.7334, "step": 54888 }, { "epoch": 10.54, "learning_rate": 0.001, "loss": 2.7158, "step": 54900 }, { "epoch": 10.54, "learning_rate": 0.001, "loss": 2.7202, "step": 54912 }, { "epoch": 10.55, "learning_rate": 0.001, "loss": 2.7263, "step": 54924 }, { "epoch": 10.55, "learning_rate": 0.001, "loss": 2.73, "step": 54936 }, { "epoch": 10.55, "learning_rate": 0.001, "loss": 2.7268, "step": 54948 }, { "epoch": 10.55, "learning_rate": 0.001, "loss": 2.712, "step": 54960 }, { "epoch": 10.56, "learning_rate": 0.001, "loss": 2.719, "step": 54972 }, { "epoch": 10.56, "learning_rate": 0.001, "loss": 2.7191, "step": 54984 }, { "epoch": 10.56, "learning_rate": 0.001, "loss": 2.718, "step": 54996 }, { "epoch": 10.56, "eval_ag_news_accuracy": 0.304375, "eval_ag_news_bleu_score": 4.258325095410446, "eval_ag_news_bleu_score_sem": 0.14707397261193333, "eval_ag_news_emb_cos_sim": 0.7857528924942017, "eval_ag_news_emb_cos_sim_sem": 0.007713095206992798, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7430331707000732, "eval_ag_news_n_ngrams_match_1": 13.09, "eval_ag_news_n_ngrams_match_2": 2.69, "eval_ag_news_n_ngrams_match_3": 0.662, "eval_ag_news_num_pred_words": 45.962, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.22587440461575, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32498122167146914, "eval_ag_news_runtime": 11.4594, "eval_ag_news_samples_per_second": 43.632, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.33116753479249383, "eval_ag_news_token_set_f1_sem": 0.004402606812542982, "eval_ag_news_token_set_precision": 0.3101122491239984, "eval_ag_news_token_set_recall": 0.3707862362369787, "eval_ag_news_true_num_tokens": 56.09375, "step": 55000 }, { "epoch": 10.56, "eval_anthropic_toxic_prompts_accuracy": 0.10628125, "eval_anthropic_toxic_prompts_bleu_score": 2.7190780770164444, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10927482698393612, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6370939016342163, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009760940770419292, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.445617198944092, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.614, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.642, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59, "eval_anthropic_toxic_prompts_num_pred_words": 47.814, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.362634459215002, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19396980189909335, "eval_anthropic_toxic_prompts_runtime": 12.5556, "eval_anthropic_toxic_prompts_samples_per_second": 39.823, "eval_anthropic_toxic_prompts_steps_per_second": 0.08, "eval_anthropic_toxic_prompts_token_set_f1": 0.33535288439937794, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006498362012588391, "eval_anthropic_toxic_prompts_token_set_precision": 0.39267813613992103, "eval_anthropic_toxic_prompts_token_set_recall": 0.3235501114804039, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 55000 }, { "epoch": 10.56, "eval_arxiv_accuracy": 0.33071875, "eval_arxiv_bleu_score": 4.0040843338745225, "eval_arxiv_bleu_score_sem": 0.12065461584433716, "eval_arxiv_emb_cos_sim": 0.7195819616317749, "eval_arxiv_emb_cos_sim_sem": 0.007894215361567793, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.59499454498291, "eval_arxiv_n_ngrams_match_1": 13.928, "eval_arxiv_n_ngrams_match_2": 2.64, "eval_arxiv_n_ngrams_match_3": 0.578, "eval_arxiv_num_pred_words": 40.232, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.415501340155096, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3283346123239741, "eval_arxiv_runtime": 10.2033, "eval_arxiv_samples_per_second": 49.004, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.32425936970857777, "eval_arxiv_token_set_f1_sem": 0.004268208409380536, "eval_arxiv_token_set_precision": 0.27274057095881565, "eval_arxiv_token_set_recall": 0.4169761827083265, "eval_arxiv_true_num_tokens": 64.0, "step": 55000 }, { "epoch": 10.56, "eval_python_code_alpaca_accuracy": 0.1479375, "eval_python_code_alpaca_bleu_score": 3.9383014092663715, "eval_python_code_alpaca_bleu_score_sem": 0.12581489261409792, "eval_python_code_alpaca_emb_cos_sim": 0.7387979626655579, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008078756654861758, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0820183753967285, "eval_python_code_alpaca_n_ngrams_match_1": 9.204, "eval_python_code_alpaca_n_ngrams_match_2": 2.49, "eval_python_code_alpaca_n_ngrams_match_3": 0.712, "eval_python_code_alpaca_num_pred_words": 44.498, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.80236337021571, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30338226927866524, "eval_python_code_alpaca_runtime": 10.1643, "eval_python_code_alpaca_samples_per_second": 49.192, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.45299241303695986, "eval_python_code_alpaca_token_set_f1_sem": 0.005447650335026252, "eval_python_code_alpaca_token_set_precision": 0.5004375215868001, "eval_python_code_alpaca_token_set_recall": 0.4384441876125228, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 55000 }, { "epoch": 10.56, "eval_wikibio_accuracy": 0.30415625, "eval_wikibio_bleu_score": 5.539772442940585, "eval_wikibio_bleu_score_sem": 0.2109495197485314, "eval_wikibio_emb_cos_sim": 0.6955208778381348, "eval_wikibio_emb_cos_sim_sem": 0.0115707692508701, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.973893165588379, "eval_wikibio_n_ngrams_match_1": 9.76, "eval_wikibio_n_ngrams_match_2": 3.158, "eval_wikibio_n_ngrams_match_3": 1.14, "eval_wikibio_num_pred_words": 36.896, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 53.19121044978153, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3320980570092723, "eval_wikibio_runtime": 10.3085, "eval_wikibio_samples_per_second": 48.504, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3054922226778108, "eval_wikibio_token_set_f1_sem": 0.005688636508994517, "eval_wikibio_token_set_precision": 0.3161374752024517, "eval_wikibio_token_set_recall": 0.3111209058044092, "eval_wikibio_true_num_tokens": 61.1328125, "step": 55000 }, { "epoch": 10.56, "eval_nq_accuracy": 0.50303125, "eval_nq_bleu_score": 10.267043155676992, "eval_nq_bleu_score_sem": 0.4405446619182153, "eval_nq_emb_cos_sim": 0.8072665929794312, "eval_nq_emb_cos_sim_sem": 0.007750441924585005, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3541324138641357, "eval_nq_n_ngrams_match_1": 21.936, "eval_nq_n_ngrams_match_2": 7.596, "eval_nq_n_ngrams_match_3": 3.342, "eval_nq_num_pred_words": 48.97, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.528990092105248, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.41986497596618677, "eval_nq_runtime": 14.1104, "eval_nq_samples_per_second": 35.435, "eval_nq_steps_per_second": 0.071, "eval_nq_token_set_f1": 0.43854430931609373, "eval_nq_token_set_f1_sem": 0.00489290361061998, "eval_nq_token_set_precision": 0.39277717501204584, "eval_nq_token_set_recall": 0.5078532499681914, "eval_nq_true_num_tokens": 64.0, "step": 55000 }, { "epoch": 10.56, "learning_rate": 0.001, "loss": 2.7156, "step": 55008 }, { "epoch": 10.56, "learning_rate": 0.001, "loss": 2.7245, "step": 55020 }, { "epoch": 10.57, "learning_rate": 0.001, "loss": 2.7322, "step": 55032 }, { "epoch": 10.57, "learning_rate": 0.001, "loss": 2.7225, "step": 55044 }, { "epoch": 10.57, "learning_rate": 0.001, "loss": 2.7176, "step": 55056 }, { "epoch": 10.57, "learning_rate": 0.001, "loss": 2.7093, "step": 55068 }, { "epoch": 10.58, "learning_rate": 0.001, "loss": 2.7231, "step": 55080 }, { "epoch": 10.58, "learning_rate": 0.001, "loss": 2.7254, "step": 55092 }, { "epoch": 10.58, "learning_rate": 0.001, "loss": 2.7353, "step": 55104 }, { "epoch": 10.58, "learning_rate": 0.001, "loss": 2.7335, "step": 55116 }, { "epoch": 10.59, "learning_rate": 0.001, "loss": 2.7253, "step": 55128 }, { "epoch": 10.59, "learning_rate": 0.001, "loss": 2.7211, "step": 55140 }, { "epoch": 10.59, "learning_rate": 0.001, "loss": 2.7252, "step": 55152 }, { "epoch": 10.59, "learning_rate": 0.001, "loss": 2.729, "step": 55164 }, { "epoch": 10.59, "learning_rate": 0.001, "loss": 2.7186, "step": 55176 }, { "epoch": 10.6, "learning_rate": 0.001, "loss": 2.7206, "step": 55188 }, { "epoch": 10.6, "learning_rate": 0.001, "loss": 2.7251, "step": 55200 }, { "epoch": 10.6, "learning_rate": 0.001, "loss": 2.7271, "step": 55212 }, { "epoch": 10.6, "learning_rate": 0.001, "loss": 2.7165, "step": 55224 }, { "epoch": 10.61, "learning_rate": 0.001, "loss": 2.7217, "step": 55236 }, { "epoch": 10.61, "learning_rate": 0.001, "loss": 2.7112, "step": 55248 }, { "epoch": 10.61, "learning_rate": 0.001, "loss": 2.7165, "step": 55260 }, { "epoch": 10.61, "learning_rate": 0.001, "loss": 2.7359, "step": 55272 }, { "epoch": 10.62, "learning_rate": 0.001, "loss": 2.7309, "step": 55284 }, { "epoch": 10.62, "learning_rate": 0.001, "loss": 2.7206, "step": 55296 }, { "epoch": 10.62, "learning_rate": 0.001, "loss": 2.7322, "step": 55308 }, { "epoch": 10.62, "learning_rate": 0.001, "loss": 2.7243, "step": 55320 }, { "epoch": 10.62, "learning_rate": 0.001, "loss": 2.7265, "step": 55332 }, { "epoch": 10.63, "learning_rate": 0.001, "loss": 2.7333, "step": 55344 }, { "epoch": 10.63, "learning_rate": 0.001, "loss": 2.7298, "step": 55356 }, { "epoch": 10.63, "learning_rate": 0.001, "loss": 2.7149, "step": 55368 }, { "epoch": 10.63, "learning_rate": 0.001, "loss": 2.7209, "step": 55380 }, { "epoch": 10.64, "learning_rate": 0.001, "loss": 2.7233, "step": 55392 }, { "epoch": 10.64, "learning_rate": 0.001, "loss": 2.7243, "step": 55404 }, { "epoch": 10.64, "learning_rate": 0.001, "loss": 2.7217, "step": 55416 }, { "epoch": 10.64, "learning_rate": 0.001, "loss": 2.7306, "step": 55428 }, { "epoch": 10.65, "learning_rate": 0.001, "loss": 2.722, "step": 55440 }, { "epoch": 10.65, "learning_rate": 0.001, "loss": 2.7347, "step": 55452 }, { "epoch": 10.65, "learning_rate": 0.001, "loss": 2.7195, "step": 55464 }, { "epoch": 10.65, "learning_rate": 0.001, "loss": 2.7126, "step": 55476 }, { "epoch": 10.65, "learning_rate": 0.001, "loss": 2.7276, "step": 55488 }, { "epoch": 10.66, "learning_rate": 0.001, "loss": 2.7262, "step": 55500 }, { "epoch": 10.66, "learning_rate": 0.001, "loss": 2.7273, "step": 55512 }, { "epoch": 10.66, "learning_rate": 0.001, "loss": 2.7221, "step": 55524 }, { "epoch": 10.66, "learning_rate": 0.001, "loss": 2.7267, "step": 55536 }, { "epoch": 10.67, "learning_rate": 0.001, "loss": 2.7218, "step": 55548 }, { "epoch": 10.67, "learning_rate": 0.001, "loss": 2.7223, "step": 55560 }, { "epoch": 10.67, "learning_rate": 0.001, "loss": 2.7192, "step": 55572 }, { "epoch": 10.67, "learning_rate": 0.001, "loss": 2.7308, "step": 55584 }, { "epoch": 10.68, "learning_rate": 0.001, "loss": 2.7208, "step": 55596 }, { "epoch": 10.68, "learning_rate": 0.001, "loss": 2.7272, "step": 55608 }, { "epoch": 10.68, "learning_rate": 0.001, "loss": 2.7349, "step": 55620 }, { "epoch": 10.68, "eval_ag_news_accuracy": 0.3035, "eval_ag_news_bleu_score": 4.3693944548121415, "eval_ag_news_bleu_score_sem": 0.14171892856740806, "eval_ag_news_emb_cos_sim": 0.7833855748176575, "eval_ag_news_emb_cos_sim_sem": 0.007463489716310139, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.756091833114624, "eval_ag_news_n_ngrams_match_1": 13.03, "eval_ag_news_n_ngrams_match_2": 2.69, "eval_ag_news_n_ngrams_match_3": 0.75, "eval_ag_news_num_pred_words": 46.122, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.78090392786411, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3218732411680624, "eval_ag_news_runtime": 10.4582, "eval_ag_news_samples_per_second": 47.81, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3292143717381226, "eval_ag_news_token_set_f1_sem": 0.0043415927175779944, "eval_ag_news_token_set_precision": 0.3086045254028964, "eval_ag_news_token_set_recall": 0.37102524345139065, "eval_ag_news_true_num_tokens": 56.09375, "step": 55625 }, { "epoch": 10.68, "eval_anthropic_toxic_prompts_accuracy": 0.1054375, "eval_anthropic_toxic_prompts_bleu_score": 2.674264644140458, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10298624139985543, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6428861618041992, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009882464622655376, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4474520683288574, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.62, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.556, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.51, "eval_anthropic_toxic_prompts_num_pred_words": 47.36, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.420233624321988, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19516646839807889, "eval_anthropic_toxic_prompts_runtime": 10.081, "eval_anthropic_toxic_prompts_samples_per_second": 49.598, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3326299573257753, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006372127303267976, "eval_anthropic_toxic_prompts_token_set_precision": 0.39679224681863035, "eval_anthropic_toxic_prompts_token_set_recall": 0.3165468159861881, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 55625 }, { "epoch": 10.68, "eval_arxiv_accuracy": 0.33090625, "eval_arxiv_bleu_score": 3.8295488650601968, "eval_arxiv_bleu_score_sem": 0.12001471560226433, "eval_arxiv_emb_cos_sim": 0.718084454536438, "eval_arxiv_emb_cos_sim_sem": 0.008743845119620302, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5993494987487793, "eval_arxiv_n_ngrams_match_1": 13.798, "eval_arxiv_n_ngrams_match_2": 2.55, "eval_arxiv_n_ngrams_match_3": 0.512, "eval_arxiv_num_pred_words": 39.836, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.574434988006985, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3248562342422566, "eval_arxiv_runtime": 10.6632, "eval_arxiv_samples_per_second": 46.89, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3224257945683061, "eval_arxiv_token_set_f1_sem": 0.004321349759941008, "eval_arxiv_token_set_precision": 0.27216607546232435, "eval_arxiv_token_set_recall": 0.4180924995094326, "eval_arxiv_true_num_tokens": 64.0, "step": 55625 }, { "epoch": 10.68, "eval_python_code_alpaca_accuracy": 0.14796875, "eval_python_code_alpaca_bleu_score": 3.8705052380856286, "eval_python_code_alpaca_bleu_score_sem": 0.12261524029647448, "eval_python_code_alpaca_emb_cos_sim": 0.7215318083763123, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009852892681524412, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.088245153427124, "eval_python_code_alpaca_n_ngrams_match_1": 8.904, "eval_python_code_alpaca_n_ngrams_match_2": 2.404, "eval_python_code_alpaca_n_ngrams_match_3": 0.682, "eval_python_code_alpaca_num_pred_words": 43.458, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.93854539506779, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29954159178261763, "eval_python_code_alpaca_runtime": 15.8376, "eval_python_code_alpaca_samples_per_second": 31.57, "eval_python_code_alpaca_steps_per_second": 0.063, "eval_python_code_alpaca_token_set_f1": 0.4425443412975813, "eval_python_code_alpaca_token_set_f1_sem": 0.005565341465966108, "eval_python_code_alpaca_token_set_precision": 0.4834871944462926, "eval_python_code_alpaca_token_set_recall": 0.4343382582916853, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 55625 }, { "epoch": 10.68, "eval_wikibio_accuracy": 0.3059375, "eval_wikibio_bleu_score": 5.2086348097576884, "eval_wikibio_bleu_score_sem": 0.19022722361901956, "eval_wikibio_emb_cos_sim": 0.6870605945587158, "eval_wikibio_emb_cos_sim_sem": 0.01231822253578655, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.011824131011963, "eval_wikibio_n_ngrams_match_1": 9.4, "eval_wikibio_n_ngrams_match_2": 3.014, "eval_wikibio_n_ngrams_match_3": 1.0, "eval_wikibio_num_pred_words": 35.434, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.247557485420494, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32254578911541076, "eval_wikibio_runtime": 10.0644, "eval_wikibio_samples_per_second": 49.68, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.2984314801837055, "eval_wikibio_token_set_f1_sem": 0.005711400051016728, "eval_wikibio_token_set_precision": 0.3020520303055395, "eval_wikibio_token_set_recall": 0.31569661931425963, "eval_wikibio_true_num_tokens": 61.1328125, "step": 55625 }, { "epoch": 10.68, "eval_nq_accuracy": 0.5031875, "eval_nq_bleu_score": 10.163381954740693, "eval_nq_bleu_score_sem": 0.4329978045610566, "eval_nq_emb_cos_sim": 0.8052500486373901, "eval_nq_emb_cos_sim_sem": 0.008247454025838168, "eval_nq_emb_top1_equal": 0.2109375, "eval_nq_emb_top1_equal_sem": 0.03620184850179216, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.355693817138672, "eval_nq_n_ngrams_match_1": 21.638, "eval_nq_n_ngrams_match_2": 7.586, "eval_nq_n_ngrams_match_3": 3.298, "eval_nq_num_pred_words": 48.234, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.545442933129905, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4174892567607175, "eval_nq_runtime": 10.1407, "eval_nq_samples_per_second": 49.306, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.43396102757839755, "eval_nq_token_set_f1_sem": 0.005101646300458602, "eval_nq_token_set_precision": 0.3870328492241951, "eval_nq_token_set_recall": 0.506937030946592, "eval_nq_true_num_tokens": 64.0, "step": 55625 }, { "epoch": 10.68, "learning_rate": 0.001, "loss": 2.7341, "step": 55632 }, { "epoch": 10.68, "learning_rate": 0.001, "loss": 2.7296, "step": 55644 }, { "epoch": 10.69, "learning_rate": 0.001, "loss": 2.7253, "step": 55656 }, { "epoch": 10.69, "learning_rate": 0.001, "loss": 2.7245, "step": 55668 }, { "epoch": 10.69, "learning_rate": 0.001, "loss": 2.7164, "step": 55680 }, { "epoch": 10.69, "learning_rate": 0.001, "loss": 2.7168, "step": 55692 }, { "epoch": 10.7, "learning_rate": 0.001, "loss": 2.7266, "step": 55704 }, { "epoch": 10.7, "learning_rate": 0.001, "loss": 2.7259, "step": 55716 }, { "epoch": 10.7, "learning_rate": 0.001, "loss": 2.7299, "step": 55728 }, { "epoch": 10.7, "learning_rate": 0.001, "loss": 2.7171, "step": 55740 }, { "epoch": 10.71, "learning_rate": 0.001, "loss": 2.7239, "step": 55752 }, { "epoch": 10.71, "learning_rate": 0.001, "loss": 2.7133, "step": 55764 }, { "epoch": 10.71, "learning_rate": 0.001, "loss": 2.7238, "step": 55776 }, { "epoch": 10.71, "learning_rate": 0.001, "loss": 2.722, "step": 55788 }, { "epoch": 10.71, "learning_rate": 0.001, "loss": 2.731, "step": 55800 }, { "epoch": 10.72, "learning_rate": 0.001, "loss": 2.7345, "step": 55812 }, { "epoch": 10.72, "learning_rate": 0.001, "loss": 2.7314, "step": 55824 }, { "epoch": 10.72, "learning_rate": 0.001, "loss": 2.7299, "step": 55836 }, { "epoch": 10.72, "learning_rate": 0.001, "loss": 2.7236, "step": 55848 }, { "epoch": 10.73, "learning_rate": 0.001, "loss": 2.7177, "step": 55860 }, { "epoch": 10.73, "learning_rate": 0.001, "loss": 2.712, "step": 55872 }, { "epoch": 10.73, "learning_rate": 0.001, "loss": 2.7253, "step": 55884 }, { "epoch": 10.73, "learning_rate": 0.001, "loss": 2.725, "step": 55896 }, { "epoch": 10.74, "learning_rate": 0.001, "loss": 2.7234, "step": 55908 }, { "epoch": 10.74, "learning_rate": 0.001, "loss": 2.7313, "step": 55920 }, { "epoch": 10.74, "learning_rate": 0.001, "loss": 2.7208, "step": 55932 }, { "epoch": 10.74, "learning_rate": 0.001, "loss": 2.7232, "step": 55944 }, { "epoch": 10.74, "learning_rate": 0.001, "loss": 2.727, "step": 55956 }, { "epoch": 10.75, "learning_rate": 0.001, "loss": 2.7235, "step": 55968 }, { "epoch": 10.75, "learning_rate": 0.001, "loss": 2.7381, "step": 55980 }, { "epoch": 10.75, "learning_rate": 0.001, "loss": 2.7286, "step": 55992 }, { "epoch": 10.75, "learning_rate": 0.001, "loss": 2.731, "step": 56004 }, { "epoch": 10.76, "learning_rate": 0.001, "loss": 2.7321, "step": 56016 }, { "epoch": 10.76, "learning_rate": 0.001, "loss": 2.7298, "step": 56028 }, { "epoch": 10.76, "learning_rate": 0.001, "loss": 2.7273, "step": 56040 }, { "epoch": 10.76, "learning_rate": 0.001, "loss": 2.7356, "step": 56052 }, { "epoch": 10.76, "learning_rate": 0.001, "loss": 2.7198, "step": 56064 }, { "epoch": 10.77, "learning_rate": 0.001, "loss": 2.7349, "step": 56076 }, { "epoch": 10.77, "learning_rate": 0.001, "loss": 2.722, "step": 56088 }, { "epoch": 10.77, "learning_rate": 0.001, "loss": 2.7354, "step": 56100 }, { "epoch": 10.77, "learning_rate": 0.001, "loss": 2.7343, "step": 56112 }, { "epoch": 10.78, "learning_rate": 0.001, "loss": 2.7287, "step": 56124 }, { "epoch": 10.78, "learning_rate": 0.001, "loss": 2.7372, "step": 56136 }, { "epoch": 10.78, "learning_rate": 0.001, "loss": 2.7405, "step": 56148 }, { "epoch": 10.78, "learning_rate": 0.001, "loss": 2.7315, "step": 56160 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 2.728, "step": 56172 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 2.7276, "step": 56184 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 2.7176, "step": 56196 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 2.7259, "step": 56208 }, { "epoch": 10.79, "learning_rate": 0.001, "loss": 2.7232, "step": 56220 }, { "epoch": 10.8, "learning_rate": 0.001, "loss": 2.722, "step": 56232 }, { "epoch": 10.8, "learning_rate": 0.001, "loss": 2.7261, "step": 56244 }, { "epoch": 10.8, "eval_ag_news_accuracy": 0.30584375, "eval_ag_news_bleu_score": 4.252377613985345, "eval_ag_news_bleu_score_sem": 0.13956869896846102, "eval_ag_news_emb_cos_sim": 0.7772521376609802, "eval_ag_news_emb_cos_sim_sem": 0.008338865271902232, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.742692232131958, "eval_ag_news_n_ngrams_match_1": 12.956, "eval_ag_news_n_ngrams_match_2": 2.622, "eval_ag_news_n_ngrams_match_3": 0.696, "eval_ag_news_num_pred_words": 45.932, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.211480429328915, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.31743734148452624, "eval_ag_news_runtime": 10.443, "eval_ag_news_samples_per_second": 47.879, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3285705078935257, "eval_ag_news_token_set_f1_sem": 0.004423649195070215, "eval_ag_news_token_set_precision": 0.30705897796641934, "eval_ag_news_token_set_recall": 0.37212458409033633, "eval_ag_news_true_num_tokens": 56.09375, "step": 56250 }, { "epoch": 10.8, "eval_anthropic_toxic_prompts_accuracy": 0.10653125, "eval_anthropic_toxic_prompts_bleu_score": 2.7686883625140473, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11485083455155894, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6245629191398621, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01097283125250154, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4250752925872803, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.71, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.612, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574, "eval_anthropic_toxic_prompts_num_pred_words": 47.024, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.724958126006992, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19806136476059152, "eval_anthropic_toxic_prompts_runtime": 9.9973, "eval_anthropic_toxic_prompts_samples_per_second": 50.014, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3381603542203713, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006567382204123827, "eval_anthropic_toxic_prompts_token_set_precision": 0.3974096944412263, "eval_anthropic_toxic_prompts_token_set_recall": 0.3256008528496704, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 56250 }, { "epoch": 10.8, "eval_arxiv_accuracy": 0.32865625, "eval_arxiv_bleu_score": 3.9154094006971207, "eval_arxiv_bleu_score_sem": 0.11494489811529976, "eval_arxiv_emb_cos_sim": 0.7228298783302307, "eval_arxiv_emb_cos_sim_sem": 0.009944919008158932, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5995633602142334, "eval_arxiv_n_ngrams_match_1": 13.916, "eval_arxiv_n_ngrams_match_2": 2.596, "eval_arxiv_n_ngrams_match_3": 0.546, "eval_arxiv_num_pred_words": 40.058, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.58225768672877, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3291134608642131, "eval_arxiv_runtime": 9.8718, "eval_arxiv_samples_per_second": 50.649, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.32720644769478746, "eval_arxiv_token_set_f1_sem": 0.004287680412745275, "eval_arxiv_token_set_precision": 0.2743324296845597, "eval_arxiv_token_set_recall": 0.42559563997043814, "eval_arxiv_true_num_tokens": 64.0, "step": 56250 }, { "epoch": 10.8, "eval_python_code_alpaca_accuracy": 0.14953125, "eval_python_code_alpaca_bleu_score": 4.038940794383992, "eval_python_code_alpaca_bleu_score_sem": 0.12327250121728657, "eval_python_code_alpaca_emb_cos_sim": 0.7331154346466064, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009374375802777944, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.073585033416748, "eval_python_code_alpaca_n_ngrams_match_1": 9.072, "eval_python_code_alpaca_n_ngrams_match_2": 2.45, "eval_python_code_alpaca_n_ngrams_match_3": 0.734, "eval_python_code_alpaca_num_pred_words": 43.66, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.619269714801074, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30501259465143193, "eval_python_code_alpaca_runtime": 10.0658, "eval_python_code_alpaca_samples_per_second": 49.673, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.45239319200333894, "eval_python_code_alpaca_token_set_f1_sem": 0.005190135137561469, "eval_python_code_alpaca_token_set_precision": 0.4918878848712137, "eval_python_code_alpaca_token_set_recall": 0.44430162301796994, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 56250 }, { "epoch": 10.8, "eval_wikibio_accuracy": 0.305375, "eval_wikibio_bleu_score": 5.394592970302806, "eval_wikibio_bleu_score_sem": 0.19926590456356802, "eval_wikibio_emb_cos_sim": 0.7010586261749268, "eval_wikibio_emb_cos_sim_sem": 0.012143735900040465, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.971113681793213, "eval_wikibio_n_ngrams_match_1": 9.522, "eval_wikibio_n_ngrams_match_2": 3.1, "eval_wikibio_n_ngrams_match_3": 1.086, "eval_wikibio_num_pred_words": 36.708, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 53.04357161721144, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3291868135137792, "eval_wikibio_runtime": 10.798, "eval_wikibio_samples_per_second": 46.305, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.3020976043901764, "eval_wikibio_token_set_f1_sem": 0.005576644476868077, "eval_wikibio_token_set_precision": 0.3091776447519551, "eval_wikibio_token_set_recall": 0.31366625996005754, "eval_wikibio_true_num_tokens": 61.1328125, "step": 56250 }, { "epoch": 10.8, "eval_nq_accuracy": 0.505375, "eval_nq_bleu_score": 10.466972194667258, "eval_nq_bleu_score_sem": 0.45966750115476035, "eval_nq_emb_cos_sim": 0.8111673593521118, "eval_nq_emb_cos_sim_sem": 0.008248548526810435, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3507819175720215, "eval_nq_n_ngrams_match_1": 21.984, "eval_nq_n_ngrams_match_2": 7.676, "eval_nq_n_ngrams_match_3": 3.434, "eval_nq_num_pred_words": 48.81, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.493771782196534, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42138196083301094, "eval_nq_runtime": 10.5808, "eval_nq_samples_per_second": 47.255, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.440894338645274, "eval_nq_token_set_f1_sem": 0.004996597314153814, "eval_nq_token_set_precision": 0.3932978113058087, "eval_nq_token_set_recall": 0.5121360078769879, "eval_nq_true_num_tokens": 64.0, "step": 56250 }, { "epoch": 10.8, "learning_rate": 0.001, "loss": 2.712, "step": 56256 }, { "epoch": 10.8, "learning_rate": 0.001, "loss": 2.7287, "step": 56268 }, { "epoch": 10.81, "learning_rate": 0.001, "loss": 2.7187, "step": 56280 }, { "epoch": 10.81, "learning_rate": 0.001, "loss": 2.7205, "step": 56292 }, { "epoch": 10.81, "learning_rate": 0.001, "loss": 2.7198, "step": 56304 }, { "epoch": 10.81, "learning_rate": 0.001, "loss": 2.7209, "step": 56316 }, { "epoch": 10.82, "learning_rate": 0.001, "loss": 2.7162, "step": 56328 }, { "epoch": 10.82, "learning_rate": 0.001, "loss": 2.7213, "step": 56340 }, { "epoch": 10.82, "learning_rate": 0.001, "loss": 2.7219, "step": 56352 }, { "epoch": 10.82, "learning_rate": 0.001, "loss": 2.7208, "step": 56364 }, { "epoch": 10.82, "learning_rate": 0.001, "loss": 2.7237, "step": 56376 }, { "epoch": 10.83, "learning_rate": 0.001, "loss": 2.7161, "step": 56388 }, { "epoch": 10.83, "learning_rate": 0.001, "loss": 2.7152, "step": 56400 }, { "epoch": 10.83, "learning_rate": 0.001, "loss": 2.7306, "step": 56412 }, { "epoch": 10.83, "learning_rate": 0.001, "loss": 2.7218, "step": 56424 }, { "epoch": 10.84, "learning_rate": 0.001, "loss": 2.7287, "step": 56436 }, { "epoch": 10.84, "learning_rate": 0.001, "loss": 2.719, "step": 56448 }, { "epoch": 10.84, "learning_rate": 0.001, "loss": 2.726, "step": 56460 }, { "epoch": 10.84, "learning_rate": 0.001, "loss": 2.7154, "step": 56472 }, { "epoch": 10.85, "learning_rate": 0.001, "loss": 2.7255, "step": 56484 }, { "epoch": 10.85, "learning_rate": 0.001, "loss": 2.7163, "step": 56496 }, { "epoch": 10.85, "learning_rate": 0.001, "loss": 2.7176, "step": 56508 }, { "epoch": 10.85, "learning_rate": 0.001, "loss": 2.7217, "step": 56520 }, { "epoch": 10.85, "learning_rate": 0.001, "loss": 2.7195, "step": 56532 }, { "epoch": 10.86, "learning_rate": 0.001, "loss": 2.7172, "step": 56544 }, { "epoch": 10.86, "learning_rate": 0.001, "loss": 2.7215, "step": 56556 }, { "epoch": 10.86, "learning_rate": 0.001, "loss": 2.7276, "step": 56568 }, { "epoch": 10.86, "learning_rate": 0.001, "loss": 2.7087, "step": 56580 }, { "epoch": 10.87, "learning_rate": 0.001, "loss": 2.7078, "step": 56592 }, { "epoch": 10.87, "learning_rate": 0.001, "loss": 2.7211, "step": 56604 }, { "epoch": 10.87, "learning_rate": 0.001, "loss": 2.7201, "step": 56616 }, { "epoch": 10.87, "learning_rate": 0.001, "loss": 2.7147, "step": 56628 }, { "epoch": 10.88, "learning_rate": 0.001, "loss": 2.7234, "step": 56640 }, { "epoch": 10.88, "learning_rate": 0.001, "loss": 2.7252, "step": 56652 }, { "epoch": 10.88, "learning_rate": 0.001, "loss": 2.7179, "step": 56664 }, { "epoch": 10.88, "learning_rate": 0.001, "loss": 2.708, "step": 56676 }, { "epoch": 10.88, "learning_rate": 0.001, "loss": 2.7263, "step": 56688 }, { "epoch": 10.89, "learning_rate": 0.001, "loss": 2.7226, "step": 56700 }, { "epoch": 10.89, "learning_rate": 0.001, "loss": 2.7319, "step": 56712 }, { "epoch": 10.89, "learning_rate": 0.001, "loss": 2.7253, "step": 56724 }, { "epoch": 10.89, "learning_rate": 0.001, "loss": 2.7224, "step": 56736 }, { "epoch": 10.9, "learning_rate": 0.001, "loss": 2.7117, "step": 56748 }, { "epoch": 10.9, "learning_rate": 0.001, "loss": 2.7121, "step": 56760 }, { "epoch": 10.9, "learning_rate": 0.001, "loss": 2.7125, "step": 56772 }, { "epoch": 10.9, "learning_rate": 0.001, "loss": 2.7142, "step": 56784 }, { "epoch": 10.91, "learning_rate": 0.001, "loss": 2.7152, "step": 56796 }, { "epoch": 10.91, "learning_rate": 0.001, "loss": 2.7201, "step": 56808 }, { "epoch": 10.91, "learning_rate": 0.001, "loss": 2.711, "step": 56820 }, { "epoch": 10.91, "learning_rate": 0.001, "loss": 2.7276, "step": 56832 }, { "epoch": 10.91, "learning_rate": 0.001, "loss": 2.7235, "step": 56844 }, { "epoch": 10.92, "learning_rate": 0.001, "loss": 2.7286, "step": 56856 }, { "epoch": 10.92, "learning_rate": 0.001, "loss": 2.7176, "step": 56868 }, { "epoch": 10.92, "eval_ag_news_accuracy": 0.3065, "eval_ag_news_bleu_score": 4.4303056189521905, "eval_ag_news_bleu_score_sem": 0.14546669489836722, "eval_ag_news_emb_cos_sim": 0.7756615281105042, "eval_ag_news_emb_cos_sim_sem": 0.008071350663324985, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.758354425430298, "eval_ag_news_n_ngrams_match_1": 12.968, "eval_ag_news_n_ngrams_match_2": 2.738, "eval_ag_news_n_ngrams_match_3": 0.734, "eval_ag_news_num_pred_words": 45.898, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.87780925963757, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32139990658253703, "eval_ag_news_runtime": 10.9105, "eval_ag_news_samples_per_second": 45.827, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.33007117375531914, "eval_ag_news_token_set_f1_sem": 0.004439771625943391, "eval_ag_news_token_set_precision": 0.3064925052940842, "eval_ag_news_token_set_recall": 0.3724746515241469, "eval_ag_news_true_num_tokens": 56.09375, "step": 56875 }, { "epoch": 10.92, "eval_anthropic_toxic_prompts_accuracy": 0.1065, "eval_anthropic_toxic_prompts_bleu_score": 2.7096081530635847, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10405677912805264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6226505041122437, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010911427245488214, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4365551471710205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.564, "eval_anthropic_toxic_prompts_num_pred_words": 47.57, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.0797085229193, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19296408130567488, "eval_anthropic_toxic_prompts_runtime": 9.5619, "eval_anthropic_toxic_prompts_samples_per_second": 52.291, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.3352031452142971, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503343690353681, "eval_anthropic_toxic_prompts_token_set_precision": 0.39232298518835124, "eval_anthropic_toxic_prompts_token_set_recall": 0.320762978128836, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 56875 }, { "epoch": 10.92, "eval_arxiv_accuracy": 0.32825, "eval_arxiv_bleu_score": 3.8825158148039156, "eval_arxiv_bleu_score_sem": 0.11672451938898767, "eval_arxiv_emb_cos_sim": 0.7262111306190491, "eval_arxiv_emb_cos_sim_sem": 0.0079256305687766, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6267576217651367, "eval_arxiv_n_ngrams_match_1": 14.048, "eval_arxiv_n_ngrams_match_2": 2.69, "eval_arxiv_n_ngrams_match_3": 0.528, "eval_arxiv_num_pred_words": 41.036, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 37.590735425060366, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3298736000502477, "eval_arxiv_runtime": 10.3192, "eval_arxiv_samples_per_second": 48.453, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3298888878062372, "eval_arxiv_token_set_f1_sem": 0.004159619696913173, "eval_arxiv_token_set_precision": 0.27735738660891907, "eval_arxiv_token_set_recall": 0.42730287245228055, "eval_arxiv_true_num_tokens": 64.0, "step": 56875 }, { "epoch": 10.92, "eval_python_code_alpaca_accuracy": 0.14990625, "eval_python_code_alpaca_bleu_score": 4.0113907569894796, "eval_python_code_alpaca_bleu_score_sem": 0.14134455475580787, "eval_python_code_alpaca_emb_cos_sim": 0.7198358774185181, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010124257799839834, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.075366735458374, "eval_python_code_alpaca_n_ngrams_match_1": 9.124, "eval_python_code_alpaca_n_ngrams_match_2": 2.546, "eval_python_code_alpaca_n_ngrams_match_3": 0.788, "eval_python_code_alpaca_num_pred_words": 45.142, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.65782314695592, "eval_python_code_alpaca_pred_num_tokens": 62.96875, "eval_python_code_alpaca_rouge_score": 0.2958587589053272, "eval_python_code_alpaca_runtime": 10.0032, "eval_python_code_alpaca_samples_per_second": 49.984, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.450711941235212, "eval_python_code_alpaca_token_set_f1_sem": 0.005993237587569222, "eval_python_code_alpaca_token_set_precision": 0.49268291832483696, "eval_python_code_alpaca_token_set_recall": 0.4402095610325942, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 56875 }, { "epoch": 10.92, "eval_wikibio_accuracy": 0.30471875, "eval_wikibio_bleu_score": 5.496054908468534, "eval_wikibio_bleu_score_sem": 0.1968930308782966, "eval_wikibio_emb_cos_sim": 0.7178844213485718, "eval_wikibio_emb_cos_sim_sem": 0.01097977099452666, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.015972137451172, "eval_wikibio_n_ngrams_match_1": 9.84, "eval_wikibio_n_ngrams_match_2": 3.184, "eval_wikibio_n_ngrams_match_3": 1.116, "eval_wikibio_num_pred_words": 36.59, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.477200661037045, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34030002023118366, "eval_wikibio_runtime": 11.3183, "eval_wikibio_samples_per_second": 44.176, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.30973723267275016, "eval_wikibio_token_set_f1_sem": 0.005493719360809834, "eval_wikibio_token_set_precision": 0.31915044905063195, "eval_wikibio_token_set_recall": 0.3163183314609686, "eval_wikibio_true_num_tokens": 61.1328125, "step": 56875 }, { "epoch": 10.92, "eval_nq_accuracy": 0.504, "eval_nq_bleu_score": 10.597470227641582, "eval_nq_bleu_score_sem": 0.4344237576175824, "eval_nq_emb_cos_sim": 0.8083126544952393, "eval_nq_emb_cos_sim_sem": 0.007916100112657075, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3533241748809814, "eval_nq_n_ngrams_match_1": 21.944, "eval_nq_n_ngrams_match_2": 7.762, "eval_nq_n_ngrams_match_3": 3.482, "eval_nq_num_pred_words": 48.932, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.520483589965957, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42221122580072257, "eval_nq_runtime": 10.5334, "eval_nq_samples_per_second": 47.468, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.44059985607738433, "eval_nq_token_set_f1_sem": 0.005043803944498725, "eval_nq_token_set_precision": 0.39206069446057645, "eval_nq_token_set_recall": 0.5139015564453835, "eval_nq_true_num_tokens": 64.0, "step": 56875 }, { "epoch": 10.92, "learning_rate": 0.001, "loss": 2.716, "step": 56880 }, { "epoch": 10.92, "learning_rate": 0.001, "loss": 2.7179, "step": 56892 }, { "epoch": 10.93, "learning_rate": 0.001, "loss": 2.7112, "step": 56904 }, { "epoch": 10.93, "learning_rate": 0.001, "loss": 2.7149, "step": 56916 }, { "epoch": 10.93, "learning_rate": 0.001, "loss": 2.7163, "step": 56928 }, { "epoch": 10.93, "learning_rate": 0.001, "loss": 2.7175, "step": 56940 }, { "epoch": 10.94, "learning_rate": 0.001, "loss": 2.7073, "step": 56952 }, { "epoch": 10.94, "learning_rate": 0.001, "loss": 2.7231, "step": 56964 }, { "epoch": 10.94, "learning_rate": 0.001, "loss": 2.7157, "step": 56976 }, { "epoch": 10.94, "learning_rate": 0.001, "loss": 2.7174, "step": 56988 }, { "epoch": 10.94, "learning_rate": 0.001, "loss": 2.7086, "step": 57000 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 2.7204, "step": 57012 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 2.7218, "step": 57024 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 2.7228, "step": 57036 }, { "epoch": 10.95, "learning_rate": 0.001, "loss": 2.7219, "step": 57048 }, { "epoch": 10.96, "learning_rate": 0.001, "loss": 2.719, "step": 57060 }, { "epoch": 10.96, "learning_rate": 0.001, "loss": 2.7254, "step": 57072 }, { "epoch": 10.96, "learning_rate": 0.001, "loss": 2.7163, "step": 57084 }, { "epoch": 10.96, "learning_rate": 0.001, "loss": 2.7272, "step": 57096 }, { "epoch": 10.97, "learning_rate": 0.001, "loss": 2.7192, "step": 57108 }, { "epoch": 10.97, "learning_rate": 0.001, "loss": 2.7127, "step": 57120 }, { "epoch": 10.97, "learning_rate": 0.001, "loss": 2.7104, "step": 57132 }, { "epoch": 10.97, "learning_rate": 0.001, "loss": 2.7261, "step": 57144 }, { "epoch": 10.97, "learning_rate": 0.001, "loss": 2.7245, "step": 57156 }, { "epoch": 10.98, "learning_rate": 0.001, "loss": 2.7209, "step": 57168 }, { "epoch": 10.98, "learning_rate": 0.001, "loss": 2.7088, "step": 57180 }, { "epoch": 10.98, "learning_rate": 0.001, "loss": 2.7189, "step": 57192 }, { "epoch": 10.98, "learning_rate": 0.001, "loss": 2.7283, "step": 57204 }, { "epoch": 10.99, "learning_rate": 0.001, "loss": 2.7152, "step": 57216 }, { "epoch": 10.99, "learning_rate": 0.001, "loss": 2.7228, "step": 57228 }, { "epoch": 10.99, "learning_rate": 0.001, "loss": 2.723, "step": 57240 }, { "epoch": 10.99, "learning_rate": 0.001, "loss": 2.7193, "step": 57252 }, { "epoch": 11.0, "learning_rate": 0.001, "loss": 2.7293, "step": 57264 }, { "epoch": 11.0, "learning_rate": 0.001, "loss": 2.7189, "step": 57276 }, { "epoch": 11.0, "learning_rate": 0.001, "loss": 2.7243, "step": 57288 }, { "epoch": 11.0, "learning_rate": 0.001, "loss": 2.7026, "step": 57300 }, { "epoch": 11.0, "learning_rate": 0.001, "loss": 2.7074, "step": 57312 }, { "epoch": 11.01, "learning_rate": 0.001, "loss": 2.6989, "step": 57324 }, { "epoch": 11.01, "learning_rate": 0.001, "loss": 2.7133, "step": 57336 }, { "epoch": 11.01, "learning_rate": 0.001, "loss": 2.6993, "step": 57348 }, { "epoch": 11.01, "learning_rate": 0.001, "loss": 2.6971, "step": 57360 }, { "epoch": 11.02, "learning_rate": 0.001, "loss": 2.7009, "step": 57372 }, { "epoch": 11.02, "learning_rate": 0.001, "loss": 2.699, "step": 57384 }, { "epoch": 11.02, "learning_rate": 0.001, "loss": 2.6975, "step": 57396 }, { "epoch": 11.02, "learning_rate": 0.001, "loss": 2.696, "step": 57408 }, { "epoch": 11.03, "learning_rate": 0.001, "loss": 2.7059, "step": 57420 }, { "epoch": 11.03, "learning_rate": 0.001, "loss": 2.7122, "step": 57432 }, { "epoch": 11.03, "learning_rate": 0.001, "loss": 2.6863, "step": 57444 }, { "epoch": 11.03, "learning_rate": 0.001, "loss": 2.6929, "step": 57456 }, { "epoch": 11.03, "learning_rate": 0.001, "loss": 2.7005, "step": 57468 }, { "epoch": 11.04, "learning_rate": 0.001, "loss": 2.6949, "step": 57480 }, { "epoch": 11.04, "learning_rate": 0.001, "loss": 2.6917, "step": 57492 }, { "epoch": 11.04, "eval_ag_news_accuracy": 0.3069375, "eval_ag_news_bleu_score": 4.522254818587462, "eval_ag_news_bleu_score_sem": 0.14588636050961132, "eval_ag_news_emb_cos_sim": 0.7798112630844116, "eval_ag_news_emb_cos_sim_sem": 0.008509723327409333, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7473461627960205, "eval_ag_news_n_ngrams_match_1": 13.038, "eval_ag_news_n_ngrams_match_2": 2.792, "eval_ag_news_n_ngrams_match_3": 0.802, "eval_ag_news_num_pred_words": 46.37, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.408387573169776, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3190959353446634, "eval_ag_news_runtime": 9.9401, "eval_ag_news_samples_per_second": 50.302, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.32775963799587426, "eval_ag_news_token_set_f1_sem": 0.004329120398457251, "eval_ag_news_token_set_precision": 0.3089203197509194, "eval_ag_news_token_set_recall": 0.36637217420744195, "eval_ag_news_true_num_tokens": 56.09375, "step": 57500 }, { "epoch": 11.04, "eval_anthropic_toxic_prompts_accuracy": 0.1070625, "eval_anthropic_toxic_prompts_bleu_score": 2.816111668498021, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12483843373506187, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6405113935470581, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01103784013215156, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4244728088378906, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.664, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.634, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.578, "eval_anthropic_toxic_prompts_num_pred_words": 47.236, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.706452413290766, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19553180148485383, "eval_anthropic_toxic_prompts_runtime": 10.2, "eval_anthropic_toxic_prompts_samples_per_second": 49.019, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.33069441523904464, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063617832518983244, "eval_anthropic_toxic_prompts_token_set_precision": 0.3933402184179548, "eval_anthropic_toxic_prompts_token_set_recall": 0.3127276552208312, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 57500 }, { "epoch": 11.04, "eval_arxiv_accuracy": 0.3309375, "eval_arxiv_bleu_score": 3.9364015802778085, "eval_arxiv_bleu_score_sem": 0.11594943685436013, "eval_arxiv_emb_cos_sim": 0.7262336015701294, "eval_arxiv_emb_cos_sim_sem": 0.007992539349332153, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.6041362285614014, "eval_arxiv_n_ngrams_match_1": 14.014, "eval_arxiv_n_ngrams_match_2": 2.598, "eval_arxiv_n_ngrams_match_3": 0.536, "eval_arxiv_num_pred_words": 40.616, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.74992660674221, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33120402656786474, "eval_arxiv_runtime": 10.3325, "eval_arxiv_samples_per_second": 48.391, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.32525667067913205, "eval_arxiv_token_set_f1_sem": 0.004185553357486638, "eval_arxiv_token_set_precision": 0.2749939755144048, "eval_arxiv_token_set_recall": 0.4177945078793143, "eval_arxiv_true_num_tokens": 64.0, "step": 57500 }, { "epoch": 11.04, "eval_python_code_alpaca_accuracy": 0.147875, "eval_python_code_alpaca_bleu_score": 3.8843343841007663, "eval_python_code_alpaca_bleu_score_sem": 0.12653085280243534, "eval_python_code_alpaca_emb_cos_sim": 0.7262643575668335, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008864375886738347, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0930442810058594, "eval_python_code_alpaca_n_ngrams_match_1": 8.996, "eval_python_code_alpaca_n_ngrams_match_2": 2.416, "eval_python_code_alpaca_n_ngrams_match_3": 0.72, "eval_python_code_alpaca_num_pred_words": 44.074, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 22.04408431812774, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2957196143145694, "eval_python_code_alpaca_runtime": 10.4063, "eval_python_code_alpaca_samples_per_second": 48.048, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.445763661886549, "eval_python_code_alpaca_token_set_f1_sem": 0.00555026881762446, "eval_python_code_alpaca_token_set_precision": 0.49228168618232554, "eval_python_code_alpaca_token_set_recall": 0.43314921243799787, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 57500 }, { "epoch": 11.04, "eval_wikibio_accuracy": 0.3026875, "eval_wikibio_bleu_score": 5.283167879296112, "eval_wikibio_bleu_score_sem": 0.19151798652254653, "eval_wikibio_emb_cos_sim": 0.7082334756851196, "eval_wikibio_emb_cos_sim_sem": 0.010872272164677085, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.0369110107421875, "eval_wikibio_n_ngrams_match_1": 9.636, "eval_wikibio_n_ngrams_match_2": 3.094, "eval_wikibio_n_ngrams_match_3": 1.058, "eval_wikibio_num_pred_words": 36.9, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 56.651077678392404, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32576161499551753, "eval_wikibio_runtime": 9.6811, "eval_wikibio_samples_per_second": 51.647, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.3044536294926807, "eval_wikibio_token_set_f1_sem": 0.005412713035116527, "eval_wikibio_token_set_precision": 0.31294918951732376, "eval_wikibio_token_set_recall": 0.31493506428632556, "eval_wikibio_true_num_tokens": 61.1328125, "step": 57500 }, { "epoch": 11.04, "eval_nq_accuracy": 0.5053125, "eval_nq_bleu_score": 10.650641997919385, "eval_nq_bleu_score_sem": 0.46356261621733846, "eval_nq_emb_cos_sim": 0.8047137260437012, "eval_nq_emb_cos_sim_sem": 0.008218507043445279, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.340418815612793, "eval_nq_n_ngrams_match_1": 21.958, "eval_nq_n_ngrams_match_2": 7.73, "eval_nq_n_ngrams_match_3": 3.472, "eval_nq_num_pred_words": 48.902, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.38558529727981, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4205499582025919, "eval_nq_runtime": 12.9034, "eval_nq_samples_per_second": 38.749, "eval_nq_steps_per_second": 0.077, "eval_nq_token_set_f1": 0.4387483533119308, "eval_nq_token_set_f1_sem": 0.005354152301822755, "eval_nq_token_set_precision": 0.3933372177800882, "eval_nq_token_set_recall": 0.507979614733193, "eval_nq_true_num_tokens": 64.0, "step": 57500 }, { "epoch": 11.04, "learning_rate": 0.001, "loss": 2.7055, "step": 57504 }, { "epoch": 11.04, "learning_rate": 0.001, "loss": 2.6927, "step": 57516 }, { "epoch": 11.05, "learning_rate": 0.001, "loss": 2.6993, "step": 57528 }, { "epoch": 11.05, "learning_rate": 0.001, "loss": 2.7009, "step": 57540 }, { "epoch": 11.05, "learning_rate": 0.001, "loss": 2.7015, "step": 57552 }, { "epoch": 11.05, "learning_rate": 0.001, "loss": 2.7043, "step": 57564 }, { "epoch": 11.06, "learning_rate": 0.001, "loss": 2.7079, "step": 57576 }, { "epoch": 11.06, "learning_rate": 0.001, "loss": 2.7037, "step": 57588 }, { "epoch": 11.06, "learning_rate": 0.001, "loss": 2.7006, "step": 57600 }, { "epoch": 11.06, "learning_rate": 0.001, "loss": 2.7177, "step": 57612 }, { "epoch": 11.06, "learning_rate": 0.001, "loss": 2.7178, "step": 57624 }, { "epoch": 11.07, "learning_rate": 0.001, "loss": 2.7079, "step": 57636 }, { "epoch": 11.07, "learning_rate": 0.001, "loss": 2.6982, "step": 57648 }, { "epoch": 11.07, "learning_rate": 0.001, "loss": 2.7038, "step": 57660 }, { "epoch": 11.07, "learning_rate": 0.001, "loss": 2.7082, "step": 57672 }, { "epoch": 11.08, "learning_rate": 0.001, "loss": 2.7047, "step": 57684 }, { "epoch": 11.08, "learning_rate": 0.001, "loss": 2.7001, "step": 57696 }, { "epoch": 11.08, "learning_rate": 0.001, "loss": 2.6992, "step": 57708 }, { "epoch": 11.08, "learning_rate": 0.001, "loss": 2.7006, "step": 57720 }, { "epoch": 11.09, "learning_rate": 0.001, "loss": 2.7021, "step": 57732 }, { "epoch": 11.09, "learning_rate": 0.001, "loss": 2.709, "step": 57744 }, { "epoch": 11.09, "learning_rate": 0.001, "loss": 2.7006, "step": 57756 }, { "epoch": 11.09, "learning_rate": 0.001, "loss": 2.6971, "step": 57768 }, { "epoch": 11.09, "learning_rate": 0.001, "loss": 2.712, "step": 57780 }, { "epoch": 11.1, "learning_rate": 0.001, "loss": 2.6994, "step": 57792 }, { "epoch": 11.1, "learning_rate": 0.001, "loss": 2.7093, "step": 57804 }, { "epoch": 11.1, "learning_rate": 0.001, "loss": 2.695, "step": 57816 }, { "epoch": 11.1, "learning_rate": 0.001, "loss": 2.7148, "step": 57828 }, { "epoch": 11.11, "learning_rate": 0.001, "loss": 2.7063, "step": 57840 }, { "epoch": 11.11, "learning_rate": 0.001, "loss": 2.6923, "step": 57852 }, { "epoch": 11.11, "learning_rate": 0.001, "loss": 2.7013, "step": 57864 }, { "epoch": 11.11, "learning_rate": 0.001, "loss": 2.7027, "step": 57876 }, { "epoch": 11.12, "learning_rate": 0.001, "loss": 2.7069, "step": 57888 }, { "epoch": 11.12, "learning_rate": 0.001, "loss": 2.6966, "step": 57900 }, { "epoch": 11.12, "learning_rate": 0.001, "loss": 2.7048, "step": 57912 }, { "epoch": 11.12, "learning_rate": 0.001, "loss": 2.7071, "step": 57924 }, { "epoch": 11.12, "learning_rate": 0.001, "loss": 2.698, "step": 57936 }, { "epoch": 11.13, "learning_rate": 0.001, "loss": 2.6979, "step": 57948 }, { "epoch": 11.13, "learning_rate": 0.001, "loss": 2.7083, "step": 57960 }, { "epoch": 11.13, "learning_rate": 0.001, "loss": 2.7057, "step": 57972 }, { "epoch": 11.13, "learning_rate": 0.001, "loss": 2.7064, "step": 57984 }, { "epoch": 11.14, "learning_rate": 0.001, "loss": 2.7042, "step": 57996 }, { "epoch": 11.14, "learning_rate": 0.001, "loss": 2.7079, "step": 58008 }, { "epoch": 11.14, "learning_rate": 0.001, "loss": 2.7114, "step": 58020 }, { "epoch": 11.14, "learning_rate": 0.001, "loss": 2.7102, "step": 58032 }, { "epoch": 11.15, "learning_rate": 0.001, "loss": 2.6982, "step": 58044 }, { "epoch": 11.15, "learning_rate": 0.001, "loss": 2.6971, "step": 58056 }, { "epoch": 11.15, "learning_rate": 0.001, "loss": 2.6973, "step": 58068 }, { "epoch": 11.15, "learning_rate": 0.001, "loss": 2.7083, "step": 58080 }, { "epoch": 11.15, "learning_rate": 0.001, "loss": 2.6956, "step": 58092 }, { "epoch": 11.16, "learning_rate": 0.001, "loss": 2.7041, "step": 58104 }, { "epoch": 11.16, "learning_rate": 0.001, "loss": 2.6981, "step": 58116 }, { "epoch": 11.16, "eval_ag_news_accuracy": 0.30684375, "eval_ag_news_bleu_score": 4.404854953079719, "eval_ag_news_bleu_score_sem": 0.13991467928055173, "eval_ag_news_emb_cos_sim": 0.772075891494751, "eval_ag_news_emb_cos_sim_sem": 0.009697400445022071, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7400405406951904, "eval_ag_news_n_ngrams_match_1": 13.006, "eval_ag_news_n_ngrams_match_2": 2.78, "eval_ag_news_n_ngrams_match_3": 0.75, "eval_ag_news_num_pred_words": 46.46, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.09969688137981, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3234550037144449, "eval_ag_news_runtime": 10.4843, "eval_ag_news_samples_per_second": 47.69, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.329440871819913, "eval_ag_news_token_set_f1_sem": 0.004309609570861408, "eval_ag_news_token_set_precision": 0.31012448042069374, "eval_ag_news_token_set_recall": 0.3685082984965904, "eval_ag_news_true_num_tokens": 56.09375, "step": 58125 }, { "epoch": 11.16, "eval_anthropic_toxic_prompts_accuracy": 0.108625, "eval_anthropic_toxic_prompts_bleu_score": 2.7546323647445705, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11088759228191522, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6415292024612427, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010219925219058993, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4118399620056152, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.676, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.626, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.54, "eval_anthropic_toxic_prompts_num_pred_words": 47.426, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.320982421253134, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19711881354469407, "eval_anthropic_toxic_prompts_runtime": 10.9118, "eval_anthropic_toxic_prompts_samples_per_second": 45.822, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.3362022816654852, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062065387337703875, "eval_anthropic_toxic_prompts_token_set_precision": 0.40087196572619965, "eval_anthropic_toxic_prompts_token_set_recall": 0.3169868091230833, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 58125 }, { "epoch": 11.16, "eval_arxiv_accuracy": 0.33078125, "eval_arxiv_bleu_score": 3.878411244471973, "eval_arxiv_bleu_score_sem": 0.10709292701408764, "eval_arxiv_emb_cos_sim": 0.7227597832679749, "eval_arxiv_emb_cos_sim_sem": 0.007836613131999052, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5966672897338867, "eval_arxiv_n_ngrams_match_1": 13.876, "eval_arxiv_n_ngrams_match_2": 2.486, "eval_arxiv_n_ngrams_match_3": 0.528, "eval_arxiv_num_pred_words": 40.04, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.47646615394681, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3311057215710334, "eval_arxiv_runtime": 16.8109, "eval_arxiv_samples_per_second": 29.743, "eval_arxiv_steps_per_second": 0.059, "eval_arxiv_token_set_f1": 0.3261414864720136, "eval_arxiv_token_set_f1_sem": 0.003999993953918227, "eval_arxiv_token_set_precision": 0.27435228024569336, "eval_arxiv_token_set_recall": 0.42395166930538597, "eval_arxiv_true_num_tokens": 64.0, "step": 58125 }, { "epoch": 11.16, "eval_python_code_alpaca_accuracy": 0.14975, "eval_python_code_alpaca_bleu_score": 3.875740023336171, "eval_python_code_alpaca_bleu_score_sem": 0.11476498520429856, "eval_python_code_alpaca_emb_cos_sim": 0.7398732900619507, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619849255929493, "eval_python_code_alpaca_emb_top1_equal": 0.0703125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.022687306110270106, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0788724422454834, "eval_python_code_alpaca_n_ngrams_match_1": 9.208, "eval_python_code_alpaca_n_ngrams_match_2": 2.468, "eval_python_code_alpaca_n_ngrams_match_3": 0.708, "eval_python_code_alpaca_num_pred_words": 43.934, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.733882367321574, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3066312064796719, "eval_python_code_alpaca_runtime": 9.8935, "eval_python_code_alpaca_samples_per_second": 50.538, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4491762975567623, "eval_python_code_alpaca_token_set_f1_sem": 0.00521366702632569, "eval_python_code_alpaca_token_set_precision": 0.498575835264117, "eval_python_code_alpaca_token_set_recall": 0.43109428207429284, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 58125 }, { "epoch": 11.16, "eval_wikibio_accuracy": 0.30409375, "eval_wikibio_bleu_score": 5.14882176265263, "eval_wikibio_bleu_score_sem": 0.18901377929551888, "eval_wikibio_emb_cos_sim": 0.7046616673469543, "eval_wikibio_emb_cos_sim_sem": 0.01085805748147228, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.009243488311768, "eval_wikibio_n_ngrams_match_1": 9.352, "eval_wikibio_n_ngrams_match_2": 2.99, "eval_wikibio_n_ngrams_match_3": 1.022, "eval_wikibio_num_pred_words": 36.03, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.10516708788541, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3221732509451103, "eval_wikibio_runtime": 9.6314, "eval_wikibio_samples_per_second": 51.913, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.2962359261040241, "eval_wikibio_token_set_f1_sem": 0.005827731273917247, "eval_wikibio_token_set_precision": 0.3029042606347466, "eval_wikibio_token_set_recall": 0.30852043392029155, "eval_wikibio_true_num_tokens": 61.1328125, "step": 58125 }, { "epoch": 11.16, "eval_nq_accuracy": 0.50609375, "eval_nq_bleu_score": 10.685042045736118, "eval_nq_bleu_score_sem": 0.46940046219238923, "eval_nq_emb_cos_sim": 0.8148090839385986, "eval_nq_emb_cos_sim_sem": 0.007645845958926618, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3385086059570312, "eval_nq_n_ngrams_match_1": 22.344, "eval_nq_n_ngrams_match_2": 7.904, "eval_nq_n_ngrams_match_3": 3.482, "eval_nq_num_pred_words": 49.13, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.365765587891014, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4287459749369873, "eval_nq_runtime": 10.5752, "eval_nq_samples_per_second": 47.28, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.444798741609737, "eval_nq_token_set_f1_sem": 0.005066460286545068, "eval_nq_token_set_precision": 0.40014016363801125, "eval_nq_token_set_recall": 0.5106069938321136, "eval_nq_true_num_tokens": 64.0, "step": 58125 }, { "epoch": 11.16, "learning_rate": 0.001, "loss": 2.6906, "step": 58128 }, { "epoch": 11.16, "learning_rate": 0.001, "loss": 2.7075, "step": 58140 }, { "epoch": 11.17, "learning_rate": 0.001, "loss": 2.7106, "step": 58152 }, { "epoch": 11.17, "learning_rate": 0.001, "loss": 2.7008, "step": 58164 }, { "epoch": 11.17, "learning_rate": 0.001, "loss": 2.7016, "step": 58176 }, { "epoch": 11.17, "learning_rate": 0.001, "loss": 2.697, "step": 58188 }, { "epoch": 11.18, "learning_rate": 0.001, "loss": 2.6962, "step": 58200 }, { "epoch": 11.18, "learning_rate": 0.001, "loss": 2.6975, "step": 58212 }, { "epoch": 11.18, "learning_rate": 0.001, "loss": 2.6966, "step": 58224 }, { "epoch": 11.18, "learning_rate": 0.001, "loss": 2.6928, "step": 58236 }, { "epoch": 11.18, "learning_rate": 0.001, "loss": 2.7072, "step": 58248 }, { "epoch": 11.19, "learning_rate": 0.001, "loss": 2.6941, "step": 58260 }, { "epoch": 11.19, "learning_rate": 0.001, "loss": 2.7011, "step": 58272 }, { "epoch": 11.19, "learning_rate": 0.001, "loss": 2.7051, "step": 58284 }, { "epoch": 11.19, "learning_rate": 0.001, "loss": 2.7027, "step": 58296 }, { "epoch": 11.2, "learning_rate": 0.001, "loss": 2.7041, "step": 58308 }, { "epoch": 11.2, "learning_rate": 0.001, "loss": 2.7105, "step": 58320 }, { "epoch": 11.2, "learning_rate": 0.001, "loss": 2.711, "step": 58332 }, { "epoch": 11.2, "learning_rate": 0.001, "loss": 2.6981, "step": 58344 }, { "epoch": 11.21, "learning_rate": 0.001, "loss": 2.7, "step": 58356 }, { "epoch": 11.21, "learning_rate": 0.001, "loss": 2.705, "step": 58368 }, { "epoch": 11.21, "learning_rate": 0.001, "loss": 2.711, "step": 58380 }, { "epoch": 11.21, "learning_rate": 0.001, "loss": 2.7122, "step": 58392 }, { "epoch": 11.21, "learning_rate": 0.001, "loss": 2.7075, "step": 58404 }, { "epoch": 11.22, "learning_rate": 0.001, "loss": 2.6989, "step": 58416 }, { "epoch": 11.22, "learning_rate": 0.001, "loss": 2.7039, "step": 58428 }, { "epoch": 11.22, "learning_rate": 0.001, "loss": 2.7076, "step": 58440 }, { "epoch": 11.22, "learning_rate": 0.001, "loss": 2.7064, "step": 58452 }, { "epoch": 11.23, "learning_rate": 0.001, "loss": 2.7056, "step": 58464 }, { "epoch": 11.23, "learning_rate": 0.001, "loss": 2.7038, "step": 58476 }, { "epoch": 11.23, "learning_rate": 0.001, "loss": 2.7, "step": 58488 }, { "epoch": 11.23, "learning_rate": 0.001, "loss": 2.7025, "step": 58500 }, { "epoch": 11.24, "learning_rate": 0.001, "loss": 2.6966, "step": 58512 }, { "epoch": 11.24, "learning_rate": 0.001, "loss": 2.7084, "step": 58524 }, { "epoch": 11.24, "learning_rate": 0.001, "loss": 2.7168, "step": 58536 }, { "epoch": 11.24, "learning_rate": 0.001, "loss": 2.7072, "step": 58548 }, { "epoch": 11.24, "learning_rate": 0.001, "loss": 2.6973, "step": 58560 }, { "epoch": 11.25, "learning_rate": 0.001, "loss": 2.7022, "step": 58572 }, { "epoch": 11.25, "learning_rate": 0.001, "loss": 2.699, "step": 58584 }, { "epoch": 11.25, "learning_rate": 0.001, "loss": 2.7046, "step": 58596 }, { "epoch": 11.25, "learning_rate": 0.001, "loss": 2.7215, "step": 58608 }, { "epoch": 11.26, "learning_rate": 0.001, "loss": 2.6986, "step": 58620 }, { "epoch": 11.26, "learning_rate": 0.001, "loss": 2.7048, "step": 58632 }, { "epoch": 11.26, "learning_rate": 0.001, "loss": 2.6905, "step": 58644 }, { "epoch": 11.26, "learning_rate": 0.001, "loss": 2.7061, "step": 58656 }, { "epoch": 11.26, "learning_rate": 0.001, "loss": 2.7046, "step": 58668 }, { "epoch": 11.27, "learning_rate": 0.001, "loss": 2.7047, "step": 58680 }, { "epoch": 11.27, "learning_rate": 0.001, "loss": 2.7048, "step": 58692 }, { "epoch": 11.27, "learning_rate": 0.001, "loss": 2.7144, "step": 58704 }, { "epoch": 11.27, "learning_rate": 0.001, "loss": 2.6919, "step": 58716 }, { "epoch": 11.28, "learning_rate": 0.001, "loss": 2.7014, "step": 58728 }, { "epoch": 11.28, "learning_rate": 0.001, "loss": 2.7059, "step": 58740 }, { "epoch": 11.28, "eval_ag_news_accuracy": 0.30684375, "eval_ag_news_bleu_score": 4.5296564200494105, "eval_ag_news_bleu_score_sem": 0.15152853801023605, "eval_ag_news_emb_cos_sim": 0.7873067855834961, "eval_ag_news_emb_cos_sim_sem": 0.008036674343712646, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7307772636413574, "eval_ag_news_n_ngrams_match_1": 13.292, "eval_ag_news_n_ngrams_match_2": 2.762, "eval_ag_news_n_ngrams_match_3": 0.768, "eval_ag_news_num_pred_words": 46.774, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.711516412654476, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32425441213985295, "eval_ag_news_runtime": 11.3499, "eval_ag_news_samples_per_second": 44.053, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.3333053712726195, "eval_ag_news_token_set_f1_sem": 0.004264557480845165, "eval_ag_news_token_set_precision": 0.31513790073234066, "eval_ag_news_token_set_recall": 0.36704357239580815, "eval_ag_news_true_num_tokens": 56.09375, "step": 58750 }, { "epoch": 11.28, "eval_anthropic_toxic_prompts_accuracy": 0.1068125, "eval_anthropic_toxic_prompts_bleu_score": 2.9010616846179484, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12660661076898555, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6553249955177307, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009287884542733018, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4395790100097656, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.848, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.704, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582, "eval_anthropic_toxic_prompts_num_pred_words": 47.102, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.173831534379364, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20190160930748008, "eval_anthropic_toxic_prompts_runtime": 9.4323, "eval_anthropic_toxic_prompts_samples_per_second": 53.009, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.34172570020486376, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006473508333206387, "eval_anthropic_toxic_prompts_token_set_precision": 0.4151872571029492, "eval_anthropic_toxic_prompts_token_set_recall": 0.31911770200456474, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 58750 }, { "epoch": 11.28, "eval_arxiv_accuracy": 0.33109375, "eval_arxiv_bleu_score": 3.9621848956276606, "eval_arxiv_bleu_score_sem": 0.10961241137745281, "eval_arxiv_emb_cos_sim": 0.7240477800369263, "eval_arxiv_emb_cos_sim_sem": 0.007706940450021191, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.59248685836792, "eval_arxiv_n_ngrams_match_1": 14.056, "eval_arxiv_n_ngrams_match_2": 2.632, "eval_arxiv_n_ngrams_match_3": 0.586, "eval_arxiv_num_pred_words": 40.116, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.32429707851407, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33320154360712884, "eval_arxiv_runtime": 9.8297, "eval_arxiv_samples_per_second": 50.866, "eval_arxiv_steps_per_second": 0.102, "eval_arxiv_token_set_f1": 0.3285609237336241, "eval_arxiv_token_set_f1_sem": 0.00414196208913151, "eval_arxiv_token_set_precision": 0.27753659123686825, "eval_arxiv_token_set_recall": 0.42119789710111016, "eval_arxiv_true_num_tokens": 64.0, "step": 58750 }, { "epoch": 11.28, "eval_python_code_alpaca_accuracy": 0.1483125, "eval_python_code_alpaca_bleu_score": 3.9916814912080887, "eval_python_code_alpaca_bleu_score_sem": 0.12696243628711373, "eval_python_code_alpaca_emb_cos_sim": 0.7326651811599731, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008663690374831185, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0883514881134033, "eval_python_code_alpaca_n_ngrams_match_1": 9.098, "eval_python_code_alpaca_n_ngrams_match_2": 2.432, "eval_python_code_alpaca_n_ngrams_match_3": 0.742, "eval_python_code_alpaca_num_pred_words": 43.764, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.94087834744448, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3029695969529678, "eval_python_code_alpaca_runtime": 10.8584, "eval_python_code_alpaca_samples_per_second": 46.047, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.4474930196277289, "eval_python_code_alpaca_token_set_f1_sem": 0.005635936224002089, "eval_python_code_alpaca_token_set_precision": 0.49212596973618433, "eval_python_code_alpaca_token_set_recall": 0.4378380005002488, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 58750 }, { "epoch": 11.28, "eval_wikibio_accuracy": 0.30625, "eval_wikibio_bleu_score": 5.284456659195724, "eval_wikibio_bleu_score_sem": 0.19164121299613252, "eval_wikibio_emb_cos_sim": 0.7153770327568054, "eval_wikibio_emb_cos_sim_sem": 0.011638356990813383, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9758846759796143, "eval_wikibio_n_ngrams_match_1": 9.524, "eval_wikibio_n_ngrams_match_2": 3.05, "eval_wikibio_n_ngrams_match_3": 1.076, "eval_wikibio_num_pred_words": 36.496, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 53.297246849364434, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32919887920883045, "eval_wikibio_runtime": 9.5054, "eval_wikibio_samples_per_second": 52.601, "eval_wikibio_steps_per_second": 0.105, "eval_wikibio_token_set_f1": 0.2996793878624288, "eval_wikibio_token_set_f1_sem": 0.005715281630065503, "eval_wikibio_token_set_precision": 0.30903652503095136, "eval_wikibio_token_set_recall": 0.30777861230276893, "eval_wikibio_true_num_tokens": 61.1328125, "step": 58750 }, { "epoch": 11.28, "eval_nq_accuracy": 0.50759375, "eval_nq_bleu_score": 10.752475591995346, "eval_nq_bleu_score_sem": 0.4589472595975013, "eval_nq_emb_cos_sim": 0.8130066394805908, "eval_nq_emb_cos_sim_sem": 0.007647548955024262, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3384242057800293, "eval_nq_n_ngrams_match_1": 22.354, "eval_nq_n_ngrams_match_2": 7.916, "eval_nq_n_ngrams_match_3": 3.52, "eval_nq_num_pred_words": 49.122, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.36489075235929, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4286441479224601, "eval_nq_runtime": 15.4561, "eval_nq_samples_per_second": 32.35, "eval_nq_steps_per_second": 0.065, "eval_nq_token_set_f1": 0.44749500085607663, "eval_nq_token_set_f1_sem": 0.004987511294251989, "eval_nq_token_set_precision": 0.4021669948134483, "eval_nq_token_set_recall": 0.5153262770335792, "eval_nq_true_num_tokens": 64.0, "step": 58750 }, { "epoch": 11.28, "learning_rate": 0.001, "loss": 2.7045, "step": 58752 }, { "epoch": 11.28, "learning_rate": 0.001, "loss": 2.7014, "step": 58764 }, { "epoch": 11.29, "learning_rate": 0.001, "loss": 2.7051, "step": 58776 }, { "epoch": 11.29, "learning_rate": 0.001, "loss": 2.7063, "step": 58788 }, { "epoch": 11.29, "learning_rate": 0.001, "loss": 2.7027, "step": 58800 }, { "epoch": 11.29, "learning_rate": 0.001, "loss": 2.6969, "step": 58812 }, { "epoch": 11.29, "learning_rate": 0.001, "loss": 2.7054, "step": 58824 }, { "epoch": 11.3, "learning_rate": 0.001, "loss": 2.7125, "step": 58836 }, { "epoch": 11.3, "learning_rate": 0.001, "loss": 2.6916, "step": 58848 }, { "epoch": 11.3, "learning_rate": 0.001, "loss": 2.7025, "step": 58860 }, { "epoch": 11.3, "learning_rate": 0.001, "loss": 2.7124, "step": 58872 }, { "epoch": 11.31, "learning_rate": 0.001, "loss": 2.7008, "step": 58884 }, { "epoch": 11.31, "learning_rate": 0.001, "loss": 2.7055, "step": 58896 }, { "epoch": 11.31, "learning_rate": 0.001, "loss": 2.7092, "step": 58908 }, { "epoch": 11.31, "learning_rate": 0.001, "loss": 2.7101, "step": 58920 }, { "epoch": 11.32, "learning_rate": 0.001, "loss": 2.7039, "step": 58932 }, { "epoch": 11.32, "learning_rate": 0.001, "loss": 2.7004, "step": 58944 }, { "epoch": 11.32, "learning_rate": 0.001, "loss": 2.719, "step": 58956 }, { "epoch": 11.32, "learning_rate": 0.001, "loss": 2.7028, "step": 58968 }, { "epoch": 11.32, "learning_rate": 0.001, "loss": 2.7054, "step": 58980 }, { "epoch": 11.33, "learning_rate": 0.001, "loss": 2.6897, "step": 58992 }, { "epoch": 11.33, "learning_rate": 0.001, "loss": 2.6987, "step": 59004 }, { "epoch": 11.33, "learning_rate": 0.001, "loss": 2.6915, "step": 59016 }, { "epoch": 11.33, "learning_rate": 0.001, "loss": 2.7117, "step": 59028 }, { "epoch": 11.34, "learning_rate": 0.001, "loss": 2.704, "step": 59040 }, { "epoch": 11.34, "learning_rate": 0.001, "loss": 2.7062, "step": 59052 }, { "epoch": 11.34, "learning_rate": 0.001, "loss": 2.7133, "step": 59064 }, { "epoch": 11.34, "learning_rate": 0.001, "loss": 2.7066, "step": 59076 }, { "epoch": 11.35, "learning_rate": 0.001, "loss": 2.7077, "step": 59088 }, { "epoch": 11.35, "learning_rate": 0.001, "loss": 2.7007, "step": 59100 }, { "epoch": 11.35, "learning_rate": 0.001, "loss": 2.7012, "step": 59112 }, { "epoch": 11.35, "learning_rate": 0.001, "loss": 2.7049, "step": 59124 }, { "epoch": 11.35, "learning_rate": 0.001, "loss": 2.7127, "step": 59136 }, { "epoch": 11.36, "learning_rate": 0.001, "loss": 2.6985, "step": 59148 }, { "epoch": 11.36, "learning_rate": 0.001, "loss": 2.6989, "step": 59160 }, { "epoch": 11.36, "learning_rate": 0.001, "loss": 2.7088, "step": 59172 }, { "epoch": 11.36, "learning_rate": 0.001, "loss": 2.6962, "step": 59184 }, { "epoch": 11.37, "learning_rate": 0.001, "loss": 2.7118, "step": 59196 }, { "epoch": 11.37, "learning_rate": 0.001, "loss": 2.7067, "step": 59208 }, { "epoch": 11.37, "learning_rate": 0.001, "loss": 2.7033, "step": 59220 }, { "epoch": 11.37, "learning_rate": 0.001, "loss": 2.7015, "step": 59232 }, { "epoch": 11.38, "learning_rate": 0.001, "loss": 2.7106, "step": 59244 }, { "epoch": 11.38, "learning_rate": 0.001, "loss": 2.6929, "step": 59256 }, { "epoch": 11.38, "learning_rate": 0.001, "loss": 2.6965, "step": 59268 }, { "epoch": 11.38, "learning_rate": 0.001, "loss": 2.6972, "step": 59280 }, { "epoch": 11.38, "learning_rate": 0.001, "loss": 2.6967, "step": 59292 }, { "epoch": 11.39, "learning_rate": 0.001, "loss": 2.7053, "step": 59304 }, { "epoch": 11.39, "learning_rate": 0.001, "loss": 2.6973, "step": 59316 }, { "epoch": 11.39, "learning_rate": 0.001, "loss": 2.7055, "step": 59328 }, { "epoch": 11.39, "learning_rate": 0.001, "loss": 2.7058, "step": 59340 }, { "epoch": 11.4, "learning_rate": 0.001, "loss": 2.6999, "step": 59352 }, { "epoch": 11.4, "learning_rate": 0.001, "loss": 2.696, "step": 59364 }, { "epoch": 11.4, "eval_ag_news_accuracy": 0.30625, "eval_ag_news_bleu_score": 4.431863173132895, "eval_ag_news_bleu_score_sem": 0.14530038639315268, "eval_ag_news_emb_cos_sim": 0.7869585156440735, "eval_ag_news_emb_cos_sim_sem": 0.007882709271924318, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7377586364746094, "eval_ag_news_n_ngrams_match_1": 13.234, "eval_ag_news_n_ngrams_match_2": 2.724, "eval_ag_news_n_ngrams_match_3": 0.774, "eval_ag_news_num_pred_words": 46.442, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 42.00373893044607, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3246625035059656, "eval_ag_news_runtime": 9.7782, "eval_ag_news_samples_per_second": 51.134, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.33345497884835773, "eval_ag_news_token_set_f1_sem": 0.004357801549242252, "eval_ag_news_token_set_precision": 0.3132578723680101, "eval_ag_news_token_set_recall": 0.37357079520019015, "eval_ag_news_true_num_tokens": 56.09375, "step": 59375 }, { "epoch": 11.4, "eval_anthropic_toxic_prompts_accuracy": 0.10546875, "eval_anthropic_toxic_prompts_bleu_score": 2.7997797982849897, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10897744782255879, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6400052309036255, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009610886794463874, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4370694160461426, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.654, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.656, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.588, "eval_anthropic_toxic_prompts_num_pred_words": 47.252, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 31.095695960228852, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19881881976881854, "eval_anthropic_toxic_prompts_runtime": 9.9887, "eval_anthropic_toxic_prompts_samples_per_second": 50.057, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3414458554745744, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006301672437152909, "eval_anthropic_toxic_prompts_token_set_precision": 0.40264846430668666, "eval_anthropic_toxic_prompts_token_set_recall": 0.327145834464341, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 59375 }, { "epoch": 11.4, "eval_arxiv_accuracy": 0.33096875, "eval_arxiv_bleu_score": 3.8984078397780997, "eval_arxiv_bleu_score_sem": 0.1136108171750166, "eval_arxiv_emb_cos_sim": 0.7153143882751465, "eval_arxiv_emb_cos_sim_sem": 0.008298240978293795, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5796284675598145, "eval_arxiv_n_ngrams_match_1": 13.746, "eval_arxiv_n_ngrams_match_2": 2.56, "eval_arxiv_n_ngrams_match_3": 0.548, "eval_arxiv_num_pred_words": 39.7, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.860215138513546, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.32793192745391275, "eval_arxiv_runtime": 9.7033, "eval_arxiv_samples_per_second": 51.529, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.3250715510328737, "eval_arxiv_token_set_f1_sem": 0.0042098734068675625, "eval_arxiv_token_set_precision": 0.27413619203129114, "eval_arxiv_token_set_recall": 0.42157927825665653, "eval_arxiv_true_num_tokens": 64.0, "step": 59375 }, { "epoch": 11.4, "eval_python_code_alpaca_accuracy": 0.1486875, "eval_python_code_alpaca_bleu_score": 3.9670491992170747, "eval_python_code_alpaca_bleu_score_sem": 0.12656978363115465, "eval_python_code_alpaca_emb_cos_sim": 0.7320708632469177, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008293936731389619, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0828518867492676, "eval_python_code_alpaca_n_ngrams_match_1": 9.064, "eval_python_code_alpaca_n_ngrams_match_2": 2.458, "eval_python_code_alpaca_n_ngrams_match_3": 0.692, "eval_python_code_alpaca_num_pred_words": 43.398, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.82054346320137, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3076708404043146, "eval_python_code_alpaca_runtime": 9.6604, "eval_python_code_alpaca_samples_per_second": 51.757, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.45303357487297796, "eval_python_code_alpaca_token_set_f1_sem": 0.005460600680863856, "eval_python_code_alpaca_token_set_precision": 0.4877758500785313, "eval_python_code_alpaca_token_set_recall": 0.4481852572607296, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 59375 }, { "epoch": 11.4, "eval_wikibio_accuracy": 0.30703125, "eval_wikibio_bleu_score": 5.251511457631601, "eval_wikibio_bleu_score_sem": 0.18454730950711926, "eval_wikibio_emb_cos_sim": 0.7073708772659302, "eval_wikibio_emb_cos_sim_sem": 0.011683425142761689, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 4.022034645080566, "eval_wikibio_n_ngrams_match_1": 9.514, "eval_wikibio_n_ngrams_match_2": 3.102, "eval_wikibio_n_ngrams_match_3": 1.084, "eval_wikibio_num_pred_words": 36.598, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 55.814553181161685, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3289108956112218, "eval_wikibio_runtime": 15.262, "eval_wikibio_samples_per_second": 32.761, "eval_wikibio_steps_per_second": 0.066, "eval_wikibio_token_set_f1": 0.2975795067165946, "eval_wikibio_token_set_f1_sem": 0.005461913478978885, "eval_wikibio_token_set_precision": 0.3073213589436206, "eval_wikibio_token_set_recall": 0.3042370580948697, "eval_wikibio_true_num_tokens": 61.1328125, "step": 59375 }, { "epoch": 11.4, "eval_nq_accuracy": 0.506875, "eval_nq_bleu_score": 10.225693623198634, "eval_nq_bleu_score_sem": 0.44159420530919286, "eval_nq_emb_cos_sim": 0.8122824430465698, "eval_nq_emb_cos_sim_sem": 0.00771513142661148, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.340015411376953, "eval_nq_n_ngrams_match_1": 21.684, "eval_nq_n_ngrams_match_2": 7.558, "eval_nq_n_ngrams_match_3": 3.32, "eval_nq_num_pred_words": 48.564, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.381396553114586, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4160662133024913, "eval_nq_runtime": 10.4983, "eval_nq_samples_per_second": 47.627, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4354222019950613, "eval_nq_token_set_f1_sem": 0.004983935344112309, "eval_nq_token_set_precision": 0.3897972927517024, "eval_nq_token_set_recall": 0.5056233853328723, "eval_nq_true_num_tokens": 64.0, "step": 59375 }, { "epoch": 11.4, "learning_rate": 0.001, "loss": 2.6991, "step": 59376 }, { "epoch": 11.4, "learning_rate": 0.001, "loss": 2.6989, "step": 59388 }, { "epoch": 11.41, "learning_rate": 0.001, "loss": 2.7054, "step": 59400 }, { "epoch": 11.41, "learning_rate": 0.001, "loss": 2.7147, "step": 59412 }, { "epoch": 11.41, "learning_rate": 0.001, "loss": 2.7055, "step": 59424 }, { "epoch": 11.41, "learning_rate": 0.001, "loss": 2.7133, "step": 59436 }, { "epoch": 11.41, "learning_rate": 0.001, "loss": 2.7147, "step": 59448 }, { "epoch": 11.42, "learning_rate": 0.001, "loss": 2.7069, "step": 59460 }, { "epoch": 11.42, "learning_rate": 0.001, "loss": 2.7052, "step": 59472 }, { "epoch": 11.42, "learning_rate": 0.001, "loss": 2.6999, "step": 59484 }, { "epoch": 11.42, "learning_rate": 0.001, "loss": 2.7046, "step": 59496 }, { "epoch": 11.43, "learning_rate": 0.001, "loss": 2.6984, "step": 59508 }, { "epoch": 11.43, "learning_rate": 0.001, "loss": 2.7066, "step": 59520 }, { "epoch": 11.43, "learning_rate": 0.001, "loss": 2.6957, "step": 59532 }, { "epoch": 11.43, "learning_rate": 0.001, "loss": 2.7047, "step": 59544 }, { "epoch": 11.44, "learning_rate": 0.001, "loss": 2.6984, "step": 59556 }, { "epoch": 11.44, "learning_rate": 0.001, "loss": 2.6994, "step": 59568 }, { "epoch": 11.44, "learning_rate": 0.001, "loss": 2.7041, "step": 59580 }, { "epoch": 11.44, "learning_rate": 0.001, "loss": 2.7035, "step": 59592 }, { "epoch": 11.44, "learning_rate": 0.001, "loss": 2.7135, "step": 59604 }, { "epoch": 11.45, "learning_rate": 0.001, "loss": 2.7163, "step": 59616 }, { "epoch": 11.45, "learning_rate": 0.001, "loss": 2.7157, "step": 59628 }, { "epoch": 11.45, "learning_rate": 0.001, "loss": 2.7114, "step": 59640 }, { "epoch": 11.45, "learning_rate": 0.001, "loss": 2.704, "step": 59652 }, { "epoch": 11.46, "learning_rate": 0.001, "loss": 2.7019, "step": 59664 }, { "epoch": 11.46, "learning_rate": 0.001, "loss": 2.7115, "step": 59676 }, { "epoch": 11.46, "learning_rate": 0.001, "loss": 2.7079, "step": 59688 }, { "epoch": 11.46, "learning_rate": 0.001, "loss": 2.6948, "step": 59700 }, { "epoch": 11.47, "learning_rate": 0.001, "loss": 2.6998, "step": 59712 }, { "epoch": 11.47, "learning_rate": 0.001, "loss": 2.6991, "step": 59724 }, { "epoch": 11.47, "learning_rate": 0.001, "loss": 2.7023, "step": 59736 }, { "epoch": 11.47, "learning_rate": 0.001, "loss": 2.7074, "step": 59748 }, { "epoch": 11.47, "learning_rate": 0.001, "loss": 2.707, "step": 59760 }, { "epoch": 11.48, "learning_rate": 0.001, "loss": 2.7075, "step": 59772 }, { "epoch": 11.48, "learning_rate": 0.001, "loss": 2.7085, "step": 59784 }, { "epoch": 11.48, "learning_rate": 0.001, "loss": 2.7083, "step": 59796 }, { "epoch": 11.48, "learning_rate": 0.001, "loss": 2.7009, "step": 59808 }, { "epoch": 11.49, "learning_rate": 0.001, "loss": 2.7135, "step": 59820 }, { "epoch": 11.49, "learning_rate": 0.001, "loss": 2.7014, "step": 59832 }, { "epoch": 11.49, "learning_rate": 0.001, "loss": 2.7063, "step": 59844 }, { "epoch": 11.49, "learning_rate": 0.001, "loss": 2.701, "step": 59856 }, { "epoch": 11.5, "learning_rate": 0.001, "loss": 2.7035, "step": 59868 }, { "epoch": 11.5, "learning_rate": 0.001, "loss": 2.6916, "step": 59880 }, { "epoch": 11.5, "learning_rate": 0.001, "loss": 2.7103, "step": 59892 }, { "epoch": 11.5, "learning_rate": 0.001, "loss": 2.7037, "step": 59904 }, { "epoch": 11.5, "learning_rate": 0.001, "loss": 2.705, "step": 59916 }, { "epoch": 11.51, "learning_rate": 0.001, "loss": 2.6916, "step": 59928 }, { "epoch": 11.51, "learning_rate": 0.001, "loss": 2.7114, "step": 59940 }, { "epoch": 11.51, "learning_rate": 0.001, "loss": 2.7017, "step": 59952 }, { "epoch": 11.51, "learning_rate": 0.001, "loss": 2.6981, "step": 59964 }, { "epoch": 11.52, "learning_rate": 0.001, "loss": 2.7116, "step": 59976 }, { "epoch": 11.52, "learning_rate": 0.001, "loss": 2.7032, "step": 59988 }, { "epoch": 11.52, "learning_rate": 0.001, "loss": 2.7045, "step": 60000 }, { "epoch": 11.52, "eval_ag_news_accuracy": 0.30728125, "eval_ag_news_bleu_score": 4.530503729114923, "eval_ag_news_bleu_score_sem": 0.14781067904570572, "eval_ag_news_emb_cos_sim": 0.7807636260986328, "eval_ag_news_emb_cos_sim_sem": 0.008243288731825177, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7231216430664062, "eval_ag_news_n_ngrams_match_1": 13.174, "eval_ag_news_n_ngrams_match_2": 2.712, "eval_ag_news_n_ngrams_match_3": 0.76, "eval_ag_news_num_pred_words": 46.072, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.393408081389815, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32593310766267536, "eval_ag_news_runtime": 9.8832, "eval_ag_news_samples_per_second": 50.591, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.3318015587234676, "eval_ag_news_token_set_f1_sem": 0.004388282865926565, "eval_ag_news_token_set_precision": 0.31256188361838566, "eval_ag_news_token_set_recall": 0.36788600917976305, "eval_ag_news_true_num_tokens": 56.09375, "step": 60000 }, { "epoch": 11.52, "eval_anthropic_toxic_prompts_accuracy": 0.10790625, "eval_anthropic_toxic_prompts_bleu_score": 3.011933463772888, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1307588179861984, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6478198170661926, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009703891718237257, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.398049831390381, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.824, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.762, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, "eval_anthropic_toxic_prompts_num_pred_words": 47.456, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.90572194214503, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20052986491482772, "eval_anthropic_toxic_prompts_runtime": 9.2695, "eval_anthropic_toxic_prompts_samples_per_second": 53.94, "eval_anthropic_toxic_prompts_steps_per_second": 0.108, "eval_anthropic_toxic_prompts_token_set_f1": 0.34510985636689057, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006500321372036515, "eval_anthropic_toxic_prompts_token_set_precision": 0.41112777818840385, "eval_anthropic_toxic_prompts_token_set_recall": 0.32562636260486505, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 60000 }, { "epoch": 11.52, "eval_arxiv_accuracy": 0.33259375, "eval_arxiv_bleu_score": 3.9454065017135553, "eval_arxiv_bleu_score_sem": 0.10428243358706764, "eval_arxiv_emb_cos_sim": 0.7328290939331055, "eval_arxiv_emb_cos_sim_sem": 0.008083398735098528, "eval_arxiv_emb_top1_equal": 0.203125, "eval_arxiv_emb_top1_equal_sem": 0.03570055125142555, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.584223747253418, "eval_arxiv_n_ngrams_match_1": 13.966, "eval_arxiv_n_ngrams_match_2": 2.624, "eval_arxiv_n_ngrams_match_3": 0.568, "eval_arxiv_num_pred_words": 40.098, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.025382060402094, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33173521287868857, "eval_arxiv_runtime": 9.8245, "eval_arxiv_samples_per_second": 50.893, "eval_arxiv_steps_per_second": 0.102, "eval_arxiv_token_set_f1": 0.32662150356028635, "eval_arxiv_token_set_f1_sem": 0.004007506383550244, "eval_arxiv_token_set_precision": 0.2751492498071853, "eval_arxiv_token_set_recall": 0.41868338660273136, "eval_arxiv_true_num_tokens": 64.0, "step": 60000 }, { "epoch": 11.52, "eval_python_code_alpaca_accuracy": 0.1535625, "eval_python_code_alpaca_bleu_score": 4.149384697311357, "eval_python_code_alpaca_bleu_score_sem": 0.1355033560597435, "eval_python_code_alpaca_emb_cos_sim": 0.7412024736404419, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009297430145376227, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.050917148590088, "eval_python_code_alpaca_n_ngrams_match_1": 9.282, "eval_python_code_alpaca_n_ngrams_match_2": 2.57, "eval_python_code_alpaca_n_ngrams_match_3": 0.802, "eval_python_code_alpaca_num_pred_words": 43.734, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.134719214330342, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3068012458484429, "eval_python_code_alpaca_runtime": 9.572, "eval_python_code_alpaca_samples_per_second": 52.236, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.4570120815552914, "eval_python_code_alpaca_token_set_f1_sem": 0.005492166368443662, "eval_python_code_alpaca_token_set_precision": 0.5080212430649951, "eval_python_code_alpaca_token_set_recall": 0.4389553213692472, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 60000 }, { "epoch": 11.52, "eval_wikibio_accuracy": 0.310625, "eval_wikibio_bleu_score": 5.562129065968864, "eval_wikibio_bleu_score_sem": 0.20208377637207953, "eval_wikibio_emb_cos_sim": 0.7162641882896423, "eval_wikibio_emb_cos_sim_sem": 0.011005607671208346, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9331448078155518, "eval_wikibio_n_ngrams_match_1": 9.568, "eval_wikibio_n_ngrams_match_2": 3.188, "eval_wikibio_n_ngrams_match_3": 1.16, "eval_wikibio_num_pred_words": 35.648, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 51.067322324856555, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33113779740325056, "eval_wikibio_runtime": 9.6015, "eval_wikibio_samples_per_second": 52.075, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.3064295331814496, "eval_wikibio_token_set_f1_sem": 0.005774884058421604, "eval_wikibio_token_set_precision": 0.31151168254344447, "eval_wikibio_token_set_recall": 0.3203673551916625, "eval_wikibio_true_num_tokens": 61.1328125, "step": 60000 }, { "epoch": 11.52, "eval_nq_accuracy": 0.5083125, "eval_nq_bleu_score": 10.859162359100454, "eval_nq_bleu_score_sem": 0.4618880507067924, "eval_nq_emb_cos_sim": 0.8117635250091553, "eval_nq_emb_cos_sim_sem": 0.007694013879873998, "eval_nq_emb_top1_equal": 0.2109375, "eval_nq_emb_top1_equal_sem": 0.03620184850179216, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.335266590118408, "eval_nq_n_ngrams_match_1": 22.096, "eval_nq_n_ngrams_match_2": 7.9, "eval_nq_n_ngrams_match_3": 3.592, "eval_nq_num_pred_words": 49.06, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.332214028406678, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42602497138570844, "eval_nq_runtime": 14.6595, "eval_nq_samples_per_second": 34.108, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.4409636716216321, "eval_nq_token_set_f1_sem": 0.004927524126723718, "eval_nq_token_set_precision": 0.3954249872690946, "eval_nq_token_set_recall": 0.5090542184669634, "eval_nq_true_num_tokens": 64.0, "step": 60000 }, { "epoch": 11.52, "learning_rate": 0.001, "loss": 2.7084, "step": 60012 }, { "epoch": 11.53, "learning_rate": 0.001, "loss": 2.7043, "step": 60024 }, { "epoch": 11.53, "learning_rate": 0.001, "loss": 2.6994, "step": 60036 }, { "epoch": 11.53, "learning_rate": 0.001, "loss": 2.7005, "step": 60048 }, { "epoch": 11.53, "learning_rate": 0.001, "loss": 2.7175, "step": 60060 }, { "epoch": 11.53, "learning_rate": 0.001, "loss": 2.7024, "step": 60072 }, { "epoch": 11.54, "learning_rate": 0.001, "loss": 2.6967, "step": 60084 }, { "epoch": 11.54, "learning_rate": 0.001, "loss": 2.7021, "step": 60096 }, { "epoch": 11.54, "learning_rate": 0.001, "loss": 2.698, "step": 60108 }, { "epoch": 11.54, "learning_rate": 0.001, "loss": 2.7012, "step": 60120 }, { "epoch": 11.55, "learning_rate": 0.001, "loss": 2.7036, "step": 60132 }, { "epoch": 11.55, "learning_rate": 0.001, "loss": 2.7048, "step": 60144 }, { "epoch": 11.55, "learning_rate": 0.001, "loss": 2.6945, "step": 60156 }, { "epoch": 11.55, "learning_rate": 0.001, "loss": 2.6945, "step": 60168 }, { "epoch": 11.56, "learning_rate": 0.001, "loss": 2.6921, "step": 60180 }, { "epoch": 11.56, "learning_rate": 0.001, "loss": 2.7001, "step": 60192 }, { "epoch": 11.56, "learning_rate": 0.001, "loss": 2.7035, "step": 60204 }, { "epoch": 11.56, "learning_rate": 0.001, "loss": 2.7077, "step": 60216 }, { "epoch": 11.56, "learning_rate": 0.001, "loss": 2.7006, "step": 60228 }, { "epoch": 11.57, "learning_rate": 0.001, "loss": 2.7012, "step": 60240 }, { "epoch": 11.57, "learning_rate": 0.001, "loss": 2.6988, "step": 60252 }, { "epoch": 11.57, "learning_rate": 0.001, "loss": 2.6986, "step": 60264 }, { "epoch": 11.57, "learning_rate": 0.001, "loss": 2.7102, "step": 60276 }, { "epoch": 11.58, "learning_rate": 0.001, "loss": 2.6988, "step": 60288 }, { "epoch": 11.58, "learning_rate": 0.001, "loss": 2.7014, "step": 60300 }, { "epoch": 11.58, "learning_rate": 0.001, "loss": 2.7105, "step": 60312 }, { "epoch": 11.58, "learning_rate": 0.001, "loss": 2.7154, "step": 60324 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 2.7112, "step": 60336 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 2.7088, "step": 60348 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 2.716, "step": 60360 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 2.703, "step": 60372 }, { "epoch": 11.59, "learning_rate": 0.001, "loss": 2.6966, "step": 60384 }, { "epoch": 11.6, "learning_rate": 0.001, "loss": 2.705, "step": 60396 }, { "epoch": 11.6, "learning_rate": 0.001, "loss": 2.7108, "step": 60408 }, { "epoch": 11.6, "learning_rate": 0.001, "loss": 2.7055, "step": 60420 }, { "epoch": 11.6, "learning_rate": 0.001, "loss": 2.702, "step": 60432 }, { "epoch": 11.61, "learning_rate": 0.001, "loss": 2.6863, "step": 60444 }, { "epoch": 11.61, "learning_rate": 0.001, "loss": 2.6877, "step": 60456 }, { "epoch": 11.61, "learning_rate": 0.001, "loss": 2.7015, "step": 60468 }, { "epoch": 11.61, "learning_rate": 0.001, "loss": 2.7064, "step": 60480 }, { "epoch": 11.62, "learning_rate": 0.001, "loss": 2.7014, "step": 60492 }, { "epoch": 11.62, "learning_rate": 0.001, "loss": 2.6982, "step": 60504 }, { "epoch": 11.62, "learning_rate": 0.001, "loss": 2.713, "step": 60516 }, { "epoch": 11.62, "learning_rate": 0.001, "loss": 2.7092, "step": 60528 }, { "epoch": 11.62, "learning_rate": 0.001, "loss": 2.6951, "step": 60540 }, { "epoch": 11.63, "learning_rate": 0.001, "loss": 2.7033, "step": 60552 }, { "epoch": 11.63, "learning_rate": 0.001, "loss": 2.6947, "step": 60564 }, { "epoch": 11.63, "learning_rate": 0.001, "loss": 2.7093, "step": 60576 }, { "epoch": 11.63, "learning_rate": 0.001, "loss": 2.7026, "step": 60588 }, { "epoch": 11.64, "learning_rate": 0.001, "loss": 2.6972, "step": 60600 }, { "epoch": 11.64, "learning_rate": 0.001, "loss": 2.7068, "step": 60612 }, { "epoch": 11.64, "learning_rate": 0.001, "loss": 2.6871, "step": 60624 }, { "epoch": 11.64, "eval_ag_news_accuracy": 0.30771875, "eval_ag_news_bleu_score": 4.562737182773448, "eval_ag_news_bleu_score_sem": 0.15363674128184648, "eval_ag_news_emb_cos_sim": 0.7819575071334839, "eval_ag_news_emb_cos_sim_sem": 0.007952146204846017, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.730755090713501, "eval_ag_news_n_ngrams_match_1": 13.17, "eval_ag_news_n_ngrams_match_2": 2.812, "eval_ag_news_n_ngrams_match_3": 0.764, "eval_ag_news_num_pred_words": 46.214, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.7105915564637, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3255476877533653, "eval_ag_news_runtime": 10.181, "eval_ag_news_samples_per_second": 49.111, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3323323333511247, "eval_ag_news_token_set_f1_sem": 0.004345378378307004, "eval_ag_news_token_set_precision": 0.3133151639567446, "eval_ag_news_token_set_recall": 0.3708701655661347, "eval_ag_news_true_num_tokens": 56.09375, "step": 60625 }, { "epoch": 11.64, "eval_anthropic_toxic_prompts_accuracy": 0.10853125, "eval_anthropic_toxic_prompts_bleu_score": 2.7090491693140275, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.09938297862119239, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6559830904006958, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0097251297786197, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.36966872215271, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.698, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.62, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574, "eval_anthropic_toxic_prompts_num_pred_words": 47.876, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.06889558138647, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19495095790542527, "eval_anthropic_toxic_prompts_runtime": 11.653, "eval_anthropic_toxic_prompts_samples_per_second": 42.907, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.3354171332896632, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006217683903982814, "eval_anthropic_toxic_prompts_token_set_precision": 0.4025546215448638, "eval_anthropic_toxic_prompts_token_set_recall": 0.3150708848694828, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 60625 }, { "epoch": 11.64, "eval_arxiv_accuracy": 0.3343125, "eval_arxiv_bleu_score": 3.975372086597374, "eval_arxiv_bleu_score_sem": 0.11754027122992618, "eval_arxiv_emb_cos_sim": 0.7152624726295471, "eval_arxiv_emb_cos_sim_sem": 0.009203334671897625, "eval_arxiv_emb_top1_equal": 0.1875, "eval_arxiv_emb_top1_equal_sem": 0.034634623208270626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5604097843170166, "eval_arxiv_n_ngrams_match_1": 13.812, "eval_arxiv_n_ngrams_match_2": 2.642, "eval_arxiv_n_ngrams_match_3": 0.58, "eval_arxiv_num_pred_words": 39.996, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.177609424594216, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3280746894186754, "eval_arxiv_runtime": 10.9874, "eval_arxiv_samples_per_second": 45.507, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.3278418572741925, "eval_arxiv_token_set_f1_sem": 0.004322233929830818, "eval_arxiv_token_set_precision": 0.2739703220111268, "eval_arxiv_token_set_recall": 0.4314235969696949, "eval_arxiv_true_num_tokens": 64.0, "step": 60625 }, { "epoch": 11.64, "eval_python_code_alpaca_accuracy": 0.1534375, "eval_python_code_alpaca_bleu_score": 4.274866366482068, "eval_python_code_alpaca_bleu_score_sem": 0.14024241858965333, "eval_python_code_alpaca_emb_cos_sim": 0.7309517860412598, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008365820815702982, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.050659418106079, "eval_python_code_alpaca_n_ngrams_match_1": 9.262, "eval_python_code_alpaca_n_ngrams_match_2": 2.628, "eval_python_code_alpaca_n_ngrams_match_3": 0.842, "eval_python_code_alpaca_num_pred_words": 44.104, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.12927285479443, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3082043485761443, "eval_python_code_alpaca_runtime": 10.9758, "eval_python_code_alpaca_samples_per_second": 45.555, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.46014841933317135, "eval_python_code_alpaca_token_set_f1_sem": 0.005359291934462503, "eval_python_code_alpaca_token_set_precision": 0.5040148341849664, "eval_python_code_alpaca_token_set_recall": 0.44620450056910027, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 60625 }, { "epoch": 11.64, "eval_wikibio_accuracy": 0.30828125, "eval_wikibio_bleu_score": 5.299619657800155, "eval_wikibio_bleu_score_sem": 0.1901970456602861, "eval_wikibio_emb_cos_sim": 0.7095686197280884, "eval_wikibio_emb_cos_sim_sem": 0.011874961495819446, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.980469226837158, "eval_wikibio_n_ngrams_match_1": 9.584, "eval_wikibio_n_ngrams_match_2": 3.102, "eval_wikibio_n_ngrams_match_3": 1.068, "eval_wikibio_num_pred_words": 36.272, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 53.54215174864247, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3315543218178374, "eval_wikibio_runtime": 9.6104, "eval_wikibio_samples_per_second": 52.027, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.3016552652215996, "eval_wikibio_token_set_f1_sem": 0.005721542587219132, "eval_wikibio_token_set_precision": 0.3112307697493472, "eval_wikibio_token_set_recall": 0.3090902051571282, "eval_wikibio_true_num_tokens": 61.1328125, "step": 60625 }, { "epoch": 11.64, "eval_nq_accuracy": 0.50734375, "eval_nq_bleu_score": 10.698290321386462, "eval_nq_bleu_score_sem": 0.45302670300933406, "eval_nq_emb_cos_sim": 0.8153901696205139, "eval_nq_emb_cos_sim_sem": 0.007834353665792163, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3290982246398926, "eval_nq_n_ngrams_match_1": 22.254, "eval_nq_n_ngrams_match_2": 7.862, "eval_nq_n_ngrams_match_3": 3.494, "eval_nq_num_pred_words": 49.402, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.268677316365293, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4264352290778385, "eval_nq_runtime": 18.7127, "eval_nq_samples_per_second": 26.72, "eval_nq_steps_per_second": 0.053, "eval_nq_token_set_f1": 0.4434893634480182, "eval_nq_token_set_f1_sem": 0.0050043689965380625, "eval_nq_token_set_precision": 0.39965925507967776, "eval_nq_token_set_recall": 0.5075817085698917, "eval_nq_true_num_tokens": 64.0, "step": 60625 }, { "epoch": 11.64, "learning_rate": 0.001, "loss": 2.6982, "step": 60636 }, { "epoch": 11.65, "learning_rate": 0.001, "loss": 2.6959, "step": 60648 }, { "epoch": 11.65, "learning_rate": 0.001, "loss": 2.7022, "step": 60660 }, { "epoch": 11.65, "learning_rate": 0.001, "loss": 2.7051, "step": 60672 }, { "epoch": 11.65, "learning_rate": 0.001, "loss": 2.6916, "step": 60684 }, { "epoch": 11.65, "learning_rate": 0.001, "loss": 2.7054, "step": 60696 }, { "epoch": 11.66, "learning_rate": 0.001, "loss": 2.6958, "step": 60708 }, { "epoch": 11.66, "learning_rate": 0.001, "loss": 2.7033, "step": 60720 }, { "epoch": 11.66, "learning_rate": 0.001, "loss": 2.7103, "step": 60732 }, { "epoch": 11.66, "learning_rate": 0.001, "loss": 2.703, "step": 60744 }, { "epoch": 11.67, "learning_rate": 0.001, "loss": 2.6931, "step": 60756 }, { "epoch": 11.67, "learning_rate": 0.001, "loss": 2.7027, "step": 60768 }, { "epoch": 11.67, "learning_rate": 0.001, "loss": 2.7043, "step": 60780 }, { "epoch": 11.67, "learning_rate": 0.001, "loss": 2.709, "step": 60792 }, { "epoch": 11.68, "learning_rate": 0.001, "loss": 2.7027, "step": 60804 }, { "epoch": 11.68, "learning_rate": 0.001, "loss": 2.703, "step": 60816 }, { "epoch": 11.68, "learning_rate": 0.001, "loss": 2.7132, "step": 60828 }, { "epoch": 11.68, "learning_rate": 0.001, "loss": 2.6955, "step": 60840 }, { "epoch": 11.68, "learning_rate": 0.001, "loss": 2.7021, "step": 60852 }, { "epoch": 11.69, "learning_rate": 0.001, "loss": 2.6981, "step": 60864 }, { "epoch": 11.69, "learning_rate": 0.001, "loss": 2.704, "step": 60876 }, { "epoch": 11.69, "learning_rate": 0.001, "loss": 2.7075, "step": 60888 }, { "epoch": 11.69, "learning_rate": 0.001, "loss": 2.6954, "step": 60900 }, { "epoch": 11.7, "learning_rate": 0.001, "loss": 2.6977, "step": 60912 }, { "epoch": 11.7, "learning_rate": 0.001, "loss": 2.7029, "step": 60924 }, { "epoch": 11.7, "learning_rate": 0.001, "loss": 2.6989, "step": 60936 }, { "epoch": 11.7, "learning_rate": 0.001, "loss": 2.7017, "step": 60948 }, { "epoch": 11.71, "learning_rate": 0.001, "loss": 2.6938, "step": 60960 }, { "epoch": 11.71, "learning_rate": 0.001, "loss": 2.7032, "step": 60972 }, { "epoch": 11.71, "learning_rate": 0.001, "loss": 2.7023, "step": 60984 }, { "epoch": 11.71, "learning_rate": 0.001, "loss": 2.6931, "step": 60996 }, { "epoch": 11.71, "learning_rate": 0.001, "loss": 2.698, "step": 61008 }, { "epoch": 11.72, "learning_rate": 0.001, "loss": 2.7055, "step": 61020 }, { "epoch": 11.72, "learning_rate": 0.001, "loss": 2.694, "step": 61032 }, { "epoch": 11.72, "learning_rate": 0.001, "loss": 2.6919, "step": 61044 }, { "epoch": 11.72, "learning_rate": 0.001, "loss": 2.6991, "step": 61056 }, { "epoch": 11.73, "learning_rate": 0.001, "loss": 2.6986, "step": 61068 }, { "epoch": 11.73, "learning_rate": 0.001, "loss": 2.7055, "step": 61080 }, { "epoch": 11.73, "learning_rate": 0.001, "loss": 2.6917, "step": 61092 }, { "epoch": 11.73, "learning_rate": 0.001, "loss": 2.7058, "step": 61104 }, { "epoch": 11.74, "learning_rate": 0.001, "loss": 2.7035, "step": 61116 }, { "epoch": 11.74, "learning_rate": 0.001, "loss": 2.6925, "step": 61128 }, { "epoch": 11.74, "learning_rate": 0.001, "loss": 2.6927, "step": 61140 }, { "epoch": 11.74, "learning_rate": 0.001, "loss": 2.7005, "step": 61152 }, { "epoch": 11.74, "learning_rate": 0.001, "loss": 2.7083, "step": 61164 }, { "epoch": 11.75, "learning_rate": 0.001, "loss": 2.7073, "step": 61176 }, { "epoch": 11.75, "learning_rate": 0.001, "loss": 2.6965, "step": 61188 }, { "epoch": 11.75, "learning_rate": 0.001, "loss": 2.7074, "step": 61200 }, { "epoch": 11.75, "learning_rate": 0.001, "loss": 2.7072, "step": 61212 }, { "epoch": 11.76, "learning_rate": 0.001, "loss": 2.6894, "step": 61224 }, { "epoch": 11.76, "learning_rate": 0.001, "loss": 2.6974, "step": 61236 }, { "epoch": 11.76, "learning_rate": 0.001, "loss": 2.6879, "step": 61248 }, { "epoch": 11.76, "eval_ag_news_accuracy": 0.30709375, "eval_ag_news_bleu_score": 4.689805000042943, "eval_ag_news_bleu_score_sem": 0.1576112924153491, "eval_ag_news_emb_cos_sim": 0.7899467349052429, "eval_ag_news_emb_cos_sim_sem": 0.007180146990483527, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.720097303390503, "eval_ag_news_n_ngrams_match_1": 13.334, "eval_ag_news_n_ngrams_match_2": 2.868, "eval_ag_news_n_ngrams_match_3": 0.86, "eval_ag_news_num_pred_words": 46.058, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.26840946941555, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32650227925978337, "eval_ag_news_runtime": 13.6229, "eval_ag_news_samples_per_second": 36.703, "eval_ag_news_steps_per_second": 0.073, "eval_ag_news_token_set_f1": 0.33495844107529343, "eval_ag_news_token_set_f1_sem": 0.004348004716266094, "eval_ag_news_token_set_precision": 0.315982404487058, "eval_ag_news_token_set_recall": 0.3709444094386155, "eval_ag_news_true_num_tokens": 56.09375, "step": 61250 }, { "epoch": 11.76, "eval_anthropic_toxic_prompts_accuracy": 0.10734375, "eval_anthropic_toxic_prompts_bleu_score": 2.9840282458278726, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11763155063083452, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6570160388946533, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00968382059872296, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4051480293273926, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.916, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.656, "eval_anthropic_toxic_prompts_num_pred_words": 47.062, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.118753852817456, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20254738923714144, "eval_anthropic_toxic_prompts_runtime": 14.6549, "eval_anthropic_toxic_prompts_samples_per_second": 34.118, "eval_anthropic_toxic_prompts_steps_per_second": 0.068, "eval_anthropic_toxic_prompts_token_set_f1": 0.33981432530372735, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006538309906611984, "eval_anthropic_toxic_prompts_token_set_precision": 0.4115871533611798, "eval_anthropic_toxic_prompts_token_set_recall": 0.3179448991673162, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 61250 }, { "epoch": 11.76, "eval_arxiv_accuracy": 0.33190625, "eval_arxiv_bleu_score": 3.9654714994858113, "eval_arxiv_bleu_score_sem": 0.11570126845297382, "eval_arxiv_emb_cos_sim": 0.7267646193504333, "eval_arxiv_emb_cos_sim_sem": 0.008554846138551802, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5788960456848145, "eval_arxiv_n_ngrams_match_1": 14.028, "eval_arxiv_n_ngrams_match_2": 2.65, "eval_arxiv_n_ngrams_match_3": 0.562, "eval_arxiv_num_pred_words": 39.778, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.8339599486153, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33284767659463765, "eval_arxiv_runtime": 9.7037, "eval_arxiv_samples_per_second": 51.527, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.3288644919866805, "eval_arxiv_token_set_f1_sem": 0.004227176318217579, "eval_arxiv_token_set_precision": 0.2775316020689215, "eval_arxiv_token_set_recall": 0.4197042043157584, "eval_arxiv_true_num_tokens": 64.0, "step": 61250 }, { "epoch": 11.76, "eval_python_code_alpaca_accuracy": 0.15603125, "eval_python_code_alpaca_bleu_score": 4.129517836489619, "eval_python_code_alpaca_bleu_score_sem": 0.1286746757690754, "eval_python_code_alpaca_emb_cos_sim": 0.7468237280845642, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007903011540560919, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.044473171234131, "eval_python_code_alpaca_n_ngrams_match_1": 9.546, "eval_python_code_alpaca_n_ngrams_match_2": 2.644, "eval_python_code_alpaca_n_ngrams_match_3": 0.78, "eval_python_code_alpaca_num_pred_words": 44.426, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.99896542920991, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3143834940007143, "eval_python_code_alpaca_runtime": 12.1683, "eval_python_code_alpaca_samples_per_second": 41.09, "eval_python_code_alpaca_steps_per_second": 0.082, "eval_python_code_alpaca_token_set_f1": 0.4651287559114941, "eval_python_code_alpaca_token_set_f1_sem": 0.005103564231543329, "eval_python_code_alpaca_token_set_precision": 0.5216447939587832, "eval_python_code_alpaca_token_set_recall": 0.44219869577637055, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 61250 }, { "epoch": 11.76, "eval_wikibio_accuracy": 0.30684375, "eval_wikibio_bleu_score": 5.5379691885995, "eval_wikibio_bleu_score_sem": 0.18660298241574955, "eval_wikibio_emb_cos_sim": 0.7317771315574646, "eval_wikibio_emb_cos_sim_sem": 0.008684989021308362, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9599239826202393, "eval_wikibio_n_ngrams_match_1": 9.894, "eval_wikibio_n_ngrams_match_2": 3.242, "eval_wikibio_n_ngrams_match_3": 1.138, "eval_wikibio_num_pred_words": 37.094, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 52.453338432193355, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3388163818003451, "eval_wikibio_runtime": 10.1905, "eval_wikibio_samples_per_second": 49.065, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3122920384071842, "eval_wikibio_token_set_f1_sem": 0.0052821681371611915, "eval_wikibio_token_set_precision": 0.3227349334154516, "eval_wikibio_token_set_recall": 0.3209974433271776, "eval_wikibio_true_num_tokens": 61.1328125, "step": 61250 }, { "epoch": 11.76, "eval_nq_accuracy": 0.5088125, "eval_nq_bleu_score": 10.784771667148, "eval_nq_bleu_score_sem": 0.4582363601111698, "eval_nq_emb_cos_sim": 0.8136206865310669, "eval_nq_emb_cos_sim_sem": 0.007102759449716953, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.322532892227173, "eval_nq_n_ngrams_match_1": 22.012, "eval_nq_n_ngrams_match_2": 7.862, "eval_nq_n_ngrams_match_3": 3.542, "eval_nq_num_pred_words": 48.514, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.201480861244011, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4232035793316797, "eval_nq_runtime": 15.5388, "eval_nq_samples_per_second": 32.177, "eval_nq_steps_per_second": 0.064, "eval_nq_token_set_f1": 0.4425579124455575, "eval_nq_token_set_f1_sem": 0.005151415693224257, "eval_nq_token_set_precision": 0.3955402571592704, "eval_nq_token_set_recall": 0.5140255837623238, "eval_nq_true_num_tokens": 64.0, "step": 61250 }, { "epoch": 11.76, "learning_rate": 0.001, "loss": 2.6992, "step": 61260 }, { "epoch": 11.76, "learning_rate": 0.001, "loss": 2.7053, "step": 61272 }, { "epoch": 11.77, "learning_rate": 0.001, "loss": 2.6998, "step": 61284 }, { "epoch": 11.77, "learning_rate": 0.001, "loss": 2.6949, "step": 61296 }, { "epoch": 11.77, "learning_rate": 0.001, "loss": 2.6952, "step": 61308 }, { "epoch": 11.77, "learning_rate": 0.001, "loss": 2.7109, "step": 61320 }, { "epoch": 11.78, "learning_rate": 0.001, "loss": 2.71, "step": 61332 }, { "epoch": 11.78, "learning_rate": 0.001, "loss": 2.7152, "step": 61344 }, { "epoch": 11.78, "learning_rate": 0.001, "loss": 2.6965, "step": 61356 }, { "epoch": 11.78, "learning_rate": 0.001, "loss": 2.7132, "step": 61368 }, { "epoch": 11.79, "learning_rate": 0.001, "loss": 2.6985, "step": 61380 }, { "epoch": 11.79, "learning_rate": 0.001, "loss": 2.7132, "step": 61392 }, { "epoch": 11.79, "learning_rate": 0.001, "loss": 2.6961, "step": 61404 }, { "epoch": 11.79, "learning_rate": 0.001, "loss": 2.696, "step": 61416 }, { "epoch": 11.79, "learning_rate": 0.001, "loss": 2.6998, "step": 61428 }, { "epoch": 11.8, "learning_rate": 0.001, "loss": 2.6926, "step": 61440 }, { "epoch": 11.8, "learning_rate": 0.001, "loss": 2.7006, "step": 61452 }, { "epoch": 11.8, "learning_rate": 0.001, "loss": 2.6966, "step": 61464 }, { "epoch": 11.8, "learning_rate": 0.001, "loss": 2.7035, "step": 61476 }, { "epoch": 11.81, "learning_rate": 0.001, "loss": 2.6908, "step": 61488 }, { "epoch": 11.81, "learning_rate": 0.001, "loss": 2.7116, "step": 61500 }, { "epoch": 11.81, "learning_rate": 0.001, "loss": 2.6934, "step": 61512 }, { "epoch": 11.81, "learning_rate": 0.001, "loss": 2.7128, "step": 61524 }, { "epoch": 11.82, "learning_rate": 0.001, "loss": 2.7034, "step": 61536 }, { "epoch": 11.82, "learning_rate": 0.001, "loss": 2.7041, "step": 61548 }, { "epoch": 11.82, "learning_rate": 0.001, "loss": 2.7115, "step": 61560 }, { "epoch": 11.82, "learning_rate": 0.001, "loss": 2.6964, "step": 61572 }, { "epoch": 11.82, "learning_rate": 0.001, "loss": 2.7105, "step": 61584 }, { "epoch": 11.83, "learning_rate": 0.001, "loss": 2.7027, "step": 61596 }, { "epoch": 11.83, "learning_rate": 0.001, "loss": 2.7099, "step": 61608 }, { "epoch": 11.83, "learning_rate": 0.001, "loss": 2.7063, "step": 61620 }, { "epoch": 11.83, "learning_rate": 0.001, "loss": 2.7002, "step": 61632 }, { "epoch": 11.84, "learning_rate": 0.001, "loss": 2.703, "step": 61644 }, { "epoch": 11.84, "learning_rate": 0.001, "loss": 2.7044, "step": 61656 }, { "epoch": 11.84, "learning_rate": 0.001, "loss": 2.7097, "step": 61668 }, { "epoch": 11.84, "learning_rate": 0.001, "loss": 2.7104, "step": 61680 }, { "epoch": 11.85, "learning_rate": 0.001, "loss": 2.7011, "step": 61692 }, { "epoch": 11.85, "learning_rate": 0.001, "loss": 2.7054, "step": 61704 }, { "epoch": 11.85, "learning_rate": 0.001, "loss": 2.7075, "step": 61716 }, { "epoch": 11.85, "learning_rate": 0.001, "loss": 2.7001, "step": 61728 }, { "epoch": 11.85, "learning_rate": 0.001, "loss": 2.716, "step": 61740 }, { "epoch": 11.86, "learning_rate": 0.001, "loss": 2.7019, "step": 61752 }, { "epoch": 11.86, "learning_rate": 0.001, "loss": 2.6996, "step": 61764 }, { "epoch": 11.86, "learning_rate": 0.001, "loss": 2.7065, "step": 61776 }, { "epoch": 11.86, "learning_rate": 0.001, "loss": 2.7055, "step": 61788 }, { "epoch": 11.87, "learning_rate": 0.001, "loss": 2.7105, "step": 61800 }, { "epoch": 11.87, "learning_rate": 0.001, "loss": 2.6977, "step": 61812 }, { "epoch": 11.87, "learning_rate": 0.001, "loss": 2.7003, "step": 61824 }, { "epoch": 11.87, "learning_rate": 0.001, "loss": 2.708, "step": 61836 }, { "epoch": 11.88, "learning_rate": 0.001, "loss": 2.7062, "step": 61848 }, { "epoch": 11.88, "learning_rate": 0.001, "loss": 2.7097, "step": 61860 }, { "epoch": 11.88, "learning_rate": 0.001, "loss": 2.7201, "step": 61872 }, { "epoch": 11.88, "eval_ag_news_accuracy": 0.30621875, "eval_ag_news_bleu_score": 4.41548510755679, "eval_ag_news_bleu_score_sem": 0.13996736875655358, "eval_ag_news_emb_cos_sim": 0.7804596424102783, "eval_ag_news_emb_cos_sim_sem": 0.007677897978674645, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7277870178222656, "eval_ag_news_n_ngrams_match_1": 13.118, "eval_ag_news_n_ngrams_match_2": 2.752, "eval_ag_news_n_ngrams_match_3": 0.738, "eval_ag_news_num_pred_words": 45.822, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.58697502257866, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3249208746053033, "eval_ag_news_runtime": 10.4504, "eval_ag_news_samples_per_second": 47.845, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.33131955345454434, "eval_ag_news_token_set_f1_sem": 0.004387695599378025, "eval_ag_news_token_set_precision": 0.30903356028252893, "eval_ag_news_token_set_recall": 0.3726080987666195, "eval_ag_news_true_num_tokens": 56.09375, "step": 61875 }, { "epoch": 11.88, "eval_anthropic_toxic_prompts_accuracy": 0.1073125, "eval_anthropic_toxic_prompts_bleu_score": 2.894107461354934, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11269438968144171, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6332647800445557, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011704065244730883, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.40533709526062, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.76, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.73, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632, "eval_anthropic_toxic_prompts_num_pred_words": 47.328, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.124448821467606, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19550674397571874, "eval_anthropic_toxic_prompts_runtime": 10.4005, "eval_anthropic_toxic_prompts_samples_per_second": 48.075, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.340139873600569, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00640672044392126, "eval_anthropic_toxic_prompts_token_set_precision": 0.4060256077091581, "eval_anthropic_toxic_prompts_token_set_recall": 0.3206711305859175, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 61875 }, { "epoch": 11.88, "eval_arxiv_accuracy": 0.33240625, "eval_arxiv_bleu_score": 3.6591203070377176, "eval_arxiv_bleu_score_sem": 0.09933155752252189, "eval_arxiv_emb_cos_sim": 0.7171277403831482, "eval_arxiv_emb_cos_sim_sem": 0.007953261120698031, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.576629400253296, "eval_arxiv_n_ngrams_match_1": 13.694, "eval_arxiv_n_ngrams_match_2": 2.41, "eval_arxiv_n_ngrams_match_3": 0.468, "eval_arxiv_num_pred_words": 39.536, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.75282904923117, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3262480438072417, "eval_arxiv_runtime": 9.6108, "eval_arxiv_samples_per_second": 52.025, "eval_arxiv_steps_per_second": 0.104, "eval_arxiv_token_set_f1": 0.3218910054823966, "eval_arxiv_token_set_f1_sem": 0.004263475240578928, "eval_arxiv_token_set_precision": 0.27046558492367223, "eval_arxiv_token_set_recall": 0.4189519441963203, "eval_arxiv_true_num_tokens": 64.0, "step": 61875 }, { "epoch": 11.88, "eval_python_code_alpaca_accuracy": 0.1515625, "eval_python_code_alpaca_bleu_score": 4.19423223548987, "eval_python_code_alpaca_bleu_score_sem": 0.14827497792935423, "eval_python_code_alpaca_emb_cos_sim": 0.7249279022216797, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01093777363561908, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.065692663192749, "eval_python_code_alpaca_n_ngrams_match_1": 9.096, "eval_python_code_alpaca_n_ngrams_match_2": 2.548, "eval_python_code_alpaca_n_ngrams_match_3": 0.816, "eval_python_code_alpaca_num_pred_words": 43.448, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.449313993181665, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3050871355268122, "eval_python_code_alpaca_runtime": 9.6769, "eval_python_code_alpaca_samples_per_second": 51.67, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.44668352804230854, "eval_python_code_alpaca_token_set_f1_sem": 0.005622410141082624, "eval_python_code_alpaca_token_set_precision": 0.49594354684894876, "eval_python_code_alpaca_token_set_recall": 0.43204885732612086, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 61875 }, { "epoch": 11.88, "eval_wikibio_accuracy": 0.31084375, "eval_wikibio_bleu_score": 5.605452686263078, "eval_wikibio_bleu_score_sem": 0.2040778845346227, "eval_wikibio_emb_cos_sim": 0.7090040445327759, "eval_wikibio_emb_cos_sim_sem": 0.011306478217659429, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9229464530944824, "eval_wikibio_n_ngrams_match_1": 9.508, "eval_wikibio_n_ngrams_match_2": 3.138, "eval_wikibio_n_ngrams_match_3": 1.156, "eval_wikibio_num_pred_words": 35.69, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.549166317470224, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3254971435215501, "eval_wikibio_runtime": 9.7933, "eval_wikibio_samples_per_second": 51.055, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3017743562233243, "eval_wikibio_token_set_f1_sem": 0.005772128999832789, "eval_wikibio_token_set_precision": 0.30794965832482085, "eval_wikibio_token_set_recall": 0.31451164666341436, "eval_wikibio_true_num_tokens": 61.1328125, "step": 61875 }, { "epoch": 11.88, "eval_nq_accuracy": 0.50828125, "eval_nq_bleu_score": 10.529073664813733, "eval_nq_bleu_score_sem": 0.45185408175615854, "eval_nq_emb_cos_sim": 0.8136910796165466, "eval_nq_emb_cos_sim_sem": 0.007126814763504546, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3268489837646484, "eval_nq_n_ngrams_match_1": 21.9, "eval_nq_n_ngrams_match_2": 7.634, "eval_nq_n_ngrams_match_3": 3.426, "eval_nq_num_pred_words": 48.524, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.245606543200061, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4226750559246117, "eval_nq_runtime": 9.7738, "eval_nq_samples_per_second": 51.157, "eval_nq_steps_per_second": 0.102, "eval_nq_token_set_f1": 0.4407978412452672, "eval_nq_token_set_f1_sem": 0.004932712350449909, "eval_nq_token_set_precision": 0.3935349524457392, "eval_nq_token_set_recall": 0.5115663170727578, "eval_nq_true_num_tokens": 64.0, "step": 61875 }, { "epoch": 11.88, "learning_rate": 0.001, "loss": 2.693, "step": 61884 }, { "epoch": 11.88, "learning_rate": 0.001, "loss": 2.7015, "step": 61896 }, { "epoch": 11.89, "learning_rate": 0.001, "loss": 2.7013, "step": 61908 }, { "epoch": 11.89, "learning_rate": 0.001, "loss": 2.7005, "step": 61920 }, { "epoch": 11.89, "learning_rate": 0.001, "loss": 2.7085, "step": 61932 }, { "epoch": 11.89, "learning_rate": 0.001, "loss": 2.7053, "step": 61944 }, { "epoch": 11.9, "learning_rate": 0.001, "loss": 2.7046, "step": 61956 }, { "epoch": 11.9, "learning_rate": 0.001, "loss": 2.6943, "step": 61968 }, { "epoch": 11.9, "learning_rate": 0.001, "loss": 2.6963, "step": 61980 }, { "epoch": 11.9, "learning_rate": 0.001, "loss": 2.6982, "step": 61992 }, { "epoch": 11.91, "learning_rate": 0.001, "loss": 2.7007, "step": 62004 }, { "epoch": 11.91, "learning_rate": 0.001, "loss": 2.7096, "step": 62016 }, { "epoch": 11.91, "learning_rate": 0.001, "loss": 2.6999, "step": 62028 }, { "epoch": 11.91, "learning_rate": 0.001, "loss": 2.7005, "step": 62040 }, { "epoch": 11.91, "learning_rate": 0.001, "loss": 2.7057, "step": 62052 }, { "epoch": 11.92, "learning_rate": 0.001, "loss": 2.7108, "step": 62064 }, { "epoch": 11.92, "learning_rate": 0.001, "loss": 2.7008, "step": 62076 }, { "epoch": 11.92, "learning_rate": 0.001, "loss": 2.7008, "step": 62088 }, { "epoch": 11.92, "learning_rate": 0.001, "loss": 2.7064, "step": 62100 }, { "epoch": 11.93, "learning_rate": 0.001, "loss": 2.7059, "step": 62112 }, { "epoch": 11.93, "learning_rate": 0.001, "loss": 2.6984, "step": 62124 }, { "epoch": 11.93, "learning_rate": 0.001, "loss": 2.6981, "step": 62136 }, { "epoch": 11.93, "learning_rate": 0.001, "loss": 2.7013, "step": 62148 }, { "epoch": 11.94, "learning_rate": 0.001, "loss": 2.696, "step": 62160 }, { "epoch": 11.94, "learning_rate": 0.001, "loss": 2.6973, "step": 62172 }, { "epoch": 11.94, "learning_rate": 0.001, "loss": 2.6913, "step": 62184 }, { "epoch": 11.94, "learning_rate": 0.001, "loss": 2.703, "step": 62196 }, { "epoch": 11.94, "learning_rate": 0.001, "loss": 2.6943, "step": 62208 }, { "epoch": 11.95, "learning_rate": 0.001, "loss": 2.699, "step": 62220 }, { "epoch": 11.95, "learning_rate": 0.001, "loss": 2.6943, "step": 62232 }, { "epoch": 11.95, "learning_rate": 0.001, "loss": 2.6976, "step": 62244 }, { "epoch": 11.95, "learning_rate": 0.001, "loss": 2.7005, "step": 62256 }, { "epoch": 11.96, "learning_rate": 0.001, "loss": 2.6908, "step": 62268 }, { "epoch": 11.96, "learning_rate": 0.001, "loss": 2.6996, "step": 62280 }, { "epoch": 11.96, "learning_rate": 0.001, "loss": 2.6979, "step": 62292 }, { "epoch": 11.96, "learning_rate": 0.001, "loss": 2.7052, "step": 62304 }, { "epoch": 11.97, "learning_rate": 0.001, "loss": 2.6867, "step": 62316 }, { "epoch": 11.97, "learning_rate": 0.001, "loss": 2.7018, "step": 62328 }, { "epoch": 11.97, "learning_rate": 0.001, "loss": 2.6935, "step": 62340 }, { "epoch": 11.97, "learning_rate": 0.001, "loss": 2.6965, "step": 62352 }, { "epoch": 11.97, "learning_rate": 0.001, "loss": 2.6965, "step": 62364 }, { "epoch": 11.98, "learning_rate": 0.001, "loss": 2.7123, "step": 62376 }, { "epoch": 11.98, "learning_rate": 0.001, "loss": 2.7131, "step": 62388 }, { "epoch": 11.98, "learning_rate": 0.001, "loss": 2.7006, "step": 62400 }, { "epoch": 11.98, "learning_rate": 0.001, "loss": 2.7059, "step": 62412 }, { "epoch": 11.99, "learning_rate": 0.001, "loss": 2.695, "step": 62424 }, { "epoch": 11.99, "learning_rate": 0.001, "loss": 2.697, "step": 62436 }, { "epoch": 11.99, "learning_rate": 0.001, "loss": 2.6974, "step": 62448 }, { "epoch": 11.99, "learning_rate": 0.001, "loss": 2.6975, "step": 62460 }, { "epoch": 12.0, "learning_rate": 0.001, "loss": 2.7018, "step": 62472 }, { "epoch": 12.0, "learning_rate": 0.001, "loss": 2.7047, "step": 62484 }, { "epoch": 12.0, "learning_rate": 0.001, "loss": 2.6964, "step": 62496 }, { "epoch": 12.0, "eval_ag_news_accuracy": 0.307625, "eval_ag_news_bleu_score": 4.68007545837061, "eval_ag_news_bleu_score_sem": 0.16188845797945378, "eval_ag_news_emb_cos_sim": 0.7910676002502441, "eval_ag_news_emb_cos_sim_sem": 0.007531064285363262, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.719710111618042, "eval_ag_news_n_ngrams_match_1": 13.346, "eval_ag_news_n_ngrams_match_2": 2.89, "eval_ag_news_n_ngrams_match_3": 0.874, "eval_ag_news_num_pred_words": 46.434, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.25243377383498, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3275251362113679, "eval_ag_news_runtime": 10.14, "eval_ag_news_samples_per_second": 49.31, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.3330696441627122, "eval_ag_news_token_set_f1_sem": 0.004532917870886484, "eval_ag_news_token_set_precision": 0.31643165089656067, "eval_ag_news_token_set_recall": 0.36630463158880894, "eval_ag_news_true_num_tokens": 56.09375, "step": 62500 }, { "epoch": 12.0, "eval_anthropic_toxic_prompts_accuracy": 0.107375, "eval_anthropic_toxic_prompts_bleu_score": 2.803321481154465, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10794850811278522, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6432708501815796, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010721092736157525, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3977503776550293, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.78, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.694, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582, "eval_anthropic_toxic_prompts_num_pred_words": 47.292, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.896767902728254, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19830859470201195, "eval_anthropic_toxic_prompts_runtime": 18.9495, "eval_anthropic_toxic_prompts_samples_per_second": 26.386, "eval_anthropic_toxic_prompts_steps_per_second": 0.053, "eval_anthropic_toxic_prompts_token_set_f1": 0.343403365396251, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006609813134501554, "eval_anthropic_toxic_prompts_token_set_precision": 0.40842633033296516, "eval_anthropic_toxic_prompts_token_set_recall": 0.32896706560018374, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 62500 }, { "epoch": 12.0, "eval_arxiv_accuracy": 0.33053125, "eval_arxiv_bleu_score": 3.914448073952949, "eval_arxiv_bleu_score_sem": 0.10625763105012422, "eval_arxiv_emb_cos_sim": 0.734485924243927, "eval_arxiv_emb_cos_sim_sem": 0.008628724295635335, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.58801531791687, "eval_arxiv_n_ngrams_match_1": 14.176, "eval_arxiv_n_ngrams_match_2": 2.602, "eval_arxiv_n_ngrams_match_3": 0.55, "eval_arxiv_num_pred_words": 41.322, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 36.162234120341, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3328358601305305, "eval_arxiv_runtime": 9.933, "eval_arxiv_samples_per_second": 50.337, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.329522240663839, "eval_arxiv_token_set_f1_sem": 0.004276015650791637, "eval_arxiv_token_set_precision": 0.27988477502890413, "eval_arxiv_token_set_recall": 0.4206770548598357, "eval_arxiv_true_num_tokens": 64.0, "step": 62500 }, { "epoch": 12.0, "eval_python_code_alpaca_accuracy": 0.15496875, "eval_python_code_alpaca_bleu_score": 4.102021700501079, "eval_python_code_alpaca_bleu_score_sem": 0.11913653363901756, "eval_python_code_alpaca_emb_cos_sim": 0.7436040639877319, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007712727739157343, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.028447151184082, "eval_python_code_alpaca_n_ngrams_match_1": 9.478, "eval_python_code_alpaca_n_ngrams_match_2": 2.656, "eval_python_code_alpaca_n_ngrams_match_3": 0.83, "eval_python_code_alpaca_num_pred_words": 44.92, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.66511785738537, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3096689734305772, "eval_python_code_alpaca_runtime": 9.5782, "eval_python_code_alpaca_samples_per_second": 52.202, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.46491441225281094, "eval_python_code_alpaca_token_set_f1_sem": 0.005213825815394596, "eval_python_code_alpaca_token_set_precision": 0.5197417528040283, "eval_python_code_alpaca_token_set_recall": 0.4421537842720535, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 62500 }, { "epoch": 12.0, "eval_wikibio_accuracy": 0.3054375, "eval_wikibio_bleu_score": 5.592110166892952, "eval_wikibio_bleu_score_sem": 0.18362691976756512, "eval_wikibio_emb_cos_sim": 0.7340899705886841, "eval_wikibio_emb_cos_sim_sem": 0.009408251990813084, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9633853435516357, "eval_wikibio_n_ngrams_match_1": 10.1, "eval_wikibio_n_ngrams_match_2": 3.288, "eval_wikibio_n_ngrams_match_3": 1.154, "eval_wikibio_num_pred_words": 37.666, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 52.63521295365857, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3465202796454441, "eval_wikibio_runtime": 10.7323, "eval_wikibio_samples_per_second": 46.588, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.3123881070814429, "eval_wikibio_token_set_f1_sem": 0.005081583372173606, "eval_wikibio_token_set_precision": 0.3261076819238875, "eval_wikibio_token_set_recall": 0.3126992653024321, "eval_wikibio_true_num_tokens": 61.1328125, "step": 62500 }, { "epoch": 12.0, "eval_nq_accuracy": 0.5085, "eval_nq_bleu_score": 10.936657635862593, "eval_nq_bleu_score_sem": 0.4680075246123519, "eval_nq_emb_cos_sim": 0.8158445358276367, "eval_nq_emb_cos_sim_sem": 0.007262077171266165, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3249571323394775, "eval_nq_n_ngrams_match_1": 22.22, "eval_nq_n_ngrams_match_2": 7.848, "eval_nq_n_ngrams_match_3": 3.62, "eval_nq_num_pred_words": 48.78, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.226241701337134, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42811813612744265, "eval_nq_runtime": 10.037, "eval_nq_samples_per_second": 49.816, "eval_nq_steps_per_second": 0.1, "eval_nq_token_set_f1": 0.44453237326395706, "eval_nq_token_set_f1_sem": 0.0050346187589386995, "eval_nq_token_set_precision": 0.39949567037331113, "eval_nq_token_set_recall": 0.5110357294680288, "eval_nq_true_num_tokens": 64.0, "step": 62500 }, { "epoch": 12.0, "learning_rate": 0.001, "loss": 2.6933, "step": 62508 }, { "epoch": 12.0, "learning_rate": 0.001, "loss": 2.6819, "step": 62520 }, { "epoch": 12.01, "learning_rate": 0.001, "loss": 2.6879, "step": 62532 }, { "epoch": 12.01, "learning_rate": 0.001, "loss": 2.6913, "step": 62544 }, { "epoch": 12.01, "learning_rate": 0.001, "loss": 2.6883, "step": 62556 }, { "epoch": 12.01, "learning_rate": 0.001, "loss": 2.6839, "step": 62568 }, { "epoch": 12.02, "learning_rate": 0.001, "loss": 2.6847, "step": 62580 }, { "epoch": 12.02, "learning_rate": 0.001, "loss": 2.6856, "step": 62592 }, { "epoch": 12.02, "learning_rate": 0.001, "loss": 2.6807, "step": 62604 }, { "epoch": 12.02, "learning_rate": 0.001, "loss": 2.6815, "step": 62616 }, { "epoch": 12.03, "learning_rate": 0.001, "loss": 2.6906, "step": 62628 }, { "epoch": 12.03, "learning_rate": 0.001, "loss": 2.668, "step": 62640 }, { "epoch": 12.03, "learning_rate": 0.001, "loss": 2.6897, "step": 62652 }, { "epoch": 12.03, "learning_rate": 0.001, "loss": 2.6769, "step": 62664 }, { "epoch": 12.03, "learning_rate": 0.001, "loss": 2.6791, "step": 62676 }, { "epoch": 12.04, "learning_rate": 0.001, "loss": 2.6782, "step": 62688 }, { "epoch": 12.04, "learning_rate": 0.001, "loss": 2.6762, "step": 62700 }, { "epoch": 12.04, "learning_rate": 0.001, "loss": 2.6817, "step": 62712 }, { "epoch": 12.04, "learning_rate": 0.001, "loss": 2.6954, "step": 62724 }, { "epoch": 12.05, "learning_rate": 0.001, "loss": 2.6888, "step": 62736 }, { "epoch": 12.05, "learning_rate": 0.001, "loss": 2.6779, "step": 62748 }, { "epoch": 12.05, "learning_rate": 0.001, "loss": 2.6978, "step": 62760 }, { "epoch": 12.05, "learning_rate": 0.001, "loss": 2.6851, "step": 62772 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 2.6896, "step": 62784 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 2.6792, "step": 62796 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 2.6835, "step": 62808 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 2.6743, "step": 62820 }, { "epoch": 12.06, "learning_rate": 0.001, "loss": 2.6724, "step": 62832 }, { "epoch": 12.07, "learning_rate": 0.001, "loss": 2.6899, "step": 62844 }, { "epoch": 12.07, "learning_rate": 0.001, "loss": 2.6925, "step": 62856 }, { "epoch": 12.07, "learning_rate": 0.001, "loss": 2.6993, "step": 62868 }, { "epoch": 12.07, "learning_rate": 0.001, "loss": 2.6914, "step": 62880 }, { "epoch": 12.08, "learning_rate": 0.001, "loss": 2.6853, "step": 62892 }, { "epoch": 12.08, "learning_rate": 0.001, "loss": 2.6852, "step": 62904 }, { "epoch": 12.08, "learning_rate": 0.001, "loss": 2.6885, "step": 62916 }, { "epoch": 12.08, "learning_rate": 0.001, "loss": 2.6994, "step": 62928 }, { "epoch": 12.09, "learning_rate": 0.001, "loss": 2.6807, "step": 62940 }, { "epoch": 12.09, "learning_rate": 0.001, "loss": 2.6899, "step": 62952 }, { "epoch": 12.09, "learning_rate": 0.001, "loss": 2.6872, "step": 62964 }, { "epoch": 12.09, "learning_rate": 0.001, "loss": 2.6832, "step": 62976 }, { "epoch": 12.09, "learning_rate": 0.001, "loss": 2.6891, "step": 62988 }, { "epoch": 12.1, "learning_rate": 0.001, "loss": 2.6916, "step": 63000 }, { "epoch": 12.1, "learning_rate": 0.001, "loss": 2.6931, "step": 63012 }, { "epoch": 12.1, "learning_rate": 0.001, "loss": 2.6927, "step": 63024 }, { "epoch": 12.1, "learning_rate": 0.001, "loss": 2.6822, "step": 63036 }, { "epoch": 12.11, "learning_rate": 0.001, "loss": 2.6804, "step": 63048 }, { "epoch": 12.11, "learning_rate": 0.001, "loss": 2.6849, "step": 63060 }, { "epoch": 12.11, "learning_rate": 0.001, "loss": 2.6888, "step": 63072 }, { "epoch": 12.11, "learning_rate": 0.001, "loss": 2.6885, "step": 63084 }, { "epoch": 12.12, "learning_rate": 0.001, "loss": 2.6834, "step": 63096 }, { "epoch": 12.12, "learning_rate": 0.001, "loss": 2.6814, "step": 63108 }, { "epoch": 12.12, "learning_rate": 0.001, "loss": 2.6914, "step": 63120 }, { "epoch": 12.12, "eval_ag_news_accuracy": 0.30765625, "eval_ag_news_bleu_score": 4.500774113097772, "eval_ag_news_bleu_score_sem": 0.14991835824230856, "eval_ag_news_emb_cos_sim": 0.7676204442977905, "eval_ag_news_emb_cos_sim_sem": 0.009299637586566985, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.72332501411438, "eval_ag_news_n_ngrams_match_1": 13.008, "eval_ag_news_n_ngrams_match_2": 2.758, "eval_ag_news_n_ngrams_match_3": 0.82, "eval_ag_news_num_pred_words": 46.208, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.401827158239755, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3203681514124064, "eval_ag_news_runtime": 9.9019, "eval_ag_news_samples_per_second": 50.495, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.32805322593426034, "eval_ag_news_token_set_f1_sem": 0.004449823759491081, "eval_ag_news_token_set_precision": 0.30871942973786576, "eval_ag_news_token_set_recall": 0.36458571455171096, "eval_ag_news_true_num_tokens": 56.09375, "step": 63125 }, { "epoch": 12.12, "eval_anthropic_toxic_prompts_accuracy": 0.10815625, "eval_anthropic_toxic_prompts_bleu_score": 2.7453645696437197, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1045175466995382, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6418386697769165, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00901131099590396, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3990821838378906, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.688, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.646, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.57, "eval_anthropic_toxic_prompts_num_pred_words": 47.066, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.936611128906975, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19742908877305798, "eval_anthropic_toxic_prompts_runtime": 9.6915, "eval_anthropic_toxic_prompts_samples_per_second": 51.592, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3343427336566926, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006309426140482984, "eval_anthropic_toxic_prompts_token_set_precision": 0.40185734774643495, "eval_anthropic_toxic_prompts_token_set_recall": 0.31494107345323447, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 63125 }, { "epoch": 12.12, "eval_arxiv_accuracy": 0.334375, "eval_arxiv_bleu_score": 4.050790247736062, "eval_arxiv_bleu_score_sem": 0.11821522845329553, "eval_arxiv_emb_cos_sim": 0.7343416213989258, "eval_arxiv_emb_cos_sim_sem": 0.007763726742944541, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.566161870956421, "eval_arxiv_n_ngrams_match_1": 14.202, "eval_arxiv_n_ngrams_match_2": 2.712, "eval_arxiv_n_ngrams_match_3": 0.584, "eval_arxiv_num_pred_words": 40.534, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.38053715119034, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33711308866143963, "eval_arxiv_runtime": 10.412, "eval_arxiv_samples_per_second": 48.022, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3304784644388723, "eval_arxiv_token_set_f1_sem": 0.004335269346582855, "eval_arxiv_token_set_precision": 0.27954310258673626, "eval_arxiv_token_set_recall": 0.4211488600504229, "eval_arxiv_true_num_tokens": 64.0, "step": 63125 }, { "epoch": 12.12, "eval_python_code_alpaca_accuracy": 0.15228125, "eval_python_code_alpaca_bleu_score": 3.9767444781405454, "eval_python_code_alpaca_bleu_score_sem": 0.12338299527737924, "eval_python_code_alpaca_emb_cos_sim": 0.7335104942321777, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008337650257285004, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.047879695892334, "eval_python_code_alpaca_n_ngrams_match_1": 9.212, "eval_python_code_alpaca_n_ngrams_match_2": 2.51, "eval_python_code_alpaca_n_ngrams_match_3": 0.706, "eval_python_code_alpaca_num_pred_words": 43.646, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.07062090151464, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30927741127756464, "eval_python_code_alpaca_runtime": 9.7784, "eval_python_code_alpaca_samples_per_second": 51.133, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.45001914824801587, "eval_python_code_alpaca_token_set_f1_sem": 0.005346833320944668, "eval_python_code_alpaca_token_set_precision": 0.49766660024009, "eval_python_code_alpaca_token_set_recall": 0.43293383592227264, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 63125 }, { "epoch": 12.12, "eval_wikibio_accuracy": 0.3104375, "eval_wikibio_bleu_score": 5.333561104118382, "eval_wikibio_bleu_score_sem": 0.19206275419922791, "eval_wikibio_emb_cos_sim": 0.7087751626968384, "eval_wikibio_emb_cos_sim_sem": 0.010300922900818327, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9345104694366455, "eval_wikibio_n_ngrams_match_1": 9.556, "eval_wikibio_n_ngrams_match_2": 3.09, "eval_wikibio_n_ngrams_match_3": 1.082, "eval_wikibio_num_pred_words": 36.578, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 51.13711064981968, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32927955702236456, "eval_wikibio_runtime": 10.4402, "eval_wikibio_samples_per_second": 47.892, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3007899495159135, "eval_wikibio_token_set_f1_sem": 0.005768116716028562, "eval_wikibio_token_set_precision": 0.3113908594225259, "eval_wikibio_token_set_recall": 0.3073077821857895, "eval_wikibio_true_num_tokens": 61.1328125, "step": 63125 }, { "epoch": 12.12, "eval_nq_accuracy": 0.50765625, "eval_nq_bleu_score": 10.826749900962302, "eval_nq_bleu_score_sem": 0.45546872919723896, "eval_nq_emb_cos_sim": 0.8152284622192383, "eval_nq_emb_cos_sim_sem": 0.007648327354632697, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.325967788696289, "eval_nq_n_ngrams_match_1": 22.304, "eval_nq_n_ngrams_match_2": 7.932, "eval_nq_n_ngrams_match_3": 3.534, "eval_nq_num_pred_words": 48.744, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.236582141954736, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43105537576589414, "eval_nq_runtime": 10.9677, "eval_nq_samples_per_second": 45.589, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4460166092159248, "eval_nq_token_set_f1_sem": 0.004898522956187676, "eval_nq_token_set_precision": 0.40118708490219224, "eval_nq_token_set_recall": 0.5097889655947345, "eval_nq_true_num_tokens": 64.0, "step": 63125 }, { "epoch": 12.12, "learning_rate": 0.001, "loss": 2.6712, "step": 63132 }, { "epoch": 12.12, "learning_rate": 0.001, "loss": 2.6883, "step": 63144 }, { "epoch": 12.13, "learning_rate": 0.001, "loss": 2.6891, "step": 63156 }, { "epoch": 12.13, "learning_rate": 0.001, "loss": 2.6835, "step": 63168 }, { "epoch": 12.13, "learning_rate": 0.001, "loss": 2.6864, "step": 63180 }, { "epoch": 12.13, "learning_rate": 0.001, "loss": 2.6788, "step": 63192 }, { "epoch": 12.14, "learning_rate": 0.001, "loss": 2.674, "step": 63204 }, { "epoch": 12.14, "learning_rate": 0.001, "loss": 2.6813, "step": 63216 }, { "epoch": 12.14, "learning_rate": 0.001, "loss": 2.6812, "step": 63228 }, { "epoch": 12.14, "learning_rate": 0.001, "loss": 2.6826, "step": 63240 }, { "epoch": 12.15, "learning_rate": 0.001, "loss": 2.6801, "step": 63252 }, { "epoch": 12.15, "learning_rate": 0.001, "loss": 2.6855, "step": 63264 }, { "epoch": 12.15, "learning_rate": 0.001, "loss": 2.6982, "step": 63276 }, { "epoch": 12.15, "learning_rate": 0.001, "loss": 2.6845, "step": 63288 }, { "epoch": 12.15, "learning_rate": 0.001, "loss": 2.6793, "step": 63300 }, { "epoch": 12.16, "learning_rate": 0.001, "loss": 2.6855, "step": 63312 }, { "epoch": 12.16, "learning_rate": 0.001, "loss": 2.6841, "step": 63324 }, { "epoch": 12.16, "learning_rate": 0.001, "loss": 2.6852, "step": 63336 }, { "epoch": 12.16, "learning_rate": 0.001, "loss": 2.6797, "step": 63348 }, { "epoch": 12.17, "learning_rate": 0.001, "loss": 2.6775, "step": 63360 }, { "epoch": 12.17, "learning_rate": 0.001, "loss": 2.6876, "step": 63372 }, { "epoch": 12.17, "learning_rate": 0.001, "loss": 2.6904, "step": 63384 }, { "epoch": 12.17, "learning_rate": 0.001, "loss": 2.6826, "step": 63396 }, { "epoch": 12.18, "learning_rate": 0.001, "loss": 2.6827, "step": 63408 }, { "epoch": 12.18, "learning_rate": 0.001, "loss": 2.6751, "step": 63420 }, { "epoch": 12.18, "learning_rate": 0.001, "loss": 2.6682, "step": 63432 }, { "epoch": 12.18, "learning_rate": 0.001, "loss": 2.6868, "step": 63444 }, { "epoch": 12.18, "learning_rate": 0.001, "loss": 2.6698, "step": 63456 }, { "epoch": 12.19, "learning_rate": 0.001, "loss": 2.6884, "step": 63468 }, { "epoch": 12.19, "learning_rate": 0.001, "loss": 2.678, "step": 63480 }, { "epoch": 12.19, "learning_rate": 0.001, "loss": 2.6907, "step": 63492 }, { "epoch": 12.19, "learning_rate": 0.001, "loss": 2.693, "step": 63504 }, { "epoch": 12.2, "learning_rate": 0.001, "loss": 2.6939, "step": 63516 }, { "epoch": 12.2, "learning_rate": 0.001, "loss": 2.682, "step": 63528 }, { "epoch": 12.2, "learning_rate": 0.001, "loss": 2.6824, "step": 63540 }, { "epoch": 12.2, "learning_rate": 0.001, "loss": 2.6738, "step": 63552 }, { "epoch": 12.21, "learning_rate": 0.001, "loss": 2.6951, "step": 63564 }, { "epoch": 12.21, "learning_rate": 0.001, "loss": 2.6999, "step": 63576 }, { "epoch": 12.21, "learning_rate": 0.001, "loss": 2.6889, "step": 63588 }, { "epoch": 12.21, "learning_rate": 0.001, "loss": 2.685, "step": 63600 }, { "epoch": 12.21, "learning_rate": 0.001, "loss": 2.6817, "step": 63612 }, { "epoch": 12.22, "learning_rate": 0.001, "loss": 2.6827, "step": 63624 }, { "epoch": 12.22, "learning_rate": 0.001, "loss": 2.6764, "step": 63636 }, { "epoch": 12.22, "learning_rate": 0.001, "loss": 2.6874, "step": 63648 }, { "epoch": 12.22, "learning_rate": 0.001, "loss": 2.6754, "step": 63660 }, { "epoch": 12.23, "learning_rate": 0.001, "loss": 2.6842, "step": 63672 }, { "epoch": 12.23, "learning_rate": 0.001, "loss": 2.6911, "step": 63684 }, { "epoch": 12.23, "learning_rate": 0.001, "loss": 2.688, "step": 63696 }, { "epoch": 12.23, "learning_rate": 0.001, "loss": 2.6793, "step": 63708 }, { "epoch": 12.24, "learning_rate": 0.001, "loss": 2.6845, "step": 63720 }, { "epoch": 12.24, "learning_rate": 0.001, "loss": 2.6913, "step": 63732 }, { "epoch": 12.24, "learning_rate": 0.001, "loss": 2.681, "step": 63744 }, { "epoch": 12.24, "eval_ag_news_accuracy": 0.308125, "eval_ag_news_bleu_score": 4.526321771088117, "eval_ag_news_bleu_score_sem": 0.14523036454130234, "eval_ag_news_emb_cos_sim": 0.7819414734840393, "eval_ag_news_emb_cos_sim_sem": 0.008212183699079629, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7093119621276855, "eval_ag_news_n_ngrams_match_1": 13.472, "eval_ag_news_n_ngrams_match_2": 2.864, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 47.08, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.825707228670716, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3281437382031718, "eval_ag_news_runtime": 10.5789, "eval_ag_news_samples_per_second": 47.264, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.33616763176934533, "eval_ag_news_token_set_f1_sem": 0.004305632092358061, "eval_ag_news_token_set_precision": 0.3182175047987883, "eval_ag_news_token_set_recall": 0.37064655863085927, "eval_ag_news_true_num_tokens": 56.09375, "step": 63750 }, { "epoch": 12.24, "eval_anthropic_toxic_prompts_accuracy": 0.10715625, "eval_anthropic_toxic_prompts_bleu_score": 2.853472604684086, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12340967297437792, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6327046751976013, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010196941357185888, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.4093167781829834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.742, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.678, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.604, "eval_anthropic_toxic_prompts_num_pred_words": 47.192, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 30.24457344640401, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19734087615842322, "eval_anthropic_toxic_prompts_runtime": 10.112, "eval_anthropic_toxic_prompts_samples_per_second": 49.446, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.33552012610640597, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00636690026165165, "eval_anthropic_toxic_prompts_token_set_precision": 0.39787839870433245, "eval_anthropic_toxic_prompts_token_set_recall": 0.3181068143485201, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 63750 }, { "epoch": 12.24, "eval_arxiv_accuracy": 0.3333125, "eval_arxiv_bleu_score": 3.912833552030698, "eval_arxiv_bleu_score_sem": 0.11089804161981101, "eval_arxiv_emb_cos_sim": 0.734173059463501, "eval_arxiv_emb_cos_sim_sem": 0.008368075013553807, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5636889934539795, "eval_arxiv_n_ngrams_match_1": 14.122, "eval_arxiv_n_ngrams_match_2": 2.602, "eval_arxiv_n_ngrams_match_3": 0.548, "eval_arxiv_num_pred_words": 39.988, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.29315350590027, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3341879957728653, "eval_arxiv_runtime": 10.7211, "eval_arxiv_samples_per_second": 46.637, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.3280883759585713, "eval_arxiv_token_set_f1_sem": 0.004342192611877894, "eval_arxiv_token_set_precision": 0.2769834131744744, "eval_arxiv_token_set_recall": 0.42032526692521877, "eval_arxiv_true_num_tokens": 64.0, "step": 63750 }, { "epoch": 12.24, "eval_python_code_alpaca_accuracy": 0.152375, "eval_python_code_alpaca_bleu_score": 4.195882689869561, "eval_python_code_alpaca_bleu_score_sem": 0.12388838000149487, "eval_python_code_alpaca_emb_cos_sim": 0.7241489887237549, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010407032860300567, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.031872034072876, "eval_python_code_alpaca_n_ngrams_match_1": 9.258, "eval_python_code_alpaca_n_ngrams_match_2": 2.616, "eval_python_code_alpaca_n_ngrams_match_3": 0.816, "eval_python_code_alpaca_num_pred_words": 42.832, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.736014803498136, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31347888850576733, "eval_python_code_alpaca_runtime": 9.7159, "eval_python_code_alpaca_samples_per_second": 51.462, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4565831525685715, "eval_python_code_alpaca_token_set_f1_sem": 0.005788586833613359, "eval_python_code_alpaca_token_set_precision": 0.5025236524526253, "eval_python_code_alpaca_token_set_recall": 0.44148507126294895, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 63750 }, { "epoch": 12.24, "eval_wikibio_accuracy": 0.3104375, "eval_wikibio_bleu_score": 5.340486692945748, "eval_wikibio_bleu_score_sem": 0.19693909310777077, "eval_wikibio_emb_cos_sim": 0.7221648693084717, "eval_wikibio_emb_cos_sim_sem": 0.010366201783716621, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9429478645324707, "eval_wikibio_n_ngrams_match_1": 9.544, "eval_wikibio_n_ngrams_match_2": 3.122, "eval_wikibio_n_ngrams_match_3": 1.092, "eval_wikibio_num_pred_words": 36.26, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 51.570400002688736, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.329705227825844, "eval_wikibio_runtime": 10.2723, "eval_wikibio_samples_per_second": 48.674, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3011053410463612, "eval_wikibio_token_set_f1_sem": 0.005862336197062749, "eval_wikibio_token_set_precision": 0.30746018767556144, "eval_wikibio_token_set_recall": 0.31231974359214926, "eval_wikibio_true_num_tokens": 61.1328125, "step": 63750 }, { "epoch": 12.24, "eval_nq_accuracy": 0.50884375, "eval_nq_bleu_score": 10.856529369101684, "eval_nq_bleu_score_sem": 0.4462073304957313, "eval_nq_emb_cos_sim": 0.8193342685699463, "eval_nq_emb_cos_sim_sem": 0.007123159182940734, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.320971965789795, "eval_nq_n_ngrams_match_1": 22.382, "eval_nq_n_ngrams_match_2": 7.954, "eval_nq_n_ngrams_match_3": 3.542, "eval_nq_num_pred_words": 48.964, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.185569521513363, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42934264662724003, "eval_nq_runtime": 12.2828, "eval_nq_samples_per_second": 40.707, "eval_nq_steps_per_second": 0.081, "eval_nq_token_set_f1": 0.4469274221234845, "eval_nq_token_set_f1_sem": 0.004985473419810356, "eval_nq_token_set_precision": 0.401762783571906, "eval_nq_token_set_recall": 0.5118534430117191, "eval_nq_true_num_tokens": 64.0, "step": 63750 }, { "epoch": 12.24, "learning_rate": 0.001, "loss": 2.6862, "step": 63756 }, { "epoch": 12.24, "learning_rate": 0.001, "loss": 2.6789, "step": 63768 }, { "epoch": 12.25, "learning_rate": 0.001, "loss": 2.6905, "step": 63780 }, { "epoch": 12.25, "learning_rate": 0.001, "loss": 2.6861, "step": 63792 }, { "epoch": 12.25, "learning_rate": 0.001, "loss": 2.6941, "step": 63804 }, { "epoch": 12.25, "learning_rate": 0.001, "loss": 2.6741, "step": 63816 }, { "epoch": 12.26, "learning_rate": 0.001, "loss": 2.6913, "step": 63828 }, { "epoch": 12.26, "learning_rate": 0.001, "loss": 2.6862, "step": 63840 }, { "epoch": 12.26, "learning_rate": 0.001, "loss": 2.6954, "step": 63852 }, { "epoch": 12.26, "learning_rate": 0.001, "loss": 2.6804, "step": 63864 }, { "epoch": 12.26, "learning_rate": 0.001, "loss": 2.6827, "step": 63876 }, { "epoch": 12.27, "learning_rate": 0.001, "loss": 2.6885, "step": 63888 }, { "epoch": 12.27, "learning_rate": 0.001, "loss": 2.6724, "step": 63900 }, { "epoch": 12.27, "learning_rate": 0.001, "loss": 2.6847, "step": 63912 }, { "epoch": 12.27, "learning_rate": 0.001, "loss": 2.6827, "step": 63924 }, { "epoch": 12.28, "learning_rate": 0.001, "loss": 2.6838, "step": 63936 }, { "epoch": 12.28, "learning_rate": 0.001, "loss": 2.676, "step": 63948 }, { "epoch": 12.28, "learning_rate": 0.001, "loss": 2.6865, "step": 63960 }, { "epoch": 12.28, "learning_rate": 0.001, "loss": 2.6863, "step": 63972 }, { "epoch": 12.29, "learning_rate": 0.001, "loss": 2.6908, "step": 63984 }, { "epoch": 12.29, "learning_rate": 0.001, "loss": 2.6884, "step": 63996 }, { "epoch": 12.29, "learning_rate": 0.001, "loss": 2.6836, "step": 64008 }, { "epoch": 12.29, "learning_rate": 0.001, "loss": 2.6993, "step": 64020 }, { "epoch": 12.29, "learning_rate": 0.001, "loss": 2.6807, "step": 64032 }, { "epoch": 12.3, "learning_rate": 0.001, "loss": 2.6832, "step": 64044 }, { "epoch": 12.3, "learning_rate": 0.001, "loss": 2.6927, "step": 64056 }, { "epoch": 12.3, "learning_rate": 0.001, "loss": 2.6902, "step": 64068 }, { "epoch": 12.3, "learning_rate": 0.001, "loss": 2.6935, "step": 64080 }, { "epoch": 12.31, "learning_rate": 0.001, "loss": 2.6822, "step": 64092 }, { "epoch": 12.31, "learning_rate": 0.001, "loss": 2.6848, "step": 64104 }, { "epoch": 12.31, "learning_rate": 0.001, "loss": 2.6864, "step": 64116 }, { "epoch": 12.31, "learning_rate": 0.001, "loss": 2.6906, "step": 64128 }, { "epoch": 12.32, "learning_rate": 0.001, "loss": 2.6806, "step": 64140 }, { "epoch": 12.32, "learning_rate": 0.001, "loss": 2.6799, "step": 64152 }, { "epoch": 12.32, "learning_rate": 0.001, "loss": 2.6771, "step": 64164 }, { "epoch": 12.32, "learning_rate": 0.001, "loss": 2.6919, "step": 64176 }, { "epoch": 12.32, "learning_rate": 0.001, "loss": 2.6954, "step": 64188 }, { "epoch": 12.33, "learning_rate": 0.001, "loss": 2.6839, "step": 64200 }, { "epoch": 12.33, "learning_rate": 0.001, "loss": 2.6893, "step": 64212 }, { "epoch": 12.33, "learning_rate": 0.001, "loss": 2.6927, "step": 64224 }, { "epoch": 12.33, "learning_rate": 0.001, "loss": 2.6867, "step": 64236 }, { "epoch": 12.34, "learning_rate": 0.001, "loss": 2.6939, "step": 64248 }, { "epoch": 12.34, "learning_rate": 0.001, "loss": 2.6916, "step": 64260 }, { "epoch": 12.34, "learning_rate": 0.001, "loss": 2.6978, "step": 64272 }, { "epoch": 12.34, "learning_rate": 0.001, "loss": 2.6904, "step": 64284 }, { "epoch": 12.35, "learning_rate": 0.001, "loss": 2.6879, "step": 64296 }, { "epoch": 12.35, "learning_rate": 0.001, "loss": 2.6893, "step": 64308 }, { "epoch": 12.35, "learning_rate": 0.001, "loss": 2.687, "step": 64320 }, { "epoch": 12.35, "learning_rate": 0.001, "loss": 2.7007, "step": 64332 }, { "epoch": 12.35, "learning_rate": 0.001, "loss": 2.6968, "step": 64344 }, { "epoch": 12.36, "learning_rate": 0.001, "loss": 2.6867, "step": 64356 }, { "epoch": 12.36, "learning_rate": 0.001, "loss": 2.6807, "step": 64368 }, { "epoch": 12.36, "eval_ag_news_accuracy": 0.3079375, "eval_ag_news_bleu_score": 4.3634836705640465, "eval_ag_news_bleu_score_sem": 0.14205787801373654, "eval_ag_news_emb_cos_sim": 0.77836012840271, "eval_ag_news_emb_cos_sim_sem": 0.008324039459394018, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7089154720306396, "eval_ag_news_n_ngrams_match_1": 13.274, "eval_ag_news_n_ngrams_match_2": 2.708, "eval_ag_news_n_ngrams_match_3": 0.692, "eval_ag_news_num_pred_words": 46.342, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.80952344861593, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.325656206203789, "eval_ag_news_runtime": 10.1924, "eval_ag_news_samples_per_second": 49.056, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.33253575499116284, "eval_ag_news_token_set_f1_sem": 0.004387361291739367, "eval_ag_news_token_set_precision": 0.3129516059175739, "eval_ag_news_token_set_recall": 0.3715027590816579, "eval_ag_news_true_num_tokens": 56.09375, "step": 64375 }, { "epoch": 12.36, "eval_anthropic_toxic_prompts_accuracy": 0.1069375, "eval_anthropic_toxic_prompts_bleu_score": 2.8773458552155606, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12007661660760621, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6492405533790588, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009462986917594066, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3939576148986816, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.794, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.684, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.586, "eval_anthropic_toxic_prompts_num_pred_words": 47.1, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.783591316532988, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20117097610108545, "eval_anthropic_toxic_prompts_runtime": 9.3237, "eval_anthropic_toxic_prompts_samples_per_second": 53.627, "eval_anthropic_toxic_prompts_steps_per_second": 0.107, "eval_anthropic_toxic_prompts_token_set_f1": 0.3419428027184945, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006382811990095415, "eval_anthropic_toxic_prompts_token_set_precision": 0.4058209300151164, "eval_anthropic_toxic_prompts_token_set_recall": 0.32233275173455844, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 64375 }, { "epoch": 12.36, "eval_arxiv_accuracy": 0.3319375, "eval_arxiv_bleu_score": 4.02360358169393, "eval_arxiv_bleu_score_sem": 0.1192204689385041, "eval_arxiv_emb_cos_sim": 0.7298744916915894, "eval_arxiv_emb_cos_sim_sem": 0.009141886542402264, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.575408697128296, "eval_arxiv_n_ngrams_match_1": 14.124, "eval_arxiv_n_ngrams_match_2": 2.664, "eval_arxiv_n_ngrams_match_3": 0.588, "eval_arxiv_num_pred_words": 40.322, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.70921208618092, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33405053184170247, "eval_arxiv_runtime": 9.6852, "eval_arxiv_samples_per_second": 51.625, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.3324400308353056, "eval_arxiv_token_set_f1_sem": 0.004194391940256961, "eval_arxiv_token_set_precision": 0.28065966296273387, "eval_arxiv_token_set_recall": 0.4290765053135769, "eval_arxiv_true_num_tokens": 64.0, "step": 64375 }, { "epoch": 12.36, "eval_python_code_alpaca_accuracy": 0.15390625, "eval_python_code_alpaca_bleu_score": 4.005114341325162, "eval_python_code_alpaca_bleu_score_sem": 0.12485640490112654, "eval_python_code_alpaca_emb_cos_sim": 0.7366974949836731, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009228837466513764, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0346732139587402, "eval_python_code_alpaca_n_ngrams_match_1": 9.072, "eval_python_code_alpaca_n_ngrams_match_2": 2.47, "eval_python_code_alpaca_n_ngrams_match_3": 0.756, "eval_python_code_alpaca_num_pred_words": 43.536, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.794181540791648, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30534934416496107, "eval_python_code_alpaca_runtime": 9.5481, "eval_python_code_alpaca_samples_per_second": 52.366, "eval_python_code_alpaca_steps_per_second": 0.105, "eval_python_code_alpaca_token_set_f1": 0.4553981555124146, "eval_python_code_alpaca_token_set_f1_sem": 0.005578453409727007, "eval_python_code_alpaca_token_set_precision": 0.49367564897478905, "eval_python_code_alpaca_token_set_recall": 0.44832925593773626, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 64375 }, { "epoch": 12.36, "eval_wikibio_accuracy": 0.30709375, "eval_wikibio_bleu_score": 5.458663661379506, "eval_wikibio_bleu_score_sem": 0.20056702303355825, "eval_wikibio_emb_cos_sim": 0.7180017828941345, "eval_wikibio_emb_cos_sim_sem": 0.010004883436877807, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9794116020202637, "eval_wikibio_n_ngrams_match_1": 9.59, "eval_wikibio_n_ngrams_match_2": 3.124, "eval_wikibio_n_ngrams_match_3": 1.096, "eval_wikibio_num_pred_words": 36.012, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 53.485554174978105, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33128895330977126, "eval_wikibio_runtime": 13.0042, "eval_wikibio_samples_per_second": 38.449, "eval_wikibio_steps_per_second": 0.077, "eval_wikibio_token_set_f1": 0.3070327763150943, "eval_wikibio_token_set_f1_sem": 0.00568952249062667, "eval_wikibio_token_set_precision": 0.3138791538788945, "eval_wikibio_token_set_recall": 0.3200629318680838, "eval_wikibio_true_num_tokens": 61.1328125, "step": 64375 }, { "epoch": 12.36, "eval_nq_accuracy": 0.50953125, "eval_nq_bleu_score": 10.862770280777887, "eval_nq_bleu_score_sem": 0.45677810651998757, "eval_nq_emb_cos_sim": 0.8177816867828369, "eval_nq_emb_cos_sim_sem": 0.007354519698029731, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.322997808456421, "eval_nq_n_ngrams_match_1": 21.96, "eval_nq_n_ngrams_match_2": 7.796, "eval_nq_n_ngrams_match_3": 3.55, "eval_nq_num_pred_words": 48.716, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.206224797939898, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4223990898538695, "eval_nq_runtime": 10.7991, "eval_nq_samples_per_second": 46.3, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4442592377819496, "eval_nq_token_set_f1_sem": 0.004963257871358416, "eval_nq_token_set_precision": 0.3972643729584669, "eval_nq_token_set_recall": 0.5160101037245501, "eval_nq_true_num_tokens": 64.0, "step": 64375 }, { "epoch": 12.36, "learning_rate": 0.001, "loss": 2.6904, "step": 64380 }, { "epoch": 12.36, "learning_rate": 0.001, "loss": 2.6856, "step": 64392 }, { "epoch": 12.37, "learning_rate": 0.001, "loss": 2.6892, "step": 64404 }, { "epoch": 12.37, "learning_rate": 0.001, "loss": 2.6934, "step": 64416 }, { "epoch": 12.37, "learning_rate": 0.001, "loss": 2.6824, "step": 64428 }, { "epoch": 12.37, "learning_rate": 0.001, "loss": 2.6914, "step": 64440 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 2.688, "step": 64452 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 2.69, "step": 64464 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 2.6853, "step": 64476 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 2.6774, "step": 64488 }, { "epoch": 12.38, "learning_rate": 0.001, "loss": 2.6925, "step": 64500 }, { "epoch": 12.39, "learning_rate": 0.001, "loss": 2.6839, "step": 64512 }, { "epoch": 12.39, "learning_rate": 0.001, "loss": 2.6878, "step": 64524 }, { "epoch": 12.39, "learning_rate": 0.001, "loss": 2.6896, "step": 64536 }, { "epoch": 12.39, "learning_rate": 0.001, "loss": 2.6886, "step": 64548 }, { "epoch": 12.4, "learning_rate": 0.001, "loss": 2.6863, "step": 64560 }, { "epoch": 12.4, "learning_rate": 0.001, "loss": 2.6873, "step": 64572 }, { "epoch": 12.4, "learning_rate": 0.001, "loss": 2.681, "step": 64584 }, { "epoch": 12.4, "learning_rate": 0.001, "loss": 2.6856, "step": 64596 }, { "epoch": 12.41, "learning_rate": 0.001, "loss": 2.6789, "step": 64608 }, { "epoch": 12.41, "learning_rate": 0.001, "loss": 2.6911, "step": 64620 }, { "epoch": 12.41, "learning_rate": 0.001, "loss": 2.6839, "step": 64632 }, { "epoch": 12.41, "learning_rate": 0.001, "loss": 2.7002, "step": 64644 }, { "epoch": 12.41, "learning_rate": 0.001, "loss": 2.6871, "step": 64656 }, { "epoch": 12.42, "learning_rate": 0.001, "loss": 2.6924, "step": 64668 }, { "epoch": 12.42, "learning_rate": 0.001, "loss": 2.6904, "step": 64680 }, { "epoch": 12.42, "learning_rate": 0.001, "loss": 2.6838, "step": 64692 }, { "epoch": 12.42, "learning_rate": 0.001, "loss": 2.6814, "step": 64704 }, { "epoch": 12.43, "learning_rate": 0.001, "loss": 2.6835, "step": 64716 }, { "epoch": 12.43, "learning_rate": 0.001, "loss": 2.6906, "step": 64728 }, { "epoch": 12.43, "learning_rate": 0.001, "loss": 2.6931, "step": 64740 }, { "epoch": 12.43, "learning_rate": 0.001, "loss": 2.6826, "step": 64752 }, { "epoch": 12.44, "learning_rate": 0.001, "loss": 2.6874, "step": 64764 }, { "epoch": 12.44, "learning_rate": 0.001, "loss": 2.6944, "step": 64776 }, { "epoch": 12.44, "learning_rate": 0.001, "loss": 2.6914, "step": 64788 }, { "epoch": 12.44, "learning_rate": 0.001, "loss": 2.6891, "step": 64800 }, { "epoch": 12.44, "learning_rate": 0.001, "loss": 2.6803, "step": 64812 }, { "epoch": 12.45, "learning_rate": 0.001, "loss": 2.6768, "step": 64824 }, { "epoch": 12.45, "learning_rate": 0.001, "loss": 2.6961, "step": 64836 }, { "epoch": 12.45, "learning_rate": 0.001, "loss": 2.6856, "step": 64848 }, { "epoch": 12.45, "learning_rate": 0.001, "loss": 2.6857, "step": 64860 }, { "epoch": 12.46, "learning_rate": 0.001, "loss": 2.6954, "step": 64872 }, { "epoch": 12.46, "learning_rate": 0.001, "loss": 2.7019, "step": 64884 }, { "epoch": 12.46, "learning_rate": 0.001, "loss": 2.6918, "step": 64896 }, { "epoch": 12.46, "learning_rate": 0.001, "loss": 2.6831, "step": 64908 }, { "epoch": 12.47, "learning_rate": 0.001, "loss": 2.6823, "step": 64920 }, { "epoch": 12.47, "learning_rate": 0.001, "loss": 2.6848, "step": 64932 }, { "epoch": 12.47, "learning_rate": 0.001, "loss": 2.6873, "step": 64944 }, { "epoch": 12.47, "learning_rate": 0.001, "loss": 2.6795, "step": 64956 }, { "epoch": 12.47, "learning_rate": 0.001, "loss": 2.6887, "step": 64968 }, { "epoch": 12.48, "learning_rate": 0.001, "loss": 2.6906, "step": 64980 }, { "epoch": 12.48, "learning_rate": 0.001, "loss": 2.6797, "step": 64992 }, { "epoch": 12.48, "eval_ag_news_accuracy": 0.308, "eval_ag_news_bleu_score": 4.374347537843929, "eval_ag_news_bleu_score_sem": 0.13721734946471748, "eval_ag_news_emb_cos_sim": 0.7834879159927368, "eval_ag_news_emb_cos_sim_sem": 0.007201536516102805, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.706989049911499, "eval_ag_news_n_ngrams_match_1": 13.07, "eval_ag_news_n_ngrams_match_2": 2.74, "eval_ag_news_n_ngrams_match_3": 0.746, "eval_ag_news_num_pred_words": 45.996, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.73098275552651, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3213701484002274, "eval_ag_news_runtime": 10.0694, "eval_ag_news_samples_per_second": 49.656, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.3311656501783919, "eval_ag_news_token_set_f1_sem": 0.004351692895305975, "eval_ag_news_token_set_precision": 0.3101803701591917, "eval_ag_news_token_set_recall": 0.3718769469196561, "eval_ag_news_true_num_tokens": 56.09375, "step": 65000 }, { "epoch": 12.48, "eval_anthropic_toxic_prompts_accuracy": 0.1079375, "eval_anthropic_toxic_prompts_bleu_score": 2.855755712661894, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11784352219996941, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6488279104232788, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00961289272103519, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3516156673431396, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.782, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.696, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592, "eval_anthropic_toxic_prompts_num_pred_words": 47.336, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.548821801245555, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1992352239985944, "eval_anthropic_toxic_prompts_runtime": 9.5057, "eval_anthropic_toxic_prompts_samples_per_second": 52.6, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.3429785052921255, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006453345361771795, "eval_anthropic_toxic_prompts_token_set_precision": 0.4021736002401946, "eval_anthropic_toxic_prompts_token_set_recall": 0.326806010723115, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 65000 }, { "epoch": 12.48, "eval_arxiv_accuracy": 0.3331875, "eval_arxiv_bleu_score": 4.019372762429713, "eval_arxiv_bleu_score_sem": 0.1164797190885806, "eval_arxiv_emb_cos_sim": 0.7241116762161255, "eval_arxiv_emb_cos_sim_sem": 0.010409958115005025, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5568759441375732, "eval_arxiv_n_ngrams_match_1": 13.894, "eval_arxiv_n_ngrams_match_2": 2.618, "eval_arxiv_n_ngrams_match_3": 0.578, "eval_arxiv_num_pred_words": 39.56, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.05351676594371, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3335906106363149, "eval_arxiv_runtime": 9.7275, "eval_arxiv_samples_per_second": 51.4, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.3264062659970621, "eval_arxiv_token_set_f1_sem": 0.004235282809990203, "eval_arxiv_token_set_precision": 0.27493075307382425, "eval_arxiv_token_set_recall": 0.42214591942239144, "eval_arxiv_true_num_tokens": 64.0, "step": 65000 }, { "epoch": 12.48, "eval_python_code_alpaca_accuracy": 0.15196875, "eval_python_code_alpaca_bleu_score": 4.036770555605847, "eval_python_code_alpaca_bleu_score_sem": 0.12870676757623517, "eval_python_code_alpaca_emb_cos_sim": 0.7205531001091003, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010142604849832234, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0249199867248535, "eval_python_code_alpaca_n_ngrams_match_1": 8.704, "eval_python_code_alpaca_n_ngrams_match_2": 2.4, "eval_python_code_alpaca_n_ngrams_match_3": 0.72, "eval_python_code_alpaca_num_pred_words": 41.566, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.592356983351635, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30164487567760767, "eval_python_code_alpaca_runtime": 9.6549, "eval_python_code_alpaca_samples_per_second": 51.787, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.43931315354708417, "eval_python_code_alpaca_token_set_f1_sem": 0.005913209374507224, "eval_python_code_alpaca_token_set_precision": 0.4701370718763214, "eval_python_code_alpaca_token_set_recall": 0.4419615733144766, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 65000 }, { "epoch": 12.48, "eval_wikibio_accuracy": 0.31025, "eval_wikibio_bleu_score": 5.440243916062389, "eval_wikibio_bleu_score_sem": 0.19468922014442588, "eval_wikibio_emb_cos_sim": 0.7150151133537292, "eval_wikibio_emb_cos_sim_sem": 0.009565028389105988, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.914822816848755, "eval_wikibio_n_ngrams_match_1": 9.504, "eval_wikibio_n_ngrams_match_2": 3.104, "eval_wikibio_n_ngrams_match_3": 1.09, "eval_wikibio_num_pred_words": 35.788, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.14018672765468, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33271435348160244, "eval_wikibio_runtime": 9.1647, "eval_wikibio_samples_per_second": 54.557, "eval_wikibio_steps_per_second": 0.109, "eval_wikibio_token_set_f1": 0.30462833691382407, "eval_wikibio_token_set_f1_sem": 0.005561194622509602, "eval_wikibio_token_set_precision": 0.308499690201193, "eval_wikibio_token_set_recall": 0.32106598372663964, "eval_wikibio_true_num_tokens": 61.1328125, "step": 65000 }, { "epoch": 12.48, "eval_nq_accuracy": 0.509125, "eval_nq_bleu_score": 10.812410941434521, "eval_nq_bleu_score_sem": 0.4540598803301628, "eval_nq_emb_cos_sim": 0.8146822452545166, "eval_nq_emb_cos_sim_sem": 0.00762139749406776, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.317577838897705, "eval_nq_n_ngrams_match_1": 22.112, "eval_nq_n_ngrams_match_2": 7.936, "eval_nq_n_ngrams_match_3": 3.556, "eval_nq_num_pred_words": 49.076, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.151057009144601, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4239611719301859, "eval_nq_runtime": 10.3809, "eval_nq_samples_per_second": 48.166, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.44562993003973433, "eval_nq_token_set_f1_sem": 0.004944169031038659, "eval_nq_token_set_precision": 0.3981133594800232, "eval_nq_token_set_recall": 0.5164183346225726, "eval_nq_true_num_tokens": 64.0, "step": 65000 }, { "epoch": 12.48, "learning_rate": 0.001, "loss": 2.6821, "step": 65004 }, { "epoch": 12.48, "learning_rate": 0.001, "loss": 2.6806, "step": 65016 }, { "epoch": 12.49, "learning_rate": 0.001, "loss": 2.6801, "step": 65028 }, { "epoch": 12.49, "learning_rate": 0.001, "loss": 2.681, "step": 65040 }, { "epoch": 12.49, "learning_rate": 0.001, "loss": 2.6947, "step": 65052 }, { "epoch": 12.49, "learning_rate": 0.001, "loss": 2.6935, "step": 65064 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 2.6879, "step": 65076 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 2.6819, "step": 65088 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 2.6909, "step": 65100 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 2.6952, "step": 65112 }, { "epoch": 12.5, "learning_rate": 0.001, "loss": 2.6819, "step": 65124 }, { "epoch": 12.51, "learning_rate": 0.001, "loss": 2.687, "step": 65136 }, { "epoch": 12.51, "learning_rate": 0.001, "loss": 2.6737, "step": 65148 }, { "epoch": 12.51, "learning_rate": 0.001, "loss": 2.6802, "step": 65160 }, { "epoch": 12.51, "learning_rate": 0.001, "loss": 2.6811, "step": 65172 }, { "epoch": 12.52, "learning_rate": 0.001, "loss": 2.6866, "step": 65184 }, { "epoch": 12.52, "learning_rate": 0.001, "loss": 2.6879, "step": 65196 }, { "epoch": 12.52, "learning_rate": 0.001, "loss": 2.6738, "step": 65208 }, { "epoch": 12.52, "learning_rate": 0.001, "loss": 2.6868, "step": 65220 }, { "epoch": 12.53, "learning_rate": 0.001, "loss": 2.6861, "step": 65232 }, { "epoch": 12.53, "learning_rate": 0.001, "loss": 2.6764, "step": 65244 }, { "epoch": 12.53, "learning_rate": 0.001, "loss": 2.6772, "step": 65256 }, { "epoch": 12.53, "learning_rate": 0.001, "loss": 2.6941, "step": 65268 }, { "epoch": 12.53, "learning_rate": 0.001, "loss": 2.6784, "step": 65280 }, { "epoch": 12.54, "learning_rate": 0.001, "loss": 2.6809, "step": 65292 }, { "epoch": 12.54, "learning_rate": 0.001, "loss": 2.6854, "step": 65304 }, { "epoch": 12.54, "learning_rate": 0.001, "loss": 2.6852, "step": 65316 }, { "epoch": 12.54, "learning_rate": 0.001, "loss": 2.6801, "step": 65328 }, { "epoch": 12.55, "learning_rate": 0.001, "loss": 2.6873, "step": 65340 }, { "epoch": 12.55, "learning_rate": 0.001, "loss": 2.6878, "step": 65352 }, { "epoch": 12.55, "learning_rate": 0.001, "loss": 2.6828, "step": 65364 }, { "epoch": 12.55, "learning_rate": 0.001, "loss": 2.6776, "step": 65376 }, { "epoch": 12.56, "learning_rate": 0.001, "loss": 2.6807, "step": 65388 }, { "epoch": 12.56, "learning_rate": 0.001, "loss": 2.6886, "step": 65400 }, { "epoch": 12.56, "learning_rate": 0.001, "loss": 2.6701, "step": 65412 }, { "epoch": 12.56, "learning_rate": 0.001, "loss": 2.6794, "step": 65424 }, { "epoch": 12.56, "learning_rate": 0.001, "loss": 2.6799, "step": 65436 }, { "epoch": 12.57, "learning_rate": 0.001, "loss": 2.6858, "step": 65448 }, { "epoch": 12.57, "learning_rate": 0.001, "loss": 2.6834, "step": 65460 }, { "epoch": 12.57, "learning_rate": 0.001, "loss": 2.6737, "step": 65472 }, { "epoch": 12.57, "learning_rate": 0.001, "loss": 2.6833, "step": 65484 }, { "epoch": 12.58, "learning_rate": 0.001, "loss": 2.6794, "step": 65496 }, { "epoch": 12.58, "learning_rate": 0.001, "loss": 2.6803, "step": 65508 }, { "epoch": 12.58, "learning_rate": 0.001, "loss": 2.6815, "step": 65520 }, { "epoch": 12.58, "learning_rate": 0.001, "loss": 2.6901, "step": 65532 }, { "epoch": 12.59, "learning_rate": 0.001, "loss": 2.6797, "step": 65544 }, { "epoch": 12.59, "learning_rate": 0.001, "loss": 2.6786, "step": 65556 }, { "epoch": 12.59, "learning_rate": 0.001, "loss": 2.6901, "step": 65568 }, { "epoch": 12.59, "learning_rate": 0.001, "loss": 2.6897, "step": 65580 }, { "epoch": 12.59, "learning_rate": 0.001, "loss": 2.6794, "step": 65592 }, { "epoch": 12.6, "learning_rate": 0.001, "loss": 2.6954, "step": 65604 }, { "epoch": 12.6, "learning_rate": 0.001, "loss": 2.693, "step": 65616 }, { "epoch": 12.6, "eval_ag_news_accuracy": 0.3091875, "eval_ag_news_bleu_score": 4.52125321349297, "eval_ag_news_bleu_score_sem": 0.14446214421973427, "eval_ag_news_emb_cos_sim": 0.7879672646522522, "eval_ag_news_emb_cos_sim_sem": 0.007861516651112926, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.7005038261413574, "eval_ag_news_n_ngrams_match_1": 13.25, "eval_ag_news_n_ngrams_match_2": 2.798, "eval_ag_news_n_ngrams_match_3": 0.79, "eval_ag_news_num_pred_words": 45.912, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.46768790380134, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3276524051288814, "eval_ag_news_runtime": 10.0003, "eval_ag_news_samples_per_second": 49.998, "eval_ag_news_steps_per_second": 0.1, "eval_ag_news_token_set_f1": 0.3330800950225034, "eval_ag_news_token_set_f1_sem": 0.004447843565181526, "eval_ag_news_token_set_precision": 0.31251921517980064, "eval_ag_news_token_set_recall": 0.3719246932783074, "eval_ag_news_true_num_tokens": 56.09375, "step": 65625 }, { "epoch": 12.6, "eval_anthropic_toxic_prompts_accuracy": 0.10803125, "eval_anthropic_toxic_prompts_bleu_score": 2.878082959644404, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1155829724994168, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6497258543968201, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009816536414872208, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.39844012260437, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.762, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63, "eval_anthropic_toxic_prompts_num_pred_words": 47.546, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.917396160691325, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19520056254722135, "eval_anthropic_toxic_prompts_runtime": 17.2126, "eval_anthropic_toxic_prompts_samples_per_second": 29.048, "eval_anthropic_toxic_prompts_steps_per_second": 0.058, "eval_anthropic_toxic_prompts_token_set_f1": 0.34596108229662076, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006492018951235808, "eval_anthropic_toxic_prompts_token_set_precision": 0.40347523374858574, "eval_anthropic_toxic_prompts_token_set_recall": 0.3318174650246588, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 65625 }, { "epoch": 12.6, "eval_arxiv_accuracy": 0.33315625, "eval_arxiv_bleu_score": 3.974489017421528, "eval_arxiv_bleu_score_sem": 0.11660001088546988, "eval_arxiv_emb_cos_sim": 0.7308480143547058, "eval_arxiv_emb_cos_sim_sem": 0.009328456152780816, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.56752347946167, "eval_arxiv_n_ngrams_match_1": 14.306, "eval_arxiv_n_ngrams_match_2": 2.68, "eval_arxiv_n_ngrams_match_3": 0.558, "eval_arxiv_num_pred_words": 40.606, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 35.4287444037503, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33742940953837974, "eval_arxiv_runtime": 10.8206, "eval_arxiv_samples_per_second": 46.208, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.3349212228296785, "eval_arxiv_token_set_f1_sem": 0.004246400294438401, "eval_arxiv_token_set_precision": 0.28452112061955465, "eval_arxiv_token_set_recall": 0.42511080807128715, "eval_arxiv_true_num_tokens": 64.0, "step": 65625 }, { "epoch": 12.6, "eval_python_code_alpaca_accuracy": 0.15303125, "eval_python_code_alpaca_bleu_score": 3.881755119300968, "eval_python_code_alpaca_bleu_score_sem": 0.11477316286635027, "eval_python_code_alpaca_emb_cos_sim": 0.7408278584480286, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008647342007420514, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.027876138687134, "eval_python_code_alpaca_n_ngrams_match_1": 9.184, "eval_python_code_alpaca_n_ngrams_match_2": 2.442, "eval_python_code_alpaca_n_ngrams_match_3": 0.666, "eval_python_code_alpaca_num_pred_words": 44.004, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.653321185182048, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3065611845554336, "eval_python_code_alpaca_runtime": 10.5685, "eval_python_code_alpaca_samples_per_second": 47.31, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.4558760969528058, "eval_python_code_alpaca_token_set_f1_sem": 0.0054846314345116335, "eval_python_code_alpaca_token_set_precision": 0.4970199148987927, "eval_python_code_alpaca_token_set_recall": 0.44236831507373053, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 65625 }, { "epoch": 12.6, "eval_wikibio_accuracy": 0.309625, "eval_wikibio_bleu_score": 5.386481420932874, "eval_wikibio_bleu_score_sem": 0.20129296594179516, "eval_wikibio_emb_cos_sim": 0.7220320701599121, "eval_wikibio_emb_cos_sim_sem": 0.009761132406871115, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9536080360412598, "eval_wikibio_n_ngrams_match_1": 9.648, "eval_wikibio_n_ngrams_match_2": 3.08, "eval_wikibio_n_ngrams_match_3": 1.062, "eval_wikibio_num_pred_words": 36.232, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 52.12308996244558, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3337593941987438, "eval_wikibio_runtime": 15.9833, "eval_wikibio_samples_per_second": 31.283, "eval_wikibio_steps_per_second": 0.063, "eval_wikibio_token_set_f1": 0.3018513600041814, "eval_wikibio_token_set_f1_sem": 0.005631970735854331, "eval_wikibio_token_set_precision": 0.3113866945580489, "eval_wikibio_token_set_recall": 0.3093409905871538, "eval_wikibio_true_num_tokens": 61.1328125, "step": 65625 }, { "epoch": 12.6, "eval_nq_accuracy": 0.5074375, "eval_nq_bleu_score": 10.758401580059093, "eval_nq_bleu_score_sem": 0.46143902103771645, "eval_nq_emb_cos_sim": 0.8136475086212158, "eval_nq_emb_cos_sim_sem": 0.007764021770884117, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.320363759994507, "eval_nq_n_ngrams_match_1": 22.06, "eval_nq_n_ngrams_match_2": 7.76, "eval_nq_n_ngrams_match_3": 3.518, "eval_nq_num_pred_words": 48.952, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.179376482614051, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42556207563774306, "eval_nq_runtime": 10.3468, "eval_nq_samples_per_second": 48.324, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4420851953351193, "eval_nq_token_set_f1_sem": 0.005058979697250073, "eval_nq_token_set_precision": 0.39631357733695816, "eval_nq_token_set_recall": 0.5104212615548354, "eval_nq_true_num_tokens": 64.0, "step": 65625 }, { "epoch": 12.6, "learning_rate": 0.001, "loss": 2.6874, "step": 65628 }, { "epoch": 12.6, "learning_rate": 0.001, "loss": 2.6819, "step": 65640 }, { "epoch": 12.61, "learning_rate": 0.001, "loss": 2.6784, "step": 65652 }, { "epoch": 12.61, "learning_rate": 0.001, "loss": 2.692, "step": 65664 }, { "epoch": 12.61, "learning_rate": 0.001, "loss": 2.6899, "step": 65676 }, { "epoch": 12.61, "learning_rate": 0.001, "loss": 2.6817, "step": 65688 }, { "epoch": 12.62, "learning_rate": 0.001, "loss": 2.68, "step": 65700 }, { "epoch": 12.62, "learning_rate": 0.001, "loss": 2.6841, "step": 65712 }, { "epoch": 12.62, "learning_rate": 0.001, "loss": 2.6906, "step": 65724 }, { "epoch": 12.62, "learning_rate": 0.001, "loss": 2.6877, "step": 65736 }, { "epoch": 12.62, "learning_rate": 0.001, "loss": 2.6877, "step": 65748 }, { "epoch": 12.63, "learning_rate": 0.001, "loss": 2.6867, "step": 65760 }, { "epoch": 12.63, "learning_rate": 0.001, "loss": 2.6872, "step": 65772 }, { "epoch": 12.63, "learning_rate": 0.001, "loss": 2.681, "step": 65784 }, { "epoch": 12.63, "learning_rate": 0.001, "loss": 2.6837, "step": 65796 }, { "epoch": 12.64, "learning_rate": 0.001, "loss": 2.6825, "step": 65808 }, { "epoch": 12.64, "learning_rate": 0.001, "loss": 2.678, "step": 65820 }, { "epoch": 12.64, "learning_rate": 0.001, "loss": 2.6822, "step": 65832 }, { "epoch": 12.64, "learning_rate": 0.001, "loss": 2.6865, "step": 65844 }, { "epoch": 12.65, "learning_rate": 0.001, "loss": 2.6712, "step": 65856 }, { "epoch": 12.65, "learning_rate": 0.001, "loss": 2.6836, "step": 65868 }, { "epoch": 12.65, "learning_rate": 0.001, "loss": 2.6695, "step": 65880 }, { "epoch": 12.65, "learning_rate": 0.001, "loss": 2.6868, "step": 65892 }, { "epoch": 12.65, "learning_rate": 0.001, "loss": 2.6885, "step": 65904 }, { "epoch": 12.66, "learning_rate": 0.001, "loss": 2.6816, "step": 65916 }, { "epoch": 12.66, "learning_rate": 0.001, "loss": 2.6727, "step": 65928 }, { "epoch": 12.66, "learning_rate": 0.001, "loss": 2.686, "step": 65940 }, { "epoch": 12.66, "learning_rate": 0.001, "loss": 2.6903, "step": 65952 }, { "epoch": 12.67, "learning_rate": 0.001, "loss": 2.6858, "step": 65964 }, { "epoch": 12.67, "learning_rate": 0.001, "loss": 2.6777, "step": 65976 }, { "epoch": 12.67, "learning_rate": 0.001, "loss": 2.6715, "step": 65988 }, { "epoch": 12.67, "learning_rate": 0.001, "loss": 2.6947, "step": 66000 }, { "epoch": 12.68, "learning_rate": 0.001, "loss": 2.6762, "step": 66012 }, { "epoch": 12.68, "learning_rate": 0.001, "loss": 2.6789, "step": 66024 }, { "epoch": 12.68, "learning_rate": 0.001, "loss": 2.6846, "step": 66036 }, { "epoch": 12.68, "learning_rate": 0.001, "loss": 2.6804, "step": 66048 }, { "epoch": 12.68, "learning_rate": 0.001, "loss": 2.6946, "step": 66060 }, { "epoch": 12.69, "learning_rate": 0.001, "loss": 2.6971, "step": 66072 }, { "epoch": 12.69, "learning_rate": 0.001, "loss": 2.6944, "step": 66084 }, { "epoch": 12.69, "learning_rate": 0.001, "loss": 2.6882, "step": 66096 }, { "epoch": 12.69, "learning_rate": 0.001, "loss": 2.6861, "step": 66108 }, { "epoch": 12.7, "learning_rate": 0.001, "loss": 2.699, "step": 66120 }, { "epoch": 12.7, "learning_rate": 0.001, "loss": 2.685, "step": 66132 }, { "epoch": 12.7, "learning_rate": 0.001, "loss": 2.686, "step": 66144 }, { "epoch": 12.7, "learning_rate": 0.001, "loss": 2.6768, "step": 66156 }, { "epoch": 12.71, "learning_rate": 0.001, "loss": 2.6897, "step": 66168 }, { "epoch": 12.71, "learning_rate": 0.001, "loss": 2.6843, "step": 66180 }, { "epoch": 12.71, "learning_rate": 0.001, "loss": 2.6797, "step": 66192 }, { "epoch": 12.71, "learning_rate": 0.001, "loss": 2.6775, "step": 66204 }, { "epoch": 12.71, "learning_rate": 0.001, "loss": 2.6889, "step": 66216 }, { "epoch": 12.72, "learning_rate": 0.001, "loss": 2.6816, "step": 66228 }, { "epoch": 12.72, "learning_rate": 0.001, "loss": 2.6813, "step": 66240 }, { "epoch": 12.72, "eval_ag_news_accuracy": 0.30821875, "eval_ag_news_bleu_score": 4.442264679828784, "eval_ag_news_bleu_score_sem": 0.14079742645729434, "eval_ag_news_emb_cos_sim": 0.7865789532661438, "eval_ag_news_emb_cos_sim_sem": 0.00901036071617894, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.714829683303833, "eval_ag_news_n_ngrams_match_1": 13.166, "eval_ag_news_n_ngrams_match_2": 2.81, "eval_ag_news_n_ngrams_match_3": 0.772, "eval_ag_news_num_pred_words": 46.096, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 41.051594716966605, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32317659516122965, "eval_ag_news_runtime": 9.6051, "eval_ag_news_samples_per_second": 52.056, "eval_ag_news_steps_per_second": 0.104, "eval_ag_news_token_set_f1": 0.3328451876967112, "eval_ag_news_token_set_f1_sem": 0.004605688669783893, "eval_ag_news_token_set_precision": 0.31148388027314167, "eval_ag_news_token_set_recall": 0.37319743833185304, "eval_ag_news_true_num_tokens": 56.09375, "step": 66250 }, { "epoch": 12.72, "eval_anthropic_toxic_prompts_accuracy": 0.109125, "eval_anthropic_toxic_prompts_bleu_score": 2.858608667803098, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10941246782311037, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6308979392051697, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010247484648173561, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3844096660614014, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.772, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.71, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.578, "eval_anthropic_toxic_prompts_num_pred_words": 47.188, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.500572385663904, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19990366830034043, "eval_anthropic_toxic_prompts_runtime": 9.5142, "eval_anthropic_toxic_prompts_samples_per_second": 52.553, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.341990617461851, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657963308209183, "eval_anthropic_toxic_prompts_token_set_precision": 0.4026880005881458, "eval_anthropic_toxic_prompts_token_set_recall": 0.327636712255066, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 66250 }, { "epoch": 12.72, "eval_arxiv_accuracy": 0.33315625, "eval_arxiv_bleu_score": 3.8363732637314643, "eval_arxiv_bleu_score_sem": 0.11065596222794323, "eval_arxiv_emb_cos_sim": 0.7208958864212036, "eval_arxiv_emb_cos_sim_sem": 0.009142144033304886, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.55238676071167, "eval_arxiv_n_ngrams_match_1": 13.842, "eval_arxiv_n_ngrams_match_2": 2.624, "eval_arxiv_n_ngrams_match_3": 0.52, "eval_arxiv_num_pred_words": 39.604, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.89650778419807, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3293064781497945, "eval_arxiv_runtime": 14.1947, "eval_arxiv_samples_per_second": 35.224, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.32895586797041415, "eval_arxiv_token_set_f1_sem": 0.0042815217620811455, "eval_arxiv_token_set_precision": 0.273232899376966, "eval_arxiv_token_set_recall": 0.4384903963321714, "eval_arxiv_true_num_tokens": 64.0, "step": 66250 }, { "epoch": 12.72, "eval_python_code_alpaca_accuracy": 0.1526875, "eval_python_code_alpaca_bleu_score": 4.138481210565817, "eval_python_code_alpaca_bleu_score_sem": 0.13264847535511604, "eval_python_code_alpaca_emb_cos_sim": 0.735192060470581, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008064912073670438, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.03822660446167, "eval_python_code_alpaca_n_ngrams_match_1": 9.11, "eval_python_code_alpaca_n_ngrams_match_2": 2.51, "eval_python_code_alpaca_n_ngrams_match_3": 0.782, "eval_python_code_alpaca_num_pred_words": 42.886, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.868202823369845, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3079078065785898, "eval_python_code_alpaca_runtime": 10.467, "eval_python_code_alpaca_samples_per_second": 47.769, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4511836295390964, "eval_python_code_alpaca_token_set_f1_sem": 0.00555212428173263, "eval_python_code_alpaca_token_set_precision": 0.4930051052844708, "eval_python_code_alpaca_token_set_recall": 0.43687350950886256, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 66250 }, { "epoch": 12.72, "eval_wikibio_accuracy": 0.31246875, "eval_wikibio_bleu_score": 5.573293920063445, "eval_wikibio_bleu_score_sem": 0.21811539733169838, "eval_wikibio_emb_cos_sim": 0.7096949815750122, "eval_wikibio_emb_cos_sim_sem": 0.011197715640899034, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9283409118652344, "eval_wikibio_n_ngrams_match_1": 9.36, "eval_wikibio_n_ngrams_match_2": 3.042, "eval_wikibio_n_ngrams_match_3": 1.106, "eval_wikibio_num_pred_words": 35.092, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.82258853044036, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32281070223805103, "eval_wikibio_runtime": 12.0996, "eval_wikibio_samples_per_second": 41.324, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.29821455820854004, "eval_wikibio_token_set_f1_sem": 0.005784116259377843, "eval_wikibio_token_set_precision": 0.30255744755681513, "eval_wikibio_token_set_recall": 0.31359513218539303, "eval_wikibio_true_num_tokens": 61.1328125, "step": 66250 }, { "epoch": 12.72, "eval_nq_accuracy": 0.509125, "eval_nq_bleu_score": 10.913670274634493, "eval_nq_bleu_score_sem": 0.47292821120609474, "eval_nq_emb_cos_sim": 0.8075646162033081, "eval_nq_emb_cos_sim_sem": 0.007970604548860264, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.315042018890381, "eval_nq_n_ngrams_match_1": 21.996, "eval_nq_n_ngrams_match_2": 7.912, "eval_nq_n_ngrams_match_3": 3.584, "eval_nq_num_pred_words": 48.52, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.125348365707886, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4221893049153954, "eval_nq_runtime": 9.8319, "eval_nq_samples_per_second": 50.855, "eval_nq_steps_per_second": 0.102, "eval_nq_token_set_f1": 0.4429696630094818, "eval_nq_token_set_f1_sem": 0.005257962618743179, "eval_nq_token_set_precision": 0.39488895617080366, "eval_nq_token_set_recall": 0.5162034044853563, "eval_nq_true_num_tokens": 64.0, "step": 66250 }, { "epoch": 12.72, "learning_rate": 0.001, "loss": 2.682, "step": 66252 }, { "epoch": 12.72, "learning_rate": 0.001, "loss": 2.6797, "step": 66264 }, { "epoch": 12.73, "learning_rate": 0.001, "loss": 2.6733, "step": 66276 }, { "epoch": 12.73, "learning_rate": 0.001, "loss": 2.6888, "step": 66288 }, { "epoch": 12.73, "learning_rate": 0.001, "loss": 2.6859, "step": 66300 }, { "epoch": 12.73, "learning_rate": 0.001, "loss": 2.684, "step": 66312 }, { "epoch": 12.74, "learning_rate": 0.001, "loss": 2.6831, "step": 66324 }, { "epoch": 12.74, "learning_rate": 0.001, "loss": 2.678, "step": 66336 }, { "epoch": 12.74, "learning_rate": 0.001, "loss": 2.6855, "step": 66348 }, { "epoch": 12.74, "learning_rate": 0.001, "loss": 2.6827, "step": 66360 }, { "epoch": 12.74, "learning_rate": 0.001, "loss": 2.6789, "step": 66372 }, { "epoch": 12.75, "learning_rate": 0.001, "loss": 2.6834, "step": 66384 }, { "epoch": 12.75, "learning_rate": 0.001, "loss": 2.6793, "step": 66396 }, { "epoch": 12.75, "learning_rate": 0.001, "loss": 2.6647, "step": 66408 }, { "epoch": 12.75, "learning_rate": 0.001, "loss": 2.6817, "step": 66420 }, { "epoch": 12.76, "learning_rate": 0.001, "loss": 2.6787, "step": 66432 }, { "epoch": 12.76, "learning_rate": 0.001, "loss": 2.6916, "step": 66444 }, { "epoch": 12.76, "learning_rate": 0.001, "loss": 2.6769, "step": 66456 }, { "epoch": 12.76, "learning_rate": 0.001, "loss": 2.6812, "step": 66468 }, { "epoch": 12.76, "learning_rate": 0.001, "loss": 2.6789, "step": 66480 }, { "epoch": 12.77, "learning_rate": 0.001, "loss": 2.6803, "step": 66492 }, { "epoch": 12.77, "learning_rate": 0.001, "loss": 2.6928, "step": 66504 }, { "epoch": 12.77, "learning_rate": 0.001, "loss": 2.6873, "step": 66516 }, { "epoch": 12.77, "learning_rate": 0.001, "loss": 2.6802, "step": 66528 }, { "epoch": 12.78, "learning_rate": 0.001, "loss": 2.6918, "step": 66540 }, { "epoch": 12.78, "learning_rate": 0.001, "loss": 2.6771, "step": 66552 }, { "epoch": 12.78, "learning_rate": 0.001, "loss": 2.6758, "step": 66564 }, { "epoch": 12.78, "learning_rate": 0.001, "loss": 2.6857, "step": 66576 }, { "epoch": 12.79, "learning_rate": 0.001, "loss": 2.6764, "step": 66588 }, { "epoch": 12.79, "learning_rate": 0.001, "loss": 2.6821, "step": 66600 }, { "epoch": 12.79, "learning_rate": 0.001, "loss": 2.6706, "step": 66612 }, { "epoch": 12.79, "learning_rate": 0.001, "loss": 2.6728, "step": 66624 }, { "epoch": 12.79, "learning_rate": 0.001, "loss": 2.6798, "step": 66636 }, { "epoch": 12.8, "learning_rate": 0.001, "loss": 2.6915, "step": 66648 }, { "epoch": 12.8, "learning_rate": 0.001, "loss": 2.6825, "step": 66660 }, { "epoch": 12.8, "learning_rate": 0.001, "loss": 2.6724, "step": 66672 }, { "epoch": 12.8, "learning_rate": 0.001, "loss": 2.6862, "step": 66684 }, { "epoch": 12.81, "learning_rate": 0.001, "loss": 2.6827, "step": 66696 }, { "epoch": 12.81, "learning_rate": 0.001, "loss": 2.6874, "step": 66708 }, { "epoch": 12.81, "learning_rate": 0.001, "loss": 2.6923, "step": 66720 }, { "epoch": 12.81, "learning_rate": 0.001, "loss": 2.6744, "step": 66732 }, { "epoch": 12.82, "learning_rate": 0.001, "loss": 2.678, "step": 66744 }, { "epoch": 12.82, "learning_rate": 0.001, "loss": 2.6865, "step": 66756 }, { "epoch": 12.82, "learning_rate": 0.001, "loss": 2.6837, "step": 66768 }, { "epoch": 12.82, "learning_rate": 0.001, "loss": 2.695, "step": 66780 }, { "epoch": 12.82, "learning_rate": 0.001, "loss": 2.6833, "step": 66792 }, { "epoch": 12.83, "learning_rate": 0.001, "loss": 2.6914, "step": 66804 }, { "epoch": 12.83, "learning_rate": 0.001, "loss": 2.6772, "step": 66816 }, { "epoch": 12.83, "learning_rate": 0.001, "loss": 2.6745, "step": 66828 }, { "epoch": 12.83, "learning_rate": 0.001, "loss": 2.6764, "step": 66840 }, { "epoch": 12.84, "learning_rate": 0.001, "loss": 2.6876, "step": 66852 }, { "epoch": 12.84, "learning_rate": 0.001, "loss": 2.6884, "step": 66864 }, { "epoch": 12.84, "eval_ag_news_accuracy": 0.30990625, "eval_ag_news_bleu_score": 4.591833444851182, "eval_ag_news_bleu_score_sem": 0.14933293000068457, "eval_ag_news_emb_cos_sim": 0.7915327548980713, "eval_ag_news_emb_cos_sim_sem": 0.007402045538081828, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6918368339538574, "eval_ag_news_n_ngrams_match_1": 13.414, "eval_ag_news_n_ngrams_match_2": 2.868, "eval_ag_news_n_ngrams_match_3": 0.854, "eval_ag_news_num_pred_words": 46.658, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.118470288071606, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3289175244151389, "eval_ag_news_runtime": 11.2562, "eval_ag_news_samples_per_second": 44.42, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.33717054342571146, "eval_ag_news_token_set_f1_sem": 0.004481826732358835, "eval_ag_news_token_set_precision": 0.31705917946290574, "eval_ag_news_token_set_recall": 0.37557800892346177, "eval_ag_news_true_num_tokens": 56.09375, "step": 66875 }, { "epoch": 12.84, "eval_anthropic_toxic_prompts_accuracy": 0.10896875, "eval_anthropic_toxic_prompts_bleu_score": 2.837071483032518, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10972539959970126, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.649604320526123, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009455121261990721, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3737759590148926, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.646, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.676, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.608, "eval_anthropic_toxic_prompts_num_pred_words": 47.306, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.18853394422275, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19521387073120994, "eval_anthropic_toxic_prompts_runtime": 9.5167, "eval_anthropic_toxic_prompts_samples_per_second": 52.539, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.33675572830619005, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248050830522037, "eval_anthropic_toxic_prompts_token_set_precision": 0.3964869716059104, "eval_anthropic_toxic_prompts_token_set_recall": 0.32680392516684714, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 66875 }, { "epoch": 12.84, "eval_arxiv_accuracy": 0.3318125, "eval_arxiv_bleu_score": 4.0579457856255505, "eval_arxiv_bleu_score_sem": 0.11436450405212448, "eval_arxiv_emb_cos_sim": 0.725619912147522, "eval_arxiv_emb_cos_sim_sem": 0.009693216711762858, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.549726724624634, "eval_arxiv_n_ngrams_match_1": 14.132, "eval_arxiv_n_ngrams_match_2": 2.762, "eval_arxiv_n_ngrams_match_3": 0.608, "eval_arxiv_num_pred_words": 39.788, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.80380516499881, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3340143393950715, "eval_arxiv_runtime": 10.1627, "eval_arxiv_samples_per_second": 49.199, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.33056309315273186, "eval_arxiv_token_set_f1_sem": 0.0042826263102017665, "eval_arxiv_token_set_precision": 0.27711654656472556, "eval_arxiv_token_set_recall": 0.4354247871721328, "eval_arxiv_true_num_tokens": 64.0, "step": 66875 }, { "epoch": 12.84, "eval_python_code_alpaca_accuracy": 0.15496875, "eval_python_code_alpaca_bleu_score": 4.203211920627831, "eval_python_code_alpaca_bleu_score_sem": 0.1376860840598939, "eval_python_code_alpaca_emb_cos_sim": 0.7402169704437256, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008571539583284654, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9985663890838623, "eval_python_code_alpaca_n_ngrams_match_1": 9.232, "eval_python_code_alpaca_n_ngrams_match_2": 2.598, "eval_python_code_alpaca_n_ngrams_match_3": 0.82, "eval_python_code_alpaca_num_pred_words": 43.698, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.0567627086403, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30921974570170274, "eval_python_code_alpaca_runtime": 9.3537, "eval_python_code_alpaca_samples_per_second": 53.455, "eval_python_code_alpaca_steps_per_second": 0.107, "eval_python_code_alpaca_token_set_f1": 0.4596053739933328, "eval_python_code_alpaca_token_set_f1_sem": 0.005531879858469444, "eval_python_code_alpaca_token_set_precision": 0.5029333867277892, "eval_python_code_alpaca_token_set_recall": 0.4452142632244949, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 66875 }, { "epoch": 12.84, "eval_wikibio_accuracy": 0.31234375, "eval_wikibio_bleu_score": 5.632277098470768, "eval_wikibio_bleu_score_sem": 0.19903619332642364, "eval_wikibio_emb_cos_sim": 0.7091946601867676, "eval_wikibio_emb_cos_sim_sem": 0.011196256306269345, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8549296855926514, "eval_wikibio_n_ngrams_match_1": 9.93, "eval_wikibio_n_ngrams_match_2": 3.22, "eval_wikibio_n_ngrams_match_3": 1.116, "eval_wikibio_num_pred_words": 36.456, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.22529620581375, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34000471228423174, "eval_wikibio_runtime": 9.3876, "eval_wikibio_samples_per_second": 53.262, "eval_wikibio_steps_per_second": 0.107, "eval_wikibio_token_set_f1": 0.31407373977693276, "eval_wikibio_token_set_f1_sem": 0.005409696911181756, "eval_wikibio_token_set_precision": 0.32044028303480515, "eval_wikibio_token_set_recall": 0.32712047689466467, "eval_wikibio_true_num_tokens": 61.1328125, "step": 66875 }, { "epoch": 12.84, "eval_nq_accuracy": 0.51165625, "eval_nq_bleu_score": 11.062017085381326, "eval_nq_bleu_score_sem": 0.46683574358760765, "eval_nq_emb_cos_sim": 0.8149175047874451, "eval_nq_emb_cos_sim_sem": 0.007250901670965698, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3130195140838623, "eval_nq_n_ngrams_match_1": 22.3, "eval_nq_n_ngrams_match_2": 7.954, "eval_nq_n_ngrams_match_3": 3.638, "eval_nq_num_pred_words": 48.986, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.10489049501509, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4281655217571667, "eval_nq_runtime": 10.1168, "eval_nq_samples_per_second": 49.423, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.44807086562864745, "eval_nq_token_set_f1_sem": 0.004996636239231897, "eval_nq_token_set_precision": 0.40002648637197, "eval_nq_token_set_recall": 0.5194673376425163, "eval_nq_true_num_tokens": 64.0, "step": 66875 }, { "epoch": 12.84, "learning_rate": 0.001, "loss": 2.6864, "step": 66876 }, { "epoch": 12.84, "learning_rate": 0.001, "loss": 2.6706, "step": 66888 }, { "epoch": 12.85, "learning_rate": 0.001, "loss": 2.6789, "step": 66900 }, { "epoch": 12.85, "learning_rate": 0.001, "loss": 2.6782, "step": 66912 }, { "epoch": 12.85, "learning_rate": 0.001, "loss": 2.6924, "step": 66924 }, { "epoch": 12.85, "learning_rate": 0.001, "loss": 2.6715, "step": 66936 }, { "epoch": 12.85, "learning_rate": 0.001, "loss": 2.6778, "step": 66948 }, { "epoch": 12.86, "learning_rate": 0.001, "loss": 2.671, "step": 66960 }, { "epoch": 12.86, "learning_rate": 0.001, "loss": 2.6823, "step": 66972 }, { "epoch": 12.86, "learning_rate": 0.001, "loss": 2.6806, "step": 66984 }, { "epoch": 12.86, "learning_rate": 0.001, "loss": 2.6845, "step": 66996 }, { "epoch": 12.87, "learning_rate": 0.001, "loss": 2.6824, "step": 67008 }, { "epoch": 12.87, "learning_rate": 0.001, "loss": 2.6784, "step": 67020 }, { "epoch": 12.87, "learning_rate": 0.001, "loss": 2.6751, "step": 67032 }, { "epoch": 12.87, "learning_rate": 0.001, "loss": 2.6705, "step": 67044 }, { "epoch": 12.88, "learning_rate": 0.001, "loss": 2.6841, "step": 67056 }, { "epoch": 12.88, "learning_rate": 0.001, "loss": 2.6756, "step": 67068 }, { "epoch": 12.88, "learning_rate": 0.001, "loss": 2.6812, "step": 67080 }, { "epoch": 12.88, "learning_rate": 0.001, "loss": 2.6815, "step": 67092 }, { "epoch": 12.88, "learning_rate": 0.001, "loss": 2.6835, "step": 67104 }, { "epoch": 12.89, "learning_rate": 0.001, "loss": 2.6718, "step": 67116 }, { "epoch": 12.89, "learning_rate": 0.001, "loss": 2.6845, "step": 67128 }, { "epoch": 12.89, "learning_rate": 0.001, "loss": 2.6866, "step": 67140 }, { "epoch": 12.89, "learning_rate": 0.001, "loss": 2.6847, "step": 67152 }, { "epoch": 12.9, "learning_rate": 0.001, "loss": 2.6763, "step": 67164 }, { "epoch": 12.9, "learning_rate": 0.001, "loss": 2.68, "step": 67176 }, { "epoch": 12.9, "learning_rate": 0.001, "loss": 2.6935, "step": 67188 }, { "epoch": 12.9, "learning_rate": 0.001, "loss": 2.6817, "step": 67200 }, { "epoch": 12.91, "learning_rate": 0.001, "loss": 2.6757, "step": 67212 }, { "epoch": 12.91, "learning_rate": 0.001, "loss": 2.6912, "step": 67224 }, { "epoch": 12.91, "learning_rate": 0.001, "loss": 2.6834, "step": 67236 }, { "epoch": 12.91, "learning_rate": 0.001, "loss": 2.6749, "step": 67248 }, { "epoch": 12.91, "learning_rate": 0.001, "loss": 2.6718, "step": 67260 }, { "epoch": 12.92, "learning_rate": 0.001, "loss": 2.6803, "step": 67272 }, { "epoch": 12.92, "learning_rate": 0.001, "loss": 2.6856, "step": 67284 }, { "epoch": 12.92, "learning_rate": 0.001, "loss": 2.6763, "step": 67296 }, { "epoch": 12.92, "learning_rate": 0.001, "loss": 2.6815, "step": 67308 }, { "epoch": 12.93, "learning_rate": 0.001, "loss": 2.6859, "step": 67320 }, { "epoch": 12.93, "learning_rate": 0.001, "loss": 2.6765, "step": 67332 }, { "epoch": 12.93, "learning_rate": 0.001, "loss": 2.6863, "step": 67344 }, { "epoch": 12.93, "learning_rate": 0.001, "loss": 2.6805, "step": 67356 }, { "epoch": 12.94, "learning_rate": 0.001, "loss": 2.6925, "step": 67368 }, { "epoch": 12.94, "learning_rate": 0.001, "loss": 2.6823, "step": 67380 }, { "epoch": 12.94, "learning_rate": 0.001, "loss": 2.6786, "step": 67392 }, { "epoch": 12.94, "learning_rate": 0.001, "loss": 2.6854, "step": 67404 }, { "epoch": 12.94, "learning_rate": 0.001, "loss": 2.6788, "step": 67416 }, { "epoch": 12.95, "learning_rate": 0.001, "loss": 2.6905, "step": 67428 }, { "epoch": 12.95, "learning_rate": 0.001, "loss": 2.6842, "step": 67440 }, { "epoch": 12.95, "learning_rate": 0.001, "loss": 2.6868, "step": 67452 }, { "epoch": 12.95, "learning_rate": 0.001, "loss": 2.6771, "step": 67464 }, { "epoch": 12.96, "learning_rate": 0.001, "loss": 2.6762, "step": 67476 }, { "epoch": 12.96, "learning_rate": 0.001, "loss": 2.6791, "step": 67488 }, { "epoch": 12.96, "learning_rate": 0.001, "loss": 2.6745, "step": 67500 }, { "epoch": 12.96, "eval_ag_news_accuracy": 0.31034375, "eval_ag_news_bleu_score": 4.9097837775554565, "eval_ag_news_bleu_score_sem": 0.17102450069994085, "eval_ag_news_emb_cos_sim": 0.7790735960006714, "eval_ag_news_emb_cos_sim_sem": 0.00912392671658056, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.695110559463501, "eval_ag_news_n_ngrams_match_1": 13.518, "eval_ag_news_n_ngrams_match_2": 3.026, "eval_ag_news_n_ngrams_match_3": 0.934, "eval_ag_news_num_pred_words": 46.508, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.25002236286309, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3329665298968007, "eval_ag_news_runtime": 9.8951, "eval_ag_news_samples_per_second": 50.53, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.3412295947698137, "eval_ag_news_token_set_f1_sem": 0.00443394131806274, "eval_ag_news_token_set_precision": 0.3225903596697039, "eval_ag_news_token_set_recall": 0.37946670280096373, "eval_ag_news_true_num_tokens": 56.09375, "step": 67500 }, { "epoch": 12.96, "eval_anthropic_toxic_prompts_accuracy": 0.108625, "eval_anthropic_toxic_prompts_bleu_score": 2.8250688312923695, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10556560110337446, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6539926528930664, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00944554009794408, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.400261163711548, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.864, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.574, "eval_anthropic_toxic_prompts_num_pred_words": 47.578, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.971926604940506, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.1998104341574898, "eval_anthropic_toxic_prompts_runtime": 10.228, "eval_anthropic_toxic_prompts_samples_per_second": 48.885, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3479491034903623, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006443801240407627, "eval_anthropic_toxic_prompts_token_set_precision": 0.41329762194510244, "eval_anthropic_toxic_prompts_token_set_recall": 0.3311717031414803, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 67500 }, { "epoch": 12.96, "eval_arxiv_accuracy": 0.33596875, "eval_arxiv_bleu_score": 3.9865242934319967, "eval_arxiv_bleu_score_sem": 0.11830888829685494, "eval_arxiv_emb_cos_sim": 0.7328368425369263, "eval_arxiv_emb_cos_sim_sem": 0.007983401385739384, "eval_arxiv_emb_top1_equal": 0.125, "eval_arxiv_emb_top1_equal_sem": 0.02934655822437397, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.544830083847046, "eval_arxiv_n_ngrams_match_1": 14.01, "eval_arxiv_n_ngrams_match_2": 2.712, "eval_arxiv_n_ngrams_match_3": 0.59, "eval_arxiv_num_pred_words": 39.814, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.633800000209455, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33420571012969413, "eval_arxiv_runtime": 9.7206, "eval_arxiv_samples_per_second": 51.437, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.32996026233340675, "eval_arxiv_token_set_f1_sem": 0.004316633847777857, "eval_arxiv_token_set_precision": 0.2782812022263219, "eval_arxiv_token_set_recall": 0.4296221216268673, "eval_arxiv_true_num_tokens": 64.0, "step": 67500 }, { "epoch": 12.96, "eval_python_code_alpaca_accuracy": 0.1529375, "eval_python_code_alpaca_bleu_score": 4.071551624261715, "eval_python_code_alpaca_bleu_score_sem": 0.1336623014443639, "eval_python_code_alpaca_emb_cos_sim": 0.7235852479934692, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010651427288291557, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.029174327850342, "eval_python_code_alpaca_n_ngrams_match_1": 9.048, "eval_python_code_alpaca_n_ngrams_match_2": 2.522, "eval_python_code_alpaca_n_ngrams_match_3": 0.804, "eval_python_code_alpaca_num_pred_words": 44.054, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.68015051393288, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.2987197686221755, "eval_python_code_alpaca_runtime": 9.5597, "eval_python_code_alpaca_samples_per_second": 52.303, "eval_python_code_alpaca_steps_per_second": 0.105, "eval_python_code_alpaca_token_set_f1": 0.45357819135085164, "eval_python_code_alpaca_token_set_f1_sem": 0.00574702628741405, "eval_python_code_alpaca_token_set_precision": 0.489936989337317, "eval_python_code_alpaca_token_set_recall": 0.44687850692105435, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 67500 }, { "epoch": 12.96, "eval_wikibio_accuracy": 0.30984375, "eval_wikibio_bleu_score": 5.2291804459236655, "eval_wikibio_bleu_score_sem": 0.18658081950377017, "eval_wikibio_emb_cos_sim": 0.7154478430747986, "eval_wikibio_emb_cos_sim_sem": 0.00980031119528453, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.892882823944092, "eval_wikibio_n_ngrams_match_1": 9.324, "eval_wikibio_n_ngrams_match_2": 3.004, "eval_wikibio_n_ngrams_match_3": 1.032, "eval_wikibio_num_pred_words": 35.456, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 49.052091435186824, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3233041867106038, "eval_wikibio_runtime": 9.5184, "eval_wikibio_samples_per_second": 52.53, "eval_wikibio_steps_per_second": 0.105, "eval_wikibio_token_set_f1": 0.29772587721299637, "eval_wikibio_token_set_f1_sem": 0.005766830164159475, "eval_wikibio_token_set_precision": 0.30238157146024425, "eval_wikibio_token_set_recall": 0.3147604126563878, "eval_wikibio_true_num_tokens": 61.1328125, "step": 67500 }, { "epoch": 12.96, "eval_nq_accuracy": 0.51184375, "eval_nq_bleu_score": 11.119146236730009, "eval_nq_bleu_score_sem": 0.4754835950562762, "eval_nq_emb_cos_sim": 0.8124707341194153, "eval_nq_emb_cos_sim_sem": 0.00822658343329118, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3081138134002686, "eval_nq_n_ngrams_match_1": 22.21, "eval_nq_n_ngrams_match_2": 8.04, "eval_nq_n_ngrams_match_3": 3.716, "eval_nq_num_pred_words": 48.796, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.055440319856965, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42727853031246865, "eval_nq_runtime": 9.8735, "eval_nq_samples_per_second": 50.641, "eval_nq_steps_per_second": 0.101, "eval_nq_token_set_f1": 0.44714704357924, "eval_nq_token_set_f1_sem": 0.005114017566815793, "eval_nq_token_set_precision": 0.4008724627693947, "eval_nq_token_set_recall": 0.5164713116086179, "eval_nq_true_num_tokens": 64.0, "step": 67500 }, { "epoch": 12.96, "learning_rate": 0.001, "loss": 2.6721, "step": 67512 }, { "epoch": 12.97, "learning_rate": 0.001, "loss": 2.674, "step": 67524 }, { "epoch": 12.97, "learning_rate": 0.001, "loss": 2.6899, "step": 67536 }, { "epoch": 12.97, "learning_rate": 0.001, "loss": 2.6842, "step": 67548 }, { "epoch": 12.97, "learning_rate": 0.001, "loss": 2.6837, "step": 67560 }, { "epoch": 12.97, "learning_rate": 0.001, "loss": 2.6807, "step": 67572 }, { "epoch": 12.98, "learning_rate": 0.001, "loss": 2.6744, "step": 67584 }, { "epoch": 12.98, "learning_rate": 0.001, "loss": 2.6846, "step": 67596 }, { "epoch": 12.98, "learning_rate": 0.001, "loss": 2.6828, "step": 67608 }, { "epoch": 12.98, "learning_rate": 0.001, "loss": 2.6809, "step": 67620 }, { "epoch": 12.99, "learning_rate": 0.001, "loss": 2.6746, "step": 67632 }, { "epoch": 12.99, "learning_rate": 0.001, "loss": 2.6822, "step": 67644 }, { "epoch": 12.99, "learning_rate": 0.001, "loss": 2.6836, "step": 67656 }, { "epoch": 12.99, "learning_rate": 0.001, "loss": 2.6772, "step": 67668 }, { "epoch": 13.0, "learning_rate": 0.001, "loss": 2.6781, "step": 67680 }, { "epoch": 13.0, "learning_rate": 0.001, "loss": 2.6709, "step": 67692 }, { "epoch": 13.0, "learning_rate": 0.001, "loss": 2.6831, "step": 67704 }, { "epoch": 13.0, "learning_rate": 0.001, "loss": 2.6599, "step": 67716 }, { "epoch": 13.0, "learning_rate": 0.001, "loss": 2.6605, "step": 67728 }, { "epoch": 13.01, "learning_rate": 0.001, "loss": 2.6638, "step": 67740 }, { "epoch": 13.01, "learning_rate": 0.001, "loss": 2.6605, "step": 67752 }, { "epoch": 13.01, "learning_rate": 0.001, "loss": 2.6564, "step": 67764 }, { "epoch": 13.01, "learning_rate": 0.001, "loss": 2.6639, "step": 67776 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 2.665, "step": 67788 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 2.663, "step": 67800 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 2.6563, "step": 67812 }, { "epoch": 13.02, "learning_rate": 0.001, "loss": 2.6696, "step": 67824 }, { "epoch": 13.03, "learning_rate": 0.001, "loss": 2.6653, "step": 67836 }, { "epoch": 13.03, "learning_rate": 0.001, "loss": 2.6685, "step": 67848 }, { "epoch": 13.03, "learning_rate": 0.001, "loss": 2.6593, "step": 67860 }, { "epoch": 13.03, "learning_rate": 0.001, "loss": 2.6686, "step": 67872 }, { "epoch": 13.03, "learning_rate": 0.001, "loss": 2.6686, "step": 67884 }, { "epoch": 13.04, "learning_rate": 0.001, "loss": 2.6651, "step": 67896 }, { "epoch": 13.04, "learning_rate": 0.001, "loss": 2.6659, "step": 67908 }, { "epoch": 13.04, "learning_rate": 0.001, "loss": 2.6617, "step": 67920 }, { "epoch": 13.04, "learning_rate": 0.001, "loss": 2.6548, "step": 67932 }, { "epoch": 13.05, "learning_rate": 0.001, "loss": 2.672, "step": 67944 }, { "epoch": 13.05, "learning_rate": 0.001, "loss": 2.6747, "step": 67956 }, { "epoch": 13.05, "learning_rate": 0.001, "loss": 2.6748, "step": 67968 }, { "epoch": 13.05, "learning_rate": 0.001, "loss": 2.6701, "step": 67980 }, { "epoch": 13.06, "learning_rate": 0.001, "loss": 2.673, "step": 67992 }, { "epoch": 13.06, "learning_rate": 0.001, "loss": 2.6646, "step": 68004 }, { "epoch": 13.06, "learning_rate": 0.001, "loss": 2.6697, "step": 68016 }, { "epoch": 13.06, "learning_rate": 0.001, "loss": 2.6748, "step": 68028 }, { "epoch": 13.06, "learning_rate": 0.001, "loss": 2.6532, "step": 68040 }, { "epoch": 13.07, "learning_rate": 0.001, "loss": 2.6662, "step": 68052 }, { "epoch": 13.07, "learning_rate": 0.001, "loss": 2.6549, "step": 68064 }, { "epoch": 13.07, "learning_rate": 0.001, "loss": 2.6803, "step": 68076 }, { "epoch": 13.07, "learning_rate": 0.001, "loss": 2.6693, "step": 68088 }, { "epoch": 13.08, "learning_rate": 0.001, "loss": 2.6576, "step": 68100 }, { "epoch": 13.08, "learning_rate": 0.001, "loss": 2.6649, "step": 68112 }, { "epoch": 13.08, "learning_rate": 0.001, "loss": 2.6607, "step": 68124 }, { "epoch": 13.08, "eval_ag_news_accuracy": 0.309625, "eval_ag_news_bleu_score": 4.7448037431196735, "eval_ag_news_bleu_score_sem": 0.160589362417903, "eval_ag_news_emb_cos_sim": 0.7907446026802063, "eval_ag_news_emb_cos_sim_sem": 0.007665262485199981, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.701359748840332, "eval_ag_news_n_ngrams_match_1": 13.568, "eval_ag_news_n_ngrams_match_2": 2.956, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.826, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.50233994407163, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3312926724207253, "eval_ag_news_runtime": 9.7446, "eval_ag_news_samples_per_second": 51.311, "eval_ag_news_steps_per_second": 0.103, "eval_ag_news_token_set_f1": 0.33965231379733674, "eval_ag_news_token_set_f1_sem": 0.004385185917675816, "eval_ag_news_token_set_precision": 0.3213535078222124, "eval_ag_news_token_set_recall": 0.37530165885678113, "eval_ag_news_true_num_tokens": 56.09375, "step": 68125 }, { "epoch": 13.08, "eval_anthropic_toxic_prompts_accuracy": 0.10921875, "eval_anthropic_toxic_prompts_bleu_score": 2.8766572105496078, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12002093063810151, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6464630365371704, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009594136714903334, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3689913749694824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.648, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.682, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.582, "eval_anthropic_toxic_prompts_num_pred_words": 47.502, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.04921251373265, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19510746121215689, "eval_anthropic_toxic_prompts_runtime": 10.6897, "eval_anthropic_toxic_prompts_samples_per_second": 46.774, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.33532634323936267, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006168976290780714, "eval_anthropic_toxic_prompts_token_set_precision": 0.40212052115917796, "eval_anthropic_toxic_prompts_token_set_recall": 0.32078323402372705, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 68125 }, { "epoch": 13.08, "eval_arxiv_accuracy": 0.33575, "eval_arxiv_bleu_score": 4.04035494667653, "eval_arxiv_bleu_score_sem": 0.11102423848290659, "eval_arxiv_emb_cos_sim": 0.7298687100410461, "eval_arxiv_emb_cos_sim_sem": 0.009560354698732836, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5450620651245117, "eval_arxiv_n_ngrams_match_1": 14.162, "eval_arxiv_n_ngrams_match_2": 2.762, "eval_arxiv_n_ngrams_match_3": 0.596, "eval_arxiv_num_pred_words": 40.256, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.64183532536346, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3356571932925493, "eval_arxiv_runtime": 10.9475, "eval_arxiv_samples_per_second": 45.672, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.3320950244764943, "eval_arxiv_token_set_f1_sem": 0.004129027109094983, "eval_arxiv_token_set_precision": 0.2800544516277869, "eval_arxiv_token_set_recall": 0.4308921783799877, "eval_arxiv_true_num_tokens": 64.0, "step": 68125 }, { "epoch": 13.08, "eval_python_code_alpaca_accuracy": 0.1523125, "eval_python_code_alpaca_bleu_score": 3.807709214154026, "eval_python_code_alpaca_bleu_score_sem": 0.11969278969621434, "eval_python_code_alpaca_emb_cos_sim": 0.7322779297828674, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008263473779539355, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0557587146759033, "eval_python_code_alpaca_n_ngrams_match_1": 9.094, "eval_python_code_alpaca_n_ngrams_match_2": 2.458, "eval_python_code_alpaca_n_ngrams_match_3": 0.742, "eval_python_code_alpaca_num_pred_words": 45.214, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.237292461322472, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29347032422015207, "eval_python_code_alpaca_runtime": 10.2936, "eval_python_code_alpaca_samples_per_second": 48.574, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.4547427611928941, "eval_python_code_alpaca_token_set_f1_sem": 0.0058399147795201055, "eval_python_code_alpaca_token_set_precision": 0.48975050999984454, "eval_python_code_alpaca_token_set_recall": 0.44880822517963664, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 68125 }, { "epoch": 13.08, "eval_wikibio_accuracy": 0.30815625, "eval_wikibio_bleu_score": 5.54789595361968, "eval_wikibio_bleu_score_sem": 0.20018210865217148, "eval_wikibio_emb_cos_sim": 0.7136615514755249, "eval_wikibio_emb_cos_sim_sem": 0.009998222259179563, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8820266723632812, "eval_wikibio_n_ngrams_match_1": 9.576, "eval_wikibio_n_ngrams_match_2": 3.124, "eval_wikibio_n_ngrams_match_3": 1.126, "eval_wikibio_num_pred_words": 35.922, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 48.522454605792824, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33374879998906337, "eval_wikibio_runtime": 9.7629, "eval_wikibio_samples_per_second": 51.214, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.30359156176689833, "eval_wikibio_token_set_f1_sem": 0.005773896479971032, "eval_wikibio_token_set_precision": 0.3112087128395294, "eval_wikibio_token_set_recall": 0.31372348842232456, "eval_wikibio_true_num_tokens": 61.1328125, "step": 68125 }, { "epoch": 13.08, "eval_nq_accuracy": 0.51140625, "eval_nq_bleu_score": 10.867103186483988, "eval_nq_bleu_score_sem": 0.4541071049699152, "eval_nq_emb_cos_sim": 0.821334958076477, "eval_nq_emb_cos_sim_sem": 0.007155670208210825, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3038601875305176, "eval_nq_n_ngrams_match_1": 22.416, "eval_nq_n_ngrams_match_2": 7.946, "eval_nq_n_ngrams_match_3": 3.558, "eval_nq_num_pred_words": 49.132, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.012759078151428, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43032634348292514, "eval_nq_runtime": 10.1803, "eval_nq_samples_per_second": 49.115, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.448369981182756, "eval_nq_token_set_f1_sem": 0.004900509370407529, "eval_nq_token_set_precision": 0.40386325250842453, "eval_nq_token_set_recall": 0.5126012702131447, "eval_nq_true_num_tokens": 64.0, "step": 68125 }, { "epoch": 13.08, "learning_rate": 0.001, "loss": 2.6699, "step": 68136 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.6603, "step": 68148 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.6621, "step": 68160 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.6612, "step": 68172 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.6615, "step": 68184 }, { "epoch": 13.09, "learning_rate": 0.001, "loss": 2.6636, "step": 68196 }, { "epoch": 13.1, "learning_rate": 0.001, "loss": 2.6679, "step": 68208 }, { "epoch": 13.1, "learning_rate": 0.001, "loss": 2.6694, "step": 68220 }, { "epoch": 13.1, "learning_rate": 0.001, "loss": 2.6671, "step": 68232 }, { "epoch": 13.1, "learning_rate": 0.001, "loss": 2.668, "step": 68244 }, { "epoch": 13.11, "learning_rate": 0.001, "loss": 2.67, "step": 68256 }, { "epoch": 13.11, "learning_rate": 0.001, "loss": 2.6634, "step": 68268 }, { "epoch": 13.11, "learning_rate": 0.001, "loss": 2.6695, "step": 68280 }, { "epoch": 13.11, "learning_rate": 0.001, "loss": 2.6688, "step": 68292 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.6745, "step": 68304 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.6649, "step": 68316 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.6722, "step": 68328 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.6601, "step": 68340 }, { "epoch": 13.12, "learning_rate": 0.001, "loss": 2.6636, "step": 68352 }, { "epoch": 13.13, "learning_rate": 0.001, "loss": 2.6732, "step": 68364 }, { "epoch": 13.13, "learning_rate": 0.001, "loss": 2.6771, "step": 68376 }, { "epoch": 13.13, "learning_rate": 0.001, "loss": 2.6675, "step": 68388 }, { "epoch": 13.13, "learning_rate": 0.001, "loss": 2.668, "step": 68400 }, { "epoch": 13.14, "learning_rate": 0.001, "loss": 2.6699, "step": 68412 }, { "epoch": 13.14, "learning_rate": 0.001, "loss": 2.6767, "step": 68424 }, { "epoch": 13.14, "learning_rate": 0.001, "loss": 2.6762, "step": 68436 }, { "epoch": 13.14, "learning_rate": 0.001, "loss": 2.6668, "step": 68448 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.6665, "step": 68460 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.6708, "step": 68472 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.6719, "step": 68484 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.6707, "step": 68496 }, { "epoch": 13.15, "learning_rate": 0.001, "loss": 2.6661, "step": 68508 }, { "epoch": 13.16, "learning_rate": 0.001, "loss": 2.6641, "step": 68520 }, { "epoch": 13.16, "learning_rate": 0.001, "loss": 2.6714, "step": 68532 }, { "epoch": 13.16, "learning_rate": 0.001, "loss": 2.6817, "step": 68544 }, { "epoch": 13.16, "learning_rate": 0.001, "loss": 2.6605, "step": 68556 }, { "epoch": 13.17, "learning_rate": 0.001, "loss": 2.6603, "step": 68568 }, { "epoch": 13.17, "learning_rate": 0.001, "loss": 2.6605, "step": 68580 }, { "epoch": 13.17, "learning_rate": 0.001, "loss": 2.6548, "step": 68592 }, { "epoch": 13.17, "learning_rate": 0.001, "loss": 2.6625, "step": 68604 }, { "epoch": 13.18, "learning_rate": 0.001, "loss": 2.668, "step": 68616 }, { "epoch": 13.18, "learning_rate": 0.001, "loss": 2.6748, "step": 68628 }, { "epoch": 13.18, "learning_rate": 0.001, "loss": 2.6626, "step": 68640 }, { "epoch": 13.18, "learning_rate": 0.001, "loss": 2.6631, "step": 68652 }, { "epoch": 13.18, "learning_rate": 0.001, "loss": 2.6686, "step": 68664 }, { "epoch": 13.19, "learning_rate": 0.001, "loss": 2.6647, "step": 68676 }, { "epoch": 13.19, "learning_rate": 0.001, "loss": 2.6649, "step": 68688 }, { "epoch": 13.19, "learning_rate": 0.001, "loss": 2.6665, "step": 68700 }, { "epoch": 13.19, "learning_rate": 0.001, "loss": 2.6709, "step": 68712 }, { "epoch": 13.2, "learning_rate": 0.001, "loss": 2.6636, "step": 68724 }, { "epoch": 13.2, "learning_rate": 0.001, "loss": 2.6579, "step": 68736 }, { "epoch": 13.2, "learning_rate": 0.001, "loss": 2.6651, "step": 68748 }, { "epoch": 13.2, "eval_ag_news_accuracy": 0.310625, "eval_ag_news_bleu_score": 4.6155978527157675, "eval_ag_news_bleu_score_sem": 0.1552693501920158, "eval_ag_news_emb_cos_sim": 0.7810306549072266, "eval_ag_news_emb_cos_sim_sem": 0.008831619882911286, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.696045160293579, "eval_ag_news_n_ngrams_match_1": 13.328, "eval_ag_news_n_ngrams_match_2": 2.858, "eval_ag_news_n_ngrams_match_3": 0.83, "eval_ag_news_num_pred_words": 46.072, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.28765765142058, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3283812898810733, "eval_ag_news_runtime": 9.9896, "eval_ag_news_samples_per_second": 50.052, "eval_ag_news_steps_per_second": 0.1, "eval_ag_news_token_set_f1": 0.3341037532538496, "eval_ag_news_token_set_f1_sem": 0.004429807422104174, "eval_ag_news_token_set_precision": 0.3158248782973446, "eval_ag_news_token_set_recall": 0.3715378578222612, "eval_ag_news_true_num_tokens": 56.09375, "step": 68750 }, { "epoch": 13.2, "eval_anthropic_toxic_prompts_accuracy": 0.10865625, "eval_anthropic_toxic_prompts_bleu_score": 2.8600628603223934, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11390856853425778, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6436045169830322, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011180452555166774, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.362196207046509, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.908, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.74, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.62, "eval_anthropic_toxic_prompts_num_pred_words": 47.882, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.852487383787974, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20083028314594234, "eval_anthropic_toxic_prompts_runtime": 10.4342, "eval_anthropic_toxic_prompts_samples_per_second": 47.919, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.3457827278665309, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006685697604567867, "eval_anthropic_toxic_prompts_token_set_precision": 0.4139586110775843, "eval_anthropic_toxic_prompts_token_set_recall": 0.3258544196661757, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 68750 }, { "epoch": 13.2, "eval_arxiv_accuracy": 0.3361875, "eval_arxiv_bleu_score": 4.030972821543451, "eval_arxiv_bleu_score_sem": 0.11366599754262402, "eval_arxiv_emb_cos_sim": 0.7315679788589478, "eval_arxiv_emb_cos_sim_sem": 0.008143049038087095, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5502352714538574, "eval_arxiv_n_ngrams_match_1": 14.044, "eval_arxiv_n_ngrams_match_2": 2.714, "eval_arxiv_n_ngrams_match_3": 0.556, "eval_arxiv_num_pred_words": 39.36, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.821509031001305, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3381924420260421, "eval_arxiv_runtime": 9.4521, "eval_arxiv_samples_per_second": 52.899, "eval_arxiv_steps_per_second": 0.106, "eval_arxiv_token_set_f1": 0.3302554859161884, "eval_arxiv_token_set_f1_sem": 0.004087113864123824, "eval_arxiv_token_set_precision": 0.2769591943860666, "eval_arxiv_token_set_recall": 0.42862873582586186, "eval_arxiv_true_num_tokens": 64.0, "step": 68750 }, { "epoch": 13.2, "eval_python_code_alpaca_accuracy": 0.15275, "eval_python_code_alpaca_bleu_score": 3.9426884987931303, "eval_python_code_alpaca_bleu_score_sem": 0.12794384498963288, "eval_python_code_alpaca_emb_cos_sim": 0.7394332885742188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008318306829452029, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0522122383117676, "eval_python_code_alpaca_n_ngrams_match_1": 8.932, "eval_python_code_alpaca_n_ngrams_match_2": 2.364, "eval_python_code_alpaca_n_ngrams_match_3": 0.688, "eval_python_code_alpaca_num_pred_words": 42.928, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.162108303791317, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30114324266018666, "eval_python_code_alpaca_runtime": 9.5793, "eval_python_code_alpaca_samples_per_second": 52.196, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.4469390430769559, "eval_python_code_alpaca_token_set_f1_sem": 0.005585685118982306, "eval_python_code_alpaca_token_set_precision": 0.4918287461633235, "eval_python_code_alpaca_token_set_recall": 0.4340804132285679, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 68750 }, { "epoch": 13.2, "eval_wikibio_accuracy": 0.31175, "eval_wikibio_bleu_score": 5.472833364398747, "eval_wikibio_bleu_score_sem": 0.2225170765844248, "eval_wikibio_emb_cos_sim": 0.6997572779655457, "eval_wikibio_emb_cos_sim_sem": 0.011623450177330687, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.897064447402954, "eval_wikibio_n_ngrams_match_1": 9.448, "eval_wikibio_n_ngrams_match_2": 3.07, "eval_wikibio_n_ngrams_match_3": 1.076, "eval_wikibio_num_pred_words": 35.5, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 49.25763827165962, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3291576614584369, "eval_wikibio_runtime": 10.0306, "eval_wikibio_samples_per_second": 49.847, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3053627003733574, "eval_wikibio_token_set_f1_sem": 0.005795180778443444, "eval_wikibio_token_set_precision": 0.30806992090280705, "eval_wikibio_token_set_recall": 0.3209511001358559, "eval_wikibio_true_num_tokens": 61.1328125, "step": 68750 }, { "epoch": 13.2, "eval_nq_accuracy": 0.5131875, "eval_nq_bleu_score": 11.00189847339383, "eval_nq_bleu_score_sem": 0.44652311316152377, "eval_nq_emb_cos_sim": 0.8179467916488647, "eval_nq_emb_cos_sim_sem": 0.0073945690822827, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3040249347686768, "eval_nq_n_ngrams_match_1": 22.306, "eval_nq_n_ngrams_match_2": 7.938, "eval_nq_n_ngrams_match_3": 3.58, "eval_nq_num_pred_words": 48.882, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 10.014408788444781, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42907752370581764, "eval_nq_runtime": 10.1256, "eval_nq_samples_per_second": 49.38, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.4464199475182248, "eval_nq_token_set_f1_sem": 0.004902545794572234, "eval_nq_token_set_precision": 0.3995018268823006, "eval_nq_token_set_recall": 0.5150652449000738, "eval_nq_true_num_tokens": 64.0, "step": 68750 }, { "epoch": 13.2, "learning_rate": 0.001, "loss": 2.6642, "step": 68760 }, { "epoch": 13.21, "learning_rate": 0.001, "loss": 2.6662, "step": 68772 }, { "epoch": 13.21, "learning_rate": 0.001, "loss": 2.6725, "step": 68784 }, { "epoch": 13.21, "learning_rate": 0.001, "loss": 2.6595, "step": 68796 }, { "epoch": 13.21, "learning_rate": 0.001, "loss": 2.6711, "step": 68808 }, { "epoch": 13.21, "learning_rate": 0.001, "loss": 2.6715, "step": 68820 }, { "epoch": 13.22, "learning_rate": 0.001, "loss": 2.6571, "step": 68832 }, { "epoch": 13.22, "learning_rate": 0.001, "loss": 2.6644, "step": 68844 }, { "epoch": 13.22, "learning_rate": 0.001, "loss": 2.6647, "step": 68856 }, { "epoch": 13.22, "learning_rate": 0.001, "loss": 2.6854, "step": 68868 }, { "epoch": 13.23, "learning_rate": 0.001, "loss": 2.6669, "step": 68880 }, { "epoch": 13.23, "learning_rate": 0.001, "loss": 2.6665, "step": 68892 }, { "epoch": 13.23, "learning_rate": 0.001, "loss": 2.6722, "step": 68904 }, { "epoch": 13.23, "learning_rate": 0.001, "loss": 2.6685, "step": 68916 }, { "epoch": 13.24, "learning_rate": 0.001, "loss": 2.6637, "step": 68928 }, { "epoch": 13.24, "learning_rate": 0.001, "loss": 2.6648, "step": 68940 }, { "epoch": 13.24, "learning_rate": 0.001, "loss": 2.6674, "step": 68952 }, { "epoch": 13.24, "learning_rate": 0.001, "loss": 2.6636, "step": 68964 }, { "epoch": 13.24, "learning_rate": 0.001, "loss": 2.6746, "step": 68976 }, { "epoch": 13.25, "learning_rate": 0.001, "loss": 2.6598, "step": 68988 }, { "epoch": 13.25, "learning_rate": 0.001, "loss": 2.6762, "step": 69000 }, { "epoch": 13.25, "learning_rate": 0.001, "loss": 2.6702, "step": 69012 }, { "epoch": 13.25, "learning_rate": 0.001, "loss": 2.6623, "step": 69024 }, { "epoch": 13.26, "learning_rate": 0.001, "loss": 2.6636, "step": 69036 }, { "epoch": 13.26, "learning_rate": 0.001, "loss": 2.664, "step": 69048 }, { "epoch": 13.26, "learning_rate": 0.001, "loss": 2.6669, "step": 69060 }, { "epoch": 13.26, "learning_rate": 0.001, "loss": 2.6669, "step": 69072 }, { "epoch": 13.26, "learning_rate": 0.001, "loss": 2.6624, "step": 69084 }, { "epoch": 13.27, "learning_rate": 0.001, "loss": 2.6658, "step": 69096 }, { "epoch": 13.27, "learning_rate": 0.001, "loss": 2.6583, "step": 69108 }, { "epoch": 13.27, "learning_rate": 0.001, "loss": 2.671, "step": 69120 }, { "epoch": 13.27, "learning_rate": 0.001, "loss": 2.6678, "step": 69132 }, { "epoch": 13.28, "learning_rate": 0.001, "loss": 2.6701, "step": 69144 }, { "epoch": 13.28, "learning_rate": 0.001, "loss": 2.6681, "step": 69156 }, { "epoch": 13.28, "learning_rate": 0.001, "loss": 2.6664, "step": 69168 }, { "epoch": 13.28, "learning_rate": 0.001, "loss": 2.6621, "step": 69180 }, { "epoch": 13.29, "learning_rate": 0.001, "loss": 2.6734, "step": 69192 }, { "epoch": 13.29, "learning_rate": 0.001, "loss": 2.6707, "step": 69204 }, { "epoch": 13.29, "learning_rate": 0.001, "loss": 2.6621, "step": 69216 }, { "epoch": 13.29, "learning_rate": 0.001, "loss": 2.6667, "step": 69228 }, { "epoch": 13.29, "learning_rate": 0.001, "loss": 2.6536, "step": 69240 }, { "epoch": 13.3, "learning_rate": 0.001, "loss": 2.6631, "step": 69252 }, { "epoch": 13.3, "learning_rate": 0.001, "loss": 2.6661, "step": 69264 }, { "epoch": 13.3, "learning_rate": 0.001, "loss": 2.6621, "step": 69276 }, { "epoch": 13.3, "learning_rate": 0.001, "loss": 2.6574, "step": 69288 }, { "epoch": 13.31, "learning_rate": 0.001, "loss": 2.6754, "step": 69300 }, { "epoch": 13.31, "learning_rate": 0.001, "loss": 2.6827, "step": 69312 }, { "epoch": 13.31, "learning_rate": 0.001, "loss": 2.6756, "step": 69324 }, { "epoch": 13.31, "learning_rate": 0.001, "loss": 2.6699, "step": 69336 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.6684, "step": 69348 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.6604, "step": 69360 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.6678, "step": 69372 }, { "epoch": 13.32, "eval_ag_news_accuracy": 0.310125, "eval_ag_news_bleu_score": 4.581901813509112, "eval_ag_news_bleu_score_sem": 0.15242013163267312, "eval_ag_news_emb_cos_sim": 0.7905258536338806, "eval_ag_news_emb_cos_sim_sem": 0.0077469239798233615, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6883883476257324, "eval_ag_news_n_ngrams_match_1": 13.472, "eval_ag_news_n_ngrams_match_2": 2.842, "eval_ag_news_n_ngrams_match_3": 0.822, "eval_ag_news_num_pred_words": 46.368, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.980360563393944, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33098513498113397, "eval_ag_news_runtime": 10.1598, "eval_ag_news_samples_per_second": 49.214, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.33922392287556286, "eval_ag_news_token_set_f1_sem": 0.004505298905916181, "eval_ag_news_token_set_precision": 0.3201332667459879, "eval_ag_news_token_set_recall": 0.3767736654439968, "eval_ag_news_true_num_tokens": 56.09375, "step": 69375 }, { "epoch": 13.32, "eval_anthropic_toxic_prompts_accuracy": 0.10884375, "eval_anthropic_toxic_prompts_bleu_score": 2.913702358280627, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11300536149323476, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6561837196350098, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009399365591222364, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.375993490219116, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.992, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.77, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, "eval_anthropic_toxic_prompts_num_pred_words": 47.786, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.2533322486271, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20566045886685008, "eval_anthropic_toxic_prompts_runtime": 10.0332, "eval_anthropic_toxic_prompts_samples_per_second": 49.835, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3432457286718833, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062829507629327, "eval_anthropic_toxic_prompts_token_set_precision": 0.42217432608134114, "eval_anthropic_toxic_prompts_token_set_recall": 0.31544592796649684, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 69375 }, { "epoch": 13.32, "eval_arxiv_accuracy": 0.33503125, "eval_arxiv_bleu_score": 4.044976229554206, "eval_arxiv_bleu_score_sem": 0.10695914419101472, "eval_arxiv_emb_cos_sim": 0.7321943044662476, "eval_arxiv_emb_cos_sim_sem": 0.009260307991461242, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5358974933624268, "eval_arxiv_n_ngrams_match_1": 14.3, "eval_arxiv_n_ngrams_match_2": 2.784, "eval_arxiv_n_ngrams_match_3": 0.604, "eval_arxiv_num_pred_words": 40.908, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.325808078652294, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33809581914191467, "eval_arxiv_runtime": 10.3364, "eval_arxiv_samples_per_second": 48.373, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3347097694629011, "eval_arxiv_token_set_f1_sem": 0.004218430772697236, "eval_arxiv_token_set_precision": 0.2829724458053254, "eval_arxiv_token_set_recall": 0.4284973973118188, "eval_arxiv_true_num_tokens": 64.0, "step": 69375 }, { "epoch": 13.32, "eval_python_code_alpaca_accuracy": 0.1530625, "eval_python_code_alpaca_bleu_score": 3.998613167735816, "eval_python_code_alpaca_bleu_score_sem": 0.12504761548996998, "eval_python_code_alpaca_emb_cos_sim": 0.7307700514793396, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009256189454108317, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.031555652618408, "eval_python_code_alpaca_n_ngrams_match_1": 9.092, "eval_python_code_alpaca_n_ngrams_match_2": 2.464, "eval_python_code_alpaca_n_ngrams_match_3": 0.706, "eval_python_code_alpaca_num_pred_words": 42.87, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.72945535067407, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3084186231728019, "eval_python_code_alpaca_runtime": 10.1751, "eval_python_code_alpaca_samples_per_second": 49.139, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.44877587607077674, "eval_python_code_alpaca_token_set_f1_sem": 0.005642050858873661, "eval_python_code_alpaca_token_set_precision": 0.4951120972017968, "eval_python_code_alpaca_token_set_recall": 0.43973178737615537, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 69375 }, { "epoch": 13.32, "eval_wikibio_accuracy": 0.31128125, "eval_wikibio_bleu_score": 5.684142259335463, "eval_wikibio_bleu_score_sem": 0.20882331101485738, "eval_wikibio_emb_cos_sim": 0.719696581363678, "eval_wikibio_emb_cos_sim_sem": 0.010332893919591872, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.928328037261963, "eval_wikibio_n_ngrams_match_1": 9.95, "eval_wikibio_n_ngrams_match_2": 3.33, "eval_wikibio_n_ngrams_match_3": 1.2, "eval_wikibio_num_pred_words": 37.204, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.821934213987845, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34082381137321227, "eval_wikibio_runtime": 9.5101, "eval_wikibio_samples_per_second": 52.576, "eval_wikibio_steps_per_second": 0.105, "eval_wikibio_token_set_f1": 0.31031726279902844, "eval_wikibio_token_set_f1_sem": 0.005588077460700187, "eval_wikibio_token_set_precision": 0.3217614520503324, "eval_wikibio_token_set_recall": 0.3139498032925318, "eval_wikibio_true_num_tokens": 61.1328125, "step": 69375 }, { "epoch": 13.32, "eval_nq_accuracy": 0.510875, "eval_nq_bleu_score": 10.918513871697016, "eval_nq_bleu_score_sem": 0.45210457420266803, "eval_nq_emb_cos_sim": 0.8174967765808105, "eval_nq_emb_cos_sim_sem": 0.007653251291918884, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.300996780395508, "eval_nq_n_ngrams_match_1": 22.268, "eval_nq_n_ngrams_match_2": 7.998, "eval_nq_n_ngrams_match_3": 3.62, "eval_nq_num_pred_words": 49.196, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.984129481023668, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.42666553562390397, "eval_nq_runtime": 9.8801, "eval_nq_samples_per_second": 50.607, "eval_nq_steps_per_second": 0.101, "eval_nq_token_set_f1": 0.44503201543651233, "eval_nq_token_set_f1_sem": 0.005089316551111861, "eval_nq_token_set_precision": 0.40013100551289443, "eval_nq_token_set_recall": 0.5114309493525862, "eval_nq_true_num_tokens": 64.0, "step": 69375 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.6693, "step": 69384 }, { "epoch": 13.32, "learning_rate": 0.001, "loss": 2.6669, "step": 69396 }, { "epoch": 13.33, "learning_rate": 0.001, "loss": 2.6726, "step": 69408 }, { "epoch": 13.33, "learning_rate": 0.001, "loss": 2.6701, "step": 69420 }, { "epoch": 13.33, "learning_rate": 0.001, "loss": 2.6811, "step": 69432 }, { "epoch": 13.33, "learning_rate": 0.001, "loss": 2.6685, "step": 69444 }, { "epoch": 13.34, "learning_rate": 0.001, "loss": 2.6592, "step": 69456 }, { "epoch": 13.34, "learning_rate": 0.001, "loss": 2.6515, "step": 69468 }, { "epoch": 13.34, "learning_rate": 0.001, "loss": 2.6728, "step": 69480 }, { "epoch": 13.34, "learning_rate": 0.001, "loss": 2.6761, "step": 69492 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.6652, "step": 69504 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.6584, "step": 69516 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.6581, "step": 69528 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.6735, "step": 69540 }, { "epoch": 13.35, "learning_rate": 0.001, "loss": 2.6673, "step": 69552 }, { "epoch": 13.36, "learning_rate": 0.001, "loss": 2.6744, "step": 69564 }, { "epoch": 13.36, "learning_rate": 0.001, "loss": 2.6665, "step": 69576 }, { "epoch": 13.36, "learning_rate": 0.001, "loss": 2.6696, "step": 69588 }, { "epoch": 13.36, "learning_rate": 0.001, "loss": 2.6674, "step": 69600 }, { "epoch": 13.37, "learning_rate": 0.001, "loss": 2.655, "step": 69612 }, { "epoch": 13.37, "learning_rate": 0.001, "loss": 2.6592, "step": 69624 }, { "epoch": 13.37, "learning_rate": 0.001, "loss": 2.6639, "step": 69636 }, { "epoch": 13.37, "learning_rate": 0.001, "loss": 2.674, "step": 69648 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.6659, "step": 69660 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.6624, "step": 69672 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.6572, "step": 69684 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.6686, "step": 69696 }, { "epoch": 13.38, "learning_rate": 0.001, "loss": 2.6699, "step": 69708 }, { "epoch": 13.39, "learning_rate": 0.001, "loss": 2.6658, "step": 69720 }, { "epoch": 13.39, "learning_rate": 0.001, "loss": 2.6625, "step": 69732 }, { "epoch": 13.39, "learning_rate": 0.001, "loss": 2.6666, "step": 69744 }, { "epoch": 13.39, "learning_rate": 0.001, "loss": 2.6587, "step": 69756 }, { "epoch": 13.4, "learning_rate": 0.001, "loss": 2.6698, "step": 69768 }, { "epoch": 13.4, "learning_rate": 0.001, "loss": 2.6676, "step": 69780 }, { "epoch": 13.4, "learning_rate": 0.001, "loss": 2.6656, "step": 69792 }, { "epoch": 13.4, "learning_rate": 0.001, "loss": 2.6658, "step": 69804 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.6683, "step": 69816 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.6737, "step": 69828 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.6731, "step": 69840 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.6654, "step": 69852 }, { "epoch": 13.41, "learning_rate": 0.001, "loss": 2.6848, "step": 69864 }, { "epoch": 13.42, "learning_rate": 0.001, "loss": 2.6713, "step": 69876 }, { "epoch": 13.42, "learning_rate": 0.001, "loss": 2.6709, "step": 69888 }, { "epoch": 13.42, "learning_rate": 0.001, "loss": 2.6807, "step": 69900 }, { "epoch": 13.42, "learning_rate": 0.001, "loss": 2.6669, "step": 69912 }, { "epoch": 13.43, "learning_rate": 0.001, "loss": 2.6664, "step": 69924 }, { "epoch": 13.43, "learning_rate": 0.001, "loss": 2.6708, "step": 69936 }, { "epoch": 13.43, "learning_rate": 0.001, "loss": 2.6699, "step": 69948 }, { "epoch": 13.43, "learning_rate": 0.001, "loss": 2.674, "step": 69960 }, { "epoch": 13.44, "learning_rate": 0.001, "loss": 2.6645, "step": 69972 }, { "epoch": 13.44, "learning_rate": 0.001, "loss": 2.6635, "step": 69984 }, { "epoch": 13.44, "learning_rate": 0.001, "loss": 2.6682, "step": 69996 }, { "epoch": 13.44, "eval_ag_news_accuracy": 0.31046875, "eval_ag_news_bleu_score": 4.573999983592996, "eval_ag_news_bleu_score_sem": 0.14607512891896027, "eval_ag_news_emb_cos_sim": 0.7891677618026733, "eval_ag_news_emb_cos_sim_sem": 0.007683784049486994, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.693073272705078, "eval_ag_news_n_ngrams_match_1": 13.536, "eval_ag_news_n_ngrams_match_2": 2.978, "eval_ag_news_n_ngrams_match_3": 0.842, "eval_ag_news_num_pred_words": 46.744, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.16810499819159, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.331465413135851, "eval_ag_news_runtime": 9.8732, "eval_ag_news_samples_per_second": 50.642, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.3381465570788198, "eval_ag_news_token_set_f1_sem": 0.0045067678996548325, "eval_ag_news_token_set_precision": 0.31930283102408513, "eval_ag_news_token_set_recall": 0.37345723278823784, "eval_ag_news_true_num_tokens": 56.09375, "step": 70000 }, { "epoch": 13.44, "eval_anthropic_toxic_prompts_accuracy": 0.1089375, "eval_anthropic_toxic_prompts_bleu_score": 2.8453579870685455, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11041804017860082, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511286497116089, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00983327990898767, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3859524726867676, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.91, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.716, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63, "eval_anthropic_toxic_prompts_num_pred_words": 48.428, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.54612119165763, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.20057807952564702, "eval_anthropic_toxic_prompts_runtime": 9.388, "eval_anthropic_toxic_prompts_samples_per_second": 53.26, "eval_anthropic_toxic_prompts_steps_per_second": 0.107, "eval_anthropic_toxic_prompts_token_set_f1": 0.3402589784710454, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006507210742780276, "eval_anthropic_toxic_prompts_token_set_precision": 0.41184701315785754, "eval_anthropic_toxic_prompts_token_set_recall": 0.31321571923591446, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 70000 }, { "epoch": 13.44, "eval_arxiv_accuracy": 0.33609375, "eval_arxiv_bleu_score": 4.107066981436177, "eval_arxiv_bleu_score_sem": 0.11677242817579854, "eval_arxiv_emb_cos_sim": 0.734245777130127, "eval_arxiv_emb_cos_sim_sem": 0.008520608409581779, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5367684364318848, "eval_arxiv_n_ngrams_match_1": 14.502, "eval_arxiv_n_ngrams_match_2": 2.758, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 40.782, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.35571692586295, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3392695386136979, "eval_arxiv_runtime": 10.1355, "eval_arxiv_samples_per_second": 49.332, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.336308068725055, "eval_arxiv_token_set_f1_sem": 0.004249186490275496, "eval_arxiv_token_set_precision": 0.2877258366278578, "eval_arxiv_token_set_recall": 0.42504069226446467, "eval_arxiv_true_num_tokens": 64.0, "step": 70000 }, { "epoch": 13.44, "eval_python_code_alpaca_accuracy": 0.15359375, "eval_python_code_alpaca_bleu_score": 4.035798468787673, "eval_python_code_alpaca_bleu_score_sem": 0.13010785153561488, "eval_python_code_alpaca_emb_cos_sim": 0.7340409755706787, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007725683285262887, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0539517402648926, "eval_python_code_alpaca_n_ngrams_match_1": 9.184, "eval_python_code_alpaca_n_ngrams_match_2": 2.476, "eval_python_code_alpaca_n_ngrams_match_3": 0.724, "eval_python_code_alpaca_num_pred_words": 43.642, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.198951867953607, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31022072658054345, "eval_python_code_alpaca_runtime": 10.0354, "eval_python_code_alpaca_samples_per_second": 49.823, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4504469738901408, "eval_python_code_alpaca_token_set_f1_sem": 0.005308590798637979, "eval_python_code_alpaca_token_set_precision": 0.49919039450371916, "eval_python_code_alpaca_token_set_recall": 0.4351724132169895, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 70000 }, { "epoch": 13.44, "eval_wikibio_accuracy": 0.31009375, "eval_wikibio_bleu_score": 5.706691967496832, "eval_wikibio_bleu_score_sem": 0.21379787769856498, "eval_wikibio_emb_cos_sim": 0.706870436668396, "eval_wikibio_emb_cos_sim_sem": 0.011782312186080373, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9185338020324707, "eval_wikibio_n_ngrams_match_1": 9.928, "eval_wikibio_n_ngrams_match_2": 3.302, "eval_wikibio_n_ngrams_match_3": 1.16, "eval_wikibio_num_pred_words": 37.016, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.32660189573987, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3437490115546855, "eval_wikibio_runtime": 11.5447, "eval_wikibio_samples_per_second": 43.31, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.31203896200058007, "eval_wikibio_token_set_f1_sem": 0.005555645412727883, "eval_wikibio_token_set_precision": 0.3215287833226996, "eval_wikibio_token_set_recall": 0.3168401352548879, "eval_wikibio_true_num_tokens": 61.1328125, "step": 70000 }, { "epoch": 13.44, "eval_nq_accuracy": 0.51159375, "eval_nq_bleu_score": 10.973708463687188, "eval_nq_bleu_score_sem": 0.4487555880509462, "eval_nq_emb_cos_sim": 0.8272305130958557, "eval_nq_emb_cos_sim_sem": 0.007815281558296475, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.295389175415039, "eval_nq_n_ngrams_match_1": 22.356, "eval_nq_n_ngrams_match_2": 7.938, "eval_nq_n_ngrams_match_3": 3.572, "eval_nq_num_pred_words": 48.956, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.928299110452118, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43195680494780875, "eval_nq_runtime": 10.5117, "eval_nq_samples_per_second": 47.566, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4467195710626159, "eval_nq_token_set_f1_sem": 0.004996189643011763, "eval_nq_token_set_precision": 0.402965767620914, "eval_nq_token_set_recall": 0.5103217045561563, "eval_nq_true_num_tokens": 64.0, "step": 70000 }, { "epoch": 13.44, "learning_rate": 0.001, "loss": 2.6702, "step": 70008 }, { "epoch": 13.44, "learning_rate": 0.001, "loss": 2.6688, "step": 70020 }, { "epoch": 13.45, "learning_rate": 0.001, "loss": 2.6746, "step": 70032 }, { "epoch": 13.45, "learning_rate": 0.001, "loss": 2.6556, "step": 70044 }, { "epoch": 13.45, "learning_rate": 0.001, "loss": 2.6615, "step": 70056 }, { "epoch": 13.45, "learning_rate": 0.001, "loss": 2.6685, "step": 70068 }, { "epoch": 13.46, "learning_rate": 0.001, "loss": 2.6616, "step": 70080 }, { "epoch": 13.46, "learning_rate": 0.001, "loss": 2.6663, "step": 70092 }, { "epoch": 13.46, "learning_rate": 0.001, "loss": 2.6654, "step": 70104 }, { "epoch": 13.46, "learning_rate": 0.001, "loss": 2.6641, "step": 70116 }, { "epoch": 13.47, "learning_rate": 0.001, "loss": 2.6651, "step": 70128 }, { "epoch": 13.47, "learning_rate": 0.001, "loss": 2.6752, "step": 70140 }, { "epoch": 13.47, "learning_rate": 0.001, "loss": 2.6638, "step": 70152 }, { "epoch": 13.47, "learning_rate": 0.001, "loss": 2.6674, "step": 70164 }, { "epoch": 13.47, "learning_rate": 0.001, "loss": 2.665, "step": 70176 }, { "epoch": 13.48, "learning_rate": 0.001, "loss": 2.6732, "step": 70188 }, { "epoch": 13.48, "learning_rate": 0.001, "loss": 2.6699, "step": 70200 }, { "epoch": 13.48, "learning_rate": 0.001, "loss": 2.6709, "step": 70212 }, { "epoch": 13.48, "learning_rate": 0.001, "loss": 2.6648, "step": 70224 }, { "epoch": 13.49, "learning_rate": 0.001, "loss": 2.6688, "step": 70236 }, { "epoch": 13.49, "learning_rate": 0.001, "loss": 2.6604, "step": 70248 }, { "epoch": 13.49, "learning_rate": 0.001, "loss": 2.6731, "step": 70260 }, { "epoch": 13.49, "learning_rate": 0.001, "loss": 2.6625, "step": 70272 }, { "epoch": 13.5, "learning_rate": 0.001, "loss": 2.6643, "step": 70284 }, { "epoch": 13.5, "learning_rate": 0.001, "loss": 2.6589, "step": 70296 }, { "epoch": 13.5, "learning_rate": 0.001, "loss": 2.6574, "step": 70308 }, { "epoch": 13.5, "learning_rate": 0.001, "loss": 2.6668, "step": 70320 }, { "epoch": 13.5, "learning_rate": 0.001, "loss": 2.6688, "step": 70332 }, { "epoch": 13.51, "learning_rate": 0.001, "loss": 2.6654, "step": 70344 }, { "epoch": 13.51, "learning_rate": 0.001, "loss": 2.6758, "step": 70356 }, { "epoch": 13.51, "learning_rate": 0.001, "loss": 2.6628, "step": 70368 }, { "epoch": 13.51, "learning_rate": 0.001, "loss": 2.6644, "step": 70380 }, { "epoch": 13.52, "learning_rate": 0.001, "loss": 2.6664, "step": 70392 }, { "epoch": 13.52, "learning_rate": 0.001, "loss": 2.6609, "step": 70404 }, { "epoch": 13.52, "learning_rate": 0.001, "loss": 2.6575, "step": 70416 }, { "epoch": 13.52, "learning_rate": 0.001, "loss": 2.6786, "step": 70428 }, { "epoch": 13.53, "learning_rate": 0.001, "loss": 2.6648, "step": 70440 }, { "epoch": 13.53, "learning_rate": 0.001, "loss": 2.6582, "step": 70452 }, { "epoch": 13.53, "learning_rate": 0.001, "loss": 2.6648, "step": 70464 }, { "epoch": 13.53, "learning_rate": 0.001, "loss": 2.6545, "step": 70476 }, { "epoch": 13.53, "learning_rate": 0.001, "loss": 2.6709, "step": 70488 }, { "epoch": 13.54, "learning_rate": 0.001, "loss": 2.6563, "step": 70500 }, { "epoch": 13.54, "learning_rate": 0.001, "loss": 2.6644, "step": 70512 }, { "epoch": 13.54, "learning_rate": 0.001, "loss": 2.6627, "step": 70524 }, { "epoch": 13.54, "learning_rate": 0.001, "loss": 2.6724, "step": 70536 }, { "epoch": 13.55, "learning_rate": 0.001, "loss": 2.6639, "step": 70548 }, { "epoch": 13.55, "learning_rate": 0.001, "loss": 2.6661, "step": 70560 }, { "epoch": 13.55, "learning_rate": 0.001, "loss": 2.6663, "step": 70572 }, { "epoch": 13.55, "learning_rate": 0.001, "loss": 2.6606, "step": 70584 }, { "epoch": 13.56, "learning_rate": 0.001, "loss": 2.6688, "step": 70596 }, { "epoch": 13.56, "learning_rate": 0.001, "loss": 2.6743, "step": 70608 }, { "epoch": 13.56, "learning_rate": 0.001, "loss": 2.6646, "step": 70620 }, { "epoch": 13.56, "eval_ag_news_accuracy": 0.311125, "eval_ag_news_bleu_score": 4.603646543310611, "eval_ag_news_bleu_score_sem": 0.1554574416001967, "eval_ag_news_emb_cos_sim": 0.7936511039733887, "eval_ag_news_emb_cos_sim_sem": 0.007636620408914288, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6890857219696045, "eval_ag_news_n_ngrams_match_1": 13.344, "eval_ag_news_n_ngrams_match_2": 2.88, "eval_ag_news_n_ngrams_match_3": 0.83, "eval_ag_news_num_pred_words": 46.178, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 40.008251565213804, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3304506415259759, "eval_ag_news_runtime": 9.7421, "eval_ag_news_samples_per_second": 51.323, "eval_ag_news_steps_per_second": 0.103, "eval_ag_news_token_set_f1": 0.3346660807683896, "eval_ag_news_token_set_f1_sem": 0.004521146964380114, "eval_ag_news_token_set_precision": 0.31481641540733385, "eval_ag_news_token_set_recall": 0.37520403392083945, "eval_ag_news_true_num_tokens": 56.09375, "step": 70625 }, { "epoch": 13.56, "eval_anthropic_toxic_prompts_accuracy": 0.1094375, "eval_anthropic_toxic_prompts_bleu_score": 2.95346093583356, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11451633694222511, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6503604650497437, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008357328225728036, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3711435794830322, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.766, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, "eval_anthropic_toxic_prompts_num_pred_words": 47.566, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 29.111799686058593, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20569145427292096, "eval_anthropic_toxic_prompts_runtime": 9.373, "eval_anthropic_toxic_prompts_samples_per_second": 53.345, "eval_anthropic_toxic_prompts_steps_per_second": 0.107, "eval_anthropic_toxic_prompts_token_set_f1": 0.3484633256637001, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006319512916057749, "eval_anthropic_toxic_prompts_token_set_precision": 0.420448466137835, "eval_anthropic_toxic_prompts_token_set_recall": 0.3235423783033733, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 70625 }, { "epoch": 13.56, "eval_arxiv_accuracy": 0.335, "eval_arxiv_bleu_score": 4.043129903314623, "eval_arxiv_bleu_score_sem": 0.11305360510649545, "eval_arxiv_emb_cos_sim": 0.7320790886878967, "eval_arxiv_emb_cos_sim_sem": 0.007941114925792797, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.544421911239624, "eval_arxiv_n_ngrams_match_1": 14.282, "eval_arxiv_n_ngrams_match_2": 2.666, "eval_arxiv_n_ngrams_match_3": 0.572, "eval_arxiv_num_pred_words": 40.854, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.619666316445944, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33588146033544647, "eval_arxiv_runtime": 9.9187, "eval_arxiv_samples_per_second": 50.41, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.33192611120256144, "eval_arxiv_token_set_f1_sem": 0.00402073157478049, "eval_arxiv_token_set_precision": 0.28117471575818076, "eval_arxiv_token_set_recall": 0.4201901092794416, "eval_arxiv_true_num_tokens": 64.0, "step": 70625 }, { "epoch": 13.56, "eval_python_code_alpaca_accuracy": 0.15175, "eval_python_code_alpaca_bleu_score": 3.978865267523391, "eval_python_code_alpaca_bleu_score_sem": 0.12158400088243494, "eval_python_code_alpaca_emb_cos_sim": 0.7289148569107056, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009450559524183924, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0616848468780518, "eval_python_code_alpaca_n_ngrams_match_1": 8.994, "eval_python_code_alpaca_n_ngrams_match_2": 2.426, "eval_python_code_alpaca_n_ngrams_match_3": 0.69, "eval_python_code_alpaca_num_pred_words": 42.596, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 21.363521118500106, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30567471782513955, "eval_python_code_alpaca_runtime": 10.6029, "eval_python_code_alpaca_samples_per_second": 47.157, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.4440848260978374, "eval_python_code_alpaca_token_set_f1_sem": 0.005813810310104646, "eval_python_code_alpaca_token_set_precision": 0.48757103106215893, "eval_python_code_alpaca_token_set_recall": 0.4310197499467643, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 70625 }, { "epoch": 13.56, "eval_wikibio_accuracy": 0.30753125, "eval_wikibio_bleu_score": 5.573081484533731, "eval_wikibio_bleu_score_sem": 0.20239320777496794, "eval_wikibio_emb_cos_sim": 0.7133356332778931, "eval_wikibio_emb_cos_sim_sem": 0.011062724553474705, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.9174001216888428, "eval_wikibio_n_ngrams_match_1": 9.838, "eval_wikibio_n_ngrams_match_2": 3.182, "eval_wikibio_n_ngrams_match_3": 1.108, "eval_wikibio_num_pred_words": 36.462, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 50.269579944848715, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3430982805601802, "eval_wikibio_runtime": 9.4774, "eval_wikibio_samples_per_second": 52.757, "eval_wikibio_steps_per_second": 0.106, "eval_wikibio_token_set_f1": 0.3104528276888094, "eval_wikibio_token_set_f1_sem": 0.005324313675163187, "eval_wikibio_token_set_precision": 0.3172004388108864, "eval_wikibio_token_set_recall": 0.32004895169764813, "eval_wikibio_true_num_tokens": 61.1328125, "step": 70625 }, { "epoch": 13.56, "eval_nq_accuracy": 0.5111875, "eval_nq_bleu_score": 10.94347444405283, "eval_nq_bleu_score_sem": 0.45145234848873483, "eval_nq_emb_cos_sim": 0.8182129263877869, "eval_nq_emb_cos_sim_sem": 0.007668962846790335, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.3012492656707764, "eval_nq_n_ngrams_match_1": 22.3, "eval_nq_n_ngrams_match_2": 8.006, "eval_nq_n_ngrams_match_3": 3.596, "eval_nq_num_pred_words": 49.174, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.986650644968993, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4297293570062266, "eval_nq_runtime": 10.5276, "eval_nq_samples_per_second": 47.494, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.44614669768141174, "eval_nq_token_set_f1_sem": 0.004936122819480593, "eval_nq_token_set_precision": 0.40120142831249606, "eval_nq_token_set_recall": 0.5127012666681858, "eval_nq_true_num_tokens": 64.0, "step": 70625 }, { "epoch": 13.56, "learning_rate": 0.001, "loss": 2.6639, "step": 70632 }, { "epoch": 13.56, "learning_rate": 0.001, "loss": 2.6641, "step": 70644 }, { "epoch": 13.57, "learning_rate": 0.001, "loss": 2.6658, "step": 70656 }, { "epoch": 13.57, "learning_rate": 0.001, "loss": 2.6754, "step": 70668 }, { "epoch": 13.57, "learning_rate": 0.001, "loss": 2.6554, "step": 70680 }, { "epoch": 13.57, "learning_rate": 0.001, "loss": 2.6688, "step": 70692 }, { "epoch": 13.58, "learning_rate": 0.001, "loss": 2.6808, "step": 70704 }, { "epoch": 13.58, "learning_rate": 0.001, "loss": 2.6637, "step": 70716 }, { "epoch": 13.58, "learning_rate": 0.001, "loss": 2.6631, "step": 70728 }, { "epoch": 13.58, "learning_rate": 0.001, "loss": 2.6617, "step": 70740 }, { "epoch": 13.59, "learning_rate": 0.001, "loss": 2.6536, "step": 70752 }, { "epoch": 13.59, "learning_rate": 0.001, "loss": 2.6718, "step": 70764 }, { "epoch": 13.59, "learning_rate": 0.001, "loss": 2.6616, "step": 70776 }, { "epoch": 13.59, "learning_rate": 0.001, "loss": 2.6629, "step": 70788 }, { "epoch": 13.59, "learning_rate": 0.001, "loss": 2.6657, "step": 70800 }, { "epoch": 13.6, "learning_rate": 0.001, "loss": 2.6713, "step": 70812 }, { "epoch": 13.6, "learning_rate": 0.001, "loss": 2.6799, "step": 70824 }, { "epoch": 13.6, "learning_rate": 0.001, "loss": 2.6655, "step": 70836 }, { "epoch": 13.6, "learning_rate": 0.001, "loss": 2.6658, "step": 70848 }, { "epoch": 13.61, "learning_rate": 0.001, "loss": 2.6802, "step": 70860 }, { "epoch": 13.61, "learning_rate": 0.001, "loss": 2.6671, "step": 70872 }, { "epoch": 13.61, "learning_rate": 0.001, "loss": 2.6602, "step": 70884 }, { "epoch": 13.61, "learning_rate": 0.001, "loss": 2.6693, "step": 70896 }, { "epoch": 13.62, "learning_rate": 0.001, "loss": 2.6638, "step": 70908 }, { "epoch": 13.62, "learning_rate": 0.001, "loss": 2.6678, "step": 70920 }, { "epoch": 13.62, "learning_rate": 0.001, "loss": 2.6677, "step": 70932 }, { "epoch": 13.62, "learning_rate": 0.001, "loss": 2.6596, "step": 70944 }, { "epoch": 13.62, "learning_rate": 0.001, "loss": 2.6573, "step": 70956 }, { "epoch": 13.63, "learning_rate": 0.001, "loss": 2.6532, "step": 70968 }, { "epoch": 13.63, "learning_rate": 0.001, "loss": 2.6716, "step": 70980 }, { "epoch": 13.63, "learning_rate": 0.001, "loss": 2.6692, "step": 70992 }, { "epoch": 13.63, "learning_rate": 0.001, "loss": 2.6733, "step": 71004 }, { "epoch": 13.64, "learning_rate": 0.001, "loss": 2.6586, "step": 71016 }, { "epoch": 13.64, "learning_rate": 0.001, "loss": 2.6654, "step": 71028 }, { "epoch": 13.64, "learning_rate": 0.001, "loss": 2.6688, "step": 71040 }, { "epoch": 13.64, "learning_rate": 0.001, "loss": 2.6533, "step": 71052 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 2.6543, "step": 71064 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 2.6695, "step": 71076 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 2.672, "step": 71088 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 2.6645, "step": 71100 }, { "epoch": 13.65, "learning_rate": 0.001, "loss": 2.6653, "step": 71112 }, { "epoch": 13.66, "learning_rate": 0.001, "loss": 2.6723, "step": 71124 }, { "epoch": 13.66, "learning_rate": 0.001, "loss": 2.6645, "step": 71136 }, { "epoch": 13.66, "learning_rate": 0.001, "loss": 2.6656, "step": 71148 }, { "epoch": 13.66, "learning_rate": 0.001, "loss": 2.6629, "step": 71160 }, { "epoch": 13.67, "learning_rate": 0.001, "loss": 2.6736, "step": 71172 }, { "epoch": 13.67, "learning_rate": 0.001, "loss": 2.6761, "step": 71184 }, { "epoch": 13.67, "learning_rate": 0.001, "loss": 2.6577, "step": 71196 }, { "epoch": 13.67, "learning_rate": 0.001, "loss": 2.6663, "step": 71208 }, { "epoch": 13.68, "learning_rate": 0.001, "loss": 2.6711, "step": 71220 }, { "epoch": 13.68, "learning_rate": 0.001, "loss": 2.6666, "step": 71232 }, { "epoch": 13.68, "learning_rate": 0.001, "loss": 2.6693, "step": 71244 }, { "epoch": 13.68, "eval_ag_news_accuracy": 0.31021875, "eval_ag_news_bleu_score": 4.667875158309431, "eval_ag_news_bleu_score_sem": 0.16041649619612317, "eval_ag_news_emb_cos_sim": 0.7917337417602539, "eval_ag_news_emb_cos_sim_sem": 0.007676240422148053, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6827380657196045, "eval_ag_news_n_ngrams_match_1": 13.356, "eval_ag_news_n_ngrams_match_2": 2.856, "eval_ag_news_n_ngrams_match_3": 0.838, "eval_ag_news_num_pred_words": 46.732, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.75509725540556, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.32941069343876306, "eval_ag_news_runtime": 14.7825, "eval_ag_news_samples_per_second": 33.824, "eval_ag_news_steps_per_second": 0.068, "eval_ag_news_token_set_f1": 0.3359234164658063, "eval_ag_news_token_set_f1_sem": 0.0044045344092549345, "eval_ag_news_token_set_precision": 0.31729274051631484, "eval_ag_news_token_set_recall": 0.37187330534113067, "eval_ag_news_true_num_tokens": 56.09375, "step": 71250 }, { "epoch": 13.68, "eval_anthropic_toxic_prompts_accuracy": 0.1105625, "eval_anthropic_toxic_prompts_bleu_score": 2.952286324413179, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11558982129429374, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6516097784042358, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009899142920864546, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.357330560684204, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.848, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.768, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.652, "eval_anthropic_toxic_prompts_num_pred_words": 47.346, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.71244236456615, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20258695510547492, "eval_anthropic_toxic_prompts_runtime": 9.569, "eval_anthropic_toxic_prompts_samples_per_second": 52.252, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.34489874424172334, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526369755584896, "eval_anthropic_toxic_prompts_token_set_precision": 0.4115063487217594, "eval_anthropic_toxic_prompts_token_set_recall": 0.3264698538390632, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 71250 }, { "epoch": 13.68, "eval_arxiv_accuracy": 0.3369375, "eval_arxiv_bleu_score": 4.021699182832508, "eval_arxiv_bleu_score_sem": 0.11346164989068308, "eval_arxiv_emb_cos_sim": 0.7389757633209229, "eval_arxiv_emb_cos_sim_sem": 0.007385155847085595, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5371644496917725, "eval_arxiv_n_ngrams_match_1": 14.578, "eval_arxiv_n_ngrams_match_2": 2.77, "eval_arxiv_n_ngrams_match_3": 0.564, "eval_arxiv_num_pred_words": 41.632, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.36932493961764, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3420320790702561, "eval_arxiv_runtime": 9.6675, "eval_arxiv_samples_per_second": 51.72, "eval_arxiv_steps_per_second": 0.103, "eval_arxiv_token_set_f1": 0.3372291777879464, "eval_arxiv_token_set_f1_sem": 0.004324223442877177, "eval_arxiv_token_set_precision": 0.28780419173800104, "eval_arxiv_token_set_recall": 0.42224308944809263, "eval_arxiv_true_num_tokens": 64.0, "step": 71250 }, { "epoch": 13.68, "eval_python_code_alpaca_accuracy": 0.15490625, "eval_python_code_alpaca_bleu_score": 4.105479368780234, "eval_python_code_alpaca_bleu_score_sem": 0.1397634669415215, "eval_python_code_alpaca_emb_cos_sim": 0.7371370792388916, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009321790365271016, "eval_python_code_alpaca_emb_top1_equal": 0.078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.011915445327759, "eval_python_code_alpaca_n_ngrams_match_1": 9.184, "eval_python_code_alpaca_n_ngrams_match_2": 2.566, "eval_python_code_alpaca_n_ngrams_match_3": 0.778, "eval_python_code_alpaca_num_pred_words": 43.342, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.326296570946823, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30698513107405345, "eval_python_code_alpaca_runtime": 19.6582, "eval_python_code_alpaca_samples_per_second": 25.435, "eval_python_code_alpaca_steps_per_second": 0.051, "eval_python_code_alpaca_token_set_f1": 0.4524428336990582, "eval_python_code_alpaca_token_set_f1_sem": 0.005853690275783923, "eval_python_code_alpaca_token_set_precision": 0.4975520347473566, "eval_python_code_alpaca_token_set_recall": 0.4353982240933636, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 71250 }, { "epoch": 13.68, "eval_wikibio_accuracy": 0.3101875, "eval_wikibio_bleu_score": 5.552077654959087, "eval_wikibio_bleu_score_sem": 0.20290481390862938, "eval_wikibio_emb_cos_sim": 0.7196710109710693, "eval_wikibio_emb_cos_sim_sem": 0.010501207359120126, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8840525150299072, "eval_wikibio_n_ngrams_match_1": 9.65, "eval_wikibio_n_ngrams_match_2": 3.116, "eval_wikibio_n_ngrams_match_3": 1.114, "eval_wikibio_num_pred_words": 35.86, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 48.620853100904924, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3353852515763316, "eval_wikibio_runtime": 10.0876, "eval_wikibio_samples_per_second": 49.566, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.30591112482158145, "eval_wikibio_token_set_f1_sem": 0.005615498855016447, "eval_wikibio_token_set_precision": 0.3130418628646202, "eval_wikibio_token_set_recall": 0.3176680076579202, "eval_wikibio_true_num_tokens": 61.1328125, "step": 71250 }, { "epoch": 13.68, "eval_nq_accuracy": 0.5136875, "eval_nq_bleu_score": 11.090034028053548, "eval_nq_bleu_score_sem": 0.4577365293142572, "eval_nq_emb_cos_sim": 0.827858567237854, "eval_nq_emb_cos_sim_sem": 0.007347543077512162, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.293893575668335, "eval_nq_n_ngrams_match_1": 22.402, "eval_nq_n_ngrams_match_2": 8.046, "eval_nq_n_ngrams_match_3": 3.664, "eval_nq_num_pred_words": 49.216, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.913461447185798, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43262336671948687, "eval_nq_runtime": 11.1904, "eval_nq_samples_per_second": 44.681, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.4496634724741236, "eval_nq_token_set_f1_sem": 0.004967090223294023, "eval_nq_token_set_precision": 0.4048731003537667, "eval_nq_token_set_recall": 0.5146779370454185, "eval_nq_true_num_tokens": 64.0, "step": 71250 }, { "epoch": 13.68, "learning_rate": 0.001, "loss": 2.6632, "step": 71256 }, { "epoch": 13.68, "learning_rate": 0.001, "loss": 2.6554, "step": 71268 }, { "epoch": 13.69, "learning_rate": 0.001, "loss": 2.6589, "step": 71280 }, { "epoch": 13.69, "learning_rate": 0.001, "loss": 2.6609, "step": 71292 }, { "epoch": 13.69, "learning_rate": 0.001, "loss": 2.6629, "step": 71304 }, { "epoch": 13.69, "learning_rate": 0.001, "loss": 2.6707, "step": 71316 }, { "epoch": 13.7, "learning_rate": 0.001, "loss": 2.6655, "step": 71328 }, { "epoch": 13.7, "learning_rate": 0.001, "loss": 2.6645, "step": 71340 }, { "epoch": 13.7, "learning_rate": 0.001, "loss": 2.6617, "step": 71352 }, { "epoch": 13.7, "learning_rate": 0.001, "loss": 2.6654, "step": 71364 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.6634, "step": 71376 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.6649, "step": 71388 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.6628, "step": 71400 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.6589, "step": 71412 }, { "epoch": 13.71, "learning_rate": 0.001, "loss": 2.6611, "step": 71424 }, { "epoch": 13.72, "learning_rate": 0.001, "loss": 2.6718, "step": 71436 }, { "epoch": 13.72, "learning_rate": 0.001, "loss": 2.665, "step": 71448 }, { "epoch": 13.72, "learning_rate": 0.001, "loss": 2.6679, "step": 71460 }, { "epoch": 13.72, "learning_rate": 0.001, "loss": 2.6719, "step": 71472 }, { "epoch": 13.73, "learning_rate": 0.001, "loss": 2.6593, "step": 71484 }, { "epoch": 13.73, "learning_rate": 0.001, "loss": 2.6693, "step": 71496 }, { "epoch": 13.73, "learning_rate": 0.001, "loss": 2.6518, "step": 71508 }, { "epoch": 13.73, "learning_rate": 0.001, "loss": 2.6698, "step": 71520 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.6589, "step": 71532 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.6649, "step": 71544 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.6646, "step": 71556 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.665, "step": 71568 }, { "epoch": 13.74, "learning_rate": 0.001, "loss": 2.6629, "step": 71580 }, { "epoch": 13.75, "learning_rate": 0.001, "loss": 2.6687, "step": 71592 }, { "epoch": 13.75, "learning_rate": 0.001, "loss": 2.6657, "step": 71604 }, { "epoch": 13.75, "learning_rate": 0.001, "loss": 2.6561, "step": 71616 }, { "epoch": 13.75, "learning_rate": 0.001, "loss": 2.661, "step": 71628 }, { "epoch": 13.76, "learning_rate": 0.001, "loss": 2.6707, "step": 71640 }, { "epoch": 13.76, "learning_rate": 0.001, "loss": 2.661, "step": 71652 }, { "epoch": 13.76, "learning_rate": 0.001, "loss": 2.6625, "step": 71664 }, { "epoch": 13.76, "learning_rate": 0.001, "loss": 2.6744, "step": 71676 }, { "epoch": 13.76, "learning_rate": 0.001, "loss": 2.6708, "step": 71688 }, { "epoch": 13.77, "learning_rate": 0.001, "loss": 2.658, "step": 71700 }, { "epoch": 13.77, "learning_rate": 0.001, "loss": 2.6655, "step": 71712 }, { "epoch": 13.77, "learning_rate": 0.001, "loss": 2.6591, "step": 71724 }, { "epoch": 13.77, "learning_rate": 0.001, "loss": 2.6566, "step": 71736 }, { "epoch": 13.78, "learning_rate": 0.001, "loss": 2.6615, "step": 71748 }, { "epoch": 13.78, "learning_rate": 0.001, "loss": 2.6641, "step": 71760 }, { "epoch": 13.78, "learning_rate": 0.001, "loss": 2.6701, "step": 71772 }, { "epoch": 13.78, "learning_rate": 0.001, "loss": 2.6658, "step": 71784 }, { "epoch": 13.79, "learning_rate": 0.001, "loss": 2.6607, "step": 71796 }, { "epoch": 13.79, "learning_rate": 0.001, "loss": 2.6566, "step": 71808 }, { "epoch": 13.79, "learning_rate": 0.001, "loss": 2.6662, "step": 71820 }, { "epoch": 13.79, "learning_rate": 0.001, "loss": 2.6689, "step": 71832 }, { "epoch": 13.79, "learning_rate": 0.001, "loss": 2.6707, "step": 71844 }, { "epoch": 13.8, "learning_rate": 0.001, "loss": 2.6592, "step": 71856 }, { "epoch": 13.8, "learning_rate": 0.001, "loss": 2.6658, "step": 71868 }, { "epoch": 13.8, "eval_ag_news_accuracy": 0.31121875, "eval_ag_news_bleu_score": 4.560318739593933, "eval_ag_news_bleu_score_sem": 0.1493677755464744, "eval_ag_news_emb_cos_sim": 0.7990758419036865, "eval_ag_news_emb_cos_sim_sem": 0.00687573610875753, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6722798347473145, "eval_ag_news_n_ngrams_match_1": 13.344, "eval_ag_news_n_ngrams_match_2": 2.886, "eval_ag_news_n_ngrams_match_3": 0.828, "eval_ag_news_num_pred_words": 46.784, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.34149580551345, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3299312068651786, "eval_ag_news_runtime": 15.38, "eval_ag_news_samples_per_second": 32.51, "eval_ag_news_steps_per_second": 0.065, "eval_ag_news_token_set_f1": 0.3333838736347365, "eval_ag_news_token_set_f1_sem": 0.004457908956553333, "eval_ag_news_token_set_precision": 0.3173950521645402, "eval_ag_news_token_set_recall": 0.3653645208821789, "eval_ag_news_true_num_tokens": 56.09375, "step": 71875 }, { "epoch": 13.8, "eval_anthropic_toxic_prompts_accuracy": 0.11028125, "eval_anthropic_toxic_prompts_bleu_score": 2.904053292432784, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11569520381222666, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6541334390640259, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009347251671583151, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3467636108398438, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.962, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.742, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618, "eval_anthropic_toxic_prompts_num_pred_words": 47.158, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.410636816541246, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20639918789805434, "eval_anthropic_toxic_prompts_runtime": 10.2305, "eval_anthropic_toxic_prompts_samples_per_second": 48.873, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.34726239836305783, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006441835381410861, "eval_anthropic_toxic_prompts_token_set_precision": 0.4216924323209074, "eval_anthropic_toxic_prompts_token_set_recall": 0.3250254926472682, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 71875 }, { "epoch": 13.8, "eval_arxiv_accuracy": 0.33675, "eval_arxiv_bleu_score": 3.9815023022627165, "eval_arxiv_bleu_score_sem": 0.10948450822139191, "eval_arxiv_emb_cos_sim": 0.7407854795455933, "eval_arxiv_emb_cos_sim_sem": 0.008157875509139817, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5466015338897705, "eval_arxiv_n_ngrams_match_1": 14.204, "eval_arxiv_n_ngrams_match_2": 2.732, "eval_arxiv_n_ngrams_match_3": 0.566, "eval_arxiv_num_pred_words": 39.626, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.69520641984392, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34001852498102725, "eval_arxiv_runtime": 9.8907, "eval_arxiv_samples_per_second": 50.552, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.33070976303227806, "eval_arxiv_token_set_f1_sem": 0.004240608477417651, "eval_arxiv_token_set_precision": 0.28113501368089044, "eval_arxiv_token_set_recall": 0.4223324535662771, "eval_arxiv_true_num_tokens": 64.0, "step": 71875 }, { "epoch": 13.8, "eval_python_code_alpaca_accuracy": 0.15390625, "eval_python_code_alpaca_bleu_score": 4.2379437579218004, "eval_python_code_alpaca_bleu_score_sem": 0.14047488021794002, "eval_python_code_alpaca_emb_cos_sim": 0.7371129393577576, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009346693225840383, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0274159908294678, "eval_python_code_alpaca_n_ngrams_match_1": 9.39, "eval_python_code_alpaca_n_ngrams_match_2": 2.604, "eval_python_code_alpaca_n_ngrams_match_3": 0.828, "eval_python_code_alpaca_num_pred_words": 43.392, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.643819789875995, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3134217692089786, "eval_python_code_alpaca_runtime": 14.3324, "eval_python_code_alpaca_samples_per_second": 34.886, "eval_python_code_alpaca_steps_per_second": 0.07, "eval_python_code_alpaca_token_set_f1": 0.4600845392528296, "eval_python_code_alpaca_token_set_f1_sem": 0.00555175677990806, "eval_python_code_alpaca_token_set_precision": 0.5115891628810105, "eval_python_code_alpaca_token_set_recall": 0.4406083843291174, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 71875 }, { "epoch": 13.8, "eval_wikibio_accuracy": 0.31090625, "eval_wikibio_bleu_score": 5.59419985816136, "eval_wikibio_bleu_score_sem": 0.21470473991171027, "eval_wikibio_emb_cos_sim": 0.7257965803146362, "eval_wikibio_emb_cos_sim_sem": 0.009295922078452947, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.932488441467285, "eval_wikibio_n_ngrams_match_1": 10.064, "eval_wikibio_n_ngrams_match_2": 3.238, "eval_wikibio_n_ngrams_match_3": 1.134, "eval_wikibio_num_pred_words": 36.936, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 51.033814450909844, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.344047218691074, "eval_wikibio_runtime": 10.9745, "eval_wikibio_samples_per_second": 45.56, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3124186337809242, "eval_wikibio_token_set_f1_sem": 0.005443322417982122, "eval_wikibio_token_set_precision": 0.3231864854188542, "eval_wikibio_token_set_recall": 0.3182054275784626, "eval_wikibio_true_num_tokens": 61.1328125, "step": 71875 }, { "epoch": 13.8, "eval_nq_accuracy": 0.5125, "eval_nq_bleu_score": 10.758493401659862, "eval_nq_bleu_score_sem": 0.45410807397169894, "eval_nq_emb_cos_sim": 0.8201106786727905, "eval_nq_emb_cos_sim_sem": 0.00801139611277373, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.293391227722168, "eval_nq_n_ngrams_match_1": 22.434, "eval_nq_n_ngrams_match_2": 7.92, "eval_nq_n_ngrams_match_3": 3.536, "eval_nq_num_pred_words": 49.216, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.908482690827114, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43085284279987485, "eval_nq_runtime": 10.6388, "eval_nq_samples_per_second": 46.998, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4481553596754238, "eval_nq_token_set_f1_sem": 0.004928203830851493, "eval_nq_token_set_precision": 0.40346722419867526, "eval_nq_token_set_recall": 0.5136582525628065, "eval_nq_true_num_tokens": 64.0, "step": 71875 }, { "epoch": 13.8, "learning_rate": 0.001, "loss": 2.6571, "step": 71880 }, { "epoch": 13.8, "learning_rate": 0.001, "loss": 2.6649, "step": 71892 }, { "epoch": 13.81, "learning_rate": 0.001, "loss": 2.6646, "step": 71904 }, { "epoch": 13.81, "learning_rate": 0.001, "loss": 2.661, "step": 71916 }, { "epoch": 13.81, "learning_rate": 0.001, "loss": 2.6683, "step": 71928 }, { "epoch": 13.81, "learning_rate": 0.001, "loss": 2.6695, "step": 71940 }, { "epoch": 13.82, "learning_rate": 0.001, "loss": 2.6591, "step": 71952 }, { "epoch": 13.82, "learning_rate": 0.001, "loss": 2.6668, "step": 71964 }, { "epoch": 13.82, "learning_rate": 0.001, "loss": 2.6592, "step": 71976 }, { "epoch": 13.82, "learning_rate": 0.001, "loss": 2.6617, "step": 71988 }, { "epoch": 13.82, "learning_rate": 0.001, "loss": 2.6662, "step": 72000 }, { "epoch": 13.83, "learning_rate": 0.001, "loss": 2.6636, "step": 72012 }, { "epoch": 13.83, "learning_rate": 0.001, "loss": 2.6616, "step": 72024 }, { "epoch": 13.83, "learning_rate": 0.001, "loss": 2.6698, "step": 72036 }, { "epoch": 13.83, "learning_rate": 0.001, "loss": 2.6635, "step": 72048 }, { "epoch": 13.84, "learning_rate": 0.001, "loss": 2.6637, "step": 72060 }, { "epoch": 13.84, "learning_rate": 0.001, "loss": 2.6611, "step": 72072 }, { "epoch": 13.84, "learning_rate": 0.001, "loss": 2.6575, "step": 72084 }, { "epoch": 13.84, "learning_rate": 0.001, "loss": 2.668, "step": 72096 }, { "epoch": 13.85, "learning_rate": 0.001, "loss": 2.6594, "step": 72108 }, { "epoch": 13.85, "learning_rate": 0.001, "loss": 2.6617, "step": 72120 }, { "epoch": 13.85, "learning_rate": 0.001, "loss": 2.6596, "step": 72132 }, { "epoch": 13.85, "learning_rate": 0.001, "loss": 2.6554, "step": 72144 }, { "epoch": 13.85, "learning_rate": 0.001, "loss": 2.6572, "step": 72156 }, { "epoch": 13.86, "learning_rate": 0.001, "loss": 2.6648, "step": 72168 }, { "epoch": 13.86, "learning_rate": 0.001, "loss": 2.6662, "step": 72180 }, { "epoch": 13.86, "learning_rate": 0.001, "loss": 2.6582, "step": 72192 }, { "epoch": 13.86, "learning_rate": 0.001, "loss": 2.6643, "step": 72204 }, { "epoch": 13.87, "learning_rate": 0.001, "loss": 2.6635, "step": 72216 }, { "epoch": 13.87, "learning_rate": 0.001, "loss": 2.6714, "step": 72228 }, { "epoch": 13.87, "learning_rate": 0.001, "loss": 2.6551, "step": 72240 }, { "epoch": 13.87, "learning_rate": 0.001, "loss": 2.6657, "step": 72252 }, { "epoch": 13.88, "learning_rate": 0.001, "loss": 2.6672, "step": 72264 }, { "epoch": 13.88, "learning_rate": 0.001, "loss": 2.6635, "step": 72276 }, { "epoch": 13.88, "learning_rate": 0.001, "loss": 2.6541, "step": 72288 }, { "epoch": 13.88, "learning_rate": 0.001, "loss": 2.6669, "step": 72300 }, { "epoch": 13.88, "learning_rate": 0.001, "loss": 2.6763, "step": 72312 }, { "epoch": 13.89, "learning_rate": 0.001, "loss": 2.6697, "step": 72324 }, { "epoch": 13.89, "learning_rate": 0.001, "loss": 2.6648, "step": 72336 }, { "epoch": 13.89, "learning_rate": 0.001, "loss": 2.6653, "step": 72348 }, { "epoch": 13.89, "learning_rate": 0.001, "loss": 2.6721, "step": 72360 }, { "epoch": 13.9, "learning_rate": 0.001, "loss": 2.6648, "step": 72372 }, { "epoch": 13.9, "learning_rate": 0.001, "loss": 2.6573, "step": 72384 }, { "epoch": 13.9, "learning_rate": 0.001, "loss": 2.6602, "step": 72396 }, { "epoch": 13.9, "learning_rate": 0.001, "loss": 2.6713, "step": 72408 }, { "epoch": 13.91, "learning_rate": 0.001, "loss": 2.6683, "step": 72420 }, { "epoch": 13.91, "learning_rate": 0.001, "loss": 2.6581, "step": 72432 }, { "epoch": 13.91, "learning_rate": 0.001, "loss": 2.6629, "step": 72444 }, { "epoch": 13.91, "learning_rate": 0.001, "loss": 2.6677, "step": 72456 }, { "epoch": 13.91, "learning_rate": 0.001, "loss": 2.665, "step": 72468 }, { "epoch": 13.92, "learning_rate": 0.001, "loss": 2.6552, "step": 72480 }, { "epoch": 13.92, "learning_rate": 0.001, "loss": 2.6689, "step": 72492 }, { "epoch": 13.92, "eval_ag_news_accuracy": 0.31259375, "eval_ag_news_bleu_score": 4.667010394604709, "eval_ag_news_bleu_score_sem": 0.15220312413165804, "eval_ag_news_emb_cos_sim": 0.7891812324523926, "eval_ag_news_emb_cos_sim_sem": 0.008268829063478271, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6626858711242676, "eval_ag_news_n_ngrams_match_1": 13.458, "eval_ag_news_n_ngrams_match_2": 2.954, "eval_ag_news_n_ngrams_match_3": 0.844, "eval_ag_news_num_pred_words": 46.432, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.96585972657138, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3306616044189356, "eval_ag_news_runtime": 9.7923, "eval_ag_news_samples_per_second": 51.06, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.3407866619983471, "eval_ag_news_token_set_f1_sem": 0.00461370432169028, "eval_ag_news_token_set_precision": 0.31932943071810443, "eval_ag_news_token_set_recall": 0.38152919819743547, "eval_ag_news_true_num_tokens": 56.09375, "step": 72500 }, { "epoch": 13.92, "eval_anthropic_toxic_prompts_accuracy": 0.11, "eval_anthropic_toxic_prompts_bleu_score": 2.8838790061942934, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11380088685718076, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6470285058021545, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01016822947518235, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.360210418701172, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.838, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.712, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.604, "eval_anthropic_toxic_prompts_num_pred_words": 47.356, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.79524930077552, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20008081828547014, "eval_anthropic_toxic_prompts_runtime": 10.0899, "eval_anthropic_toxic_prompts_samples_per_second": 49.555, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3515441764011612, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006638689362519888, "eval_anthropic_toxic_prompts_token_set_precision": 0.41299420285061667, "eval_anthropic_toxic_prompts_token_set_recall": 0.33879309911752076, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 72500 }, { "epoch": 13.92, "eval_arxiv_accuracy": 0.33575, "eval_arxiv_bleu_score": 4.099351527217666, "eval_arxiv_bleu_score_sem": 0.12123417381083582, "eval_arxiv_emb_cos_sim": 0.7331365346908569, "eval_arxiv_emb_cos_sim_sem": 0.009152979724639234, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5255320072174072, "eval_arxiv_n_ngrams_match_1": 14.052, "eval_arxiv_n_ngrams_match_2": 2.842, "eval_arxiv_n_ngrams_match_3": 0.64, "eval_arxiv_num_pred_words": 39.06, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.971842074718595, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3363836710550086, "eval_arxiv_runtime": 10.0541, "eval_arxiv_samples_per_second": 49.731, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3356608243933526, "eval_arxiv_token_set_f1_sem": 0.004551175312760204, "eval_arxiv_token_set_precision": 0.2799417438080741, "eval_arxiv_token_set_recall": 0.4449900152168845, "eval_arxiv_true_num_tokens": 64.0, "step": 72500 }, { "epoch": 13.92, "eval_python_code_alpaca_accuracy": 0.15359375, "eval_python_code_alpaca_bleu_score": 3.921870232405925, "eval_python_code_alpaca_bleu_score_sem": 0.11048887972491647, "eval_python_code_alpaca_emb_cos_sim": 0.732799768447876, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008781415083800715, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0323173999786377, "eval_python_code_alpaca_n_ngrams_match_1": 9.198, "eval_python_code_alpaca_n_ngrams_match_2": 2.572, "eval_python_code_alpaca_n_ngrams_match_3": 0.734, "eval_python_code_alpaca_num_pred_words": 44.008, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.745251974320777, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3035625584639082, "eval_python_code_alpaca_runtime": 9.4867, "eval_python_code_alpaca_samples_per_second": 52.706, "eval_python_code_alpaca_steps_per_second": 0.105, "eval_python_code_alpaca_token_set_f1": 0.4625909206655764, "eval_python_code_alpaca_token_set_f1_sem": 0.00577159057307132, "eval_python_code_alpaca_token_set_precision": 0.4957626226513615, "eval_python_code_alpaca_token_set_recall": 0.45858372168510947, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 72500 }, { "epoch": 13.92, "eval_wikibio_accuracy": 0.3130625, "eval_wikibio_bleu_score": 5.338480536302942, "eval_wikibio_bleu_score_sem": 0.19376407946131832, "eval_wikibio_emb_cos_sim": 0.7155885696411133, "eval_wikibio_emb_cos_sim_sem": 0.012235243293602696, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8659629821777344, "eval_wikibio_n_ngrams_match_1": 9.434, "eval_wikibio_n_ngrams_match_2": 2.992, "eval_wikibio_n_ngrams_match_3": 1.048, "eval_wikibio_num_pred_words": 34.994, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.7492319594119, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32572344329734937, "eval_wikibio_runtime": 9.384, "eval_wikibio_samples_per_second": 53.282, "eval_wikibio_steps_per_second": 0.107, "eval_wikibio_token_set_f1": 0.29990304429961057, "eval_wikibio_token_set_f1_sem": 0.0057470633308822966, "eval_wikibio_token_set_precision": 0.30419409834289696, "eval_wikibio_token_set_recall": 0.31386573507298776, "eval_wikibio_true_num_tokens": 61.1328125, "step": 72500 }, { "epoch": 13.92, "eval_nq_accuracy": 0.51159375, "eval_nq_bleu_score": 10.927177283354741, "eval_nq_bleu_score_sem": 0.46982529307470106, "eval_nq_emb_cos_sim": 0.8228777647018433, "eval_nq_emb_cos_sim_sem": 0.007915942061975158, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.292212963104248, "eval_nq_n_ngrams_match_1": 22.286, "eval_nq_n_ngrams_match_2": 7.988, "eval_nq_n_ngrams_match_3": 3.628, "eval_nq_num_pred_words": 48.67, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.89681475156513, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4276151414616264, "eval_nq_runtime": 9.9899, "eval_nq_samples_per_second": 50.051, "eval_nq_steps_per_second": 0.1, "eval_nq_token_set_f1": 0.4488316320842184, "eval_nq_token_set_f1_sem": 0.005098180030044119, "eval_nq_token_set_precision": 0.4011092669667956, "eval_nq_token_set_recall": 0.5215948169892849, "eval_nq_true_num_tokens": 64.0, "step": 72500 }, { "epoch": 13.92, "learning_rate": 0.001, "loss": 2.6696, "step": 72504 }, { "epoch": 13.92, "learning_rate": 0.001, "loss": 2.6658, "step": 72516 }, { "epoch": 13.93, "learning_rate": 0.001, "loss": 2.662, "step": 72528 }, { "epoch": 13.93, "learning_rate": 0.001, "loss": 2.6652, "step": 72540 }, { "epoch": 13.93, "learning_rate": 0.001, "loss": 2.6648, "step": 72552 }, { "epoch": 13.93, "learning_rate": 0.001, "loss": 2.6589, "step": 72564 }, { "epoch": 13.94, "learning_rate": 0.001, "loss": 2.6652, "step": 72576 }, { "epoch": 13.94, "learning_rate": 0.001, "loss": 2.6624, "step": 72588 }, { "epoch": 13.94, "learning_rate": 0.001, "loss": 2.6622, "step": 72600 }, { "epoch": 13.94, "learning_rate": 0.001, "loss": 2.6658, "step": 72612 }, { "epoch": 13.94, "learning_rate": 0.001, "loss": 2.6706, "step": 72624 }, { "epoch": 13.95, "learning_rate": 0.001, "loss": 2.6565, "step": 72636 }, { "epoch": 13.95, "learning_rate": 0.001, "loss": 2.6598, "step": 72648 }, { "epoch": 13.95, "learning_rate": 0.001, "loss": 2.6585, "step": 72660 }, { "epoch": 13.95, "learning_rate": 0.001, "loss": 2.66, "step": 72672 }, { "epoch": 13.96, "learning_rate": 0.001, "loss": 2.6645, "step": 72684 }, { "epoch": 13.96, "learning_rate": 0.001, "loss": 2.6662, "step": 72696 }, { "epoch": 13.96, "learning_rate": 0.001, "loss": 2.6528, "step": 72708 }, { "epoch": 13.96, "learning_rate": 0.001, "loss": 2.6655, "step": 72720 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.6643, "step": 72732 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.6659, "step": 72744 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.6681, "step": 72756 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.6662, "step": 72768 }, { "epoch": 13.97, "learning_rate": 0.001, "loss": 2.6681, "step": 72780 }, { "epoch": 13.98, "learning_rate": 0.001, "loss": 2.6612, "step": 72792 }, { "epoch": 13.98, "learning_rate": 0.001, "loss": 2.6695, "step": 72804 }, { "epoch": 13.98, "learning_rate": 0.001, "loss": 2.6705, "step": 72816 }, { "epoch": 13.98, "learning_rate": 0.001, "loss": 2.6704, "step": 72828 }, { "epoch": 13.99, "learning_rate": 0.001, "loss": 2.6697, "step": 72840 }, { "epoch": 13.99, "learning_rate": 0.001, "loss": 2.6692, "step": 72852 }, { "epoch": 13.99, "learning_rate": 0.001, "loss": 2.6619, "step": 72864 }, { "epoch": 13.99, "learning_rate": 0.001, "loss": 2.667, "step": 72876 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.6617, "step": 72888 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.6606, "step": 72900 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.6555, "step": 72912 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.6343, "step": 72924 }, { "epoch": 14.0, "learning_rate": 0.001, "loss": 2.6561, "step": 72936 }, { "epoch": 14.01, "learning_rate": 0.001, "loss": 2.6485, "step": 72948 }, { "epoch": 14.01, "learning_rate": 0.001, "loss": 2.6378, "step": 72960 }, { "epoch": 14.01, "learning_rate": 0.001, "loss": 2.6379, "step": 72972 }, { "epoch": 14.01, "learning_rate": 0.001, "loss": 2.6512, "step": 72984 }, { "epoch": 14.02, "learning_rate": 0.001, "loss": 2.6459, "step": 72996 }, { "epoch": 14.02, "learning_rate": 0.001, "loss": 2.6531, "step": 73008 }, { "epoch": 14.02, "learning_rate": 0.001, "loss": 2.648, "step": 73020 }, { "epoch": 14.02, "learning_rate": 0.001, "loss": 2.6472, "step": 73032 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.6505, "step": 73044 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.6435, "step": 73056 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.6491, "step": 73068 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.6401, "step": 73080 }, { "epoch": 14.03, "learning_rate": 0.001, "loss": 2.6467, "step": 73092 }, { "epoch": 14.04, "learning_rate": 0.001, "loss": 2.6447, "step": 73104 }, { "epoch": 14.04, "learning_rate": 0.001, "loss": 2.6413, "step": 73116 }, { "epoch": 14.04, "eval_ag_news_accuracy": 0.3124375, "eval_ag_news_bleu_score": 4.599793773144123, "eval_ag_news_bleu_score_sem": 0.15307301718055874, "eval_ag_news_emb_cos_sim": 0.7911202907562256, "eval_ag_news_emb_cos_sim_sem": 0.007782889071037822, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6850435733795166, "eval_ag_news_n_ngrams_match_1": 13.49, "eval_ag_news_n_ngrams_match_2": 2.848, "eval_ag_news_n_ngrams_match_3": 0.844, "eval_ag_news_num_pred_words": 46.564, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.8468586743301, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3319840505372907, "eval_ag_news_runtime": 9.844, "eval_ag_news_samples_per_second": 50.793, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.33785119592784113, "eval_ag_news_token_set_f1_sem": 0.004544684325292295, "eval_ag_news_token_set_precision": 0.3181789723740158, "eval_ag_news_token_set_recall": 0.3769574205503869, "eval_ag_news_true_num_tokens": 56.09375, "step": 73125 }, { "epoch": 14.04, "eval_anthropic_toxic_prompts_accuracy": 0.109875, "eval_anthropic_toxic_prompts_bleu_score": 2.895404776187029, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10993063822861639, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6511983871459961, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009415453833552382, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3460946083068848, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.896, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.734, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622, "eval_anthropic_toxic_prompts_num_pred_words": 47.082, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.3916363849251, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20427428036189293, "eval_anthropic_toxic_prompts_runtime": 9.2877, "eval_anthropic_toxic_prompts_samples_per_second": 53.835, "eval_anthropic_toxic_prompts_steps_per_second": 0.108, "eval_anthropic_toxic_prompts_token_set_f1": 0.3517100808410542, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065988359986545045, "eval_anthropic_toxic_prompts_token_set_precision": 0.4195940844696324, "eval_anthropic_toxic_prompts_token_set_recall": 0.33207423984635404, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 73125 }, { "epoch": 14.04, "eval_arxiv_accuracy": 0.33490625, "eval_arxiv_bleu_score": 4.042857169101391, "eval_arxiv_bleu_score_sem": 0.11644170214522537, "eval_arxiv_emb_cos_sim": 0.7429251074790955, "eval_arxiv_emb_cos_sim_sem": 0.008031861041903801, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5403828620910645, "eval_arxiv_n_ngrams_match_1": 14.486, "eval_arxiv_n_ngrams_match_2": 2.698, "eval_arxiv_n_ngrams_match_3": 0.562, "eval_arxiv_num_pred_words": 40.288, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.480117794072534, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34497499666583126, "eval_arxiv_runtime": 9.8455, "eval_arxiv_samples_per_second": 50.785, "eval_arxiv_steps_per_second": 0.102, "eval_arxiv_token_set_f1": 0.3378610630935953, "eval_arxiv_token_set_f1_sem": 0.004186340128903121, "eval_arxiv_token_set_precision": 0.28564761725596466, "eval_arxiv_token_set_recall": 0.4320979633186641, "eval_arxiv_true_num_tokens": 64.0, "step": 73125 }, { "epoch": 14.04, "eval_python_code_alpaca_accuracy": 0.154875, "eval_python_code_alpaca_bleu_score": 4.1278632661531445, "eval_python_code_alpaca_bleu_score_sem": 0.1331676602417114, "eval_python_code_alpaca_emb_cos_sim": 0.7308018803596497, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008006857424024712, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0201494693756104, "eval_python_code_alpaca_n_ngrams_match_1": 9.274, "eval_python_code_alpaca_n_ngrams_match_2": 2.6, "eval_python_code_alpaca_n_ngrams_match_3": 0.832, "eval_python_code_alpaca_num_pred_words": 45.222, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.494354733676772, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3044311901750288, "eval_python_code_alpaca_runtime": 9.3689, "eval_python_code_alpaca_samples_per_second": 53.368, "eval_python_code_alpaca_steps_per_second": 0.107, "eval_python_code_alpaca_token_set_f1": 0.46478355465055876, "eval_python_code_alpaca_token_set_f1_sem": 0.005810942233106832, "eval_python_code_alpaca_token_set_precision": 0.5065965148749533, "eval_python_code_alpaca_token_set_recall": 0.45013731659628475, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 73125 }, { "epoch": 14.04, "eval_wikibio_accuracy": 0.316, "eval_wikibio_bleu_score": 5.489130036910917, "eval_wikibio_bleu_score_sem": 0.20050337634348733, "eval_wikibio_emb_cos_sim": 0.728935718536377, "eval_wikibio_emb_cos_sim_sem": 0.010047894954327683, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8851306438446045, "eval_wikibio_n_ngrams_match_1": 9.47, "eval_wikibio_n_ngrams_match_2": 3.056, "eval_wikibio_n_ngrams_match_3": 1.082, "eval_wikibio_num_pred_words": 34.9, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 48.67330091129572, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33099278113828823, "eval_wikibio_runtime": 9.8961, "eval_wikibio_samples_per_second": 50.525, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.30370348038269523, "eval_wikibio_token_set_f1_sem": 0.005659399365345912, "eval_wikibio_token_set_precision": 0.30651272926812007, "eval_wikibio_token_set_recall": 0.320457283020339, "eval_wikibio_true_num_tokens": 61.1328125, "step": 73125 }, { "epoch": 14.04, "eval_nq_accuracy": 0.514875, "eval_nq_bleu_score": 11.251045201256828, "eval_nq_bleu_score_sem": 0.46399444516687893, "eval_nq_emb_cos_sim": 0.8228020668029785, "eval_nq_emb_cos_sim_sem": 0.007771887426487462, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2896971702575684, "eval_nq_n_ngrams_match_1": 22.628, "eval_nq_n_ngrams_match_2": 8.254, "eval_nq_n_ngrams_match_3": 3.748, "eval_nq_num_pred_words": 49.3, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.871947709087753, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43529691620612854, "eval_nq_runtime": 10.1349, "eval_nq_samples_per_second": 49.335, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.45410653552424035, "eval_nq_token_set_f1_sem": 0.004955940786184263, "eval_nq_token_set_precision": 0.4089740665226169, "eval_nq_token_set_recall": 0.5213966947850811, "eval_nq_true_num_tokens": 64.0, "step": 73125 }, { "epoch": 14.04, "learning_rate": 0.001, "loss": 2.6454, "step": 73128 }, { "epoch": 14.04, "learning_rate": 0.001, "loss": 2.644, "step": 73140 }, { "epoch": 14.05, "learning_rate": 0.001, "loss": 2.6349, "step": 73152 }, { "epoch": 14.05, "learning_rate": 0.001, "loss": 2.6381, "step": 73164 }, { "epoch": 14.05, "learning_rate": 0.001, "loss": 2.6373, "step": 73176 }, { "epoch": 14.05, "learning_rate": 0.001, "loss": 2.6482, "step": 73188 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.6491, "step": 73200 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.6405, "step": 73212 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.6478, "step": 73224 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.6319, "step": 73236 }, { "epoch": 14.06, "learning_rate": 0.001, "loss": 2.6365, "step": 73248 }, { "epoch": 14.07, "learning_rate": 0.001, "loss": 2.6523, "step": 73260 }, { "epoch": 14.07, "learning_rate": 0.001, "loss": 2.6623, "step": 73272 }, { "epoch": 14.07, "learning_rate": 0.001, "loss": 2.6553, "step": 73284 }, { "epoch": 14.07, "learning_rate": 0.001, "loss": 2.6463, "step": 73296 }, { "epoch": 14.08, "learning_rate": 0.001, "loss": 2.6472, "step": 73308 }, { "epoch": 14.08, "learning_rate": 0.001, "loss": 2.6399, "step": 73320 }, { "epoch": 14.08, "learning_rate": 0.001, "loss": 2.6453, "step": 73332 }, { "epoch": 14.08, "learning_rate": 0.001, "loss": 2.6448, "step": 73344 }, { "epoch": 14.09, "learning_rate": 0.001, "loss": 2.6409, "step": 73356 }, { "epoch": 14.09, "learning_rate": 0.001, "loss": 2.6531, "step": 73368 }, { "epoch": 14.09, "learning_rate": 0.001, "loss": 2.6466, "step": 73380 }, { "epoch": 14.09, "learning_rate": 0.001, "loss": 2.644, "step": 73392 }, { "epoch": 14.09, "learning_rate": 0.001, "loss": 2.6444, "step": 73404 }, { "epoch": 14.1, "learning_rate": 0.001, "loss": 2.6515, "step": 73416 }, { "epoch": 14.1, "learning_rate": 0.001, "loss": 2.6436, "step": 73428 }, { "epoch": 14.1, "learning_rate": 0.001, "loss": 2.6534, "step": 73440 }, { "epoch": 14.1, "learning_rate": 0.001, "loss": 2.6497, "step": 73452 }, { "epoch": 14.11, "learning_rate": 0.001, "loss": 2.6486, "step": 73464 }, { "epoch": 14.11, "learning_rate": 0.001, "loss": 2.6612, "step": 73476 }, { "epoch": 14.11, "learning_rate": 0.001, "loss": 2.6561, "step": 73488 }, { "epoch": 14.11, "learning_rate": 0.001, "loss": 2.6508, "step": 73500 }, { "epoch": 14.12, "learning_rate": 0.001, "loss": 2.6494, "step": 73512 }, { "epoch": 14.12, "learning_rate": 0.001, "loss": 2.6489, "step": 73524 }, { "epoch": 14.12, "learning_rate": 0.001, "loss": 2.6492, "step": 73536 }, { "epoch": 14.12, "learning_rate": 0.001, "loss": 2.6522, "step": 73548 }, { "epoch": 14.12, "learning_rate": 0.001, "loss": 2.6432, "step": 73560 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 2.6493, "step": 73572 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 2.6495, "step": 73584 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 2.6495, "step": 73596 }, { "epoch": 14.13, "learning_rate": 0.001, "loss": 2.6398, "step": 73608 }, { "epoch": 14.14, "learning_rate": 0.001, "loss": 2.6435, "step": 73620 }, { "epoch": 14.14, "learning_rate": 0.001, "loss": 2.6461, "step": 73632 }, { "epoch": 14.14, "learning_rate": 0.001, "loss": 2.6514, "step": 73644 }, { "epoch": 14.14, "learning_rate": 0.001, "loss": 2.6547, "step": 73656 }, { "epoch": 14.15, "learning_rate": 0.001, "loss": 2.6494, "step": 73668 }, { "epoch": 14.15, "learning_rate": 0.001, "loss": 2.6405, "step": 73680 }, { "epoch": 14.15, "learning_rate": 0.001, "loss": 2.6492, "step": 73692 }, { "epoch": 14.15, "learning_rate": 0.001, "loss": 2.6511, "step": 73704 }, { "epoch": 14.15, "learning_rate": 0.001, "loss": 2.6544, "step": 73716 }, { "epoch": 14.16, "learning_rate": 0.001, "loss": 2.6484, "step": 73728 }, { "epoch": 14.16, "learning_rate": 0.001, "loss": 2.6379, "step": 73740 }, { "epoch": 14.16, "eval_ag_news_accuracy": 0.30990625, "eval_ag_news_bleu_score": 4.6911453936087115, "eval_ag_news_bleu_score_sem": 0.15697956399831808, "eval_ag_news_emb_cos_sim": 0.7940667867660522, "eval_ag_news_emb_cos_sim_sem": 0.0075017768354597465, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6646106243133545, "eval_ag_news_n_ngrams_match_1": 13.356, "eval_ag_news_n_ngrams_match_2": 2.988, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.23, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.04093161359618, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33079080594767524, "eval_ag_news_runtime": 9.9854, "eval_ag_news_samples_per_second": 50.073, "eval_ag_news_steps_per_second": 0.1, "eval_ag_news_token_set_f1": 0.33544118111473303, "eval_ag_news_token_set_f1_sem": 0.004651303418480213, "eval_ag_news_token_set_precision": 0.31543177944159256, "eval_ag_news_token_set_recall": 0.374220577683943, "eval_ag_news_true_num_tokens": 56.09375, "step": 73750 }, { "epoch": 14.16, "eval_anthropic_toxic_prompts_accuracy": 0.1095625, "eval_anthropic_toxic_prompts_bleu_score": 2.786226119952974, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10713424351407781, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6525640487670898, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009288877627851055, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.347583532333374, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.878, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.724, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.588, "eval_anthropic_toxic_prompts_num_pred_words": 47.094, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.43394086074982, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20356452372114942, "eval_anthropic_toxic_prompts_runtime": 9.884, "eval_anthropic_toxic_prompts_samples_per_second": 50.587, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3416159236421704, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006345102161436105, "eval_anthropic_toxic_prompts_token_set_precision": 0.4151898144934839, "eval_anthropic_toxic_prompts_token_set_recall": 0.3178102705218873, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 73750 }, { "epoch": 14.16, "eval_arxiv_accuracy": 0.3361875, "eval_arxiv_bleu_score": 3.9824530512943435, "eval_arxiv_bleu_score_sem": 0.11369980858364914, "eval_arxiv_emb_cos_sim": 0.733562707901001, "eval_arxiv_emb_cos_sim_sem": 0.00844667625184859, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5285377502441406, "eval_arxiv_n_ngrams_match_1": 14.238, "eval_arxiv_n_ngrams_match_2": 2.716, "eval_arxiv_n_ngrams_match_3": 0.56, "eval_arxiv_num_pred_words": 39.872, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.074106315161664, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.339855338181802, "eval_arxiv_runtime": 9.6456, "eval_arxiv_samples_per_second": 51.837, "eval_arxiv_steps_per_second": 0.104, "eval_arxiv_token_set_f1": 0.3334672824648522, "eval_arxiv_token_set_f1_sem": 0.004236619444607306, "eval_arxiv_token_set_precision": 0.2808371818029373, "eval_arxiv_token_set_recall": 0.4358752554472092, "eval_arxiv_true_num_tokens": 64.0, "step": 73750 }, { "epoch": 14.16, "eval_python_code_alpaca_accuracy": 0.15496875, "eval_python_code_alpaca_bleu_score": 4.209871652279953, "eval_python_code_alpaca_bleu_score_sem": 0.13866307576436476, "eval_python_code_alpaca_emb_cos_sim": 0.7154151797294617, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01036311123434066, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0079410076141357, "eval_python_code_alpaca_n_ngrams_match_1": 9.222, "eval_python_code_alpaca_n_ngrams_match_2": 2.588, "eval_python_code_alpaca_n_ngrams_match_3": 0.858, "eval_python_code_alpaca_num_pred_words": 44.128, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.245671297471503, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3022217892621271, "eval_python_code_alpaca_runtime": 9.4478, "eval_python_code_alpaca_samples_per_second": 52.922, "eval_python_code_alpaca_steps_per_second": 0.106, "eval_python_code_alpaca_token_set_f1": 0.45909255201429167, "eval_python_code_alpaca_token_set_f1_sem": 0.00601253214442383, "eval_python_code_alpaca_token_set_precision": 0.4983887793272989, "eval_python_code_alpaca_token_set_recall": 0.45090550869662416, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 73750 }, { "epoch": 14.16, "eval_wikibio_accuracy": 0.31446875, "eval_wikibio_bleu_score": 5.580133044479681, "eval_wikibio_bleu_score_sem": 0.20506788017215485, "eval_wikibio_emb_cos_sim": 0.7301121950149536, "eval_wikibio_emb_cos_sim_sem": 0.009828932198145969, "eval_wikibio_emb_top1_equal": 0.125, "eval_wikibio_emb_top1_equal_sem": 0.02934655822437397, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8464243412017822, "eval_wikibio_n_ngrams_match_1": 9.692, "eval_wikibio_n_ngrams_match_2": 3.22, "eval_wikibio_n_ngrams_match_3": 1.144, "eval_wikibio_num_pred_words": 35.86, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.825332124891936, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3390310997186834, "eval_wikibio_runtime": 11.1169, "eval_wikibio_samples_per_second": 44.976, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.30983704129249356, "eval_wikibio_token_set_f1_sem": 0.005564948964924509, "eval_wikibio_token_set_precision": 0.3160615956479311, "eval_wikibio_token_set_recall": 0.3203211879569017, "eval_wikibio_true_num_tokens": 61.1328125, "step": 73750 }, { "epoch": 14.16, "eval_nq_accuracy": 0.513875, "eval_nq_bleu_score": 11.313150061634024, "eval_nq_bleu_score_sem": 0.47206159574148193, "eval_nq_emb_cos_sim": 0.8237274885177612, "eval_nq_emb_cos_sim_sem": 0.007182782485604474, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2871272563934326, "eval_nq_n_ngrams_match_1": 22.482, "eval_nq_n_ngrams_match_2": 8.192, "eval_nq_n_ngrams_match_3": 3.75, "eval_nq_num_pred_words": 48.948, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.846610225324573, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4339535366874501, "eval_nq_runtime": 10.3605, "eval_nq_samples_per_second": 48.26, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.45193722470489195, "eval_nq_token_set_f1_sem": 0.004997454913791524, "eval_nq_token_set_precision": 0.40655292709597685, "eval_nq_token_set_recall": 0.5175811503915181, "eval_nq_true_num_tokens": 64.0, "step": 73750 }, { "epoch": 14.16, "learning_rate": 0.001, "loss": 2.655, "step": 73752 }, { "epoch": 14.16, "learning_rate": 0.001, "loss": 2.6435, "step": 73764 }, { "epoch": 14.17, "learning_rate": 0.001, "loss": 2.6485, "step": 73776 }, { "epoch": 14.17, "learning_rate": 0.001, "loss": 2.6461, "step": 73788 }, { "epoch": 14.17, "learning_rate": 0.001, "loss": 2.6503, "step": 73800 }, { "epoch": 14.17, "learning_rate": 0.001, "loss": 2.6491, "step": 73812 }, { "epoch": 14.18, "learning_rate": 0.001, "loss": 2.6544, "step": 73824 }, { "epoch": 14.18, "learning_rate": 0.001, "loss": 2.6498, "step": 73836 }, { "epoch": 14.18, "learning_rate": 0.001, "loss": 2.6506, "step": 73848 }, { "epoch": 14.18, "learning_rate": 0.001, "loss": 2.6552, "step": 73860 }, { "epoch": 14.18, "learning_rate": 0.001, "loss": 2.6498, "step": 73872 }, { "epoch": 14.19, "learning_rate": 0.001, "loss": 2.647, "step": 73884 }, { "epoch": 14.19, "learning_rate": 0.001, "loss": 2.6378, "step": 73896 }, { "epoch": 14.19, "learning_rate": 0.001, "loss": 2.6565, "step": 73908 }, { "epoch": 14.19, "learning_rate": 0.001, "loss": 2.6538, "step": 73920 }, { "epoch": 14.2, "learning_rate": 0.001, "loss": 2.6415, "step": 73932 }, { "epoch": 14.2, "learning_rate": 0.001, "loss": 2.6504, "step": 73944 }, { "epoch": 14.2, "learning_rate": 0.001, "loss": 2.6469, "step": 73956 }, { "epoch": 14.2, "learning_rate": 0.001, "loss": 2.6485, "step": 73968 }, { "epoch": 14.21, "learning_rate": 0.001, "loss": 2.6498, "step": 73980 }, { "epoch": 14.21, "learning_rate": 0.001, "loss": 2.6472, "step": 73992 }, { "epoch": 14.21, "learning_rate": 0.001, "loss": 2.6483, "step": 74004 }, { "epoch": 14.21, "learning_rate": 0.001, "loss": 2.6606, "step": 74016 }, { "epoch": 14.21, "learning_rate": 0.001, "loss": 2.6504, "step": 74028 }, { "epoch": 14.22, "learning_rate": 0.001, "loss": 2.6593, "step": 74040 }, { "epoch": 14.22, "learning_rate": 0.001, "loss": 2.6479, "step": 74052 }, { "epoch": 14.22, "learning_rate": 0.001, "loss": 2.6537, "step": 74064 }, { "epoch": 14.22, "learning_rate": 0.001, "loss": 2.6494, "step": 74076 }, { "epoch": 14.23, "learning_rate": 0.001, "loss": 2.6513, "step": 74088 }, { "epoch": 14.23, "learning_rate": 0.001, "loss": 2.6569, "step": 74100 }, { "epoch": 14.23, "learning_rate": 0.001, "loss": 2.6588, "step": 74112 }, { "epoch": 14.23, "learning_rate": 0.001, "loss": 2.6554, "step": 74124 }, { "epoch": 14.24, "learning_rate": 0.001, "loss": 2.6525, "step": 74136 }, { "epoch": 14.24, "learning_rate": 0.001, "loss": 2.6467, "step": 74148 }, { "epoch": 14.24, "learning_rate": 0.001, "loss": 2.6548, "step": 74160 }, { "epoch": 14.24, "learning_rate": 0.001, "loss": 2.6615, "step": 74172 }, { "epoch": 14.24, "learning_rate": 0.001, "loss": 2.6567, "step": 74184 }, { "epoch": 14.25, "learning_rate": 0.001, "loss": 2.6486, "step": 74196 }, { "epoch": 14.25, "learning_rate": 0.001, "loss": 2.655, "step": 74208 }, { "epoch": 14.25, "learning_rate": 0.001, "loss": 2.6449, "step": 74220 }, { "epoch": 14.25, "learning_rate": 0.001, "loss": 2.6497, "step": 74232 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.6564, "step": 74244 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.6513, "step": 74256 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.6639, "step": 74268 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.6534, "step": 74280 }, { "epoch": 14.26, "learning_rate": 0.001, "loss": 2.666, "step": 74292 }, { "epoch": 14.27, "learning_rate": 0.001, "loss": 2.6485, "step": 74304 }, { "epoch": 14.27, "learning_rate": 0.001, "loss": 2.6577, "step": 74316 }, { "epoch": 14.27, "learning_rate": 0.001, "loss": 2.6499, "step": 74328 }, { "epoch": 14.27, "learning_rate": 0.001, "loss": 2.6481, "step": 74340 }, { "epoch": 14.28, "learning_rate": 0.001, "loss": 2.6517, "step": 74352 }, { "epoch": 14.28, "learning_rate": 0.001, "loss": 2.6618, "step": 74364 }, { "epoch": 14.28, "eval_ag_news_accuracy": 0.31196875, "eval_ag_news_bleu_score": 4.530719373629623, "eval_ag_news_bleu_score_sem": 0.14707608957693488, "eval_ag_news_emb_cos_sim": 0.7935448288917542, "eval_ag_news_emb_cos_sim_sem": 0.007292474268665631, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6723897457122803, "eval_ag_news_n_ngrams_match_1": 13.556, "eval_ag_news_n_ngrams_match_2": 2.924, "eval_ag_news_n_ngrams_match_3": 0.788, "eval_ag_news_num_pred_words": 46.566, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.34582010492024, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3349469480461629, "eval_ag_news_runtime": 11.1017, "eval_ag_news_samples_per_second": 45.038, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.33913312328003714, "eval_ag_news_token_set_f1_sem": 0.004317692030863565, "eval_ag_news_token_set_precision": 0.32194428388324775, "eval_ag_news_token_set_recall": 0.3729593396042471, "eval_ag_news_true_num_tokens": 56.09375, "step": 74375 }, { "epoch": 14.28, "eval_anthropic_toxic_prompts_accuracy": 0.10971875, "eval_anthropic_toxic_prompts_bleu_score": 2.7945020810196173, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10685465770247689, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.641710638999939, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009864716189620733, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.363442897796631, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.718, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.654, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.598, "eval_anthropic_toxic_prompts_num_pred_words": 47.404, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.88847994406159, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19770273251289106, "eval_anthropic_toxic_prompts_runtime": 9.3924, "eval_anthropic_toxic_prompts_samples_per_second": 53.235, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.3434382938004426, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006437626654076042, "eval_anthropic_toxic_prompts_token_set_precision": 0.40657300781026523, "eval_anthropic_toxic_prompts_token_set_recall": 0.3271426314507424, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 74375 }, { "epoch": 14.28, "eval_arxiv_accuracy": 0.3364375, "eval_arxiv_bleu_score": 3.928561341510662, "eval_arxiv_bleu_score_sem": 0.11104355755983975, "eval_arxiv_emb_cos_sim": 0.7331550717353821, "eval_arxiv_emb_cos_sim_sem": 0.007875648357709727, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.515850305557251, "eval_arxiv_n_ngrams_match_1": 14.17, "eval_arxiv_n_ngrams_match_2": 2.666, "eval_arxiv_n_ngrams_match_3": 0.538, "eval_arxiv_num_pred_words": 39.366, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.64452389018019, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.340580079726118, "eval_arxiv_runtime": 10.2528, "eval_arxiv_samples_per_second": 48.767, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.33346639356391156, "eval_arxiv_token_set_f1_sem": 0.004207045965642826, "eval_arxiv_token_set_precision": 0.28174143839893606, "eval_arxiv_token_set_recall": 0.430434005989786, "eval_arxiv_true_num_tokens": 64.0, "step": 74375 }, { "epoch": 14.28, "eval_python_code_alpaca_accuracy": 0.15378125, "eval_python_code_alpaca_bleu_score": 3.862998746008432, "eval_python_code_alpaca_bleu_score_sem": 0.11164045906700341, "eval_python_code_alpaca_emb_cos_sim": 0.7203464508056641, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009563238465133291, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.013864278793335, "eval_python_code_alpaca_n_ngrams_match_1": 9.006, "eval_python_code_alpaca_n_ngrams_match_2": 2.354, "eval_python_code_alpaca_n_ngrams_match_3": 0.724, "eval_python_code_alpaca_num_pred_words": 43.384, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.365947762170272, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.29992380892071974, "eval_python_code_alpaca_runtime": 9.8246, "eval_python_code_alpaca_samples_per_second": 50.893, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4454810625851117, "eval_python_code_alpaca_token_set_f1_sem": 0.005939004477089126, "eval_python_code_alpaca_token_set_precision": 0.48828864850600895, "eval_python_code_alpaca_token_set_recall": 0.43521256158964344, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 74375 }, { "epoch": 14.28, "eval_wikibio_accuracy": 0.31159375, "eval_wikibio_bleu_score": 5.5747303718725, "eval_wikibio_bleu_score_sem": 0.19877028004601707, "eval_wikibio_emb_cos_sim": 0.7202527523040771, "eval_wikibio_emb_cos_sim_sem": 0.01039528574336717, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.859248399734497, "eval_wikibio_n_ngrams_match_1": 9.854, "eval_wikibio_n_ngrams_match_2": 3.206, "eval_wikibio_n_ngrams_match_3": 1.134, "eval_wikibio_num_pred_words": 36.246, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.42968980146304, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3406756703952334, "eval_wikibio_runtime": 10.1792, "eval_wikibio_samples_per_second": 49.12, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3100505634113041, "eval_wikibio_token_set_f1_sem": 0.005343169258096035, "eval_wikibio_token_set_precision": 0.31721747727931304, "eval_wikibio_token_set_recall": 0.3221580480073975, "eval_wikibio_true_num_tokens": 61.1328125, "step": 74375 }, { "epoch": 14.28, "eval_nq_accuracy": 0.5143125, "eval_nq_bleu_score": 11.116941807837486, "eval_nq_bleu_score_sem": 0.4759200581539458, "eval_nq_emb_cos_sim": 0.8214974403381348, "eval_nq_emb_cos_sim_sem": 0.007312549997991484, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.288007974624634, "eval_nq_n_ngrams_match_1": 22.362, "eval_nq_n_ngrams_match_2": 7.946, "eval_nq_n_ngrams_match_3": 3.656, "eval_nq_num_pred_words": 48.956, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.855286134420407, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4301810822709363, "eval_nq_runtime": 10.264, "eval_nq_samples_per_second": 48.714, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.44629435692179015, "eval_nq_token_set_f1_sem": 0.00490550261991377, "eval_nq_token_set_precision": 0.40100556596328285, "eval_nq_token_set_recall": 0.5104525715389672, "eval_nq_true_num_tokens": 64.0, "step": 74375 }, { "epoch": 14.28, "learning_rate": 0.001, "loss": 2.6553, "step": 74376 }, { "epoch": 14.28, "learning_rate": 0.001, "loss": 2.6513, "step": 74388 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.6519, "step": 74400 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.6685, "step": 74412 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.6645, "step": 74424 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.6562, "step": 74436 }, { "epoch": 14.29, "learning_rate": 0.001, "loss": 2.6578, "step": 74448 }, { "epoch": 14.3, "learning_rate": 0.001, "loss": 2.6617, "step": 74460 }, { "epoch": 14.3, "learning_rate": 0.001, "loss": 2.6571, "step": 74472 }, { "epoch": 14.3, "learning_rate": 0.001, "loss": 2.6557, "step": 74484 }, { "epoch": 14.3, "learning_rate": 0.001, "loss": 2.655, "step": 74496 }, { "epoch": 14.31, "learning_rate": 0.001, "loss": 2.6491, "step": 74508 }, { "epoch": 14.31, "learning_rate": 0.001, "loss": 2.6406, "step": 74520 }, { "epoch": 14.31, "learning_rate": 0.001, "loss": 2.6484, "step": 74532 }, { "epoch": 14.31, "learning_rate": 0.001, "loss": 2.6479, "step": 74544 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.6501, "step": 74556 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.6554, "step": 74568 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.6607, "step": 74580 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.6487, "step": 74592 }, { "epoch": 14.32, "learning_rate": 0.001, "loss": 2.6559, "step": 74604 }, { "epoch": 14.33, "learning_rate": 0.001, "loss": 2.646, "step": 74616 }, { "epoch": 14.33, "learning_rate": 0.001, "loss": 2.6439, "step": 74628 }, { "epoch": 14.33, "learning_rate": 0.001, "loss": 2.6575, "step": 74640 }, { "epoch": 14.33, "learning_rate": 0.001, "loss": 2.6472, "step": 74652 }, { "epoch": 14.34, "learning_rate": 0.001, "loss": 2.6592, "step": 74664 }, { "epoch": 14.34, "learning_rate": 0.001, "loss": 2.6565, "step": 74676 }, { "epoch": 14.34, "learning_rate": 0.001, "loss": 2.6503, "step": 74688 }, { "epoch": 14.34, "learning_rate": 0.001, "loss": 2.6435, "step": 74700 }, { "epoch": 14.35, "learning_rate": 0.001, "loss": 2.6544, "step": 74712 }, { "epoch": 14.35, "learning_rate": 0.001, "loss": 2.6493, "step": 74724 }, { "epoch": 14.35, "learning_rate": 0.001, "loss": 2.6558, "step": 74736 }, { "epoch": 14.35, "learning_rate": 0.001, "loss": 2.6565, "step": 74748 }, { "epoch": 14.35, "learning_rate": 0.001, "loss": 2.6503, "step": 74760 }, { "epoch": 14.36, "learning_rate": 0.001, "loss": 2.6514, "step": 74772 }, { "epoch": 14.36, "learning_rate": 0.001, "loss": 2.6498, "step": 74784 }, { "epoch": 14.36, "learning_rate": 0.001, "loss": 2.6507, "step": 74796 }, { "epoch": 14.36, "learning_rate": 0.001, "loss": 2.6498, "step": 74808 }, { "epoch": 14.37, "learning_rate": 0.001, "loss": 2.651, "step": 74820 }, { "epoch": 14.37, "learning_rate": 0.001, "loss": 2.6448, "step": 74832 }, { "epoch": 14.37, "learning_rate": 0.001, "loss": 2.6486, "step": 74844 }, { "epoch": 14.37, "learning_rate": 0.001, "loss": 2.6538, "step": 74856 }, { "epoch": 14.38, "learning_rate": 0.001, "loss": 2.652, "step": 74868 }, { "epoch": 14.38, "learning_rate": 0.001, "loss": 2.6424, "step": 74880 }, { "epoch": 14.38, "learning_rate": 0.001, "loss": 2.6539, "step": 74892 }, { "epoch": 14.38, "learning_rate": 0.001, "loss": 2.6476, "step": 74904 }, { "epoch": 14.38, "learning_rate": 0.001, "loss": 2.6554, "step": 74916 }, { "epoch": 14.39, "learning_rate": 0.001, "loss": 2.654, "step": 74928 }, { "epoch": 14.39, "learning_rate": 0.001, "loss": 2.6646, "step": 74940 }, { "epoch": 14.39, "learning_rate": 0.001, "loss": 2.6498, "step": 74952 }, { "epoch": 14.39, "learning_rate": 0.001, "loss": 2.6518, "step": 74964 }, { "epoch": 14.4, "learning_rate": 0.001, "loss": 2.6518, "step": 74976 }, { "epoch": 14.4, "learning_rate": 0.001, "loss": 2.6567, "step": 74988 }, { "epoch": 14.4, "learning_rate": 0.001, "loss": 2.6476, "step": 75000 }, { "epoch": 14.4, "eval_ag_news_accuracy": 0.31125, "eval_ag_news_bleu_score": 4.825265164506628, "eval_ag_news_bleu_score_sem": 0.16061676852998474, "eval_ag_news_emb_cos_sim": 0.7942475080490112, "eval_ag_news_emb_cos_sim_sem": 0.006755775641183398, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6629862785339355, "eval_ag_news_n_ngrams_match_1": 13.442, "eval_ag_news_n_ngrams_match_2": 2.968, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 46.132, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.97756711796284, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3333763081254419, "eval_ag_news_runtime": 9.853, "eval_ag_news_samples_per_second": 50.746, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.33795608771709623, "eval_ag_news_token_set_f1_sem": 0.004539972483984398, "eval_ag_news_token_set_precision": 0.3174717992862559, "eval_ag_news_token_set_recall": 0.379608859965652, "eval_ag_news_true_num_tokens": 56.09375, "step": 75000 }, { "epoch": 14.4, "eval_anthropic_toxic_prompts_accuracy": 0.1095625, "eval_anthropic_toxic_prompts_bleu_score": 2.9508382467623506, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11186990624048283, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6582716107368469, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00898989183578182, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3321025371551514, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.912, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.636, "eval_anthropic_toxic_prompts_num_pred_words": 47.416, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.997144901012536, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20377580039567783, "eval_anthropic_toxic_prompts_runtime": 9.4893, "eval_anthropic_toxic_prompts_samples_per_second": 52.691, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.3458427859817732, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006436895383963648, "eval_anthropic_toxic_prompts_token_set_precision": 0.41559077611914735, "eval_anthropic_toxic_prompts_token_set_recall": 0.3253764235997341, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 75000 }, { "epoch": 14.4, "eval_arxiv_accuracy": 0.3355, "eval_arxiv_bleu_score": 4.128656624074922, "eval_arxiv_bleu_score_sem": 0.1183949221643065, "eval_arxiv_emb_cos_sim": 0.738603413105011, "eval_arxiv_emb_cos_sim_sem": 0.008967593518741527, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5226566791534424, "eval_arxiv_n_ngrams_match_1": 14.306, "eval_arxiv_n_ngrams_match_2": 2.76, "eval_arxiv_n_ngrams_match_3": 0.622, "eval_arxiv_num_pred_words": 40.336, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.87430218061502, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3392346518234537, "eval_arxiv_runtime": 10.1196, "eval_arxiv_samples_per_second": 49.409, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3360491641440138, "eval_arxiv_token_set_f1_sem": 0.004441608901171986, "eval_arxiv_token_set_precision": 0.2838872252414057, "eval_arxiv_token_set_recall": 0.43104836963579973, "eval_arxiv_true_num_tokens": 64.0, "step": 75000 }, { "epoch": 14.4, "eval_python_code_alpaca_accuracy": 0.15525, "eval_python_code_alpaca_bleu_score": 4.1750089981728955, "eval_python_code_alpaca_bleu_score_sem": 0.14244011545234558, "eval_python_code_alpaca_emb_cos_sim": 0.7288846969604492, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010393718407438171, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0037569999694824, "eval_python_code_alpaca_n_ngrams_match_1": 9.272, "eval_python_code_alpaca_n_ngrams_match_2": 2.68, "eval_python_code_alpaca_n_ngrams_match_3": 0.85, "eval_python_code_alpaca_num_pred_words": 44.27, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.161140216652083, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30784098512657615, "eval_python_code_alpaca_runtime": 9.7008, "eval_python_code_alpaca_samples_per_second": 51.542, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.45906546035014595, "eval_python_code_alpaca_token_set_f1_sem": 0.006185535015961747, "eval_python_code_alpaca_token_set_precision": 0.5026287946028317, "eval_python_code_alpaca_token_set_recall": 0.4469467322166043, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 75000 }, { "epoch": 14.4, "eval_wikibio_accuracy": 0.31290625, "eval_wikibio_bleu_score": 5.505307064833404, "eval_wikibio_bleu_score_sem": 0.20573228474271227, "eval_wikibio_emb_cos_sim": 0.7147561311721802, "eval_wikibio_emb_cos_sim_sem": 0.010479368706146405, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.854083776473999, "eval_wikibio_n_ngrams_match_1": 9.314, "eval_wikibio_n_ngrams_match_2": 3.078, "eval_wikibio_n_ngrams_match_3": 1.118, "eval_wikibio_num_pred_words": 35.33, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.18536478867824, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.32920250931498296, "eval_wikibio_runtime": 10.5288, "eval_wikibio_samples_per_second": 47.489, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.2972337502815764, "eval_wikibio_token_set_f1_sem": 0.005929955659531237, "eval_wikibio_token_set_precision": 0.3024276408245034, "eval_wikibio_token_set_recall": 0.3102296234577406, "eval_wikibio_true_num_tokens": 61.1328125, "step": 75000 }, { "epoch": 14.4, "eval_nq_accuracy": 0.51515625, "eval_nq_bleu_score": 11.306658310330809, "eval_nq_bleu_score_sem": 0.47323131156860576, "eval_nq_emb_cos_sim": 0.8234788179397583, "eval_nq_emb_cos_sim_sem": 0.007213109777286094, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.284891128540039, "eval_nq_n_ngrams_match_1": 22.39, "eval_nq_n_ngrams_match_2": 8.12, "eval_nq_n_ngrams_match_3": 3.754, "eval_nq_num_pred_words": 48.95, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.82461654544283, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.431665225931508, "eval_nq_runtime": 10.9854, "eval_nq_samples_per_second": 45.515, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4482254340269814, "eval_nq_token_set_f1_sem": 0.004960347638546244, "eval_nq_token_set_precision": 0.40238438579843505, "eval_nq_token_set_recall": 0.5151764854879013, "eval_nq_true_num_tokens": 64.0, "step": 75000 }, { "epoch": 14.4, "learning_rate": 0.001, "loss": 2.6451, "step": 75012 }, { "epoch": 14.41, "learning_rate": 0.001, "loss": 2.6433, "step": 75024 }, { "epoch": 14.41, "learning_rate": 0.001, "loss": 2.6523, "step": 75036 }, { "epoch": 14.41, "learning_rate": 0.001, "loss": 2.6496, "step": 75048 }, { "epoch": 14.41, "learning_rate": 0.001, "loss": 2.6474, "step": 75060 }, { "epoch": 14.41, "learning_rate": 0.001, "loss": 2.643, "step": 75072 }, { "epoch": 14.42, "learning_rate": 0.001, "loss": 2.6496, "step": 75084 }, { "epoch": 14.42, "learning_rate": 0.001, "loss": 2.6391, "step": 75096 }, { "epoch": 14.42, "learning_rate": 0.001, "loss": 2.6561, "step": 75108 }, { "epoch": 14.42, "learning_rate": 0.001, "loss": 2.6518, "step": 75120 }, { "epoch": 14.43, "learning_rate": 0.001, "loss": 2.6439, "step": 75132 }, { "epoch": 14.43, "learning_rate": 0.001, "loss": 2.6536, "step": 75144 }, { "epoch": 14.43, "learning_rate": 0.001, "loss": 2.6558, "step": 75156 }, { "epoch": 14.43, "learning_rate": 0.001, "loss": 2.6393, "step": 75168 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 2.6587, "step": 75180 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 2.6543, "step": 75192 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 2.6554, "step": 75204 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 2.6574, "step": 75216 }, { "epoch": 14.44, "learning_rate": 0.001, "loss": 2.6456, "step": 75228 }, { "epoch": 14.45, "learning_rate": 0.001, "loss": 2.6516, "step": 75240 }, { "epoch": 14.45, "learning_rate": 0.001, "loss": 2.6543, "step": 75252 }, { "epoch": 14.45, "learning_rate": 0.001, "loss": 2.6507, "step": 75264 }, { "epoch": 14.45, "learning_rate": 0.001, "loss": 2.6376, "step": 75276 }, { "epoch": 14.46, "learning_rate": 0.001, "loss": 2.6534, "step": 75288 }, { "epoch": 14.46, "learning_rate": 0.001, "loss": 2.6486, "step": 75300 }, { "epoch": 14.46, "learning_rate": 0.001, "loss": 2.6589, "step": 75312 }, { "epoch": 14.46, "learning_rate": 0.001, "loss": 2.6443, "step": 75324 }, { "epoch": 14.47, "learning_rate": 0.001, "loss": 2.6524, "step": 75336 }, { "epoch": 14.47, "learning_rate": 0.001, "loss": 2.6668, "step": 75348 }, { "epoch": 14.47, "learning_rate": 0.001, "loss": 2.6528, "step": 75360 }, { "epoch": 14.47, "learning_rate": 0.001, "loss": 2.6545, "step": 75372 }, { "epoch": 14.47, "learning_rate": 0.001, "loss": 2.6486, "step": 75384 }, { "epoch": 14.48, "learning_rate": 0.001, "loss": 2.6557, "step": 75396 }, { "epoch": 14.48, "learning_rate": 0.001, "loss": 2.6491, "step": 75408 }, { "epoch": 14.48, "learning_rate": 0.001, "loss": 2.6587, "step": 75420 }, { "epoch": 14.48, "learning_rate": 0.001, "loss": 2.6442, "step": 75432 }, { "epoch": 14.49, "learning_rate": 0.001, "loss": 2.6505, "step": 75444 }, { "epoch": 14.49, "learning_rate": 0.001, "loss": 2.6577, "step": 75456 }, { "epoch": 14.49, "learning_rate": 0.001, "loss": 2.651, "step": 75468 }, { "epoch": 14.49, "learning_rate": 0.001, "loss": 2.6634, "step": 75480 }, { "epoch": 14.5, "learning_rate": 0.001, "loss": 2.6424, "step": 75492 }, { "epoch": 14.5, "learning_rate": 0.001, "loss": 2.6481, "step": 75504 }, { "epoch": 14.5, "learning_rate": 0.001, "loss": 2.6439, "step": 75516 }, { "epoch": 14.5, "learning_rate": 0.001, "loss": 2.6479, "step": 75528 }, { "epoch": 14.5, "learning_rate": 0.001, "loss": 2.6532, "step": 75540 }, { "epoch": 14.51, "learning_rate": 0.001, "loss": 2.6561, "step": 75552 }, { "epoch": 14.51, "learning_rate": 0.001, "loss": 2.6614, "step": 75564 }, { "epoch": 14.51, "learning_rate": 0.001, "loss": 2.6525, "step": 75576 }, { "epoch": 14.51, "learning_rate": 0.001, "loss": 2.6416, "step": 75588 }, { "epoch": 14.52, "learning_rate": 0.001, "loss": 2.6405, "step": 75600 }, { "epoch": 14.52, "learning_rate": 0.001, "loss": 2.6492, "step": 75612 }, { "epoch": 14.52, "learning_rate": 0.001, "loss": 2.6672, "step": 75624 }, { "epoch": 14.52, "eval_ag_news_accuracy": 0.310375, "eval_ag_news_bleu_score": 4.655412802755985, "eval_ag_news_bleu_score_sem": 0.155004284280549, "eval_ag_news_emb_cos_sim": 0.7930936217308044, "eval_ag_news_emb_cos_sim_sem": 0.007596103458060652, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6820600032806396, "eval_ag_news_n_ngrams_match_1": 13.642, "eval_ag_news_n_ngrams_match_2": 2.982, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 47.028, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.72814995420812, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3343935770510794, "eval_ag_news_runtime": 10.6119, "eval_ag_news_samples_per_second": 47.117, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3404060039707562, "eval_ag_news_token_set_f1_sem": 0.004622360860322712, "eval_ag_news_token_set_precision": 0.32387348810661365, "eval_ag_news_token_set_recall": 0.3747796600616977, "eval_ag_news_true_num_tokens": 56.09375, "step": 75625 }, { "epoch": 14.52, "eval_anthropic_toxic_prompts_accuracy": 0.1103125, "eval_anthropic_toxic_prompts_bleu_score": 2.876129619045212, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10880801230260725, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6423047780990601, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01027917380989769, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.360487699508667, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.832, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.736, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632, "eval_anthropic_toxic_prompts_num_pred_words": 47.348, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.803234777812257, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20049765813978743, "eval_anthropic_toxic_prompts_runtime": 9.4263, "eval_anthropic_toxic_prompts_samples_per_second": 53.043, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.3526566551452242, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006480346989934327, "eval_anthropic_toxic_prompts_token_set_precision": 0.4134845768753208, "eval_anthropic_toxic_prompts_token_set_recall": 0.33684238966629404, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 75625 }, { "epoch": 14.52, "eval_arxiv_accuracy": 0.3369375, "eval_arxiv_bleu_score": 4.119498721662499, "eval_arxiv_bleu_score_sem": 0.11446355754821129, "eval_arxiv_emb_cos_sim": 0.7383588552474976, "eval_arxiv_emb_cos_sim_sem": 0.008242254158403135, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.522167444229126, "eval_arxiv_n_ngrams_match_1": 14.394, "eval_arxiv_n_ngrams_match_2": 2.772, "eval_arxiv_n_ngrams_match_3": 0.606, "eval_arxiv_num_pred_words": 40.548, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.85773374221124, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33948853232039194, "eval_arxiv_runtime": 10.1505, "eval_arxiv_samples_per_second": 49.259, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3376089251969639, "eval_arxiv_token_set_f1_sem": 0.004237650715090648, "eval_arxiv_token_set_precision": 0.28646494354979996, "eval_arxiv_token_set_recall": 0.4263498231370397, "eval_arxiv_true_num_tokens": 64.0, "step": 75625 }, { "epoch": 14.52, "eval_python_code_alpaca_accuracy": 0.1566875, "eval_python_code_alpaca_bleu_score": 4.22506531640581, "eval_python_code_alpaca_bleu_score_sem": 0.13964023687934293, "eval_python_code_alpaca_emb_cos_sim": 0.7337641716003418, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009433625710705657, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9880495071411133, "eval_python_code_alpaca_n_ngrams_match_1": 9.49, "eval_python_code_alpaca_n_ngrams_match_2": 2.686, "eval_python_code_alpaca_n_ngrams_match_3": 0.856, "eval_python_code_alpaca_num_pred_words": 44.048, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.846933412057098, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31379163429124846, "eval_python_code_alpaca_runtime": 9.9596, "eval_python_code_alpaca_samples_per_second": 50.203, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4676294939803047, "eval_python_code_alpaca_token_set_f1_sem": 0.00580676499840515, "eval_python_code_alpaca_token_set_precision": 0.5199176042826318, "eval_python_code_alpaca_token_set_recall": 0.45222107007232876, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 75625 }, { "epoch": 14.52, "eval_wikibio_accuracy": 0.31221875, "eval_wikibio_bleu_score": 5.477373305705958, "eval_wikibio_bleu_score_sem": 0.19590572277088825, "eval_wikibio_emb_cos_sim": 0.7205518484115601, "eval_wikibio_emb_cos_sim_sem": 0.01125361553291464, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8638317584991455, "eval_wikibio_n_ngrams_match_1": 9.856, "eval_wikibio_n_ngrams_match_2": 3.176, "eval_wikibio_n_ngrams_match_3": 1.13, "eval_wikibio_num_pred_words": 36.638, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.64757602986537, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3394769741172859, "eval_wikibio_runtime": 10.2539, "eval_wikibio_samples_per_second": 48.762, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.30748428427846725, "eval_wikibio_token_set_f1_sem": 0.005599875663073089, "eval_wikibio_token_set_precision": 0.3162653082603447, "eval_wikibio_token_set_recall": 0.315662627814797, "eval_wikibio_true_num_tokens": 61.1328125, "step": 75625 }, { "epoch": 14.52, "eval_nq_accuracy": 0.51325, "eval_nq_bleu_score": 11.027304310545896, "eval_nq_bleu_score_sem": 0.46266101652935365, "eval_nq_emb_cos_sim": 0.820936918258667, "eval_nq_emb_cos_sim_sem": 0.00768282915995042, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2858147621154785, "eval_nq_n_ngrams_match_1": 22.58, "eval_nq_n_ngrams_match_2": 8.078, "eval_nq_n_ngrams_match_3": 3.654, "eval_nq_num_pred_words": 49.356, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.833695083125722, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4340510414319413, "eval_nq_runtime": 11.4133, "eval_nq_samples_per_second": 43.808, "eval_nq_steps_per_second": 0.088, "eval_nq_token_set_f1": 0.44971215918807794, "eval_nq_token_set_f1_sem": 0.005032339931197213, "eval_nq_token_set_precision": 0.4082384831646633, "eval_nq_token_set_recall": 0.5088412461961356, "eval_nq_true_num_tokens": 64.0, "step": 75625 }, { "epoch": 14.52, "learning_rate": 0.001, "loss": 2.6491, "step": 75636 }, { "epoch": 14.53, "learning_rate": 0.001, "loss": 2.6519, "step": 75648 }, { "epoch": 14.53, "learning_rate": 0.001, "loss": 2.6525, "step": 75660 }, { "epoch": 14.53, "learning_rate": 0.001, "loss": 2.6472, "step": 75672 }, { "epoch": 14.53, "learning_rate": 0.001, "loss": 2.6535, "step": 75684 }, { "epoch": 14.53, "learning_rate": 0.001, "loss": 2.6565, "step": 75696 }, { "epoch": 14.54, "learning_rate": 0.001, "loss": 2.6594, "step": 75708 }, { "epoch": 14.54, "learning_rate": 0.001, "loss": 2.6529, "step": 75720 }, { "epoch": 14.54, "learning_rate": 0.001, "loss": 2.652, "step": 75732 }, { "epoch": 14.54, "learning_rate": 0.001, "loss": 2.652, "step": 75744 }, { "epoch": 14.55, "learning_rate": 0.001, "loss": 2.6535, "step": 75756 }, { "epoch": 14.55, "learning_rate": 0.001, "loss": 2.6563, "step": 75768 }, { "epoch": 14.55, "learning_rate": 0.001, "loss": 2.6553, "step": 75780 }, { "epoch": 14.55, "learning_rate": 0.001, "loss": 2.6571, "step": 75792 }, { "epoch": 14.56, "learning_rate": 0.001, "loss": 2.6592, "step": 75804 }, { "epoch": 14.56, "learning_rate": 0.001, "loss": 2.644, "step": 75816 }, { "epoch": 14.56, "learning_rate": 0.001, "loss": 2.6573, "step": 75828 }, { "epoch": 14.56, "learning_rate": 0.001, "loss": 2.6635, "step": 75840 }, { "epoch": 14.56, "learning_rate": 0.001, "loss": 2.6497, "step": 75852 }, { "epoch": 14.57, "learning_rate": 0.001, "loss": 2.6554, "step": 75864 }, { "epoch": 14.57, "learning_rate": 0.001, "loss": 2.6461, "step": 75876 }, { "epoch": 14.57, "learning_rate": 0.001, "loss": 2.653, "step": 75888 }, { "epoch": 14.57, "learning_rate": 0.001, "loss": 2.6535, "step": 75900 }, { "epoch": 14.58, "learning_rate": 0.001, "loss": 2.6564, "step": 75912 }, { "epoch": 14.58, "learning_rate": 0.001, "loss": 2.6555, "step": 75924 }, { "epoch": 14.58, "learning_rate": 0.001, "loss": 2.6497, "step": 75936 }, { "epoch": 14.58, "learning_rate": 0.001, "loss": 2.6697, "step": 75948 }, { "epoch": 14.59, "learning_rate": 0.001, "loss": 2.6526, "step": 75960 }, { "epoch": 14.59, "learning_rate": 0.001, "loss": 2.6437, "step": 75972 }, { "epoch": 14.59, "learning_rate": 0.001, "loss": 2.648, "step": 75984 }, { "epoch": 14.59, "learning_rate": 0.001, "loss": 2.6512, "step": 75996 }, { "epoch": 14.59, "learning_rate": 0.001, "loss": 2.6476, "step": 76008 }, { "epoch": 14.6, "learning_rate": 0.001, "loss": 2.6448, "step": 76020 }, { "epoch": 14.6, "learning_rate": 0.001, "loss": 2.6508, "step": 76032 }, { "epoch": 14.6, "learning_rate": 0.001, "loss": 2.644, "step": 76044 }, { "epoch": 14.6, "learning_rate": 0.001, "loss": 2.6455, "step": 76056 }, { "epoch": 14.61, "learning_rate": 0.001, "loss": 2.6504, "step": 76068 }, { "epoch": 14.61, "learning_rate": 0.001, "loss": 2.6489, "step": 76080 }, { "epoch": 14.61, "learning_rate": 0.001, "loss": 2.6468, "step": 76092 }, { "epoch": 14.61, "learning_rate": 0.001, "loss": 2.655, "step": 76104 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.6479, "step": 76116 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.6597, "step": 76128 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.6626, "step": 76140 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.642, "step": 76152 }, { "epoch": 14.62, "learning_rate": 0.001, "loss": 2.6433, "step": 76164 }, { "epoch": 14.63, "learning_rate": 0.001, "loss": 2.6513, "step": 76176 }, { "epoch": 14.63, "learning_rate": 0.001, "loss": 2.6539, "step": 76188 }, { "epoch": 14.63, "learning_rate": 0.001, "loss": 2.6606, "step": 76200 }, { "epoch": 14.63, "learning_rate": 0.001, "loss": 2.6458, "step": 76212 }, { "epoch": 14.64, "learning_rate": 0.001, "loss": 2.653, "step": 76224 }, { "epoch": 14.64, "learning_rate": 0.001, "loss": 2.6528, "step": 76236 }, { "epoch": 14.64, "learning_rate": 0.001, "loss": 2.6506, "step": 76248 }, { "epoch": 14.64, "eval_ag_news_accuracy": 0.31259375, "eval_ag_news_bleu_score": 4.651526411582217, "eval_ag_news_bleu_score_sem": 0.14377521332499338, "eval_ag_news_emb_cos_sim": 0.7970424294471741, "eval_ag_news_emb_cos_sim_sem": 0.0075556563129248605, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6626620292663574, "eval_ag_news_n_ngrams_match_1": 13.636, "eval_ag_news_n_ngrams_match_2": 2.872, "eval_ag_news_n_ngrams_match_3": 0.808, "eval_ag_news_num_pred_words": 46.364, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.964930719155106, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33827477090693736, "eval_ag_news_runtime": 9.8257, "eval_ag_news_samples_per_second": 50.887, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.342507602151011, "eval_ag_news_token_set_f1_sem": 0.0043908813057337, "eval_ag_news_token_set_precision": 0.32307866257631845, "eval_ag_news_token_set_recall": 0.38145538280935165, "eval_ag_news_true_num_tokens": 56.09375, "step": 76250 }, { "epoch": 14.64, "eval_anthropic_toxic_prompts_accuracy": 0.11034375, "eval_anthropic_toxic_prompts_bleu_score": 2.921761347309161, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10688136883654988, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6543354988098145, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008875401238890994, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3178293704986572, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.932, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66, "eval_anthropic_toxic_prompts_num_pred_words": 47.764, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.600375301156884, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2025388716957945, "eval_anthropic_toxic_prompts_runtime": 10.0384, "eval_anthropic_toxic_prompts_samples_per_second": 49.809, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.34983621316398883, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643097524341174, "eval_anthropic_toxic_prompts_token_set_precision": 0.4199480957294115, "eval_anthropic_toxic_prompts_token_set_recall": 0.3266485123791543, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 76250 }, { "epoch": 14.64, "eval_arxiv_accuracy": 0.337625, "eval_arxiv_bleu_score": 3.9350191136695796, "eval_arxiv_bleu_score_sem": 0.10507292675712303, "eval_arxiv_emb_cos_sim": 0.7389769554138184, "eval_arxiv_emb_cos_sim_sem": 0.007970572280179373, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.518223762512207, "eval_arxiv_n_ngrams_match_1": 14.144, "eval_arxiv_n_ngrams_match_2": 2.62, "eval_arxiv_n_ngrams_match_3": 0.546, "eval_arxiv_num_pred_words": 39.208, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.724472559234606, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.338280011727951, "eval_arxiv_runtime": 10.148, "eval_arxiv_samples_per_second": 49.271, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3326526064936905, "eval_arxiv_token_set_f1_sem": 0.004472971109645054, "eval_arxiv_token_set_precision": 0.2807206300186895, "eval_arxiv_token_set_recall": 0.4294484005680578, "eval_arxiv_true_num_tokens": 64.0, "step": 76250 }, { "epoch": 14.64, "eval_python_code_alpaca_accuracy": 0.15478125, "eval_python_code_alpaca_bleu_score": 4.2807724645856595, "eval_python_code_alpaca_bleu_score_sem": 0.1378699381922166, "eval_python_code_alpaca_emb_cos_sim": 0.7384339570999146, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009733584173013182, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.979473114013672, "eval_python_code_alpaca_n_ngrams_match_1": 9.456, "eval_python_code_alpaca_n_ngrams_match_2": 2.67, "eval_python_code_alpaca_n_ngrams_match_3": 0.838, "eval_python_code_alpaca_num_pred_words": 43.502, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.677446142346987, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31477142636234423, "eval_python_code_alpaca_runtime": 10.2217, "eval_python_code_alpaca_samples_per_second": 48.916, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.466077417367089, "eval_python_code_alpaca_token_set_f1_sem": 0.00570613133320064, "eval_python_code_alpaca_token_set_precision": 0.5140095977577134, "eval_python_code_alpaca_token_set_recall": 0.45111069881096394, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 76250 }, { "epoch": 14.64, "eval_wikibio_accuracy": 0.31628125, "eval_wikibio_bleu_score": 5.472883986001374, "eval_wikibio_bleu_score_sem": 0.20090337891561805, "eval_wikibio_emb_cos_sim": 0.7212482690811157, "eval_wikibio_emb_cos_sim_sem": 0.011007649817727602, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.87813663482666, "eval_wikibio_n_ngrams_match_1": 9.37, "eval_wikibio_n_ngrams_match_2": 3.092, "eval_wikibio_n_ngrams_match_3": 1.13, "eval_wikibio_num_pred_words": 35.434, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 48.33406709082238, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3255289787089194, "eval_wikibio_runtime": 10.2364, "eval_wikibio_samples_per_second": 48.845, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.29590539714172015, "eval_wikibio_token_set_f1_sem": 0.005926313063926945, "eval_wikibio_token_set_precision": 0.30286045743856127, "eval_wikibio_token_set_recall": 0.3065459299318423, "eval_wikibio_true_num_tokens": 61.1328125, "step": 76250 }, { "epoch": 14.64, "eval_nq_accuracy": 0.515625, "eval_nq_bleu_score": 10.963256287688997, "eval_nq_bleu_score_sem": 0.46914593933232945, "eval_nq_emb_cos_sim": 0.8221389651298523, "eval_nq_emb_cos_sim_sem": 0.006978323513666571, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.278108596801758, "eval_nq_n_ngrams_match_1": 22.382, "eval_nq_n_ngrams_match_2": 8.03, "eval_nq_n_ngrams_match_3": 3.596, "eval_nq_num_pred_words": 48.858, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.758206241491852, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43195307926493953, "eval_nq_runtime": 10.4376, "eval_nq_samples_per_second": 47.904, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.44679654179619294, "eval_nq_token_set_f1_sem": 0.004979462209745802, "eval_nq_token_set_precision": 0.4019680391733164, "eval_nq_token_set_recall": 0.5128056506818839, "eval_nq_true_num_tokens": 64.0, "step": 76250 }, { "epoch": 14.64, "learning_rate": 0.001, "loss": 2.6436, "step": 76260 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.6477, "step": 76272 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.6502, "step": 76284 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.6533, "step": 76296 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.6428, "step": 76308 }, { "epoch": 14.65, "learning_rate": 0.001, "loss": 2.6639, "step": 76320 }, { "epoch": 14.66, "learning_rate": 0.001, "loss": 2.6489, "step": 76332 }, { "epoch": 14.66, "learning_rate": 0.001, "loss": 2.6531, "step": 76344 }, { "epoch": 14.66, "learning_rate": 0.001, "loss": 2.6476, "step": 76356 }, { "epoch": 14.66, "learning_rate": 0.001, "loss": 2.6548, "step": 76368 }, { "epoch": 14.67, "learning_rate": 0.001, "loss": 2.6586, "step": 76380 }, { "epoch": 14.67, "learning_rate": 0.001, "loss": 2.6544, "step": 76392 }, { "epoch": 14.67, "learning_rate": 0.001, "loss": 2.6522, "step": 76404 }, { "epoch": 14.67, "learning_rate": 0.001, "loss": 2.6468, "step": 76416 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.6518, "step": 76428 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.6496, "step": 76440 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.6549, "step": 76452 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.6486, "step": 76464 }, { "epoch": 14.68, "learning_rate": 0.001, "loss": 2.651, "step": 76476 }, { "epoch": 14.69, "learning_rate": 0.001, "loss": 2.6531, "step": 76488 }, { "epoch": 14.69, "learning_rate": 0.001, "loss": 2.652, "step": 76500 }, { "epoch": 14.69, "learning_rate": 0.001, "loss": 2.6619, "step": 76512 }, { "epoch": 14.69, "learning_rate": 0.001, "loss": 2.6581, "step": 76524 }, { "epoch": 14.7, "learning_rate": 0.001, "loss": 2.6459, "step": 76536 }, { "epoch": 14.7, "learning_rate": 0.001, "loss": 2.6552, "step": 76548 }, { "epoch": 14.7, "learning_rate": 0.001, "loss": 2.6349, "step": 76560 }, { "epoch": 14.7, "learning_rate": 0.001, "loss": 2.6465, "step": 76572 }, { "epoch": 14.71, "learning_rate": 0.001, "loss": 2.6597, "step": 76584 }, { "epoch": 14.71, "learning_rate": 0.001, "loss": 2.6516, "step": 76596 }, { "epoch": 14.71, "learning_rate": 0.001, "loss": 2.6463, "step": 76608 }, { "epoch": 14.71, "learning_rate": 0.001, "loss": 2.6557, "step": 76620 }, { "epoch": 14.71, "learning_rate": 0.001, "loss": 2.6498, "step": 76632 }, { "epoch": 14.72, "learning_rate": 0.001, "loss": 2.6528, "step": 76644 }, { "epoch": 14.72, "learning_rate": 0.001, "loss": 2.6518, "step": 76656 }, { "epoch": 14.72, "learning_rate": 0.001, "loss": 2.651, "step": 76668 }, { "epoch": 14.72, "learning_rate": 0.001, "loss": 2.6452, "step": 76680 }, { "epoch": 14.73, "learning_rate": 0.001, "loss": 2.6497, "step": 76692 }, { "epoch": 14.73, "learning_rate": 0.001, "loss": 2.6517, "step": 76704 }, { "epoch": 14.73, "learning_rate": 0.001, "loss": 2.6475, "step": 76716 }, { "epoch": 14.73, "learning_rate": 0.001, "loss": 2.656, "step": 76728 }, { "epoch": 14.74, "learning_rate": 0.001, "loss": 2.6577, "step": 76740 }, { "epoch": 14.74, "learning_rate": 0.001, "loss": 2.6475, "step": 76752 }, { "epoch": 14.74, "learning_rate": 0.001, "loss": 2.6487, "step": 76764 }, { "epoch": 14.74, "learning_rate": 0.001, "loss": 2.6519, "step": 76776 }, { "epoch": 14.74, "learning_rate": 0.001, "loss": 2.6435, "step": 76788 }, { "epoch": 14.75, "learning_rate": 0.001, "loss": 2.6495, "step": 76800 }, { "epoch": 14.75, "learning_rate": 0.001, "loss": 2.6614, "step": 76812 }, { "epoch": 14.75, "learning_rate": 0.001, "loss": 2.6481, "step": 76824 }, { "epoch": 14.75, "learning_rate": 0.001, "loss": 2.6414, "step": 76836 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 2.6538, "step": 76848 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 2.6534, "step": 76860 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 2.6436, "step": 76872 }, { "epoch": 14.76, "eval_ag_news_accuracy": 0.31259375, "eval_ag_news_bleu_score": 4.414810398139664, "eval_ag_news_bleu_score_sem": 0.14185250923534423, "eval_ag_news_emb_cos_sim": 0.7913942337036133, "eval_ag_news_emb_cos_sim_sem": 0.00782191705275891, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6608340740203857, "eval_ag_news_n_ngrams_match_1": 13.346, "eval_ag_news_n_ngrams_match_2": 2.844, "eval_ag_news_n_ngrams_match_3": 0.776, "eval_ag_news_num_pred_words": 46.44, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.89376962909697, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3278349454089131, "eval_ag_news_runtime": 10.3588, "eval_ag_news_samples_per_second": 48.268, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3350232407183741, "eval_ag_news_token_set_f1_sem": 0.004567995271353797, "eval_ag_news_token_set_precision": 0.3177480062232345, "eval_ag_news_token_set_recall": 0.37137679807932966, "eval_ag_news_true_num_tokens": 56.09375, "step": 76875 }, { "epoch": 14.76, "eval_anthropic_toxic_prompts_accuracy": 0.1105, "eval_anthropic_toxic_prompts_bleu_score": 2.940088966186127, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1148997731793882, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6606615781784058, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009694190040545655, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3106913566589355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.99, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.762, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, "eval_anthropic_toxic_prompts_num_pred_words": 47.676, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.40406490695189, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875, "eval_anthropic_toxic_prompts_rouge_score": 0.20596362120051115, "eval_anthropic_toxic_prompts_runtime": 9.8168, "eval_anthropic_toxic_prompts_samples_per_second": 50.933, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35098327837978505, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006311093764356088, "eval_anthropic_toxic_prompts_token_set_precision": 0.42332946711302244, "eval_anthropic_toxic_prompts_token_set_recall": 0.3259385185758552, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 76875 }, { "epoch": 14.76, "eval_arxiv_accuracy": 0.33603125, "eval_arxiv_bleu_score": 4.206553556357853, "eval_arxiv_bleu_score_sem": 0.12502509090726233, "eval_arxiv_emb_cos_sim": 0.7460159659385681, "eval_arxiv_emb_cos_sim_sem": 0.006932129250609615, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.51404070854187, "eval_arxiv_n_ngrams_match_1": 14.506, "eval_arxiv_n_ngrams_match_2": 2.766, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 41.598, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.583695913825984, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34226806225136647, "eval_arxiv_runtime": 10.3506, "eval_arxiv_samples_per_second": 48.306, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.33652455369117645, "eval_arxiv_token_set_f1_sem": 0.004275165082746081, "eval_arxiv_token_set_precision": 0.2867741291553252, "eval_arxiv_token_set_recall": 0.42542004999299515, "eval_arxiv_true_num_tokens": 64.0, "step": 76875 }, { "epoch": 14.76, "eval_python_code_alpaca_accuracy": 0.15621875, "eval_python_code_alpaca_bleu_score": 4.233694138185224, "eval_python_code_alpaca_bleu_score_sem": 0.1312488718986595, "eval_python_code_alpaca_emb_cos_sim": 0.7357479929924011, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008838668284780225, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9835448265075684, "eval_python_code_alpaca_n_ngrams_match_1": 9.292, "eval_python_code_alpaca_n_ngrams_match_2": 2.654, "eval_python_code_alpaca_n_ngrams_match_3": 0.844, "eval_python_code_alpaca_num_pred_words": 43.502, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.75773038190511, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.30978098382559377, "eval_python_code_alpaca_runtime": 10.0546, "eval_python_code_alpaca_samples_per_second": 49.729, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.45611797939762494, "eval_python_code_alpaca_token_set_f1_sem": 0.00574584353012143, "eval_python_code_alpaca_token_set_precision": 0.5062434901685975, "eval_python_code_alpaca_token_set_recall": 0.4386257049856014, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 76875 }, { "epoch": 14.76, "eval_wikibio_accuracy": 0.311625, "eval_wikibio_bleu_score": 5.285030508385922, "eval_wikibio_bleu_score_sem": 0.18725741610015192, "eval_wikibio_emb_cos_sim": 0.735741138458252, "eval_wikibio_emb_cos_sim_sem": 0.008572901453327163, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8608815670013428, "eval_wikibio_n_ngrams_match_1": 9.558, "eval_wikibio_n_ngrams_match_2": 3.09, "eval_wikibio_n_ngrams_match_3": 1.058, "eval_wikibio_num_pred_words": 35.896, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.507213705843746, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33430718083541094, "eval_wikibio_runtime": 10.4937, "eval_wikibio_samples_per_second": 47.648, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.30519041115826034, "eval_wikibio_token_set_f1_sem": 0.0055618153528707, "eval_wikibio_token_set_precision": 0.3107292305233083, "eval_wikibio_token_set_recall": 0.3172764445780841, "eval_wikibio_true_num_tokens": 61.1328125, "step": 76875 }, { "epoch": 14.76, "eval_nq_accuracy": 0.5150625, "eval_nq_bleu_score": 10.942025608957334, "eval_nq_bleu_score_sem": 0.4837637827437225, "eval_nq_emb_cos_sim": 0.8182423710823059, "eval_nq_emb_cos_sim_sem": 0.007722426124126405, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2796990871429443, "eval_nq_n_ngrams_match_1": 22.468, "eval_nq_n_ngrams_match_2": 7.99, "eval_nq_n_ngrams_match_3": 3.58, "eval_nq_num_pred_words": 48.976, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.773738923282064, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43323899516804787, "eval_nq_runtime": 10.9154, "eval_nq_samples_per_second": 45.807, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.45092714167921305, "eval_nq_token_set_f1_sem": 0.004983012150980966, "eval_nq_token_set_precision": 0.40656812132890785, "eval_nq_token_set_recall": 0.5169567217580451, "eval_nq_true_num_tokens": 64.0, "step": 76875 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 2.6433, "step": 76884 }, { "epoch": 14.76, "learning_rate": 0.001, "loss": 2.6611, "step": 76896 }, { "epoch": 14.77, "learning_rate": 0.001, "loss": 2.65, "step": 76908 }, { "epoch": 14.77, "learning_rate": 0.001, "loss": 2.6356, "step": 76920 }, { "epoch": 14.77, "learning_rate": 0.001, "loss": 2.6575, "step": 76932 }, { "epoch": 14.77, "learning_rate": 0.001, "loss": 2.6613, "step": 76944 }, { "epoch": 14.78, "learning_rate": 0.001, "loss": 2.6567, "step": 76956 }, { "epoch": 14.78, "learning_rate": 0.001, "loss": 2.6579, "step": 76968 }, { "epoch": 14.78, "learning_rate": 0.001, "loss": 2.6578, "step": 76980 }, { "epoch": 14.78, "learning_rate": 0.001, "loss": 2.647, "step": 76992 }, { "epoch": 14.79, "learning_rate": 0.001, "loss": 2.6568, "step": 77004 }, { "epoch": 14.79, "learning_rate": 0.001, "loss": 2.6512, "step": 77016 }, { "epoch": 14.79, "learning_rate": 0.001, "loss": 2.6578, "step": 77028 }, { "epoch": 14.79, "learning_rate": 0.001, "loss": 2.651, "step": 77040 }, { "epoch": 14.79, "learning_rate": 0.001, "loss": 2.6508, "step": 77052 }, { "epoch": 14.8, "learning_rate": 0.001, "loss": 2.6486, "step": 77064 }, { "epoch": 14.8, "learning_rate": 0.001, "loss": 2.6487, "step": 77076 }, { "epoch": 14.8, "learning_rate": 0.001, "loss": 2.6531, "step": 77088 }, { "epoch": 14.8, "learning_rate": 0.001, "loss": 2.647, "step": 77100 }, { "epoch": 14.81, "learning_rate": 0.001, "loss": 2.6501, "step": 77112 }, { "epoch": 14.81, "learning_rate": 0.001, "loss": 2.6495, "step": 77124 }, { "epoch": 14.81, "learning_rate": 0.001, "loss": 2.6586, "step": 77136 }, { "epoch": 14.81, "learning_rate": 0.001, "loss": 2.6559, "step": 77148 }, { "epoch": 14.82, "learning_rate": 0.001, "loss": 2.6559, "step": 77160 }, { "epoch": 14.82, "learning_rate": 0.001, "loss": 2.6496, "step": 77172 }, { "epoch": 14.82, "learning_rate": 0.001, "loss": 2.6541, "step": 77184 }, { "epoch": 14.82, "learning_rate": 0.001, "loss": 2.6519, "step": 77196 }, { "epoch": 14.82, "learning_rate": 0.001, "loss": 2.6363, "step": 77208 }, { "epoch": 14.83, "learning_rate": 0.001, "loss": 2.65, "step": 77220 }, { "epoch": 14.83, "learning_rate": 0.001, "loss": 2.656, "step": 77232 }, { "epoch": 14.83, "learning_rate": 0.001, "loss": 2.6464, "step": 77244 }, { "epoch": 14.83, "learning_rate": 0.001, "loss": 2.643, "step": 77256 }, { "epoch": 14.84, "learning_rate": 0.001, "loss": 2.6504, "step": 77268 }, { "epoch": 14.84, "learning_rate": 0.001, "loss": 2.6555, "step": 77280 }, { "epoch": 14.84, "learning_rate": 0.001, "loss": 2.6524, "step": 77292 }, { "epoch": 14.84, "learning_rate": 0.001, "loss": 2.6602, "step": 77304 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.6592, "step": 77316 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.649, "step": 77328 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.663, "step": 77340 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.6507, "step": 77352 }, { "epoch": 14.85, "learning_rate": 0.001, "loss": 2.6583, "step": 77364 }, { "epoch": 14.86, "learning_rate": 0.001, "loss": 2.6597, "step": 77376 }, { "epoch": 14.86, "learning_rate": 0.001, "loss": 2.6546, "step": 77388 }, { "epoch": 14.86, "learning_rate": 0.001, "loss": 2.6498, "step": 77400 }, { "epoch": 14.86, "learning_rate": 0.001, "loss": 2.643, "step": 77412 }, { "epoch": 14.87, "learning_rate": 0.001, "loss": 2.648, "step": 77424 }, { "epoch": 14.87, "learning_rate": 0.001, "loss": 2.6445, "step": 77436 }, { "epoch": 14.87, "learning_rate": 0.001, "loss": 2.6494, "step": 77448 }, { "epoch": 14.87, "learning_rate": 0.001, "loss": 2.6477, "step": 77460 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.6593, "step": 77472 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.6575, "step": 77484 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.6423, "step": 77496 }, { "epoch": 14.88, "eval_ag_news_accuracy": 0.31459375, "eval_ag_news_bleu_score": 4.631032270171614, "eval_ag_news_bleu_score_sem": 0.15999853452808432, "eval_ag_news_emb_cos_sim": 0.7956146001815796, "eval_ag_news_emb_cos_sim_sem": 0.0075687771537019085, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6569697856903076, "eval_ag_news_n_ngrams_match_1": 13.458, "eval_ag_news_n_ngrams_match_2": 2.888, "eval_ag_news_n_ngrams_match_3": 0.836, "eval_ag_news_num_pred_words": 46.48, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.743762910280346, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3311104250094676, "eval_ag_news_runtime": 10.4358, "eval_ag_news_samples_per_second": 47.912, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.33834260989411025, "eval_ag_news_token_set_f1_sem": 0.00452232620321744, "eval_ag_news_token_set_precision": 0.32190911245664305, "eval_ag_news_token_set_recall": 0.37446189049247963, "eval_ag_news_true_num_tokens": 56.09375, "step": 77500 }, { "epoch": 14.88, "eval_anthropic_toxic_prompts_accuracy": 0.10840625, "eval_anthropic_toxic_prompts_bleu_score": 2.925343675211889, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10653985519582482, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6549015045166016, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009578090619421826, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.340261697769165, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.076, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626, "eval_anthropic_toxic_prompts_num_pred_words": 47.494, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.22651255430314, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2062731814551379, "eval_anthropic_toxic_prompts_runtime": 9.8997, "eval_anthropic_toxic_prompts_samples_per_second": 50.507, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35468858502280093, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006479771315227887, "eval_anthropic_toxic_prompts_token_set_precision": 0.4253265250122434, "eval_anthropic_toxic_prompts_token_set_recall": 0.33424321970252674, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 77500 }, { "epoch": 14.88, "eval_arxiv_accuracy": 0.33821875, "eval_arxiv_bleu_score": 4.08397045862728, "eval_arxiv_bleu_score_sem": 0.11767461607181948, "eval_arxiv_emb_cos_sim": 0.7391788363456726, "eval_arxiv_emb_cos_sim_sem": 0.008188753344030112, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5042643547058105, "eval_arxiv_n_ngrams_match_1": 14.242, "eval_arxiv_n_ngrams_match_2": 2.708, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 39.75, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.25696951816805, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.33851789306031654, "eval_arxiv_runtime": 10.2365, "eval_arxiv_samples_per_second": 48.845, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.33304753308185286, "eval_arxiv_token_set_f1_sem": 0.004526957743319787, "eval_arxiv_token_set_precision": 0.28434613363972866, "eval_arxiv_token_set_recall": 0.42762750730347915, "eval_arxiv_true_num_tokens": 64.0, "step": 77500 }, { "epoch": 14.88, "eval_python_code_alpaca_accuracy": 0.1551875, "eval_python_code_alpaca_bleu_score": 4.339701917200884, "eval_python_code_alpaca_bleu_score_sem": 0.13338780938360195, "eval_python_code_alpaca_emb_cos_sim": 0.7565209865570068, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007302152897299453, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0038363933563232, "eval_python_code_alpaca_n_ngrams_match_1": 9.646, "eval_python_code_alpaca_n_ngrams_match_2": 2.77, "eval_python_code_alpaca_n_ngrams_match_3": 0.886, "eval_python_code_alpaca_num_pred_words": 44.646, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.162740941399093, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31889391162809305, "eval_python_code_alpaca_runtime": 10.5768, "eval_python_code_alpaca_samples_per_second": 47.273, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.465792894667353, "eval_python_code_alpaca_token_set_f1_sem": 0.005529029250145019, "eval_python_code_alpaca_token_set_precision": 0.5262871313726274, "eval_python_code_alpaca_token_set_recall": 0.43755778015183144, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 77500 }, { "epoch": 14.88, "eval_wikibio_accuracy": 0.31709375, "eval_wikibio_bleu_score": 5.456056450368803, "eval_wikibio_bleu_score_sem": 0.1970489952827261, "eval_wikibio_emb_cos_sim": 0.7289547920227051, "eval_wikibio_emb_cos_sim_sem": 0.009284352768536706, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.841438055038452, "eval_wikibio_n_ngrams_match_1": 9.774, "eval_wikibio_n_ngrams_match_2": 3.138, "eval_wikibio_n_ngrams_match_3": 1.09, "eval_wikibio_num_pred_words": 36.05, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.592428763186305, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3349919510965307, "eval_wikibio_runtime": 11.1729, "eval_wikibio_samples_per_second": 44.751, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.3072632426149612, "eval_wikibio_token_set_f1_sem": 0.005599271608403875, "eval_wikibio_token_set_precision": 0.31601679972051183, "eval_wikibio_token_set_recall": 0.3179809555417617, "eval_wikibio_true_num_tokens": 61.1328125, "step": 77500 }, { "epoch": 14.88, "eval_nq_accuracy": 0.51453125, "eval_nq_bleu_score": 11.130354698405046, "eval_nq_bleu_score_sem": 0.46762484990152287, "eval_nq_emb_cos_sim": 0.817885160446167, "eval_nq_emb_cos_sim_sem": 0.007382511132341537, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.277179479598999, "eval_nq_n_ngrams_match_1": 22.494, "eval_nq_n_ngrams_match_2": 8.062, "eval_nq_n_ngrams_match_3": 3.696, "eval_nq_num_pred_words": 49.206, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.749143934829249, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43142035015214286, "eval_nq_runtime": 11.216, "eval_nq_samples_per_second": 44.579, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.4491553393750031, "eval_nq_token_set_f1_sem": 0.005120027827010909, "eval_nq_token_set_precision": 0.40571971347760305, "eval_nq_token_set_recall": 0.5125443471047076, "eval_nq_true_num_tokens": 64.0, "step": 77500 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.6544, "step": 77508 }, { "epoch": 14.88, "learning_rate": 0.001, "loss": 2.6474, "step": 77520 }, { "epoch": 14.89, "learning_rate": 0.001, "loss": 2.6489, "step": 77532 }, { "epoch": 14.89, "learning_rate": 0.001, "loss": 2.6515, "step": 77544 }, { "epoch": 14.89, "learning_rate": 0.001, "loss": 2.6572, "step": 77556 }, { "epoch": 14.89, "learning_rate": 0.001, "loss": 2.6424, "step": 77568 }, { "epoch": 14.9, "learning_rate": 0.001, "loss": 2.6501, "step": 77580 }, { "epoch": 14.9, "learning_rate": 0.001, "loss": 2.6449, "step": 77592 }, { "epoch": 14.9, "learning_rate": 0.001, "loss": 2.6565, "step": 77604 }, { "epoch": 14.9, "learning_rate": 0.001, "loss": 2.6485, "step": 77616 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.6585, "step": 77628 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.6471, "step": 77640 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.6495, "step": 77652 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.6482, "step": 77664 }, { "epoch": 14.91, "learning_rate": 0.001, "loss": 2.654, "step": 77676 }, { "epoch": 14.92, "learning_rate": 0.001, "loss": 2.6467, "step": 77688 }, { "epoch": 14.92, "learning_rate": 0.001, "loss": 2.6516, "step": 77700 }, { "epoch": 14.92, "learning_rate": 0.001, "loss": 2.6523, "step": 77712 }, { "epoch": 14.92, "learning_rate": 0.001, "loss": 2.6526, "step": 77724 }, { "epoch": 14.93, "learning_rate": 0.001, "loss": 2.6512, "step": 77736 }, { "epoch": 14.93, "learning_rate": 0.001, "loss": 2.6571, "step": 77748 }, { "epoch": 14.93, "learning_rate": 0.001, "loss": 2.6638, "step": 77760 }, { "epoch": 14.93, "learning_rate": 0.001, "loss": 2.6594, "step": 77772 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.6554, "step": 77784 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.6492, "step": 77796 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.6453, "step": 77808 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.6546, "step": 77820 }, { "epoch": 14.94, "learning_rate": 0.001, "loss": 2.6391, "step": 77832 }, { "epoch": 14.95, "learning_rate": 0.001, "loss": 2.6442, "step": 77844 }, { "epoch": 14.95, "learning_rate": 0.001, "loss": 2.6521, "step": 77856 }, { "epoch": 14.95, "learning_rate": 0.001, "loss": 2.6554, "step": 77868 }, { "epoch": 14.95, "learning_rate": 0.001, "loss": 2.6605, "step": 77880 }, { "epoch": 14.96, "learning_rate": 0.001, "loss": 2.6485, "step": 77892 }, { "epoch": 14.96, "learning_rate": 0.001, "loss": 2.6481, "step": 77904 }, { "epoch": 14.96, "learning_rate": 0.001, "loss": 2.6526, "step": 77916 }, { "epoch": 14.96, "learning_rate": 0.001, "loss": 2.6386, "step": 77928 }, { "epoch": 14.97, "learning_rate": 0.001, "loss": 2.6473, "step": 77940 }, { "epoch": 14.97, "learning_rate": 0.001, "loss": 2.6386, "step": 77952 }, { "epoch": 14.97, "learning_rate": 0.001, "loss": 2.6387, "step": 77964 }, { "epoch": 14.97, "learning_rate": 0.001, "loss": 2.6499, "step": 77976 }, { "epoch": 14.97, "learning_rate": 0.001, "loss": 2.6601, "step": 77988 }, { "epoch": 14.98, "learning_rate": 0.001, "loss": 2.6469, "step": 78000 }, { "epoch": 14.98, "learning_rate": 0.001, "loss": 2.6496, "step": 78012 }, { "epoch": 14.98, "learning_rate": 0.001, "loss": 2.6581, "step": 78024 }, { "epoch": 14.98, "learning_rate": 0.001, "loss": 2.6506, "step": 78036 }, { "epoch": 14.99, "learning_rate": 0.001, "loss": 2.6489, "step": 78048 }, { "epoch": 14.99, "learning_rate": 0.001, "loss": 2.6492, "step": 78060 }, { "epoch": 14.99, "learning_rate": 0.001, "loss": 2.6506, "step": 78072 }, { "epoch": 14.99, "learning_rate": 0.001, "loss": 2.6435, "step": 78084 }, { "epoch": 15.0, "learning_rate": 0.001, "loss": 2.6378, "step": 78096 }, { "epoch": 15.0, "learning_rate": 0.001, "loss": 2.6553, "step": 78108 }, { "epoch": 15.0, "learning_rate": 0.001, "loss": 2.6509, "step": 78120 }, { "epoch": 15.0, "eval_ag_news_accuracy": 0.31146875, "eval_ag_news_bleu_score": 4.55687907341995, "eval_ag_news_bleu_score_sem": 0.14168826307002097, "eval_ag_news_emb_cos_sim": 0.8001970052719116, "eval_ag_news_emb_cos_sim_sem": 0.006763832933238567, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6746983528137207, "eval_ag_news_n_ngrams_match_1": 13.494, "eval_ag_news_n_ngrams_match_2": 2.87, "eval_ag_news_n_ngrams_match_3": 0.83, "eval_ag_news_num_pred_words": 46.804, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.43675907541347, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.331305410227578, "eval_ag_news_runtime": 10.2311, "eval_ag_news_samples_per_second": 48.87, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3361638944316805, "eval_ag_news_token_set_f1_sem": 0.004494118201335518, "eval_ag_news_token_set_precision": 0.3196571701029243, "eval_ag_news_token_set_recall": 0.3692811452088858, "eval_ag_news_true_num_tokens": 56.09375, "step": 78125 }, { "epoch": 15.0, "eval_anthropic_toxic_prompts_accuracy": 0.11009375, "eval_anthropic_toxic_prompts_bleu_score": 2.991854800804824, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11648455777747162, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6514008641242981, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009158809819168405, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3317744731903076, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.946, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.796, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.644, "eval_anthropic_toxic_prompts_num_pred_words": 47.222, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.987961553097133, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20275988453763516, "eval_anthropic_toxic_prompts_runtime": 11.1928, "eval_anthropic_toxic_prompts_samples_per_second": 44.672, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.34848142850950203, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064726623770099, "eval_anthropic_toxic_prompts_token_set_precision": 0.4191481559501271, "eval_anthropic_toxic_prompts_token_set_recall": 0.3288319094647738, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 78125 }, { "epoch": 15.0, "eval_arxiv_accuracy": 0.33540625, "eval_arxiv_bleu_score": 4.093313340945823, "eval_arxiv_bleu_score_sem": 0.11741439357679818, "eval_arxiv_emb_cos_sim": 0.7404592037200928, "eval_arxiv_emb_cos_sim_sem": 0.007134437416184446, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5302891731262207, "eval_arxiv_n_ngrams_match_1": 14.54, "eval_arxiv_n_ngrams_match_2": 2.79, "eval_arxiv_n_ngrams_match_3": 0.58, "eval_arxiv_num_pred_words": 40.86, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 34.13383677602791, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3422752678386841, "eval_arxiv_runtime": 14.9195, "eval_arxiv_samples_per_second": 33.513, "eval_arxiv_steps_per_second": 0.067, "eval_arxiv_token_set_f1": 0.3377657060348677, "eval_arxiv_token_set_f1_sem": 0.004207855857039247, "eval_arxiv_token_set_precision": 0.2888183116966359, "eval_arxiv_token_set_recall": 0.42247830133496517, "eval_arxiv_true_num_tokens": 64.0, "step": 78125 }, { "epoch": 15.0, "eval_python_code_alpaca_accuracy": 0.1550625, "eval_python_code_alpaca_bleu_score": 4.390590995116049, "eval_python_code_alpaca_bleu_score_sem": 0.14043490806783718, "eval_python_code_alpaca_emb_cos_sim": 0.7426967024803162, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010412652220587182, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9705452919006348, "eval_python_code_alpaca_n_ngrams_match_1": 9.584, "eval_python_code_alpaca_n_ngrams_match_2": 2.8, "eval_python_code_alpaca_n_ngrams_match_3": 0.93, "eval_python_code_alpaca_num_pred_words": 43.734, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.50255128033689, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31723250056380503, "eval_python_code_alpaca_runtime": 9.7869, "eval_python_code_alpaca_samples_per_second": 51.089, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.46034348301184796, "eval_python_code_alpaca_token_set_f1_sem": 0.005977222950853551, "eval_python_code_alpaca_token_set_precision": 0.5192194511962053, "eval_python_code_alpaca_token_set_recall": 0.4391917466066157, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 78125 }, { "epoch": 15.0, "eval_wikibio_accuracy": 0.31534375, "eval_wikibio_bleu_score": 5.430132314342204, "eval_wikibio_bleu_score_sem": 0.18900363820296573, "eval_wikibio_emb_cos_sim": 0.7391431927680969, "eval_wikibio_emb_cos_sim_sem": 0.010009163976179694, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8963725566864014, "eval_wikibio_n_ngrams_match_1": 9.66, "eval_wikibio_n_ngrams_match_2": 3.18, "eval_wikibio_n_ngrams_match_3": 1.086, "eval_wikibio_num_pred_words": 36.132, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 49.22356915643153, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3387682831770781, "eval_wikibio_runtime": 10.0595, "eval_wikibio_samples_per_second": 49.704, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3051529292868866, "eval_wikibio_token_set_f1_sem": 0.005453279865197709, "eval_wikibio_token_set_precision": 0.3148738837335179, "eval_wikibio_token_set_recall": 0.3126155424927755, "eval_wikibio_true_num_tokens": 61.1328125, "step": 78125 }, { "epoch": 15.0, "eval_nq_accuracy": 0.5146875, "eval_nq_bleu_score": 11.36397842627679, "eval_nq_bleu_score_sem": 0.48049478981311194, "eval_nq_emb_cos_sim": 0.8131399154663086, "eval_nq_emb_cos_sim_sem": 0.007624440628239148, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.277000665664673, "eval_nq_n_ngrams_match_1": 22.468, "eval_nq_n_ngrams_match_2": 8.19, "eval_nq_n_ngrams_match_3": 3.816, "eval_nq_num_pred_words": 49.034, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.747400807898286, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43164996757113144, "eval_nq_runtime": 11.625, "eval_nq_samples_per_second": 43.011, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4488524102037199, "eval_nq_token_set_f1_sem": 0.005137918477550579, "eval_nq_token_set_precision": 0.4047128036308987, "eval_nq_token_set_recall": 0.5131301020274545, "eval_nq_true_num_tokens": 64.0, "step": 78125 }, { "epoch": 15.0, "learning_rate": 0.001, "loss": 2.63, "step": 78132 }, { "epoch": 15.0, "learning_rate": 0.001, "loss": 2.6363, "step": 78144 }, { "epoch": 15.01, "learning_rate": 0.001, "loss": 2.625, "step": 78156 }, { "epoch": 15.01, "learning_rate": 0.001, "loss": 2.6282, "step": 78168 }, { "epoch": 15.01, "learning_rate": 0.001, "loss": 2.6378, "step": 78180 }, { "epoch": 15.01, "learning_rate": 0.001, "loss": 2.6229, "step": 78192 }, { "epoch": 15.02, "learning_rate": 0.001, "loss": 2.6198, "step": 78204 }, { "epoch": 15.02, "learning_rate": 0.001, "loss": 2.632, "step": 78216 }, { "epoch": 15.02, "learning_rate": 0.001, "loss": 2.6313, "step": 78228 }, { "epoch": 15.02, "learning_rate": 0.001, "loss": 2.6262, "step": 78240 }, { "epoch": 15.03, "learning_rate": 0.001, "loss": 2.6321, "step": 78252 }, { "epoch": 15.03, "learning_rate": 0.001, "loss": 2.6267, "step": 78264 }, { "epoch": 15.03, "learning_rate": 0.001, "loss": 2.639, "step": 78276 }, { "epoch": 15.03, "learning_rate": 0.001, "loss": 2.631, "step": 78288 }, { "epoch": 15.03, "learning_rate": 0.001, "loss": 2.6309, "step": 78300 }, { "epoch": 15.04, "learning_rate": 0.001, "loss": 2.6395, "step": 78312 }, { "epoch": 15.04, "learning_rate": 0.001, "loss": 2.6282, "step": 78324 }, { "epoch": 15.04, "learning_rate": 0.001, "loss": 2.6414, "step": 78336 }, { "epoch": 15.04, "learning_rate": 0.001, "loss": 2.6299, "step": 78348 }, { "epoch": 15.05, "learning_rate": 0.001, "loss": 2.6312, "step": 78360 }, { "epoch": 15.05, "learning_rate": 0.001, "loss": 2.6258, "step": 78372 }, { "epoch": 15.05, "learning_rate": 0.001, "loss": 2.6253, "step": 78384 }, { "epoch": 15.05, "learning_rate": 0.001, "loss": 2.6282, "step": 78396 }, { "epoch": 15.06, "learning_rate": 0.001, "loss": 2.6212, "step": 78408 }, { "epoch": 15.06, "learning_rate": 0.001, "loss": 2.6282, "step": 78420 }, { "epoch": 15.06, "learning_rate": 0.001, "loss": 2.6318, "step": 78432 }, { "epoch": 15.06, "learning_rate": 0.001, "loss": 2.6316, "step": 78444 }, { "epoch": 15.06, "learning_rate": 0.001, "loss": 2.6324, "step": 78456 }, { "epoch": 15.07, "learning_rate": 0.001, "loss": 2.6249, "step": 78468 }, { "epoch": 15.07, "learning_rate": 0.001, "loss": 2.6359, "step": 78480 }, { "epoch": 15.07, "learning_rate": 0.001, "loss": 2.6339, "step": 78492 }, { "epoch": 15.07, "learning_rate": 0.001, "loss": 2.6299, "step": 78504 }, { "epoch": 15.08, "learning_rate": 0.001, "loss": 2.6251, "step": 78516 }, { "epoch": 15.08, "learning_rate": 0.001, "loss": 2.6223, "step": 78528 }, { "epoch": 15.08, "learning_rate": 0.001, "loss": 2.6345, "step": 78540 }, { "epoch": 15.08, "learning_rate": 0.001, "loss": 2.6406, "step": 78552 }, { "epoch": 15.09, "learning_rate": 0.001, "loss": 2.6281, "step": 78564 }, { "epoch": 15.09, "learning_rate": 0.001, "loss": 2.6356, "step": 78576 }, { "epoch": 15.09, "learning_rate": 0.001, "loss": 2.6409, "step": 78588 }, { "epoch": 15.09, "learning_rate": 0.001, "loss": 2.6249, "step": 78600 }, { "epoch": 15.09, "learning_rate": 0.001, "loss": 2.6245, "step": 78612 }, { "epoch": 15.1, "learning_rate": 0.001, "loss": 2.6323, "step": 78624 }, { "epoch": 15.1, "learning_rate": 0.001, "loss": 2.6278, "step": 78636 }, { "epoch": 15.1, "learning_rate": 0.001, "loss": 2.6244, "step": 78648 }, { "epoch": 15.1, "learning_rate": 0.001, "loss": 2.6307, "step": 78660 }, { "epoch": 15.11, "learning_rate": 0.001, "loss": 2.6419, "step": 78672 }, { "epoch": 15.11, "learning_rate": 0.001, "loss": 2.6298, "step": 78684 }, { "epoch": 15.11, "learning_rate": 0.001, "loss": 2.6327, "step": 78696 }, { "epoch": 15.11, "learning_rate": 0.001, "loss": 2.6361, "step": 78708 }, { "epoch": 15.12, "learning_rate": 0.001, "loss": 2.6317, "step": 78720 }, { "epoch": 15.12, "learning_rate": 0.001, "loss": 2.6443, "step": 78732 }, { "epoch": 15.12, "learning_rate": 0.001, "loss": 2.6327, "step": 78744 }, { "epoch": 15.12, "eval_ag_news_accuracy": 0.31434375, "eval_ag_news_bleu_score": 4.709331151638334, "eval_ag_news_bleu_score_sem": 0.161182568042613, "eval_ag_news_emb_cos_sim": 0.7881615161895752, "eval_ag_news_emb_cos_sim_sem": 0.008591133258030665, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.655778169631958, "eval_ag_news_n_ngrams_match_1": 13.542, "eval_ag_news_n_ngrams_match_2": 2.956, "eval_ag_news_n_ngrams_match_3": 0.866, "eval_ag_news_num_pred_words": 46.338, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.69762271639328, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33467655672931806, "eval_ag_news_runtime": 12.0354, "eval_ag_news_samples_per_second": 41.544, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.340935298374678, "eval_ag_news_token_set_f1_sem": 0.004550301569924958, "eval_ag_news_token_set_precision": 0.32078544261833436, "eval_ag_news_token_set_recall": 0.3810620891958097, "eval_ag_news_true_num_tokens": 56.09375, "step": 78750 }, { "epoch": 15.12, "eval_anthropic_toxic_prompts_accuracy": 0.111, "eval_anthropic_toxic_prompts_bleu_score": 2.9513907540760345, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11072114351295238, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6517083644866943, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009353276036742178, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3121449947357178, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.92, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, "eval_anthropic_toxic_prompts_num_pred_words": 46.33, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.44392946646036, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20610230554944842, "eval_anthropic_toxic_prompts_runtime": 13.1437, "eval_anthropic_toxic_prompts_samples_per_second": 38.041, "eval_anthropic_toxic_prompts_steps_per_second": 0.076, "eval_anthropic_toxic_prompts_token_set_f1": 0.3464527101543247, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006493648995109544, "eval_anthropic_toxic_prompts_token_set_precision": 0.41888436420096403, "eval_anthropic_toxic_prompts_token_set_recall": 0.32420068256751244, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 78750 }, { "epoch": 15.12, "eval_arxiv_accuracy": 0.3376875, "eval_arxiv_bleu_score": 4.014688682625429, "eval_arxiv_bleu_score_sem": 0.11804915764060579, "eval_arxiv_emb_cos_sim": 0.7362596988677979, "eval_arxiv_emb_cos_sim_sem": 0.0077642384320272435, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5092623233795166, "eval_arxiv_n_ngrams_match_1": 14.126, "eval_arxiv_n_ngrams_match_2": 2.63, "eval_arxiv_n_ngrams_match_3": 0.602, "eval_arxiv_num_pred_words": 39.79, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.42360287728601, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.338672533833623, "eval_arxiv_runtime": 10.5877, "eval_arxiv_samples_per_second": 47.225, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3349007315828461, "eval_arxiv_token_set_f1_sem": 0.004199454838761151, "eval_arxiv_token_set_precision": 0.28074879187714413, "eval_arxiv_token_set_recall": 0.43604499348643977, "eval_arxiv_true_num_tokens": 64.0, "step": 78750 }, { "epoch": 15.12, "eval_python_code_alpaca_accuracy": 0.1543125, "eval_python_code_alpaca_bleu_score": 4.124095142196487, "eval_python_code_alpaca_bleu_score_sem": 0.1319102465316636, "eval_python_code_alpaca_emb_cos_sim": 0.7358843088150024, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009029650143362261, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0163207054138184, "eval_python_code_alpaca_n_ngrams_match_1": 9.438, "eval_python_code_alpaca_n_ngrams_match_2": 2.604, "eval_python_code_alpaca_n_ngrams_match_3": 0.78, "eval_python_code_alpaca_num_pred_words": 43.612, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.41603671313439, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3199745161997377, "eval_python_code_alpaca_runtime": 11.583, "eval_python_code_alpaca_samples_per_second": 43.167, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.46347061144057566, "eval_python_code_alpaca_token_set_f1_sem": 0.005509433724409282, "eval_python_code_alpaca_token_set_precision": 0.5158004890273531, "eval_python_code_alpaca_token_set_recall": 0.4419562188026992, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 78750 }, { "epoch": 15.12, "eval_wikibio_accuracy": 0.31309375, "eval_wikibio_bleu_score": 5.374763453331607, "eval_wikibio_bleu_score_sem": 0.19453951623300408, "eval_wikibio_emb_cos_sim": 0.7150180339813232, "eval_wikibio_emb_cos_sim_sem": 0.011016393971704596, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.891688585281372, "eval_wikibio_n_ngrams_match_1": 9.558, "eval_wikibio_n_ngrams_match_2": 3.062, "eval_wikibio_n_ngrams_match_3": 1.074, "eval_wikibio_num_pred_words": 35.806, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 48.99354649638047, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3336723205016684, "eval_wikibio_runtime": 11.9069, "eval_wikibio_samples_per_second": 41.993, "eval_wikibio_steps_per_second": 0.084, "eval_wikibio_token_set_f1": 0.30434849914422285, "eval_wikibio_token_set_f1_sem": 0.005529010601084642, "eval_wikibio_token_set_precision": 0.308564379616472, "eval_wikibio_token_set_recall": 0.32131592914417434, "eval_wikibio_true_num_tokens": 61.1328125, "step": 78750 }, { "epoch": 15.12, "eval_nq_accuracy": 0.51525, "eval_nq_bleu_score": 10.945980265156468, "eval_nq_bleu_score_sem": 0.4624122544281547, "eval_nq_emb_cos_sim": 0.8205318450927734, "eval_nq_emb_cos_sim_sem": 0.007989117552068674, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.271064519882202, "eval_nq_n_ngrams_match_1": 22.214, "eval_nq_n_ngrams_match_2": 7.958, "eval_nq_n_ngrams_match_3": 3.604, "eval_nq_num_pred_words": 48.75, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.689710214994799, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4305066219500824, "eval_nq_runtime": 11.9682, "eval_nq_samples_per_second": 41.777, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.44740259154456813, "eval_nq_token_set_f1_sem": 0.0049342745586682946, "eval_nq_token_set_precision": 0.40180395838782174, "eval_nq_token_set_recall": 0.5153171579555549, "eval_nq_true_num_tokens": 64.0, "step": 78750 }, { "epoch": 15.12, "learning_rate": 0.001, "loss": 2.6273, "step": 78756 }, { "epoch": 15.12, "learning_rate": 0.001, "loss": 2.636, "step": 78768 }, { "epoch": 15.13, "learning_rate": 0.001, "loss": 2.6366, "step": 78780 }, { "epoch": 15.13, "learning_rate": 0.001, "loss": 2.6433, "step": 78792 }, { "epoch": 15.13, "learning_rate": 0.001, "loss": 2.6316, "step": 78804 }, { "epoch": 15.13, "learning_rate": 0.001, "loss": 2.6306, "step": 78816 }, { "epoch": 15.14, "learning_rate": 0.001, "loss": 2.6322, "step": 78828 }, { "epoch": 15.14, "learning_rate": 0.001, "loss": 2.6302, "step": 78840 }, { "epoch": 15.14, "learning_rate": 0.001, "loss": 2.6379, "step": 78852 }, { "epoch": 15.14, "learning_rate": 0.001, "loss": 2.6326, "step": 78864 }, { "epoch": 15.15, "learning_rate": 0.001, "loss": 2.6292, "step": 78876 }, { "epoch": 15.15, "learning_rate": 0.001, "loss": 2.6304, "step": 78888 }, { "epoch": 15.15, "learning_rate": 0.001, "loss": 2.6343, "step": 78900 }, { "epoch": 15.15, "learning_rate": 0.001, "loss": 2.6395, "step": 78912 }, { "epoch": 15.15, "learning_rate": 0.001, "loss": 2.6331, "step": 78924 }, { "epoch": 15.16, "learning_rate": 0.001, "loss": 2.6351, "step": 78936 }, { "epoch": 15.16, "learning_rate": 0.001, "loss": 2.635, "step": 78948 }, { "epoch": 15.16, "learning_rate": 0.001, "loss": 2.6383, "step": 78960 }, { "epoch": 15.16, "learning_rate": 0.001, "loss": 2.635, "step": 78972 }, { "epoch": 15.17, "learning_rate": 0.001, "loss": 2.6341, "step": 78984 }, { "epoch": 15.17, "learning_rate": 0.001, "loss": 2.6361, "step": 78996 }, { "epoch": 15.17, "learning_rate": 0.001, "loss": 2.6228, "step": 79008 }, { "epoch": 15.17, "learning_rate": 0.001, "loss": 2.6412, "step": 79020 }, { "epoch": 15.18, "learning_rate": 0.001, "loss": 2.6222, "step": 79032 }, { "epoch": 15.18, "learning_rate": 0.001, "loss": 2.6325, "step": 79044 }, { "epoch": 15.18, "learning_rate": 0.001, "loss": 2.6277, "step": 79056 }, { "epoch": 15.18, "learning_rate": 0.001, "loss": 2.6319, "step": 79068 }, { "epoch": 15.18, "learning_rate": 0.001, "loss": 2.6206, "step": 79080 }, { "epoch": 15.19, "learning_rate": 0.001, "loss": 2.6344, "step": 79092 }, { "epoch": 15.19, "learning_rate": 0.001, "loss": 2.6401, "step": 79104 }, { "epoch": 15.19, "learning_rate": 0.001, "loss": 2.635, "step": 79116 }, { "epoch": 15.19, "learning_rate": 0.001, "loss": 2.6352, "step": 79128 }, { "epoch": 15.2, "learning_rate": 0.001, "loss": 2.6317, "step": 79140 }, { "epoch": 15.2, "learning_rate": 0.001, "loss": 2.6378, "step": 79152 }, { "epoch": 15.2, "learning_rate": 0.001, "loss": 2.6406, "step": 79164 }, { "epoch": 15.2, "learning_rate": 0.001, "loss": 2.6368, "step": 79176 }, { "epoch": 15.21, "learning_rate": 0.001, "loss": 2.6281, "step": 79188 }, { "epoch": 15.21, "learning_rate": 0.001, "loss": 2.6268, "step": 79200 }, { "epoch": 15.21, "learning_rate": 0.001, "loss": 2.6496, "step": 79212 }, { "epoch": 15.21, "learning_rate": 0.001, "loss": 2.6362, "step": 79224 }, { "epoch": 15.21, "learning_rate": 0.001, "loss": 2.6423, "step": 79236 }, { "epoch": 15.22, "learning_rate": 0.001, "loss": 2.6391, "step": 79248 }, { "epoch": 15.22, "learning_rate": 0.001, "loss": 2.6337, "step": 79260 }, { "epoch": 15.22, "learning_rate": 0.001, "loss": 2.6401, "step": 79272 }, { "epoch": 15.22, "learning_rate": 0.001, "loss": 2.6316, "step": 79284 }, { "epoch": 15.23, "learning_rate": 0.001, "loss": 2.6469, "step": 79296 }, { "epoch": 15.23, "learning_rate": 0.001, "loss": 2.6277, "step": 79308 }, { "epoch": 15.23, "learning_rate": 0.001, "loss": 2.6353, "step": 79320 }, { "epoch": 15.23, "learning_rate": 0.001, "loss": 2.6423, "step": 79332 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 2.635, "step": 79344 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 2.6333, "step": 79356 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 2.6291, "step": 79368 }, { "epoch": 15.24, "eval_ag_news_accuracy": 0.31440625, "eval_ag_news_bleu_score": 4.601826717340538, "eval_ag_news_bleu_score_sem": 0.14277041737178586, "eval_ag_news_emb_cos_sim": 0.7933106422424316, "eval_ag_news_emb_cos_sim_sem": 0.007978230494262705, "eval_ag_news_emb_top1_equal": 0.171875, "eval_ag_news_emb_top1_equal_sem": 0.03347745514062371, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6685566902160645, "eval_ag_news_n_ngrams_match_1": 13.466, "eval_ag_news_n_ngrams_match_2": 2.908, "eval_ag_news_n_ngrams_match_3": 0.824, "eval_ag_news_num_pred_words": 46.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.195294064545095, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33121353065097914, "eval_ag_news_runtime": 11.5225, "eval_ag_news_samples_per_second": 43.393, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.33602591897211426, "eval_ag_news_token_set_f1_sem": 0.004406017253693179, "eval_ag_news_token_set_precision": 0.3194280006993723, "eval_ag_news_token_set_recall": 0.36841965570753565, "eval_ag_news_true_num_tokens": 56.09375, "step": 79375 }, { "epoch": 15.24, "eval_anthropic_toxic_prompts_accuracy": 0.11059375, "eval_anthropic_toxic_prompts_bleu_score": 3.0523709849639213, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11754473206210375, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6514561176300049, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009804986861291208, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3238117694854736, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.06, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, "eval_anthropic_toxic_prompts_num_pred_words": 47.48, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.765986641027126, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20768753960850322, "eval_anthropic_toxic_prompts_runtime": 10.6024, "eval_anthropic_toxic_prompts_samples_per_second": 47.159, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.34962818427770315, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065926318618090875, "eval_anthropic_toxic_prompts_token_set_precision": 0.42740865047362764, "eval_anthropic_toxic_prompts_token_set_recall": 0.3224157869496725, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 79375 }, { "epoch": 15.24, "eval_arxiv_accuracy": 0.3366875, "eval_arxiv_bleu_score": 4.08733678741479, "eval_arxiv_bleu_score_sem": 0.1179969326961493, "eval_arxiv_emb_cos_sim": 0.7434097528457642, "eval_arxiv_emb_cos_sim_sem": 0.008391037143458147, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.522575616836548, "eval_arxiv_n_ngrams_match_1": 14.44, "eval_arxiv_n_ngrams_match_2": 2.752, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 40.772, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.87155636248976, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34348354748856513, "eval_arxiv_runtime": 10.197, "eval_arxiv_samples_per_second": 49.034, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.33744360938819495, "eval_arxiv_token_set_f1_sem": 0.004397577131159572, "eval_arxiv_token_set_precision": 0.2867417233004457, "eval_arxiv_token_set_recall": 0.43558523529975657, "eval_arxiv_true_num_tokens": 64.0, "step": 79375 }, { "epoch": 15.24, "eval_python_code_alpaca_accuracy": 0.15625, "eval_python_code_alpaca_bleu_score": 4.382676426037081, "eval_python_code_alpaca_bleu_score_sem": 0.13415464242537486, "eval_python_code_alpaca_emb_cos_sim": 0.7466802597045898, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008349086541210632, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9901957511901855, "eval_python_code_alpaca_n_ngrams_match_1": 9.736, "eval_python_code_alpaca_n_ngrams_match_2": 2.78, "eval_python_code_alpaca_n_ngrams_match_3": 0.874, "eval_python_code_alpaca_num_pred_words": 44.23, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.889575518600008, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32238036846327545, "eval_python_code_alpaca_runtime": 10.6375, "eval_python_code_alpaca_samples_per_second": 47.004, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.467201210929703, "eval_python_code_alpaca_token_set_f1_sem": 0.005347815623137397, "eval_python_code_alpaca_token_set_precision": 0.531586194251142, "eval_python_code_alpaca_token_set_recall": 0.43667802521573623, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 79375 }, { "epoch": 15.24, "eval_wikibio_accuracy": 0.31775, "eval_wikibio_bleu_score": 5.685628954184466, "eval_wikibio_bleu_score_sem": 0.1990435046321838, "eval_wikibio_emb_cos_sim": 0.7299209833145142, "eval_wikibio_emb_cos_sim_sem": 0.009531472253701729, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.853386163711548, "eval_wikibio_n_ngrams_match_1": 9.868, "eval_wikibio_n_ngrams_match_2": 3.306, "eval_wikibio_n_ngrams_match_3": 1.198, "eval_wikibio_num_pred_words": 36.71, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.15245915503024, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33907118076742054, "eval_wikibio_runtime": 11.1945, "eval_wikibio_samples_per_second": 44.665, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3108423995817786, "eval_wikibio_token_set_f1_sem": 0.00566353643505371, "eval_wikibio_token_set_precision": 0.3189239347665758, "eval_wikibio_token_set_recall": 0.31872158424909264, "eval_wikibio_true_num_tokens": 61.1328125, "step": 79375 }, { "epoch": 15.24, "eval_nq_accuracy": 0.51615625, "eval_nq_bleu_score": 11.358283890189858, "eval_nq_bleu_score_sem": 0.4797350033308438, "eval_nq_emb_cos_sim": 0.8144046664237976, "eval_nq_emb_cos_sim_sem": 0.008683270220142527, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.268126964569092, "eval_nq_n_ngrams_match_1": 22.7, "eval_nq_n_ngrams_match_2": 8.308, "eval_nq_n_ngrams_match_3": 3.832, "eval_nq_num_pred_words": 49.374, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.661287921738067, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4353169928740548, "eval_nq_runtime": 10.8621, "eval_nq_samples_per_second": 46.032, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4516246166839993, "eval_nq_token_set_f1_sem": 0.005137952527401246, "eval_nq_token_set_precision": 0.4084688851511734, "eval_nq_token_set_recall": 0.5148402668331971, "eval_nq_true_num_tokens": 64.0, "step": 79375 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 2.6231, "step": 79380 }, { "epoch": 15.24, "learning_rate": 0.001, "loss": 2.6344, "step": 79392 }, { "epoch": 15.25, "learning_rate": 0.001, "loss": 2.6367, "step": 79404 }, { "epoch": 15.25, "learning_rate": 0.001, "loss": 2.6358, "step": 79416 }, { "epoch": 15.25, "learning_rate": 0.001, "loss": 2.642, "step": 79428 }, { "epoch": 15.25, "learning_rate": 0.001, "loss": 2.6387, "step": 79440 }, { "epoch": 15.26, "learning_rate": 0.001, "loss": 2.6316, "step": 79452 }, { "epoch": 15.26, "learning_rate": 0.001, "loss": 2.6316, "step": 79464 }, { "epoch": 15.26, "learning_rate": 0.001, "loss": 2.638, "step": 79476 }, { "epoch": 15.26, "learning_rate": 0.001, "loss": 2.6454, "step": 79488 }, { "epoch": 15.26, "learning_rate": 0.001, "loss": 2.6308, "step": 79500 }, { "epoch": 15.27, "learning_rate": 0.001, "loss": 2.6296, "step": 79512 }, { "epoch": 15.27, "learning_rate": 0.001, "loss": 2.6416, "step": 79524 }, { "epoch": 15.27, "learning_rate": 0.001, "loss": 2.6402, "step": 79536 }, { "epoch": 15.27, "learning_rate": 0.001, "loss": 2.6433, "step": 79548 }, { "epoch": 15.28, "learning_rate": 0.001, "loss": 2.6407, "step": 79560 }, { "epoch": 15.28, "learning_rate": 0.001, "loss": 2.6405, "step": 79572 }, { "epoch": 15.28, "learning_rate": 0.001, "loss": 2.6403, "step": 79584 }, { "epoch": 15.28, "learning_rate": 0.001, "loss": 2.6353, "step": 79596 }, { "epoch": 15.29, "learning_rate": 0.001, "loss": 2.6438, "step": 79608 }, { "epoch": 15.29, "learning_rate": 0.001, "loss": 2.6318, "step": 79620 }, { "epoch": 15.29, "learning_rate": 0.001, "loss": 2.6454, "step": 79632 }, { "epoch": 15.29, "learning_rate": 0.001, "loss": 2.6475, "step": 79644 }, { "epoch": 15.29, "learning_rate": 0.001, "loss": 2.6392, "step": 79656 }, { "epoch": 15.3, "learning_rate": 0.001, "loss": 2.6461, "step": 79668 }, { "epoch": 15.3, "learning_rate": 0.001, "loss": 2.6252, "step": 79680 }, { "epoch": 15.3, "learning_rate": 0.001, "loss": 2.6353, "step": 79692 }, { "epoch": 15.3, "learning_rate": 0.001, "loss": 2.6367, "step": 79704 }, { "epoch": 15.31, "learning_rate": 0.001, "loss": 2.639, "step": 79716 }, { "epoch": 15.31, "learning_rate": 0.001, "loss": 2.6436, "step": 79728 }, { "epoch": 15.31, "learning_rate": 0.001, "loss": 2.6317, "step": 79740 }, { "epoch": 15.31, "learning_rate": 0.001, "loss": 2.632, "step": 79752 }, { "epoch": 15.32, "learning_rate": 0.001, "loss": 2.6428, "step": 79764 }, { "epoch": 15.32, "learning_rate": 0.001, "loss": 2.6454, "step": 79776 }, { "epoch": 15.32, "learning_rate": 0.001, "loss": 2.6387, "step": 79788 }, { "epoch": 15.32, "learning_rate": 0.001, "loss": 2.6404, "step": 79800 }, { "epoch": 15.32, "learning_rate": 0.001, "loss": 2.653, "step": 79812 }, { "epoch": 15.33, "learning_rate": 0.001, "loss": 2.6435, "step": 79824 }, { "epoch": 15.33, "learning_rate": 0.001, "loss": 2.6379, "step": 79836 }, { "epoch": 15.33, "learning_rate": 0.001, "loss": 2.6424, "step": 79848 }, { "epoch": 15.33, "learning_rate": 0.001, "loss": 2.6396, "step": 79860 }, { "epoch": 15.34, "learning_rate": 0.001, "loss": 2.6356, "step": 79872 }, { "epoch": 15.34, "learning_rate": 0.001, "loss": 2.6372, "step": 79884 }, { "epoch": 15.34, "learning_rate": 0.001, "loss": 2.6468, "step": 79896 }, { "epoch": 15.34, "learning_rate": 0.001, "loss": 2.6345, "step": 79908 }, { "epoch": 15.35, "learning_rate": 0.001, "loss": 2.6342, "step": 79920 }, { "epoch": 15.35, "learning_rate": 0.001, "loss": 2.6209, "step": 79932 }, { "epoch": 15.35, "learning_rate": 0.001, "loss": 2.6402, "step": 79944 }, { "epoch": 15.35, "learning_rate": 0.001, "loss": 2.6446, "step": 79956 }, { "epoch": 15.35, "learning_rate": 0.001, "loss": 2.64, "step": 79968 }, { "epoch": 15.36, "learning_rate": 0.001, "loss": 2.6435, "step": 79980 }, { "epoch": 15.36, "learning_rate": 0.001, "loss": 2.6394, "step": 79992 }, { "epoch": 15.36, "eval_ag_news_accuracy": 0.31578125, "eval_ag_news_bleu_score": 4.730361594997509, "eval_ag_news_bleu_score_sem": 0.15140771371019152, "eval_ag_news_emb_cos_sim": 0.788294792175293, "eval_ag_news_emb_cos_sim_sem": 0.008205163614624533, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6438395977020264, "eval_ag_news_n_ngrams_match_1": 13.59, "eval_ag_news_n_ngrams_match_2": 3.024, "eval_ag_news_n_ngrams_match_3": 0.874, "eval_ag_news_num_pred_words": 46.118, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.23837519851944, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3362795592336469, "eval_ag_news_runtime": 10.2423, "eval_ag_news_samples_per_second": 48.817, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.34467688354510934, "eval_ag_news_token_set_f1_sem": 0.00445724251939383, "eval_ag_news_token_set_precision": 0.32438450030726224, "eval_ag_news_token_set_recall": 0.39059500748455456, "eval_ag_news_true_num_tokens": 56.09375, "step": 80000 }, { "epoch": 15.36, "eval_anthropic_toxic_prompts_accuracy": 0.11084375, "eval_anthropic_toxic_prompts_bleu_score": 2.9742435628419184, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10952665715027296, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6537163257598877, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009910595668402872, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3290483951568604, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.07, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.628, "eval_anthropic_toxic_prompts_num_pred_words": 46.888, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.911768087758325, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20861706374940747, "eval_anthropic_toxic_prompts_runtime": 9.4752, "eval_anthropic_toxic_prompts_samples_per_second": 52.769, "eval_anthropic_toxic_prompts_steps_per_second": 0.106, "eval_anthropic_toxic_prompts_token_set_f1": 0.3496101650782922, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00622234355091113, "eval_anthropic_toxic_prompts_token_set_precision": 0.4268718953068824, "eval_anthropic_toxic_prompts_token_set_recall": 0.32234484236400307, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 80000 }, { "epoch": 15.36, "eval_arxiv_accuracy": 0.335375, "eval_arxiv_bleu_score": 4.0897295226619725, "eval_arxiv_bleu_score_sem": 0.1183274460994473, "eval_arxiv_emb_cos_sim": 0.7445273399353027, "eval_arxiv_emb_cos_sim_sem": 0.008040803417777699, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.506622552871704, "eval_arxiv_n_ngrams_match_1": 14.416, "eval_arxiv_n_ngrams_match_2": 2.752, "eval_arxiv_n_ngrams_match_3": 0.6, "eval_arxiv_num_pred_words": 40.114, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.335488588064585, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34368532984699696, "eval_arxiv_runtime": 11.5191, "eval_arxiv_samples_per_second": 43.406, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.3377013275927523, "eval_arxiv_token_set_f1_sem": 0.004391542781335758, "eval_arxiv_token_set_precision": 0.28551767564777564, "eval_arxiv_token_set_recall": 0.4328621113439869, "eval_arxiv_true_num_tokens": 64.0, "step": 80000 }, { "epoch": 15.36, "eval_python_code_alpaca_accuracy": 0.15671875, "eval_python_code_alpaca_bleu_score": 4.158878141786196, "eval_python_code_alpaca_bleu_score_sem": 0.12894559423333093, "eval_python_code_alpaca_emb_cos_sim": 0.7361272573471069, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008738172417228539, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9702227115631104, "eval_python_code_alpaca_n_ngrams_match_1": 9.498, "eval_python_code_alpaca_n_ngrams_match_2": 2.684, "eval_python_code_alpaca_n_ngrams_match_3": 0.82, "eval_python_code_alpaca_num_pred_words": 43.538, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.49626115535216, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3163294556758225, "eval_python_code_alpaca_runtime": 10.0366, "eval_python_code_alpaca_samples_per_second": 49.818, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.461687627779369, "eval_python_code_alpaca_token_set_f1_sem": 0.0059686049865851345, "eval_python_code_alpaca_token_set_precision": 0.5167242553149951, "eval_python_code_alpaca_token_set_recall": 0.43865666310553725, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 80000 }, { "epoch": 15.36, "eval_wikibio_accuracy": 0.31803125, "eval_wikibio_bleu_score": 5.205853509318029, "eval_wikibio_bleu_score_sem": 0.1989527554720283, "eval_wikibio_emb_cos_sim": 0.7154920101165771, "eval_wikibio_emb_cos_sim_sem": 0.010590209649373992, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.805198907852173, "eval_wikibio_n_ngrams_match_1": 9.166, "eval_wikibio_n_ngrams_match_2": 2.96, "eval_wikibio_n_ngrams_match_3": 1.016, "eval_wikibio_num_pred_words": 34.906, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.93418698679945, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3219322838921782, "eval_wikibio_runtime": 10.5113, "eval_wikibio_samples_per_second": 47.568, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.29404700280737817, "eval_wikibio_token_set_f1_sem": 0.006054734298112563, "eval_wikibio_token_set_precision": 0.2975466241182733, "eval_wikibio_token_set_recall": 0.31098053412762106, "eval_wikibio_true_num_tokens": 61.1328125, "step": 80000 }, { "epoch": 15.36, "eval_nq_accuracy": 0.5138125, "eval_nq_bleu_score": 11.350753812900662, "eval_nq_bleu_score_sem": 0.4824661409085098, "eval_nq_emb_cos_sim": 0.8283723592758179, "eval_nq_emb_cos_sim_sem": 0.006872088430727814, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2686431407928467, "eval_nq_n_ngrams_match_1": 22.62, "eval_nq_n_ngrams_match_2": 8.246, "eval_nq_n_ngrams_match_3": 3.792, "eval_nq_num_pred_words": 49.064, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.666276136142201, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4328425392537225, "eval_nq_runtime": 10.1429, "eval_nq_samples_per_second": 49.295, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.4511458695487511, "eval_nq_token_set_f1_sem": 0.005015187650887773, "eval_nq_token_set_precision": 0.4072391617969803, "eval_nq_token_set_recall": 0.5148671078914516, "eval_nq_true_num_tokens": 64.0, "step": 80000 }, { "epoch": 15.36, "learning_rate": 0.001, "loss": 2.6269, "step": 80004 }, { "epoch": 15.36, "learning_rate": 0.001, "loss": 2.6347, "step": 80016 }, { "epoch": 15.37, "learning_rate": 0.001, "loss": 2.629, "step": 80028 }, { "epoch": 15.37, "learning_rate": 0.001, "loss": 2.636, "step": 80040 }, { "epoch": 15.37, "learning_rate": 0.001, "loss": 2.6358, "step": 80052 }, { "epoch": 15.37, "learning_rate": 0.001, "loss": 2.6285, "step": 80064 }, { "epoch": 15.38, "learning_rate": 0.001, "loss": 2.6373, "step": 80076 }, { "epoch": 15.38, "learning_rate": 0.001, "loss": 2.6223, "step": 80088 }, { "epoch": 15.38, "learning_rate": 0.001, "loss": 2.6424, "step": 80100 }, { "epoch": 15.38, "learning_rate": 0.001, "loss": 2.6378, "step": 80112 }, { "epoch": 15.38, "learning_rate": 0.001, "loss": 2.6302, "step": 80124 }, { "epoch": 15.39, "learning_rate": 0.001, "loss": 2.6405, "step": 80136 }, { "epoch": 15.39, "learning_rate": 0.001, "loss": 2.636, "step": 80148 }, { "epoch": 15.39, "learning_rate": 0.001, "loss": 2.6346, "step": 80160 }, { "epoch": 15.39, "learning_rate": 0.001, "loss": 2.64, "step": 80172 }, { "epoch": 15.4, "learning_rate": 0.001, "loss": 2.6383, "step": 80184 }, { "epoch": 15.4, "learning_rate": 0.001, "loss": 2.6308, "step": 80196 }, { "epoch": 15.4, "learning_rate": 0.001, "loss": 2.6304, "step": 80208 }, { "epoch": 15.4, "learning_rate": 0.001, "loss": 2.626, "step": 80220 }, { "epoch": 15.41, "learning_rate": 0.001, "loss": 2.6309, "step": 80232 }, { "epoch": 15.41, "learning_rate": 0.001, "loss": 2.6377, "step": 80244 }, { "epoch": 15.41, "learning_rate": 0.001, "loss": 2.6537, "step": 80256 }, { "epoch": 15.41, "learning_rate": 0.001, "loss": 2.64, "step": 80268 }, { "epoch": 15.41, "learning_rate": 0.001, "loss": 2.6394, "step": 80280 }, { "epoch": 15.42, "learning_rate": 0.001, "loss": 2.6405, "step": 80292 }, { "epoch": 15.42, "learning_rate": 0.001, "loss": 2.6351, "step": 80304 }, { "epoch": 15.42, "learning_rate": 0.001, "loss": 2.6307, "step": 80316 }, { "epoch": 15.42, "learning_rate": 0.001, "loss": 2.6339, "step": 80328 }, { "epoch": 15.43, "learning_rate": 0.001, "loss": 2.6377, "step": 80340 }, { "epoch": 15.43, "learning_rate": 0.001, "loss": 2.6278, "step": 80352 }, { "epoch": 15.43, "learning_rate": 0.001, "loss": 2.6371, "step": 80364 }, { "epoch": 15.43, "learning_rate": 0.001, "loss": 2.6378, "step": 80376 }, { "epoch": 15.44, "learning_rate": 0.001, "loss": 2.6406, "step": 80388 }, { "epoch": 15.44, "learning_rate": 0.001, "loss": 2.6456, "step": 80400 }, { "epoch": 15.44, "learning_rate": 0.001, "loss": 2.6442, "step": 80412 }, { "epoch": 15.44, "learning_rate": 0.001, "loss": 2.6338, "step": 80424 }, { "epoch": 15.44, "learning_rate": 0.001, "loss": 2.635, "step": 80436 }, { "epoch": 15.45, "learning_rate": 0.001, "loss": 2.6362, "step": 80448 }, { "epoch": 15.45, "learning_rate": 0.001, "loss": 2.6418, "step": 80460 }, { "epoch": 15.45, "learning_rate": 0.001, "loss": 2.6343, "step": 80472 }, { "epoch": 15.45, "learning_rate": 0.001, "loss": 2.6375, "step": 80484 }, { "epoch": 15.46, "learning_rate": 0.001, "loss": 2.6333, "step": 80496 }, { "epoch": 15.46, "learning_rate": 0.001, "loss": 2.6332, "step": 80508 }, { "epoch": 15.46, "learning_rate": 0.001, "loss": 2.6374, "step": 80520 }, { "epoch": 15.46, "learning_rate": 0.001, "loss": 2.6383, "step": 80532 }, { "epoch": 15.47, "learning_rate": 0.001, "loss": 2.6341, "step": 80544 }, { "epoch": 15.47, "learning_rate": 0.001, "loss": 2.625, "step": 80556 }, { "epoch": 15.47, "learning_rate": 0.001, "loss": 2.6371, "step": 80568 }, { "epoch": 15.47, "learning_rate": 0.001, "loss": 2.6335, "step": 80580 }, { "epoch": 15.47, "learning_rate": 0.001, "loss": 2.6313, "step": 80592 }, { "epoch": 15.48, "learning_rate": 0.001, "loss": 2.636, "step": 80604 }, { "epoch": 15.48, "learning_rate": 0.001, "loss": 2.6421, "step": 80616 }, { "epoch": 15.48, "eval_ag_news_accuracy": 0.31328125, "eval_ag_news_bleu_score": 4.902969187138628, "eval_ag_news_bleu_score_sem": 0.15685461126798547, "eval_ag_news_emb_cos_sim": 0.7938907146453857, "eval_ag_news_emb_cos_sim_sem": 0.00737748577920112, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.663857936859131, "eval_ag_news_n_ngrams_match_1": 13.77, "eval_ag_news_n_ngrams_match_2": 3.082, "eval_ag_news_n_ngrams_match_3": 0.93, "eval_ag_news_num_pred_words": 46.89, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 39.011557050488804, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3369456785052954, "eval_ag_news_runtime": 10.1508, "eval_ag_news_samples_per_second": 49.257, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.34591004934855424, "eval_ag_news_token_set_f1_sem": 0.004406920066754583, "eval_ag_news_token_set_precision": 0.32818773207092217, "eval_ag_news_token_set_recall": 0.3836104996699221, "eval_ag_news_true_num_tokens": 56.09375, "step": 80625 }, { "epoch": 15.48, "eval_anthropic_toxic_prompts_accuracy": 0.11034375, "eval_anthropic_toxic_prompts_bleu_score": 2.8101509861178857, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10888008627699274, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6572726964950562, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009766325688861066, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3258824348449707, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.82, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.714, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.62, "eval_anthropic_toxic_prompts_num_pred_words": 47.966, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.82354027433018, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.19854912477762166, "eval_anthropic_toxic_prompts_runtime": 9.699, "eval_anthropic_toxic_prompts_samples_per_second": 51.551, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3447942813783264, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00669296404310681, "eval_anthropic_toxic_prompts_token_set_precision": 0.40981891673460225, "eval_anthropic_toxic_prompts_token_set_recall": 0.32834953575860026, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 80625 }, { "epoch": 15.48, "eval_arxiv_accuracy": 0.3374375, "eval_arxiv_bleu_score": 4.125040820004931, "eval_arxiv_bleu_score_sem": 0.11326277337378322, "eval_arxiv_emb_cos_sim": 0.7457396388053894, "eval_arxiv_emb_cos_sim_sem": 0.008271936076466932, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5074424743652344, "eval_arxiv_n_ngrams_match_1": 14.722, "eval_arxiv_n_ngrams_match_2": 2.82, "eval_arxiv_n_ngrams_match_3": 0.576, "eval_arxiv_num_pred_words": 41.108, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.36283227996376, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3480919874396554, "eval_arxiv_runtime": 10.007, "eval_arxiv_samples_per_second": 49.965, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.34262515759688517, "eval_arxiv_token_set_f1_sem": 0.0040497237004410575, "eval_arxiv_token_set_precision": 0.29292742441436825, "eval_arxiv_token_set_recall": 0.42921025127510737, "eval_arxiv_true_num_tokens": 64.0, "step": 80625 }, { "epoch": 15.48, "eval_python_code_alpaca_accuracy": 0.1569375, "eval_python_code_alpaca_bleu_score": 4.3937920422944865, "eval_python_code_alpaca_bleu_score_sem": 0.13308568352125713, "eval_python_code_alpaca_emb_cos_sim": 0.7495800256729126, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009260718264689714, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.962172746658325, "eval_python_code_alpaca_n_ngrams_match_1": 9.832, "eval_python_code_alpaca_n_ngrams_match_2": 2.786, "eval_python_code_alpaca_n_ngrams_match_3": 0.864, "eval_python_code_alpaca_num_pred_words": 43.714, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.33994694335842, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.327535308174034, "eval_python_code_alpaca_runtime": 10.3932, "eval_python_code_alpaca_samples_per_second": 48.108, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.46835692039661836, "eval_python_code_alpaca_token_set_f1_sem": 0.005518144154934098, "eval_python_code_alpaca_token_set_precision": 0.5356498709782705, "eval_python_code_alpaca_token_set_recall": 0.4362219847384163, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 80625 }, { "epoch": 15.48, "eval_wikibio_accuracy": 0.3163125, "eval_wikibio_bleu_score": 5.4346361908823315, "eval_wikibio_bleu_score_sem": 0.1947358151126523, "eval_wikibio_emb_cos_sim": 0.7088469862937927, "eval_wikibio_emb_cos_sim_sem": 0.011166227993794348, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.848999500274658, "eval_wikibio_n_ngrams_match_1": 9.372, "eval_wikibio_n_ngrams_match_2": 3.048, "eval_wikibio_n_ngrams_match_3": 1.04, "eval_wikibio_num_pred_words": 34.668, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.94607019690226, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3337735410525501, "eval_wikibio_runtime": 12.0628, "eval_wikibio_samples_per_second": 41.45, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.3009399254029897, "eval_wikibio_token_set_f1_sem": 0.005763500426829387, "eval_wikibio_token_set_precision": 0.3039482143380533, "eval_wikibio_token_set_recall": 0.3146186770050787, "eval_wikibio_true_num_tokens": 61.1328125, "step": 80625 }, { "epoch": 15.48, "eval_nq_accuracy": 0.5168125, "eval_nq_bleu_score": 11.266202740249126, "eval_nq_bleu_score_sem": 0.4661878370345242, "eval_nq_emb_cos_sim": 0.8264041543006897, "eval_nq_emb_cos_sim_sem": 0.006618851067083631, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2660751342773438, "eval_nq_n_ngrams_match_1": 22.614, "eval_nq_n_ngrams_match_2": 8.156, "eval_nq_n_ngrams_match_3": 3.74, "eval_nq_num_pred_words": 49.33, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.641484921668216, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43435719091213265, "eval_nq_runtime": 12.3246, "eval_nq_samples_per_second": 40.569, "eval_nq_steps_per_second": 0.081, "eval_nq_token_set_f1": 0.4520957625381011, "eval_nq_token_set_f1_sem": 0.004988544875926743, "eval_nq_token_set_precision": 0.40853170003964134, "eval_nq_token_set_recall": 0.5148430985261564, "eval_nq_true_num_tokens": 64.0, "step": 80625 }, { "epoch": 15.48, "learning_rate": 0.001, "loss": 2.6349, "step": 80628 }, { "epoch": 15.48, "learning_rate": 0.001, "loss": 2.6355, "step": 80640 }, { "epoch": 15.49, "learning_rate": 0.001, "loss": 2.6444, "step": 80652 }, { "epoch": 15.49, "learning_rate": 0.001, "loss": 2.6255, "step": 80664 }, { "epoch": 15.49, "learning_rate": 0.001, "loss": 2.6242, "step": 80676 }, { "epoch": 15.49, "learning_rate": 0.001, "loss": 2.6362, "step": 80688 }, { "epoch": 15.5, "learning_rate": 0.001, "loss": 2.6422, "step": 80700 }, { "epoch": 15.5, "learning_rate": 0.001, "loss": 2.6354, "step": 80712 }, { "epoch": 15.5, "learning_rate": 0.001, "loss": 2.6306, "step": 80724 }, { "epoch": 15.5, "learning_rate": 0.001, "loss": 2.6432, "step": 80736 }, { "epoch": 15.5, "learning_rate": 0.001, "loss": 2.6368, "step": 80748 }, { "epoch": 15.51, "learning_rate": 0.001, "loss": 2.6361, "step": 80760 }, { "epoch": 15.51, "learning_rate": 0.001, "loss": 2.633, "step": 80772 }, { "epoch": 15.51, "learning_rate": 0.001, "loss": 2.6415, "step": 80784 }, { "epoch": 15.51, "learning_rate": 0.001, "loss": 2.6335, "step": 80796 }, { "epoch": 15.52, "learning_rate": 0.001, "loss": 2.6495, "step": 80808 }, { "epoch": 15.52, "learning_rate": 0.001, "loss": 2.6389, "step": 80820 }, { "epoch": 15.52, "learning_rate": 0.001, "loss": 2.6389, "step": 80832 }, { "epoch": 15.52, "learning_rate": 0.001, "loss": 2.6399, "step": 80844 }, { "epoch": 15.53, "learning_rate": 0.001, "loss": 2.6432, "step": 80856 }, { "epoch": 15.53, "learning_rate": 0.001, "loss": 2.6453, "step": 80868 }, { "epoch": 15.53, "learning_rate": 0.001, "loss": 2.6367, "step": 80880 }, { "epoch": 15.53, "learning_rate": 0.001, "loss": 2.6475, "step": 80892 }, { "epoch": 15.53, "learning_rate": 0.001, "loss": 2.6512, "step": 80904 }, { "epoch": 15.54, "learning_rate": 0.001, "loss": 2.6439, "step": 80916 }, { "epoch": 15.54, "learning_rate": 0.001, "loss": 2.6323, "step": 80928 }, { "epoch": 15.54, "learning_rate": 0.001, "loss": 2.636, "step": 80940 }, { "epoch": 15.54, "learning_rate": 0.001, "loss": 2.6278, "step": 80952 }, { "epoch": 15.55, "learning_rate": 0.001, "loss": 2.6308, "step": 80964 }, { "epoch": 15.55, "learning_rate": 0.001, "loss": 2.6411, "step": 80976 }, { "epoch": 15.55, "learning_rate": 0.001, "loss": 2.6276, "step": 80988 }, { "epoch": 15.55, "learning_rate": 0.001, "loss": 2.6407, "step": 81000 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 2.6379, "step": 81012 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 2.6356, "step": 81024 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 2.6261, "step": 81036 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 2.6304, "step": 81048 }, { "epoch": 15.56, "learning_rate": 0.001, "loss": 2.6357, "step": 81060 }, { "epoch": 15.57, "learning_rate": 0.001, "loss": 2.6299, "step": 81072 }, { "epoch": 15.57, "learning_rate": 0.001, "loss": 2.6393, "step": 81084 }, { "epoch": 15.57, "learning_rate": 0.001, "loss": 2.6377, "step": 81096 }, { "epoch": 15.57, "learning_rate": 0.001, "loss": 2.6428, "step": 81108 }, { "epoch": 15.58, "learning_rate": 0.001, "loss": 2.6464, "step": 81120 }, { "epoch": 15.58, "learning_rate": 0.001, "loss": 2.6274, "step": 81132 }, { "epoch": 15.58, "learning_rate": 0.001, "loss": 2.6354, "step": 81144 }, { "epoch": 15.58, "learning_rate": 0.001, "loss": 2.6391, "step": 81156 }, { "epoch": 15.59, "learning_rate": 0.001, "loss": 2.6426, "step": 81168 }, { "epoch": 15.59, "learning_rate": 0.001, "loss": 2.6315, "step": 81180 }, { "epoch": 15.59, "learning_rate": 0.001, "loss": 2.6404, "step": 81192 }, { "epoch": 15.59, "learning_rate": 0.001, "loss": 2.6285, "step": 81204 }, { "epoch": 15.59, "learning_rate": 0.001, "loss": 2.6309, "step": 81216 }, { "epoch": 15.6, "learning_rate": 0.001, "loss": 2.6412, "step": 81228 }, { "epoch": 15.6, "learning_rate": 0.001, "loss": 2.635, "step": 81240 }, { "epoch": 15.6, "eval_ag_news_accuracy": 0.316375, "eval_ag_news_bleu_score": 4.682395421855241, "eval_ag_news_bleu_score_sem": 0.15159199569377962, "eval_ag_news_emb_cos_sim": 0.7924454808235168, "eval_ag_news_emb_cos_sim_sem": 0.007152564512311178, "eval_ag_news_emb_top1_equal": 0.1640625, "eval_ag_news_emb_top1_equal_sem": 0.03286167651298939, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.641594171524048, "eval_ag_news_n_ngrams_match_1": 13.5, "eval_ag_news_n_ngrams_match_2": 3.002, "eval_ag_news_n_ngrams_match_3": 0.874, "eval_ag_news_num_pred_words": 46.048, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.152610075506715, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33551377766739854, "eval_ag_news_runtime": 10.4734, "eval_ag_news_samples_per_second": 47.74, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3415970667810647, "eval_ag_news_token_set_f1_sem": 0.004477849714244134, "eval_ag_news_token_set_precision": 0.3209268164374048, "eval_ag_news_token_set_recall": 0.3854853977883826, "eval_ag_news_true_num_tokens": 56.09375, "step": 81250 }, { "epoch": 15.6, "eval_anthropic_toxic_prompts_accuracy": 0.11021875, "eval_anthropic_toxic_prompts_bleu_score": 2.8650664247207613, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1065648789611312, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.660201907157898, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009367109422676862, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3250439167022705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.88, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.726, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618, "eval_anthropic_toxic_prompts_num_pred_words": 47.208, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.800219509824505, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20117776760874384, "eval_anthropic_toxic_prompts_runtime": 10.1348, "eval_anthropic_toxic_prompts_samples_per_second": 49.335, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.34381236840181684, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006386860215746904, "eval_anthropic_toxic_prompts_token_set_precision": 0.4158466991993418, "eval_anthropic_toxic_prompts_token_set_recall": 0.32037843397116683, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 81250 }, { "epoch": 15.6, "eval_arxiv_accuracy": 0.3354375, "eval_arxiv_bleu_score": 4.0443330103526085, "eval_arxiv_bleu_score_sem": 0.11662538402071894, "eval_arxiv_emb_cos_sim": 0.7389492988586426, "eval_arxiv_emb_cos_sim_sem": 0.007959178143102257, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4950528144836426, "eval_arxiv_n_ngrams_match_1": 14.508, "eval_arxiv_n_ngrams_match_2": 2.728, "eval_arxiv_n_ngrams_match_3": 0.592, "eval_arxiv_num_pred_words": 40.412, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.9520282514048, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3462333636337764, "eval_arxiv_runtime": 12.2979, "eval_arxiv_samples_per_second": 40.657, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.3420483466516191, "eval_arxiv_token_set_f1_sem": 0.004247311797294354, "eval_arxiv_token_set_precision": 0.28931396658696074, "eval_arxiv_token_set_recall": 0.43745266977638797, "eval_arxiv_true_num_tokens": 64.0, "step": 81250 }, { "epoch": 15.6, "eval_python_code_alpaca_accuracy": 0.15525, "eval_python_code_alpaca_bleu_score": 4.223859914649429, "eval_python_code_alpaca_bleu_score_sem": 0.13389798990119844, "eval_python_code_alpaca_emb_cos_sim": 0.7515406012535095, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008981141096359746, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.963074207305908, "eval_python_code_alpaca_n_ngrams_match_1": 9.464, "eval_python_code_alpaca_n_ngrams_match_2": 2.738, "eval_python_code_alpaca_n_ngrams_match_3": 0.874, "eval_python_code_alpaca_num_pred_words": 44.388, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.3573890049391, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31377960286240353, "eval_python_code_alpaca_runtime": 10.0826, "eval_python_code_alpaca_samples_per_second": 49.59, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.46755875263882934, "eval_python_code_alpaca_token_set_f1_sem": 0.005557293216973375, "eval_python_code_alpaca_token_set_precision": 0.5156998155802238, "eval_python_code_alpaca_token_set_recall": 0.45172114970371036, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 81250 }, { "epoch": 15.6, "eval_wikibio_accuracy": 0.311, "eval_wikibio_bleu_score": 5.732317034573434, "eval_wikibio_bleu_score_sem": 0.20042552946159797, "eval_wikibio_emb_cos_sim": 0.7321215867996216, "eval_wikibio_emb_cos_sim_sem": 0.011186965560350724, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8367044925689697, "eval_wikibio_n_ngrams_match_1": 10.26, "eval_wikibio_n_ngrams_match_2": 3.368, "eval_wikibio_n_ngrams_match_3": 1.182, "eval_wikibio_num_pred_words": 36.982, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.37240175765687, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3524071211803058, "eval_wikibio_runtime": 10.025, "eval_wikibio_samples_per_second": 49.875, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3201287172492095, "eval_wikibio_token_set_f1_sem": 0.005216591469242263, "eval_wikibio_token_set_precision": 0.3297787345605933, "eval_wikibio_token_set_recall": 0.323932980307254, "eval_wikibio_true_num_tokens": 61.1328125, "step": 81250 }, { "epoch": 15.6, "eval_nq_accuracy": 0.51759375, "eval_nq_bleu_score": 11.190894608645293, "eval_nq_bleu_score_sem": 0.48407423216222073, "eval_nq_emb_cos_sim": 0.8212544322013855, "eval_nq_emb_cos_sim_sem": 0.007672706672318217, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2649385929107666, "eval_nq_n_ngrams_match_1": 22.648, "eval_nq_n_ngrams_match_2": 8.188, "eval_nq_n_ngrams_match_3": 3.718, "eval_nq_num_pred_words": 48.948, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.630533199940784, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4364079609747853, "eval_nq_runtime": 11.217, "eval_nq_samples_per_second": 44.575, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.4521435887521291, "eval_nq_token_set_f1_sem": 0.0052243279546164275, "eval_nq_token_set_precision": 0.4067902147521592, "eval_nq_token_set_recall": 0.5180802961290288, "eval_nq_true_num_tokens": 64.0, "step": 81250 }, { "epoch": 15.6, "learning_rate": 0.001, "loss": 2.6447, "step": 81252 }, { "epoch": 15.6, "learning_rate": 0.001, "loss": 2.6386, "step": 81264 }, { "epoch": 15.61, "learning_rate": 0.001, "loss": 2.6318, "step": 81276 }, { "epoch": 15.61, "learning_rate": 0.001, "loss": 2.6308, "step": 81288 }, { "epoch": 15.61, "learning_rate": 0.001, "loss": 2.6345, "step": 81300 }, { "epoch": 15.61, "learning_rate": 0.001, "loss": 2.646, "step": 81312 }, { "epoch": 15.62, "learning_rate": 0.001, "loss": 2.6499, "step": 81324 }, { "epoch": 15.62, "learning_rate": 0.001, "loss": 2.6416, "step": 81336 }, { "epoch": 15.62, "learning_rate": 0.001, "loss": 2.6386, "step": 81348 }, { "epoch": 15.62, "learning_rate": 0.001, "loss": 2.6357, "step": 81360 }, { "epoch": 15.62, "learning_rate": 0.001, "loss": 2.6372, "step": 81372 }, { "epoch": 15.63, "learning_rate": 0.001, "loss": 2.626, "step": 81384 }, { "epoch": 15.63, "learning_rate": 0.001, "loss": 2.6302, "step": 81396 }, { "epoch": 15.63, "learning_rate": 0.001, "loss": 2.6263, "step": 81408 }, { "epoch": 15.63, "learning_rate": 0.001, "loss": 2.6375, "step": 81420 }, { "epoch": 15.64, "learning_rate": 0.001, "loss": 2.6435, "step": 81432 }, { "epoch": 15.64, "learning_rate": 0.001, "loss": 2.6398, "step": 81444 }, { "epoch": 15.64, "learning_rate": 0.001, "loss": 2.6348, "step": 81456 }, { "epoch": 15.64, "learning_rate": 0.001, "loss": 2.632, "step": 81468 }, { "epoch": 15.65, "learning_rate": 0.001, "loss": 2.6208, "step": 81480 }, { "epoch": 15.65, "learning_rate": 0.001, "loss": 2.6339, "step": 81492 }, { "epoch": 15.65, "learning_rate": 0.001, "loss": 2.6355, "step": 81504 }, { "epoch": 15.65, "learning_rate": 0.001, "loss": 2.6314, "step": 81516 }, { "epoch": 15.65, "learning_rate": 0.001, "loss": 2.6367, "step": 81528 }, { "epoch": 15.66, "learning_rate": 0.001, "loss": 2.6374, "step": 81540 }, { "epoch": 15.66, "learning_rate": 0.001, "loss": 2.6424, "step": 81552 }, { "epoch": 15.66, "learning_rate": 0.001, "loss": 2.6342, "step": 81564 }, { "epoch": 15.66, "learning_rate": 0.001, "loss": 2.6328, "step": 81576 }, { "epoch": 15.67, "learning_rate": 0.001, "loss": 2.6446, "step": 81588 }, { "epoch": 15.67, "learning_rate": 0.001, "loss": 2.6302, "step": 81600 }, { "epoch": 15.67, "learning_rate": 0.001, "loss": 2.6265, "step": 81612 }, { "epoch": 15.67, "learning_rate": 0.001, "loss": 2.6405, "step": 81624 }, { "epoch": 15.68, "learning_rate": 0.001, "loss": 2.6379, "step": 81636 }, { "epoch": 15.68, "learning_rate": 0.001, "loss": 2.631, "step": 81648 }, { "epoch": 15.68, "learning_rate": 0.001, "loss": 2.639, "step": 81660 }, { "epoch": 15.68, "learning_rate": 0.001, "loss": 2.6453, "step": 81672 }, { "epoch": 15.68, "learning_rate": 0.001, "loss": 2.6283, "step": 81684 }, { "epoch": 15.69, "learning_rate": 0.001, "loss": 2.646, "step": 81696 }, { "epoch": 15.69, "learning_rate": 0.001, "loss": 2.63, "step": 81708 }, { "epoch": 15.69, "learning_rate": 0.001, "loss": 2.6409, "step": 81720 }, { "epoch": 15.69, "learning_rate": 0.001, "loss": 2.6306, "step": 81732 }, { "epoch": 15.7, "learning_rate": 0.001, "loss": 2.643, "step": 81744 }, { "epoch": 15.7, "learning_rate": 0.001, "loss": 2.6389, "step": 81756 }, { "epoch": 15.7, "learning_rate": 0.001, "loss": 2.6476, "step": 81768 }, { "epoch": 15.7, "learning_rate": 0.001, "loss": 2.6407, "step": 81780 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 2.6416, "step": 81792 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 2.6351, "step": 81804 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 2.6385, "step": 81816 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 2.644, "step": 81828 }, { "epoch": 15.71, "learning_rate": 0.001, "loss": 2.639, "step": 81840 }, { "epoch": 15.72, "learning_rate": 0.001, "loss": 2.6398, "step": 81852 }, { "epoch": 15.72, "learning_rate": 0.001, "loss": 2.6313, "step": 81864 }, { "epoch": 15.72, "eval_ag_news_accuracy": 0.31365625, "eval_ag_news_bleu_score": 4.653096769290414, "eval_ag_news_bleu_score_sem": 0.14484041879097112, "eval_ag_news_emb_cos_sim": 0.7947143316268921, "eval_ag_news_emb_cos_sim_sem": 0.007573163718669581, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.661522150039673, "eval_ag_news_n_ngrams_match_1": 13.618, "eval_ag_news_n_ngrams_match_2": 2.932, "eval_ag_news_n_ngrams_match_3": 0.854, "eval_ag_news_num_pred_words": 46.554, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.920540708491075, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33406986695124685, "eval_ag_news_runtime": 10.1353, "eval_ag_news_samples_per_second": 49.332, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.34113471006866314, "eval_ag_news_token_set_f1_sem": 0.004315589502120115, "eval_ag_news_token_set_precision": 0.32230251196354864, "eval_ag_news_token_set_recall": 0.3760528980351533, "eval_ag_news_true_num_tokens": 56.09375, "step": 81875 }, { "epoch": 15.72, "eval_anthropic_toxic_prompts_accuracy": 0.113, "eval_anthropic_toxic_prompts_bleu_score": 3.074410466826119, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12159790861728094, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6609877347946167, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009216894761861497, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2926738262176514, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.07, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.68, "eval_anthropic_toxic_prompts_num_pred_words": 47.384, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.914732848546176, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20801436501360485, "eval_anthropic_toxic_prompts_runtime": 9.6643, "eval_anthropic_toxic_prompts_samples_per_second": 51.737, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.34938095029612537, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006275991095644129, "eval_anthropic_toxic_prompts_token_set_precision": 0.4296266110461715, "eval_anthropic_toxic_prompts_token_set_recall": 0.3205905337601249, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 81875 }, { "epoch": 15.72, "eval_arxiv_accuracy": 0.33609375, "eval_arxiv_bleu_score": 4.218588055979579, "eval_arxiv_bleu_score_sem": 0.1167802980186407, "eval_arxiv_emb_cos_sim": 0.7482851147651672, "eval_arxiv_emb_cos_sim_sem": 0.008023437599102195, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5122647285461426, "eval_arxiv_n_ngrams_match_1": 14.836, "eval_arxiv_n_ngrams_match_2": 2.89, "eval_arxiv_n_ngrams_match_3": 0.642, "eval_arxiv_num_pred_words": 41.748, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.524104873611144, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3491676657044462, "eval_arxiv_runtime": 10.0887, "eval_arxiv_samples_per_second": 49.56, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.34631324822890003, "eval_arxiv_token_set_f1_sem": 0.004176669327474083, "eval_arxiv_token_set_precision": 0.29527977866938615, "eval_arxiv_token_set_recall": 0.4368670721152247, "eval_arxiv_true_num_tokens": 64.0, "step": 81875 }, { "epoch": 15.72, "eval_python_code_alpaca_accuracy": 0.15596875, "eval_python_code_alpaca_bleu_score": 4.455513912659973, "eval_python_code_alpaca_bleu_score_sem": 0.14139354445013547, "eval_python_code_alpaca_emb_cos_sim": 0.7462294101715088, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007730929250813478, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.989055871963501, "eval_python_code_alpaca_n_ngrams_match_1": 9.472, "eval_python_code_alpaca_n_ngrams_match_2": 2.806, "eval_python_code_alpaca_n_ngrams_match_3": 0.922, "eval_python_code_alpaca_num_pred_words": 44.164, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.866916721238418, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3165625792828408, "eval_python_code_alpaca_runtime": 10.1224, "eval_python_code_alpaca_samples_per_second": 49.396, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4589637613160641, "eval_python_code_alpaca_token_set_f1_sem": 0.005411980773500886, "eval_python_code_alpaca_token_set_precision": 0.5180586119644336, "eval_python_code_alpaca_token_set_recall": 0.43098785321148464, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 81875 }, { "epoch": 15.72, "eval_wikibio_accuracy": 0.31315625, "eval_wikibio_bleu_score": 5.584470858264593, "eval_wikibio_bleu_score_sem": 0.1974678380751109, "eval_wikibio_emb_cos_sim": 0.725496768951416, "eval_wikibio_emb_cos_sim_sem": 0.009804602929843054, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8564724922180176, "eval_wikibio_n_ngrams_match_1": 9.866, "eval_wikibio_n_ngrams_match_2": 3.182, "eval_wikibio_n_ngrams_match_3": 1.088, "eval_wikibio_num_pred_words": 36.088, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.29821193865967, "eval_wikibio_pred_num_tokens": 62.8671875, "eval_wikibio_rouge_score": 0.3451770807211191, "eval_wikibio_runtime": 9.6658, "eval_wikibio_samples_per_second": 51.729, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.3143199277366929, "eval_wikibio_token_set_f1_sem": 0.0051910819341498115, "eval_wikibio_token_set_precision": 0.3204884091735503, "eval_wikibio_token_set_recall": 0.3282853772180423, "eval_wikibio_true_num_tokens": 61.1328125, "step": 81875 }, { "epoch": 15.72, "eval_nq_accuracy": 0.51465625, "eval_nq_bleu_score": 11.303224318353855, "eval_nq_bleu_score_sem": 0.4823117457591126, "eval_nq_emb_cos_sim": 0.8220856785774231, "eval_nq_emb_cos_sim_sem": 0.007568303660200669, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.266073226928711, "eval_nq_n_ngrams_match_1": 22.558, "eval_nq_n_ngrams_match_2": 8.118, "eval_nq_n_ngrams_match_3": 3.754, "eval_nq_num_pred_words": 49.37, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.64146653201267, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.433758328343818, "eval_nq_runtime": 9.908, "eval_nq_samples_per_second": 50.464, "eval_nq_steps_per_second": 0.101, "eval_nq_token_set_f1": 0.44966478491130624, "eval_nq_token_set_f1_sem": 0.005027368720173926, "eval_nq_token_set_precision": 0.4063033738279913, "eval_nq_token_set_recall": 0.5125654540820727, "eval_nq_true_num_tokens": 64.0, "step": 81875 }, { "epoch": 15.72, "learning_rate": 0.001, "loss": 2.6361, "step": 81876 }, { "epoch": 15.72, "learning_rate": 0.001, "loss": 2.6425, "step": 81888 }, { "epoch": 15.73, "learning_rate": 0.001, "loss": 2.6328, "step": 81900 }, { "epoch": 15.73, "learning_rate": 0.001, "loss": 2.6434, "step": 81912 }, { "epoch": 15.73, "learning_rate": 0.001, "loss": 2.6471, "step": 81924 }, { "epoch": 15.73, "learning_rate": 0.001, "loss": 2.6456, "step": 81936 }, { "epoch": 15.74, "learning_rate": 0.001, "loss": 2.6359, "step": 81948 }, { "epoch": 15.74, "learning_rate": 0.001, "loss": 2.6537, "step": 81960 }, { "epoch": 15.74, "learning_rate": 0.001, "loss": 2.6341, "step": 81972 }, { "epoch": 15.74, "learning_rate": 0.001, "loss": 2.6323, "step": 81984 }, { "epoch": 15.74, "learning_rate": 0.001, "loss": 2.6446, "step": 81996 }, { "epoch": 15.75, "learning_rate": 0.001, "loss": 2.6406, "step": 82008 }, { "epoch": 15.75, "learning_rate": 0.001, "loss": 2.6368, "step": 82020 }, { "epoch": 15.75, "learning_rate": 0.001, "loss": 2.6343, "step": 82032 }, { "epoch": 15.75, "learning_rate": 0.001, "loss": 2.6317, "step": 82044 }, { "epoch": 15.76, "learning_rate": 0.001, "loss": 2.6499, "step": 82056 }, { "epoch": 15.76, "learning_rate": 0.001, "loss": 2.6372, "step": 82068 }, { "epoch": 15.76, "learning_rate": 0.001, "loss": 2.6332, "step": 82080 }, { "epoch": 15.76, "learning_rate": 0.001, "loss": 2.6371, "step": 82092 }, { "epoch": 15.76, "learning_rate": 0.001, "loss": 2.6379, "step": 82104 }, { "epoch": 15.77, "learning_rate": 0.001, "loss": 2.6407, "step": 82116 }, { "epoch": 15.77, "learning_rate": 0.001, "loss": 2.6285, "step": 82128 }, { "epoch": 15.77, "learning_rate": 0.001, "loss": 2.6395, "step": 82140 }, { "epoch": 15.77, "learning_rate": 0.001, "loss": 2.6399, "step": 82152 }, { "epoch": 15.78, "learning_rate": 0.001, "loss": 2.6362, "step": 82164 }, { "epoch": 15.78, "learning_rate": 0.001, "loss": 2.6344, "step": 82176 }, { "epoch": 15.78, "learning_rate": 0.001, "loss": 2.6352, "step": 82188 }, { "epoch": 15.78, "learning_rate": 0.001, "loss": 2.6345, "step": 82200 }, { "epoch": 15.79, "learning_rate": 0.001, "loss": 2.6321, "step": 82212 }, { "epoch": 15.79, "learning_rate": 0.001, "loss": 2.6396, "step": 82224 }, { "epoch": 15.79, "learning_rate": 0.001, "loss": 2.6361, "step": 82236 }, { "epoch": 15.79, "learning_rate": 0.001, "loss": 2.641, "step": 82248 }, { "epoch": 15.79, "learning_rate": 0.001, "loss": 2.6356, "step": 82260 }, { "epoch": 15.8, "learning_rate": 0.001, "loss": 2.6361, "step": 82272 }, { "epoch": 15.8, "learning_rate": 0.001, "loss": 2.6289, "step": 82284 }, { "epoch": 15.8, "learning_rate": 0.001, "loss": 2.635, "step": 82296 }, { "epoch": 15.8, "learning_rate": 0.001, "loss": 2.6331, "step": 82308 }, { "epoch": 15.81, "learning_rate": 0.001, "loss": 2.6376, "step": 82320 }, { "epoch": 15.81, "learning_rate": 0.001, "loss": 2.6374, "step": 82332 }, { "epoch": 15.81, "learning_rate": 0.001, "loss": 2.6436, "step": 82344 }, { "epoch": 15.81, "learning_rate": 0.001, "loss": 2.631, "step": 82356 }, { "epoch": 15.82, "learning_rate": 0.001, "loss": 2.6256, "step": 82368 }, { "epoch": 15.82, "learning_rate": 0.001, "loss": 2.6469, "step": 82380 }, { "epoch": 15.82, "learning_rate": 0.001, "loss": 2.6391, "step": 82392 }, { "epoch": 15.82, "learning_rate": 0.001, "loss": 2.6334, "step": 82404 }, { "epoch": 15.82, "learning_rate": 0.001, "loss": 2.6472, "step": 82416 }, { "epoch": 15.83, "learning_rate": 0.001, "loss": 2.6317, "step": 82428 }, { "epoch": 15.83, "learning_rate": 0.001, "loss": 2.6468, "step": 82440 }, { "epoch": 15.83, "learning_rate": 0.001, "loss": 2.633, "step": 82452 }, { "epoch": 15.83, "learning_rate": 0.001, "loss": 2.6291, "step": 82464 }, { "epoch": 15.84, "learning_rate": 0.001, "loss": 2.637, "step": 82476 }, { "epoch": 15.84, "learning_rate": 0.001, "loss": 2.6334, "step": 82488 }, { "epoch": 15.84, "learning_rate": 0.001, "loss": 2.6336, "step": 82500 }, { "epoch": 15.84, "eval_ag_news_accuracy": 0.3149375, "eval_ag_news_bleu_score": 4.743920818591882, "eval_ag_news_bleu_score_sem": 0.15399761909805726, "eval_ag_news_emb_cos_sim": 0.7969221472740173, "eval_ag_news_emb_cos_sim_sem": 0.007173364640595812, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.644028902053833, "eval_ag_news_n_ngrams_match_1": 13.57, "eval_ag_news_n_ngrams_match_2": 2.952, "eval_ag_news_n_ngrams_match_3": 0.89, "eval_ag_news_num_pred_words": 46.414, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.24561457455161, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3366953221162319, "eval_ag_news_runtime": 10.2061, "eval_ag_news_samples_per_second": 48.99, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3407323618587906, "eval_ag_news_token_set_f1_sem": 0.004428794274326815, "eval_ag_news_token_set_precision": 0.3207270524832423, "eval_ag_news_token_set_recall": 0.3785358314661035, "eval_ag_news_true_num_tokens": 56.09375, "step": 82500 }, { "epoch": 15.84, "eval_anthropic_toxic_prompts_accuracy": 0.11159375, "eval_anthropic_toxic_prompts_bleu_score": 2.8757697911840285, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11265081132312472, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.663439154624939, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009304339594353981, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2902793884277344, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.902, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.75, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.614, "eval_anthropic_toxic_prompts_num_pred_words": 47.484, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.850364289117547, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20290831595159797, "eval_anthropic_toxic_prompts_runtime": 9.4882, "eval_anthropic_toxic_prompts_samples_per_second": 52.697, "eval_anthropic_toxic_prompts_steps_per_second": 0.105, "eval_anthropic_toxic_prompts_token_set_f1": 0.34820099850808256, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006828062490744303, "eval_anthropic_toxic_prompts_token_set_precision": 0.416371584162717, "eval_anthropic_toxic_prompts_token_set_recall": 0.3252265012940169, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 82500 }, { "epoch": 15.84, "eval_arxiv_accuracy": 0.3378125, "eval_arxiv_bleu_score": 4.195023864975421, "eval_arxiv_bleu_score_sem": 0.12094020078246574, "eval_arxiv_emb_cos_sim": 0.7507302761077881, "eval_arxiv_emb_cos_sim_sem": 0.00832024558448353, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.49530029296875, "eval_arxiv_n_ngrams_match_1": 14.68, "eval_arxiv_n_ngrams_match_2": 2.862, "eval_arxiv_n_ngrams_match_3": 0.632, "eval_arxiv_num_pred_words": 39.936, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.960184178604294, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.351676517500356, "eval_arxiv_runtime": 10.1285, "eval_arxiv_samples_per_second": 49.366, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3470788505617729, "eval_arxiv_token_set_f1_sem": 0.0042230193260675754, "eval_arxiv_token_set_precision": 0.2937546752019646, "eval_arxiv_token_set_recall": 0.44480318129726143, "eval_arxiv_true_num_tokens": 64.0, "step": 82500 }, { "epoch": 15.84, "eval_python_code_alpaca_accuracy": 0.15590625, "eval_python_code_alpaca_bleu_score": 4.122700851138748, "eval_python_code_alpaca_bleu_score_sem": 0.1314244047950497, "eval_python_code_alpaca_emb_cos_sim": 0.7421751022338867, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007808900920558632, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9635770320892334, "eval_python_code_alpaca_n_ngrams_match_1": 9.418, "eval_python_code_alpaca_n_ngrams_match_2": 2.658, "eval_python_code_alpaca_n_ngrams_match_3": 0.844, "eval_python_code_alpaca_num_pred_words": 45.102, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.367124827372525, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31032192257807634, "eval_python_code_alpaca_runtime": 9.6118, "eval_python_code_alpaca_samples_per_second": 52.02, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.46543015368581114, "eval_python_code_alpaca_token_set_f1_sem": 0.00552760558152969, "eval_python_code_alpaca_token_set_precision": 0.5124026161975384, "eval_python_code_alpaca_token_set_recall": 0.4454855803306646, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 82500 }, { "epoch": 15.84, "eval_wikibio_accuracy": 0.31265625, "eval_wikibio_bleu_score": 5.538278207704747, "eval_wikibio_bleu_score_sem": 0.19132065399044007, "eval_wikibio_emb_cos_sim": 0.7391377687454224, "eval_wikibio_emb_cos_sim_sem": 0.009374521999658715, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8321797847747803, "eval_wikibio_n_ngrams_match_1": 9.972, "eval_wikibio_n_ngrams_match_2": 3.218, "eval_wikibio_n_ngrams_match_3": 1.118, "eval_wikibio_num_pred_words": 36.88, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.16305416549366, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34619185338538433, "eval_wikibio_runtime": 9.8113, "eval_wikibio_samples_per_second": 50.962, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3139462947599833, "eval_wikibio_token_set_f1_sem": 0.0053105548907498345, "eval_wikibio_token_set_precision": 0.3241977560214357, "eval_wikibio_token_set_recall": 0.31918734131446624, "eval_wikibio_true_num_tokens": 61.1328125, "step": 82500 }, { "epoch": 15.84, "eval_nq_accuracy": 0.51628125, "eval_nq_bleu_score": 11.318945229879374, "eval_nq_bleu_score_sem": 0.47357018239481374, "eval_nq_emb_cos_sim": 0.8236929774284363, "eval_nq_emb_cos_sim_sem": 0.007367660953686526, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.261533260345459, "eval_nq_n_ngrams_match_1": 22.708, "eval_nq_n_ngrams_match_2": 8.258, "eval_nq_n_ngrams_match_3": 3.744, "eval_nq_num_pred_words": 49.266, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.597793807511305, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4374386113314346, "eval_nq_runtime": 13.4722, "eval_nq_samples_per_second": 37.114, "eval_nq_steps_per_second": 0.074, "eval_nq_token_set_f1": 0.4528090485533119, "eval_nq_token_set_f1_sem": 0.004881551339401594, "eval_nq_token_set_precision": 0.4083421458933204, "eval_nq_token_set_recall": 0.5159063932117688, "eval_nq_true_num_tokens": 64.0, "step": 82500 }, { "epoch": 15.84, "learning_rate": 0.001, "loss": 2.6388, "step": 82512 }, { "epoch": 15.85, "learning_rate": 0.001, "loss": 2.6295, "step": 82524 }, { "epoch": 15.85, "learning_rate": 0.001, "loss": 2.6355, "step": 82536 }, { "epoch": 15.85, "learning_rate": 0.001, "loss": 2.6354, "step": 82548 }, { "epoch": 15.85, "learning_rate": 0.001, "loss": 2.6374, "step": 82560 }, { "epoch": 15.85, "learning_rate": 0.001, "loss": 2.6465, "step": 82572 }, { "epoch": 15.86, "learning_rate": 0.001, "loss": 2.6461, "step": 82584 }, { "epoch": 15.86, "learning_rate": 0.001, "loss": 2.6351, "step": 82596 }, { "epoch": 15.86, "learning_rate": 0.001, "loss": 2.6353, "step": 82608 }, { "epoch": 15.86, "learning_rate": 0.001, "loss": 2.6363, "step": 82620 }, { "epoch": 15.87, "learning_rate": 0.001, "loss": 2.6353, "step": 82632 }, { "epoch": 15.87, "learning_rate": 0.001, "loss": 2.6298, "step": 82644 }, { "epoch": 15.87, "learning_rate": 0.001, "loss": 2.6401, "step": 82656 }, { "epoch": 15.87, "learning_rate": 0.001, "loss": 2.6289, "step": 82668 }, { "epoch": 15.88, "learning_rate": 0.001, "loss": 2.6451, "step": 82680 }, { "epoch": 15.88, "learning_rate": 0.001, "loss": 2.6386, "step": 82692 }, { "epoch": 15.88, "learning_rate": 0.001, "loss": 2.6327, "step": 82704 }, { "epoch": 15.88, "learning_rate": 0.001, "loss": 2.6376, "step": 82716 }, { "epoch": 15.88, "learning_rate": 0.001, "loss": 2.6367, "step": 82728 }, { "epoch": 15.89, "learning_rate": 0.001, "loss": 2.6408, "step": 82740 }, { "epoch": 15.89, "learning_rate": 0.001, "loss": 2.64, "step": 82752 }, { "epoch": 15.89, "learning_rate": 0.001, "loss": 2.6289, "step": 82764 }, { "epoch": 15.89, "learning_rate": 0.001, "loss": 2.633, "step": 82776 }, { "epoch": 15.9, "learning_rate": 0.001, "loss": 2.6398, "step": 82788 }, { "epoch": 15.9, "learning_rate": 0.001, "loss": 2.6356, "step": 82800 }, { "epoch": 15.9, "learning_rate": 0.001, "loss": 2.6277, "step": 82812 }, { "epoch": 15.9, "learning_rate": 0.001, "loss": 2.6326, "step": 82824 }, { "epoch": 15.91, "learning_rate": 0.001, "loss": 2.6276, "step": 82836 }, { "epoch": 15.91, "learning_rate": 0.001, "loss": 2.6239, "step": 82848 }, { "epoch": 15.91, "learning_rate": 0.001, "loss": 2.6373, "step": 82860 }, { "epoch": 15.91, "learning_rate": 0.001, "loss": 2.6393, "step": 82872 }, { "epoch": 15.91, "learning_rate": 0.001, "loss": 2.6413, "step": 82884 }, { "epoch": 15.92, "learning_rate": 0.001, "loss": 2.6406, "step": 82896 }, { "epoch": 15.92, "learning_rate": 0.001, "loss": 2.6303, "step": 82908 }, { "epoch": 15.92, "learning_rate": 0.001, "loss": 2.646, "step": 82920 }, { "epoch": 15.92, "learning_rate": 0.001, "loss": 2.6312, "step": 82932 }, { "epoch": 15.93, "learning_rate": 0.001, "loss": 2.6293, "step": 82944 }, { "epoch": 15.93, "learning_rate": 0.001, "loss": 2.6231, "step": 82956 }, { "epoch": 15.93, "learning_rate": 0.001, "loss": 2.6377, "step": 82968 }, { "epoch": 15.93, "learning_rate": 0.001, "loss": 2.6292, "step": 82980 }, { "epoch": 15.94, "learning_rate": 0.001, "loss": 2.6319, "step": 82992 }, { "epoch": 15.94, "learning_rate": 0.001, "loss": 2.6313, "step": 83004 }, { "epoch": 15.94, "learning_rate": 0.001, "loss": 2.6283, "step": 83016 }, { "epoch": 15.94, "learning_rate": 0.001, "loss": 2.6445, "step": 83028 }, { "epoch": 15.94, "learning_rate": 0.001, "loss": 2.6474, "step": 83040 }, { "epoch": 15.95, "learning_rate": 0.001, "loss": 2.636, "step": 83052 }, { "epoch": 15.95, "learning_rate": 0.001, "loss": 2.6451, "step": 83064 }, { "epoch": 15.95, "learning_rate": 0.001, "loss": 2.6384, "step": 83076 }, { "epoch": 15.95, "learning_rate": 0.001, "loss": 2.636, "step": 83088 }, { "epoch": 15.96, "learning_rate": 0.001, "loss": 2.6367, "step": 83100 }, { "epoch": 15.96, "learning_rate": 0.001, "loss": 2.6332, "step": 83112 }, { "epoch": 15.96, "learning_rate": 0.001, "loss": 2.6495, "step": 83124 }, { "epoch": 15.96, "eval_ag_news_accuracy": 0.31571875, "eval_ag_news_bleu_score": 4.7158153481480785, "eval_ag_news_bleu_score_sem": 0.149493396097868, "eval_ag_news_emb_cos_sim": 0.8041837215423584, "eval_ag_news_emb_cos_sim_sem": 0.006463236341302807, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6436686515808105, "eval_ag_news_n_ngrams_match_1": 13.762, "eval_ag_news_n_ngrams_match_2": 3.04, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 46.724, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.231839055277746, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33859412448786486, "eval_ag_news_runtime": 10.5173, "eval_ag_news_samples_per_second": 47.541, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34299660295670165, "eval_ag_news_token_set_f1_sem": 0.004379750457796472, "eval_ag_news_token_set_precision": 0.3262943497457247, "eval_ag_news_token_set_recall": 0.3780150029047542, "eval_ag_news_true_num_tokens": 56.09375, "step": 83125 }, { "epoch": 15.96, "eval_anthropic_toxic_prompts_accuracy": 0.1111875, "eval_anthropic_toxic_prompts_bleu_score": 2.966256704995643, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11747048160457095, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6478981971740723, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00952050682909931, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2904324531555176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.922, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.652, "eval_anthropic_toxic_prompts_num_pred_words": 47.238, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.854474447370443, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20393821476296053, "eval_anthropic_toxic_prompts_runtime": 9.9979, "eval_anthropic_toxic_prompts_samples_per_second": 50.01, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3500859084481026, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006656529371002729, "eval_anthropic_toxic_prompts_token_set_precision": 0.4190015808094765, "eval_anthropic_toxic_prompts_token_set_recall": 0.3309564905086757, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 83125 }, { "epoch": 15.96, "eval_arxiv_accuracy": 0.33925, "eval_arxiv_bleu_score": 4.23220891877265, "eval_arxiv_bleu_score_sem": 0.12432208954775212, "eval_arxiv_emb_cos_sim": 0.7382143139839172, "eval_arxiv_emb_cos_sim_sem": 0.008107729979034965, "eval_arxiv_emb_top1_equal": 0.1953125, "eval_arxiv_emb_top1_equal_sem": 0.035178457165496856, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.49281644821167, "eval_arxiv_n_ngrams_match_1": 14.534, "eval_arxiv_n_ngrams_match_2": 2.844, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 40.24, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.87841778748886, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34569924447872247, "eval_arxiv_runtime": 10.3184, "eval_arxiv_samples_per_second": 48.457, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3390566314840279, "eval_arxiv_token_set_f1_sem": 0.004331655636610428, "eval_arxiv_token_set_precision": 0.2888177199714239, "eval_arxiv_token_set_recall": 0.42727804965630967, "eval_arxiv_true_num_tokens": 64.0, "step": 83125 }, { "epoch": 15.96, "eval_python_code_alpaca_accuracy": 0.15775, "eval_python_code_alpaca_bleu_score": 4.216840807094817, "eval_python_code_alpaca_bleu_score_sem": 0.1336840732887577, "eval_python_code_alpaca_emb_cos_sim": 0.7436763644218445, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008376523481046717, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9375433921813965, "eval_python_code_alpaca_n_ngrams_match_1": 9.246, "eval_python_code_alpaca_n_ngrams_match_2": 2.626, "eval_python_code_alpaca_n_ngrams_match_3": 0.816, "eval_python_code_alpaca_num_pred_words": 42.952, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.86943452742655, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31167991216738633, "eval_python_code_alpaca_runtime": 10.7835, "eval_python_code_alpaca_samples_per_second": 46.367, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.45965523369747596, "eval_python_code_alpaca_token_set_f1_sem": 0.006060744782603551, "eval_python_code_alpaca_token_set_precision": 0.5054978829886636, "eval_python_code_alpaca_token_set_recall": 0.4481730812637992, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 83125 }, { "epoch": 15.96, "eval_wikibio_accuracy": 0.314875, "eval_wikibio_bleu_score": 5.605258287117928, "eval_wikibio_bleu_score_sem": 0.19961011748198315, "eval_wikibio_emb_cos_sim": 0.7294102311134338, "eval_wikibio_emb_cos_sim_sem": 0.01004817944555513, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8227322101593018, "eval_wikibio_n_ngrams_match_1": 10.124, "eval_wikibio_n_ngrams_match_2": 3.308, "eval_wikibio_n_ngrams_match_3": 1.138, "eval_wikibio_num_pred_words": 36.982, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.728978974335945, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3503548495126094, "eval_wikibio_runtime": 10.5536, "eval_wikibio_samples_per_second": 47.377, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.3173837592198084, "eval_wikibio_token_set_f1_sem": 0.005280946933688539, "eval_wikibio_token_set_precision": 0.3282269135592735, "eval_wikibio_token_set_recall": 0.3243064622440714, "eval_wikibio_true_num_tokens": 61.1328125, "step": 83125 }, { "epoch": 15.96, "eval_nq_accuracy": 0.51765625, "eval_nq_bleu_score": 11.121061925867597, "eval_nq_bleu_score_sem": 0.4568811427806432, "eval_nq_emb_cos_sim": 0.8262262344360352, "eval_nq_emb_cos_sim_sem": 0.007302851613022423, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2610249519348145, "eval_nq_n_ngrams_match_1": 22.408, "eval_nq_n_ngrams_match_2": 8.124, "eval_nq_n_ngrams_match_3": 3.688, "eval_nq_num_pred_words": 48.944, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.592916407911952, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.433072069476134, "eval_nq_runtime": 10.0085, "eval_nq_samples_per_second": 49.958, "eval_nq_steps_per_second": 0.1, "eval_nq_token_set_f1": 0.45073766113574465, "eval_nq_token_set_f1_sem": 0.004862898292423386, "eval_nq_token_set_precision": 0.4054533992713064, "eval_nq_token_set_recall": 0.5165805774669422, "eval_nq_true_num_tokens": 64.0, "step": 83125 }, { "epoch": 15.96, "learning_rate": 0.001, "loss": 2.6291, "step": 83136 }, { "epoch": 15.97, "learning_rate": 0.001, "loss": 2.6414, "step": 83148 }, { "epoch": 15.97, "learning_rate": 0.001, "loss": 2.6486, "step": 83160 }, { "epoch": 15.97, "learning_rate": 0.001, "loss": 2.6524, "step": 83172 }, { "epoch": 15.97, "learning_rate": 0.001, "loss": 2.6396, "step": 83184 }, { "epoch": 15.97, "learning_rate": 0.001, "loss": 2.6256, "step": 83196 }, { "epoch": 15.98, "learning_rate": 0.001, "loss": 2.6472, "step": 83208 }, { "epoch": 15.98, "learning_rate": 0.001, "loss": 2.6433, "step": 83220 }, { "epoch": 15.98, "learning_rate": 0.001, "loss": 2.632, "step": 83232 }, { "epoch": 15.98, "learning_rate": 0.001, "loss": 2.6261, "step": 83244 }, { "epoch": 15.99, "learning_rate": 0.001, "loss": 2.6394, "step": 83256 }, { "epoch": 15.99, "learning_rate": 0.001, "loss": 2.6415, "step": 83268 }, { "epoch": 15.99, "learning_rate": 0.001, "loss": 2.6361, "step": 83280 }, { "epoch": 15.99, "learning_rate": 0.001, "loss": 2.6442, "step": 83292 }, { "epoch": 16.0, "learning_rate": 0.001, "loss": 2.6426, "step": 83304 }, { "epoch": 16.0, "learning_rate": 0.001, "loss": 2.6368, "step": 83316 }, { "epoch": 16.0, "learning_rate": 0.001, "loss": 2.6352, "step": 83328 }, { "epoch": 16.0, "learning_rate": 0.001, "loss": 2.6192, "step": 83340 }, { "epoch": 16.0, "learning_rate": 0.001, "loss": 2.6171, "step": 83352 }, { "epoch": 16.01, "learning_rate": 0.001, "loss": 2.6304, "step": 83364 }, { "epoch": 16.01, "learning_rate": 0.001, "loss": 2.6274, "step": 83376 }, { "epoch": 16.01, "learning_rate": 0.001, "loss": 2.6245, "step": 83388 }, { "epoch": 16.01, "learning_rate": 0.001, "loss": 2.621, "step": 83400 }, { "epoch": 16.02, "learning_rate": 0.001, "loss": 2.6192, "step": 83412 }, { "epoch": 16.02, "learning_rate": 0.001, "loss": 2.6137, "step": 83424 }, { "epoch": 16.02, "learning_rate": 0.001, "loss": 2.6221, "step": 83436 }, { "epoch": 16.02, "learning_rate": 0.001, "loss": 2.6213, "step": 83448 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 2.6155, "step": 83460 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 2.6218, "step": 83472 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 2.6184, "step": 83484 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 2.6187, "step": 83496 }, { "epoch": 16.03, "learning_rate": 0.001, "loss": 2.6256, "step": 83508 }, { "epoch": 16.04, "learning_rate": 0.001, "loss": 2.6112, "step": 83520 }, { "epoch": 16.04, "learning_rate": 0.001, "loss": 2.6113, "step": 83532 }, { "epoch": 16.04, "learning_rate": 0.001, "loss": 2.6272, "step": 83544 }, { "epoch": 16.04, "learning_rate": 0.001, "loss": 2.6249, "step": 83556 }, { "epoch": 16.05, "learning_rate": 0.001, "loss": 2.625, "step": 83568 }, { "epoch": 16.05, "learning_rate": 0.001, "loss": 2.6192, "step": 83580 }, { "epoch": 16.05, "learning_rate": 0.001, "loss": 2.6198, "step": 83592 }, { "epoch": 16.05, "learning_rate": 0.001, "loss": 2.6224, "step": 83604 }, { "epoch": 16.06, "learning_rate": 0.001, "loss": 2.6256, "step": 83616 }, { "epoch": 16.06, "learning_rate": 0.001, "loss": 2.6299, "step": 83628 }, { "epoch": 16.06, "learning_rate": 0.001, "loss": 2.619, "step": 83640 }, { "epoch": 16.06, "learning_rate": 0.001, "loss": 2.6157, "step": 83652 }, { "epoch": 16.06, "learning_rate": 0.001, "loss": 2.63, "step": 83664 }, { "epoch": 16.07, "learning_rate": 0.001, "loss": 2.6111, "step": 83676 }, { "epoch": 16.07, "learning_rate": 0.001, "loss": 2.6288, "step": 83688 }, { "epoch": 16.07, "learning_rate": 0.001, "loss": 2.6177, "step": 83700 }, { "epoch": 16.07, "learning_rate": 0.001, "loss": 2.6263, "step": 83712 }, { "epoch": 16.08, "learning_rate": 0.001, "loss": 2.6216, "step": 83724 }, { "epoch": 16.08, "learning_rate": 0.001, "loss": 2.6263, "step": 83736 }, { "epoch": 16.08, "learning_rate": 0.001, "loss": 2.628, "step": 83748 }, { "epoch": 16.08, "eval_ag_news_accuracy": 0.31571875, "eval_ag_news_bleu_score": 4.618034986198973, "eval_ag_news_bleu_score_sem": 0.14789886440354028, "eval_ag_news_emb_cos_sim": 0.7968583106994629, "eval_ag_news_emb_cos_sim_sem": 0.007380884527406819, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6520488262176514, "eval_ag_news_n_ngrams_match_1": 13.628, "eval_ag_news_n_ngrams_match_2": 2.94, "eval_ag_news_n_ngrams_match_3": 0.836, "eval_ag_news_num_pred_words": 46.318, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.55357476106492, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33657850819526497, "eval_ag_news_runtime": 9.8453, "eval_ag_news_samples_per_second": 50.785, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.3380274685715532, "eval_ag_news_token_set_f1_sem": 0.004571799012330631, "eval_ag_news_token_set_precision": 0.3217310225997908, "eval_ag_news_token_set_recall": 0.3753033130487659, "eval_ag_news_true_num_tokens": 56.09375, "step": 83750 }, { "epoch": 16.08, "eval_anthropic_toxic_prompts_accuracy": 0.1109375, "eval_anthropic_toxic_prompts_bleu_score": 3.0222862580351335, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11738101824093727, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6538487672805786, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010066713522187796, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3295137882232666, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.938, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.828, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686, "eval_anthropic_toxic_prompts_num_pred_words": 47.164, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.92476105428126, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20294638649351732, "eval_anthropic_toxic_prompts_runtime": 10.8426, "eval_anthropic_toxic_prompts_samples_per_second": 46.115, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.3466827605390762, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006285916213736284, "eval_anthropic_toxic_prompts_token_set_precision": 0.41859248089187356, "eval_anthropic_toxic_prompts_token_set_recall": 0.32533021540303253, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 83750 }, { "epoch": 16.08, "eval_arxiv_accuracy": 0.34034375, "eval_arxiv_bleu_score": 4.127423373112979, "eval_arxiv_bleu_score_sem": 0.12185425671865842, "eval_arxiv_emb_cos_sim": 0.73603355884552, "eval_arxiv_emb_cos_sim_sem": 0.00845819748801575, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5093226432800293, "eval_arxiv_n_ngrams_match_1": 14.444, "eval_arxiv_n_ngrams_match_2": 2.76, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 40.438, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.425619046493296, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34379042548984545, "eval_arxiv_runtime": 10.0146, "eval_arxiv_samples_per_second": 49.927, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.3358893421196323, "eval_arxiv_token_set_f1_sem": 0.004352717057264788, "eval_arxiv_token_set_precision": 0.2869200031339711, "eval_arxiv_token_set_recall": 0.42342576457847847, "eval_arxiv_true_num_tokens": 64.0, "step": 83750 }, { "epoch": 16.08, "eval_python_code_alpaca_accuracy": 0.15371875, "eval_python_code_alpaca_bleu_score": 4.368329700521992, "eval_python_code_alpaca_bleu_score_sem": 0.133721985079977, "eval_python_code_alpaca_emb_cos_sim": 0.7390093803405762, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00882011445181232, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 3.0144386291503906, "eval_python_code_alpaca_n_ngrams_match_1": 9.618, "eval_python_code_alpaca_n_ngrams_match_2": 2.738, "eval_python_code_alpaca_n_ngrams_match_3": 0.842, "eval_python_code_alpaca_num_pred_words": 42.89, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 20.377648311324897, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3275447197112485, "eval_python_code_alpaca_runtime": 9.4836, "eval_python_code_alpaca_samples_per_second": 52.722, "eval_python_code_alpaca_steps_per_second": 0.105, "eval_python_code_alpaca_token_set_f1": 0.46918880461179113, "eval_python_code_alpaca_token_set_f1_sem": 0.005625904039514021, "eval_python_code_alpaca_token_set_precision": 0.5273688185119285, "eval_python_code_alpaca_token_set_recall": 0.44039669123479835, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 83750 }, { "epoch": 16.08, "eval_wikibio_accuracy": 0.31578125, "eval_wikibio_bleu_score": 5.698544085571491, "eval_wikibio_bleu_score_sem": 0.21452601186787376, "eval_wikibio_emb_cos_sim": 0.7213823795318604, "eval_wikibio_emb_cos_sim_sem": 0.010027102070032635, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8448588848114014, "eval_wikibio_n_ngrams_match_1": 9.81, "eval_wikibio_n_ngrams_match_2": 3.258, "eval_wikibio_n_ngrams_match_3": 1.192, "eval_wikibio_num_pred_words": 36.308, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.75208645589393, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3413679034825258, "eval_wikibio_runtime": 9.9827, "eval_wikibio_samples_per_second": 50.086, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3072155253130508, "eval_wikibio_token_set_f1_sem": 0.005834321851084432, "eval_wikibio_token_set_precision": 0.31666316004807177, "eval_wikibio_token_set_recall": 0.31865292330922224, "eval_wikibio_true_num_tokens": 61.1328125, "step": 83750 }, { "epoch": 16.08, "eval_nq_accuracy": 0.5160625, "eval_nq_bleu_score": 11.519020405608947, "eval_nq_bleu_score_sem": 0.4763464428342576, "eval_nq_emb_cos_sim": 0.8216896653175354, "eval_nq_emb_cos_sim_sem": 0.0074411275204525704, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2601318359375, "eval_nq_n_ngrams_match_1": 22.814, "eval_nq_n_ngrams_match_2": 8.24, "eval_nq_n_ngrams_match_3": 3.81, "eval_nq_num_pred_words": 49.148, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.584352645592952, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4403451104988666, "eval_nq_runtime": 10.4114, "eval_nq_samples_per_second": 48.024, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4559059226611072, "eval_nq_token_set_f1_sem": 0.0048010431161150755, "eval_nq_token_set_precision": 0.41266719159446524, "eval_nq_token_set_recall": 0.5184486411962422, "eval_nq_true_num_tokens": 64.0, "step": 83750 }, { "epoch": 16.08, "learning_rate": 0.001, "loss": 2.6377, "step": 83760 }, { "epoch": 16.09, "learning_rate": 0.001, "loss": 2.6327, "step": 83772 }, { "epoch": 16.09, "learning_rate": 0.001, "loss": 2.6349, "step": 83784 }, { "epoch": 16.09, "learning_rate": 0.001, "loss": 2.6263, "step": 83796 }, { "epoch": 16.09, "learning_rate": 0.001, "loss": 2.6257, "step": 83808 }, { "epoch": 16.09, "learning_rate": 0.001, "loss": 2.6296, "step": 83820 }, { "epoch": 16.1, "learning_rate": 0.001, "loss": 2.6226, "step": 83832 }, { "epoch": 16.1, "learning_rate": 0.001, "loss": 2.6152, "step": 83844 }, { "epoch": 16.1, "learning_rate": 0.001, "loss": 2.6121, "step": 83856 }, { "epoch": 16.1, "learning_rate": 0.001, "loss": 2.6223, "step": 83868 }, { "epoch": 16.11, "learning_rate": 0.001, "loss": 2.6311, "step": 83880 }, { "epoch": 16.11, "learning_rate": 0.001, "loss": 2.6207, "step": 83892 }, { "epoch": 16.11, "learning_rate": 0.001, "loss": 2.6284, "step": 83904 }, { "epoch": 16.11, "learning_rate": 0.001, "loss": 2.622, "step": 83916 }, { "epoch": 16.12, "learning_rate": 0.001, "loss": 2.6151, "step": 83928 }, { "epoch": 16.12, "learning_rate": 0.001, "loss": 2.6243, "step": 83940 }, { "epoch": 16.12, "learning_rate": 0.001, "loss": 2.6194, "step": 83952 }, { "epoch": 16.12, "learning_rate": 0.001, "loss": 2.6235, "step": 83964 }, { "epoch": 16.12, "learning_rate": 0.001, "loss": 2.6246, "step": 83976 }, { "epoch": 16.13, "learning_rate": 0.001, "loss": 2.6273, "step": 83988 }, { "epoch": 16.13, "learning_rate": 0.001, "loss": 2.6192, "step": 84000 }, { "epoch": 16.13, "learning_rate": 0.001, "loss": 2.6333, "step": 84012 }, { "epoch": 16.13, "learning_rate": 0.001, "loss": 2.6312, "step": 84024 }, { "epoch": 16.14, "learning_rate": 0.001, "loss": 2.6152, "step": 84036 }, { "epoch": 16.14, "learning_rate": 0.001, "loss": 2.6343, "step": 84048 }, { "epoch": 16.14, "learning_rate": 0.001, "loss": 2.6167, "step": 84060 }, { "epoch": 16.14, "learning_rate": 0.001, "loss": 2.6234, "step": 84072 }, { "epoch": 16.15, "learning_rate": 0.001, "loss": 2.6369, "step": 84084 }, { "epoch": 16.15, "learning_rate": 0.001, "loss": 2.6258, "step": 84096 }, { "epoch": 16.15, "learning_rate": 0.001, "loss": 2.6303, "step": 84108 }, { "epoch": 16.15, "learning_rate": 0.001, "loss": 2.6273, "step": 84120 }, { "epoch": 16.15, "learning_rate": 0.001, "loss": 2.6277, "step": 84132 }, { "epoch": 16.16, "learning_rate": 0.001, "loss": 2.6222, "step": 84144 }, { "epoch": 16.16, "learning_rate": 0.001, "loss": 2.6205, "step": 84156 }, { "epoch": 16.16, "learning_rate": 0.001, "loss": 2.6221, "step": 84168 }, { "epoch": 16.16, "learning_rate": 0.001, "loss": 2.6219, "step": 84180 }, { "epoch": 16.17, "learning_rate": 0.001, "loss": 2.6249, "step": 84192 }, { "epoch": 16.17, "learning_rate": 0.001, "loss": 2.6221, "step": 84204 }, { "epoch": 16.17, "learning_rate": 0.001, "loss": 2.6182, "step": 84216 }, { "epoch": 16.17, "learning_rate": 0.001, "loss": 2.6208, "step": 84228 }, { "epoch": 16.18, "learning_rate": 0.001, "loss": 2.6252, "step": 84240 }, { "epoch": 16.18, "learning_rate": 0.001, "loss": 2.6221, "step": 84252 }, { "epoch": 16.18, "learning_rate": 0.001, "loss": 2.6284, "step": 84264 }, { "epoch": 16.18, "learning_rate": 0.001, "loss": 2.6267, "step": 84276 }, { "epoch": 16.18, "learning_rate": 0.001, "loss": 2.6296, "step": 84288 }, { "epoch": 16.19, "learning_rate": 0.001, "loss": 2.6327, "step": 84300 }, { "epoch": 16.19, "learning_rate": 0.001, "loss": 2.6287, "step": 84312 }, { "epoch": 16.19, "learning_rate": 0.001, "loss": 2.6225, "step": 84324 }, { "epoch": 16.19, "learning_rate": 0.001, "loss": 2.6263, "step": 84336 }, { "epoch": 16.2, "learning_rate": 0.001, "loss": 2.6138, "step": 84348 }, { "epoch": 16.2, "learning_rate": 0.001, "loss": 2.6338, "step": 84360 }, { "epoch": 16.2, "learning_rate": 0.001, "loss": 2.6146, "step": 84372 }, { "epoch": 16.2, "eval_ag_news_accuracy": 0.3146875, "eval_ag_news_bleu_score": 4.88589691783047, "eval_ag_news_bleu_score_sem": 0.15371935382029328, "eval_ag_news_emb_cos_sim": 0.7930101156234741, "eval_ag_news_emb_cos_sim_sem": 0.008083512004753901, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6391215324401855, "eval_ag_news_n_ngrams_match_1": 13.8, "eval_ag_news_n_ngrams_match_2": 3.044, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.578, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.058388976074355, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3414253069014549, "eval_ag_news_runtime": 10.3437, "eval_ag_news_samples_per_second": 48.339, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.34519393810923765, "eval_ag_news_token_set_f1_sem": 0.004459209421154889, "eval_ag_news_token_set_precision": 0.3295150500837755, "eval_ag_news_token_set_recall": 0.3798884725693181, "eval_ag_news_true_num_tokens": 56.09375, "step": 84375 }, { "epoch": 16.2, "eval_anthropic_toxic_prompts_accuracy": 0.112625, "eval_anthropic_toxic_prompts_bleu_score": 2.9601658378255724, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12681353408248747, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6597671508789062, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010072174173247989, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.289334535598755, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.044, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64, "eval_anthropic_toxic_prompts_num_pred_words": 47.202, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.825006627975274, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20684666386602268, "eval_anthropic_toxic_prompts_runtime": 9.7033, "eval_anthropic_toxic_prompts_samples_per_second": 51.529, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.34441522601250485, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006292221899092376, "eval_anthropic_toxic_prompts_token_set_precision": 0.42216671479720663, "eval_anthropic_toxic_prompts_token_set_recall": 0.3192011476426655, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 84375 }, { "epoch": 16.2, "eval_arxiv_accuracy": 0.3398125, "eval_arxiv_bleu_score": 4.211391263269069, "eval_arxiv_bleu_score_sem": 0.12270232596287158, "eval_arxiv_emb_cos_sim": 0.7509287595748901, "eval_arxiv_emb_cos_sim_sem": 0.0081844379018693, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.479011297225952, "eval_arxiv_n_ngrams_match_1": 14.416, "eval_arxiv_n_ngrams_match_2": 2.816, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 39.982, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.427644918602795, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34605244704182136, "eval_arxiv_runtime": 9.5925, "eval_arxiv_samples_per_second": 52.124, "eval_arxiv_steps_per_second": 0.104, "eval_arxiv_token_set_f1": 0.3380610236518875, "eval_arxiv_token_set_f1_sem": 0.0042917909115546245, "eval_arxiv_token_set_precision": 0.2876701786778184, "eval_arxiv_token_set_recall": 0.42752571493336267, "eval_arxiv_true_num_tokens": 64.0, "step": 84375 }, { "epoch": 16.2, "eval_python_code_alpaca_accuracy": 0.1561875, "eval_python_code_alpaca_bleu_score": 4.239576376436463, "eval_python_code_alpaca_bleu_score_sem": 0.14032350530114324, "eval_python_code_alpaca_emb_cos_sim": 0.724153995513916, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011613196639341395, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.981078863143921, "eval_python_code_alpaca_n_ngrams_match_1": 9.276, "eval_python_code_alpaca_n_ngrams_match_2": 2.666, "eval_python_code_alpaca_n_ngrams_match_3": 0.864, "eval_python_code_alpaca_num_pred_words": 43.224, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.709068566420875, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.313791982881696, "eval_python_code_alpaca_runtime": 10.0186, "eval_python_code_alpaca_samples_per_second": 49.907, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4527995957374217, "eval_python_code_alpaca_token_set_f1_sem": 0.006052336887982428, "eval_python_code_alpaca_token_set_precision": 0.5023335821693716, "eval_python_code_alpaca_token_set_recall": 0.43624477146604856, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 84375 }, { "epoch": 16.2, "eval_wikibio_accuracy": 0.31346875, "eval_wikibio_bleu_score": 5.629230960932355, "eval_wikibio_bleu_score_sem": 0.20059788976519283, "eval_wikibio_emb_cos_sim": 0.7350760698318481, "eval_wikibio_emb_cos_sim_sem": 0.01087538510456632, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.836883783340454, "eval_wikibio_n_ngrams_match_1": 10.11, "eval_wikibio_n_ngrams_match_2": 3.348, "eval_wikibio_n_ngrams_match_3": 1.192, "eval_wikibio_num_pred_words": 37.386, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.38071664671274, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3462497533280867, "eval_wikibio_runtime": 9.5887, "eval_wikibio_samples_per_second": 52.145, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.3132445112394064, "eval_wikibio_token_set_f1_sem": 0.005398729135291608, "eval_wikibio_token_set_precision": 0.32755744273525395, "eval_wikibio_token_set_recall": 0.31590463957420123, "eval_wikibio_true_num_tokens": 61.1328125, "step": 84375 }, { "epoch": 16.2, "eval_nq_accuracy": 0.51740625, "eval_nq_bleu_score": 11.361840306640428, "eval_nq_bleu_score_sem": 0.46694134822007893, "eval_nq_emb_cos_sim": 0.8280055522918701, "eval_nq_emb_cos_sim_sem": 0.007018084455425043, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.257619619369507, "eval_nq_n_ngrams_match_1": 22.81, "eval_nq_n_ngrams_match_2": 8.182, "eval_nq_n_ngrams_match_3": 3.73, "eval_nq_num_pred_words": 49.188, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.560304895309123, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.440902518635086, "eval_nq_runtime": 9.9465, "eval_nq_samples_per_second": 50.269, "eval_nq_steps_per_second": 0.101, "eval_nq_token_set_f1": 0.4540385159517221, "eval_nq_token_set_f1_sem": 0.005017009922286769, "eval_nq_token_set_precision": 0.411597901664635, "eval_nq_token_set_recall": 0.5156963591591491, "eval_nq_true_num_tokens": 64.0, "step": 84375 }, { "epoch": 16.2, "learning_rate": 0.001, "loss": 2.6402, "step": 84384 }, { "epoch": 16.21, "learning_rate": 0.001, "loss": 2.6168, "step": 84396 }, { "epoch": 16.21, "learning_rate": 0.001, "loss": 2.6203, "step": 84408 }, { "epoch": 16.21, "learning_rate": 0.001, "loss": 2.6293, "step": 84420 }, { "epoch": 16.21, "learning_rate": 0.001, "loss": 2.6277, "step": 84432 }, { "epoch": 16.21, "learning_rate": 0.001, "loss": 2.6258, "step": 84444 }, { "epoch": 16.22, "learning_rate": 0.001, "loss": 2.6301, "step": 84456 }, { "epoch": 16.22, "learning_rate": 0.001, "loss": 2.6288, "step": 84468 }, { "epoch": 16.22, "learning_rate": 0.001, "loss": 2.6285, "step": 84480 }, { "epoch": 16.22, "learning_rate": 0.001, "loss": 2.6398, "step": 84492 }, { "epoch": 16.23, "learning_rate": 0.001, "loss": 2.6236, "step": 84504 }, { "epoch": 16.23, "learning_rate": 0.001, "loss": 2.6295, "step": 84516 }, { "epoch": 16.23, "learning_rate": 0.001, "loss": 2.6177, "step": 84528 }, { "epoch": 16.23, "learning_rate": 0.001, "loss": 2.6212, "step": 84540 }, { "epoch": 16.24, "learning_rate": 0.001, "loss": 2.6275, "step": 84552 }, { "epoch": 16.24, "learning_rate": 0.001, "loss": 2.6162, "step": 84564 }, { "epoch": 16.24, "learning_rate": 0.001, "loss": 2.6184, "step": 84576 }, { "epoch": 16.24, "learning_rate": 0.001, "loss": 2.6192, "step": 84588 }, { "epoch": 16.24, "learning_rate": 0.001, "loss": 2.6277, "step": 84600 }, { "epoch": 16.25, "learning_rate": 0.001, "loss": 2.6261, "step": 84612 }, { "epoch": 16.25, "learning_rate": 0.001, "loss": 2.6307, "step": 84624 }, { "epoch": 16.25, "learning_rate": 0.001, "loss": 2.6177, "step": 84636 }, { "epoch": 16.25, "learning_rate": 0.001, "loss": 2.6271, "step": 84648 }, { "epoch": 16.26, "learning_rate": 0.001, "loss": 2.6212, "step": 84660 }, { "epoch": 16.26, "learning_rate": 0.001, "loss": 2.6289, "step": 84672 }, { "epoch": 16.26, "learning_rate": 0.001, "loss": 2.6112, "step": 84684 }, { "epoch": 16.26, "learning_rate": 0.001, "loss": 2.6208, "step": 84696 }, { "epoch": 16.26, "learning_rate": 0.001, "loss": 2.6214, "step": 84708 }, { "epoch": 16.27, "learning_rate": 0.001, "loss": 2.6263, "step": 84720 }, { "epoch": 16.27, "learning_rate": 0.001, "loss": 2.6284, "step": 84732 }, { "epoch": 16.27, "learning_rate": 0.001, "loss": 2.6413, "step": 84744 }, { "epoch": 16.27, "learning_rate": 0.001, "loss": 2.6197, "step": 84756 }, { "epoch": 16.28, "learning_rate": 0.001, "loss": 2.6354, "step": 84768 }, { "epoch": 16.28, "learning_rate": 0.001, "loss": 2.6162, "step": 84780 }, { "epoch": 16.28, "learning_rate": 0.001, "loss": 2.6258, "step": 84792 }, { "epoch": 16.28, "learning_rate": 0.001, "loss": 2.621, "step": 84804 }, { "epoch": 16.29, "learning_rate": 0.001, "loss": 2.6229, "step": 84816 }, { "epoch": 16.29, "learning_rate": 0.001, "loss": 2.6196, "step": 84828 }, { "epoch": 16.29, "learning_rate": 0.001, "loss": 2.6161, "step": 84840 }, { "epoch": 16.29, "learning_rate": 0.001, "loss": 2.6205, "step": 84852 }, { "epoch": 16.29, "learning_rate": 0.001, "loss": 2.6281, "step": 84864 }, { "epoch": 16.3, "learning_rate": 0.001, "loss": 2.6156, "step": 84876 }, { "epoch": 16.3, "learning_rate": 0.001, "loss": 2.6242, "step": 84888 }, { "epoch": 16.3, "learning_rate": 0.001, "loss": 2.6272, "step": 84900 }, { "epoch": 16.3, "learning_rate": 0.001, "loss": 2.6186, "step": 84912 }, { "epoch": 16.31, "learning_rate": 0.001, "loss": 2.6193, "step": 84924 }, { "epoch": 16.31, "learning_rate": 0.001, "loss": 2.6317, "step": 84936 }, { "epoch": 16.31, "learning_rate": 0.001, "loss": 2.6212, "step": 84948 }, { "epoch": 16.31, "learning_rate": 0.001, "loss": 2.6306, "step": 84960 }, { "epoch": 16.32, "learning_rate": 0.001, "loss": 2.622, "step": 84972 }, { "epoch": 16.32, "learning_rate": 0.001, "loss": 2.6336, "step": 84984 }, { "epoch": 16.32, "learning_rate": 0.001, "loss": 2.6214, "step": 84996 }, { "epoch": 16.32, "eval_ag_news_accuracy": 0.3143125, "eval_ag_news_bleu_score": 4.727496476663176, "eval_ag_news_bleu_score_sem": 0.1562693439582564, "eval_ag_news_emb_cos_sim": 0.7889615297317505, "eval_ag_news_emb_cos_sim_sem": 0.00816652944251922, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6564488410949707, "eval_ag_news_n_ngrams_match_1": 13.726, "eval_ag_news_n_ngrams_match_2": 2.906, "eval_ag_news_n_ngrams_match_3": 0.848, "eval_ag_news_num_pred_words": 46.784, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.723584812681004, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33623640164923313, "eval_ag_news_runtime": 10.9815, "eval_ag_news_samples_per_second": 45.531, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.34343677388608007, "eval_ag_news_token_set_f1_sem": 0.004335082304686956, "eval_ag_news_token_set_precision": 0.3268466522484483, "eval_ag_news_token_set_recall": 0.37934872835441796, "eval_ag_news_true_num_tokens": 56.09375, "step": 85000 }, { "epoch": 16.32, "eval_anthropic_toxic_prompts_accuracy": 0.1116875, "eval_anthropic_toxic_prompts_bleu_score": 2.9282192607489956, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11937367289925747, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6589334607124329, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009213499306378338, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.315546989440918, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.944, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.79, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 47.57, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.537452561532632, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.921875, "eval_anthropic_toxic_prompts_rouge_score": 0.20273210609631653, "eval_anthropic_toxic_prompts_runtime": 9.7814, "eval_anthropic_toxic_prompts_samples_per_second": 51.117, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.343269996247756, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006497621599196589, "eval_anthropic_toxic_prompts_token_set_precision": 0.4162591130326654, "eval_anthropic_toxic_prompts_token_set_recall": 0.3248448605719658, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 85000 }, { "epoch": 16.32, "eval_arxiv_accuracy": 0.33671875, "eval_arxiv_bleu_score": 4.078923104170819, "eval_arxiv_bleu_score_sem": 0.11423218570266515, "eval_arxiv_emb_cos_sim": 0.7389238476753235, "eval_arxiv_emb_cos_sim_sem": 0.009505083058177845, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5032198429107666, "eval_arxiv_n_ngrams_match_1": 14.394, "eval_arxiv_n_ngrams_match_2": 2.774, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 40.238, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.22225035668231, "eval_arxiv_pred_num_tokens": 62.8515625, "eval_arxiv_rouge_score": 0.33986931113436525, "eval_arxiv_runtime": 10.073, "eval_arxiv_samples_per_second": 49.638, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.33733828360133833, "eval_arxiv_token_set_f1_sem": 0.004321378906322271, "eval_arxiv_token_set_precision": 0.2860937611205259, "eval_arxiv_token_set_recall": 0.43232976402111967, "eval_arxiv_true_num_tokens": 64.0, "step": 85000 }, { "epoch": 16.32, "eval_python_code_alpaca_accuracy": 0.15565625, "eval_python_code_alpaca_bleu_score": 4.162090658429672, "eval_python_code_alpaca_bleu_score_sem": 0.12508577022293813, "eval_python_code_alpaca_emb_cos_sim": 0.7425490617752075, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010618933385178674, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9794883728027344, "eval_python_code_alpaca_n_ngrams_match_1": 9.532, "eval_python_code_alpaca_n_ngrams_match_2": 2.646, "eval_python_code_alpaca_n_ngrams_match_3": 0.85, "eval_python_code_alpaca_num_pred_words": 44.338, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.67774639863773, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3157292150017719, "eval_python_code_alpaca_runtime": 9.7105, "eval_python_code_alpaca_samples_per_second": 51.49, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.45761048278123767, "eval_python_code_alpaca_token_set_f1_sem": 0.005685353377185493, "eval_python_code_alpaca_token_set_precision": 0.5177254218602015, "eval_python_code_alpaca_token_set_recall": 0.4312237556584592, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 85000 }, { "epoch": 16.32, "eval_wikibio_accuracy": 0.3140625, "eval_wikibio_bleu_score": 5.612636316287121, "eval_wikibio_bleu_score_sem": 0.20710063374958973, "eval_wikibio_emb_cos_sim": 0.725387692451477, "eval_wikibio_emb_cos_sim_sem": 0.01092995144395326, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8017916679382324, "eval_wikibio_n_ngrams_match_1": 10.046, "eval_wikibio_n_ngrams_match_2": 3.198, "eval_wikibio_n_ngrams_match_3": 1.138, "eval_wikibio_num_pred_words": 36.964, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.781345962281726, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3462115290660953, "eval_wikibio_runtime": 9.9508, "eval_wikibio_samples_per_second": 50.247, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.315266367220626, "eval_wikibio_token_set_f1_sem": 0.005458211988072525, "eval_wikibio_token_set_precision": 0.32534941182735977, "eval_wikibio_token_set_recall": 0.32195284964155896, "eval_wikibio_true_num_tokens": 61.1328125, "step": 85000 }, { "epoch": 16.32, "eval_nq_accuracy": 0.51740625, "eval_nq_bleu_score": 11.435618014216091, "eval_nq_bleu_score_sem": 0.47053422152115154, "eval_nq_emb_cos_sim": 0.8246973752975464, "eval_nq_emb_cos_sim_sem": 0.0071987139943415915, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2601561546325684, "eval_nq_n_ngrams_match_1": 22.584, "eval_nq_n_ngrams_match_2": 8.25, "eval_nq_n_ngrams_match_3": 3.824, "eval_nq_num_pred_words": 48.834, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.58458572737648, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4370232865482836, "eval_nq_runtime": 10.122, "eval_nq_samples_per_second": 49.398, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.45127677756435214, "eval_nq_token_set_f1_sem": 0.005047810596697375, "eval_nq_token_set_precision": 0.40730639601428537, "eval_nq_token_set_recall": 0.5170273041283616, "eval_nq_true_num_tokens": 64.0, "step": 85000 }, { "epoch": 16.32, "learning_rate": 0.001, "loss": 2.634, "step": 85008 }, { "epoch": 16.32, "learning_rate": 0.001, "loss": 2.6208, "step": 85020 }, { "epoch": 16.33, "learning_rate": 0.001, "loss": 2.6275, "step": 85032 }, { "epoch": 16.33, "learning_rate": 0.001, "loss": 2.6194, "step": 85044 }, { "epoch": 16.33, "learning_rate": 0.001, "loss": 2.633, "step": 85056 }, { "epoch": 16.33, "learning_rate": 0.001, "loss": 2.6338, "step": 85068 }, { "epoch": 16.34, "learning_rate": 0.001, "loss": 2.6284, "step": 85080 }, { "epoch": 16.34, "learning_rate": 0.001, "loss": 2.6245, "step": 85092 }, { "epoch": 16.34, "learning_rate": 0.001, "loss": 2.6322, "step": 85104 }, { "epoch": 16.34, "learning_rate": 0.001, "loss": 2.6245, "step": 85116 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 2.6284, "step": 85128 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 2.6262, "step": 85140 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 2.6295, "step": 85152 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 2.6296, "step": 85164 }, { "epoch": 16.35, "learning_rate": 0.001, "loss": 2.6367, "step": 85176 }, { "epoch": 16.36, "learning_rate": 0.001, "loss": 2.6347, "step": 85188 }, { "epoch": 16.36, "learning_rate": 0.001, "loss": 2.6218, "step": 85200 }, { "epoch": 16.36, "learning_rate": 0.001, "loss": 2.6212, "step": 85212 }, { "epoch": 16.36, "learning_rate": 0.001, "loss": 2.6307, "step": 85224 }, { "epoch": 16.37, "learning_rate": 0.001, "loss": 2.6275, "step": 85236 }, { "epoch": 16.37, "learning_rate": 0.001, "loss": 2.6322, "step": 85248 }, { "epoch": 16.37, "learning_rate": 0.001, "loss": 2.625, "step": 85260 }, { "epoch": 16.37, "learning_rate": 0.001, "loss": 2.6268, "step": 85272 }, { "epoch": 16.38, "learning_rate": 0.001, "loss": 2.6328, "step": 85284 }, { "epoch": 16.38, "learning_rate": 0.001, "loss": 2.6244, "step": 85296 }, { "epoch": 16.38, "learning_rate": 0.001, "loss": 2.6208, "step": 85308 }, { "epoch": 16.38, "learning_rate": 0.001, "loss": 2.6251, "step": 85320 }, { "epoch": 16.38, "learning_rate": 0.001, "loss": 2.6273, "step": 85332 }, { "epoch": 16.39, "learning_rate": 0.001, "loss": 2.6226, "step": 85344 }, { "epoch": 16.39, "learning_rate": 0.001, "loss": 2.631, "step": 85356 }, { "epoch": 16.39, "learning_rate": 0.001, "loss": 2.6276, "step": 85368 }, { "epoch": 16.39, "learning_rate": 0.001, "loss": 2.6216, "step": 85380 }, { "epoch": 16.4, "learning_rate": 0.001, "loss": 2.6235, "step": 85392 }, { "epoch": 16.4, "learning_rate": 0.001, "loss": 2.6208, "step": 85404 }, { "epoch": 16.4, "learning_rate": 0.001, "loss": 2.6154, "step": 85416 }, { "epoch": 16.4, "learning_rate": 0.001, "loss": 2.6171, "step": 85428 }, { "epoch": 16.41, "learning_rate": 0.001, "loss": 2.6265, "step": 85440 }, { "epoch": 16.41, "learning_rate": 0.001, "loss": 2.6313, "step": 85452 }, { "epoch": 16.41, "learning_rate": 0.001, "loss": 2.6252, "step": 85464 }, { "epoch": 16.41, "learning_rate": 0.001, "loss": 2.6331, "step": 85476 }, { "epoch": 16.41, "learning_rate": 0.001, "loss": 2.633, "step": 85488 }, { "epoch": 16.42, "learning_rate": 0.001, "loss": 2.6302, "step": 85500 }, { "epoch": 16.42, "learning_rate": 0.001, "loss": 2.632, "step": 85512 }, { "epoch": 16.42, "learning_rate": 0.001, "loss": 2.6256, "step": 85524 }, { "epoch": 16.42, "learning_rate": 0.001, "loss": 2.6249, "step": 85536 }, { "epoch": 16.43, "learning_rate": 0.001, "loss": 2.6212, "step": 85548 }, { "epoch": 16.43, "learning_rate": 0.001, "loss": 2.6228, "step": 85560 }, { "epoch": 16.43, "learning_rate": 0.001, "loss": 2.627, "step": 85572 }, { "epoch": 16.43, "learning_rate": 0.001, "loss": 2.6189, "step": 85584 }, { "epoch": 16.44, "learning_rate": 0.001, "loss": 2.6281, "step": 85596 }, { "epoch": 16.44, "learning_rate": 0.001, "loss": 2.6379, "step": 85608 }, { "epoch": 16.44, "learning_rate": 0.001, "loss": 2.618, "step": 85620 }, { "epoch": 16.44, "eval_ag_news_accuracy": 0.31503125, "eval_ag_news_bleu_score": 4.734232285240887, "eval_ag_news_bleu_score_sem": 0.14446832776285212, "eval_ag_news_emb_cos_sim": 0.8040810823440552, "eval_ag_news_emb_cos_sim_sem": 0.006647003844799893, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.637950897216797, "eval_ag_news_n_ngrams_match_1": 13.738, "eval_ag_news_n_ngrams_match_2": 3.036, "eval_ag_news_n_ngrams_match_3": 0.866, "eval_ag_news_num_pred_words": 47.152, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.01386255257826, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3392382166818193, "eval_ag_news_runtime": 11.6904, "eval_ag_news_samples_per_second": 42.77, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3453922108777078, "eval_ag_news_token_set_f1_sem": 0.0044595911647469615, "eval_ag_news_token_set_precision": 0.32844362009404904, "eval_ag_news_token_set_recall": 0.3796238776180352, "eval_ag_news_true_num_tokens": 56.09375, "step": 85625 }, { "epoch": 16.44, "eval_anthropic_toxic_prompts_accuracy": 0.11078125, "eval_anthropic_toxic_prompts_bleu_score": 3.029252097289845, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11951039790080126, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6618566513061523, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0091044278722437, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3571712970733643, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.122, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.858, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664, "eval_anthropic_toxic_prompts_num_pred_words": 47.086, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.707869881443887, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21110080755282318, "eval_anthropic_toxic_prompts_runtime": 9.8345, "eval_anthropic_toxic_prompts_samples_per_second": 50.842, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3503955953883387, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063915426667671815, "eval_anthropic_toxic_prompts_token_set_precision": 0.43577397594430767, "eval_anthropic_toxic_prompts_token_set_recall": 0.3220171069084458, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 85625 }, { "epoch": 16.44, "eval_arxiv_accuracy": 0.33709375, "eval_arxiv_bleu_score": 4.225227686712552, "eval_arxiv_bleu_score_sem": 0.12029798950622987, "eval_arxiv_emb_cos_sim": 0.7448480129241943, "eval_arxiv_emb_cos_sim_sem": 0.00807973261782259, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5028719902038574, "eval_arxiv_n_ngrams_match_1": 14.592, "eval_arxiv_n_ngrams_match_2": 2.874, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 40.39, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.210695916704246, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34799392645174365, "eval_arxiv_runtime": 10.1383, "eval_arxiv_samples_per_second": 49.318, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3404732212119082, "eval_arxiv_token_set_f1_sem": 0.004081932664778177, "eval_arxiv_token_set_precision": 0.28951478070597453, "eval_arxiv_token_set_recall": 0.43068390351571334, "eval_arxiv_true_num_tokens": 64.0, "step": 85625 }, { "epoch": 16.44, "eval_python_code_alpaca_accuracy": 0.158125, "eval_python_code_alpaca_bleu_score": 4.4898623454880875, "eval_python_code_alpaca_bleu_score_sem": 0.14630471170329362, "eval_python_code_alpaca_emb_cos_sim": 0.7492319345474243, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008818117086319608, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9721035957336426, "eval_python_code_alpaca_n_ngrams_match_1": 9.608, "eval_python_code_alpaca_n_ngrams_match_2": 2.742, "eval_python_code_alpaca_n_ngrams_match_3": 0.936, "eval_python_code_alpaca_num_pred_words": 43.226, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.532965872183325, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32464325540349837, "eval_python_code_alpaca_runtime": 9.6706, "eval_python_code_alpaca_samples_per_second": 51.703, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.46409365894739485, "eval_python_code_alpaca_token_set_f1_sem": 0.005555074158514538, "eval_python_code_alpaca_token_set_precision": 0.5250415209739485, "eval_python_code_alpaca_token_set_recall": 0.4338619172201921, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 85625 }, { "epoch": 16.44, "eval_wikibio_accuracy": 0.31275, "eval_wikibio_bleu_score": 5.802939841794243, "eval_wikibio_bleu_score_sem": 0.2075321053057482, "eval_wikibio_emb_cos_sim": 0.7380551099777222, "eval_wikibio_emb_cos_sim_sem": 0.009217947115985253, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8536365032196045, "eval_wikibio_n_ngrams_match_1": 10.09, "eval_wikibio_n_ngrams_match_2": 3.348, "eval_wikibio_n_ngrams_match_3": 1.172, "eval_wikibio_num_pred_words": 36.682, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.164264756101296, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35331046831363877, "eval_wikibio_runtime": 9.6557, "eval_wikibio_samples_per_second": 51.783, "eval_wikibio_steps_per_second": 0.104, "eval_wikibio_token_set_f1": 0.31607802876324065, "eval_wikibio_token_set_f1_sem": 0.005275383067759731, "eval_wikibio_token_set_precision": 0.32525107111719104, "eval_wikibio_token_set_recall": 0.324323498345772, "eval_wikibio_true_num_tokens": 61.1328125, "step": 85625 }, { "epoch": 16.44, "eval_nq_accuracy": 0.5179375, "eval_nq_bleu_score": 11.115929575777233, "eval_nq_bleu_score_sem": 0.46169152675620856, "eval_nq_emb_cos_sim": 0.8265016078948975, "eval_nq_emb_cos_sim_sem": 0.007319747889463735, "eval_nq_emb_top1_equal": 0.1953125, "eval_nq_emb_top1_equal_sem": 0.035178457165496856, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2583084106445312, "eval_nq_n_ngrams_match_1": 22.734, "eval_nq_n_ngrams_match_2": 8.128, "eval_nq_n_ngrams_match_3": 3.644, "eval_nq_num_pred_words": 49.032, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.566892218292447, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4375737394517144, "eval_nq_runtime": 10.1345, "eval_nq_samples_per_second": 49.336, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.45498667808267784, "eval_nq_token_set_f1_sem": 0.005013999226700478, "eval_nq_token_set_precision": 0.4093649179282037, "eval_nq_token_set_recall": 0.5220906308707194, "eval_nq_true_num_tokens": 64.0, "step": 85625 }, { "epoch": 16.44, "learning_rate": 0.001, "loss": 2.6312, "step": 85632 }, { "epoch": 16.44, "learning_rate": 0.001, "loss": 2.624, "step": 85644 }, { "epoch": 16.45, "learning_rate": 0.001, "loss": 2.6263, "step": 85656 }, { "epoch": 16.45, "learning_rate": 0.001, "loss": 2.6249, "step": 85668 }, { "epoch": 16.45, "learning_rate": 0.001, "loss": 2.6199, "step": 85680 }, { "epoch": 16.45, "learning_rate": 0.001, "loss": 2.6379, "step": 85692 }, { "epoch": 16.46, "learning_rate": 0.001, "loss": 2.6259, "step": 85704 }, { "epoch": 16.46, "learning_rate": 0.001, "loss": 2.6299, "step": 85716 }, { "epoch": 16.46, "learning_rate": 0.001, "loss": 2.6356, "step": 85728 }, { "epoch": 16.46, "learning_rate": 0.001, "loss": 2.6372, "step": 85740 }, { "epoch": 16.47, "learning_rate": 0.001, "loss": 2.6292, "step": 85752 }, { "epoch": 16.47, "learning_rate": 0.001, "loss": 2.6256, "step": 85764 }, { "epoch": 16.47, "learning_rate": 0.001, "loss": 2.6373, "step": 85776 }, { "epoch": 16.47, "learning_rate": 0.001, "loss": 2.6225, "step": 85788 }, { "epoch": 16.47, "learning_rate": 0.001, "loss": 2.626, "step": 85800 }, { "epoch": 16.48, "learning_rate": 0.001, "loss": 2.6246, "step": 85812 }, { "epoch": 16.48, "learning_rate": 0.001, "loss": 2.6243, "step": 85824 }, { "epoch": 16.48, "learning_rate": 0.001, "loss": 2.6226, "step": 85836 }, { "epoch": 16.48, "learning_rate": 0.001, "loss": 2.6318, "step": 85848 }, { "epoch": 16.49, "learning_rate": 0.001, "loss": 2.6255, "step": 85860 }, { "epoch": 16.49, "learning_rate": 0.001, "loss": 2.6207, "step": 85872 }, { "epoch": 16.49, "learning_rate": 0.001, "loss": 2.6243, "step": 85884 }, { "epoch": 16.49, "learning_rate": 0.001, "loss": 2.6303, "step": 85896 }, { "epoch": 16.5, "learning_rate": 0.001, "loss": 2.6345, "step": 85908 }, { "epoch": 16.5, "learning_rate": 0.001, "loss": 2.6258, "step": 85920 }, { "epoch": 16.5, "learning_rate": 0.001, "loss": 2.6255, "step": 85932 }, { "epoch": 16.5, "learning_rate": 0.001, "loss": 2.6377, "step": 85944 }, { "epoch": 16.5, "learning_rate": 0.001, "loss": 2.6261, "step": 85956 }, { "epoch": 16.51, "learning_rate": 0.001, "loss": 2.6248, "step": 85968 }, { "epoch": 16.51, "learning_rate": 0.001, "loss": 2.6278, "step": 85980 }, { "epoch": 16.51, "learning_rate": 0.001, "loss": 2.6275, "step": 85992 }, { "epoch": 16.51, "learning_rate": 0.001, "loss": 2.6224, "step": 86004 }, { "epoch": 16.52, "learning_rate": 0.001, "loss": 2.6251, "step": 86016 }, { "epoch": 16.52, "learning_rate": 0.001, "loss": 2.6217, "step": 86028 }, { "epoch": 16.52, "learning_rate": 0.001, "loss": 2.6249, "step": 86040 }, { "epoch": 16.52, "learning_rate": 0.001, "loss": 2.6233, "step": 86052 }, { "epoch": 16.53, "learning_rate": 0.001, "loss": 2.6221, "step": 86064 }, { "epoch": 16.53, "learning_rate": 0.001, "loss": 2.6342, "step": 86076 }, { "epoch": 16.53, "learning_rate": 0.001, "loss": 2.6256, "step": 86088 }, { "epoch": 16.53, "learning_rate": 0.001, "loss": 2.6303, "step": 86100 }, { "epoch": 16.53, "learning_rate": 0.001, "loss": 2.6229, "step": 86112 }, { "epoch": 16.54, "learning_rate": 0.001, "loss": 2.6348, "step": 86124 }, { "epoch": 16.54, "learning_rate": 0.001, "loss": 2.6325, "step": 86136 }, { "epoch": 16.54, "learning_rate": 0.001, "loss": 2.6287, "step": 86148 }, { "epoch": 16.54, "learning_rate": 0.001, "loss": 2.6375, "step": 86160 }, { "epoch": 16.55, "learning_rate": 0.001, "loss": 2.6331, "step": 86172 }, { "epoch": 16.55, "learning_rate": 0.001, "loss": 2.6229, "step": 86184 }, { "epoch": 16.55, "learning_rate": 0.001, "loss": 2.6298, "step": 86196 }, { "epoch": 16.55, "learning_rate": 0.001, "loss": 2.6308, "step": 86208 }, { "epoch": 16.56, "learning_rate": 0.001, "loss": 2.6335, "step": 86220 }, { "epoch": 16.56, "learning_rate": 0.001, "loss": 2.6356, "step": 86232 }, { "epoch": 16.56, "learning_rate": 0.001, "loss": 2.6324, "step": 86244 }, { "epoch": 16.56, "eval_ag_news_accuracy": 0.31484375, "eval_ag_news_bleu_score": 4.677569717950757, "eval_ag_news_bleu_score_sem": 0.1536075207240273, "eval_ag_news_emb_cos_sim": 0.7919803261756897, "eval_ag_news_emb_cos_sim_sem": 0.0076983503954590644, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6351852416992188, "eval_ag_news_n_ngrams_match_1": 13.454, "eval_ag_news_n_ngrams_match_2": 2.938, "eval_ag_news_n_ngrams_match_3": 0.862, "eval_ag_news_num_pred_words": 46.584, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.90887455110782, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3328643009688087, "eval_ag_news_runtime": 9.9686, "eval_ag_news_samples_per_second": 50.157, "eval_ag_news_steps_per_second": 0.1, "eval_ag_news_token_set_f1": 0.3380049465612954, "eval_ag_news_token_set_f1_sem": 0.004545375509986334, "eval_ag_news_token_set_precision": 0.31841268807992196, "eval_ag_news_token_set_recall": 0.3783377703612223, "eval_ag_news_true_num_tokens": 56.09375, "step": 86250 }, { "epoch": 16.56, "eval_anthropic_toxic_prompts_accuracy": 0.11259375, "eval_anthropic_toxic_prompts_bleu_score": 2.9140743862987084, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11594501363240359, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6440126299858093, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010369487262266948, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2765464782714844, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.046, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632, "eval_anthropic_toxic_prompts_num_pred_words": 48.168, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.48415099124652, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20559105113898218, "eval_anthropic_toxic_prompts_runtime": 9.805, "eval_anthropic_toxic_prompts_samples_per_second": 50.994, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35503537504920624, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006506715944413094, "eval_anthropic_toxic_prompts_token_set_precision": 0.4241620850914107, "eval_anthropic_toxic_prompts_token_set_recall": 0.33386737837819425, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 86250 }, { "epoch": 16.56, "eval_arxiv_accuracy": 0.3390625, "eval_arxiv_bleu_score": 4.153893751254837, "eval_arxiv_bleu_score_sem": 0.11692658314158827, "eval_arxiv_emb_cos_sim": 0.7477889060974121, "eval_arxiv_emb_cos_sim_sem": 0.007050107499214719, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.504267692565918, "eval_arxiv_n_ngrams_match_1": 14.614, "eval_arxiv_n_ngrams_match_2": 2.784, "eval_arxiv_n_ngrams_match_3": 0.612, "eval_arxiv_num_pred_words": 41.108, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.257080525465156, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34724834799599713, "eval_arxiv_runtime": 9.9529, "eval_arxiv_samples_per_second": 50.236, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.33965850333694886, "eval_arxiv_token_set_f1_sem": 0.004114648945668827, "eval_arxiv_token_set_precision": 0.29188368571785067, "eval_arxiv_token_set_recall": 0.4252304605018358, "eval_arxiv_true_num_tokens": 64.0, "step": 86250 }, { "epoch": 16.56, "eval_python_code_alpaca_accuracy": 0.155375, "eval_python_code_alpaca_bleu_score": 4.42037969556335, "eval_python_code_alpaca_bleu_score_sem": 0.14584883274688865, "eval_python_code_alpaca_emb_cos_sim": 0.7608622312545776, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006207836976038917, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.976240396499634, "eval_python_code_alpaca_n_ngrams_match_1": 9.654, "eval_python_code_alpaca_n_ngrams_match_2": 2.838, "eval_python_code_alpaca_n_ngrams_match_3": 0.914, "eval_python_code_alpaca_num_pred_words": 44.61, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.613937226072167, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31759725655093096, "eval_python_code_alpaca_runtime": 9.9947, "eval_python_code_alpaca_samples_per_second": 50.027, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4719123221850839, "eval_python_code_alpaca_token_set_f1_sem": 0.005591315482465464, "eval_python_code_alpaca_token_set_precision": 0.5294576661254947, "eval_python_code_alpaca_token_set_recall": 0.4465949770512454, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 86250 }, { "epoch": 16.56, "eval_wikibio_accuracy": 0.3121875, "eval_wikibio_bleu_score": 5.557797813810281, "eval_wikibio_bleu_score_sem": 0.19400367573684665, "eval_wikibio_emb_cos_sim": 0.7214940190315247, "eval_wikibio_emb_cos_sim_sem": 0.01092002717821796, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.840996026992798, "eval_wikibio_n_ngrams_match_1": 9.628, "eval_wikibio_n_ngrams_match_2": 3.176, "eval_wikibio_n_ngrams_match_3": 1.088, "eval_wikibio_num_pred_words": 35.774, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.57183815410644, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34312089050957295, "eval_wikibio_runtime": 9.6657, "eval_wikibio_samples_per_second": 51.729, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.30667002405518456, "eval_wikibio_token_set_f1_sem": 0.005711237778105406, "eval_wikibio_token_set_precision": 0.313010097137424, "eval_wikibio_token_set_recall": 0.3178837283556537, "eval_wikibio_true_num_tokens": 61.1328125, "step": 86250 }, { "epoch": 16.56, "eval_nq_accuracy": 0.51871875, "eval_nq_bleu_score": 11.108348794910503, "eval_nq_bleu_score_sem": 0.4506995806699304, "eval_nq_emb_cos_sim": 0.8245291113853455, "eval_nq_emb_cos_sim_sem": 0.007480438017767582, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2540647983551025, "eval_nq_n_ngrams_match_1": 22.364, "eval_nq_n_ngrams_match_2": 8.07, "eval_nq_n_ngrams_match_3": 3.644, "eval_nq_num_pred_words": 49.282, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.526380056652824, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4303017423573058, "eval_nq_runtime": 10.1022, "eval_nq_samples_per_second": 49.494, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.4482803320685933, "eval_nq_token_set_f1_sem": 0.004701399922315706, "eval_nq_token_set_precision": 0.4042105412576047, "eval_nq_token_set_recall": 0.5114986170305418, "eval_nq_true_num_tokens": 64.0, "step": 86250 }, { "epoch": 16.56, "learning_rate": 0.001, "loss": 2.6258, "step": 86256 }, { "epoch": 16.56, "learning_rate": 0.001, "loss": 2.6309, "step": 86268 }, { "epoch": 16.57, "learning_rate": 0.001, "loss": 2.6232, "step": 86280 }, { "epoch": 16.57, "learning_rate": 0.001, "loss": 2.6133, "step": 86292 }, { "epoch": 16.57, "learning_rate": 0.001, "loss": 2.6218, "step": 86304 }, { "epoch": 16.57, "learning_rate": 0.001, "loss": 2.6275, "step": 86316 }, { "epoch": 16.58, "learning_rate": 0.001, "loss": 2.618, "step": 86328 }, { "epoch": 16.58, "learning_rate": 0.001, "loss": 2.6342, "step": 86340 }, { "epoch": 16.58, "learning_rate": 0.001, "loss": 2.6257, "step": 86352 }, { "epoch": 16.58, "learning_rate": 0.001, "loss": 2.6304, "step": 86364 }, { "epoch": 16.59, "learning_rate": 0.001, "loss": 2.6174, "step": 86376 }, { "epoch": 16.59, "learning_rate": 0.001, "loss": 2.619, "step": 86388 }, { "epoch": 16.59, "learning_rate": 0.001, "loss": 2.6274, "step": 86400 }, { "epoch": 16.59, "learning_rate": 0.001, "loss": 2.625, "step": 86412 }, { "epoch": 16.59, "learning_rate": 0.001, "loss": 2.6291, "step": 86424 }, { "epoch": 16.6, "learning_rate": 0.001, "loss": 2.6281, "step": 86436 }, { "epoch": 16.6, "learning_rate": 0.001, "loss": 2.6226, "step": 86448 }, { "epoch": 16.6, "learning_rate": 0.001, "loss": 2.6328, "step": 86460 }, { "epoch": 16.6, "learning_rate": 0.001, "loss": 2.6275, "step": 86472 }, { "epoch": 16.61, "learning_rate": 0.001, "loss": 2.6215, "step": 86484 }, { "epoch": 16.61, "learning_rate": 0.001, "loss": 2.6274, "step": 86496 }, { "epoch": 16.61, "learning_rate": 0.001, "loss": 2.6337, "step": 86508 }, { "epoch": 16.61, "learning_rate": 0.001, "loss": 2.6342, "step": 86520 }, { "epoch": 16.62, "learning_rate": 0.001, "loss": 2.6326, "step": 86532 }, { "epoch": 16.62, "learning_rate": 0.001, "loss": 2.6372, "step": 86544 }, { "epoch": 16.62, "learning_rate": 0.001, "loss": 2.6208, "step": 86556 }, { "epoch": 16.62, "learning_rate": 0.001, "loss": 2.6234, "step": 86568 }, { "epoch": 16.62, "learning_rate": 0.001, "loss": 2.6309, "step": 86580 }, { "epoch": 16.63, "learning_rate": 0.001, "loss": 2.6374, "step": 86592 }, { "epoch": 16.63, "learning_rate": 0.001, "loss": 2.6285, "step": 86604 }, { "epoch": 16.63, "learning_rate": 0.001, "loss": 2.6272, "step": 86616 }, { "epoch": 16.63, "learning_rate": 0.001, "loss": 2.6281, "step": 86628 }, { "epoch": 16.64, "learning_rate": 0.001, "loss": 2.6147, "step": 86640 }, { "epoch": 16.64, "learning_rate": 0.001, "loss": 2.6315, "step": 86652 }, { "epoch": 16.64, "learning_rate": 0.001, "loss": 2.6331, "step": 86664 }, { "epoch": 16.64, "learning_rate": 0.001, "loss": 2.6349, "step": 86676 }, { "epoch": 16.65, "learning_rate": 0.001, "loss": 2.6286, "step": 86688 }, { "epoch": 16.65, "learning_rate": 0.001, "loss": 2.6315, "step": 86700 }, { "epoch": 16.65, "learning_rate": 0.001, "loss": 2.6276, "step": 86712 }, { "epoch": 16.65, "learning_rate": 0.001, "loss": 2.635, "step": 86724 }, { "epoch": 16.65, "learning_rate": 0.001, "loss": 2.6239, "step": 86736 }, { "epoch": 16.66, "learning_rate": 0.001, "loss": 2.6185, "step": 86748 }, { "epoch": 16.66, "learning_rate": 0.001, "loss": 2.6298, "step": 86760 }, { "epoch": 16.66, "learning_rate": 0.001, "loss": 2.6291, "step": 86772 }, { "epoch": 16.66, "learning_rate": 0.001, "loss": 2.6365, "step": 86784 }, { "epoch": 16.67, "learning_rate": 0.001, "loss": 2.633, "step": 86796 }, { "epoch": 16.67, "learning_rate": 0.001, "loss": 2.6266, "step": 86808 }, { "epoch": 16.67, "learning_rate": 0.001, "loss": 2.6243, "step": 86820 }, { "epoch": 16.67, "learning_rate": 0.001, "loss": 2.6229, "step": 86832 }, { "epoch": 16.68, "learning_rate": 0.001, "loss": 2.6222, "step": 86844 }, { "epoch": 16.68, "learning_rate": 0.001, "loss": 2.6401, "step": 86856 }, { "epoch": 16.68, "learning_rate": 0.001, "loss": 2.6294, "step": 86868 }, { "epoch": 16.68, "eval_ag_news_accuracy": 0.315375, "eval_ag_news_bleu_score": 4.679309175483281, "eval_ag_news_bleu_score_sem": 0.153342466902705, "eval_ag_news_emb_cos_sim": 0.804725170135498, "eval_ag_news_emb_cos_sim_sem": 0.006796262298989668, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6281321048736572, "eval_ag_news_n_ngrams_match_1": 13.722, "eval_ag_news_n_ngrams_match_2": 3.012, "eval_ag_news_n_ngrams_match_3": 0.868, "eval_ag_news_num_pred_words": 46.602, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.64243878049348, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3384316212282945, "eval_ag_news_runtime": 10.9929, "eval_ag_news_samples_per_second": 45.484, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.345484160230268, "eval_ag_news_token_set_f1_sem": 0.004385774029902639, "eval_ag_news_token_set_precision": 0.3278777292416484, "eval_ag_news_token_set_recall": 0.38073365646483104, "eval_ag_news_true_num_tokens": 56.09375, "step": 86875 }, { "epoch": 16.68, "eval_anthropic_toxic_prompts_accuracy": 0.1115625, "eval_anthropic_toxic_prompts_bleu_score": 2.94961734013026, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11458936431158522, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625691652297974, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010334130666177863, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.316422700881958, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.82, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.636, "eval_anthropic_toxic_prompts_num_pred_words": 47.096, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.561577985711093, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20786075367716803, "eval_anthropic_toxic_prompts_runtime": 10.8339, "eval_anthropic_toxic_prompts_samples_per_second": 46.151, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.3505138942601821, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591600058821367, "eval_anthropic_toxic_prompts_token_set_precision": 0.42305282286384216, "eval_anthropic_toxic_prompts_token_set_recall": 0.3263186822116573, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 86875 }, { "epoch": 16.68, "eval_arxiv_accuracy": 0.33928125, "eval_arxiv_bleu_score": 4.1213584226465265, "eval_arxiv_bleu_score_sem": 0.1243666975247188, "eval_arxiv_emb_cos_sim": 0.7476789951324463, "eval_arxiv_emb_cos_sim_sem": 0.007997047745033802, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.5029208660125732, "eval_arxiv_n_ngrams_match_1": 14.28, "eval_arxiv_n_ngrams_match_2": 2.77, "eval_arxiv_n_ngrams_match_3": 0.654, "eval_arxiv_num_pred_words": 39.87, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 33.212319155993434, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3419498009749126, "eval_arxiv_runtime": 10.3148, "eval_arxiv_samples_per_second": 48.474, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3361871733138312, "eval_arxiv_token_set_f1_sem": 0.004443572811533335, "eval_arxiv_token_set_precision": 0.28552665801110716, "eval_arxiv_token_set_recall": 0.42890751481817746, "eval_arxiv_true_num_tokens": 64.0, "step": 86875 }, { "epoch": 16.68, "eval_python_code_alpaca_accuracy": 0.159, "eval_python_code_alpaca_bleu_score": 4.4949201955534575, "eval_python_code_alpaca_bleu_score_sem": 0.14911391424469905, "eval_python_code_alpaca_emb_cos_sim": 0.7516356706619263, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009768329639798859, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9453086853027344, "eval_python_code_alpaca_n_ngrams_match_1": 9.868, "eval_python_code_alpaca_n_ngrams_match_2": 2.9, "eval_python_code_alpaca_n_ngrams_match_3": 0.958, "eval_python_code_alpaca_num_pred_words": 44.588, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.01653160436644, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3261765212791481, "eval_python_code_alpaca_runtime": 9.9321, "eval_python_code_alpaca_samples_per_second": 50.342, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4750200440609405, "eval_python_code_alpaca_token_set_f1_sem": 0.005573481937169099, "eval_python_code_alpaca_token_set_precision": 0.5392437295997247, "eval_python_code_alpaca_token_set_recall": 0.44507940543250574, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 86875 }, { "epoch": 16.68, "eval_wikibio_accuracy": 0.31346875, "eval_wikibio_bleu_score": 5.483483780610567, "eval_wikibio_bleu_score_sem": 0.20548743364077995, "eval_wikibio_emb_cos_sim": 0.7220487594604492, "eval_wikibio_emb_cos_sim_sem": 0.010978563882443528, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.851616144180298, "eval_wikibio_n_ngrams_match_1": 9.57, "eval_wikibio_n_ngrams_match_2": 3.118, "eval_wikibio_n_ngrams_match_3": 1.096, "eval_wikibio_num_pred_words": 35.356, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.06907220141837, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33593630782168704, "eval_wikibio_runtime": 11.225, "eval_wikibio_samples_per_second": 44.543, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3060713827508873, "eval_wikibio_token_set_f1_sem": 0.005734146280167845, "eval_wikibio_token_set_precision": 0.3124944838666867, "eval_wikibio_token_set_recall": 0.31781332903744397, "eval_wikibio_true_num_tokens": 61.1328125, "step": 86875 }, { "epoch": 16.68, "eval_nq_accuracy": 0.5173125, "eval_nq_bleu_score": 11.09245589224382, "eval_nq_bleu_score_sem": 0.4527721311143663, "eval_nq_emb_cos_sim": 0.8283681869506836, "eval_nq_emb_cos_sim_sem": 0.006580984757874625, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.250608444213867, "eval_nq_n_ngrams_match_1": 22.568, "eval_nq_n_ngrams_match_2": 8.062, "eval_nq_n_ngrams_match_3": 3.616, "eval_nq_num_pred_words": 49.19, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.49351035088769, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4355600451312414, "eval_nq_runtime": 10.2445, "eval_nq_samples_per_second": 48.806, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.4528203612599476, "eval_nq_token_set_f1_sem": 0.004808790989413383, "eval_nq_token_set_precision": 0.40919570157161866, "eval_nq_token_set_recall": 0.5152685511416994, "eval_nq_true_num_tokens": 64.0, "step": 86875 }, { "epoch": 16.68, "learning_rate": 0.001, "loss": 2.6284, "step": 86880 }, { "epoch": 16.68, "learning_rate": 0.001, "loss": 2.6391, "step": 86892 }, { "epoch": 16.69, "learning_rate": 0.001, "loss": 2.6352, "step": 86904 }, { "epoch": 16.69, "learning_rate": 0.001, "loss": 2.6215, "step": 86916 }, { "epoch": 16.69, "learning_rate": 0.001, "loss": 2.6264, "step": 86928 }, { "epoch": 16.69, "learning_rate": 0.001, "loss": 2.6277, "step": 86940 }, { "epoch": 16.7, "learning_rate": 0.001, "loss": 2.6351, "step": 86952 }, { "epoch": 16.7, "learning_rate": 0.001, "loss": 2.6242, "step": 86964 }, { "epoch": 16.7, "learning_rate": 0.001, "loss": 2.6337, "step": 86976 }, { "epoch": 16.7, "learning_rate": 0.001, "loss": 2.6224, "step": 86988 }, { "epoch": 16.71, "learning_rate": 0.001, "loss": 2.6178, "step": 87000 }, { "epoch": 16.71, "learning_rate": 0.001, "loss": 2.6285, "step": 87012 }, { "epoch": 16.71, "learning_rate": 0.001, "loss": 2.6318, "step": 87024 }, { "epoch": 16.71, "learning_rate": 0.001, "loss": 2.6251, "step": 87036 }, { "epoch": 16.71, "learning_rate": 0.001, "loss": 2.623, "step": 87048 }, { "epoch": 16.72, "learning_rate": 0.001, "loss": 2.6302, "step": 87060 }, { "epoch": 16.72, "learning_rate": 0.001, "loss": 2.6285, "step": 87072 }, { "epoch": 16.72, "learning_rate": 0.001, "loss": 2.6324, "step": 87084 }, { "epoch": 16.72, "learning_rate": 0.001, "loss": 2.6366, "step": 87096 }, { "epoch": 16.73, "learning_rate": 0.001, "loss": 2.6255, "step": 87108 }, { "epoch": 16.73, "learning_rate": 0.001, "loss": 2.6307, "step": 87120 }, { "epoch": 16.73, "learning_rate": 0.001, "loss": 2.6341, "step": 87132 }, { "epoch": 16.73, "learning_rate": 0.001, "loss": 2.6228, "step": 87144 }, { "epoch": 16.74, "learning_rate": 0.001, "loss": 2.6267, "step": 87156 }, { "epoch": 16.74, "learning_rate": 0.001, "loss": 2.6245, "step": 87168 }, { "epoch": 16.74, "learning_rate": 0.001, "loss": 2.6248, "step": 87180 }, { "epoch": 16.74, "learning_rate": 0.001, "loss": 2.6277, "step": 87192 }, { "epoch": 16.74, "learning_rate": 0.001, "loss": 2.6327, "step": 87204 }, { "epoch": 16.75, "learning_rate": 0.001, "loss": 2.628, "step": 87216 }, { "epoch": 16.75, "learning_rate": 0.001, "loss": 2.6282, "step": 87228 }, { "epoch": 16.75, "learning_rate": 0.001, "loss": 2.6221, "step": 87240 }, { "epoch": 16.75, "learning_rate": 0.001, "loss": 2.6274, "step": 87252 }, { "epoch": 16.76, "learning_rate": 0.001, "loss": 2.6143, "step": 87264 }, { "epoch": 16.76, "learning_rate": 0.001, "loss": 2.6182, "step": 87276 }, { "epoch": 16.76, "learning_rate": 0.001, "loss": 2.6306, "step": 87288 }, { "epoch": 16.76, "learning_rate": 0.001, "loss": 2.6275, "step": 87300 }, { "epoch": 16.76, "learning_rate": 0.001, "loss": 2.6252, "step": 87312 }, { "epoch": 16.77, "learning_rate": 0.001, "loss": 2.6214, "step": 87324 }, { "epoch": 16.77, "learning_rate": 0.001, "loss": 2.6312, "step": 87336 }, { "epoch": 16.77, "learning_rate": 0.001, "loss": 2.6291, "step": 87348 }, { "epoch": 16.77, "learning_rate": 0.001, "loss": 2.6267, "step": 87360 }, { "epoch": 16.78, "learning_rate": 0.001, "loss": 2.6263, "step": 87372 }, { "epoch": 16.78, "learning_rate": 0.001, "loss": 2.6312, "step": 87384 }, { "epoch": 16.78, "learning_rate": 0.001, "loss": 2.6265, "step": 87396 }, { "epoch": 16.78, "learning_rate": 0.001, "loss": 2.6306, "step": 87408 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 2.6298, "step": 87420 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 2.6242, "step": 87432 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 2.6342, "step": 87444 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 2.6242, "step": 87456 }, { "epoch": 16.79, "learning_rate": 0.001, "loss": 2.6282, "step": 87468 }, { "epoch": 16.8, "learning_rate": 0.001, "loss": 2.6298, "step": 87480 }, { "epoch": 16.8, "learning_rate": 0.001, "loss": 2.6309, "step": 87492 }, { "epoch": 16.8, "eval_ag_news_accuracy": 0.315875, "eval_ag_news_bleu_score": 4.657084491866013, "eval_ag_news_bleu_score_sem": 0.14432158159014946, "eval_ag_news_emb_cos_sim": 0.7948165535926819, "eval_ag_news_emb_cos_sim_sem": 0.006874925440468204, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6305880546569824, "eval_ag_news_n_ngrams_match_1": 13.744, "eval_ag_news_n_ngrams_match_2": 2.902, "eval_ag_news_n_ngrams_match_3": 0.8, "eval_ag_news_num_pred_words": 46.338, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.735000336601885, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3421852298285454, "eval_ag_news_runtime": 11.5052, "eval_ag_news_samples_per_second": 43.459, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.3420956438696702, "eval_ag_news_token_set_f1_sem": 0.004355803102029173, "eval_ag_news_token_set_precision": 0.3268219834945953, "eval_ag_news_token_set_recall": 0.3716286075695179, "eval_ag_news_true_num_tokens": 56.09375, "step": 87500 }, { "epoch": 16.8, "eval_anthropic_toxic_prompts_accuracy": 0.112125, "eval_anthropic_toxic_prompts_bleu_score": 2.931454337216971, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11134417270026975, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6524926424026489, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009885640782817376, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.284168004989624, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.072, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.814, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.632, "eval_anthropic_toxic_prompts_num_pred_words": 46.672, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.68677181483051, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21189186707560131, "eval_anthropic_toxic_prompts_runtime": 10.2872, "eval_anthropic_toxic_prompts_samples_per_second": 48.604, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3447543818932813, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006533334158630998, "eval_anthropic_toxic_prompts_token_set_precision": 0.42872770985861525, "eval_anthropic_toxic_prompts_token_set_recall": 0.3171399078838364, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 87500 }, { "epoch": 16.8, "eval_arxiv_accuracy": 0.33959375, "eval_arxiv_bleu_score": 4.250707398541021, "eval_arxiv_bleu_score_sem": 0.1153725770113524, "eval_arxiv_emb_cos_sim": 0.753612756729126, "eval_arxiv_emb_cos_sim_sem": 0.00784480476713385, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4876291751861572, "eval_arxiv_n_ngrams_match_1": 14.866, "eval_arxiv_n_ngrams_match_2": 2.934, "eval_arxiv_n_ngrams_match_3": 0.632, "eval_arxiv_num_pred_words": 40.53, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.70831003688664, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35340056514613727, "eval_arxiv_runtime": 14.7361, "eval_arxiv_samples_per_second": 33.93, "eval_arxiv_steps_per_second": 0.068, "eval_arxiv_token_set_f1": 0.3462515136679858, "eval_arxiv_token_set_f1_sem": 0.004426609603788637, "eval_arxiv_token_set_precision": 0.298764043174962, "eval_arxiv_token_set_recall": 0.42918116469275047, "eval_arxiv_true_num_tokens": 64.0, "step": 87500 }, { "epoch": 16.8, "eval_python_code_alpaca_accuracy": 0.158, "eval_python_code_alpaca_bleu_score": 4.494220265961678, "eval_python_code_alpaca_bleu_score_sem": 0.14884272467127235, "eval_python_code_alpaca_emb_cos_sim": 0.7626599073410034, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008429878757083476, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.927032470703125, "eval_python_code_alpaca_n_ngrams_match_1": 9.688, "eval_python_code_alpaca_n_ngrams_match_2": 2.802, "eval_python_code_alpaca_n_ngrams_match_3": 0.914, "eval_python_code_alpaca_num_pred_words": 43.178, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.672138082918018, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32646084283444116, "eval_python_code_alpaca_runtime": 11.4607, "eval_python_code_alpaca_samples_per_second": 43.627, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.46112390962471544, "eval_python_code_alpaca_token_set_f1_sem": 0.005661878858625047, "eval_python_code_alpaca_token_set_precision": 0.52582671037576, "eval_python_code_alpaca_token_set_recall": 0.43302542089499885, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 87500 }, { "epoch": 16.8, "eval_wikibio_accuracy": 0.31640625, "eval_wikibio_bleu_score": 5.752457003825269, "eval_wikibio_bleu_score_sem": 0.20977092202575026, "eval_wikibio_emb_cos_sim": 0.7287917733192444, "eval_wikibio_emb_cos_sim_sem": 0.01227176880619345, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8216605186462402, "eval_wikibio_n_ngrams_match_1": 9.902, "eval_wikibio_n_ngrams_match_2": 3.216, "eval_wikibio_n_ngrams_match_3": 1.144, "eval_wikibio_num_pred_words": 35.52, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.67999786667487, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3464011932803648, "eval_wikibio_runtime": 11.3364, "eval_wikibio_samples_per_second": 44.106, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.31653711702892806, "eval_wikibio_token_set_f1_sem": 0.005471945999342703, "eval_wikibio_token_set_precision": 0.321491679501232, "eval_wikibio_token_set_recall": 0.32963470436920483, "eval_wikibio_true_num_tokens": 61.1328125, "step": 87500 }, { "epoch": 16.8, "eval_nq_accuracy": 0.51884375, "eval_nq_bleu_score": 11.16567350559356, "eval_nq_bleu_score_sem": 0.4698487845390746, "eval_nq_emb_cos_sim": 0.8192366361618042, "eval_nq_emb_cos_sim_sem": 0.007553513409095885, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.254143476486206, "eval_nq_n_ngrams_match_1": 22.45, "eval_nq_n_ngrams_match_2": 8.03, "eval_nq_n_ngrams_match_3": 3.676, "eval_nq_num_pred_words": 48.618, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.527129603917967, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43583982559655565, "eval_nq_runtime": 10.3907, "eval_nq_samples_per_second": 48.12, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.45002597107440806, "eval_nq_token_set_f1_sem": 0.004958619229636719, "eval_nq_token_set_precision": 0.4060904601311901, "eval_nq_token_set_recall": 0.5141355170620853, "eval_nq_true_num_tokens": 64.0, "step": 87500 }, { "epoch": 16.8, "learning_rate": 0.001, "loss": 2.628, "step": 87504 }, { "epoch": 16.8, "learning_rate": 0.001, "loss": 2.6255, "step": 87516 }, { "epoch": 16.81, "learning_rate": 0.001, "loss": 2.6279, "step": 87528 }, { "epoch": 16.81, "learning_rate": 0.001, "loss": 2.6366, "step": 87540 }, { "epoch": 16.81, "learning_rate": 0.001, "loss": 2.6243, "step": 87552 }, { "epoch": 16.81, "learning_rate": 0.001, "loss": 2.6164, "step": 87564 }, { "epoch": 16.82, "learning_rate": 0.001, "loss": 2.6303, "step": 87576 }, { "epoch": 16.82, "learning_rate": 0.001, "loss": 2.6373, "step": 87588 }, { "epoch": 16.82, "learning_rate": 0.001, "loss": 2.6401, "step": 87600 }, { "epoch": 16.82, "learning_rate": 0.001, "loss": 2.6297, "step": 87612 }, { "epoch": 16.82, "learning_rate": 0.001, "loss": 2.634, "step": 87624 }, { "epoch": 16.83, "learning_rate": 0.001, "loss": 2.6239, "step": 87636 }, { "epoch": 16.83, "learning_rate": 0.001, "loss": 2.6227, "step": 87648 }, { "epoch": 16.83, "learning_rate": 0.001, "loss": 2.6219, "step": 87660 }, { "epoch": 16.83, "learning_rate": 0.001, "loss": 2.6405, "step": 87672 }, { "epoch": 16.84, "learning_rate": 0.001, "loss": 2.6322, "step": 87684 }, { "epoch": 16.84, "learning_rate": 0.001, "loss": 2.6288, "step": 87696 }, { "epoch": 16.84, "learning_rate": 0.001, "loss": 2.6369, "step": 87708 }, { "epoch": 16.84, "learning_rate": 0.001, "loss": 2.6275, "step": 87720 }, { "epoch": 16.85, "learning_rate": 0.001, "loss": 2.6224, "step": 87732 }, { "epoch": 16.85, "learning_rate": 0.001, "loss": 2.6331, "step": 87744 }, { "epoch": 16.85, "learning_rate": 0.001, "loss": 2.6277, "step": 87756 }, { "epoch": 16.85, "learning_rate": 0.001, "loss": 2.6317, "step": 87768 }, { "epoch": 16.85, "learning_rate": 0.001, "loss": 2.6255, "step": 87780 }, { "epoch": 16.86, "learning_rate": 0.001, "loss": 2.6251, "step": 87792 }, { "epoch": 16.86, "learning_rate": 0.001, "loss": 2.62, "step": 87804 }, { "epoch": 16.86, "learning_rate": 0.001, "loss": 2.6306, "step": 87816 }, { "epoch": 16.86, "learning_rate": 0.001, "loss": 2.6291, "step": 87828 }, { "epoch": 16.87, "learning_rate": 0.001, "loss": 2.6276, "step": 87840 }, { "epoch": 16.87, "learning_rate": 0.001, "loss": 2.629, "step": 87852 }, { "epoch": 16.87, "learning_rate": 0.001, "loss": 2.6154, "step": 87864 }, { "epoch": 16.87, "learning_rate": 0.001, "loss": 2.6317, "step": 87876 }, { "epoch": 16.88, "learning_rate": 0.001, "loss": 2.6317, "step": 87888 }, { "epoch": 16.88, "learning_rate": 0.001, "loss": 2.6323, "step": 87900 }, { "epoch": 16.88, "learning_rate": 0.001, "loss": 2.6311, "step": 87912 }, { "epoch": 16.88, "learning_rate": 0.001, "loss": 2.6283, "step": 87924 }, { "epoch": 16.88, "learning_rate": 0.001, "loss": 2.6135, "step": 87936 }, { "epoch": 16.89, "learning_rate": 0.001, "loss": 2.6263, "step": 87948 }, { "epoch": 16.89, "learning_rate": 0.001, "loss": 2.6272, "step": 87960 }, { "epoch": 16.89, "learning_rate": 0.001, "loss": 2.6303, "step": 87972 }, { "epoch": 16.89, "learning_rate": 0.001, "loss": 2.6263, "step": 87984 }, { "epoch": 16.9, "learning_rate": 0.001, "loss": 2.6301, "step": 87996 }, { "epoch": 16.9, "learning_rate": 0.001, "loss": 2.631, "step": 88008 }, { "epoch": 16.9, "learning_rate": 0.001, "loss": 2.6372, "step": 88020 }, { "epoch": 16.9, "learning_rate": 0.001, "loss": 2.6262, "step": 88032 }, { "epoch": 16.91, "learning_rate": 0.001, "loss": 2.6334, "step": 88044 }, { "epoch": 16.91, "learning_rate": 0.001, "loss": 2.6346, "step": 88056 }, { "epoch": 16.91, "learning_rate": 0.001, "loss": 2.633, "step": 88068 }, { "epoch": 16.91, "learning_rate": 0.001, "loss": 2.6215, "step": 88080 }, { "epoch": 16.91, "learning_rate": 0.001, "loss": 2.6256, "step": 88092 }, { "epoch": 16.92, "learning_rate": 0.001, "loss": 2.6323, "step": 88104 }, { "epoch": 16.92, "learning_rate": 0.001, "loss": 2.6276, "step": 88116 }, { "epoch": 16.92, "eval_ag_news_accuracy": 0.315375, "eval_ag_news_bleu_score": 4.761302212620485, "eval_ag_news_bleu_score_sem": 0.14603135465010694, "eval_ag_news_emb_cos_sim": 0.7959756851196289, "eval_ag_news_emb_cos_sim_sem": 0.007182710045708595, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.621840238571167, "eval_ag_news_n_ngrams_match_1": 13.6, "eval_ag_news_n_ngrams_match_2": 2.924, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 46.356, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.40634111473802, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3368090266406085, "eval_ag_news_runtime": 15.577, "eval_ag_news_samples_per_second": 32.099, "eval_ag_news_steps_per_second": 0.064, "eval_ag_news_token_set_f1": 0.34362525760917834, "eval_ag_news_token_set_f1_sem": 0.004277298319728863, "eval_ag_news_token_set_precision": 0.3243572906241741, "eval_ag_news_token_set_recall": 0.3826852145542097, "eval_ag_news_true_num_tokens": 56.09375, "step": 88125 }, { "epoch": 16.92, "eval_anthropic_toxic_prompts_accuracy": 0.1130625, "eval_anthropic_toxic_prompts_bleu_score": 2.967610211310427, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11480654972381421, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6533322334289551, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011938517626284858, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.305030584335327, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.774, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.662, "eval_anthropic_toxic_prompts_num_pred_words": 47.214, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.249374980078517, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.859375, "eval_anthropic_toxic_prompts_rouge_score": 0.2077508680093504, "eval_anthropic_toxic_prompts_runtime": 10.6444, "eval_anthropic_toxic_prompts_samples_per_second": 46.973, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.3402503238126781, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006429198441423453, "eval_anthropic_toxic_prompts_token_set_precision": 0.42224335670650015, "eval_anthropic_toxic_prompts_token_set_recall": 0.3110297696693973, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 88125 }, { "epoch": 16.92, "eval_arxiv_accuracy": 0.3396875, "eval_arxiv_bleu_score": 4.195958106552601, "eval_arxiv_bleu_score_sem": 0.12024509106008427, "eval_arxiv_emb_cos_sim": 0.7449824810028076, "eval_arxiv_emb_cos_sim_sem": 0.007879530477584286, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4940834045410156, "eval_arxiv_n_ngrams_match_1": 14.8, "eval_arxiv_n_ngrams_match_2": 2.834, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 40.88, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.920099706013254, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35012523894011904, "eval_arxiv_runtime": 12.8273, "eval_arxiv_samples_per_second": 38.979, "eval_arxiv_steps_per_second": 0.078, "eval_arxiv_token_set_f1": 0.3471188638055089, "eval_arxiv_token_set_f1_sem": 0.0040905597524204095, "eval_arxiv_token_set_precision": 0.29688315243872465, "eval_arxiv_token_set_recall": 0.43750842002303675, "eval_arxiv_true_num_tokens": 64.0, "step": 88125 }, { "epoch": 16.92, "eval_python_code_alpaca_accuracy": 0.1600625, "eval_python_code_alpaca_bleu_score": 4.494884433419647, "eval_python_code_alpaca_bleu_score_sem": 0.14699369630166378, "eval_python_code_alpaca_emb_cos_sim": 0.7556917667388916, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008510427311488367, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9216811656951904, "eval_python_code_alpaca_n_ngrams_match_1": 9.598, "eval_python_code_alpaca_n_ngrams_match_2": 2.85, "eval_python_code_alpaca_n_ngrams_match_3": 0.908, "eval_python_code_alpaca_num_pred_words": 43.852, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.572484652646338, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32320164210912244, "eval_python_code_alpaca_runtime": 11.04, "eval_python_code_alpaca_samples_per_second": 45.29, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4687875625658835, "eval_python_code_alpaca_token_set_f1_sem": 0.005874725154574615, "eval_python_code_alpaca_token_set_precision": 0.5253247145040438, "eval_python_code_alpaca_token_set_recall": 0.4462019786189241, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 88125 }, { "epoch": 16.92, "eval_wikibio_accuracy": 0.31334375, "eval_wikibio_bleu_score": 5.572736817357021, "eval_wikibio_bleu_score_sem": 0.19615787657738634, "eval_wikibio_emb_cos_sim": 0.745276927947998, "eval_wikibio_emb_cos_sim_sem": 0.008959668594132462, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.85088849067688, "eval_wikibio_n_ngrams_match_1": 9.876, "eval_wikibio_n_ngrams_match_2": 3.224, "eval_wikibio_n_ngrams_match_3": 1.174, "eval_wikibio_num_pred_words": 37.112, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.034834684163734, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3459333687339124, "eval_wikibio_runtime": 11.4967, "eval_wikibio_samples_per_second": 43.491, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.3111855269544557, "eval_wikibio_token_set_f1_sem": 0.005510770828876851, "eval_wikibio_token_set_precision": 0.3218343067096335, "eval_wikibio_token_set_recall": 0.31837329079144966, "eval_wikibio_true_num_tokens": 61.1328125, "step": 88125 }, { "epoch": 16.92, "eval_nq_accuracy": 0.5205, "eval_nq_bleu_score": 11.412114587487824, "eval_nq_bleu_score_sem": 0.47907245164006523, "eval_nq_emb_cos_sim": 0.8303395509719849, "eval_nq_emb_cos_sim_sem": 0.006778473694740039, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.252239465713501, "eval_nq_n_ngrams_match_1": 22.816, "eval_nq_n_ngrams_match_2": 8.274, "eval_nq_n_ngrams_match_3": 3.752, "eval_nq_num_pred_words": 49.164, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.509007104710902, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4398628779804204, "eval_nq_runtime": 11.2926, "eval_nq_samples_per_second": 44.277, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.45623160835884286, "eval_nq_token_set_f1_sem": 0.00501310246656571, "eval_nq_token_set_precision": 0.41365505697784927, "eval_nq_token_set_recall": 0.5164852625746719, "eval_nq_true_num_tokens": 64.0, "step": 88125 }, { "epoch": 16.92, "learning_rate": 0.001, "loss": 2.6295, "step": 88128 }, { "epoch": 16.92, "learning_rate": 0.001, "loss": 2.6264, "step": 88140 }, { "epoch": 16.93, "learning_rate": 0.001, "loss": 2.6211, "step": 88152 }, { "epoch": 16.93, "learning_rate": 0.001, "loss": 2.6302, "step": 88164 }, { "epoch": 16.93, "learning_rate": 0.001, "loss": 2.6378, "step": 88176 }, { "epoch": 16.93, "learning_rate": 0.001, "loss": 2.6219, "step": 88188 }, { "epoch": 16.94, "learning_rate": 0.001, "loss": 2.6288, "step": 88200 }, { "epoch": 16.94, "learning_rate": 0.001, "loss": 2.6334, "step": 88212 }, { "epoch": 16.94, "learning_rate": 0.001, "loss": 2.6375, "step": 88224 }, { "epoch": 16.94, "learning_rate": 0.001, "loss": 2.6304, "step": 88236 }, { "epoch": 16.94, "learning_rate": 0.001, "loss": 2.6238, "step": 88248 }, { "epoch": 16.95, "learning_rate": 0.001, "loss": 2.6209, "step": 88260 }, { "epoch": 16.95, "learning_rate": 0.001, "loss": 2.6316, "step": 88272 }, { "epoch": 16.95, "learning_rate": 0.001, "loss": 2.6296, "step": 88284 }, { "epoch": 16.95, "learning_rate": 0.001, "loss": 2.6196, "step": 88296 }, { "epoch": 16.96, "learning_rate": 0.001, "loss": 2.6317, "step": 88308 }, { "epoch": 16.96, "learning_rate": 0.001, "loss": 2.6374, "step": 88320 }, { "epoch": 16.96, "learning_rate": 0.001, "loss": 2.6376, "step": 88332 }, { "epoch": 16.96, "learning_rate": 0.001, "loss": 2.6301, "step": 88344 }, { "epoch": 16.97, "learning_rate": 0.001, "loss": 2.6365, "step": 88356 }, { "epoch": 16.97, "learning_rate": 0.001, "loss": 2.6311, "step": 88368 }, { "epoch": 16.97, "learning_rate": 0.001, "loss": 2.6262, "step": 88380 }, { "epoch": 16.97, "learning_rate": 0.001, "loss": 2.6271, "step": 88392 }, { "epoch": 16.97, "learning_rate": 0.001, "loss": 2.6346, "step": 88404 }, { "epoch": 16.98, "learning_rate": 0.001, "loss": 2.6248, "step": 88416 }, { "epoch": 16.98, "learning_rate": 0.001, "loss": 2.6256, "step": 88428 }, { "epoch": 16.98, "learning_rate": 0.001, "loss": 2.63, "step": 88440 }, { "epoch": 16.98, "learning_rate": 0.001, "loss": 2.6346, "step": 88452 }, { "epoch": 16.99, "learning_rate": 0.001, "loss": 2.6271, "step": 88464 }, { "epoch": 16.99, "learning_rate": 0.001, "loss": 2.6324, "step": 88476 }, { "epoch": 16.99, "learning_rate": 0.001, "loss": 2.6363, "step": 88488 }, { "epoch": 16.99, "learning_rate": 0.001, "loss": 2.6304, "step": 88500 }, { "epoch": 17.0, "learning_rate": 0.001, "loss": 2.6291, "step": 88512 }, { "epoch": 17.0, "learning_rate": 0.001, "loss": 2.6306, "step": 88524 }, { "epoch": 17.0, "learning_rate": 0.001, "loss": 2.6415, "step": 88536 }, { "epoch": 17.0, "learning_rate": 0.001, "loss": 2.602, "step": 88548 }, { "epoch": 17.0, "learning_rate": 0.001, "loss": 2.6149, "step": 88560 }, { "epoch": 17.01, "learning_rate": 0.001, "loss": 2.6113, "step": 88572 }, { "epoch": 17.01, "learning_rate": 0.001, "loss": 2.6142, "step": 88584 }, { "epoch": 17.01, "learning_rate": 0.001, "loss": 2.6126, "step": 88596 }, { "epoch": 17.01, "learning_rate": 0.001, "loss": 2.6136, "step": 88608 }, { "epoch": 17.02, "learning_rate": 0.001, "loss": 2.6139, "step": 88620 }, { "epoch": 17.02, "learning_rate": 0.001, "loss": 2.6216, "step": 88632 }, { "epoch": 17.02, "learning_rate": 0.001, "loss": 2.6034, "step": 88644 }, { "epoch": 17.02, "learning_rate": 0.001, "loss": 2.6078, "step": 88656 }, { "epoch": 17.03, "learning_rate": 0.001, "loss": 2.6058, "step": 88668 }, { "epoch": 17.03, "learning_rate": 0.001, "loss": 2.6126, "step": 88680 }, { "epoch": 17.03, "learning_rate": 0.001, "loss": 2.6021, "step": 88692 }, { "epoch": 17.03, "learning_rate": 0.001, "loss": 2.6197, "step": 88704 }, { "epoch": 17.03, "learning_rate": 0.001, "loss": 2.6012, "step": 88716 }, { "epoch": 17.04, "learning_rate": 0.001, "loss": 2.5997, "step": 88728 }, { "epoch": 17.04, "learning_rate": 0.001, "loss": 2.6143, "step": 88740 }, { "epoch": 17.04, "eval_ag_news_accuracy": 0.3156875, "eval_ag_news_bleu_score": 4.6675147192186595, "eval_ag_news_bleu_score_sem": 0.1491365633011962, "eval_ag_news_emb_cos_sim": 0.8023483753204346, "eval_ag_news_emb_cos_sim_sem": 0.007222164105726348, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.63279390335083, "eval_ag_news_n_ngrams_match_1": 13.742, "eval_ag_news_n_ngrams_match_2": 2.93, "eval_ag_news_n_ngrams_match_3": 0.828, "eval_ag_news_num_pred_words": 46.644, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.81832991023377, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33985919669849074, "eval_ag_news_runtime": 11.646, "eval_ag_news_samples_per_second": 42.933, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3448987879333023, "eval_ag_news_token_set_f1_sem": 0.004097661930618755, "eval_ag_news_token_set_precision": 0.32733849255868835, "eval_ag_news_token_set_recall": 0.3814584780406689, "eval_ag_news_true_num_tokens": 56.09375, "step": 88750 }, { "epoch": 17.04, "eval_anthropic_toxic_prompts_accuracy": 0.11065625, "eval_anthropic_toxic_prompts_bleu_score": 2.987299517984093, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11847478846882847, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6587800979614258, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009545800865106417, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.337388753890991, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.656, "eval_anthropic_toxic_prompts_num_pred_words": 46.826, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 28.145535744470337, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.875, "eval_anthropic_toxic_prompts_rouge_score": 0.20851663610810683, "eval_anthropic_toxic_prompts_runtime": 9.6327, "eval_anthropic_toxic_prompts_samples_per_second": 51.907, "eval_anthropic_toxic_prompts_steps_per_second": 0.104, "eval_anthropic_toxic_prompts_token_set_f1": 0.3444497014907716, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006509039386899776, "eval_anthropic_toxic_prompts_token_set_precision": 0.42286864078268754, "eval_anthropic_toxic_prompts_token_set_recall": 0.31930339771790134, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 88750 }, { "epoch": 17.04, "eval_arxiv_accuracy": 0.34165625, "eval_arxiv_bleu_score": 4.162906050002206, "eval_arxiv_bleu_score_sem": 0.1149602719791407, "eval_arxiv_emb_cos_sim": 0.7473907470703125, "eval_arxiv_emb_cos_sim_sem": 0.007669048457576373, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.48599910736084, "eval_arxiv_n_ngrams_match_1": 14.626, "eval_arxiv_n_ngrams_match_2": 2.796, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 40.098, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.655036704443724, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34754595692208085, "eval_arxiv_runtime": 10.1581, "eval_arxiv_samples_per_second": 49.222, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3436310725683357, "eval_arxiv_token_set_f1_sem": 0.004235325662792161, "eval_arxiv_token_set_precision": 0.29311578404507777, "eval_arxiv_token_set_recall": 0.4329247457494549, "eval_arxiv_true_num_tokens": 64.0, "step": 88750 }, { "epoch": 17.04, "eval_python_code_alpaca_accuracy": 0.15725, "eval_python_code_alpaca_bleu_score": 4.575335067226012, "eval_python_code_alpaca_bleu_score_sem": 0.15995974271436172, "eval_python_code_alpaca_emb_cos_sim": 0.7575218677520752, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007072569135292994, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.940490484237671, "eval_python_code_alpaca_n_ngrams_match_1": 9.76, "eval_python_code_alpaca_n_ngrams_match_2": 2.866, "eval_python_code_alpaca_n_ngrams_match_3": 0.948, "eval_python_code_alpaca_num_pred_words": 44.686, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.92512651242331, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32253498688181537, "eval_python_code_alpaca_runtime": 9.3878, "eval_python_code_alpaca_samples_per_second": 53.261, "eval_python_code_alpaca_steps_per_second": 0.107, "eval_python_code_alpaca_token_set_f1": 0.46720290052314467, "eval_python_code_alpaca_token_set_f1_sem": 0.005382000646208587, "eval_python_code_alpaca_token_set_precision": 0.5306423808027152, "eval_python_code_alpaca_token_set_recall": 0.43688671655481814, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 88750 }, { "epoch": 17.04, "eval_wikibio_accuracy": 0.3148125, "eval_wikibio_bleu_score": 5.638334766672394, "eval_wikibio_bleu_score_sem": 0.20056598930431221, "eval_wikibio_emb_cos_sim": 0.7304085493087769, "eval_wikibio_emb_cos_sim_sem": 0.009793156767749807, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.82354474067688, "eval_wikibio_n_ngrams_match_1": 9.906, "eval_wikibio_n_ngrams_match_2": 3.216, "eval_wikibio_n_ngrams_match_3": 1.162, "eval_wikibio_num_pred_words": 36.65, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.766150264649106, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34350377350783534, "eval_wikibio_runtime": 9.4401, "eval_wikibio_samples_per_second": 52.965, "eval_wikibio_steps_per_second": 0.106, "eval_wikibio_token_set_f1": 0.31244307355739775, "eval_wikibio_token_set_f1_sem": 0.005371737001507651, "eval_wikibio_token_set_precision": 0.321914726382962, "eval_wikibio_token_set_recall": 0.3210492601335306, "eval_wikibio_true_num_tokens": 61.1328125, "step": 88750 }, { "epoch": 17.04, "eval_nq_accuracy": 0.51915625, "eval_nq_bleu_score": 11.011399486763613, "eval_nq_bleu_score_sem": 0.4680170479662032, "eval_nq_emb_cos_sim": 0.823344349861145, "eval_nq_emb_cos_sim_sem": 0.007228062688884349, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2518482208251953, "eval_nq_n_ngrams_match_1": 22.434, "eval_nq_n_ngrams_match_2": 7.982, "eval_nq_n_ngrams_match_3": 3.602, "eval_nq_num_pred_words": 48.856, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.505287481977458, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43349728674212645, "eval_nq_runtime": 10.1848, "eval_nq_samples_per_second": 49.093, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.4489598693999021, "eval_nq_token_set_f1_sem": 0.004888694817836205, "eval_nq_token_set_precision": 0.40569028540608576, "eval_nq_token_set_recall": 0.5106004254677634, "eval_nq_true_num_tokens": 64.0, "step": 88750 }, { "epoch": 17.04, "learning_rate": 0.001, "loss": 2.6088, "step": 88752 }, { "epoch": 17.04, "learning_rate": 0.001, "loss": 2.6073, "step": 88764 }, { "epoch": 17.05, "learning_rate": 0.001, "loss": 2.619, "step": 88776 }, { "epoch": 17.05, "learning_rate": 0.001, "loss": 2.6128, "step": 88788 }, { "epoch": 17.05, "learning_rate": 0.001, "loss": 2.6111, "step": 88800 }, { "epoch": 17.05, "learning_rate": 0.001, "loss": 2.6096, "step": 88812 }, { "epoch": 17.06, "learning_rate": 0.001, "loss": 2.6104, "step": 88824 }, { "epoch": 17.06, "learning_rate": 0.001, "loss": 2.6071, "step": 88836 }, { "epoch": 17.06, "learning_rate": 0.001, "loss": 2.612, "step": 88848 }, { "epoch": 17.06, "learning_rate": 0.001, "loss": 2.6068, "step": 88860 }, { "epoch": 17.06, "learning_rate": 0.001, "loss": 2.624, "step": 88872 }, { "epoch": 17.07, "learning_rate": 0.001, "loss": 2.6137, "step": 88884 }, { "epoch": 17.07, "learning_rate": 0.001, "loss": 2.6182, "step": 88896 }, { "epoch": 17.07, "learning_rate": 0.001, "loss": 2.6166, "step": 88908 }, { "epoch": 17.07, "learning_rate": 0.001, "loss": 2.6157, "step": 88920 }, { "epoch": 17.08, "learning_rate": 0.001, "loss": 2.6145, "step": 88932 }, { "epoch": 17.08, "learning_rate": 0.001, "loss": 2.62, "step": 88944 }, { "epoch": 17.08, "learning_rate": 0.001, "loss": 2.6154, "step": 88956 }, { "epoch": 17.08, "learning_rate": 0.001, "loss": 2.6093, "step": 88968 }, { "epoch": 17.09, "learning_rate": 0.001, "loss": 2.6231, "step": 88980 }, { "epoch": 17.09, "learning_rate": 0.001, "loss": 2.6107, "step": 88992 }, { "epoch": 17.09, "learning_rate": 0.001, "loss": 2.6167, "step": 89004 }, { "epoch": 17.09, "learning_rate": 0.001, "loss": 2.6161, "step": 89016 }, { "epoch": 17.09, "learning_rate": 0.001, "loss": 2.6162, "step": 89028 }, { "epoch": 17.1, "learning_rate": 0.001, "loss": 2.6138, "step": 89040 }, { "epoch": 17.1, "learning_rate": 0.001, "loss": 2.6074, "step": 89052 }, { "epoch": 17.1, "learning_rate": 0.001, "loss": 2.6065, "step": 89064 }, { "epoch": 17.1, "learning_rate": 0.001, "loss": 2.6103, "step": 89076 }, { "epoch": 17.11, "learning_rate": 0.001, "loss": 2.6114, "step": 89088 }, { "epoch": 17.11, "learning_rate": 0.001, "loss": 2.6103, "step": 89100 }, { "epoch": 17.11, "learning_rate": 0.001, "loss": 2.6131, "step": 89112 }, { "epoch": 17.11, "learning_rate": 0.001, "loss": 2.6238, "step": 89124 }, { "epoch": 17.12, "learning_rate": 0.001, "loss": 2.6013, "step": 89136 }, { "epoch": 17.12, "learning_rate": 0.001, "loss": 2.6099, "step": 89148 }, { "epoch": 17.12, "learning_rate": 0.001, "loss": 2.6164, "step": 89160 }, { "epoch": 17.12, "learning_rate": 0.001, "loss": 2.6151, "step": 89172 }, { "epoch": 17.12, "learning_rate": 0.001, "loss": 2.6078, "step": 89184 }, { "epoch": 17.13, "learning_rate": 0.001, "loss": 2.6167, "step": 89196 }, { "epoch": 17.13, "learning_rate": 0.001, "loss": 2.6181, "step": 89208 }, { "epoch": 17.13, "learning_rate": 0.001, "loss": 2.6267, "step": 89220 }, { "epoch": 17.13, "learning_rate": 0.001, "loss": 2.6087, "step": 89232 }, { "epoch": 17.14, "learning_rate": 0.001, "loss": 2.6252, "step": 89244 }, { "epoch": 17.14, "learning_rate": 0.001, "loss": 2.6192, "step": 89256 }, { "epoch": 17.14, "learning_rate": 0.001, "loss": 2.6178, "step": 89268 }, { "epoch": 17.14, "learning_rate": 0.001, "loss": 2.6094, "step": 89280 }, { "epoch": 17.15, "learning_rate": 0.001, "loss": 2.6052, "step": 89292 }, { "epoch": 17.15, "learning_rate": 0.001, "loss": 2.604, "step": 89304 }, { "epoch": 17.15, "learning_rate": 0.001, "loss": 2.6255, "step": 89316 }, { "epoch": 17.15, "learning_rate": 0.001, "loss": 2.6256, "step": 89328 }, { "epoch": 17.15, "learning_rate": 0.001, "loss": 2.6251, "step": 89340 }, { "epoch": 17.16, "learning_rate": 0.001, "loss": 2.6134, "step": 89352 }, { "epoch": 17.16, "learning_rate": 0.001, "loss": 2.6114, "step": 89364 }, { "epoch": 17.16, "eval_ag_news_accuracy": 0.31690625, "eval_ag_news_bleu_score": 4.694297971392216, "eval_ag_news_bleu_score_sem": 0.14418469822231234, "eval_ag_news_emb_cos_sim": 0.7981653213500977, "eval_ag_news_emb_cos_sim_sem": 0.0077829595353001765, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.63482666015625, "eval_ag_news_n_ngrams_match_1": 13.67, "eval_ag_news_n_ngrams_match_2": 2.96, "eval_ag_news_n_ngrams_match_3": 0.872, "eval_ag_news_num_pred_words": 46.352, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.89528356526304, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3386531153738642, "eval_ag_news_runtime": 9.8397, "eval_ag_news_samples_per_second": 50.814, "eval_ag_news_steps_per_second": 0.102, "eval_ag_news_token_set_f1": 0.3437386125918118, "eval_ag_news_token_set_f1_sem": 0.004223226291789117, "eval_ag_news_token_set_precision": 0.32422626731649756, "eval_ag_news_token_set_recall": 0.38526324839311427, "eval_ag_news_true_num_tokens": 56.09375, "step": 89375 }, { "epoch": 17.16, "eval_anthropic_toxic_prompts_accuracy": 0.11253125, "eval_anthropic_toxic_prompts_bleu_score": 2.804244444386395, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.106576171121192, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6520543694496155, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010191310801642635, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.304748296737671, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.91, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.688, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.592, "eval_anthropic_toxic_prompts_num_pred_words": 47.022, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.24168390507637, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20585357014902225, "eval_anthropic_toxic_prompts_runtime": 9.876, "eval_anthropic_toxic_prompts_samples_per_second": 50.628, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3465561970843481, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006598468124445643, "eval_anthropic_toxic_prompts_token_set_precision": 0.4153035290859674, "eval_anthropic_toxic_prompts_token_set_recall": 0.32719297589939617, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 89375 }, { "epoch": 17.16, "eval_arxiv_accuracy": 0.33596875, "eval_arxiv_bleu_score": 4.1826069297446375, "eval_arxiv_bleu_score_sem": 0.12194631517516684, "eval_arxiv_emb_cos_sim": 0.7566304206848145, "eval_arxiv_emb_cos_sim_sem": 0.006831079547126714, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4935834407806396, "eval_arxiv_n_ngrams_match_1": 14.622, "eval_arxiv_n_ngrams_match_2": 2.816, "eval_arxiv_n_ngrams_match_3": 0.632, "eval_arxiv_num_pred_words": 40.368, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.90364496290266, "eval_arxiv_pred_num_tokens": 62.9296875, "eval_arxiv_rouge_score": 0.34803102864349195, "eval_arxiv_runtime": 10.1579, "eval_arxiv_samples_per_second": 49.223, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3420449944327993, "eval_arxiv_token_set_f1_sem": 0.004321572434466679, "eval_arxiv_token_set_precision": 0.29222614572001665, "eval_arxiv_token_set_recall": 0.4304774748558225, "eval_arxiv_true_num_tokens": 64.0, "step": 89375 }, { "epoch": 17.16, "eval_python_code_alpaca_accuracy": 0.15809375, "eval_python_code_alpaca_bleu_score": 4.121970189101972, "eval_python_code_alpaca_bleu_score_sem": 0.1261764158826135, "eval_python_code_alpaca_emb_cos_sim": 0.7409356832504272, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010704806271930604, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.932617425918579, "eval_python_code_alpaca_n_ngrams_match_1": 9.44, "eval_python_code_alpaca_n_ngrams_match_2": 2.642, "eval_python_code_alpaca_n_ngrams_match_3": 0.784, "eval_python_code_alpaca_num_pred_words": 43.396, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.7767128888696, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31756882108525736, "eval_python_code_alpaca_runtime": 10.9009, "eval_python_code_alpaca_samples_per_second": 45.868, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.4612355606578279, "eval_python_code_alpaca_token_set_f1_sem": 0.005953285676128513, "eval_python_code_alpaca_token_set_precision": 0.515498659707188, "eval_python_code_alpaca_token_set_recall": 0.44057147854144446, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 89375 }, { "epoch": 17.16, "eval_wikibio_accuracy": 0.3150625, "eval_wikibio_bleu_score": 5.533098819145493, "eval_wikibio_bleu_score_sem": 0.20389396864023673, "eval_wikibio_emb_cos_sim": 0.732222318649292, "eval_wikibio_emb_cos_sim_sem": 0.009723669126900994, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8134427070617676, "eval_wikibio_n_ngrams_match_1": 9.482, "eval_wikibio_n_ngrams_match_2": 3.156, "eval_wikibio_n_ngrams_match_3": 1.144, "eval_wikibio_num_pred_words": 35.762, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.30614647509766, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3319803673388446, "eval_wikibio_runtime": 9.9792, "eval_wikibio_samples_per_second": 50.104, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.2993725549059298, "eval_wikibio_token_set_f1_sem": 0.00593470346574923, "eval_wikibio_token_set_precision": 0.3055587030199752, "eval_wikibio_token_set_recall": 0.3096231074058095, "eval_wikibio_true_num_tokens": 61.1328125, "step": 89375 }, { "epoch": 17.16, "eval_nq_accuracy": 0.5186875, "eval_nq_bleu_score": 11.268336451329896, "eval_nq_bleu_score_sem": 0.47553609192078244, "eval_nq_emb_cos_sim": 0.8264628648757935, "eval_nq_emb_cos_sim_sem": 0.007219576684354484, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2470948696136475, "eval_nq_n_ngrams_match_1": 22.698, "eval_nq_n_ngrams_match_2": 8.252, "eval_nq_n_ngrams_match_3": 3.73, "eval_nq_num_pred_words": 49.194, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.460212725153399, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4379513194953767, "eval_nq_runtime": 10.5909, "eval_nq_samples_per_second": 47.21, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.45475631645148573, "eval_nq_token_set_f1_sem": 0.004952126246909426, "eval_nq_token_set_precision": 0.4104870432286527, "eval_nq_token_set_recall": 0.5182013174448464, "eval_nq_true_num_tokens": 64.0, "step": 89375 }, { "epoch": 17.16, "learning_rate": 0.001, "loss": 2.6117, "step": 89376 }, { "epoch": 17.16, "learning_rate": 0.001, "loss": 2.6114, "step": 89388 }, { "epoch": 17.17, "learning_rate": 0.001, "loss": 2.606, "step": 89400 }, { "epoch": 17.17, "learning_rate": 0.001, "loss": 2.6149, "step": 89412 }, { "epoch": 17.17, "learning_rate": 0.001, "loss": 2.6144, "step": 89424 }, { "epoch": 17.17, "learning_rate": 0.001, "loss": 2.6201, "step": 89436 }, { "epoch": 17.18, "learning_rate": 0.001, "loss": 2.6128, "step": 89448 }, { "epoch": 17.18, "learning_rate": 0.001, "loss": 2.6205, "step": 89460 }, { "epoch": 17.18, "learning_rate": 0.001, "loss": 2.6242, "step": 89472 }, { "epoch": 17.18, "learning_rate": 0.001, "loss": 2.6156, "step": 89484 }, { "epoch": 17.18, "learning_rate": 0.001, "loss": 2.6061, "step": 89496 }, { "epoch": 17.19, "learning_rate": 0.001, "loss": 2.6138, "step": 89508 }, { "epoch": 17.19, "learning_rate": 0.001, "loss": 2.6254, "step": 89520 }, { "epoch": 17.19, "learning_rate": 0.001, "loss": 2.6192, "step": 89532 }, { "epoch": 17.19, "learning_rate": 0.001, "loss": 2.6137, "step": 89544 }, { "epoch": 17.2, "learning_rate": 0.001, "loss": 2.6167, "step": 89556 }, { "epoch": 17.2, "learning_rate": 0.001, "loss": 2.6159, "step": 89568 }, { "epoch": 17.2, "learning_rate": 0.001, "loss": 2.6213, "step": 89580 }, { "epoch": 17.2, "learning_rate": 0.001, "loss": 2.6192, "step": 89592 }, { "epoch": 17.21, "learning_rate": 0.001, "loss": 2.6202, "step": 89604 }, { "epoch": 17.21, "learning_rate": 0.001, "loss": 2.6054, "step": 89616 }, { "epoch": 17.21, "learning_rate": 0.001, "loss": 2.6115, "step": 89628 }, { "epoch": 17.21, "learning_rate": 0.001, "loss": 2.6216, "step": 89640 }, { "epoch": 17.21, "learning_rate": 0.001, "loss": 2.621, "step": 89652 }, { "epoch": 17.22, "learning_rate": 0.001, "loss": 2.6148, "step": 89664 }, { "epoch": 17.22, "learning_rate": 0.001, "loss": 2.6173, "step": 89676 }, { "epoch": 17.22, "learning_rate": 0.001, "loss": 2.6144, "step": 89688 }, { "epoch": 17.22, "learning_rate": 0.001, "loss": 2.6248, "step": 89700 }, { "epoch": 17.23, "learning_rate": 0.001, "loss": 2.6197, "step": 89712 }, { "epoch": 17.23, "learning_rate": 0.001, "loss": 2.6108, "step": 89724 }, { "epoch": 17.23, "learning_rate": 0.001, "loss": 2.6146, "step": 89736 }, { "epoch": 17.23, "learning_rate": 0.001, "loss": 2.6135, "step": 89748 }, { "epoch": 17.24, "learning_rate": 0.001, "loss": 2.607, "step": 89760 }, { "epoch": 17.24, "learning_rate": 0.001, "loss": 2.6227, "step": 89772 }, { "epoch": 17.24, "learning_rate": 0.001, "loss": 2.6101, "step": 89784 }, { "epoch": 17.24, "learning_rate": 0.001, "loss": 2.6225, "step": 89796 }, { "epoch": 17.24, "learning_rate": 0.001, "loss": 2.6107, "step": 89808 }, { "epoch": 17.25, "learning_rate": 0.001, "loss": 2.6199, "step": 89820 }, { "epoch": 17.25, "learning_rate": 0.001, "loss": 2.6206, "step": 89832 }, { "epoch": 17.25, "learning_rate": 0.001, "loss": 2.6202, "step": 89844 }, { "epoch": 17.25, "learning_rate": 0.001, "loss": 2.607, "step": 89856 }, { "epoch": 17.26, "learning_rate": 0.001, "loss": 2.6057, "step": 89868 }, { "epoch": 17.26, "learning_rate": 0.001, "loss": 2.6111, "step": 89880 }, { "epoch": 17.26, "learning_rate": 0.001, "loss": 2.6072, "step": 89892 }, { "epoch": 17.26, "learning_rate": 0.001, "loss": 2.617, "step": 89904 }, { "epoch": 17.26, "learning_rate": 0.001, "loss": 2.6185, "step": 89916 }, { "epoch": 17.27, "learning_rate": 0.001, "loss": 2.6171, "step": 89928 }, { "epoch": 17.27, "learning_rate": 0.001, "loss": 2.6158, "step": 89940 }, { "epoch": 17.27, "learning_rate": 0.001, "loss": 2.6148, "step": 89952 }, { "epoch": 17.27, "learning_rate": 0.001, "loss": 2.6203, "step": 89964 }, { "epoch": 17.28, "learning_rate": 0.001, "loss": 2.6193, "step": 89976 }, { "epoch": 17.28, "learning_rate": 0.001, "loss": 2.6283, "step": 89988 }, { "epoch": 17.28, "learning_rate": 0.001, "loss": 2.6204, "step": 90000 }, { "epoch": 17.28, "eval_ag_news_accuracy": 0.31640625, "eval_ag_news_bleu_score": 4.7260294896508785, "eval_ag_news_bleu_score_sem": 0.14776170162887764, "eval_ag_news_emb_cos_sim": 0.795169472694397, "eval_ag_news_emb_cos_sim_sem": 0.007596495950587409, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6366219520568848, "eval_ag_news_n_ngrams_match_1": 13.712, "eval_ag_news_n_ngrams_match_2": 2.95, "eval_ag_news_n_ngrams_match_3": 0.852, "eval_ag_news_num_pred_words": 46.868, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.96337776711517, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3375020792953003, "eval_ag_news_runtime": 10.7034, "eval_ag_news_samples_per_second": 46.714, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3427749737585387, "eval_ag_news_token_set_f1_sem": 0.004338805055273504, "eval_ag_news_token_set_precision": 0.3266052240158575, "eval_ag_news_token_set_recall": 0.3775923610504178, "eval_ag_news_true_num_tokens": 56.09375, "step": 90000 }, { "epoch": 17.28, "eval_anthropic_toxic_prompts_accuracy": 0.1096875, "eval_anthropic_toxic_prompts_bleu_score": 2.9373206136223606, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10876827192405612, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6542224884033203, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009752020126514159, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3060574531555176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.042, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.806, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646, "eval_anthropic_toxic_prompts_num_pred_words": 46.884, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.27737088521623, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20829231220160277, "eval_anthropic_toxic_prompts_runtime": 19.0422, "eval_anthropic_toxic_prompts_samples_per_second": 26.257, "eval_anthropic_toxic_prompts_steps_per_second": 0.053, "eval_anthropic_toxic_prompts_token_set_f1": 0.3478834925706295, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064968851025868505, "eval_anthropic_toxic_prompts_token_set_precision": 0.42644561128024494, "eval_anthropic_toxic_prompts_token_set_recall": 0.32165547307147596, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 90000 }, { "epoch": 17.28, "eval_arxiv_accuracy": 0.34090625, "eval_arxiv_bleu_score": 4.249045914147333, "eval_arxiv_bleu_score_sem": 0.12409575623378936, "eval_arxiv_emb_cos_sim": 0.7413960695266724, "eval_arxiv_emb_cos_sim_sem": 0.008937436790087473, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4815762042999268, "eval_arxiv_n_ngrams_match_1": 14.562, "eval_arxiv_n_ngrams_match_2": 2.82, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 40.066, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.51092557235896, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3487994924683863, "eval_arxiv_runtime": 10.0611, "eval_arxiv_samples_per_second": 49.697, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.34326679836964336, "eval_arxiv_token_set_f1_sem": 0.004248880201704165, "eval_arxiv_token_set_precision": 0.2917895595173516, "eval_arxiv_token_set_recall": 0.4298496292041814, "eval_arxiv_true_num_tokens": 64.0, "step": 90000 }, { "epoch": 17.28, "eval_python_code_alpaca_accuracy": 0.15740625, "eval_python_code_alpaca_bleu_score": 4.399076243455381, "eval_python_code_alpaca_bleu_score_sem": 0.13434979844042105, "eval_python_code_alpaca_emb_cos_sim": 0.7520021796226501, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009809162692016326, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9550557136535645, "eval_python_code_alpaca_n_ngrams_match_1": 9.664, "eval_python_code_alpaca_n_ngrams_match_2": 2.812, "eval_python_code_alpaca_n_ngrams_match_3": 0.868, "eval_python_code_alpaca_num_pred_words": 43.576, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.202792547762346, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32213327366366057, "eval_python_code_alpaca_runtime": 9.7654, "eval_python_code_alpaca_samples_per_second": 51.201, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.45818732986380006, "eval_python_code_alpaca_token_set_f1_sem": 0.005535650094250777, "eval_python_code_alpaca_token_set_precision": 0.5298475077770941, "eval_python_code_alpaca_token_set_recall": 0.4260776070673612, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 90000 }, { "epoch": 17.28, "eval_wikibio_accuracy": 0.31521875, "eval_wikibio_bleu_score": 5.514890354089845, "eval_wikibio_bleu_score_sem": 0.20984692919267317, "eval_wikibio_emb_cos_sim": 0.7312206625938416, "eval_wikibio_emb_cos_sim_sem": 0.009199000131946081, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8664937019348145, "eval_wikibio_n_ngrams_match_1": 9.498, "eval_wikibio_n_ngrams_match_2": 3.084, "eval_wikibio_n_ngrams_match_3": 1.116, "eval_wikibio_num_pred_words": 36.028, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 47.7745801459949, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3289124809013327, "eval_wikibio_runtime": 19.3745, "eval_wikibio_samples_per_second": 25.807, "eval_wikibio_steps_per_second": 0.052, "eval_wikibio_token_set_f1": 0.30423374802300884, "eval_wikibio_token_set_f1_sem": 0.005436087489321035, "eval_wikibio_token_set_precision": 0.30820377615406147, "eval_wikibio_token_set_recall": 0.321600153976657, "eval_wikibio_true_num_tokens": 61.1328125, "step": 90000 }, { "epoch": 17.28, "eval_nq_accuracy": 0.5193125, "eval_nq_bleu_score": 11.064626359348138, "eval_nq_bleu_score_sem": 0.4438811018196554, "eval_nq_emb_cos_sim": 0.8243062496185303, "eval_nq_emb_cos_sim_sem": 0.00740522433242186, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.250351667404175, "eval_nq_n_ngrams_match_1": 22.512, "eval_nq_n_ngrams_match_2": 8.146, "eval_nq_n_ngrams_match_3": 3.646, "eval_nq_num_pred_words": 48.812, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.491072950534342, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43625374932715244, "eval_nq_runtime": 10.2379, "eval_nq_samples_per_second": 48.838, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.4513238546504728, "eval_nq_token_set_f1_sem": 0.004952742793422712, "eval_nq_token_set_precision": 0.4073663963632933, "eval_nq_token_set_recall": 0.5142992826843792, "eval_nq_true_num_tokens": 64.0, "step": 90000 }, { "epoch": 17.28, "learning_rate": 0.001, "loss": 2.6264, "step": 90012 }, { "epoch": 17.29, "learning_rate": 0.001, "loss": 2.6256, "step": 90024 }, { "epoch": 17.29, "learning_rate": 0.001, "loss": 2.6117, "step": 90036 }, { "epoch": 17.29, "learning_rate": 0.001, "loss": 2.6142, "step": 90048 }, { "epoch": 17.29, "learning_rate": 0.001, "loss": 2.6216, "step": 90060 }, { "epoch": 17.29, "learning_rate": 0.001, "loss": 2.6087, "step": 90072 }, { "epoch": 17.3, "learning_rate": 0.001, "loss": 2.6109, "step": 90084 }, { "epoch": 17.3, "learning_rate": 0.001, "loss": 2.6042, "step": 90096 }, { "epoch": 17.3, "learning_rate": 0.001, "loss": 2.6122, "step": 90108 }, { "epoch": 17.3, "learning_rate": 0.001, "loss": 2.6001, "step": 90120 }, { "epoch": 17.31, "learning_rate": 0.001, "loss": 2.603, "step": 90132 }, { "epoch": 17.31, "learning_rate": 0.001, "loss": 2.6115, "step": 90144 }, { "epoch": 17.31, "learning_rate": 0.001, "loss": 2.6097, "step": 90156 }, { "epoch": 17.31, "learning_rate": 0.001, "loss": 2.6115, "step": 90168 }, { "epoch": 17.32, "learning_rate": 0.001, "loss": 2.6136, "step": 90180 }, { "epoch": 17.32, "learning_rate": 0.001, "loss": 2.6275, "step": 90192 }, { "epoch": 17.32, "learning_rate": 0.001, "loss": 2.6179, "step": 90204 }, { "epoch": 17.32, "learning_rate": 0.001, "loss": 2.6199, "step": 90216 }, { "epoch": 17.32, "learning_rate": 0.001, "loss": 2.6238, "step": 90228 }, { "epoch": 17.33, "learning_rate": 0.001, "loss": 2.6164, "step": 90240 }, { "epoch": 17.33, "learning_rate": 0.001, "loss": 2.6166, "step": 90252 }, { "epoch": 17.33, "learning_rate": 0.001, "loss": 2.6241, "step": 90264 }, { "epoch": 17.33, "learning_rate": 0.001, "loss": 2.6273, "step": 90276 }, { "epoch": 17.34, "learning_rate": 0.001, "loss": 2.6156, "step": 90288 }, { "epoch": 17.34, "learning_rate": 0.001, "loss": 2.6147, "step": 90300 }, { "epoch": 17.34, "learning_rate": 0.001, "loss": 2.6191, "step": 90312 }, { "epoch": 17.34, "learning_rate": 0.001, "loss": 2.6212, "step": 90324 }, { "epoch": 17.35, "learning_rate": 0.001, "loss": 2.6192, "step": 90336 }, { "epoch": 17.35, "learning_rate": 0.001, "loss": 2.6218, "step": 90348 }, { "epoch": 17.35, "learning_rate": 0.001, "loss": 2.6252, "step": 90360 }, { "epoch": 17.35, "learning_rate": 0.001, "loss": 2.6087, "step": 90372 }, { "epoch": 17.35, "learning_rate": 0.001, "loss": 2.6169, "step": 90384 }, { "epoch": 17.36, "learning_rate": 0.001, "loss": 2.6183, "step": 90396 }, { "epoch": 17.36, "learning_rate": 0.001, "loss": 2.6212, "step": 90408 }, { "epoch": 17.36, "learning_rate": 0.001, "loss": 2.6166, "step": 90420 }, { "epoch": 17.36, "learning_rate": 0.001, "loss": 2.6205, "step": 90432 }, { "epoch": 17.37, "learning_rate": 0.001, "loss": 2.6204, "step": 90444 }, { "epoch": 17.37, "learning_rate": 0.001, "loss": 2.6125, "step": 90456 }, { "epoch": 17.37, "learning_rate": 0.001, "loss": 2.6114, "step": 90468 }, { "epoch": 17.37, "learning_rate": 0.001, "loss": 2.6136, "step": 90480 }, { "epoch": 17.38, "learning_rate": 0.001, "loss": 2.6056, "step": 90492 }, { "epoch": 17.38, "learning_rate": 0.001, "loss": 2.6163, "step": 90504 }, { "epoch": 17.38, "learning_rate": 0.001, "loss": 2.6144, "step": 90516 }, { "epoch": 17.38, "learning_rate": 0.001, "loss": 2.62, "step": 90528 }, { "epoch": 17.38, "learning_rate": 0.001, "loss": 2.6213, "step": 90540 }, { "epoch": 17.39, "learning_rate": 0.001, "loss": 2.6146, "step": 90552 }, { "epoch": 17.39, "learning_rate": 0.001, "loss": 2.6233, "step": 90564 }, { "epoch": 17.39, "learning_rate": 0.001, "loss": 2.617, "step": 90576 }, { "epoch": 17.39, "learning_rate": 0.001, "loss": 2.6133, "step": 90588 }, { "epoch": 17.4, "learning_rate": 0.001, "loss": 2.6185, "step": 90600 }, { "epoch": 17.4, "learning_rate": 0.001, "loss": 2.6202, "step": 90612 }, { "epoch": 17.4, "learning_rate": 0.001, "loss": 2.6194, "step": 90624 }, { "epoch": 17.4, "eval_ag_news_accuracy": 0.316125, "eval_ag_news_bleu_score": 4.641612217623085, "eval_ag_news_bleu_score_sem": 0.1414392736342318, "eval_ag_news_emb_cos_sim": 0.7987799048423767, "eval_ag_news_emb_cos_sim_sem": 0.006944879989373176, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6240322589874268, "eval_ag_news_n_ngrams_match_1": 13.586, "eval_ag_news_n_ngrams_match_2": 2.906, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 46.346, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.4884265117241, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3361633313863822, "eval_ag_news_runtime": 10.892, "eval_ag_news_samples_per_second": 45.905, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.3420781776701675, "eval_ag_news_token_set_f1_sem": 0.004369658769599392, "eval_ag_news_token_set_precision": 0.32478240547633636, "eval_ag_news_token_set_recall": 0.3755447077282753, "eval_ag_news_true_num_tokens": 56.09375, "step": 90625 }, { "epoch": 17.4, "eval_anthropic_toxic_prompts_accuracy": 0.112125, "eval_anthropic_toxic_prompts_bleu_score": 2.9783182036535596, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11312830653422552, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6633878946304321, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008658115137026603, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2985990047454834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.978, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622, "eval_anthropic_toxic_prompts_num_pred_words": 46.624, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.074680837948137, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2064232804810187, "eval_anthropic_toxic_prompts_runtime": 10.3476, "eval_anthropic_toxic_prompts_samples_per_second": 48.32, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3449693000200593, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006635802474549386, "eval_anthropic_toxic_prompts_token_set_precision": 0.4181623826714901, "eval_anthropic_toxic_prompts_token_set_recall": 0.3224460625316401, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 90625 }, { "epoch": 17.4, "eval_arxiv_accuracy": 0.341375, "eval_arxiv_bleu_score": 4.157524255818762, "eval_arxiv_bleu_score_sem": 0.12181247753442959, "eval_arxiv_emb_cos_sim": 0.750731348991394, "eval_arxiv_emb_cos_sim_sem": 0.00741885686228161, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4882240295410156, "eval_arxiv_n_ngrams_match_1": 14.638, "eval_arxiv_n_ngrams_match_2": 2.78, "eval_arxiv_n_ngrams_match_3": 0.61, "eval_arxiv_num_pred_words": 40.312, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.72777250564538, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35108606850933954, "eval_arxiv_runtime": 11.2193, "eval_arxiv_samples_per_second": 44.566, "eval_arxiv_steps_per_second": 0.089, "eval_arxiv_token_set_f1": 0.34465462107517303, "eval_arxiv_token_set_f1_sem": 0.004016034905708582, "eval_arxiv_token_set_precision": 0.2946193091793197, "eval_arxiv_token_set_recall": 0.4323113110917569, "eval_arxiv_true_num_tokens": 64.0, "step": 90625 }, { "epoch": 17.4, "eval_python_code_alpaca_accuracy": 0.1571875, "eval_python_code_alpaca_bleu_score": 4.174079899995925, "eval_python_code_alpaca_bleu_score_sem": 0.13273837925236556, "eval_python_code_alpaca_emb_cos_sim": 0.7373853325843811, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009413866082743636, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9618608951568604, "eval_python_code_alpaca_n_ngrams_match_1": 9.5, "eval_python_code_alpaca_n_ngrams_match_2": 2.612, "eval_python_code_alpaca_n_ngrams_match_3": 0.786, "eval_python_code_alpaca_num_pred_words": 43.024, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.333916692186193, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3193054661167657, "eval_python_code_alpaca_runtime": 11.4481, "eval_python_code_alpaca_samples_per_second": 43.675, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.4565497441294427, "eval_python_code_alpaca_token_set_f1_sem": 0.005878026862612197, "eval_python_code_alpaca_token_set_precision": 0.5182756178373794, "eval_python_code_alpaca_token_set_recall": 0.43228177861391315, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 90625 }, { "epoch": 17.4, "eval_wikibio_accuracy": 0.317, "eval_wikibio_bleu_score": 5.748530180680383, "eval_wikibio_bleu_score_sem": 0.20994803427481568, "eval_wikibio_emb_cos_sim": 0.7392998337745667, "eval_wikibio_emb_cos_sim_sem": 0.009507575649140561, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.802290916442871, "eval_wikibio_n_ngrams_match_1": 9.934, "eval_wikibio_n_ngrams_match_2": 3.204, "eval_wikibio_n_ngrams_match_3": 1.154, "eval_wikibio_num_pred_words": 36.222, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.803708564072366, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3496231017465844, "eval_wikibio_runtime": 10.2385, "eval_wikibio_samples_per_second": 48.835, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3143530622167457, "eval_wikibio_token_set_f1_sem": 0.0052439234527126895, "eval_wikibio_token_set_precision": 0.3220405710964759, "eval_wikibio_token_set_recall": 0.32165786468039875, "eval_wikibio_true_num_tokens": 61.1328125, "step": 90625 }, { "epoch": 17.4, "eval_nq_accuracy": 0.51896875, "eval_nq_bleu_score": 11.045099552220618, "eval_nq_bleu_score_sem": 0.45819793811311665, "eval_nq_emb_cos_sim": 0.8211745023727417, "eval_nq_emb_cos_sim_sem": 0.008357733889144953, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.246525287628174, "eval_nq_n_ngrams_match_1": 22.398, "eval_nq_n_ngrams_match_2": 8.124, "eval_nq_n_ngrams_match_3": 3.622, "eval_nq_num_pred_words": 48.816, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.454825892673409, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43370102382407916, "eval_nq_runtime": 15.2015, "eval_nq_samples_per_second": 32.891, "eval_nq_steps_per_second": 0.066, "eval_nq_token_set_f1": 0.44947621353492245, "eval_nq_token_set_f1_sem": 0.005011708707236234, "eval_nq_token_set_precision": 0.4052253033166563, "eval_nq_token_set_recall": 0.5162832689652473, "eval_nq_true_num_tokens": 64.0, "step": 90625 }, { "epoch": 17.4, "learning_rate": 0.001, "loss": 2.6188, "step": 90636 }, { "epoch": 17.41, "learning_rate": 0.001, "loss": 2.6241, "step": 90648 }, { "epoch": 17.41, "learning_rate": 0.001, "loss": 2.6115, "step": 90660 }, { "epoch": 17.41, "learning_rate": 0.001, "loss": 2.6143, "step": 90672 }, { "epoch": 17.41, "learning_rate": 0.001, "loss": 2.6123, "step": 90684 }, { "epoch": 17.41, "learning_rate": 0.001, "loss": 2.6159, "step": 90696 }, { "epoch": 17.42, "learning_rate": 0.001, "loss": 2.6135, "step": 90708 }, { "epoch": 17.42, "learning_rate": 0.001, "loss": 2.6188, "step": 90720 }, { "epoch": 17.42, "learning_rate": 0.001, "loss": 2.6207, "step": 90732 }, { "epoch": 17.42, "learning_rate": 0.001, "loss": 2.6234, "step": 90744 }, { "epoch": 17.43, "learning_rate": 0.001, "loss": 2.6194, "step": 90756 }, { "epoch": 17.43, "learning_rate": 0.001, "loss": 2.6203, "step": 90768 }, { "epoch": 17.43, "learning_rate": 0.001, "loss": 2.6155, "step": 90780 }, { "epoch": 17.43, "learning_rate": 0.001, "loss": 2.6092, "step": 90792 }, { "epoch": 17.44, "learning_rate": 0.001, "loss": 2.6288, "step": 90804 }, { "epoch": 17.44, "learning_rate": 0.001, "loss": 2.6211, "step": 90816 }, { "epoch": 17.44, "learning_rate": 0.001, "loss": 2.6159, "step": 90828 }, { "epoch": 17.44, "learning_rate": 0.001, "loss": 2.6085, "step": 90840 }, { "epoch": 17.44, "learning_rate": 0.001, "loss": 2.6203, "step": 90852 }, { "epoch": 17.45, "learning_rate": 0.001, "loss": 2.6155, "step": 90864 }, { "epoch": 17.45, "learning_rate": 0.001, "loss": 2.6127, "step": 90876 }, { "epoch": 17.45, "learning_rate": 0.001, "loss": 2.6183, "step": 90888 }, { "epoch": 17.45, "learning_rate": 0.001, "loss": 2.6188, "step": 90900 }, { "epoch": 17.46, "learning_rate": 0.001, "loss": 2.6184, "step": 90912 }, { "epoch": 17.46, "learning_rate": 0.001, "loss": 2.6121, "step": 90924 }, { "epoch": 17.46, "learning_rate": 0.001, "loss": 2.6129, "step": 90936 }, { "epoch": 17.46, "learning_rate": 0.001, "loss": 2.6132, "step": 90948 }, { "epoch": 17.47, "learning_rate": 0.001, "loss": 2.6081, "step": 90960 }, { "epoch": 17.47, "learning_rate": 0.001, "loss": 2.6108, "step": 90972 }, { "epoch": 17.47, "learning_rate": 0.001, "loss": 2.6123, "step": 90984 }, { "epoch": 17.47, "learning_rate": 0.001, "loss": 2.6201, "step": 90996 }, { "epoch": 17.47, "learning_rate": 0.001, "loss": 2.6215, "step": 91008 }, { "epoch": 17.48, "learning_rate": 0.001, "loss": 2.6179, "step": 91020 }, { "epoch": 17.48, "learning_rate": 0.001, "loss": 2.6265, "step": 91032 }, { "epoch": 17.48, "learning_rate": 0.001, "loss": 2.6203, "step": 91044 }, { "epoch": 17.48, "learning_rate": 0.001, "loss": 2.6185, "step": 91056 }, { "epoch": 17.49, "learning_rate": 0.001, "loss": 2.6203, "step": 91068 }, { "epoch": 17.49, "learning_rate": 0.001, "loss": 2.6193, "step": 91080 }, { "epoch": 17.49, "learning_rate": 0.001, "loss": 2.6171, "step": 91092 }, { "epoch": 17.49, "learning_rate": 0.001, "loss": 2.6078, "step": 91104 }, { "epoch": 17.5, "learning_rate": 0.001, "loss": 2.6086, "step": 91116 }, { "epoch": 17.5, "learning_rate": 0.001, "loss": 2.6207, "step": 91128 }, { "epoch": 17.5, "learning_rate": 0.001, "loss": 2.6108, "step": 91140 }, { "epoch": 17.5, "learning_rate": 0.001, "loss": 2.6211, "step": 91152 }, { "epoch": 17.5, "learning_rate": 0.001, "loss": 2.622, "step": 91164 }, { "epoch": 17.51, "learning_rate": 0.001, "loss": 2.611, "step": 91176 }, { "epoch": 17.51, "learning_rate": 0.001, "loss": 2.6149, "step": 91188 }, { "epoch": 17.51, "learning_rate": 0.001, "loss": 2.6263, "step": 91200 }, { "epoch": 17.51, "learning_rate": 0.001, "loss": 2.6215, "step": 91212 }, { "epoch": 17.52, "learning_rate": 0.001, "loss": 2.6143, "step": 91224 }, { "epoch": 17.52, "learning_rate": 0.001, "loss": 2.6056, "step": 91236 }, { "epoch": 17.52, "learning_rate": 0.001, "loss": 2.6288, "step": 91248 }, { "epoch": 17.52, "eval_ag_news_accuracy": 0.31796875, "eval_ag_news_bleu_score": 4.6029982173523045, "eval_ag_news_bleu_score_sem": 0.1519063419266466, "eval_ag_news_emb_cos_sim": 0.7865685224533081, "eval_ag_news_emb_cos_sim_sem": 0.008511251150667852, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6188931465148926, "eval_ag_news_n_ngrams_match_1": 13.436, "eval_ag_news_n_ngrams_match_2": 2.906, "eval_ag_news_n_ngrams_match_3": 0.826, "eval_ag_news_num_pred_words": 46.484, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.296263468135336, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33267937227613475, "eval_ag_news_runtime": 10.3221, "eval_ag_news_samples_per_second": 48.44, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3398246223304076, "eval_ag_news_token_set_f1_sem": 0.004511663921237267, "eval_ag_news_token_set_precision": 0.3192426771543661, "eval_ag_news_token_set_recall": 0.38440076752259994, "eval_ag_news_true_num_tokens": 56.09375, "step": 91250 }, { "epoch": 17.52, "eval_anthropic_toxic_prompts_accuracy": 0.1125, "eval_anthropic_toxic_prompts_bleu_score": 2.905474631709576, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11543800793860712, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6496694087982178, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009517883846324014, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.286526918411255, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.842, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.702, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.618, "eval_anthropic_toxic_prompts_num_pred_words": 46.7, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.749797906365554, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20415289339524428, "eval_anthropic_toxic_prompts_runtime": 9.824, "eval_anthropic_toxic_prompts_samples_per_second": 50.896, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3449498213857672, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006677122648178251, "eval_anthropic_toxic_prompts_token_set_precision": 0.41048496771632176, "eval_anthropic_toxic_prompts_token_set_recall": 0.3307222472771074, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 91250 }, { "epoch": 17.52, "eval_arxiv_accuracy": 0.3414375, "eval_arxiv_bleu_score": 4.060234639688883, "eval_arxiv_bleu_score_sem": 0.1168993043894669, "eval_arxiv_emb_cos_sim": 0.7421520948410034, "eval_arxiv_emb_cos_sim_sem": 0.007438515074389402, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4817111492156982, "eval_arxiv_n_ngrams_match_1": 14.06, "eval_arxiv_n_ngrams_match_2": 2.71, "eval_arxiv_n_ngrams_match_3": 0.622, "eval_arxiv_num_pred_words": 38.91, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.51531305249939, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3380762485498364, "eval_arxiv_runtime": 10.0892, "eval_arxiv_samples_per_second": 49.558, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.33548037728916624, "eval_arxiv_token_set_f1_sem": 0.00429884608294297, "eval_arxiv_token_set_precision": 0.279878095000346, "eval_arxiv_token_set_recall": 0.44572049453007173, "eval_arxiv_true_num_tokens": 64.0, "step": 91250 }, { "epoch": 17.52, "eval_python_code_alpaca_accuracy": 0.15803125, "eval_python_code_alpaca_bleu_score": 4.292263844847255, "eval_python_code_alpaca_bleu_score_sem": 0.14895782797027105, "eval_python_code_alpaca_emb_cos_sim": 0.7248610258102417, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011929689178610842, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9378304481506348, "eval_python_code_alpaca_n_ngrams_match_1": 9.314, "eval_python_code_alpaca_n_ngrams_match_2": 2.648, "eval_python_code_alpaca_n_ngrams_match_3": 0.846, "eval_python_code_alpaca_num_pred_words": 42.422, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.87485188874955, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3164786315472602, "eval_python_code_alpaca_runtime": 10.1487, "eval_python_code_alpaca_samples_per_second": 49.268, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.45986225768384253, "eval_python_code_alpaca_token_set_f1_sem": 0.006071605000027662, "eval_python_code_alpaca_token_set_precision": 0.5071217546987413, "eval_python_code_alpaca_token_set_recall": 0.4501021686448808, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 91250 }, { "epoch": 17.52, "eval_wikibio_accuracy": 0.3185625, "eval_wikibio_bleu_score": 5.86293047052854, "eval_wikibio_bleu_score_sem": 0.2097571039965183, "eval_wikibio_emb_cos_sim": 0.730091392993927, "eval_wikibio_emb_cos_sim_sem": 0.00889433636912902, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7917020320892334, "eval_wikibio_n_ngrams_match_1": 9.942, "eval_wikibio_n_ngrams_match_2": 3.288, "eval_wikibio_n_ngrams_match_3": 1.204, "eval_wikibio_num_pred_words": 36.34, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.33179022926705, "eval_wikibio_pred_num_tokens": 62.953125, "eval_wikibio_rouge_score": 0.34520514630277793, "eval_wikibio_runtime": 10.3427, "eval_wikibio_samples_per_second": 48.343, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.31497136088705324, "eval_wikibio_token_set_f1_sem": 0.005374250899187345, "eval_wikibio_token_set_precision": 0.3209662348006424, "eval_wikibio_token_set_recall": 0.32667448256406834, "eval_wikibio_true_num_tokens": 61.1328125, "step": 91250 }, { "epoch": 17.52, "eval_nq_accuracy": 0.51715625, "eval_nq_bleu_score": 11.041352901905567, "eval_nq_bleu_score_sem": 0.44647207818452034, "eval_nq_emb_cos_sim": 0.8222669959068298, "eval_nq_emb_cos_sim_sem": 0.008171162303132877, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2518060207366943, "eval_nq_n_ngrams_match_1": 22.432, "eval_nq_n_ngrams_match_2": 8.094, "eval_nq_n_ngrams_match_3": 3.67, "eval_nq_num_pred_words": 48.786, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.504886366468106, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.434008837293269, "eval_nq_runtime": 11.9339, "eval_nq_samples_per_second": 41.898, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.45168001599255414, "eval_nq_token_set_f1_sem": 0.004802899756250061, "eval_nq_token_set_precision": 0.4046170641156355, "eval_nq_token_set_recall": 0.5225387277168005, "eval_nq_true_num_tokens": 64.0, "step": 91250 }, { "epoch": 17.52, "learning_rate": 0.001, "loss": 2.6229, "step": 91260 }, { "epoch": 17.53, "learning_rate": 0.001, "loss": 2.6157, "step": 91272 }, { "epoch": 17.53, "learning_rate": 0.001, "loss": 2.628, "step": 91284 }, { "epoch": 17.53, "learning_rate": 0.001, "loss": 2.6244, "step": 91296 }, { "epoch": 17.53, "learning_rate": 0.001, "loss": 2.6275, "step": 91308 }, { "epoch": 17.53, "learning_rate": 0.001, "loss": 2.6255, "step": 91320 }, { "epoch": 17.54, "learning_rate": 0.001, "loss": 2.62, "step": 91332 }, { "epoch": 17.54, "learning_rate": 0.001, "loss": 2.6294, "step": 91344 }, { "epoch": 17.54, "learning_rate": 0.001, "loss": 2.6266, "step": 91356 }, { "epoch": 17.54, "learning_rate": 0.001, "loss": 2.6319, "step": 91368 }, { "epoch": 17.55, "learning_rate": 0.001, "loss": 2.6238, "step": 91380 }, { "epoch": 17.55, "learning_rate": 0.001, "loss": 2.6186, "step": 91392 }, { "epoch": 17.55, "learning_rate": 0.001, "loss": 2.624, "step": 91404 }, { "epoch": 17.55, "learning_rate": 0.001, "loss": 2.6316, "step": 91416 }, { "epoch": 17.56, "learning_rate": 0.001, "loss": 2.62, "step": 91428 }, { "epoch": 17.56, "learning_rate": 0.001, "loss": 2.6328, "step": 91440 }, { "epoch": 17.56, "learning_rate": 0.001, "loss": 2.6154, "step": 91452 }, { "epoch": 17.56, "learning_rate": 0.001, "loss": 2.6239, "step": 91464 }, { "epoch": 17.56, "learning_rate": 0.001, "loss": 2.606, "step": 91476 }, { "epoch": 17.57, "learning_rate": 0.001, "loss": 2.6158, "step": 91488 }, { "epoch": 17.57, "learning_rate": 0.001, "loss": 2.6171, "step": 91500 }, { "epoch": 17.57, "learning_rate": 0.001, "loss": 2.6214, "step": 91512 }, { "epoch": 17.57, "learning_rate": 0.001, "loss": 2.6246, "step": 91524 }, { "epoch": 17.58, "learning_rate": 0.001, "loss": 2.6191, "step": 91536 }, { "epoch": 17.58, "learning_rate": 0.001, "loss": 2.6225, "step": 91548 }, { "epoch": 17.58, "learning_rate": 0.001, "loss": 2.6137, "step": 91560 }, { "epoch": 17.58, "learning_rate": 0.001, "loss": 2.6188, "step": 91572 }, { "epoch": 17.59, "learning_rate": 0.001, "loss": 2.6293, "step": 91584 }, { "epoch": 17.59, "learning_rate": 0.001, "loss": 2.6309, "step": 91596 }, { "epoch": 17.59, "learning_rate": 0.001, "loss": 2.6172, "step": 91608 }, { "epoch": 17.59, "learning_rate": 0.001, "loss": 2.6273, "step": 91620 }, { "epoch": 17.59, "learning_rate": 0.001, "loss": 2.618, "step": 91632 }, { "epoch": 17.6, "learning_rate": 0.001, "loss": 2.618, "step": 91644 }, { "epoch": 17.6, "learning_rate": 0.001, "loss": 2.6259, "step": 91656 }, { "epoch": 17.6, "learning_rate": 0.001, "loss": 2.6216, "step": 91668 }, { "epoch": 17.6, "learning_rate": 0.001, "loss": 2.6123, "step": 91680 }, { "epoch": 17.61, "learning_rate": 0.001, "loss": 2.6238, "step": 91692 }, { "epoch": 17.61, "learning_rate": 0.001, "loss": 2.6191, "step": 91704 }, { "epoch": 17.61, "learning_rate": 0.001, "loss": 2.6136, "step": 91716 }, { "epoch": 17.61, "learning_rate": 0.001, "loss": 2.6225, "step": 91728 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 2.6212, "step": 91740 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 2.6243, "step": 91752 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 2.6137, "step": 91764 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 2.6255, "step": 91776 }, { "epoch": 17.62, "learning_rate": 0.001, "loss": 2.6165, "step": 91788 }, { "epoch": 17.63, "learning_rate": 0.001, "loss": 2.6198, "step": 91800 }, { "epoch": 17.63, "learning_rate": 0.001, "loss": 2.6106, "step": 91812 }, { "epoch": 17.63, "learning_rate": 0.001, "loss": 2.6174, "step": 91824 }, { "epoch": 17.63, "learning_rate": 0.001, "loss": 2.6133, "step": 91836 }, { "epoch": 17.64, "learning_rate": 0.001, "loss": 2.6168, "step": 91848 }, { "epoch": 17.64, "learning_rate": 0.001, "loss": 2.6194, "step": 91860 }, { "epoch": 17.64, "learning_rate": 0.001, "loss": 2.6125, "step": 91872 }, { "epoch": 17.64, "eval_ag_news_accuracy": 0.315875, "eval_ag_news_bleu_score": 4.516791984956375, "eval_ag_news_bleu_score_sem": 0.14102461811447312, "eval_ag_news_emb_cos_sim": 0.792000412940979, "eval_ag_news_emb_cos_sim_sem": 0.008333077324221573, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6316728591918945, "eval_ag_news_n_ngrams_match_1": 13.566, "eval_ag_news_n_ngrams_match_2": 2.942, "eval_ag_news_n_ngrams_match_3": 0.768, "eval_ag_news_num_pred_words": 46.216, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.77595764741365, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33733370989732975, "eval_ag_news_runtime": 10.5509, "eval_ag_news_samples_per_second": 47.389, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34093643159678505, "eval_ag_news_token_set_f1_sem": 0.0044661393665224765, "eval_ag_news_token_set_precision": 0.3231342489407503, "eval_ag_news_token_set_recall": 0.38076027189100853, "eval_ag_news_true_num_tokens": 56.09375, "step": 91875 }, { "epoch": 17.64, "eval_anthropic_toxic_prompts_accuracy": 0.11075, "eval_anthropic_toxic_prompts_bleu_score": 2.9257154595032304, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12171122561972979, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6466835737228394, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009675006639028118, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.301609992980957, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.886, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.608, "eval_anthropic_toxic_prompts_num_pred_words": 46.58, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.15632523691011, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.8671875, "eval_anthropic_toxic_prompts_rouge_score": 0.20564758629015595, "eval_anthropic_toxic_prompts_runtime": 9.9567, "eval_anthropic_toxic_prompts_samples_per_second": 50.217, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.35088469545083006, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065935221882133825, "eval_anthropic_toxic_prompts_token_set_precision": 0.41868826503135637, "eval_anthropic_toxic_prompts_token_set_recall": 0.3326883772584242, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 91875 }, { "epoch": 17.64, "eval_arxiv_accuracy": 0.3396875, "eval_arxiv_bleu_score": 4.111235558623548, "eval_arxiv_bleu_score_sem": 0.11936061103201295, "eval_arxiv_emb_cos_sim": 0.7424968481063843, "eval_arxiv_emb_cos_sim_sem": 0.009177570435111816, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.480323553085327, "eval_arxiv_n_ngrams_match_1": 14.51, "eval_arxiv_n_ngrams_match_2": 2.776, "eval_arxiv_n_ngrams_match_3": 0.588, "eval_arxiv_num_pred_words": 39.802, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.470226218322516, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34747550565157614, "eval_arxiv_runtime": 10.4863, "eval_arxiv_samples_per_second": 47.681, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3425256557378168, "eval_arxiv_token_set_f1_sem": 0.004308085624099615, "eval_arxiv_token_set_precision": 0.2889703238854412, "eval_arxiv_token_set_recall": 0.44020837430223325, "eval_arxiv_true_num_tokens": 64.0, "step": 91875 }, { "epoch": 17.64, "eval_python_code_alpaca_accuracy": 0.1571875, "eval_python_code_alpaca_bleu_score": 4.463552230485825, "eval_python_code_alpaca_bleu_score_sem": 0.14725582592292868, "eval_python_code_alpaca_emb_cos_sim": 0.738200306892395, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011336023158465564, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9708402156829834, "eval_python_code_alpaca_n_ngrams_match_1": 9.576, "eval_python_code_alpaca_n_ngrams_match_2": 2.778, "eval_python_code_alpaca_n_ngrams_match_3": 0.918, "eval_python_code_alpaca_num_pred_words": 43.52, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.508303894775644, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3198678981380734, "eval_python_code_alpaca_runtime": 9.4479, "eval_python_code_alpaca_samples_per_second": 52.922, "eval_python_code_alpaca_steps_per_second": 0.106, "eval_python_code_alpaca_token_set_f1": 0.4662726139896116, "eval_python_code_alpaca_token_set_f1_sem": 0.00580147080001791, "eval_python_code_alpaca_token_set_precision": 0.5197490051116582, "eval_python_code_alpaca_token_set_recall": 0.44510844452876325, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 91875 }, { "epoch": 17.64, "eval_wikibio_accuracy": 0.31534375, "eval_wikibio_bleu_score": 5.556776623621821, "eval_wikibio_bleu_score_sem": 0.20818131192861844, "eval_wikibio_emb_cos_sim": 0.7368065118789673, "eval_wikibio_emb_cos_sim_sem": 0.01115140942527572, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.79640531539917, "eval_wikibio_n_ngrams_match_1": 9.658, "eval_wikibio_n_ngrams_match_2": 3.196, "eval_wikibio_n_ngrams_match_3": 1.126, "eval_wikibio_num_pred_words": 35.94, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.54078629694595, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33865040548455105, "eval_wikibio_runtime": 10.4336, "eval_wikibio_samples_per_second": 47.922, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3051305001112317, "eval_wikibio_token_set_f1_sem": 0.005797644941532922, "eval_wikibio_token_set_precision": 0.3133082042377267, "eval_wikibio_token_set_recall": 0.3134608068816547, "eval_wikibio_true_num_tokens": 61.1328125, "step": 91875 }, { "epoch": 17.64, "eval_nq_accuracy": 0.51834375, "eval_nq_bleu_score": 11.256371756261961, "eval_nq_bleu_score_sem": 0.46382785723270537, "eval_nq_emb_cos_sim": 0.8236100077629089, "eval_nq_emb_cos_sim_sem": 0.007252571739837942, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2458207607269287, "eval_nq_n_ngrams_match_1": 22.554, "eval_nq_n_ngrams_match_2": 8.164, "eval_nq_n_ngrams_match_3": 3.736, "eval_nq_num_pred_words": 48.836, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.448167059424438, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4352164736291856, "eval_nq_runtime": 10.0587, "eval_nq_samples_per_second": 49.708, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.4530780111559071, "eval_nq_token_set_f1_sem": 0.00494427752895844, "eval_nq_token_set_precision": 0.4080953600425592, "eval_nq_token_set_recall": 0.5205275883457564, "eval_nq_true_num_tokens": 64.0, "step": 91875 }, { "epoch": 17.64, "learning_rate": 0.001, "loss": 2.6256, "step": 91884 }, { "epoch": 17.65, "learning_rate": 0.001, "loss": 2.629, "step": 91896 }, { "epoch": 17.65, "learning_rate": 0.001, "loss": 2.6062, "step": 91908 }, { "epoch": 17.65, "learning_rate": 0.001, "loss": 2.6179, "step": 91920 }, { "epoch": 17.65, "learning_rate": 0.001, "loss": 2.6205, "step": 91932 }, { "epoch": 17.65, "learning_rate": 0.001, "loss": 2.6213, "step": 91944 }, { "epoch": 17.66, "learning_rate": 0.001, "loss": 2.6211, "step": 91956 }, { "epoch": 17.66, "learning_rate": 0.001, "loss": 2.6248, "step": 91968 }, { "epoch": 17.66, "learning_rate": 0.001, "loss": 2.6149, "step": 91980 }, { "epoch": 17.66, "learning_rate": 0.001, "loss": 2.6345, "step": 91992 }, { "epoch": 17.67, "learning_rate": 0.001, "loss": 2.6224, "step": 92004 }, { "epoch": 17.67, "learning_rate": 0.001, "loss": 2.6156, "step": 92016 }, { "epoch": 17.67, "learning_rate": 0.001, "loss": 2.6164, "step": 92028 }, { "epoch": 17.67, "learning_rate": 0.001, "loss": 2.6179, "step": 92040 }, { "epoch": 17.68, "learning_rate": 0.001, "loss": 2.6132, "step": 92052 }, { "epoch": 17.68, "learning_rate": 0.001, "loss": 2.6053, "step": 92064 }, { "epoch": 17.68, "learning_rate": 0.001, "loss": 2.6062, "step": 92076 }, { "epoch": 17.68, "learning_rate": 0.001, "loss": 2.6104, "step": 92088 }, { "epoch": 17.68, "learning_rate": 0.001, "loss": 2.6137, "step": 92100 }, { "epoch": 17.69, "learning_rate": 0.001, "loss": 2.6192, "step": 92112 }, { "epoch": 17.69, "learning_rate": 0.001, "loss": 2.6272, "step": 92124 }, { "epoch": 17.69, "learning_rate": 0.001, "loss": 2.6189, "step": 92136 }, { "epoch": 17.69, "learning_rate": 0.001, "loss": 2.6212, "step": 92148 }, { "epoch": 17.7, "learning_rate": 0.001, "loss": 2.6176, "step": 92160 }, { "epoch": 17.7, "learning_rate": 0.001, "loss": 2.6266, "step": 92172 }, { "epoch": 17.7, "learning_rate": 0.001, "loss": 2.6233, "step": 92184 }, { "epoch": 17.7, "learning_rate": 0.001, "loss": 2.6166, "step": 92196 }, { "epoch": 17.71, "learning_rate": 0.001, "loss": 2.6185, "step": 92208 }, { "epoch": 17.71, "learning_rate": 0.001, "loss": 2.6131, "step": 92220 }, { "epoch": 17.71, "learning_rate": 0.001, "loss": 2.6316, "step": 92232 }, { "epoch": 17.71, "learning_rate": 0.001, "loss": 2.6215, "step": 92244 }, { "epoch": 17.71, "learning_rate": 0.001, "loss": 2.6143, "step": 92256 }, { "epoch": 17.72, "learning_rate": 0.001, "loss": 2.6252, "step": 92268 }, { "epoch": 17.72, "learning_rate": 0.001, "loss": 2.6208, "step": 92280 }, { "epoch": 17.72, "learning_rate": 0.001, "loss": 2.6159, "step": 92292 }, { "epoch": 17.72, "learning_rate": 0.001, "loss": 2.6335, "step": 92304 }, { "epoch": 17.73, "learning_rate": 0.001, "loss": 2.6136, "step": 92316 }, { "epoch": 17.73, "learning_rate": 0.001, "loss": 2.6184, "step": 92328 }, { "epoch": 17.73, "learning_rate": 0.001, "loss": 2.6164, "step": 92340 }, { "epoch": 17.73, "learning_rate": 0.001, "loss": 2.6185, "step": 92352 }, { "epoch": 17.74, "learning_rate": 0.001, "loss": 2.6112, "step": 92364 }, { "epoch": 17.74, "learning_rate": 0.001, "loss": 2.6207, "step": 92376 }, { "epoch": 17.74, "learning_rate": 0.001, "loss": 2.6166, "step": 92388 }, { "epoch": 17.74, "learning_rate": 0.001, "loss": 2.6164, "step": 92400 }, { "epoch": 17.74, "learning_rate": 0.001, "loss": 2.6232, "step": 92412 }, { "epoch": 17.75, "learning_rate": 0.001, "loss": 2.6263, "step": 92424 }, { "epoch": 17.75, "learning_rate": 0.001, "loss": 2.6137, "step": 92436 }, { "epoch": 17.75, "learning_rate": 0.001, "loss": 2.6185, "step": 92448 }, { "epoch": 17.75, "learning_rate": 0.001, "loss": 2.6181, "step": 92460 }, { "epoch": 17.76, "learning_rate": 0.001, "loss": 2.6138, "step": 92472 }, { "epoch": 17.76, "learning_rate": 0.001, "loss": 2.624, "step": 92484 }, { "epoch": 17.76, "learning_rate": 0.001, "loss": 2.6237, "step": 92496 }, { "epoch": 17.76, "eval_ag_news_accuracy": 0.31846875, "eval_ag_news_bleu_score": 4.863661073142848, "eval_ag_news_bleu_score_sem": 0.1577217389501848, "eval_ag_news_emb_cos_sim": 0.7971920967102051, "eval_ag_news_emb_cos_sim_sem": 0.008862593206755238, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6174674034118652, "eval_ag_news_n_ngrams_match_1": 13.882, "eval_ag_news_n_ngrams_match_2": 3.134, "eval_ag_news_n_ngrams_match_3": 0.886, "eval_ag_news_num_pred_words": 46.678, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.24312646658485, "eval_ag_news_pred_num_tokens": 62.8203125, "eval_ag_news_rouge_score": 0.34383639415423217, "eval_ag_news_runtime": 10.1795, "eval_ag_news_samples_per_second": 49.118, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3475284364834532, "eval_ag_news_token_set_f1_sem": 0.0044591579225977266, "eval_ag_news_token_set_precision": 0.32829322948799167, "eval_ag_news_token_set_recall": 0.38575414978716244, "eval_ag_news_true_num_tokens": 56.09375, "step": 92500 }, { "epoch": 17.76, "eval_anthropic_toxic_prompts_accuracy": 0.11146875, "eval_anthropic_toxic_prompts_bleu_score": 2.933481551177838, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11014743869809578, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6661948561668396, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009900308544643676, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3102123737335205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.122, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.646, "eval_anthropic_toxic_prompts_num_pred_words": 47.54, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.390941970856552, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2083862066969581, "eval_anthropic_toxic_prompts_runtime": 10.3347, "eval_anthropic_toxic_prompts_samples_per_second": 48.381, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3443945950596044, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006369789903044341, "eval_anthropic_toxic_prompts_token_set_precision": 0.42680924062817843, "eval_anthropic_toxic_prompts_token_set_recall": 0.3168531000228397, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 92500 }, { "epoch": 17.76, "eval_arxiv_accuracy": 0.34125, "eval_arxiv_bleu_score": 4.274683281880005, "eval_arxiv_bleu_score_sem": 0.11992070575359076, "eval_arxiv_emb_cos_sim": 0.7586016654968262, "eval_arxiv_emb_cos_sim_sem": 0.007546569057259207, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.47928524017334, "eval_arxiv_n_ngrams_match_1": 14.936, "eval_arxiv_n_ngrams_match_2": 2.934, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 40.892, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.436529460101816, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35374131799567277, "eval_arxiv_runtime": 10.0127, "eval_arxiv_samples_per_second": 49.937, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.34634656403300706, "eval_arxiv_token_set_f1_sem": 0.004408067823719984, "eval_arxiv_token_set_precision": 0.2966732492359857, "eval_arxiv_token_set_recall": 0.4351693458902045, "eval_arxiv_true_num_tokens": 64.0, "step": 92500 }, { "epoch": 17.76, "eval_python_code_alpaca_accuracy": 0.159, "eval_python_code_alpaca_bleu_score": 4.662633093449265, "eval_python_code_alpaca_bleu_score_sem": 0.14992216460871804, "eval_python_code_alpaca_emb_cos_sim": 0.7485463619232178, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009183555946144872, "eval_python_code_alpaca_emb_top1_equal": 0.078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9587080478668213, "eval_python_code_alpaca_n_ngrams_match_1": 9.846, "eval_python_code_alpaca_n_ngrams_match_2": 3.022, "eval_python_code_alpaca_n_ngrams_match_3": 1.024, "eval_python_code_alpaca_num_pred_words": 44.854, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.273055798304927, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32336975255755696, "eval_python_code_alpaca_runtime": 10.6849, "eval_python_code_alpaca_samples_per_second": 46.795, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.4798251319211579, "eval_python_code_alpaca_token_set_f1_sem": 0.005362554241441699, "eval_python_code_alpaca_token_set_precision": 0.5381995679116696, "eval_python_code_alpaca_token_set_recall": 0.4534734870417638, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 92500 }, { "epoch": 17.76, "eval_wikibio_accuracy": 0.314875, "eval_wikibio_bleu_score": 5.802102949940852, "eval_wikibio_bleu_score_sem": 0.1873798564058999, "eval_wikibio_emb_cos_sim": 0.7352509498596191, "eval_wikibio_emb_cos_sim_sem": 0.010587505665624214, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8203341960906982, "eval_wikibio_n_ngrams_match_1": 10.366, "eval_wikibio_n_ngrams_match_2": 3.37, "eval_wikibio_n_ngrams_match_3": 1.2, "eval_wikibio_num_pred_words": 37.412, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.6194516159719, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3602097375353768, "eval_wikibio_runtime": 10.3351, "eval_wikibio_samples_per_second": 48.379, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3225081604573831, "eval_wikibio_token_set_f1_sem": 0.0050452955774064195, "eval_wikibio_token_set_precision": 0.3356725988627342, "eval_wikibio_token_set_recall": 0.32386358128472925, "eval_wikibio_true_num_tokens": 61.1328125, "step": 92500 }, { "epoch": 17.76, "eval_nq_accuracy": 0.5194375, "eval_nq_bleu_score": 11.440735583902782, "eval_nq_bleu_score_sem": 0.48206147354742257, "eval_nq_emb_cos_sim": 0.8215693235397339, "eval_nq_emb_cos_sim_sem": 0.00816039510042736, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2431929111480713, "eval_nq_n_ngrams_match_1": 22.926, "eval_nq_n_ngrams_match_2": 8.31, "eval_nq_n_ngrams_match_3": 3.812, "eval_nq_num_pred_words": 49.48, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.423371291639443, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4424263255194434, "eval_nq_runtime": 10.0777, "eval_nq_samples_per_second": 49.614, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.457561549292863, "eval_nq_token_set_f1_sem": 0.005089719355175184, "eval_nq_token_set_precision": 0.4140746048219013, "eval_nq_token_set_recall": 0.5210391169790486, "eval_nq_true_num_tokens": 64.0, "step": 92500 }, { "epoch": 17.76, "learning_rate": 0.001, "loss": 2.6206, "step": 92508 }, { "epoch": 17.76, "learning_rate": 0.001, "loss": 2.6175, "step": 92520 }, { "epoch": 17.77, "learning_rate": 0.001, "loss": 2.6143, "step": 92532 }, { "epoch": 17.77, "learning_rate": 0.001, "loss": 2.6146, "step": 92544 }, { "epoch": 17.77, "learning_rate": 0.001, "loss": 2.6086, "step": 92556 }, { "epoch": 17.77, "learning_rate": 0.001, "loss": 2.6142, "step": 92568 }, { "epoch": 17.78, "learning_rate": 0.001, "loss": 2.62, "step": 92580 }, { "epoch": 17.78, "learning_rate": 0.001, "loss": 2.612, "step": 92592 }, { "epoch": 17.78, "learning_rate": 0.001, "loss": 2.618, "step": 92604 }, { "epoch": 17.78, "learning_rate": 0.001, "loss": 2.6164, "step": 92616 }, { "epoch": 17.79, "learning_rate": 0.001, "loss": 2.625, "step": 92628 }, { "epoch": 17.79, "learning_rate": 0.001, "loss": 2.6163, "step": 92640 }, { "epoch": 17.79, "learning_rate": 0.001, "loss": 2.6151, "step": 92652 }, { "epoch": 17.79, "learning_rate": 0.001, "loss": 2.6236, "step": 92664 }, { "epoch": 17.79, "learning_rate": 0.001, "loss": 2.6201, "step": 92676 }, { "epoch": 17.8, "learning_rate": 0.001, "loss": 2.6246, "step": 92688 }, { "epoch": 17.8, "learning_rate": 0.001, "loss": 2.6206, "step": 92700 }, { "epoch": 17.8, "learning_rate": 0.001, "loss": 2.6025, "step": 92712 }, { "epoch": 17.8, "learning_rate": 0.001, "loss": 2.617, "step": 92724 }, { "epoch": 17.81, "learning_rate": 0.001, "loss": 2.62, "step": 92736 }, { "epoch": 17.81, "learning_rate": 0.001, "loss": 2.618, "step": 92748 }, { "epoch": 17.81, "learning_rate": 0.001, "loss": 2.617, "step": 92760 }, { "epoch": 17.81, "learning_rate": 0.001, "loss": 2.6165, "step": 92772 }, { "epoch": 17.82, "learning_rate": 0.001, "loss": 2.6093, "step": 92784 }, { "epoch": 17.82, "learning_rate": 0.001, "loss": 2.6139, "step": 92796 }, { "epoch": 17.82, "learning_rate": 0.001, "loss": 2.607, "step": 92808 }, { "epoch": 17.82, "learning_rate": 0.001, "loss": 2.6267, "step": 92820 }, { "epoch": 17.82, "learning_rate": 0.001, "loss": 2.6203, "step": 92832 }, { "epoch": 17.83, "learning_rate": 0.001, "loss": 2.618, "step": 92844 }, { "epoch": 17.83, "learning_rate": 0.001, "loss": 2.6199, "step": 92856 }, { "epoch": 17.83, "learning_rate": 0.001, "loss": 2.6177, "step": 92868 }, { "epoch": 17.83, "learning_rate": 0.001, "loss": 2.6215, "step": 92880 }, { "epoch": 17.84, "learning_rate": 0.001, "loss": 2.6193, "step": 92892 }, { "epoch": 17.84, "learning_rate": 0.001, "loss": 2.627, "step": 92904 }, { "epoch": 17.84, "learning_rate": 0.001, "loss": 2.6171, "step": 92916 }, { "epoch": 17.84, "learning_rate": 0.001, "loss": 2.6184, "step": 92928 }, { "epoch": 17.85, "learning_rate": 0.001, "loss": 2.6226, "step": 92940 }, { "epoch": 17.85, "learning_rate": 0.001, "loss": 2.6156, "step": 92952 }, { "epoch": 17.85, "learning_rate": 0.001, "loss": 2.6134, "step": 92964 }, { "epoch": 17.85, "learning_rate": 0.001, "loss": 2.6086, "step": 92976 }, { "epoch": 17.85, "learning_rate": 0.001, "loss": 2.6049, "step": 92988 }, { "epoch": 17.86, "learning_rate": 0.001, "loss": 2.6202, "step": 93000 }, { "epoch": 17.86, "learning_rate": 0.001, "loss": 2.6233, "step": 93012 }, { "epoch": 17.86, "learning_rate": 0.001, "loss": 2.6227, "step": 93024 }, { "epoch": 17.86, "learning_rate": 0.001, "loss": 2.6335, "step": 93036 }, { "epoch": 17.87, "learning_rate": 0.001, "loss": 2.6117, "step": 93048 }, { "epoch": 17.87, "learning_rate": 0.001, "loss": 2.6058, "step": 93060 }, { "epoch": 17.87, "learning_rate": 0.001, "loss": 2.6109, "step": 93072 }, { "epoch": 17.87, "learning_rate": 0.001, "loss": 2.615, "step": 93084 }, { "epoch": 17.88, "learning_rate": 0.001, "loss": 2.614, "step": 93096 }, { "epoch": 17.88, "learning_rate": 0.001, "loss": 2.6219, "step": 93108 }, { "epoch": 17.88, "learning_rate": 0.001, "loss": 2.6199, "step": 93120 }, { "epoch": 17.88, "eval_ag_news_accuracy": 0.31634375, "eval_ag_news_bleu_score": 4.837314789298008, "eval_ag_news_bleu_score_sem": 0.158196908235372, "eval_ag_news_emb_cos_sim": 0.7934457063674927, "eval_ag_news_emb_cos_sim_sem": 0.008294545226514996, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.62282133102417, "eval_ag_news_n_ngrams_match_1": 13.692, "eval_ag_news_n_ngrams_match_2": 3.03, "eval_ag_news_n_ngrams_match_3": 0.91, "eval_ag_news_num_pred_words": 46.386, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.44305820218369, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34048091626799803, "eval_ag_news_runtime": 11.2359, "eval_ag_news_samples_per_second": 44.5, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.3415275626137268, "eval_ag_news_token_set_f1_sem": 0.004491170834461802, "eval_ag_news_token_set_precision": 0.32531268967090105, "eval_ag_news_token_set_recall": 0.37416549819451767, "eval_ag_news_true_num_tokens": 56.09375, "step": 93125 }, { "epoch": 17.88, "eval_anthropic_toxic_prompts_accuracy": 0.111375, "eval_anthropic_toxic_prompts_bleu_score": 2.9985999175076103, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11096133851322557, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.670669436454773, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009866299989162431, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2762463092803955, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.208, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.826, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.662, "eval_anthropic_toxic_prompts_num_pred_words": 47.19, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.476202463374108, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21131656501208573, "eval_anthropic_toxic_prompts_runtime": 14.6574, "eval_anthropic_toxic_prompts_samples_per_second": 34.112, "eval_anthropic_toxic_prompts_steps_per_second": 0.068, "eval_anthropic_toxic_prompts_token_set_f1": 0.3548089564822094, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00666789748832613, "eval_anthropic_toxic_prompts_token_set_precision": 0.43700918760625473, "eval_anthropic_toxic_prompts_token_set_recall": 0.3255814772326906, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 93125 }, { "epoch": 17.88, "eval_arxiv_accuracy": 0.3411875, "eval_arxiv_bleu_score": 4.19622171537826, "eval_arxiv_bleu_score_sem": 0.12138220514264243, "eval_arxiv_emb_cos_sim": 0.7518219947814941, "eval_arxiv_emb_cos_sim_sem": 0.008517964353382229, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4892160892486572, "eval_arxiv_n_ngrams_match_1": 14.716, "eval_arxiv_n_ngrams_match_2": 2.828, "eval_arxiv_n_ngrams_match_3": 0.64, "eval_arxiv_num_pred_words": 41.174, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.76025652044602, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34704146581365936, "eval_arxiv_runtime": 10.5427, "eval_arxiv_samples_per_second": 47.426, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3422334342223793, "eval_arxiv_token_set_f1_sem": 0.004257148718547928, "eval_arxiv_token_set_precision": 0.2932820736842361, "eval_arxiv_token_set_recall": 0.4266267073825341, "eval_arxiv_true_num_tokens": 64.0, "step": 93125 }, { "epoch": 17.88, "eval_python_code_alpaca_accuracy": 0.15846875, "eval_python_code_alpaca_bleu_score": 4.301077048283588, "eval_python_code_alpaca_bleu_score_sem": 0.14803714003648313, "eval_python_code_alpaca_emb_cos_sim": 0.732406497001648, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010670660080647192, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.952526569366455, "eval_python_code_alpaca_n_ngrams_match_1": 9.51, "eval_python_code_alpaca_n_ngrams_match_2": 2.684, "eval_python_code_alpaca_n_ngrams_match_3": 0.83, "eval_python_code_alpaca_num_pred_words": 43.302, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.154287278960982, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31905578682799896, "eval_python_code_alpaca_runtime": 9.8969, "eval_python_code_alpaca_samples_per_second": 50.521, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.45926936281659464, "eval_python_code_alpaca_token_set_f1_sem": 0.005632772764396882, "eval_python_code_alpaca_token_set_precision": 0.5169281572811251, "eval_python_code_alpaca_token_set_recall": 0.43414018076482214, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 93125 }, { "epoch": 17.88, "eval_wikibio_accuracy": 0.31653125, "eval_wikibio_bleu_score": 5.873538809761199, "eval_wikibio_bleu_score_sem": 0.19959044635202375, "eval_wikibio_emb_cos_sim": 0.7330521941184998, "eval_wikibio_emb_cos_sim_sem": 0.009765970733371264, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8268773555755615, "eval_wikibio_n_ngrams_match_1": 10.268, "eval_wikibio_n_ngrams_match_2": 3.394, "eval_wikibio_n_ngrams_match_3": 1.24, "eval_wikibio_num_pred_words": 37.502, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.918925648238435, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35415308537137075, "eval_wikibio_runtime": 10.4693, "eval_wikibio_samples_per_second": 47.759, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3200020451912247, "eval_wikibio_token_set_f1_sem": 0.005271703323204975, "eval_wikibio_token_set_precision": 0.33303423938450916, "eval_wikibio_token_set_recall": 0.3221129450327614, "eval_wikibio_true_num_tokens": 61.1328125, "step": 93125 }, { "epoch": 17.88, "eval_nq_accuracy": 0.52028125, "eval_nq_bleu_score": 11.282592584182439, "eval_nq_bleu_score_sem": 0.4686162741904307, "eval_nq_emb_cos_sim": 0.8212836980819702, "eval_nq_emb_cos_sim_sem": 0.007685375751562789, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2405052185058594, "eval_nq_n_ngrams_match_1": 22.62, "eval_nq_n_ngrams_match_2": 8.214, "eval_nq_n_ngrams_match_3": 3.722, "eval_nq_num_pred_words": 49.15, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.39807817124667, "eval_nq_pred_num_tokens": 62.9453125, "eval_nq_rouge_score": 0.4364852563185425, "eval_nq_runtime": 10.808, "eval_nq_samples_per_second": 46.262, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4522558518762581, "eval_nq_token_set_f1_sem": 0.004922945423360847, "eval_nq_token_set_precision": 0.40761799053898984, "eval_nq_token_set_recall": 0.5183607239670648, "eval_nq_true_num_tokens": 64.0, "step": 93125 }, { "epoch": 17.88, "learning_rate": 0.001, "loss": 2.6237, "step": 93132 }, { "epoch": 17.88, "learning_rate": 0.001, "loss": 2.6273, "step": 93144 }, { "epoch": 17.89, "learning_rate": 0.001, "loss": 2.6215, "step": 93156 }, { "epoch": 17.89, "learning_rate": 0.001, "loss": 2.6119, "step": 93168 }, { "epoch": 17.89, "learning_rate": 0.001, "loss": 2.6199, "step": 93180 }, { "epoch": 17.89, "learning_rate": 0.001, "loss": 2.6234, "step": 93192 }, { "epoch": 17.9, "learning_rate": 0.001, "loss": 2.6257, "step": 93204 }, { "epoch": 17.9, "learning_rate": 0.001, "loss": 2.6225, "step": 93216 }, { "epoch": 17.9, "learning_rate": 0.001, "loss": 2.6186, "step": 93228 }, { "epoch": 17.9, "learning_rate": 0.001, "loss": 2.6217, "step": 93240 }, { "epoch": 17.91, "learning_rate": 0.001, "loss": 2.6167, "step": 93252 }, { "epoch": 17.91, "learning_rate": 0.001, "loss": 2.6303, "step": 93264 }, { "epoch": 17.91, "learning_rate": 0.001, "loss": 2.6195, "step": 93276 }, { "epoch": 17.91, "learning_rate": 0.001, "loss": 2.6265, "step": 93288 }, { "epoch": 17.91, "learning_rate": 0.001, "loss": 2.6109, "step": 93300 }, { "epoch": 17.92, "learning_rate": 0.001, "loss": 2.6246, "step": 93312 }, { "epoch": 17.92, "learning_rate": 0.001, "loss": 2.629, "step": 93324 }, { "epoch": 17.92, "learning_rate": 0.001, "loss": 2.61, "step": 93336 }, { "epoch": 17.92, "learning_rate": 0.001, "loss": 2.6157, "step": 93348 }, { "epoch": 17.93, "learning_rate": 0.001, "loss": 2.6135, "step": 93360 }, { "epoch": 17.93, "learning_rate": 0.001, "loss": 2.6127, "step": 93372 }, { "epoch": 17.93, "learning_rate": 0.001, "loss": 2.6211, "step": 93384 }, { "epoch": 17.93, "learning_rate": 0.001, "loss": 2.629, "step": 93396 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 2.6163, "step": 93408 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 2.6233, "step": 93420 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 2.6155, "step": 93432 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 2.6162, "step": 93444 }, { "epoch": 17.94, "learning_rate": 0.001, "loss": 2.6221, "step": 93456 }, { "epoch": 17.95, "learning_rate": 0.001, "loss": 2.6153, "step": 93468 }, { "epoch": 17.95, "learning_rate": 0.001, "loss": 2.6181, "step": 93480 }, { "epoch": 17.95, "learning_rate": 0.001, "loss": 2.6173, "step": 93492 }, { "epoch": 17.95, "learning_rate": 0.001, "loss": 2.6198, "step": 93504 }, { "epoch": 17.96, "learning_rate": 0.001, "loss": 2.6162, "step": 93516 }, { "epoch": 17.96, "learning_rate": 0.001, "loss": 2.6087, "step": 93528 }, { "epoch": 17.96, "learning_rate": 0.001, "loss": 2.6197, "step": 93540 }, { "epoch": 17.96, "learning_rate": 0.001, "loss": 2.6284, "step": 93552 }, { "epoch": 17.97, "learning_rate": 0.001, "loss": 2.6104, "step": 93564 }, { "epoch": 17.97, "learning_rate": 0.001, "loss": 2.6175, "step": 93576 }, { "epoch": 17.97, "learning_rate": 0.001, "loss": 2.6229, "step": 93588 }, { "epoch": 17.97, "learning_rate": 0.001, "loss": 2.619, "step": 93600 }, { "epoch": 17.97, "learning_rate": 0.001, "loss": 2.6118, "step": 93612 }, { "epoch": 17.98, "learning_rate": 0.001, "loss": 2.6205, "step": 93624 }, { "epoch": 17.98, "learning_rate": 0.001, "loss": 2.6155, "step": 93636 }, { "epoch": 17.98, "learning_rate": 0.001, "loss": 2.6202, "step": 93648 }, { "epoch": 17.98, "learning_rate": 0.001, "loss": 2.6176, "step": 93660 }, { "epoch": 17.99, "learning_rate": 0.001, "loss": 2.6097, "step": 93672 }, { "epoch": 17.99, "learning_rate": 0.001, "loss": 2.6208, "step": 93684 }, { "epoch": 17.99, "learning_rate": 0.001, "loss": 2.6108, "step": 93696 }, { "epoch": 17.99, "learning_rate": 0.001, "loss": 2.6232, "step": 93708 }, { "epoch": 18.0, "learning_rate": 0.001, "loss": 2.6141, "step": 93720 }, { "epoch": 18.0, "learning_rate": 0.001, "loss": 2.6085, "step": 93732 }, { "epoch": 18.0, "learning_rate": 0.001, "loss": 2.6281, "step": 93744 }, { "epoch": 18.0, "eval_ag_news_accuracy": 0.3144375, "eval_ag_news_bleu_score": 4.574494344549086, "eval_ag_news_bleu_score_sem": 0.1435212392138452, "eval_ag_news_emb_cos_sim": 0.793586790561676, "eval_ag_news_emb_cos_sim_sem": 0.007458764000920855, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6420984268188477, "eval_ag_news_n_ngrams_match_1": 13.376, "eval_ag_news_n_ngrams_match_2": 2.882, "eval_ag_news_n_ngrams_match_3": 0.798, "eval_ag_news_num_pred_words": 46.32, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 38.17185358256012, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33207449811473866, "eval_ag_news_runtime": 11.2693, "eval_ag_news_samples_per_second": 44.368, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.3363561703668599, "eval_ag_news_token_set_f1_sem": 0.004340804453614188, "eval_ag_news_token_set_precision": 0.3172241798990339, "eval_ag_news_token_set_recall": 0.37277672892563407, "eval_ag_news_true_num_tokens": 56.09375, "step": 93750 }, { "epoch": 18.0, "eval_anthropic_toxic_prompts_accuracy": 0.1110625, "eval_anthropic_toxic_prompts_bleu_score": 3.103489918119735, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11824354450817112, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6579990386962891, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009372159800508583, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2885220050811768, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.088, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 47.064, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.803219344071017, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21089693352220318, "eval_anthropic_toxic_prompts_runtime": 10.5303, "eval_anthropic_toxic_prompts_samples_per_second": 47.482, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.3536362828815651, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006643735997345817, "eval_anthropic_toxic_prompts_token_set_precision": 0.4300869633408589, "eval_anthropic_toxic_prompts_token_set_recall": 0.3266644538346673, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 93750 }, { "epoch": 18.0, "eval_arxiv_accuracy": 0.3426875, "eval_arxiv_bleu_score": 4.077023537311002, "eval_arxiv_bleu_score_sem": 0.10955470998965908, "eval_arxiv_emb_cos_sim": 0.7496046423912048, "eval_arxiv_emb_cos_sim_sem": 0.00712119540321793, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4744839668273926, "eval_arxiv_n_ngrams_match_1": 14.696, "eval_arxiv_n_ngrams_match_2": 2.752, "eval_arxiv_n_ngrams_match_3": 0.566, "eval_arxiv_num_pred_words": 40.868, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.281166085241175, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34951689833591937, "eval_arxiv_runtime": 11.7101, "eval_arxiv_samples_per_second": 42.698, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.34436050614848424, "eval_arxiv_token_set_f1_sem": 0.004123611640607539, "eval_arxiv_token_set_precision": 0.2943533131553631, "eval_arxiv_token_set_recall": 0.4335014152226167, "eval_arxiv_true_num_tokens": 64.0, "step": 93750 }, { "epoch": 18.0, "eval_python_code_alpaca_accuracy": 0.15496875, "eval_python_code_alpaca_bleu_score": 4.313322415328112, "eval_python_code_alpaca_bleu_score_sem": 0.13878868386029222, "eval_python_code_alpaca_emb_cos_sim": 0.7393602132797241, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010233132329166585, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.95182466506958, "eval_python_code_alpaca_n_ngrams_match_1": 9.572, "eval_python_code_alpaca_n_ngrams_match_2": 2.678, "eval_python_code_alpaca_n_ngrams_match_3": 0.858, "eval_python_code_alpaca_num_pred_words": 43.4, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.140847519680477, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32137747886032575, "eval_python_code_alpaca_runtime": 10.6162, "eval_python_code_alpaca_samples_per_second": 47.098, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.4648644059294784, "eval_python_code_alpaca_token_set_f1_sem": 0.005675868936735658, "eval_python_code_alpaca_token_set_precision": 0.5258356326515998, "eval_python_code_alpaca_token_set_recall": 0.4371973371560503, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 93750 }, { "epoch": 18.0, "eval_wikibio_accuracy": 0.317875, "eval_wikibio_bleu_score": 5.756219311939541, "eval_wikibio_bleu_score_sem": 0.20848427871141167, "eval_wikibio_emb_cos_sim": 0.7319413423538208, "eval_wikibio_emb_cos_sim_sem": 0.009697173905711325, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7880618572235107, "eval_wikibio_n_ngrams_match_1": 9.79, "eval_wikibio_n_ngrams_match_2": 3.26, "eval_wikibio_n_ngrams_match_3": 1.194, "eval_wikibio_num_pred_words": 36.05, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.17070812211405, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3384459607199994, "eval_wikibio_runtime": 11.2505, "eval_wikibio_samples_per_second": 44.443, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3108921579174648, "eval_wikibio_token_set_f1_sem": 0.0056522056164034756, "eval_wikibio_token_set_precision": 0.3179812734568046, "eval_wikibio_token_set_recall": 0.3198856093096847, "eval_wikibio_true_num_tokens": 61.1328125, "step": 93750 }, { "epoch": 18.0, "eval_nq_accuracy": 0.51978125, "eval_nq_bleu_score": 11.54615205377183, "eval_nq_bleu_score_sem": 0.4849643492232392, "eval_nq_emb_cos_sim": 0.8235622048377991, "eval_nq_emb_cos_sim_sem": 0.008237769470303774, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.240114212036133, "eval_nq_n_ngrams_match_1": 22.596, "eval_nq_n_ngrams_match_2": 8.252, "eval_nq_n_ngrams_match_3": 3.846, "eval_nq_num_pred_words": 48.882, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.394404180202658, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4356193735828876, "eval_nq_runtime": 11.7636, "eval_nq_samples_per_second": 42.504, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.4528296441176887, "eval_nq_token_set_f1_sem": 0.004923525097325228, "eval_nq_token_set_precision": 0.4080762323221383, "eval_nq_token_set_recall": 0.5193516682844476, "eval_nq_true_num_tokens": 64.0, "step": 93750 }, { "epoch": 18.0, "learning_rate": 0.001, "loss": 2.6004, "step": 93756 }, { "epoch": 18.0, "learning_rate": 0.001, "loss": 2.5943, "step": 93768 }, { "epoch": 18.01, "learning_rate": 0.001, "loss": 2.6037, "step": 93780 }, { "epoch": 18.01, "learning_rate": 0.001, "loss": 2.6072, "step": 93792 }, { "epoch": 18.01, "learning_rate": 0.001, "loss": 2.5967, "step": 93804 }, { "epoch": 18.01, "learning_rate": 0.001, "loss": 2.5966, "step": 93816 }, { "epoch": 18.02, "learning_rate": 0.001, "loss": 2.5948, "step": 93828 }, { "epoch": 18.02, "learning_rate": 0.001, "loss": 2.5989, "step": 93840 }, { "epoch": 18.02, "learning_rate": 0.001, "loss": 2.5943, "step": 93852 }, { "epoch": 18.02, "learning_rate": 0.001, "loss": 2.5957, "step": 93864 }, { "epoch": 18.03, "learning_rate": 0.001, "loss": 2.6068, "step": 93876 }, { "epoch": 18.03, "learning_rate": 0.001, "loss": 2.6083, "step": 93888 }, { "epoch": 18.03, "learning_rate": 0.001, "loss": 2.6072, "step": 93900 }, { "epoch": 18.03, "learning_rate": 0.001, "loss": 2.6034, "step": 93912 }, { "epoch": 18.03, "learning_rate": 0.001, "loss": 2.6041, "step": 93924 }, { "epoch": 18.04, "learning_rate": 0.001, "loss": 2.6046, "step": 93936 }, { "epoch": 18.04, "learning_rate": 0.001, "loss": 2.5969, "step": 93948 }, { "epoch": 18.04, "learning_rate": 0.001, "loss": 2.6026, "step": 93960 }, { "epoch": 18.04, "learning_rate": 0.001, "loss": 2.6032, "step": 93972 }, { "epoch": 18.05, "learning_rate": 0.001, "loss": 2.6057, "step": 93984 }, { "epoch": 18.05, "learning_rate": 0.001, "loss": 2.5933, "step": 93996 }, { "epoch": 18.05, "learning_rate": 0.001, "loss": 2.6049, "step": 94008 }, { "epoch": 18.05, "learning_rate": 0.001, "loss": 2.6041, "step": 94020 }, { "epoch": 18.06, "learning_rate": 0.001, "loss": 2.5942, "step": 94032 }, { "epoch": 18.06, "learning_rate": 0.001, "loss": 2.5981, "step": 94044 }, { "epoch": 18.06, "learning_rate": 0.001, "loss": 2.5913, "step": 94056 }, { "epoch": 18.06, "learning_rate": 0.001, "loss": 2.6033, "step": 94068 }, { "epoch": 18.06, "learning_rate": 0.001, "loss": 2.6011, "step": 94080 }, { "epoch": 18.07, "learning_rate": 0.001, "loss": 2.6016, "step": 94092 }, { "epoch": 18.07, "learning_rate": 0.001, "loss": 2.602, "step": 94104 }, { "epoch": 18.07, "learning_rate": 0.001, "loss": 2.6126, "step": 94116 }, { "epoch": 18.07, "learning_rate": 0.001, "loss": 2.6116, "step": 94128 }, { "epoch": 18.08, "learning_rate": 0.001, "loss": 2.6027, "step": 94140 }, { "epoch": 18.08, "learning_rate": 0.001, "loss": 2.6098, "step": 94152 }, { "epoch": 18.08, "learning_rate": 0.001, "loss": 2.5967, "step": 94164 }, { "epoch": 18.08, "learning_rate": 0.001, "loss": 2.6029, "step": 94176 }, { "epoch": 18.09, "learning_rate": 0.001, "loss": 2.5989, "step": 94188 }, { "epoch": 18.09, "learning_rate": 0.001, "loss": 2.5977, "step": 94200 }, { "epoch": 18.09, "learning_rate": 0.001, "loss": 2.6058, "step": 94212 }, { "epoch": 18.09, "learning_rate": 0.001, "loss": 2.6043, "step": 94224 }, { "epoch": 18.09, "learning_rate": 0.001, "loss": 2.6086, "step": 94236 }, { "epoch": 18.1, "learning_rate": 0.001, "loss": 2.6047, "step": 94248 }, { "epoch": 18.1, "learning_rate": 0.001, "loss": 2.612, "step": 94260 }, { "epoch": 18.1, "learning_rate": 0.001, "loss": 2.6017, "step": 94272 }, { "epoch": 18.1, "learning_rate": 0.001, "loss": 2.6039, "step": 94284 }, { "epoch": 18.11, "learning_rate": 0.001, "loss": 2.6159, "step": 94296 }, { "epoch": 18.11, "learning_rate": 0.001, "loss": 2.5975, "step": 94308 }, { "epoch": 18.11, "learning_rate": 0.001, "loss": 2.607, "step": 94320 }, { "epoch": 18.11, "learning_rate": 0.001, "loss": 2.6046, "step": 94332 }, { "epoch": 18.12, "learning_rate": 0.001, "loss": 2.6034, "step": 94344 }, { "epoch": 18.12, "learning_rate": 0.001, "loss": 2.6036, "step": 94356 }, { "epoch": 18.12, "learning_rate": 0.001, "loss": 2.6064, "step": 94368 }, { "epoch": 18.12, "eval_ag_news_accuracy": 0.3145, "eval_ag_news_bleu_score": 4.8065221613972495, "eval_ag_news_bleu_score_sem": 0.15450619368618343, "eval_ag_news_emb_cos_sim": 0.7999658584594727, "eval_ag_news_emb_cos_sim_sem": 0.007387206554683453, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.629249334335327, "eval_ag_news_n_ngrams_match_1": 13.714, "eval_ag_news_n_ngrams_match_2": 3.122, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.654, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.684517523536975, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33747093125143024, "eval_ag_news_runtime": 11.3175, "eval_ag_news_samples_per_second": 44.179, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.34560309648479715, "eval_ag_news_token_set_f1_sem": 0.004468138765622358, "eval_ag_news_token_set_precision": 0.32737728981366854, "eval_ag_news_token_set_recall": 0.38181135263852184, "eval_ag_news_true_num_tokens": 56.09375, "step": 94375 }, { "epoch": 18.12, "eval_anthropic_toxic_prompts_accuracy": 0.11028125, "eval_anthropic_toxic_prompts_bleu_score": 2.975410458576226, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11331345214952575, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6517369151115417, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010762586967061009, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.313065767288208, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.89, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.784, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63, "eval_anthropic_toxic_prompts_num_pred_words": 46.876, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.469210720801932, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20248132834157756, "eval_anthropic_toxic_prompts_runtime": 10.8508, "eval_anthropic_toxic_prompts_samples_per_second": 46.079, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.3423611679932183, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006497291578651429, "eval_anthropic_toxic_prompts_token_set_precision": 0.41437085722812783, "eval_anthropic_toxic_prompts_token_set_recall": 0.3227532348392972, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 94375 }, { "epoch": 18.12, "eval_arxiv_accuracy": 0.34103125, "eval_arxiv_bleu_score": 4.138464325490081, "eval_arxiv_bleu_score_sem": 0.12211145482656759, "eval_arxiv_emb_cos_sim": 0.7511368989944458, "eval_arxiv_emb_cos_sim_sem": 0.008781480938251514, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4760892391204834, "eval_arxiv_n_ngrams_match_1": 14.762, "eval_arxiv_n_ngrams_match_2": 2.748, "eval_arxiv_n_ngrams_match_3": 0.608, "eval_arxiv_num_pred_words": 40.776, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.33302776166574, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35050254989762897, "eval_arxiv_runtime": 11.1117, "eval_arxiv_samples_per_second": 44.998, "eval_arxiv_steps_per_second": 0.09, "eval_arxiv_token_set_f1": 0.34647991406169265, "eval_arxiv_token_set_f1_sem": 0.004276592512614056, "eval_arxiv_token_set_precision": 0.29635678169109236, "eval_arxiv_token_set_recall": 0.43613273544248893, "eval_arxiv_true_num_tokens": 64.0, "step": 94375 }, { "epoch": 18.12, "eval_python_code_alpaca_accuracy": 0.1585625, "eval_python_code_alpaca_bleu_score": 4.331375591198053, "eval_python_code_alpaca_bleu_score_sem": 0.13471041632555336, "eval_python_code_alpaca_emb_cos_sim": 0.7365143299102783, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010550341364860718, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9513700008392334, "eval_python_code_alpaca_n_ngrams_match_1": 9.258, "eval_python_code_alpaca_n_ngrams_match_2": 2.708, "eval_python_code_alpaca_n_ngrams_match_3": 0.874, "eval_python_code_alpaca_num_pred_words": 42.838, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.132146839068767, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3116014285408749, "eval_python_code_alpaca_runtime": 10.5574, "eval_python_code_alpaca_samples_per_second": 47.36, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.4536628685738009, "eval_python_code_alpaca_token_set_f1_sem": 0.005900277468102476, "eval_python_code_alpaca_token_set_precision": 0.5088881956056012, "eval_python_code_alpaca_token_set_recall": 0.4367756972270833, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 94375 }, { "epoch": 18.12, "eval_wikibio_accuracy": 0.3124375, "eval_wikibio_bleu_score": 5.463337977675738, "eval_wikibio_bleu_score_sem": 0.19585233495029952, "eval_wikibio_emb_cos_sim": 0.7196528911590576, "eval_wikibio_emb_cos_sim_sem": 0.009854681288408093, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8302905559539795, "eval_wikibio_n_ngrams_match_1": 9.738, "eval_wikibio_n_ngrams_match_2": 3.178, "eval_wikibio_n_ngrams_match_3": 1.102, "eval_wikibio_num_pred_words": 36.014, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.07592392350552, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3391892565673838, "eval_wikibio_runtime": 10.9082, "eval_wikibio_samples_per_second": 45.837, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.3073926674740038, "eval_wikibio_token_set_f1_sem": 0.005887596659974254, "eval_wikibio_token_set_precision": 0.3140139764942013, "eval_wikibio_token_set_recall": 0.31815646353670773, "eval_wikibio_true_num_tokens": 61.1328125, "step": 94375 }, { "epoch": 18.12, "eval_nq_accuracy": 0.51971875, "eval_nq_bleu_score": 11.383309887274182, "eval_nq_bleu_score_sem": 0.48382154270143746, "eval_nq_emb_cos_sim": 0.8248996734619141, "eval_nq_emb_cos_sim_sem": 0.007240632986452724, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.243713617324829, "eval_nq_n_ngrams_match_1": 22.716, "eval_nq_n_ngrams_match_2": 8.26, "eval_nq_n_ngrams_match_3": 3.8, "eval_nq_num_pred_words": 49.358, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.428279377001168, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43690430494291854, "eval_nq_runtime": 11.5556, "eval_nq_samples_per_second": 43.269, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.45260147590760574, "eval_nq_token_set_f1_sem": 0.005032885431939979, "eval_nq_token_set_precision": 0.40921770882235925, "eval_nq_token_set_recall": 0.5149665276817206, "eval_nq_true_num_tokens": 64.0, "step": 94375 }, { "epoch": 18.12, "learning_rate": 0.001, "loss": 2.6184, "step": 94380 }, { "epoch": 18.12, "learning_rate": 0.001, "loss": 2.6144, "step": 94392 }, { "epoch": 18.13, "learning_rate": 0.001, "loss": 2.6055, "step": 94404 }, { "epoch": 18.13, "learning_rate": 0.001, "loss": 2.6063, "step": 94416 }, { "epoch": 18.13, "learning_rate": 0.001, "loss": 2.6035, "step": 94428 }, { "epoch": 18.13, "learning_rate": 0.001, "loss": 2.6109, "step": 94440 }, { "epoch": 18.14, "learning_rate": 0.001, "loss": 2.6147, "step": 94452 }, { "epoch": 18.14, "learning_rate": 0.001, "loss": 2.6091, "step": 94464 }, { "epoch": 18.14, "learning_rate": 0.001, "loss": 2.6069, "step": 94476 }, { "epoch": 18.14, "learning_rate": 0.001, "loss": 2.6091, "step": 94488 }, { "epoch": 18.15, "learning_rate": 0.001, "loss": 2.6069, "step": 94500 }, { "epoch": 18.15, "learning_rate": 0.001, "loss": 2.5967, "step": 94512 }, { "epoch": 18.15, "learning_rate": 0.001, "loss": 2.5919, "step": 94524 }, { "epoch": 18.15, "learning_rate": 0.001, "loss": 2.6039, "step": 94536 }, { "epoch": 18.15, "learning_rate": 0.001, "loss": 2.6032, "step": 94548 }, { "epoch": 18.16, "learning_rate": 0.001, "loss": 2.5983, "step": 94560 }, { "epoch": 18.16, "learning_rate": 0.001, "loss": 2.6032, "step": 94572 }, { "epoch": 18.16, "learning_rate": 0.001, "loss": 2.6019, "step": 94584 }, { "epoch": 18.16, "learning_rate": 0.001, "loss": 2.6008, "step": 94596 }, { "epoch": 18.17, "learning_rate": 0.001, "loss": 2.6038, "step": 94608 }, { "epoch": 18.17, "learning_rate": 0.001, "loss": 2.6119, "step": 94620 }, { "epoch": 18.17, "learning_rate": 0.001, "loss": 2.5952, "step": 94632 }, { "epoch": 18.17, "learning_rate": 0.001, "loss": 2.6052, "step": 94644 }, { "epoch": 18.18, "learning_rate": 0.001, "loss": 2.6077, "step": 94656 }, { "epoch": 18.18, "learning_rate": 0.001, "loss": 2.606, "step": 94668 }, { "epoch": 18.18, "learning_rate": 0.001, "loss": 2.6067, "step": 94680 }, { "epoch": 18.18, "learning_rate": 0.001, "loss": 2.6005, "step": 94692 }, { "epoch": 18.18, "learning_rate": 0.001, "loss": 2.6127, "step": 94704 }, { "epoch": 18.19, "learning_rate": 0.001, "loss": 2.6138, "step": 94716 }, { "epoch": 18.19, "learning_rate": 0.001, "loss": 2.6006, "step": 94728 }, { "epoch": 18.19, "learning_rate": 0.001, "loss": 2.6043, "step": 94740 }, { "epoch": 18.19, "learning_rate": 0.001, "loss": 2.6068, "step": 94752 }, { "epoch": 18.2, "learning_rate": 0.001, "loss": 2.6113, "step": 94764 }, { "epoch": 18.2, "learning_rate": 0.001, "loss": 2.607, "step": 94776 }, { "epoch": 18.2, "learning_rate": 0.001, "loss": 2.6117, "step": 94788 }, { "epoch": 18.2, "learning_rate": 0.001, "loss": 2.5984, "step": 94800 }, { "epoch": 18.21, "learning_rate": 0.001, "loss": 2.6152, "step": 94812 }, { "epoch": 18.21, "learning_rate": 0.001, "loss": 2.6095, "step": 94824 }, { "epoch": 18.21, "learning_rate": 0.001, "loss": 2.5988, "step": 94836 }, { "epoch": 18.21, "learning_rate": 0.001, "loss": 2.6058, "step": 94848 }, { "epoch": 18.21, "learning_rate": 0.001, "loss": 2.612, "step": 94860 }, { "epoch": 18.22, "learning_rate": 0.001, "loss": 2.6034, "step": 94872 }, { "epoch": 18.22, "learning_rate": 0.001, "loss": 2.6033, "step": 94884 }, { "epoch": 18.22, "learning_rate": 0.001, "loss": 2.6052, "step": 94896 }, { "epoch": 18.22, "learning_rate": 0.001, "loss": 2.6066, "step": 94908 }, { "epoch": 18.23, "learning_rate": 0.001, "loss": 2.6096, "step": 94920 }, { "epoch": 18.23, "learning_rate": 0.001, "loss": 2.6054, "step": 94932 }, { "epoch": 18.23, "learning_rate": 0.001, "loss": 2.6092, "step": 94944 }, { "epoch": 18.23, "learning_rate": 0.001, "loss": 2.6025, "step": 94956 }, { "epoch": 18.24, "learning_rate": 0.001, "loss": 2.6085, "step": 94968 }, { "epoch": 18.24, "learning_rate": 0.001, "loss": 2.6029, "step": 94980 }, { "epoch": 18.24, "learning_rate": 0.001, "loss": 2.6043, "step": 94992 }, { "epoch": 18.24, "eval_ag_news_accuracy": 0.3154375, "eval_ag_news_bleu_score": 4.783146663537894, "eval_ag_news_bleu_score_sem": 0.15633000150700754, "eval_ag_news_emb_cos_sim": 0.7934670448303223, "eval_ag_news_emb_cos_sim_sem": 0.008029166936321644, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6278457641601562, "eval_ag_news_n_ngrams_match_1": 13.704, "eval_ag_news_n_ngrams_match_2": 2.988, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 46.606, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.63166176073855, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33781881099991656, "eval_ag_news_runtime": 11.0857, "eval_ag_news_samples_per_second": 45.103, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.34362471604045963, "eval_ag_news_token_set_f1_sem": 0.004504580187691374, "eval_ag_news_token_set_precision": 0.32633752948756295, "eval_ag_news_token_set_recall": 0.37860933095874616, "eval_ag_news_true_num_tokens": 56.09375, "step": 95000 }, { "epoch": 18.24, "eval_anthropic_toxic_prompts_accuracy": 0.1110625, "eval_anthropic_toxic_prompts_bleu_score": 2.8900259276062106, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1136314521746121, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6522161960601807, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01041171906301937, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.28578782081604, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.004, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.788, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626, "eval_anthropic_toxic_prompts_num_pred_words": 47.608, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.73003449950339, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2038527069402539, "eval_anthropic_toxic_prompts_runtime": 10.5468, "eval_anthropic_toxic_prompts_samples_per_second": 47.408, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.3549872699450282, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065296038233517645, "eval_anthropic_toxic_prompts_token_set_precision": 0.42315681055129517, "eval_anthropic_toxic_prompts_token_set_recall": 0.3393227364247595, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 95000 }, { "epoch": 18.24, "eval_arxiv_accuracy": 0.34046875, "eval_arxiv_bleu_score": 4.205228001935656, "eval_arxiv_bleu_score_sem": 0.115815289133178, "eval_arxiv_emb_cos_sim": 0.7570281624794006, "eval_arxiv_emb_cos_sim_sem": 0.007559703727470923, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.479388952255249, "eval_arxiv_n_ngrams_match_1": 14.94, "eval_arxiv_n_ngrams_match_2": 2.88, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 40.93, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.4398936945549, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3540739966092163, "eval_arxiv_runtime": 10.9365, "eval_arxiv_samples_per_second": 45.718, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.34910766433947127, "eval_arxiv_token_set_f1_sem": 0.004090998537275445, "eval_arxiv_token_set_precision": 0.29942840317882663, "eval_arxiv_token_set_recall": 0.43679318830735475, "eval_arxiv_true_num_tokens": 64.0, "step": 95000 }, { "epoch": 18.24, "eval_python_code_alpaca_accuracy": 0.15625, "eval_python_code_alpaca_bleu_score": 4.3538373229199445, "eval_python_code_alpaca_bleu_score_sem": 0.1407386582802524, "eval_python_code_alpaca_emb_cos_sim": 0.7455194592475891, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009571122559982862, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9710068702697754, "eval_python_code_alpaca_n_ngrams_match_1": 9.658, "eval_python_code_alpaca_n_ngrams_match_2": 2.682, "eval_python_code_alpaca_n_ngrams_match_3": 0.836, "eval_python_code_alpaca_num_pred_words": 43.674, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.51155531402468, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3226625391226031, "eval_python_code_alpaca_runtime": 10.4435, "eval_python_code_alpaca_samples_per_second": 47.877, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4712154631934445, "eval_python_code_alpaca_token_set_f1_sem": 0.005424239542164947, "eval_python_code_alpaca_token_set_precision": 0.5265152919415134, "eval_python_code_alpaca_token_set_recall": 0.45126111251285717, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 95000 }, { "epoch": 18.24, "eval_wikibio_accuracy": 0.31821875, "eval_wikibio_bleu_score": 5.486517132495208, "eval_wikibio_bleu_score_sem": 0.20212476746254804, "eval_wikibio_emb_cos_sim": 0.7030634880065918, "eval_wikibio_emb_cos_sim_sem": 0.012838128936859731, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7831408977508545, "eval_wikibio_n_ngrams_match_1": 9.25, "eval_wikibio_n_ngrams_match_2": 3.1, "eval_wikibio_n_ngrams_match_3": 1.096, "eval_wikibio_num_pred_words": 34.852, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.95387979682376, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3253598902247037, "eval_wikibio_runtime": 10.4965, "eval_wikibio_samples_per_second": 47.635, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.300403327658984, "eval_wikibio_token_set_f1_sem": 0.005999880441165681, "eval_wikibio_token_set_precision": 0.30070859794356924, "eval_wikibio_token_set_recall": 0.32042129537599146, "eval_wikibio_true_num_tokens": 61.1328125, "step": 95000 }, { "epoch": 18.24, "eval_nq_accuracy": 0.51909375, "eval_nq_bleu_score": 11.440870655561664, "eval_nq_bleu_score_sem": 0.49320907865324376, "eval_nq_emb_cos_sim": 0.8287546038627625, "eval_nq_emb_cos_sim_sem": 0.006802683766487008, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2402915954589844, "eval_nq_n_ngrams_match_1": 22.792, "eval_nq_n_ngrams_match_2": 8.188, "eval_nq_n_ngrams_match_3": 3.812, "eval_nq_num_pred_words": 48.918, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.396070739577427, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4416670845091659, "eval_nq_runtime": 12.1546, "eval_nq_samples_per_second": 41.137, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.4556921188722675, "eval_nq_token_set_f1_sem": 0.005034046263896343, "eval_nq_token_set_precision": 0.41308612085370944, "eval_nq_token_set_recall": 0.516794620460981, "eval_nq_true_num_tokens": 64.0, "step": 95000 }, { "epoch": 18.24, "learning_rate": 0.001, "loss": 2.6112, "step": 95004 }, { "epoch": 18.24, "learning_rate": 0.001, "loss": 2.5988, "step": 95016 }, { "epoch": 18.25, "learning_rate": 0.001, "loss": 2.6152, "step": 95028 }, { "epoch": 18.25, "learning_rate": 0.001, "loss": 2.6205, "step": 95040 }, { "epoch": 18.25, "learning_rate": 0.001, "loss": 2.6002, "step": 95052 }, { "epoch": 18.25, "learning_rate": 0.001, "loss": 2.5957, "step": 95064 }, { "epoch": 18.26, "learning_rate": 0.001, "loss": 2.6038, "step": 95076 }, { "epoch": 18.26, "learning_rate": 0.001, "loss": 2.6009, "step": 95088 }, { "epoch": 18.26, "learning_rate": 0.001, "loss": 2.597, "step": 95100 }, { "epoch": 18.26, "learning_rate": 0.001, "loss": 2.6073, "step": 95112 }, { "epoch": 18.26, "learning_rate": 0.001, "loss": 2.5937, "step": 95124 }, { "epoch": 18.27, "learning_rate": 0.001, "loss": 2.606, "step": 95136 }, { "epoch": 18.27, "learning_rate": 0.001, "loss": 2.6134, "step": 95148 }, { "epoch": 18.27, "learning_rate": 0.001, "loss": 2.6033, "step": 95160 }, { "epoch": 18.27, "learning_rate": 0.001, "loss": 2.6043, "step": 95172 }, { "epoch": 18.28, "learning_rate": 0.001, "loss": 2.61, "step": 95184 }, { "epoch": 18.28, "learning_rate": 0.001, "loss": 2.6113, "step": 95196 }, { "epoch": 18.28, "learning_rate": 0.001, "loss": 2.6035, "step": 95208 }, { "epoch": 18.28, "learning_rate": 0.001, "loss": 2.6044, "step": 95220 }, { "epoch": 18.29, "learning_rate": 0.001, "loss": 2.6115, "step": 95232 }, { "epoch": 18.29, "learning_rate": 0.001, "loss": 2.6202, "step": 95244 }, { "epoch": 18.29, "learning_rate": 0.001, "loss": 2.6102, "step": 95256 }, { "epoch": 18.29, "learning_rate": 0.001, "loss": 2.6039, "step": 95268 }, { "epoch": 18.29, "learning_rate": 0.001, "loss": 2.6029, "step": 95280 }, { "epoch": 18.3, "learning_rate": 0.001, "loss": 2.6149, "step": 95292 }, { "epoch": 18.3, "learning_rate": 0.001, "loss": 2.6131, "step": 95304 }, { "epoch": 18.3, "learning_rate": 0.001, "loss": 2.6151, "step": 95316 }, { "epoch": 18.3, "learning_rate": 0.001, "loss": 2.6, "step": 95328 }, { "epoch": 18.31, "learning_rate": 0.001, "loss": 2.6045, "step": 95340 }, { "epoch": 18.31, "learning_rate": 0.001, "loss": 2.6142, "step": 95352 }, { "epoch": 18.31, "learning_rate": 0.001, "loss": 2.6059, "step": 95364 }, { "epoch": 18.31, "learning_rate": 0.001, "loss": 2.6091, "step": 95376 }, { "epoch": 18.32, "learning_rate": 0.001, "loss": 2.5962, "step": 95388 }, { "epoch": 18.32, "learning_rate": 0.001, "loss": 2.6112, "step": 95400 }, { "epoch": 18.32, "learning_rate": 0.001, "loss": 2.6074, "step": 95412 }, { "epoch": 18.32, "learning_rate": 0.001, "loss": 2.5956, "step": 95424 }, { "epoch": 18.32, "learning_rate": 0.001, "loss": 2.6096, "step": 95436 }, { "epoch": 18.33, "learning_rate": 0.001, "loss": 2.617, "step": 95448 }, { "epoch": 18.33, "learning_rate": 0.001, "loss": 2.6029, "step": 95460 }, { "epoch": 18.33, "learning_rate": 0.001, "loss": 2.617, "step": 95472 }, { "epoch": 18.33, "learning_rate": 0.001, "loss": 2.613, "step": 95484 }, { "epoch": 18.34, "learning_rate": 0.001, "loss": 2.6111, "step": 95496 }, { "epoch": 18.34, "learning_rate": 0.001, "loss": 2.6016, "step": 95508 }, { "epoch": 18.34, "learning_rate": 0.001, "loss": 2.6088, "step": 95520 }, { "epoch": 18.34, "learning_rate": 0.001, "loss": 2.6126, "step": 95532 }, { "epoch": 18.35, "learning_rate": 0.001, "loss": 2.6079, "step": 95544 }, { "epoch": 18.35, "learning_rate": 0.001, "loss": 2.6067, "step": 95556 }, { "epoch": 18.35, "learning_rate": 0.001, "loss": 2.6024, "step": 95568 }, { "epoch": 18.35, "learning_rate": 0.001, "loss": 2.6035, "step": 95580 }, { "epoch": 18.35, "learning_rate": 0.001, "loss": 2.6027, "step": 95592 }, { "epoch": 18.36, "learning_rate": 0.001, "loss": 2.6139, "step": 95604 }, { "epoch": 18.36, "learning_rate": 0.001, "loss": 2.6109, "step": 95616 }, { "epoch": 18.36, "eval_ag_news_accuracy": 0.31484375, "eval_ag_news_bleu_score": 4.6691743342691145, "eval_ag_news_bleu_score_sem": 0.15190602213001148, "eval_ag_news_emb_cos_sim": 0.7999417781829834, "eval_ag_news_emb_cos_sim_sem": 0.007424792982476567, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.621875524520874, "eval_ag_news_n_ngrams_match_1": 13.69, "eval_ag_news_n_ngrams_match_2": 2.924, "eval_ag_news_n_ngrams_match_3": 0.854, "eval_ag_news_num_pred_words": 47.118, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.40766105629688, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33940541760522486, "eval_ag_news_runtime": 13.7506, "eval_ag_news_samples_per_second": 36.362, "eval_ag_news_steps_per_second": 0.073, "eval_ag_news_token_set_f1": 0.3432769915525782, "eval_ag_news_token_set_f1_sem": 0.004322283844567325, "eval_ag_news_token_set_precision": 0.32585468129250833, "eval_ag_news_token_set_recall": 0.3809083381372029, "eval_ag_news_true_num_tokens": 56.09375, "step": 95625 }, { "epoch": 18.36, "eval_anthropic_toxic_prompts_accuracy": 0.11215625, "eval_anthropic_toxic_prompts_bleu_score": 2.908802248532186, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11383107203567008, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6600983738899231, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010381937704734868, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.297804594039917, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.96, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.822, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.638, "eval_anthropic_toxic_prompts_num_pred_words": 47.514, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.053180962636926, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2029485573796751, "eval_anthropic_toxic_prompts_runtime": 12.1911, "eval_anthropic_toxic_prompts_samples_per_second": 41.013, "eval_anthropic_toxic_prompts_steps_per_second": 0.082, "eval_anthropic_toxic_prompts_token_set_f1": 0.34661106786176654, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067618686121811815, "eval_anthropic_toxic_prompts_token_set_precision": 0.4214624193266087, "eval_anthropic_toxic_prompts_token_set_recall": 0.3258114548960384, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 95625 }, { "epoch": 18.36, "eval_arxiv_accuracy": 0.34303125, "eval_arxiv_bleu_score": 4.205971410617861, "eval_arxiv_bleu_score_sem": 0.11394144713960389, "eval_arxiv_emb_cos_sim": 0.7626786231994629, "eval_arxiv_emb_cos_sim_sem": 0.007923761619462946, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4587795734405518, "eval_arxiv_n_ngrams_match_1": 14.976, "eval_arxiv_n_ngrams_match_2": 2.86, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 41.592, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.778169916606007, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35088619096447243, "eval_arxiv_runtime": 11.8202, "eval_arxiv_samples_per_second": 42.301, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.3453943120528482, "eval_arxiv_token_set_f1_sem": 0.0042224657524059335, "eval_arxiv_token_set_precision": 0.2990934165056068, "eval_arxiv_token_set_recall": 0.4227365150516084, "eval_arxiv_true_num_tokens": 64.0, "step": 95625 }, { "epoch": 18.36, "eval_python_code_alpaca_accuracy": 0.15903125, "eval_python_code_alpaca_bleu_score": 4.330235826821296, "eval_python_code_alpaca_bleu_score_sem": 0.13610601802067374, "eval_python_code_alpaca_emb_cos_sim": 0.7526489496231079, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008678642627884932, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9327666759490967, "eval_python_code_alpaca_n_ngrams_match_1": 9.512, "eval_python_code_alpaca_n_ngrams_match_2": 2.702, "eval_python_code_alpaca_n_ngrams_match_3": 0.852, "eval_python_code_alpaca_num_pred_words": 43.332, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.779515522982695, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31768589357022037, "eval_python_code_alpaca_runtime": 10.8573, "eval_python_code_alpaca_samples_per_second": 46.052, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.46245710827433906, "eval_python_code_alpaca_token_set_f1_sem": 0.005804947650448553, "eval_python_code_alpaca_token_set_precision": 0.5227187278007261, "eval_python_code_alpaca_token_set_recall": 0.4397650423476415, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 95625 }, { "epoch": 18.36, "eval_wikibio_accuracy": 0.31603125, "eval_wikibio_bleu_score": 5.542624458993231, "eval_wikibio_bleu_score_sem": 0.20689460327959286, "eval_wikibio_emb_cos_sim": 0.7380982637405396, "eval_wikibio_emb_cos_sim_sem": 0.00941926087935303, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7812247276306152, "eval_wikibio_n_ngrams_match_1": 9.95, "eval_wikibio_n_ngrams_match_2": 3.204, "eval_wikibio_n_ngrams_match_3": 1.114, "eval_wikibio_num_pred_words": 37.102, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.86973732707724, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3423724842590822, "eval_wikibio_runtime": 10.8719, "eval_wikibio_samples_per_second": 45.99, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.3123894949471945, "eval_wikibio_token_set_f1_sem": 0.005552994663794415, "eval_wikibio_token_set_precision": 0.3201717083277487, "eval_wikibio_token_set_recall": 0.3240208272144956, "eval_wikibio_true_num_tokens": 61.1328125, "step": 95625 }, { "epoch": 18.36, "eval_nq_accuracy": 0.5205625, "eval_nq_bleu_score": 11.345577577378592, "eval_nq_bleu_score_sem": 0.4706911061260375, "eval_nq_emb_cos_sim": 0.82608962059021, "eval_nq_emb_cos_sim_sem": 0.007153023517833242, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.236826181411743, "eval_nq_n_ngrams_match_1": 22.714, "eval_nq_n_ngrams_match_2": 8.268, "eval_nq_n_ngrams_match_3": 3.744, "eval_nq_num_pred_words": 48.984, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.363565818082948, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43735779080035103, "eval_nq_runtime": 10.9841, "eval_nq_samples_per_second": 45.52, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4541292172230679, "eval_nq_token_set_f1_sem": 0.005104829263973392, "eval_nq_token_set_precision": 0.41178179183125596, "eval_nq_token_set_recall": 0.5140669627958959, "eval_nq_true_num_tokens": 64.0, "step": 95625 }, { "epoch": 18.36, "learning_rate": 0.001, "loss": 2.605, "step": 95628 }, { "epoch": 18.36, "learning_rate": 0.001, "loss": 2.6075, "step": 95640 }, { "epoch": 18.37, "learning_rate": 0.001, "loss": 2.5935, "step": 95652 }, { "epoch": 18.37, "learning_rate": 0.001, "loss": 2.609, "step": 95664 }, { "epoch": 18.37, "learning_rate": 0.001, "loss": 2.6216, "step": 95676 }, { "epoch": 18.37, "learning_rate": 0.001, "loss": 2.6099, "step": 95688 }, { "epoch": 18.38, "learning_rate": 0.001, "loss": 2.6087, "step": 95700 }, { "epoch": 18.38, "learning_rate": 0.001, "loss": 2.6047, "step": 95712 }, { "epoch": 18.38, "learning_rate": 0.001, "loss": 2.6215, "step": 95724 }, { "epoch": 18.38, "learning_rate": 0.001, "loss": 2.6056, "step": 95736 }, { "epoch": 18.38, "learning_rate": 0.001, "loss": 2.6079, "step": 95748 }, { "epoch": 18.39, "learning_rate": 0.001, "loss": 2.6124, "step": 95760 }, { "epoch": 18.39, "learning_rate": 0.001, "loss": 2.6174, "step": 95772 }, { "epoch": 18.39, "learning_rate": 0.001, "loss": 2.6194, "step": 95784 }, { "epoch": 18.39, "learning_rate": 0.001, "loss": 2.6152, "step": 95796 }, { "epoch": 18.4, "learning_rate": 0.001, "loss": 2.611, "step": 95808 }, { "epoch": 18.4, "learning_rate": 0.001, "loss": 2.5986, "step": 95820 }, { "epoch": 18.4, "learning_rate": 0.001, "loss": 2.6141, "step": 95832 }, { "epoch": 18.4, "learning_rate": 0.001, "loss": 2.6113, "step": 95844 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 2.617, "step": 95856 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 2.6106, "step": 95868 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 2.6008, "step": 95880 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 2.6066, "step": 95892 }, { "epoch": 18.41, "learning_rate": 0.001, "loss": 2.5966, "step": 95904 }, { "epoch": 18.42, "learning_rate": 0.001, "loss": 2.6114, "step": 95916 }, { "epoch": 18.42, "learning_rate": 0.001, "loss": 2.614, "step": 95928 }, { "epoch": 18.42, "learning_rate": 0.001, "loss": 2.6143, "step": 95940 }, { "epoch": 18.42, "learning_rate": 0.001, "loss": 2.6024, "step": 95952 }, { "epoch": 18.43, "learning_rate": 0.001, "loss": 2.6139, "step": 95964 }, { "epoch": 18.43, "learning_rate": 0.001, "loss": 2.6074, "step": 95976 }, { "epoch": 18.43, "learning_rate": 0.001, "loss": 2.6075, "step": 95988 }, { "epoch": 18.43, "learning_rate": 0.001, "loss": 2.6061, "step": 96000 }, { "epoch": 18.44, "learning_rate": 0.001, "loss": 2.6003, "step": 96012 }, { "epoch": 18.44, "learning_rate": 0.001, "loss": 2.6047, "step": 96024 }, { "epoch": 18.44, "learning_rate": 0.001, "loss": 2.6124, "step": 96036 }, { "epoch": 18.44, "learning_rate": 0.001, "loss": 2.5965, "step": 96048 }, { "epoch": 18.44, "learning_rate": 0.001, "loss": 2.6062, "step": 96060 }, { "epoch": 18.45, "learning_rate": 0.001, "loss": 2.6042, "step": 96072 }, { "epoch": 18.45, "learning_rate": 0.001, "loss": 2.612, "step": 96084 }, { "epoch": 18.45, "learning_rate": 0.001, "loss": 2.6099, "step": 96096 }, { "epoch": 18.45, "learning_rate": 0.001, "loss": 2.6141, "step": 96108 }, { "epoch": 18.46, "learning_rate": 0.001, "loss": 2.6099, "step": 96120 }, { "epoch": 18.46, "learning_rate": 0.001, "loss": 2.5956, "step": 96132 }, { "epoch": 18.46, "learning_rate": 0.001, "loss": 2.6142, "step": 96144 }, { "epoch": 18.46, "learning_rate": 0.001, "loss": 2.5985, "step": 96156 }, { "epoch": 18.47, "learning_rate": 0.001, "loss": 2.6056, "step": 96168 }, { "epoch": 18.47, "learning_rate": 0.001, "loss": 2.5973, "step": 96180 }, { "epoch": 18.47, "learning_rate": 0.001, "loss": 2.6124, "step": 96192 }, { "epoch": 18.47, "learning_rate": 0.001, "loss": 2.6116, "step": 96204 }, { "epoch": 18.47, "learning_rate": 0.001, "loss": 2.6044, "step": 96216 }, { "epoch": 18.48, "learning_rate": 0.001, "loss": 2.6078, "step": 96228 }, { "epoch": 18.48, "learning_rate": 0.001, "loss": 2.6053, "step": 96240 }, { "epoch": 18.48, "eval_ag_news_accuracy": 0.31578125, "eval_ag_news_bleu_score": 4.742773185263093, "eval_ag_news_bleu_score_sem": 0.14767151838953968, "eval_ag_news_emb_cos_sim": 0.797610878944397, "eval_ag_news_emb_cos_sim_sem": 0.007353679395237545, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.614140510559082, "eval_ag_news_n_ngrams_match_1": 13.82, "eval_ag_news_n_ngrams_match_2": 3.034, "eval_ag_news_n_ngrams_match_3": 0.85, "eval_ag_news_num_pred_words": 46.712, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.11942845443713, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.340488347183705, "eval_ag_news_runtime": 11.1712, "eval_ag_news_samples_per_second": 44.758, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.34519639819405223, "eval_ag_news_token_set_f1_sem": 0.004369930500342174, "eval_ag_news_token_set_precision": 0.3280042408013696, "eval_ag_news_token_set_recall": 0.37912332160173745, "eval_ag_news_true_num_tokens": 56.09375, "step": 96250 }, { "epoch": 18.48, "eval_anthropic_toxic_prompts_accuracy": 0.1115, "eval_anthropic_toxic_prompts_bleu_score": 2.986602460368294, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11548935349472776, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675180196762085, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009086568803731716, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.291154623031616, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.968, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.78, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.65, "eval_anthropic_toxic_prompts_num_pred_words": 46.616, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.873874944238842, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20837498481236738, "eval_anthropic_toxic_prompts_runtime": 10.5555, "eval_anthropic_toxic_prompts_samples_per_second": 47.369, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.34639541159399195, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064582298334139, "eval_anthropic_toxic_prompts_token_set_precision": 0.4221812706910981, "eval_anthropic_toxic_prompts_token_set_recall": 0.32334515757111487, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 96250 }, { "epoch": 18.48, "eval_arxiv_accuracy": 0.3426875, "eval_arxiv_bleu_score": 4.164852716596905, "eval_arxiv_bleu_score_sem": 0.12307658990951294, "eval_arxiv_emb_cos_sim": 0.7445935010910034, "eval_arxiv_emb_cos_sim_sem": 0.009263523664293722, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.461890935897827, "eval_arxiv_n_ngrams_match_1": 14.508, "eval_arxiv_n_ngrams_match_2": 2.79, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 39.576, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.877197296594794, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3475244341900761, "eval_arxiv_runtime": 10.6587, "eval_arxiv_samples_per_second": 46.91, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3401370926019041, "eval_arxiv_token_set_f1_sem": 0.004517323822640119, "eval_arxiv_token_set_precision": 0.29032364400816774, "eval_arxiv_token_set_recall": 0.4309384057692911, "eval_arxiv_true_num_tokens": 64.0, "step": 96250 }, { "epoch": 18.48, "eval_python_code_alpaca_accuracy": 0.15696875, "eval_python_code_alpaca_bleu_score": 4.380251750551483, "eval_python_code_alpaca_bleu_score_sem": 0.1424218613949205, "eval_python_code_alpaca_emb_cos_sim": 0.7421876192092896, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010029880469311815, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.933974504470825, "eval_python_code_alpaca_n_ngrams_match_1": 9.544, "eval_python_code_alpaca_n_ngrams_match_2": 2.69, "eval_python_code_alpaca_n_ngrams_match_3": 0.874, "eval_python_code_alpaca_num_pred_words": 42.79, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.802211661217957, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32092510218041737, "eval_python_code_alpaca_runtime": 10.9966, "eval_python_code_alpaca_samples_per_second": 45.468, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4613884551477792, "eval_python_code_alpaca_token_set_f1_sem": 0.0059963971616164785, "eval_python_code_alpaca_token_set_precision": 0.5199470829741978, "eval_python_code_alpaca_token_set_recall": 0.43742905957189904, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 96250 }, { "epoch": 18.48, "eval_wikibio_accuracy": 0.31628125, "eval_wikibio_bleu_score": 5.921618232308941, "eval_wikibio_bleu_score_sem": 0.21290516918769772, "eval_wikibio_emb_cos_sim": 0.7494040727615356, "eval_wikibio_emb_cos_sim_sem": 0.008387679883556452, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7998430728912354, "eval_wikibio_n_ngrams_match_1": 10.242, "eval_wikibio_n_ngrams_match_2": 3.402, "eval_wikibio_n_ngrams_match_3": 1.224, "eval_wikibio_num_pred_words": 36.68, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.69417021603945, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3592193326205584, "eval_wikibio_runtime": 11.0505, "eval_wikibio_samples_per_second": 45.247, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.3231720357029772, "eval_wikibio_token_set_f1_sem": 0.0051847495570598555, "eval_wikibio_token_set_precision": 0.3333367686439334, "eval_wikibio_token_set_recall": 0.32798864990216137, "eval_wikibio_true_num_tokens": 61.1328125, "step": 96250 }, { "epoch": 18.48, "eval_nq_accuracy": 0.52121875, "eval_nq_bleu_score": 11.53860695635088, "eval_nq_bleu_score_sem": 0.47311330166610505, "eval_nq_emb_cos_sim": 0.8242154121398926, "eval_nq_emb_cos_sim_sem": 0.007524452498503116, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.235832929611206, "eval_nq_n_ngrams_match_1": 22.966, "eval_nq_n_ngrams_match_2": 8.292, "eval_nq_n_ngrams_match_3": 3.822, "eval_nq_num_pred_words": 49.316, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.354270056754755, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4425937425243177, "eval_nq_runtime": 10.9955, "eval_nq_samples_per_second": 45.473, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4588045710573252, "eval_nq_token_set_f1_sem": 0.004750594426790293, "eval_nq_token_set_precision": 0.4151368708397352, "eval_nq_token_set_recall": 0.5214414714224554, "eval_nq_true_num_tokens": 64.0, "step": 96250 }, { "epoch": 18.48, "learning_rate": 0.001, "loss": 2.603, "step": 96252 }, { "epoch": 18.48, "learning_rate": 0.001, "loss": 2.6207, "step": 96264 }, { "epoch": 18.49, "learning_rate": 0.001, "loss": 2.5999, "step": 96276 }, { "epoch": 18.49, "learning_rate": 0.001, "loss": 2.5944, "step": 96288 }, { "epoch": 18.49, "learning_rate": 0.001, "loss": 2.6081, "step": 96300 }, { "epoch": 18.49, "learning_rate": 0.001, "loss": 2.6095, "step": 96312 }, { "epoch": 18.5, "learning_rate": 0.001, "loss": 2.6171, "step": 96324 }, { "epoch": 18.5, "learning_rate": 0.001, "loss": 2.6027, "step": 96336 }, { "epoch": 18.5, "learning_rate": 0.001, "loss": 2.6214, "step": 96348 }, { "epoch": 18.5, "learning_rate": 0.001, "loss": 2.6028, "step": 96360 }, { "epoch": 18.5, "learning_rate": 0.001, "loss": 2.6122, "step": 96372 }, { "epoch": 18.51, "learning_rate": 0.001, "loss": 2.6161, "step": 96384 }, { "epoch": 18.51, "learning_rate": 0.001, "loss": 2.609, "step": 96396 }, { "epoch": 18.51, "learning_rate": 0.001, "loss": 2.6114, "step": 96408 }, { "epoch": 18.51, "learning_rate": 0.001, "loss": 2.6113, "step": 96420 }, { "epoch": 18.52, "learning_rate": 0.001, "loss": 2.6072, "step": 96432 }, { "epoch": 18.52, "learning_rate": 0.001, "loss": 2.6151, "step": 96444 }, { "epoch": 18.52, "learning_rate": 0.001, "loss": 2.6153, "step": 96456 }, { "epoch": 18.52, "learning_rate": 0.001, "loss": 2.6078, "step": 96468 }, { "epoch": 18.53, "learning_rate": 0.001, "loss": 2.6103, "step": 96480 }, { "epoch": 18.53, "learning_rate": 0.001, "loss": 2.6051, "step": 96492 }, { "epoch": 18.53, "learning_rate": 0.001, "loss": 2.6097, "step": 96504 }, { "epoch": 18.53, "learning_rate": 0.001, "loss": 2.6099, "step": 96516 }, { "epoch": 18.53, "learning_rate": 0.001, "loss": 2.6046, "step": 96528 }, { "epoch": 18.54, "learning_rate": 0.001, "loss": 2.6159, "step": 96540 }, { "epoch": 18.54, "learning_rate": 0.001, "loss": 2.6068, "step": 96552 }, { "epoch": 18.54, "learning_rate": 0.001, "loss": 2.6094, "step": 96564 }, { "epoch": 18.54, "learning_rate": 0.001, "loss": 2.606, "step": 96576 }, { "epoch": 18.55, "learning_rate": 0.001, "loss": 2.6045, "step": 96588 }, { "epoch": 18.55, "learning_rate": 0.001, "loss": 2.6083, "step": 96600 }, { "epoch": 18.55, "learning_rate": 0.001, "loss": 2.6131, "step": 96612 }, { "epoch": 18.55, "learning_rate": 0.001, "loss": 2.6072, "step": 96624 }, { "epoch": 18.56, "learning_rate": 0.001, "loss": 2.6046, "step": 96636 }, { "epoch": 18.56, "learning_rate": 0.001, "loss": 2.6, "step": 96648 }, { "epoch": 18.56, "learning_rate": 0.001, "loss": 2.6041, "step": 96660 }, { "epoch": 18.56, "learning_rate": 0.001, "loss": 2.6175, "step": 96672 }, { "epoch": 18.56, "learning_rate": 0.001, "loss": 2.6146, "step": 96684 }, { "epoch": 18.57, "learning_rate": 0.001, "loss": 2.6225, "step": 96696 }, { "epoch": 18.57, "learning_rate": 0.001, "loss": 2.6086, "step": 96708 }, { "epoch": 18.57, "learning_rate": 0.001, "loss": 2.5985, "step": 96720 }, { "epoch": 18.57, "learning_rate": 0.001, "loss": 2.6153, "step": 96732 }, { "epoch": 18.58, "learning_rate": 0.001, "loss": 2.6064, "step": 96744 }, { "epoch": 18.58, "learning_rate": 0.001, "loss": 2.6024, "step": 96756 }, { "epoch": 18.58, "learning_rate": 0.001, "loss": 2.6106, "step": 96768 }, { "epoch": 18.58, "learning_rate": 0.001, "loss": 2.6136, "step": 96780 }, { "epoch": 18.59, "learning_rate": 0.001, "loss": 2.6163, "step": 96792 }, { "epoch": 18.59, "learning_rate": 0.001, "loss": 2.6076, "step": 96804 }, { "epoch": 18.59, "learning_rate": 0.001, "loss": 2.6039, "step": 96816 }, { "epoch": 18.59, "learning_rate": 0.001, "loss": 2.6025, "step": 96828 }, { "epoch": 18.59, "learning_rate": 0.001, "loss": 2.6188, "step": 96840 }, { "epoch": 18.6, "learning_rate": 0.001, "loss": 2.6186, "step": 96852 }, { "epoch": 18.6, "learning_rate": 0.001, "loss": 2.6255, "step": 96864 }, { "epoch": 18.6, "eval_ag_news_accuracy": 0.318, "eval_ag_news_bleu_score": 4.64113285841821, "eval_ag_news_bleu_score_sem": 0.14392170725688638, "eval_ag_news_emb_cos_sim": 0.7965982556343079, "eval_ag_news_emb_cos_sim_sem": 0.008046987150707657, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6175808906555176, "eval_ag_news_n_ngrams_match_1": 13.668, "eval_ag_news_n_ngrams_match_2": 2.98, "eval_ag_news_n_ngrams_match_3": 0.836, "eval_ag_news_num_pred_words": 46.512, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.247353326195324, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3365215909701731, "eval_ag_news_runtime": 11.4947, "eval_ag_news_samples_per_second": 43.498, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.3421697712491056, "eval_ag_news_token_set_f1_sem": 0.004293380207232115, "eval_ag_news_token_set_precision": 0.3263349073328334, "eval_ag_news_token_set_recall": 0.37532573581921147, "eval_ag_news_true_num_tokens": 56.09375, "step": 96875 }, { "epoch": 18.6, "eval_anthropic_toxic_prompts_accuracy": 0.11290625, "eval_anthropic_toxic_prompts_bleu_score": 2.908590531022566, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10859501232520109, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6552438735961914, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010150922925512562, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.272285223007202, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.93, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.746, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.602, "eval_anthropic_toxic_prompts_num_pred_words": 47.01, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.371535375766104, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20447244582343949, "eval_anthropic_toxic_prompts_runtime": 10.8011, "eval_anthropic_toxic_prompts_samples_per_second": 46.292, "eval_anthropic_toxic_prompts_steps_per_second": 0.093, "eval_anthropic_toxic_prompts_token_set_f1": 0.3496548281326282, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006531837971644752, "eval_anthropic_toxic_prompts_token_set_precision": 0.4181213031442762, "eval_anthropic_toxic_prompts_token_set_recall": 0.3295576475243872, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 96875 }, { "epoch": 18.6, "eval_arxiv_accuracy": 0.3429375, "eval_arxiv_bleu_score": 4.060446591726533, "eval_arxiv_bleu_score_sem": 0.11535480298229991, "eval_arxiv_emb_cos_sim": 0.7464751601219177, "eval_arxiv_emb_cos_sim_sem": 0.009393524301436558, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4663569927215576, "eval_arxiv_n_ngrams_match_1": 14.526, "eval_arxiv_n_ngrams_match_2": 2.732, "eval_arxiv_n_ngrams_match_3": 0.56, "eval_arxiv_num_pred_words": 40.552, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.01988105081965, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34454573858859844, "eval_arxiv_runtime": 11.2522, "eval_arxiv_samples_per_second": 44.436, "eval_arxiv_steps_per_second": 0.089, "eval_arxiv_token_set_f1": 0.3404924784033526, "eval_arxiv_token_set_f1_sem": 0.004156770269250585, "eval_arxiv_token_set_precision": 0.2909309788150816, "eval_arxiv_token_set_recall": 0.426627479222847, "eval_arxiv_true_num_tokens": 64.0, "step": 96875 }, { "epoch": 18.6, "eval_python_code_alpaca_accuracy": 0.158375, "eval_python_code_alpaca_bleu_score": 4.398422704861474, "eval_python_code_alpaca_bleu_score_sem": 0.13905094607098825, "eval_python_code_alpaca_emb_cos_sim": 0.7390080690383911, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009435710662617931, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.922437906265259, "eval_python_code_alpaca_n_ngrams_match_1": 9.466, "eval_python_code_alpaca_n_ngrams_match_2": 2.678, "eval_python_code_alpaca_n_ngrams_match_3": 0.886, "eval_python_code_alpaca_num_pred_words": 43.454, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.58654452443671, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31563238790502973, "eval_python_code_alpaca_runtime": 11.1047, "eval_python_code_alpaca_samples_per_second": 45.026, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.46549719927832806, "eval_python_code_alpaca_token_set_f1_sem": 0.005822094713937672, "eval_python_code_alpaca_token_set_precision": 0.5142298271878352, "eval_python_code_alpaca_token_set_recall": 0.44985480221678237, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 96875 }, { "epoch": 18.6, "eval_wikibio_accuracy": 0.31534375, "eval_wikibio_bleu_score": 5.599645037244387, "eval_wikibio_bleu_score_sem": 0.20597665951295108, "eval_wikibio_emb_cos_sim": 0.7339651584625244, "eval_wikibio_emb_cos_sim_sem": 0.009255253003817967, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.796966314315796, "eval_wikibio_n_ngrams_match_1": 9.772, "eval_wikibio_n_ngrams_match_2": 3.148, "eval_wikibio_n_ngrams_match_3": 1.11, "eval_wikibio_num_pred_words": 36.32, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.565780640048395, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3400387721418663, "eval_wikibio_runtime": 10.7954, "eval_wikibio_samples_per_second": 46.316, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.3101382149910275, "eval_wikibio_token_set_f1_sem": 0.005630899690839322, "eval_wikibio_token_set_precision": 0.3173958890489613, "eval_wikibio_token_set_recall": 0.3181035535584828, "eval_wikibio_true_num_tokens": 61.1328125, "step": 96875 }, { "epoch": 18.6, "eval_nq_accuracy": 0.52065625, "eval_nq_bleu_score": 11.279353191465116, "eval_nq_bleu_score_sem": 0.4598831181620717, "eval_nq_emb_cos_sim": 0.819989025592804, "eval_nq_emb_cos_sim_sem": 0.007851256527678556, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2385220527648926, "eval_nq_n_ngrams_match_1": 22.504, "eval_nq_n_ngrams_match_2": 8.152, "eval_nq_n_ngrams_match_3": 3.74, "eval_nq_num_pred_words": 49.114, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.37945869344422, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4346066736600779, "eval_nq_runtime": 11.1562, "eval_nq_samples_per_second": 44.818, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4526704917998639, "eval_nq_token_set_f1_sem": 0.0051285257330828836, "eval_nq_token_set_precision": 0.4100285015794644, "eval_nq_token_set_recall": 0.5144674801327048, "eval_nq_true_num_tokens": 64.0, "step": 96875 }, { "epoch": 18.6, "learning_rate": 0.001, "loss": 2.6136, "step": 96876 }, { "epoch": 18.6, "learning_rate": 0.001, "loss": 2.6038, "step": 96888 }, { "epoch": 18.61, "learning_rate": 0.001, "loss": 2.6057, "step": 96900 }, { "epoch": 18.61, "learning_rate": 0.001, "loss": 2.6103, "step": 96912 }, { "epoch": 18.61, "learning_rate": 0.001, "loss": 2.6101, "step": 96924 }, { "epoch": 18.61, "learning_rate": 0.001, "loss": 2.6121, "step": 96936 }, { "epoch": 18.62, "learning_rate": 0.001, "loss": 2.6057, "step": 96948 }, { "epoch": 18.62, "learning_rate": 0.001, "loss": 2.6066, "step": 96960 }, { "epoch": 18.62, "learning_rate": 0.001, "loss": 2.6212, "step": 96972 }, { "epoch": 18.62, "learning_rate": 0.001, "loss": 2.613, "step": 96984 }, { "epoch": 18.62, "learning_rate": 0.001, "loss": 2.6131, "step": 96996 }, { "epoch": 18.63, "learning_rate": 0.001, "loss": 2.6114, "step": 97008 }, { "epoch": 18.63, "learning_rate": 0.001, "loss": 2.6172, "step": 97020 }, { "epoch": 18.63, "learning_rate": 0.001, "loss": 2.616, "step": 97032 }, { "epoch": 18.63, "learning_rate": 0.001, "loss": 2.6174, "step": 97044 }, { "epoch": 18.64, "learning_rate": 0.001, "loss": 2.6212, "step": 97056 }, { "epoch": 18.64, "learning_rate": 0.001, "loss": 2.6131, "step": 97068 }, { "epoch": 18.64, "learning_rate": 0.001, "loss": 2.6058, "step": 97080 }, { "epoch": 18.64, "learning_rate": 0.001, "loss": 2.6178, "step": 97092 }, { "epoch": 18.65, "learning_rate": 0.001, "loss": 2.6171, "step": 97104 }, { "epoch": 18.65, "learning_rate": 0.001, "loss": 2.6169, "step": 97116 }, { "epoch": 18.65, "learning_rate": 0.001, "loss": 2.608, "step": 97128 }, { "epoch": 18.65, "learning_rate": 0.001, "loss": 2.6083, "step": 97140 }, { "epoch": 18.65, "learning_rate": 0.001, "loss": 2.6132, "step": 97152 }, { "epoch": 18.66, "learning_rate": 0.001, "loss": 2.6058, "step": 97164 }, { "epoch": 18.66, "learning_rate": 0.001, "loss": 2.6055, "step": 97176 }, { "epoch": 18.66, "learning_rate": 0.001, "loss": 2.6118, "step": 97188 }, { "epoch": 18.66, "learning_rate": 0.001, "loss": 2.6261, "step": 97200 }, { "epoch": 18.67, "learning_rate": 0.001, "loss": 2.6129, "step": 97212 }, { "epoch": 18.67, "learning_rate": 0.001, "loss": 2.6032, "step": 97224 }, { "epoch": 18.67, "learning_rate": 0.001, "loss": 2.6204, "step": 97236 }, { "epoch": 18.67, "learning_rate": 0.001, "loss": 2.5961, "step": 97248 }, { "epoch": 18.68, "learning_rate": 0.001, "loss": 2.6098, "step": 97260 }, { "epoch": 18.68, "learning_rate": 0.001, "loss": 2.6017, "step": 97272 }, { "epoch": 18.68, "learning_rate": 0.001, "loss": 2.6181, "step": 97284 }, { "epoch": 18.68, "learning_rate": 0.001, "loss": 2.6091, "step": 97296 }, { "epoch": 18.68, "learning_rate": 0.001, "loss": 2.6009, "step": 97308 }, { "epoch": 18.69, "learning_rate": 0.001, "loss": 2.6159, "step": 97320 }, { "epoch": 18.69, "learning_rate": 0.001, "loss": 2.6171, "step": 97332 }, { "epoch": 18.69, "learning_rate": 0.001, "loss": 2.6073, "step": 97344 }, { "epoch": 18.69, "learning_rate": 0.001, "loss": 2.6011, "step": 97356 }, { "epoch": 18.7, "learning_rate": 0.001, "loss": 2.6134, "step": 97368 }, { "epoch": 18.7, "learning_rate": 0.001, "loss": 2.6242, "step": 97380 }, { "epoch": 18.7, "learning_rate": 0.001, "loss": 2.6147, "step": 97392 }, { "epoch": 18.7, "learning_rate": 0.001, "loss": 2.6124, "step": 97404 }, { "epoch": 18.71, "learning_rate": 0.001, "loss": 2.6087, "step": 97416 }, { "epoch": 18.71, "learning_rate": 0.001, "loss": 2.6023, "step": 97428 }, { "epoch": 18.71, "learning_rate": 0.001, "loss": 2.6037, "step": 97440 }, { "epoch": 18.71, "learning_rate": 0.001, "loss": 2.6026, "step": 97452 }, { "epoch": 18.71, "learning_rate": 0.001, "loss": 2.6107, "step": 97464 }, { "epoch": 18.72, "learning_rate": 0.001, "loss": 2.6013, "step": 97476 }, { "epoch": 18.72, "learning_rate": 0.001, "loss": 2.6075, "step": 97488 }, { "epoch": 18.72, "learning_rate": 0.001, "loss": 2.6195, "step": 97500 }, { "epoch": 18.72, "eval_ag_news_accuracy": 0.317375, "eval_ag_news_bleu_score": 4.656647181707273, "eval_ag_news_bleu_score_sem": 0.14977508035398074, "eval_ag_news_emb_cos_sim": 0.8073785305023193, "eval_ag_news_emb_cos_sim_sem": 0.007234375496537871, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6215946674346924, "eval_ag_news_n_ngrams_match_1": 13.906, "eval_ag_news_n_ngrams_match_2": 3.052, "eval_ag_news_n_ngrams_match_3": 0.826, "eval_ag_news_num_pred_words": 46.916, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.39715632484492, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.341368211273631, "eval_ag_news_runtime": 11.2426, "eval_ag_news_samples_per_second": 44.474, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.3464814273669507, "eval_ag_news_token_set_f1_sem": 0.004372458516239519, "eval_ag_news_token_set_precision": 0.3322177730898681, "eval_ag_news_token_set_recall": 0.38016865945177786, "eval_ag_news_true_num_tokens": 56.09375, "step": 97500 }, { "epoch": 18.72, "eval_anthropic_toxic_prompts_accuracy": 0.1115, "eval_anthropic_toxic_prompts_bleu_score": 2.878984964061356, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10305757242988817, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6545363664627075, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010594322918370851, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3094899654388428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.098, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.784, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.606, "eval_anthropic_toxic_prompts_num_pred_words": 47.01, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.37116167276371, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20810156779703265, "eval_anthropic_toxic_prompts_runtime": 10.6781, "eval_anthropic_toxic_prompts_samples_per_second": 46.825, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.34719224782010083, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064782320772376785, "eval_anthropic_toxic_prompts_token_set_precision": 0.43294426130365504, "eval_anthropic_toxic_prompts_token_set_recall": 0.31978646034245195, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 97500 }, { "epoch": 18.72, "eval_arxiv_accuracy": 0.34496875, "eval_arxiv_bleu_score": 4.185301798018157, "eval_arxiv_bleu_score_sem": 0.12049250810336702, "eval_arxiv_emb_cos_sim": 0.7472431659698486, "eval_arxiv_emb_cos_sim_sem": 0.010150255161381468, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4600818157196045, "eval_arxiv_n_ngrams_match_1": 14.714, "eval_arxiv_n_ngrams_match_2": 2.794, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 40.526, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.81957974998821, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34878776997319716, "eval_arxiv_runtime": 10.8051, "eval_arxiv_samples_per_second": 46.274, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.343695321821954, "eval_arxiv_token_set_f1_sem": 0.004273697233242668, "eval_arxiv_token_set_precision": 0.2964980753229387, "eval_arxiv_token_set_recall": 0.4292485531260051, "eval_arxiv_true_num_tokens": 64.0, "step": 97500 }, { "epoch": 18.72, "eval_python_code_alpaca_accuracy": 0.156375, "eval_python_code_alpaca_bleu_score": 4.517288316973969, "eval_python_code_alpaca_bleu_score_sem": 0.14832750578827467, "eval_python_code_alpaca_emb_cos_sim": 0.7501220107078552, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009231878625051627, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.935863971710205, "eval_python_code_alpaca_n_ngrams_match_1": 9.7, "eval_python_code_alpaca_n_ngrams_match_2": 2.816, "eval_python_code_alpaca_n_ngrams_match_3": 0.922, "eval_python_code_alpaca_num_pred_words": 43.316, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.837771408088813, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3275757944672905, "eval_python_code_alpaca_runtime": 11.9482, "eval_python_code_alpaca_samples_per_second": 41.847, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.4627990740990327, "eval_python_code_alpaca_token_set_f1_sem": 0.005695619749963124, "eval_python_code_alpaca_token_set_precision": 0.5301099673004801, "eval_python_code_alpaca_token_set_recall": 0.434069374935861, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 97500 }, { "epoch": 18.72, "eval_wikibio_accuracy": 0.31596875, "eval_wikibio_bleu_score": 5.7350808101546376, "eval_wikibio_bleu_score_sem": 0.20187394227353678, "eval_wikibio_emb_cos_sim": 0.7440035343170166, "eval_wikibio_emb_cos_sim_sem": 0.009912491618041355, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.784790515899658, "eval_wikibio_n_ngrams_match_1": 10.23, "eval_wikibio_n_ngrams_match_2": 3.386, "eval_wikibio_n_ngrams_match_3": 1.164, "eval_wikibio_num_pred_words": 36.79, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.02644675207413, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35233759034088086, "eval_wikibio_runtime": 11.4649, "eval_wikibio_samples_per_second": 43.611, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.31787960493167655, "eval_wikibio_token_set_f1_sem": 0.005363299164133791, "eval_wikibio_token_set_precision": 0.33064591379734604, "eval_wikibio_token_set_recall": 0.318451294053591, "eval_wikibio_true_num_tokens": 61.1328125, "step": 97500 }, { "epoch": 18.72, "eval_nq_accuracy": 0.520375, "eval_nq_bleu_score": 11.471308349842047, "eval_nq_bleu_score_sem": 0.47913630570444005, "eval_nq_emb_cos_sim": 0.8260822296142578, "eval_nq_emb_cos_sim_sem": 0.007098766694365054, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2381880283355713, "eval_nq_n_ngrams_match_1": 22.806, "eval_nq_n_ngrams_match_2": 8.282, "eval_nq_n_ngrams_match_3": 3.832, "eval_nq_num_pred_words": 49.224, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.376326248292528, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43946255256202194, "eval_nq_runtime": 11.4775, "eval_nq_samples_per_second": 43.563, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.4543488653194585, "eval_nq_token_set_f1_sem": 0.004926261914301938, "eval_nq_token_set_precision": 0.4124802873188905, "eval_nq_token_set_recall": 0.5141252058303717, "eval_nq_true_num_tokens": 64.0, "step": 97500 }, { "epoch": 18.72, "learning_rate": 0.001, "loss": 2.6049, "step": 97512 }, { "epoch": 18.73, "learning_rate": 0.001, "loss": 2.6167, "step": 97524 }, { "epoch": 18.73, "learning_rate": 0.001, "loss": 2.6102, "step": 97536 }, { "epoch": 18.73, "learning_rate": 0.001, "loss": 2.6116, "step": 97548 }, { "epoch": 18.73, "learning_rate": 0.001, "loss": 2.6251, "step": 97560 }, { "epoch": 18.74, "learning_rate": 0.001, "loss": 2.6118, "step": 97572 }, { "epoch": 18.74, "learning_rate": 0.001, "loss": 2.6041, "step": 97584 }, { "epoch": 18.74, "learning_rate": 0.001, "loss": 2.593, "step": 97596 }, { "epoch": 18.74, "learning_rate": 0.001, "loss": 2.6046, "step": 97608 }, { "epoch": 18.74, "learning_rate": 0.001, "loss": 2.6124, "step": 97620 }, { "epoch": 18.75, "learning_rate": 0.001, "loss": 2.6093, "step": 97632 }, { "epoch": 18.75, "learning_rate": 0.001, "loss": 2.6122, "step": 97644 }, { "epoch": 18.75, "learning_rate": 0.001, "loss": 2.6156, "step": 97656 }, { "epoch": 18.75, "learning_rate": 0.001, "loss": 2.6121, "step": 97668 }, { "epoch": 18.76, "learning_rate": 0.001, "loss": 2.6202, "step": 97680 }, { "epoch": 18.76, "learning_rate": 0.001, "loss": 2.6125, "step": 97692 }, { "epoch": 18.76, "learning_rate": 0.001, "loss": 2.6084, "step": 97704 }, { "epoch": 18.76, "learning_rate": 0.001, "loss": 2.6156, "step": 97716 }, { "epoch": 18.76, "learning_rate": 0.001, "loss": 2.6145, "step": 97728 }, { "epoch": 18.77, "learning_rate": 0.001, "loss": 2.606, "step": 97740 }, { "epoch": 18.77, "learning_rate": 0.001, "loss": 2.608, "step": 97752 }, { "epoch": 18.77, "learning_rate": 0.001, "loss": 2.6111, "step": 97764 }, { "epoch": 18.77, "learning_rate": 0.001, "loss": 2.6094, "step": 97776 }, { "epoch": 18.78, "learning_rate": 0.001, "loss": 2.6085, "step": 97788 }, { "epoch": 18.78, "learning_rate": 0.001, "loss": 2.6035, "step": 97800 }, { "epoch": 18.78, "learning_rate": 0.001, "loss": 2.6077, "step": 97812 }, { "epoch": 18.78, "learning_rate": 0.001, "loss": 2.6106, "step": 97824 }, { "epoch": 18.79, "learning_rate": 0.001, "loss": 2.6105, "step": 97836 }, { "epoch": 18.79, "learning_rate": 0.001, "loss": 2.6003, "step": 97848 }, { "epoch": 18.79, "learning_rate": 0.001, "loss": 2.6069, "step": 97860 }, { "epoch": 18.79, "learning_rate": 0.001, "loss": 2.6067, "step": 97872 }, { "epoch": 18.79, "learning_rate": 0.001, "loss": 2.6129, "step": 97884 }, { "epoch": 18.8, "learning_rate": 0.001, "loss": 2.6108, "step": 97896 }, { "epoch": 18.8, "learning_rate": 0.001, "loss": 2.6032, "step": 97908 }, { "epoch": 18.8, "learning_rate": 0.001, "loss": 2.6162, "step": 97920 }, { "epoch": 18.8, "learning_rate": 0.001, "loss": 2.6087, "step": 97932 }, { "epoch": 18.81, "learning_rate": 0.001, "loss": 2.6236, "step": 97944 }, { "epoch": 18.81, "learning_rate": 0.001, "loss": 2.5985, "step": 97956 }, { "epoch": 18.81, "learning_rate": 0.001, "loss": 2.6083, "step": 97968 }, { "epoch": 18.81, "learning_rate": 0.001, "loss": 2.6107, "step": 97980 }, { "epoch": 18.82, "learning_rate": 0.001, "loss": 2.6052, "step": 97992 }, { "epoch": 18.82, "learning_rate": 0.001, "loss": 2.6117, "step": 98004 }, { "epoch": 18.82, "learning_rate": 0.001, "loss": 2.6164, "step": 98016 }, { "epoch": 18.82, "learning_rate": 0.001, "loss": 2.6024, "step": 98028 }, { "epoch": 18.82, "learning_rate": 0.001, "loss": 2.6109, "step": 98040 }, { "epoch": 18.83, "learning_rate": 0.001, "loss": 2.608, "step": 98052 }, { "epoch": 18.83, "learning_rate": 0.001, "loss": 2.6123, "step": 98064 }, { "epoch": 18.83, "learning_rate": 0.001, "loss": 2.6167, "step": 98076 }, { "epoch": 18.83, "learning_rate": 0.001, "loss": 2.6024, "step": 98088 }, { "epoch": 18.84, "learning_rate": 0.001, "loss": 2.6192, "step": 98100 }, { "epoch": 18.84, "learning_rate": 0.001, "loss": 2.6136, "step": 98112 }, { "epoch": 18.84, "learning_rate": 0.001, "loss": 2.6011, "step": 98124 }, { "epoch": 18.84, "eval_ag_news_accuracy": 0.3163125, "eval_ag_news_bleu_score": 4.893583411326613, "eval_ag_news_bleu_score_sem": 0.1602519833001547, "eval_ag_news_emb_cos_sim": 0.799976110458374, "eval_ag_news_emb_cos_sim_sem": 0.007589054397647205, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.618302345275879, "eval_ag_news_n_ngrams_match_1": 13.816, "eval_ag_news_n_ngrams_match_2": 3.046, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.352, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.27423529724385, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3430798967658262, "eval_ag_news_runtime": 12.3353, "eval_ag_news_samples_per_second": 40.534, "eval_ag_news_steps_per_second": 0.081, "eval_ag_news_token_set_f1": 0.34465290333492693, "eval_ag_news_token_set_f1_sem": 0.004500714902887902, "eval_ag_news_token_set_precision": 0.32818629919558523, "eval_ag_news_token_set_recall": 0.37674486971843524, "eval_ag_news_true_num_tokens": 56.09375, "step": 98125 }, { "epoch": 18.84, "eval_anthropic_toxic_prompts_accuracy": 0.113, "eval_anthropic_toxic_prompts_bleu_score": 2.978666812159319, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11180006814330817, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6559618711471558, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01104389940016951, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2950711250305176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.898, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.804, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, "eval_anthropic_toxic_prompts_num_pred_words": 46.402, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.979332907536705, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20266106536294742, "eval_anthropic_toxic_prompts_runtime": 10.5559, "eval_anthropic_toxic_prompts_samples_per_second": 47.367, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.34200163667029326, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006734019669357172, "eval_anthropic_toxic_prompts_token_set_precision": 0.41423840448185606, "eval_anthropic_toxic_prompts_token_set_recall": 0.3220011582193597, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 98125 }, { "epoch": 18.84, "eval_arxiv_accuracy": 0.34328125, "eval_arxiv_bleu_score": 4.196377540087479, "eval_arxiv_bleu_score_sem": 0.1167786961682254, "eval_arxiv_emb_cos_sim": 0.7467535734176636, "eval_arxiv_emb_cos_sim_sem": 0.00884328929159274, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.457857847213745, "eval_arxiv_n_ngrams_match_1": 14.758, "eval_arxiv_n_ngrams_match_2": 2.84, "eval_arxiv_n_ngrams_match_3": 0.628, "eval_arxiv_num_pred_words": 39.878, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.748892638844115, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3547328129271238, "eval_arxiv_runtime": 10.8097, "eval_arxiv_samples_per_second": 46.255, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.3460723869447406, "eval_arxiv_token_set_f1_sem": 0.004204699816453167, "eval_arxiv_token_set_precision": 0.29641136100843846, "eval_arxiv_token_set_recall": 0.4356137055554718, "eval_arxiv_true_num_tokens": 64.0, "step": 98125 }, { "epoch": 18.84, "eval_python_code_alpaca_accuracy": 0.159, "eval_python_code_alpaca_bleu_score": 4.515639884363277, "eval_python_code_alpaca_bleu_score_sem": 0.14828602698629056, "eval_python_code_alpaca_emb_cos_sim": 0.7527774572372437, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008795256372269496, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9217469692230225, "eval_python_code_alpaca_n_ngrams_match_1": 9.66, "eval_python_code_alpaca_n_ngrams_match_2": 2.788, "eval_python_code_alpaca_n_ngrams_match_3": 0.89, "eval_python_code_alpaca_num_pred_words": 42.986, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.57370682786837, "eval_python_code_alpaca_pred_num_tokens": 62.8984375, "eval_python_code_alpaca_rouge_score": 0.32571275162015967, "eval_python_code_alpaca_runtime": 10.4971, "eval_python_code_alpaca_samples_per_second": 47.632, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.4754397297562614, "eval_python_code_alpaca_token_set_f1_sem": 0.0056081020812104206, "eval_python_code_alpaca_token_set_precision": 0.5303777641992341, "eval_python_code_alpaca_token_set_recall": 0.45439303003643294, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 98125 }, { "epoch": 18.84, "eval_wikibio_accuracy": 0.318375, "eval_wikibio_bleu_score": 5.489666883691228, "eval_wikibio_bleu_score_sem": 0.1946471806560131, "eval_wikibio_emb_cos_sim": 0.7177799940109253, "eval_wikibio_emb_cos_sim_sem": 0.011559783411387933, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.772843837738037, "eval_wikibio_n_ngrams_match_1": 9.522, "eval_wikibio_n_ngrams_match_2": 3.154, "eval_wikibio_n_ngrams_match_3": 1.114, "eval_wikibio_num_pred_words": 35.61, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.5036062839656, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3313459483886061, "eval_wikibio_runtime": 10.6348, "eval_wikibio_samples_per_second": 47.015, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.3013661245541971, "eval_wikibio_token_set_f1_sem": 0.005964780329996843, "eval_wikibio_token_set_precision": 0.30579228178006457, "eval_wikibio_token_set_recall": 0.3159434351039579, "eval_wikibio_true_num_tokens": 61.1328125, "step": 98125 }, { "epoch": 18.84, "eval_nq_accuracy": 0.521375, "eval_nq_bleu_score": 11.239000215810501, "eval_nq_bleu_score_sem": 0.4673154678829763, "eval_nq_emb_cos_sim": 0.8265373706817627, "eval_nq_emb_cos_sim_sem": 0.007860237757678424, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2307910919189453, "eval_nq_n_ngrams_match_1": 22.588, "eval_nq_n_ngrams_match_2": 8.166, "eval_nq_n_ngrams_match_3": 3.694, "eval_nq_num_pred_words": 48.918, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.30722603920483, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43660076894666, "eval_nq_runtime": 11.0284, "eval_nq_samples_per_second": 45.337, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.45180013152361204, "eval_nq_token_set_f1_sem": 0.004944779576918083, "eval_nq_token_set_precision": 0.408124341946281, "eval_nq_token_set_recall": 0.5149068385648281, "eval_nq_true_num_tokens": 64.0, "step": 98125 }, { "epoch": 18.84, "learning_rate": 0.001, "loss": 2.6088, "step": 98136 }, { "epoch": 18.85, "learning_rate": 0.001, "loss": 2.6091, "step": 98148 }, { "epoch": 18.85, "learning_rate": 0.001, "loss": 2.6115, "step": 98160 }, { "epoch": 18.85, "learning_rate": 0.001, "loss": 2.6132, "step": 98172 }, { "epoch": 18.85, "learning_rate": 0.001, "loss": 2.6056, "step": 98184 }, { "epoch": 18.85, "learning_rate": 0.001, "loss": 2.6079, "step": 98196 }, { "epoch": 18.86, "learning_rate": 0.001, "loss": 2.6147, "step": 98208 }, { "epoch": 18.86, "learning_rate": 0.001, "loss": 2.6173, "step": 98220 }, { "epoch": 18.86, "learning_rate": 0.001, "loss": 2.6103, "step": 98232 }, { "epoch": 18.86, "learning_rate": 0.001, "loss": 2.6103, "step": 98244 }, { "epoch": 18.87, "learning_rate": 0.001, "loss": 2.6104, "step": 98256 }, { "epoch": 18.87, "learning_rate": 0.001, "loss": 2.6145, "step": 98268 }, { "epoch": 18.87, "learning_rate": 0.001, "loss": 2.6055, "step": 98280 }, { "epoch": 18.87, "learning_rate": 0.001, "loss": 2.6117, "step": 98292 }, { "epoch": 18.88, "learning_rate": 0.001, "loss": 2.6127, "step": 98304 }, { "epoch": 18.88, "learning_rate": 0.001, "loss": 2.597, "step": 98316 }, { "epoch": 18.88, "learning_rate": 0.001, "loss": 2.6118, "step": 98328 }, { "epoch": 18.88, "learning_rate": 0.001, "loss": 2.6107, "step": 98340 }, { "epoch": 18.88, "learning_rate": 0.001, "loss": 2.6129, "step": 98352 }, { "epoch": 18.89, "learning_rate": 0.001, "loss": 2.6034, "step": 98364 }, { "epoch": 18.89, "learning_rate": 0.001, "loss": 2.5941, "step": 98376 }, { "epoch": 18.89, "learning_rate": 0.001, "loss": 2.606, "step": 98388 }, { "epoch": 18.89, "learning_rate": 0.001, "loss": 2.5963, "step": 98400 }, { "epoch": 18.9, "learning_rate": 0.001, "loss": 2.618, "step": 98412 }, { "epoch": 18.9, "learning_rate": 0.001, "loss": 2.6036, "step": 98424 }, { "epoch": 18.9, "learning_rate": 0.001, "loss": 2.6207, "step": 98436 }, { "epoch": 18.9, "learning_rate": 0.001, "loss": 2.6092, "step": 98448 }, { "epoch": 18.91, "learning_rate": 0.001, "loss": 2.6078, "step": 98460 }, { "epoch": 18.91, "learning_rate": 0.001, "loss": 2.6122, "step": 98472 }, { "epoch": 18.91, "learning_rate": 0.001, "loss": 2.6079, "step": 98484 }, { "epoch": 18.91, "learning_rate": 0.001, "loss": 2.6117, "step": 98496 }, { "epoch": 18.91, "learning_rate": 0.001, "loss": 2.6041, "step": 98508 }, { "epoch": 18.92, "learning_rate": 0.001, "loss": 2.6091, "step": 98520 }, { "epoch": 18.92, "learning_rate": 0.001, "loss": 2.6116, "step": 98532 }, { "epoch": 18.92, "learning_rate": 0.001, "loss": 2.6173, "step": 98544 }, { "epoch": 18.92, "learning_rate": 0.001, "loss": 2.6082, "step": 98556 }, { "epoch": 18.93, "learning_rate": 0.001, "loss": 2.6039, "step": 98568 }, { "epoch": 18.93, "learning_rate": 0.001, "loss": 2.614, "step": 98580 }, { "epoch": 18.93, "learning_rate": 0.001, "loss": 2.6188, "step": 98592 }, { "epoch": 18.93, "learning_rate": 0.001, "loss": 2.6268, "step": 98604 }, { "epoch": 18.94, "learning_rate": 0.001, "loss": 2.6102, "step": 98616 }, { "epoch": 18.94, "learning_rate": 0.001, "loss": 2.6184, "step": 98628 }, { "epoch": 18.94, "learning_rate": 0.001, "loss": 2.6138, "step": 98640 }, { "epoch": 18.94, "learning_rate": 0.001, "loss": 2.6227, "step": 98652 }, { "epoch": 18.94, "learning_rate": 0.001, "loss": 2.6038, "step": 98664 }, { "epoch": 18.95, "learning_rate": 0.001, "loss": 2.6143, "step": 98676 }, { "epoch": 18.95, "learning_rate": 0.001, "loss": 2.6167, "step": 98688 }, { "epoch": 18.95, "learning_rate": 0.001, "loss": 2.618, "step": 98700 }, { "epoch": 18.95, "learning_rate": 0.001, "loss": 2.6055, "step": 98712 }, { "epoch": 18.96, "learning_rate": 0.001, "loss": 2.6141, "step": 98724 }, { "epoch": 18.96, "learning_rate": 0.001, "loss": 2.6051, "step": 98736 }, { "epoch": 18.96, "learning_rate": 0.001, "loss": 2.6072, "step": 98748 }, { "epoch": 18.96, "eval_ag_news_accuracy": 0.317, "eval_ag_news_bleu_score": 4.639039641028421, "eval_ag_news_bleu_score_sem": 0.14148395008546424, "eval_ag_news_emb_cos_sim": 0.7964121103286743, "eval_ag_news_emb_cos_sim_sem": 0.007701351382781942, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6184489727020264, "eval_ag_news_n_ngrams_match_1": 13.676, "eval_ag_news_n_ngrams_match_2": 3.044, "eval_ag_news_n_ngrams_match_3": 0.858, "eval_ag_news_num_pred_words": 46.496, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.27970112313729, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33992006339498293, "eval_ag_news_runtime": 10.7139, "eval_ag_news_samples_per_second": 46.668, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3419462202753087, "eval_ag_news_token_set_f1_sem": 0.004548638895730784, "eval_ag_news_token_set_precision": 0.3240014773087771, "eval_ag_news_token_set_recall": 0.3785108601336519, "eval_ag_news_true_num_tokens": 56.09375, "step": 98750 }, { "epoch": 18.96, "eval_anthropic_toxic_prompts_accuracy": 0.11096875, "eval_anthropic_toxic_prompts_bleu_score": 3.0972500817187685, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1276785346267627, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6599158048629761, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009879960836436025, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2998158931732178, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.92, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.852, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, "eval_anthropic_toxic_prompts_num_pred_words": 47.044, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.107647758208152, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20362812406492492, "eval_anthropic_toxic_prompts_runtime": 11.5543, "eval_anthropic_toxic_prompts_samples_per_second": 43.274, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.3520620159799845, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065037939618324625, "eval_anthropic_toxic_prompts_token_set_precision": 0.4215860478372545, "eval_anthropic_toxic_prompts_token_set_recall": 0.33424358581275976, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 98750 }, { "epoch": 18.96, "eval_arxiv_accuracy": 0.3435, "eval_arxiv_bleu_score": 4.225538367916897, "eval_arxiv_bleu_score_sem": 0.11828177777566976, "eval_arxiv_emb_cos_sim": 0.7533184289932251, "eval_arxiv_emb_cos_sim_sem": 0.007184629044404857, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.44754695892334, "eval_arxiv_n_ngrams_match_1": 14.748, "eval_arxiv_n_ngrams_match_2": 2.838, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 40.39, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.423215250431277, "eval_arxiv_pred_num_tokens": 62.96875, "eval_arxiv_rouge_score": 0.35245989038110115, "eval_arxiv_runtime": 10.721, "eval_arxiv_samples_per_second": 46.638, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.34649696710682854, "eval_arxiv_token_set_f1_sem": 0.004167536004941654, "eval_arxiv_token_set_precision": 0.29497048474841564, "eval_arxiv_token_set_recall": 0.43866842880839385, "eval_arxiv_true_num_tokens": 64.0, "step": 98750 }, { "epoch": 18.96, "eval_python_code_alpaca_accuracy": 0.15796875, "eval_python_code_alpaca_bleu_score": 4.283376921883962, "eval_python_code_alpaca_bleu_score_sem": 0.14634451987151456, "eval_python_code_alpaca_emb_cos_sim": 0.7345660924911499, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010276673974945387, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.933269739151001, "eval_python_code_alpaca_n_ngrams_match_1": 9.234, "eval_python_code_alpaca_n_ngrams_match_2": 2.592, "eval_python_code_alpaca_n_ngrams_match_3": 0.83, "eval_python_code_alpaca_num_pred_words": 43.082, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.788965182880695, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3084574351601737, "eval_python_code_alpaca_runtime": 10.7032, "eval_python_code_alpaca_samples_per_second": 46.715, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.4612948208534952, "eval_python_code_alpaca_token_set_f1_sem": 0.006030784850113751, "eval_python_code_alpaca_token_set_precision": 0.5039474581549664, "eval_python_code_alpaca_token_set_recall": 0.4529264734351371, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 98750 }, { "epoch": 18.96, "eval_wikibio_accuracy": 0.31671875, "eval_wikibio_bleu_score": 5.675754276078412, "eval_wikibio_bleu_score_sem": 0.20724148934950135, "eval_wikibio_emb_cos_sim": 0.7287104725837708, "eval_wikibio_emb_cos_sim_sem": 0.01082472656828975, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8048083782196045, "eval_wikibio_n_ngrams_match_1": 9.8, "eval_wikibio_n_ngrams_match_2": 3.222, "eval_wikibio_n_ngrams_match_3": 1.174, "eval_wikibio_num_pred_words": 36.288, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.916642281352416, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3458388164725216, "eval_wikibio_runtime": 10.6871, "eval_wikibio_samples_per_second": 46.786, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.30751947622658277, "eval_wikibio_token_set_f1_sem": 0.0056847269734363725, "eval_wikibio_token_set_precision": 0.3154249383294301, "eval_wikibio_token_set_recall": 0.3165907024421341, "eval_wikibio_true_num_tokens": 61.1328125, "step": 98750 }, { "epoch": 18.96, "eval_nq_accuracy": 0.5194375, "eval_nq_bleu_score": 11.596588212619437, "eval_nq_bleu_score_sem": 0.47057519459940067, "eval_nq_emb_cos_sim": 0.8211898803710938, "eval_nq_emb_cos_sim_sem": 0.007924290430702856, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.231948137283325, "eval_nq_n_ngrams_match_1": 22.838, "eval_nq_n_ngrams_match_2": 8.412, "eval_nq_n_ngrams_match_3": 3.864, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.318001154395166, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43907608242608354, "eval_nq_runtime": 11.0048, "eval_nq_samples_per_second": 45.435, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4584433124858162, "eval_nq_token_set_f1_sem": 0.004895067730193653, "eval_nq_token_set_precision": 0.4145020439959143, "eval_nq_token_set_recall": 0.5216332944121967, "eval_nq_true_num_tokens": 64.0, "step": 98750 }, { "epoch": 18.96, "learning_rate": 0.001, "loss": 2.6086, "step": 98760 }, { "epoch": 18.97, "learning_rate": 0.001, "loss": 2.6068, "step": 98772 }, { "epoch": 18.97, "learning_rate": 0.001, "loss": 2.6221, "step": 98784 }, { "epoch": 18.97, "learning_rate": 0.001, "loss": 2.6123, "step": 98796 }, { "epoch": 18.97, "learning_rate": 0.001, "loss": 2.628, "step": 98808 }, { "epoch": 18.97, "learning_rate": 0.001, "loss": 2.6204, "step": 98820 }, { "epoch": 18.98, "learning_rate": 0.001, "loss": 2.6194, "step": 98832 }, { "epoch": 18.98, "learning_rate": 0.001, "loss": 2.6176, "step": 98844 }, { "epoch": 18.98, "learning_rate": 0.001, "loss": 2.6202, "step": 98856 }, { "epoch": 18.98, "learning_rate": 0.001, "loss": 2.6173, "step": 98868 }, { "epoch": 18.99, "learning_rate": 0.001, "loss": 2.6294, "step": 98880 }, { "epoch": 18.99, "learning_rate": 0.001, "loss": 2.6163, "step": 98892 }, { "epoch": 18.99, "learning_rate": 0.001, "loss": 2.6058, "step": 98904 }, { "epoch": 18.99, "learning_rate": 0.001, "loss": 2.6067, "step": 98916 }, { "epoch": 19.0, "learning_rate": 0.001, "loss": 2.617, "step": 98928 }, { "epoch": 19.0, "learning_rate": 0.001, "loss": 2.6157, "step": 98940 }, { "epoch": 19.0, "learning_rate": 0.001, "loss": 2.6122, "step": 98952 }, { "epoch": 19.0, "learning_rate": 0.001, "loss": 2.5956, "step": 98964 }, { "epoch": 19.0, "learning_rate": 0.001, "loss": 2.6057, "step": 98976 }, { "epoch": 19.01, "learning_rate": 0.001, "loss": 2.5963, "step": 98988 }, { "epoch": 19.01, "learning_rate": 0.001, "loss": 2.5941, "step": 99000 }, { "epoch": 19.01, "learning_rate": 0.001, "loss": 2.5905, "step": 99012 }, { "epoch": 19.01, "learning_rate": 0.001, "loss": 2.5911, "step": 99024 }, { "epoch": 19.02, "learning_rate": 0.001, "loss": 2.5849, "step": 99036 }, { "epoch": 19.02, "learning_rate": 0.001, "loss": 2.5854, "step": 99048 }, { "epoch": 19.02, "learning_rate": 0.001, "loss": 2.6008, "step": 99060 }, { "epoch": 19.02, "learning_rate": 0.001, "loss": 2.6004, "step": 99072 }, { "epoch": 19.03, "learning_rate": 0.001, "loss": 2.5953, "step": 99084 }, { "epoch": 19.03, "learning_rate": 0.001, "loss": 2.6051, "step": 99096 }, { "epoch": 19.03, "learning_rate": 0.001, "loss": 2.5995, "step": 99108 }, { "epoch": 19.03, "learning_rate": 0.001, "loss": 2.5966, "step": 99120 }, { "epoch": 19.03, "learning_rate": 0.001, "loss": 2.5986, "step": 99132 }, { "epoch": 19.04, "learning_rate": 0.001, "loss": 2.603, "step": 99144 }, { "epoch": 19.04, "learning_rate": 0.001, "loss": 2.5982, "step": 99156 }, { "epoch": 19.04, "learning_rate": 0.001, "loss": 2.6001, "step": 99168 }, { "epoch": 19.04, "learning_rate": 0.001, "loss": 2.5978, "step": 99180 }, { "epoch": 19.05, "learning_rate": 0.001, "loss": 2.5911, "step": 99192 }, { "epoch": 19.05, "learning_rate": 0.001, "loss": 2.5974, "step": 99204 }, { "epoch": 19.05, "learning_rate": 0.001, "loss": 2.5962, "step": 99216 }, { "epoch": 19.05, "learning_rate": 0.001, "loss": 2.5901, "step": 99228 }, { "epoch": 19.06, "learning_rate": 0.001, "loss": 2.5936, "step": 99240 }, { "epoch": 19.06, "learning_rate": 0.001, "loss": 2.5901, "step": 99252 }, { "epoch": 19.06, "learning_rate": 0.001, "loss": 2.5891, "step": 99264 }, { "epoch": 19.06, "learning_rate": 0.001, "loss": 2.5897, "step": 99276 }, { "epoch": 19.06, "learning_rate": 0.001, "loss": 2.5926, "step": 99288 }, { "epoch": 19.07, "learning_rate": 0.001, "loss": 2.6003, "step": 99300 }, { "epoch": 19.07, "learning_rate": 0.001, "loss": 2.5813, "step": 99312 }, { "epoch": 19.07, "learning_rate": 0.001, "loss": 2.5963, "step": 99324 }, { "epoch": 19.07, "learning_rate": 0.001, "loss": 2.6078, "step": 99336 }, { "epoch": 19.08, "learning_rate": 0.001, "loss": 2.5982, "step": 99348 }, { "epoch": 19.08, "learning_rate": 0.001, "loss": 2.5907, "step": 99360 }, { "epoch": 19.08, "learning_rate": 0.001, "loss": 2.6052, "step": 99372 }, { "epoch": 19.08, "eval_ag_news_accuracy": 0.317625, "eval_ag_news_bleu_score": 4.712855237771482, "eval_ag_news_bleu_score_sem": 0.15325488685056488, "eval_ag_news_emb_cos_sim": 0.7918837070465088, "eval_ag_news_emb_cos_sim_sem": 0.008232398381296666, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6211390495300293, "eval_ag_news_n_ngrams_match_1": 13.594, "eval_ag_news_n_ngrams_match_2": 2.928, "eval_ag_news_n_ngrams_match_3": 0.854, "eval_ag_news_num_pred_words": 46.238, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.38012139184477, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.33735291546956647, "eval_ag_news_runtime": 11.0364, "eval_ag_news_samples_per_second": 45.304, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.34168681669394485, "eval_ag_news_token_set_f1_sem": 0.004451713103573308, "eval_ag_news_token_set_precision": 0.3239396791391766, "eval_ag_news_token_set_recall": 0.3780113150446624, "eval_ag_news_true_num_tokens": 56.09375, "step": 99375 }, { "epoch": 19.08, "eval_anthropic_toxic_prompts_accuracy": 0.11165625, "eval_anthropic_toxic_prompts_bleu_score": 2.8753528057104587, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11200038990985718, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.658082127571106, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009482624714822111, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2821171283721924, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.878, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.706, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.616, "eval_anthropic_toxic_prompts_num_pred_words": 46.5, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.632096623720326, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20352867002517688, "eval_anthropic_toxic_prompts_runtime": 10.6035, "eval_anthropic_toxic_prompts_samples_per_second": 47.154, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.34948299988562537, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00660868124375518, "eval_anthropic_toxic_prompts_token_set_precision": 0.4169705698003172, "eval_anthropic_toxic_prompts_token_set_recall": 0.32867003411548057, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 99375 }, { "epoch": 19.08, "eval_arxiv_accuracy": 0.3436875, "eval_arxiv_bleu_score": 4.216196809279154, "eval_arxiv_bleu_score_sem": 0.11575719857956805, "eval_arxiv_emb_cos_sim": 0.7440145015716553, "eval_arxiv_emb_cos_sim_sem": 0.009824001675414702, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4522480964660645, "eval_arxiv_n_ngrams_match_1": 14.718, "eval_arxiv_n_ngrams_match_2": 2.814, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 40.53, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.57128788957074, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3484484329841866, "eval_arxiv_runtime": 10.9272, "eval_arxiv_samples_per_second": 45.758, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.34736528048826953, "eval_arxiv_token_set_f1_sem": 0.00430616281862336, "eval_arxiv_token_set_precision": 0.29433303516015946, "eval_arxiv_token_set_recall": 0.44004642289141843, "eval_arxiv_true_num_tokens": 64.0, "step": 99375 }, { "epoch": 19.08, "eval_python_code_alpaca_accuracy": 0.15725, "eval_python_code_alpaca_bleu_score": 4.166026646744993, "eval_python_code_alpaca_bleu_score_sem": 0.12735284290301357, "eval_python_code_alpaca_emb_cos_sim": 0.7433856725692749, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009554076135393729, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9262959957122803, "eval_python_code_alpaca_n_ngrams_match_1": 9.504, "eval_python_code_alpaca_n_ngrams_match_2": 2.636, "eval_python_code_alpaca_n_ngrams_match_3": 0.78, "eval_python_code_alpaca_num_pred_words": 43.364, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.65839158279246, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3208857702007807, "eval_python_code_alpaca_runtime": 10.6558, "eval_python_code_alpaca_samples_per_second": 46.923, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.47143257862849414, "eval_python_code_alpaca_token_set_f1_sem": 0.005703811651620781, "eval_python_code_alpaca_token_set_precision": 0.5195892191390774, "eval_python_code_alpaca_token_set_recall": 0.4500885233241683, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 99375 }, { "epoch": 19.08, "eval_wikibio_accuracy": 0.318125, "eval_wikibio_bleu_score": 5.860758974171116, "eval_wikibio_bleu_score_sem": 0.19817267944771857, "eval_wikibio_emb_cos_sim": 0.7324365377426147, "eval_wikibio_emb_cos_sim_sem": 0.009563551273774585, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8286211490631104, "eval_wikibio_n_ngrams_match_1": 10.298, "eval_wikibio_n_ngrams_match_2": 3.386, "eval_wikibio_n_ngrams_match_3": 1.222, "eval_wikibio_num_pred_words": 37.216, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.99906862783368, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3545744978027274, "eval_wikibio_runtime": 15.6524, "eval_wikibio_samples_per_second": 31.944, "eval_wikibio_steps_per_second": 0.064, "eval_wikibio_token_set_f1": 0.3235636798898564, "eval_wikibio_token_set_f1_sem": 0.005042012481950076, "eval_wikibio_token_set_precision": 0.3329991077921081, "eval_wikibio_token_set_recall": 0.3292837209337521, "eval_wikibio_true_num_tokens": 61.1328125, "step": 99375 }, { "epoch": 19.08, "eval_nq_accuracy": 0.5224375, "eval_nq_bleu_score": 11.598419595691514, "eval_nq_bleu_score_sem": 0.47969006378495227, "eval_nq_emb_cos_sim": 0.8233333826065063, "eval_nq_emb_cos_sim_sem": 0.007962877846148103, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2288315296173096, "eval_nq_n_ngrams_match_1": 22.732, "eval_nq_n_ngrams_match_2": 8.392, "eval_nq_n_ngrams_match_3": 3.9, "eval_nq_num_pred_words": 49.068, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.289005807595395, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4385976121395514, "eval_nq_runtime": 11.1057, "eval_nq_samples_per_second": 45.022, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.45589403224100833, "eval_nq_token_set_f1_sem": 0.005147651387653305, "eval_nq_token_set_precision": 0.41086280519093743, "eval_nq_token_set_recall": 0.5211958959232188, "eval_nq_true_num_tokens": 64.0, "step": 99375 }, { "epoch": 19.08, "learning_rate": 0.001, "loss": 2.5921, "step": 99384 }, { "epoch": 19.09, "learning_rate": 0.001, "loss": 2.5944, "step": 99396 }, { "epoch": 19.09, "learning_rate": 0.001, "loss": 2.5987, "step": 99408 }, { "epoch": 19.09, "learning_rate": 0.001, "loss": 2.5999, "step": 99420 }, { "epoch": 19.09, "learning_rate": 0.001, "loss": 2.5997, "step": 99432 }, { "epoch": 19.09, "learning_rate": 0.001, "loss": 2.5934, "step": 99444 }, { "epoch": 19.1, "learning_rate": 0.001, "loss": 2.5931, "step": 99456 }, { "epoch": 19.1, "learning_rate": 0.001, "loss": 2.5979, "step": 99468 }, { "epoch": 19.1, "learning_rate": 0.001, "loss": 2.5973, "step": 99480 }, { "epoch": 19.1, "learning_rate": 0.001, "loss": 2.6049, "step": 99492 }, { "epoch": 19.11, "learning_rate": 0.001, "loss": 2.6011, "step": 99504 }, { "epoch": 19.11, "learning_rate": 0.001, "loss": 2.5911, "step": 99516 }, { "epoch": 19.11, "learning_rate": 0.001, "loss": 2.5884, "step": 99528 }, { "epoch": 19.11, "learning_rate": 0.001, "loss": 2.5947, "step": 99540 }, { "epoch": 19.12, "learning_rate": 0.001, "loss": 2.5941, "step": 99552 }, { "epoch": 19.12, "learning_rate": 0.001, "loss": 2.5863, "step": 99564 }, { "epoch": 19.12, "learning_rate": 0.001, "loss": 2.5904, "step": 99576 }, { "epoch": 19.12, "learning_rate": 0.001, "loss": 2.5975, "step": 99588 }, { "epoch": 19.12, "learning_rate": 0.001, "loss": 2.5994, "step": 99600 }, { "epoch": 19.13, "learning_rate": 0.001, "loss": 2.6068, "step": 99612 }, { "epoch": 19.13, "learning_rate": 0.001, "loss": 2.5928, "step": 99624 }, { "epoch": 19.13, "learning_rate": 0.001, "loss": 2.5895, "step": 99636 }, { "epoch": 19.13, "learning_rate": 0.001, "loss": 2.5998, "step": 99648 }, { "epoch": 19.14, "learning_rate": 0.001, "loss": 2.5972, "step": 99660 }, { "epoch": 19.14, "learning_rate": 0.001, "loss": 2.601, "step": 99672 }, { "epoch": 19.14, "learning_rate": 0.001, "loss": 2.5941, "step": 99684 }, { "epoch": 19.14, "learning_rate": 0.001, "loss": 2.5965, "step": 99696 }, { "epoch": 19.15, "learning_rate": 0.001, "loss": 2.6021, "step": 99708 }, { "epoch": 19.15, "learning_rate": 0.001, "loss": 2.5908, "step": 99720 }, { "epoch": 19.15, "learning_rate": 0.001, "loss": 2.5906, "step": 99732 }, { "epoch": 19.15, "learning_rate": 0.001, "loss": 2.6066, "step": 99744 }, { "epoch": 19.15, "learning_rate": 0.001, "loss": 2.6026, "step": 99756 }, { "epoch": 19.16, "learning_rate": 0.001, "loss": 2.5869, "step": 99768 }, { "epoch": 19.16, "learning_rate": 0.001, "loss": 2.5925, "step": 99780 }, { "epoch": 19.16, "learning_rate": 0.001, "loss": 2.5909, "step": 99792 }, { "epoch": 19.16, "learning_rate": 0.001, "loss": 2.5873, "step": 99804 }, { "epoch": 19.17, "learning_rate": 0.001, "loss": 2.5933, "step": 99816 }, { "epoch": 19.17, "learning_rate": 0.001, "loss": 2.5898, "step": 99828 }, { "epoch": 19.17, "learning_rate": 0.001, "loss": 2.6027, "step": 99840 }, { "epoch": 19.17, "learning_rate": 0.001, "loss": 2.5929, "step": 99852 }, { "epoch": 19.18, "learning_rate": 0.001, "loss": 2.5975, "step": 99864 }, { "epoch": 19.18, "learning_rate": 0.001, "loss": 2.6031, "step": 99876 }, { "epoch": 19.18, "learning_rate": 0.001, "loss": 2.6079, "step": 99888 }, { "epoch": 19.18, "learning_rate": 0.001, "loss": 2.5843, "step": 99900 }, { "epoch": 19.18, "learning_rate": 0.001, "loss": 2.5978, "step": 99912 }, { "epoch": 19.19, "learning_rate": 0.001, "loss": 2.5982, "step": 99924 }, { "epoch": 19.19, "learning_rate": 0.001, "loss": 2.5919, "step": 99936 }, { "epoch": 19.19, "learning_rate": 0.001, "loss": 2.5967, "step": 99948 }, { "epoch": 19.19, "learning_rate": 0.001, "loss": 2.5942, "step": 99960 }, { "epoch": 19.2, "learning_rate": 0.001, "loss": 2.5979, "step": 99972 }, { "epoch": 19.2, "learning_rate": 0.001, "loss": 2.5973, "step": 99984 }, { "epoch": 19.2, "learning_rate": 0.001, "loss": 2.608, "step": 99996 }, { "epoch": 19.2, "eval_ag_news_accuracy": 0.31725, "eval_ag_news_bleu_score": 4.669043197200898, "eval_ag_news_bleu_score_sem": 0.1508911951991967, "eval_ag_news_emb_cos_sim": 0.8073618412017822, "eval_ag_news_emb_cos_sim_sem": 0.0064244289719918935, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.624054193496704, "eval_ag_news_n_ngrams_match_1": 13.868, "eval_ag_news_n_ngrams_match_2": 3.068, "eval_ag_news_n_ngrams_match_3": 0.85, "eval_ag_news_num_pred_words": 46.62, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.48924881098155, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34281124891200276, "eval_ag_news_runtime": 11.0121, "eval_ag_news_samples_per_second": 45.405, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.34887217658055775, "eval_ag_news_token_set_f1_sem": 0.004424521074341578, "eval_ag_news_token_set_precision": 0.3319184970220668, "eval_ag_news_token_set_recall": 0.3841653317804411, "eval_ag_news_true_num_tokens": 56.09375, "step": 100000 }, { "epoch": 19.2, "eval_anthropic_toxic_prompts_accuracy": 0.1116875, "eval_anthropic_toxic_prompts_bleu_score": 2.9737272823415504, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10541475031914696, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6692122220993042, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009298105812041418, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3108067512512207, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.998, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.872, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.666, "eval_anthropic_toxic_prompts_num_pred_words": 47.548, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.40722737031086, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20394249011862936, "eval_anthropic_toxic_prompts_runtime": 10.5835, "eval_anthropic_toxic_prompts_samples_per_second": 47.244, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.3476096325210851, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006783717493097351, "eval_anthropic_toxic_prompts_token_set_precision": 0.42239181915534135, "eval_anthropic_toxic_prompts_token_set_recall": 0.3264111233596592, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 100000 }, { "epoch": 19.2, "eval_arxiv_accuracy": 0.3455625, "eval_arxiv_bleu_score": 4.39862195394743, "eval_arxiv_bleu_score_sem": 0.13023088493931498, "eval_arxiv_emb_cos_sim": 0.7599332928657532, "eval_arxiv_emb_cos_sim_sem": 0.008488366729015453, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4565768241882324, "eval_arxiv_n_ngrams_match_1": 14.918, "eval_arxiv_n_ngrams_match_2": 2.89, "eval_arxiv_n_ngrams_match_3": 0.694, "eval_arxiv_num_pred_words": 40.35, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.708247615502884, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35304818574208807, "eval_arxiv_runtime": 11.2435, "eval_arxiv_samples_per_second": 44.47, "eval_arxiv_steps_per_second": 0.089, "eval_arxiv_token_set_f1": 0.3500005621030236, "eval_arxiv_token_set_f1_sem": 0.004217378558848126, "eval_arxiv_token_set_precision": 0.2987826934657279, "eval_arxiv_token_set_recall": 0.4412166514947108, "eval_arxiv_true_num_tokens": 64.0, "step": 100000 }, { "epoch": 19.2, "eval_python_code_alpaca_accuracy": 0.15534375, "eval_python_code_alpaca_bleu_score": 4.153210658729626, "eval_python_code_alpaca_bleu_score_sem": 0.1297311063473779, "eval_python_code_alpaca_emb_cos_sim": 0.7312737703323364, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011410933913337615, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.951820135116577, "eval_python_code_alpaca_n_ngrams_match_1": 9.462, "eval_python_code_alpaca_n_ngrams_match_2": 2.666, "eval_python_code_alpaca_n_ngrams_match_3": 0.812, "eval_python_code_alpaca_num_pred_words": 43.754, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.140760812737167, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.31531171332182556, "eval_python_code_alpaca_runtime": 10.547, "eval_python_code_alpaca_samples_per_second": 47.407, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.46519926251865085, "eval_python_code_alpaca_token_set_f1_sem": 0.005925378375173091, "eval_python_code_alpaca_token_set_precision": 0.517281067157932, "eval_python_code_alpaca_token_set_recall": 0.4425828900974685, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 100000 }, { "epoch": 19.2, "eval_wikibio_accuracy": 0.31925, "eval_wikibio_bleu_score": 5.865238303211395, "eval_wikibio_bleu_score_sem": 0.20600500077897738, "eval_wikibio_emb_cos_sim": 0.7343774437904358, "eval_wikibio_emb_cos_sim_sem": 0.008475737162441361, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.823119640350342, "eval_wikibio_n_ngrams_match_1": 10.14, "eval_wikibio_n_ngrams_match_2": 3.288, "eval_wikibio_n_ngrams_match_3": 1.202, "eval_wikibio_num_pred_words": 37.136, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.7466991938484, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35239889671330227, "eval_wikibio_runtime": 10.9485, "eval_wikibio_samples_per_second": 45.668, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3188697627561331, "eval_wikibio_token_set_f1_sem": 0.0051094826566727486, "eval_wikibio_token_set_precision": 0.3285978075009286, "eval_wikibio_token_set_recall": 0.32486593198486036, "eval_wikibio_true_num_tokens": 61.1328125, "step": 100000 }, { "epoch": 19.2, "eval_nq_accuracy": 0.5225625, "eval_nq_bleu_score": 11.372865626136264, "eval_nq_bleu_score_sem": 0.4754612606808897, "eval_nq_emb_cos_sim": 0.8224873542785645, "eval_nq_emb_cos_sim_sem": 0.007489033340685773, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.229804277420044, "eval_nq_n_ngrams_match_1": 22.954, "eval_nq_n_ngrams_match_2": 8.292, "eval_nq_n_ngrams_match_3": 3.736, "eval_nq_num_pred_words": 49.326, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.298046063816154, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4418118193963947, "eval_nq_runtime": 10.9782, "eval_nq_samples_per_second": 45.545, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.458000988883285, "eval_nq_token_set_f1_sem": 0.004996959045427728, "eval_nq_token_set_precision": 0.41627873774746343, "eval_nq_token_set_recall": 0.5168683928399, "eval_nq_true_num_tokens": 64.0, "step": 100000 }, { "epoch": 19.2, "learning_rate": 0.001, "loss": 2.5868, "step": 100008 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 2.5922, "step": 100020 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 2.6018, "step": 100032 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 2.5974, "step": 100044 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 2.6042, "step": 100056 }, { "epoch": 19.21, "learning_rate": 0.001, "loss": 2.605, "step": 100068 }, { "epoch": 19.22, "learning_rate": 0.001, "loss": 2.605, "step": 100080 }, { "epoch": 19.22, "learning_rate": 0.001, "loss": 2.6, "step": 100092 }, { "epoch": 19.22, "learning_rate": 0.001, "loss": 2.5973, "step": 100104 }, { "epoch": 19.22, "learning_rate": 0.001, "loss": 2.599, "step": 100116 }, { "epoch": 19.23, "learning_rate": 0.001, "loss": 2.6065, "step": 100128 }, { "epoch": 19.23, "learning_rate": 0.001, "loss": 2.6066, "step": 100140 }, { "epoch": 19.23, "learning_rate": 0.001, "loss": 2.5916, "step": 100152 }, { "epoch": 19.23, "learning_rate": 0.001, "loss": 2.605, "step": 100164 }, { "epoch": 19.24, "learning_rate": 0.001, "loss": 2.601, "step": 100176 }, { "epoch": 19.24, "learning_rate": 0.001, "loss": 2.6045, "step": 100188 }, { "epoch": 19.24, "learning_rate": 0.001, "loss": 2.5953, "step": 100200 }, { "epoch": 19.24, "learning_rate": 0.001, "loss": 2.595, "step": 100212 }, { "epoch": 19.24, "learning_rate": 0.001, "loss": 2.6054, "step": 100224 }, { "epoch": 19.25, "learning_rate": 0.001, "loss": 2.5933, "step": 100236 }, { "epoch": 19.25, "learning_rate": 0.001, "loss": 2.5874, "step": 100248 }, { "epoch": 19.25, "learning_rate": 0.001, "loss": 2.5985, "step": 100260 }, { "epoch": 19.25, "learning_rate": 0.001, "loss": 2.5913, "step": 100272 }, { "epoch": 19.26, "learning_rate": 0.001, "loss": 2.5906, "step": 100284 }, { "epoch": 19.26, "learning_rate": 0.001, "loss": 2.5983, "step": 100296 }, { "epoch": 19.26, "learning_rate": 0.001, "loss": 2.6015, "step": 100308 }, { "epoch": 19.26, "learning_rate": 0.001, "loss": 2.6036, "step": 100320 }, { "epoch": 19.26, "learning_rate": 0.001, "loss": 2.5961, "step": 100332 }, { "epoch": 19.27, "learning_rate": 0.001, "loss": 2.5982, "step": 100344 }, { "epoch": 19.27, "learning_rate": 0.001, "loss": 2.5939, "step": 100356 }, { "epoch": 19.27, "learning_rate": 0.001, "loss": 2.6015, "step": 100368 }, { "epoch": 19.27, "learning_rate": 0.001, "loss": 2.593, "step": 100380 }, { "epoch": 19.28, "learning_rate": 0.001, "loss": 2.6088, "step": 100392 }, { "epoch": 19.28, "learning_rate": 0.001, "loss": 2.6081, "step": 100404 }, { "epoch": 19.28, "learning_rate": 0.001, "loss": 2.6071, "step": 100416 }, { "epoch": 19.28, "learning_rate": 0.001, "loss": 2.609, "step": 100428 }, { "epoch": 19.29, "learning_rate": 0.001, "loss": 2.607, "step": 100440 }, { "epoch": 19.29, "learning_rate": 0.001, "loss": 2.5966, "step": 100452 }, { "epoch": 19.29, "learning_rate": 0.001, "loss": 2.5987, "step": 100464 }, { "epoch": 19.29, "learning_rate": 0.001, "loss": 2.5967, "step": 100476 }, { "epoch": 19.29, "learning_rate": 0.001, "loss": 2.5976, "step": 100488 }, { "epoch": 19.3, "learning_rate": 0.001, "loss": 2.5996, "step": 100500 }, { "epoch": 19.3, "learning_rate": 0.001, "loss": 2.5981, "step": 100512 }, { "epoch": 19.3, "learning_rate": 0.001, "loss": 2.6017, "step": 100524 }, { "epoch": 19.3, "learning_rate": 0.001, "loss": 2.6033, "step": 100536 }, { "epoch": 19.31, "learning_rate": 0.001, "loss": 2.5974, "step": 100548 }, { "epoch": 19.31, "learning_rate": 0.001, "loss": 2.5976, "step": 100560 }, { "epoch": 19.31, "learning_rate": 0.001, "loss": 2.605, "step": 100572 }, { "epoch": 19.31, "learning_rate": 0.001, "loss": 2.6116, "step": 100584 }, { "epoch": 19.32, "learning_rate": 0.001, "loss": 2.5884, "step": 100596 }, { "epoch": 19.32, "learning_rate": 0.001, "loss": 2.6021, "step": 100608 }, { "epoch": 19.32, "learning_rate": 0.001, "loss": 2.6087, "step": 100620 }, { "epoch": 19.32, "eval_ag_news_accuracy": 0.31703125, "eval_ag_news_bleu_score": 4.591186233958706, "eval_ag_news_bleu_score_sem": 0.13914706106809424, "eval_ag_news_emb_cos_sim": 0.8013380765914917, "eval_ag_news_emb_cos_sim_sem": 0.007497183487516567, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.620837926864624, "eval_ag_news_n_ngrams_match_1": 13.58, "eval_ag_news_n_ngrams_match_2": 2.954, "eval_ag_news_n_ngrams_match_3": 0.814, "eval_ag_news_num_pred_words": 46.866, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 37.36886708460662, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3356389783524234, "eval_ag_news_runtime": 11.275, "eval_ag_news_samples_per_second": 44.346, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.34021547828123866, "eval_ag_news_token_set_f1_sem": 0.004377350883325469, "eval_ag_news_token_set_precision": 0.324636686330962, "eval_ag_news_token_set_recall": 0.3711949891491547, "eval_ag_news_true_num_tokens": 56.09375, "step": 100625 }, { "epoch": 19.32, "eval_anthropic_toxic_prompts_accuracy": 0.11203125, "eval_anthropic_toxic_prompts_bleu_score": 3.044232461191681, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11817635203924869, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6540555357933044, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010932708111263676, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3046493530273438, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.022, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.856, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.678, "eval_anthropic_toxic_prompts_num_pred_words": 47.528, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.238988645136953, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20568465539913397, "eval_anthropic_toxic_prompts_runtime": 11.386, "eval_anthropic_toxic_prompts_samples_per_second": 43.913, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.3536839649941146, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006587056755202966, "eval_anthropic_toxic_prompts_token_set_precision": 0.42552078491173967, "eval_anthropic_toxic_prompts_token_set_recall": 0.33493448942325127, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 100625 }, { "epoch": 19.32, "eval_arxiv_accuracy": 0.34175, "eval_arxiv_bleu_score": 4.139237394545484, "eval_arxiv_bleu_score_sem": 0.10746435437629161, "eval_arxiv_emb_cos_sim": 0.752979576587677, "eval_arxiv_emb_cos_sim_sem": 0.008673793106128149, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.471484899520874, "eval_arxiv_n_ngrams_match_1": 14.94, "eval_arxiv_n_ngrams_match_2": 2.85, "eval_arxiv_n_ngrams_match_3": 0.59, "eval_arxiv_num_pred_words": 41.05, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 32.184497725333706, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3529319485637437, "eval_arxiv_runtime": 11.3186, "eval_arxiv_samples_per_second": 44.175, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.3476968869613086, "eval_arxiv_token_set_f1_sem": 0.0039758746415613, "eval_arxiv_token_set_precision": 0.2994408277086143, "eval_arxiv_token_set_recall": 0.42842181448168554, "eval_arxiv_true_num_tokens": 64.0, "step": 100625 }, { "epoch": 19.32, "eval_python_code_alpaca_accuracy": 0.15928125, "eval_python_code_alpaca_bleu_score": 4.256231394956872, "eval_python_code_alpaca_bleu_score_sem": 0.13961291586215355, "eval_python_code_alpaca_emb_cos_sim": 0.7518568634986877, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008534636724690828, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9363017082214355, "eval_python_code_alpaca_n_ngrams_match_1": 9.668, "eval_python_code_alpaca_n_ngrams_match_2": 2.712, "eval_python_code_alpaca_n_ngrams_match_3": 0.894, "eval_python_code_alpaca_num_pred_words": 44.758, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.846019193471047, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.316631569495196, "eval_python_code_alpaca_runtime": 11.4226, "eval_python_code_alpaca_samples_per_second": 43.773, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.46861993383758355, "eval_python_code_alpaca_token_set_f1_sem": 0.005677920666004615, "eval_python_code_alpaca_token_set_precision": 0.526921459478536, "eval_python_code_alpaca_token_set_recall": 0.44183253948713774, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 100625 }, { "epoch": 19.32, "eval_wikibio_accuracy": 0.316625, "eval_wikibio_bleu_score": 5.700380231621618, "eval_wikibio_bleu_score_sem": 0.21207477886091958, "eval_wikibio_emb_cos_sim": 0.7466273307800293, "eval_wikibio_emb_cos_sim_sem": 0.008035836675098492, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.847433567047119, "eval_wikibio_n_ngrams_match_1": 10.198, "eval_wikibio_n_ngrams_match_2": 3.322, "eval_wikibio_n_ngrams_match_3": 1.138, "eval_wikibio_num_pred_words": 37.086, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.87261331497524, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3532399034144793, "eval_wikibio_runtime": 10.6071, "eval_wikibio_samples_per_second": 47.138, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.31856224041107567, "eval_wikibio_token_set_f1_sem": 0.005105270663572564, "eval_wikibio_token_set_precision": 0.3298059710300844, "eval_wikibio_token_set_recall": 0.32245833767170023, "eval_wikibio_true_num_tokens": 61.1328125, "step": 100625 }, { "epoch": 19.32, "eval_nq_accuracy": 0.52084375, "eval_nq_bleu_score": 11.489144494353248, "eval_nq_bleu_score_sem": 0.48727283262135757, "eval_nq_emb_cos_sim": 0.8208481073379517, "eval_nq_emb_cos_sim_sem": 0.007484526920617647, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.231174945831299, "eval_nq_n_ngrams_match_1": 22.694, "eval_nq_n_ngrams_match_2": 8.304, "eval_nq_n_ngrams_match_3": 3.826, "eval_nq_num_pred_words": 49.176, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.310799340102026, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43799142477360303, "eval_nq_runtime": 11.0215, "eval_nq_samples_per_second": 45.366, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4524631400963689, "eval_nq_token_set_f1_sem": 0.005023237513942982, "eval_nq_token_set_precision": 0.41001493006649997, "eval_nq_token_set_recall": 0.5134872668866016, "eval_nq_true_num_tokens": 64.0, "step": 100625 }, { "epoch": 19.32, "learning_rate": 0.001, "loss": 2.6002, "step": 100632 }, { "epoch": 19.32, "learning_rate": 0.001, "loss": 2.5928, "step": 100644 }, { "epoch": 19.33, "learning_rate": 0.001, "loss": 2.5891, "step": 100656 }, { "epoch": 19.33, "learning_rate": 0.001, "loss": 2.6001, "step": 100668 }, { "epoch": 19.33, "learning_rate": 0.001, "loss": 2.6033, "step": 100680 }, { "epoch": 19.33, "learning_rate": 0.001, "loss": 2.6068, "step": 100692 }, { "epoch": 19.34, "learning_rate": 0.001, "loss": 2.6047, "step": 100704 }, { "epoch": 19.34, "learning_rate": 0.001, "loss": 2.6049, "step": 100716 }, { "epoch": 19.34, "learning_rate": 0.001, "loss": 2.5925, "step": 100728 }, { "epoch": 19.34, "learning_rate": 0.001, "loss": 2.6, "step": 100740 }, { "epoch": 19.35, "learning_rate": 0.001, "loss": 2.6001, "step": 100752 }, { "epoch": 19.35, "learning_rate": 0.001, "loss": 2.6011, "step": 100764 }, { "epoch": 19.35, "learning_rate": 0.001, "loss": 2.5945, "step": 100776 }, { "epoch": 19.35, "learning_rate": 0.001, "loss": 2.5971, "step": 100788 }, { "epoch": 19.35, "learning_rate": 0.001, "loss": 2.599, "step": 100800 }, { "epoch": 19.36, "learning_rate": 0.001, "loss": 2.5923, "step": 100812 }, { "epoch": 19.36, "learning_rate": 0.001, "loss": 2.6074, "step": 100824 }, { "epoch": 19.36, "learning_rate": 0.001, "loss": 2.595, "step": 100836 }, { "epoch": 19.36, "learning_rate": 0.001, "loss": 2.6008, "step": 100848 }, { "epoch": 19.37, "learning_rate": 0.001, "loss": 2.5972, "step": 100860 }, { "epoch": 19.37, "learning_rate": 0.001, "loss": 2.5999, "step": 100872 }, { "epoch": 19.37, "learning_rate": 0.001, "loss": 2.5996, "step": 100884 }, { "epoch": 19.37, "learning_rate": 0.001, "loss": 2.5911, "step": 100896 }, { "epoch": 19.38, "learning_rate": 0.001, "loss": 2.61, "step": 100908 }, { "epoch": 19.38, "learning_rate": 0.001, "loss": 2.5872, "step": 100920 }, { "epoch": 19.38, "learning_rate": 0.001, "loss": 2.5941, "step": 100932 }, { "epoch": 19.38, "learning_rate": 0.001, "loss": 2.6022, "step": 100944 }, { "epoch": 19.38, "learning_rate": 0.001, "loss": 2.6003, "step": 100956 }, { "epoch": 19.39, "learning_rate": 0.001, "loss": 2.6016, "step": 100968 }, { "epoch": 19.39, "learning_rate": 0.001, "loss": 2.5941, "step": 100980 }, { "epoch": 19.39, "learning_rate": 0.001, "loss": 2.5918, "step": 100992 }, { "epoch": 19.39, "learning_rate": 0.001, "loss": 2.6033, "step": 101004 }, { "epoch": 19.4, "learning_rate": 0.001, "loss": 2.5971, "step": 101016 }, { "epoch": 19.4, "learning_rate": 0.001, "loss": 2.6012, "step": 101028 }, { "epoch": 19.4, "learning_rate": 0.001, "loss": 2.5965, "step": 101040 }, { "epoch": 19.4, "learning_rate": 0.001, "loss": 2.5916, "step": 101052 }, { "epoch": 19.41, "learning_rate": 0.001, "loss": 2.5967, "step": 101064 }, { "epoch": 19.41, "learning_rate": 0.001, "loss": 2.6005, "step": 101076 }, { "epoch": 19.41, "learning_rate": 0.001, "loss": 2.6005, "step": 101088 }, { "epoch": 19.41, "learning_rate": 0.001, "loss": 2.5863, "step": 101100 }, { "epoch": 19.41, "learning_rate": 0.001, "loss": 2.5929, "step": 101112 }, { "epoch": 19.42, "learning_rate": 0.001, "loss": 2.5892, "step": 101124 }, { "epoch": 19.42, "learning_rate": 0.001, "loss": 2.6017, "step": 101136 }, { "epoch": 19.42, "learning_rate": 0.001, "loss": 2.6024, "step": 101148 }, { "epoch": 19.42, "learning_rate": 0.001, "loss": 2.5972, "step": 101160 }, { "epoch": 19.43, "learning_rate": 0.001, "loss": 2.5931, "step": 101172 }, { "epoch": 19.43, "learning_rate": 0.001, "loss": 2.6115, "step": 101184 }, { "epoch": 19.43, "learning_rate": 0.001, "loss": 2.6051, "step": 101196 }, { "epoch": 19.43, "learning_rate": 0.001, "loss": 2.5975, "step": 101208 }, { "epoch": 19.44, "learning_rate": 0.001, "loss": 2.5996, "step": 101220 }, { "epoch": 19.44, "learning_rate": 0.001, "loss": 2.6016, "step": 101232 }, { "epoch": 19.44, "learning_rate": 0.001, "loss": 2.5922, "step": 101244 }, { "epoch": 19.44, "eval_ag_news_accuracy": 0.31871875, "eval_ag_news_bleu_score": 4.614082710993376, "eval_ag_news_bleu_score_sem": 0.1413972830854405, "eval_ag_news_emb_cos_sim": 0.7978829145431519, "eval_ag_news_emb_cos_sim_sem": 0.00735936460997496, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5990102291107178, "eval_ag_news_n_ngrams_match_1": 13.86, "eval_ag_news_n_ngrams_match_2": 2.986, "eval_ag_news_n_ngrams_match_3": 0.81, "eval_ag_news_num_pred_words": 47.018, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.562028497378094, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3409600664235183, "eval_ag_news_runtime": 11.3183, "eval_ag_news_samples_per_second": 44.176, "eval_ag_news_steps_per_second": 0.088, "eval_ag_news_token_set_f1": 0.34765595066331473, "eval_ag_news_token_set_f1_sem": 0.0043305966468352155, "eval_ag_news_token_set_precision": 0.33083606207194277, "eval_ag_news_token_set_recall": 0.38215795902851046, "eval_ag_news_true_num_tokens": 56.09375, "step": 101250 }, { "epoch": 19.44, "eval_anthropic_toxic_prompts_accuracy": 0.1124375, "eval_anthropic_toxic_prompts_bleu_score": 2.9389724036588585, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11049165821692555, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6543651223182678, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010366423713215812, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.269542932510376, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.964, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.8, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64, "eval_anthropic_toxic_prompts_num_pred_words": 47.854, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.29931603338497, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20547194012002334, "eval_anthropic_toxic_prompts_runtime": 10.6576, "eval_anthropic_toxic_prompts_samples_per_second": 46.915, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.3467155327361048, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006458522233706072, "eval_anthropic_toxic_prompts_token_set_precision": 0.42202739861726735, "eval_anthropic_toxic_prompts_token_set_recall": 0.323628123225362, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 101250 }, { "epoch": 19.44, "eval_arxiv_accuracy": 0.3418125, "eval_arxiv_bleu_score": 4.391075653340749, "eval_arxiv_bleu_score_sem": 0.12009580445282984, "eval_arxiv_emb_cos_sim": 0.7571845054626465, "eval_arxiv_emb_cos_sim_sem": 0.006659691363290688, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4609336853027344, "eval_arxiv_n_ngrams_match_1": 15.096, "eval_arxiv_n_ngrams_match_2": 2.952, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 41.31, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.846697430849048, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3576581303122379, "eval_arxiv_runtime": 11.1349, "eval_arxiv_samples_per_second": 44.904, "eval_arxiv_steps_per_second": 0.09, "eval_arxiv_token_set_f1": 0.35162833277337363, "eval_arxiv_token_set_f1_sem": 0.004082675359765557, "eval_arxiv_token_set_precision": 0.30323196926083246, "eval_arxiv_token_set_recall": 0.4329976120264988, "eval_arxiv_true_num_tokens": 64.0, "step": 101250 }, { "epoch": 19.44, "eval_python_code_alpaca_accuracy": 0.157375, "eval_python_code_alpaca_bleu_score": 4.542335939915411, "eval_python_code_alpaca_bleu_score_sem": 0.15324601552301118, "eval_python_code_alpaca_emb_cos_sim": 0.7516502141952515, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007781579226011444, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.940692663192749, "eval_python_code_alpaca_n_ngrams_match_1": 9.658, "eval_python_code_alpaca_n_ngrams_match_2": 2.812, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 44.206, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.928953161547238, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3248172631746263, "eval_python_code_alpaca_runtime": 11.2646, "eval_python_code_alpaca_samples_per_second": 44.387, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.47216346999724057, "eval_python_code_alpaca_token_set_f1_sem": 0.00526295892577975, "eval_python_code_alpaca_token_set_precision": 0.531452324383423, "eval_python_code_alpaca_token_set_recall": 0.4457535644804907, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 101250 }, { "epoch": 19.44, "eval_wikibio_accuracy": 0.31678125, "eval_wikibio_bleu_score": 5.742427462420765, "eval_wikibio_bleu_score_sem": 0.19567733220639472, "eval_wikibio_emb_cos_sim": 0.7237722873687744, "eval_wikibio_emb_cos_sim_sem": 0.010059310823373562, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8146474361419678, "eval_wikibio_n_ngrams_match_1": 10.138, "eval_wikibio_n_ngrams_match_2": 3.416, "eval_wikibio_n_ngrams_match_3": 1.178, "eval_wikibio_num_pred_words": 36.86, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.360760998514806, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3513548987769217, "eval_wikibio_runtime": 10.8437, "eval_wikibio_samples_per_second": 46.11, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.31881149895725247, "eval_wikibio_token_set_f1_sem": 0.00526758127228664, "eval_wikibio_token_set_precision": 0.3293605004608683, "eval_wikibio_token_set_recall": 0.32457429815562655, "eval_wikibio_true_num_tokens": 61.1328125, "step": 101250 }, { "epoch": 19.44, "eval_nq_accuracy": 0.52209375, "eval_nq_bleu_score": 11.725716604927474, "eval_nq_bleu_score_sem": 0.4813356634441808, "eval_nq_emb_cos_sim": 0.8301323652267456, "eval_nq_emb_cos_sim_sem": 0.006866184579213749, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2232437133789062, "eval_nq_n_ngrams_match_1": 23.148, "eval_nq_n_ngrams_match_2": 8.564, "eval_nq_n_ngrams_match_3": 3.906, "eval_nq_num_pred_words": 49.472, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.237245298890054, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44575824014735066, "eval_nq_runtime": 11.2649, "eval_nq_samples_per_second": 44.386, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.46191403786435464, "eval_nq_token_set_f1_sem": 0.005112620923326513, "eval_nq_token_set_precision": 0.41927793908568967, "eval_nq_token_set_recall": 0.5215706163570686, "eval_nq_true_num_tokens": 64.0, "step": 101250 }, { "epoch": 19.44, "learning_rate": 0.001, "loss": 2.5934, "step": 101256 }, { "epoch": 19.44, "learning_rate": 0.001, "loss": 2.591, "step": 101268 }, { "epoch": 19.45, "learning_rate": 0.001, "loss": 2.5899, "step": 101280 }, { "epoch": 19.45, "learning_rate": 0.001, "loss": 2.5953, "step": 101292 }, { "epoch": 19.45, "learning_rate": 0.001, "loss": 2.5971, "step": 101304 }, { "epoch": 19.45, "learning_rate": 0.001, "loss": 2.6034, "step": 101316 }, { "epoch": 19.46, "learning_rate": 0.001, "loss": 2.5992, "step": 101328 }, { "epoch": 19.46, "learning_rate": 0.001, "loss": 2.5907, "step": 101340 }, { "epoch": 19.46, "learning_rate": 0.001, "loss": 2.5917, "step": 101352 }, { "epoch": 19.46, "learning_rate": 0.001, "loss": 2.5977, "step": 101364 }, { "epoch": 19.47, "learning_rate": 0.001, "loss": 2.5986, "step": 101376 }, { "epoch": 19.47, "learning_rate": 0.001, "loss": 2.5994, "step": 101388 }, { "epoch": 19.47, "learning_rate": 0.001, "loss": 2.592, "step": 101400 }, { "epoch": 19.47, "learning_rate": 0.001, "loss": 2.5924, "step": 101412 }, { "epoch": 19.47, "learning_rate": 0.001, "loss": 2.5973, "step": 101424 }, { "epoch": 19.48, "learning_rate": 0.001, "loss": 2.6041, "step": 101436 }, { "epoch": 19.48, "learning_rate": 0.001, "loss": 2.6014, "step": 101448 }, { "epoch": 19.48, "learning_rate": 0.001, "loss": 2.602, "step": 101460 }, { "epoch": 19.48, "learning_rate": 0.001, "loss": 2.615, "step": 101472 }, { "epoch": 19.49, "learning_rate": 0.001, "loss": 2.5996, "step": 101484 }, { "epoch": 19.49, "learning_rate": 0.001, "loss": 2.6026, "step": 101496 }, { "epoch": 19.49, "learning_rate": 0.001, "loss": 2.6084, "step": 101508 }, { "epoch": 19.49, "learning_rate": 0.001, "loss": 2.6021, "step": 101520 }, { "epoch": 19.5, "learning_rate": 0.001, "loss": 2.6056, "step": 101532 }, { "epoch": 19.5, "learning_rate": 0.001, "loss": 2.6031, "step": 101544 }, { "epoch": 19.5, "learning_rate": 0.001, "loss": 2.6007, "step": 101556 }, { "epoch": 19.5, "learning_rate": 0.001, "loss": 2.5972, "step": 101568 }, { "epoch": 19.5, "learning_rate": 0.001, "loss": 2.5995, "step": 101580 }, { "epoch": 19.51, "learning_rate": 0.001, "loss": 2.5982, "step": 101592 }, { "epoch": 19.51, "learning_rate": 0.001, "loss": 2.6039, "step": 101604 }, { "epoch": 19.51, "learning_rate": 0.001, "loss": 2.5908, "step": 101616 }, { "epoch": 19.51, "learning_rate": 0.001, "loss": 2.6104, "step": 101628 }, { "epoch": 19.52, "learning_rate": 0.001, "loss": 2.5909, "step": 101640 }, { "epoch": 19.52, "learning_rate": 0.001, "loss": 2.5996, "step": 101652 }, { "epoch": 19.52, "learning_rate": 0.001, "loss": 2.5962, "step": 101664 }, { "epoch": 19.52, "learning_rate": 0.001, "loss": 2.5991, "step": 101676 }, { "epoch": 19.53, "learning_rate": 0.001, "loss": 2.6001, "step": 101688 }, { "epoch": 19.53, "learning_rate": 0.001, "loss": 2.6006, "step": 101700 }, { "epoch": 19.53, "learning_rate": 0.001, "loss": 2.5991, "step": 101712 }, { "epoch": 19.53, "learning_rate": 0.001, "loss": 2.6124, "step": 101724 }, { "epoch": 19.53, "learning_rate": 0.001, "loss": 2.6046, "step": 101736 }, { "epoch": 19.54, "learning_rate": 0.001, "loss": 2.6032, "step": 101748 }, { "epoch": 19.54, "learning_rate": 0.001, "loss": 2.6062, "step": 101760 }, { "epoch": 19.54, "learning_rate": 0.001, "loss": 2.5897, "step": 101772 }, { "epoch": 19.54, "learning_rate": 0.001, "loss": 2.5892, "step": 101784 }, { "epoch": 19.55, "learning_rate": 0.001, "loss": 2.6013, "step": 101796 }, { "epoch": 19.55, "learning_rate": 0.001, "loss": 2.6073, "step": 101808 }, { "epoch": 19.55, "learning_rate": 0.001, "loss": 2.5973, "step": 101820 }, { "epoch": 19.55, "learning_rate": 0.001, "loss": 2.5941, "step": 101832 }, { "epoch": 19.56, "learning_rate": 0.001, "loss": 2.6025, "step": 101844 }, { "epoch": 19.56, "learning_rate": 0.001, "loss": 2.5958, "step": 101856 }, { "epoch": 19.56, "learning_rate": 0.001, "loss": 2.6, "step": 101868 }, { "epoch": 19.56, "eval_ag_news_accuracy": 0.319, "eval_ag_news_bleu_score": 4.704650097639762, "eval_ag_news_bleu_score_sem": 0.15210916543100397, "eval_ag_news_emb_cos_sim": 0.8024437427520752, "eval_ag_news_emb_cos_sim_sem": 0.007323365932990592, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5986874103546143, "eval_ag_news_n_ngrams_match_1": 13.676, "eval_ag_news_n_ngrams_match_2": 3.044, "eval_ag_news_n_ngrams_match_3": 0.858, "eval_ag_news_num_pred_words": 46.918, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.550227493713095, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3385121464470692, "eval_ag_news_runtime": 11.4467, "eval_ag_news_samples_per_second": 43.681, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.34422470334528726, "eval_ag_news_token_set_f1_sem": 0.004504203028635901, "eval_ag_news_token_set_precision": 0.3248866218184864, "eval_ag_news_token_set_recall": 0.3855057975397497, "eval_ag_news_true_num_tokens": 56.09375, "step": 101875 }, { "epoch": 19.56, "eval_anthropic_toxic_prompts_accuracy": 0.11259375, "eval_anthropic_toxic_prompts_bleu_score": 3.002170282392957, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11276446455027171, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6671350002288818, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00911122405156608, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.296062707901001, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.066, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66, "eval_anthropic_toxic_prompts_num_pred_words": 47.59, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.00609841982357, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20791827402255775, "eval_anthropic_toxic_prompts_runtime": 10.7394, "eval_anthropic_toxic_prompts_samples_per_second": 46.557, "eval_anthropic_toxic_prompts_steps_per_second": 0.093, "eval_anthropic_toxic_prompts_token_set_f1": 0.3571139406219928, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006809083476808562, "eval_anthropic_toxic_prompts_token_set_precision": 0.42852530404330225, "eval_anthropic_toxic_prompts_token_set_recall": 0.3345380694582043, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 101875 }, { "epoch": 19.56, "eval_arxiv_accuracy": 0.3444375, "eval_arxiv_bleu_score": 4.254292047209247, "eval_arxiv_bleu_score_sem": 0.12241767385326932, "eval_arxiv_emb_cos_sim": 0.7594126462936401, "eval_arxiv_emb_cos_sim_sem": 0.007300154873262233, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4384498596191406, "eval_arxiv_n_ngrams_match_1": 14.698, "eval_arxiv_n_ngrams_match_2": 2.834, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 39.974, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.13865145586257, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3499436135823866, "eval_arxiv_runtime": 11.0466, "eval_arxiv_samples_per_second": 45.263, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.3462160504998833, "eval_arxiv_token_set_f1_sem": 0.00428577945847585, "eval_arxiv_token_set_precision": 0.29505246950577924, "eval_arxiv_token_set_recall": 0.4372360848286974, "eval_arxiv_true_num_tokens": 64.0, "step": 101875 }, { "epoch": 19.56, "eval_python_code_alpaca_accuracy": 0.1566875, "eval_python_code_alpaca_bleu_score": 4.185241386519374, "eval_python_code_alpaca_bleu_score_sem": 0.1276186431473251, "eval_python_code_alpaca_emb_cos_sim": 0.7340362668037415, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010707734819357603, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.967298984527588, "eval_python_code_alpaca_n_ngrams_match_1": 9.368, "eval_python_code_alpaca_n_ngrams_match_2": 2.626, "eval_python_code_alpaca_n_ngrams_match_3": 0.768, "eval_python_code_alpaca_num_pred_words": 42.766, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.439342657142458, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3179384309369828, "eval_python_code_alpaca_runtime": 10.686, "eval_python_code_alpaca_samples_per_second": 46.79, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.4667173101722582, "eval_python_code_alpaca_token_set_f1_sem": 0.005810604308319629, "eval_python_code_alpaca_token_set_precision": 0.5093183807723372, "eval_python_code_alpaca_token_set_recall": 0.4568380514323313, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 101875 }, { "epoch": 19.56, "eval_wikibio_accuracy": 0.31896875, "eval_wikibio_bleu_score": 5.992545877183331, "eval_wikibio_bleu_score_sem": 0.21551527606006293, "eval_wikibio_emb_cos_sim": 0.7440139055252075, "eval_wikibio_emb_cos_sim_sem": 0.008596680836965912, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.797386407852173, "eval_wikibio_n_ngrams_match_1": 10.366, "eval_wikibio_n_ngrams_match_2": 3.462, "eval_wikibio_n_ngrams_match_3": 1.258, "eval_wikibio_num_pred_words": 37.002, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.58450636944242, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35732610885301463, "eval_wikibio_runtime": 10.89, "eval_wikibio_samples_per_second": 45.914, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.32272176503670524, "eval_wikibio_token_set_f1_sem": 0.005234541721493347, "eval_wikibio_token_set_precision": 0.33427859464187826, "eval_wikibio_token_set_recall": 0.32624952345419367, "eval_wikibio_true_num_tokens": 61.1328125, "step": 101875 }, { "epoch": 19.56, "eval_nq_accuracy": 0.52353125, "eval_nq_bleu_score": 11.569392119807638, "eval_nq_bleu_score_sem": 0.47966337178858387, "eval_nq_emb_cos_sim": 0.8315171003341675, "eval_nq_emb_cos_sim_sem": 0.007283858530867693, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2224278450012207, "eval_nq_n_ngrams_match_1": 22.988, "eval_nq_n_ngrams_match_2": 8.41, "eval_nq_n_ngrams_match_3": 3.81, "eval_nq_num_pred_words": 49.364, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.229711996063415, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4425375167605566, "eval_nq_runtime": 12.1825, "eval_nq_samples_per_second": 41.042, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.4591295117525607, "eval_nq_token_set_f1_sem": 0.00494357544052548, "eval_nq_token_set_precision": 0.41454146807500586, "eval_nq_token_set_recall": 0.5213796359466645, "eval_nq_true_num_tokens": 64.0, "step": 101875 }, { "epoch": 19.56, "learning_rate": 0.001, "loss": 2.598, "step": 101880 }, { "epoch": 19.56, "learning_rate": 0.001, "loss": 2.6049, "step": 101892 }, { "epoch": 19.57, "learning_rate": 0.001, "loss": 2.5985, "step": 101904 }, { "epoch": 19.57, "learning_rate": 0.001, "loss": 2.6028, "step": 101916 }, { "epoch": 19.57, "learning_rate": 0.001, "loss": 2.6106, "step": 101928 }, { "epoch": 19.57, "learning_rate": 0.001, "loss": 2.5947, "step": 101940 }, { "epoch": 19.58, "learning_rate": 0.001, "loss": 2.6036, "step": 101952 }, { "epoch": 19.58, "learning_rate": 0.001, "loss": 2.6085, "step": 101964 }, { "epoch": 19.58, "learning_rate": 0.001, "loss": 2.5751, "step": 101976 }, { "epoch": 19.58, "learning_rate": 0.001, "loss": 2.5955, "step": 101988 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 2.6069, "step": 102000 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 2.6089, "step": 102012 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 2.6046, "step": 102024 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 2.5961, "step": 102036 }, { "epoch": 19.59, "learning_rate": 0.001, "loss": 2.6012, "step": 102048 }, { "epoch": 19.6, "learning_rate": 0.001, "loss": 2.6039, "step": 102060 }, { "epoch": 19.6, "learning_rate": 0.001, "loss": 2.6086, "step": 102072 }, { "epoch": 19.6, "learning_rate": 0.001, "loss": 2.5996, "step": 102084 }, { "epoch": 19.6, "learning_rate": 0.001, "loss": 2.6006, "step": 102096 }, { "epoch": 19.61, "learning_rate": 0.001, "loss": 2.6014, "step": 102108 }, { "epoch": 19.61, "learning_rate": 0.001, "loss": 2.601, "step": 102120 }, { "epoch": 19.61, "learning_rate": 0.001, "loss": 2.5975, "step": 102132 }, { "epoch": 19.61, "learning_rate": 0.001, "loss": 2.6014, "step": 102144 }, { "epoch": 19.62, "learning_rate": 0.001, "loss": 2.6065, "step": 102156 }, { "epoch": 19.62, "learning_rate": 0.001, "loss": 2.6078, "step": 102168 }, { "epoch": 19.62, "learning_rate": 0.001, "loss": 2.5905, "step": 102180 }, { "epoch": 19.62, "learning_rate": 0.001, "loss": 2.6107, "step": 102192 }, { "epoch": 19.62, "learning_rate": 0.001, "loss": 2.5995, "step": 102204 }, { "epoch": 19.63, "learning_rate": 0.001, "loss": 2.6061, "step": 102216 }, { "epoch": 19.63, "learning_rate": 0.001, "loss": 2.6102, "step": 102228 }, { "epoch": 19.63, "learning_rate": 0.001, "loss": 2.5967, "step": 102240 }, { "epoch": 19.63, "learning_rate": 0.001, "loss": 2.5952, "step": 102252 }, { "epoch": 19.64, "learning_rate": 0.001, "loss": 2.609, "step": 102264 }, { "epoch": 19.64, "learning_rate": 0.001, "loss": 2.6074, "step": 102276 }, { "epoch": 19.64, "learning_rate": 0.001, "loss": 2.6021, "step": 102288 }, { "epoch": 19.64, "learning_rate": 0.001, "loss": 2.5935, "step": 102300 }, { "epoch": 19.65, "learning_rate": 0.001, "loss": 2.5947, "step": 102312 }, { "epoch": 19.65, "learning_rate": 0.001, "loss": 2.6023, "step": 102324 }, { "epoch": 19.65, "learning_rate": 0.001, "loss": 2.6119, "step": 102336 }, { "epoch": 19.65, "learning_rate": 0.001, "loss": 2.5924, "step": 102348 }, { "epoch": 19.65, "learning_rate": 0.001, "loss": 2.5955, "step": 102360 }, { "epoch": 19.66, "learning_rate": 0.001, "loss": 2.603, "step": 102372 }, { "epoch": 19.66, "learning_rate": 0.001, "loss": 2.5935, "step": 102384 }, { "epoch": 19.66, "learning_rate": 0.001, "loss": 2.6022, "step": 102396 }, { "epoch": 19.66, "learning_rate": 0.001, "loss": 2.5943, "step": 102408 }, { "epoch": 19.67, "learning_rate": 0.001, "loss": 2.6027, "step": 102420 }, { "epoch": 19.67, "learning_rate": 0.001, "loss": 2.5944, "step": 102432 }, { "epoch": 19.67, "learning_rate": 0.001, "loss": 2.5984, "step": 102444 }, { "epoch": 19.67, "learning_rate": 0.001, "loss": 2.5968, "step": 102456 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 2.5983, "step": 102468 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 2.6033, "step": 102480 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 2.6004, "step": 102492 }, { "epoch": 19.68, "eval_ag_news_accuracy": 0.3173125, "eval_ag_news_bleu_score": 4.691423623628973, "eval_ag_news_bleu_score_sem": 0.15466133768682222, "eval_ag_news_emb_cos_sim": 0.7932641506195068, "eval_ag_news_emb_cos_sim_sem": 0.008623128642951006, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.60044527053833, "eval_ag_news_n_ngrams_match_1": 13.744, "eval_ag_news_n_ngrams_match_2": 2.996, "eval_ag_news_n_ngrams_match_3": 0.846, "eval_ag_news_num_pred_words": 46.362, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.61453418785928, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3402795388194745, "eval_ag_news_runtime": 10.5343, "eval_ag_news_samples_per_second": 47.464, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3445436018691345, "eval_ag_news_token_set_f1_sem": 0.004439842698434328, "eval_ag_news_token_set_precision": 0.32660452770689047, "eval_ag_news_token_set_recall": 0.382334889985528, "eval_ag_news_true_num_tokens": 56.09375, "step": 102500 }, { "epoch": 19.68, "eval_anthropic_toxic_prompts_accuracy": 0.11165625, "eval_anthropic_toxic_prompts_bleu_score": 3.038927182508756, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1166048093642569, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6699259281158447, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008529760861153724, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3088090419769287, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.014, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.846, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674, "eval_anthropic_toxic_prompts_num_pred_words": 46.698, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.352530350572962, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20770925195551268, "eval_anthropic_toxic_prompts_runtime": 11.0456, "eval_anthropic_toxic_prompts_samples_per_second": 45.267, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3506336904106731, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006533185214663563, "eval_anthropic_toxic_prompts_token_set_precision": 0.42733017352350927, "eval_anthropic_toxic_prompts_token_set_recall": 0.3232101722749988, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 102500 }, { "epoch": 19.68, "eval_arxiv_accuracy": 0.34353125, "eval_arxiv_bleu_score": 4.381685962133321, "eval_arxiv_bleu_score_sem": 0.12864199554866626, "eval_arxiv_emb_cos_sim": 0.7539971470832825, "eval_arxiv_emb_cos_sim_sem": 0.008171694407095328, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4614906311035156, "eval_arxiv_n_ngrams_match_1": 14.888, "eval_arxiv_n_ngrams_match_2": 2.958, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 40.188, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.864439255410645, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3545552164503677, "eval_arxiv_runtime": 12.2991, "eval_arxiv_samples_per_second": 40.653, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.35171876288349385, "eval_arxiv_token_set_f1_sem": 0.004232513574492042, "eval_arxiv_token_set_precision": 0.3019476420818245, "eval_arxiv_token_set_recall": 0.4430720504923636, "eval_arxiv_true_num_tokens": 64.0, "step": 102500 }, { "epoch": 19.68, "eval_python_code_alpaca_accuracy": 0.156875, "eval_python_code_alpaca_bleu_score": 4.579654516055341, "eval_python_code_alpaca_bleu_score_sem": 0.1453812580765918, "eval_python_code_alpaca_emb_cos_sim": 0.749637246131897, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01052159984835432, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9196114540100098, "eval_python_code_alpaca_n_ngrams_match_1": 9.754, "eval_python_code_alpaca_n_ngrams_match_2": 2.87, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 44.162, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.534084716244298, "eval_python_code_alpaca_pred_num_tokens": 62.9921875, "eval_python_code_alpaca_rouge_score": 0.3267327343477824, "eval_python_code_alpaca_runtime": 10.2488, "eval_python_code_alpaca_samples_per_second": 48.786, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4749646198966025, "eval_python_code_alpaca_token_set_f1_sem": 0.005584243114920346, "eval_python_code_alpaca_token_set_precision": 0.5326308208430273, "eval_python_code_alpaca_token_set_recall": 0.4489898794251858, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 102500 }, { "epoch": 19.68, "eval_wikibio_accuracy": 0.31878125, "eval_wikibio_bleu_score": 5.8178783076919345, "eval_wikibio_bleu_score_sem": 0.2051005964325302, "eval_wikibio_emb_cos_sim": 0.7350831031799316, "eval_wikibio_emb_cos_sim_sem": 0.009765998392240599, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8103599548339844, "eval_wikibio_n_ngrams_match_1": 10.184, "eval_wikibio_n_ngrams_match_2": 3.398, "eval_wikibio_n_ngrams_match_3": 1.202, "eval_wikibio_num_pred_words": 37.13, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.1666939104105, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3548268860363404, "eval_wikibio_runtime": 11.5237, "eval_wikibio_samples_per_second": 43.389, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.31853611382067815, "eval_wikibio_token_set_f1_sem": 0.005338818709232502, "eval_wikibio_token_set_precision": 0.331798315823681, "eval_wikibio_token_set_recall": 0.321959454461384, "eval_wikibio_true_num_tokens": 61.1328125, "step": 102500 }, { "epoch": 19.68, "eval_nq_accuracy": 0.52290625, "eval_nq_bleu_score": 11.631830093739987, "eval_nq_bleu_score_sem": 0.4752784839567903, "eval_nq_emb_cos_sim": 0.8363233804702759, "eval_nq_emb_cos_sim_sem": 0.006605236317925597, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2226545810699463, "eval_nq_n_ngrams_match_1": 23.05, "eval_nq_n_ngrams_match_2": 8.416, "eval_nq_n_ngrams_match_3": 3.862, "eval_nq_num_pred_words": 49.088, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.231804941941066, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4451820902385734, "eval_nq_runtime": 10.5124, "eval_nq_samples_per_second": 47.563, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.45973034526634554, "eval_nq_token_set_f1_sem": 0.004803582203352779, "eval_nq_token_set_precision": 0.41600693268306, "eval_nq_token_set_recall": 0.5203950705862183, "eval_nq_true_num_tokens": 64.0, "step": 102500 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 2.59, "step": 102504 }, { "epoch": 19.68, "learning_rate": 0.001, "loss": 2.6049, "step": 102516 }, { "epoch": 19.69, "learning_rate": 0.001, "loss": 2.6069, "step": 102528 }, { "epoch": 19.69, "learning_rate": 0.001, "loss": 2.5974, "step": 102540 }, { "epoch": 19.69, "learning_rate": 0.001, "loss": 2.6002, "step": 102552 }, { "epoch": 19.69, "learning_rate": 0.001, "loss": 2.6024, "step": 102564 }, { "epoch": 19.7, "learning_rate": 0.001, "loss": 2.6021, "step": 102576 }, { "epoch": 19.7, "learning_rate": 0.001, "loss": 2.6002, "step": 102588 }, { "epoch": 19.7, "learning_rate": 0.001, "loss": 2.5889, "step": 102600 }, { "epoch": 19.7, "learning_rate": 0.001, "loss": 2.6066, "step": 102612 }, { "epoch": 19.71, "learning_rate": 0.001, "loss": 2.6015, "step": 102624 }, { "epoch": 19.71, "learning_rate": 0.001, "loss": 2.5972, "step": 102636 }, { "epoch": 19.71, "learning_rate": 0.001, "loss": 2.6029, "step": 102648 }, { "epoch": 19.71, "learning_rate": 0.001, "loss": 2.599, "step": 102660 }, { "epoch": 19.71, "learning_rate": 0.001, "loss": 2.5969, "step": 102672 }, { "epoch": 19.72, "learning_rate": 0.001, "loss": 2.5982, "step": 102684 }, { "epoch": 19.72, "learning_rate": 0.001, "loss": 2.596, "step": 102696 }, { "epoch": 19.72, "learning_rate": 0.001, "loss": 2.5877, "step": 102708 }, { "epoch": 19.72, "learning_rate": 0.001, "loss": 2.6028, "step": 102720 }, { "epoch": 19.73, "learning_rate": 0.001, "loss": 2.6034, "step": 102732 }, { "epoch": 19.73, "learning_rate": 0.001, "loss": 2.5918, "step": 102744 }, { "epoch": 19.73, "learning_rate": 0.001, "loss": 2.5882, "step": 102756 }, { "epoch": 19.73, "learning_rate": 0.001, "loss": 2.6035, "step": 102768 }, { "epoch": 19.74, "learning_rate": 0.001, "loss": 2.5902, "step": 102780 }, { "epoch": 19.74, "learning_rate": 0.001, "loss": 2.6089, "step": 102792 }, { "epoch": 19.74, "learning_rate": 0.001, "loss": 2.5955, "step": 102804 }, { "epoch": 19.74, "learning_rate": 0.001, "loss": 2.5986, "step": 102816 }, { "epoch": 19.74, "learning_rate": 0.001, "loss": 2.5908, "step": 102828 }, { "epoch": 19.75, "learning_rate": 0.001, "loss": 2.6112, "step": 102840 }, { "epoch": 19.75, "learning_rate": 0.001, "loss": 2.5974, "step": 102852 }, { "epoch": 19.75, "learning_rate": 0.001, "loss": 2.6036, "step": 102864 }, { "epoch": 19.75, "learning_rate": 0.001, "loss": 2.5963, "step": 102876 }, { "epoch": 19.76, "learning_rate": 0.001, "loss": 2.5966, "step": 102888 }, { "epoch": 19.76, "learning_rate": 0.001, "loss": 2.6023, "step": 102900 }, { "epoch": 19.76, "learning_rate": 0.001, "loss": 2.5957, "step": 102912 }, { "epoch": 19.76, "learning_rate": 0.001, "loss": 2.5988, "step": 102924 }, { "epoch": 19.76, "learning_rate": 0.001, "loss": 2.5974, "step": 102936 }, { "epoch": 19.77, "learning_rate": 0.001, "loss": 2.5908, "step": 102948 }, { "epoch": 19.77, "learning_rate": 0.001, "loss": 2.5999, "step": 102960 }, { "epoch": 19.77, "learning_rate": 0.001, "loss": 2.6026, "step": 102972 }, { "epoch": 19.77, "learning_rate": 0.001, "loss": 2.6008, "step": 102984 }, { "epoch": 19.78, "learning_rate": 0.001, "loss": 2.599, "step": 102996 }, { "epoch": 19.78, "learning_rate": 0.001, "loss": 2.6026, "step": 103008 }, { "epoch": 19.78, "learning_rate": 0.001, "loss": 2.6083, "step": 103020 }, { "epoch": 19.78, "learning_rate": 0.001, "loss": 2.606, "step": 103032 }, { "epoch": 19.79, "learning_rate": 0.001, "loss": 2.5947, "step": 103044 }, { "epoch": 19.79, "learning_rate": 0.001, "loss": 2.6033, "step": 103056 }, { "epoch": 19.79, "learning_rate": 0.001, "loss": 2.6062, "step": 103068 }, { "epoch": 19.79, "learning_rate": 0.001, "loss": 2.6004, "step": 103080 }, { "epoch": 19.79, "learning_rate": 0.001, "loss": 2.6022, "step": 103092 }, { "epoch": 19.8, "learning_rate": 0.001, "loss": 2.6023, "step": 103104 }, { "epoch": 19.8, "learning_rate": 0.001, "loss": 2.5889, "step": 103116 }, { "epoch": 19.8, "eval_ag_news_accuracy": 0.31709375, "eval_ag_news_bleu_score": 4.607937684504936, "eval_ag_news_bleu_score_sem": 0.14321047998175232, "eval_ag_news_emb_cos_sim": 0.802483081817627, "eval_ag_news_emb_cos_sim_sem": 0.0072418084883994725, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.601728916168213, "eval_ag_news_n_ngrams_match_1": 13.78, "eval_ag_news_n_ngrams_match_2": 2.916, "eval_ag_news_n_ngrams_match_3": 0.77, "eval_ag_news_num_pred_words": 46.534, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.66156445329925, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34030002597252956, "eval_ag_news_runtime": 12.3707, "eval_ag_news_samples_per_second": 40.418, "eval_ag_news_steps_per_second": 0.081, "eval_ag_news_token_set_f1": 0.34443955534310594, "eval_ag_news_token_set_f1_sem": 0.004312122583042378, "eval_ag_news_token_set_precision": 0.3280786270056416, "eval_ag_news_token_set_recall": 0.37649928616567674, "eval_ag_news_true_num_tokens": 56.09375, "step": 103125 }, { "epoch": 19.8, "eval_anthropic_toxic_prompts_accuracy": 0.11309375, "eval_anthropic_toxic_prompts_bleu_score": 3.0478889488619254, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12645050526843735, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6633448600769043, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009141090362092114, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2605907917022705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.052, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664, "eval_anthropic_toxic_prompts_num_pred_words": 47.392, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.064931539903483, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20870707256624718, "eval_anthropic_toxic_prompts_runtime": 9.9192, "eval_anthropic_toxic_prompts_samples_per_second": 50.407, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3507397207628983, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006486808460927362, "eval_anthropic_toxic_prompts_token_set_precision": 0.42381584074695283, "eval_anthropic_toxic_prompts_token_set_recall": 0.3283925144215763, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 103125 }, { "epoch": 19.8, "eval_arxiv_accuracy": 0.34275, "eval_arxiv_bleu_score": 4.288245254818205, "eval_arxiv_bleu_score_sem": 0.11968165039450031, "eval_arxiv_emb_cos_sim": 0.7569453716278076, "eval_arxiv_emb_cos_sim_sem": 0.008195620646159356, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4457015991210938, "eval_arxiv_n_ngrams_match_1": 14.974, "eval_arxiv_n_ngrams_match_2": 2.942, "eval_arxiv_n_ngrams_match_3": 0.648, "eval_arxiv_num_pred_words": 40.822, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.365281582821886, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3566035159297212, "eval_arxiv_runtime": 10.1589, "eval_arxiv_samples_per_second": 49.218, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.34963592612515726, "eval_arxiv_token_set_f1_sem": 0.004042362921905531, "eval_arxiv_token_set_precision": 0.30014976224361234, "eval_arxiv_token_set_recall": 0.4386803905042103, "eval_arxiv_true_num_tokens": 64.0, "step": 103125 }, { "epoch": 19.8, "eval_python_code_alpaca_accuracy": 0.15715625, "eval_python_code_alpaca_bleu_score": 4.326761162920418, "eval_python_code_alpaca_bleu_score_sem": 0.14400794714798654, "eval_python_code_alpaca_emb_cos_sim": 0.7477049231529236, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01158838136519159, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9304184913635254, "eval_python_code_alpaca_n_ngrams_match_1": 9.566, "eval_python_code_alpaca_n_ngrams_match_2": 2.766, "eval_python_code_alpaca_n_ngrams_match_3": 0.856, "eval_python_code_alpaca_num_pred_words": 43.656, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.73546948845584, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3205807638372131, "eval_python_code_alpaca_runtime": 10.042, "eval_python_code_alpaca_samples_per_second": 49.791, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.46741444420144385, "eval_python_code_alpaca_token_set_f1_sem": 0.006132715892511715, "eval_python_code_alpaca_token_set_precision": 0.5229858419161856, "eval_python_code_alpaca_token_set_recall": 0.4422477184574977, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 103125 }, { "epoch": 19.8, "eval_wikibio_accuracy": 0.31846875, "eval_wikibio_bleu_score": 5.731702906131469, "eval_wikibio_bleu_score_sem": 0.20794995462695767, "eval_wikibio_emb_cos_sim": 0.7460880875587463, "eval_wikibio_emb_cos_sim_sem": 0.00848050700031268, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.798103094100952, "eval_wikibio_n_ngrams_match_1": 9.7, "eval_wikibio_n_ngrams_match_2": 3.188, "eval_wikibio_n_ngrams_match_3": 1.146, "eval_wikibio_num_pred_words": 35.244, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.616470924976525, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34378730685016656, "eval_wikibio_runtime": 9.9569, "eval_wikibio_samples_per_second": 50.216, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.30734571230261204, "eval_wikibio_token_set_f1_sem": 0.005821506104100853, "eval_wikibio_token_set_precision": 0.3133343318682684, "eval_wikibio_token_set_recall": 0.3177849725596456, "eval_wikibio_true_num_tokens": 61.1328125, "step": 103125 }, { "epoch": 19.8, "eval_nq_accuracy": 0.52165625, "eval_nq_bleu_score": 11.19061555193629, "eval_nq_bleu_score_sem": 0.46693828374467233, "eval_nq_emb_cos_sim": 0.8303855657577515, "eval_nq_emb_cos_sim_sem": 0.007060565186001485, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.220346689224243, "eval_nq_n_ngrams_match_1": 22.662, "eval_nq_n_ngrams_match_2": 8.196, "eval_nq_n_ngrams_match_3": 3.686, "eval_nq_num_pred_words": 49.054, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.210523501671728, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.43750425848509755, "eval_nq_runtime": 10.4023, "eval_nq_samples_per_second": 48.066, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.45334682762444267, "eval_nq_token_set_f1_sem": 0.0049616936657383695, "eval_nq_token_set_precision": 0.4100297675442324, "eval_nq_token_set_recall": 0.5151700213104357, "eval_nq_true_num_tokens": 64.0, "step": 103125 }, { "epoch": 19.8, "learning_rate": 0.001, "loss": 2.5988, "step": 103128 }, { "epoch": 19.8, "learning_rate": 0.001, "loss": 2.6066, "step": 103140 }, { "epoch": 19.81, "learning_rate": 0.001, "loss": 2.6019, "step": 103152 }, { "epoch": 19.81, "learning_rate": 0.001, "loss": 2.5935, "step": 103164 }, { "epoch": 19.81, "learning_rate": 0.001, "loss": 2.5946, "step": 103176 }, { "epoch": 19.81, "learning_rate": 0.001, "loss": 2.6025, "step": 103188 }, { "epoch": 19.82, "learning_rate": 0.001, "loss": 2.6019, "step": 103200 }, { "epoch": 19.82, "learning_rate": 0.001, "loss": 2.5965, "step": 103212 }, { "epoch": 19.82, "learning_rate": 0.001, "loss": 2.6026, "step": 103224 }, { "epoch": 19.82, "learning_rate": 0.001, "loss": 2.5891, "step": 103236 }, { "epoch": 19.82, "learning_rate": 0.001, "loss": 2.601, "step": 103248 }, { "epoch": 19.83, "learning_rate": 0.001, "loss": 2.5996, "step": 103260 }, { "epoch": 19.83, "learning_rate": 0.001, "loss": 2.5999, "step": 103272 }, { "epoch": 19.83, "learning_rate": 0.001, "loss": 2.5884, "step": 103284 }, { "epoch": 19.83, "learning_rate": 0.001, "loss": 2.6001, "step": 103296 }, { "epoch": 19.84, "learning_rate": 0.001, "loss": 2.5985, "step": 103308 }, { "epoch": 19.84, "learning_rate": 0.001, "loss": 2.6041, "step": 103320 }, { "epoch": 19.84, "learning_rate": 0.001, "loss": 2.5999, "step": 103332 }, { "epoch": 19.84, "learning_rate": 0.001, "loss": 2.5856, "step": 103344 }, { "epoch": 19.85, "learning_rate": 0.001, "loss": 2.5925, "step": 103356 }, { "epoch": 19.85, "learning_rate": 0.001, "loss": 2.6027, "step": 103368 }, { "epoch": 19.85, "learning_rate": 0.001, "loss": 2.5984, "step": 103380 }, { "epoch": 19.85, "learning_rate": 0.001, "loss": 2.6, "step": 103392 }, { "epoch": 19.85, "learning_rate": 0.001, "loss": 2.5915, "step": 103404 }, { "epoch": 19.86, "learning_rate": 0.001, "loss": 2.5929, "step": 103416 }, { "epoch": 19.86, "learning_rate": 0.001, "loss": 2.5969, "step": 103428 }, { "epoch": 19.86, "learning_rate": 0.001, "loss": 2.6028, "step": 103440 }, { "epoch": 19.86, "learning_rate": 0.001, "loss": 2.6007, "step": 103452 }, { "epoch": 19.87, "learning_rate": 0.001, "loss": 2.6043, "step": 103464 }, { "epoch": 19.87, "learning_rate": 0.001, "loss": 2.597, "step": 103476 }, { "epoch": 19.87, "learning_rate": 0.001, "loss": 2.6008, "step": 103488 }, { "epoch": 19.87, "learning_rate": 0.001, "loss": 2.601, "step": 103500 }, { "epoch": 19.88, "learning_rate": 0.001, "loss": 2.6046, "step": 103512 }, { "epoch": 19.88, "learning_rate": 0.001, "loss": 2.5902, "step": 103524 }, { "epoch": 19.88, "learning_rate": 0.001, "loss": 2.5969, "step": 103536 }, { "epoch": 19.88, "learning_rate": 0.001, "loss": 2.6042, "step": 103548 }, { "epoch": 19.88, "learning_rate": 0.001, "loss": 2.5924, "step": 103560 }, { "epoch": 19.89, "learning_rate": 0.001, "loss": 2.604, "step": 103572 }, { "epoch": 19.89, "learning_rate": 0.001, "loss": 2.5983, "step": 103584 }, { "epoch": 19.89, "learning_rate": 0.001, "loss": 2.5933, "step": 103596 }, { "epoch": 19.89, "learning_rate": 0.001, "loss": 2.6005, "step": 103608 }, { "epoch": 19.9, "learning_rate": 0.001, "loss": 2.5921, "step": 103620 }, { "epoch": 19.9, "learning_rate": 0.001, "loss": 2.5911, "step": 103632 }, { "epoch": 19.9, "learning_rate": 0.001, "loss": 2.607, "step": 103644 }, { "epoch": 19.9, "learning_rate": 0.001, "loss": 2.6071, "step": 103656 }, { "epoch": 19.91, "learning_rate": 0.001, "loss": 2.6034, "step": 103668 }, { "epoch": 19.91, "learning_rate": 0.001, "loss": 2.591, "step": 103680 }, { "epoch": 19.91, "learning_rate": 0.001, "loss": 2.6005, "step": 103692 }, { "epoch": 19.91, "learning_rate": 0.001, "loss": 2.6062, "step": 103704 }, { "epoch": 19.91, "learning_rate": 0.001, "loss": 2.594, "step": 103716 }, { "epoch": 19.92, "learning_rate": 0.001, "loss": 2.5969, "step": 103728 }, { "epoch": 19.92, "learning_rate": 0.001, "loss": 2.595, "step": 103740 }, { "epoch": 19.92, "eval_ag_news_accuracy": 0.3181875, "eval_ag_news_bleu_score": 4.715667647533455, "eval_ag_news_bleu_score_sem": 0.15630175575770888, "eval_ag_news_emb_cos_sim": 0.8031496405601501, "eval_ag_news_emb_cos_sim_sem": 0.006938623816547341, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5962557792663574, "eval_ag_news_n_ngrams_match_1": 13.736, "eval_ag_news_n_ngrams_match_2": 2.966, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 46.312, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.46145879436054, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34195502514304654, "eval_ag_news_runtime": 10.6348, "eval_ag_news_samples_per_second": 47.015, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3434867870513179, "eval_ag_news_token_set_f1_sem": 0.004414470275763657, "eval_ag_news_token_set_precision": 0.32764298635347155, "eval_ag_news_token_set_recall": 0.3793269588079093, "eval_ag_news_true_num_tokens": 56.09375, "step": 103750 }, { "epoch": 19.92, "eval_anthropic_toxic_prompts_accuracy": 0.1115625, "eval_anthropic_toxic_prompts_bleu_score": 3.018182871168191, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11276513876030779, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6696054935455322, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00884096858074661, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3192715644836426, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.056, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, "eval_anthropic_toxic_prompts_num_pred_words": 47.0, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.640209113537587, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21006779873229692, "eval_anthropic_toxic_prompts_runtime": 9.9518, "eval_anthropic_toxic_prompts_samples_per_second": 50.242, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3520671543164431, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006309386139710659, "eval_anthropic_toxic_prompts_token_set_precision": 0.43008164766729573, "eval_anthropic_toxic_prompts_token_set_recall": 0.3258277909550858, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 103750 }, { "epoch": 19.92, "eval_arxiv_accuracy": 0.34215625, "eval_arxiv_bleu_score": 4.176736975809541, "eval_arxiv_bleu_score_sem": 0.12467495953943461, "eval_arxiv_emb_cos_sim": 0.7531987428665161, "eval_arxiv_emb_cos_sim_sem": 0.0075409055744905555, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.453075408935547, "eval_arxiv_n_ngrams_match_1": 14.544, "eval_arxiv_n_ngrams_match_2": 2.8, "eval_arxiv_n_ngrams_match_3": 0.638, "eval_arxiv_num_pred_words": 39.818, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.59741801711919, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.34731129323909604, "eval_arxiv_runtime": 10.0994, "eval_arxiv_samples_per_second": 49.508, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.34377904849398316, "eval_arxiv_token_set_f1_sem": 0.004463704423932529, "eval_arxiv_token_set_precision": 0.29239258164571863, "eval_arxiv_token_set_recall": 0.4403505401885056, "eval_arxiv_true_num_tokens": 64.0, "step": 103750 }, { "epoch": 19.92, "eval_python_code_alpaca_accuracy": 0.1574375, "eval_python_code_alpaca_bleu_score": 4.403554212258782, "eval_python_code_alpaca_bleu_score_sem": 0.13585818842600816, "eval_python_code_alpaca_emb_cos_sim": 0.7534340023994446, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008355750353086909, "eval_python_code_alpaca_emb_top1_equal": 0.078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9384117126464844, "eval_python_code_alpaca_n_ngrams_match_1": 9.674, "eval_python_code_alpaca_n_ngrams_match_2": 2.866, "eval_python_code_alpaca_n_ngrams_match_3": 0.934, "eval_python_code_alpaca_num_pred_words": 43.554, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.885826359242937, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3256599766462427, "eval_python_code_alpaca_runtime": 9.6925, "eval_python_code_alpaca_samples_per_second": 51.586, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4706860314796137, "eval_python_code_alpaca_token_set_f1_sem": 0.005952787845075403, "eval_python_code_alpaca_token_set_precision": 0.528415100865822, "eval_python_code_alpaca_token_set_recall": 0.4505679422166955, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 103750 }, { "epoch": 19.92, "eval_wikibio_accuracy": 0.31825, "eval_wikibio_bleu_score": 5.666073048865579, "eval_wikibio_bleu_score_sem": 0.21024273837489288, "eval_wikibio_emb_cos_sim": 0.7306700944900513, "eval_wikibio_emb_cos_sim_sem": 0.010345679561214356, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7904253005981445, "eval_wikibio_n_ngrams_match_1": 9.45, "eval_wikibio_n_ngrams_match_2": 3.178, "eval_wikibio_n_ngrams_match_3": 1.176, "eval_wikibio_num_pred_words": 34.446, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.27522655262205, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33124776114616805, "eval_wikibio_runtime": 10.5162, "eval_wikibio_samples_per_second": 47.546, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.30305201187485425, "eval_wikibio_token_set_f1_sem": 0.006091675061500554, "eval_wikibio_token_set_precision": 0.305604218844721, "eval_wikibio_token_set_recall": 0.31958889759391196, "eval_wikibio_true_num_tokens": 61.1328125, "step": 103750 }, { "epoch": 19.92, "eval_nq_accuracy": 0.523625, "eval_nq_bleu_score": 11.562758117945734, "eval_nq_bleu_score_sem": 0.494669787310588, "eval_nq_emb_cos_sim": 0.827433168888092, "eval_nq_emb_cos_sim_sem": 0.008588306126457895, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2214205265045166, "eval_nq_n_ngrams_match_1": 22.974, "eval_nq_n_ngrams_match_2": 8.374, "eval_nq_n_ngrams_match_3": 3.82, "eval_nq_num_pred_words": 48.762, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.220419417529406, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44541622854136464, "eval_nq_runtime": 10.4349, "eval_nq_samples_per_second": 47.916, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4598731638988288, "eval_nq_token_set_f1_sem": 0.005011778048939061, "eval_nq_token_set_precision": 0.41685872662466955, "eval_nq_token_set_recall": 0.5217251813728859, "eval_nq_true_num_tokens": 64.0, "step": 103750 }, { "epoch": 19.92, "learning_rate": 0.001, "loss": 2.6056, "step": 103752 }, { "epoch": 19.92, "learning_rate": 0.001, "loss": 2.5962, "step": 103764 }, { "epoch": 19.93, "learning_rate": 0.001, "loss": 2.6014, "step": 103776 }, { "epoch": 19.93, "learning_rate": 0.001, "loss": 2.5973, "step": 103788 }, { "epoch": 19.93, "learning_rate": 0.001, "loss": 2.5865, "step": 103800 }, { "epoch": 19.93, "learning_rate": 0.001, "loss": 2.6059, "step": 103812 }, { "epoch": 19.94, "learning_rate": 0.001, "loss": 2.5984, "step": 103824 }, { "epoch": 19.94, "learning_rate": 0.001, "loss": 2.5989, "step": 103836 }, { "epoch": 19.94, "learning_rate": 0.001, "loss": 2.6043, "step": 103848 }, { "epoch": 19.94, "learning_rate": 0.001, "loss": 2.5983, "step": 103860 }, { "epoch": 19.94, "learning_rate": 0.001, "loss": 2.6014, "step": 103872 }, { "epoch": 19.95, "learning_rate": 0.001, "loss": 2.6042, "step": 103884 }, { "epoch": 19.95, "learning_rate": 0.001, "loss": 2.5932, "step": 103896 }, { "epoch": 19.95, "learning_rate": 0.001, "loss": 2.5925, "step": 103908 }, { "epoch": 19.95, "learning_rate": 0.001, "loss": 2.5947, "step": 103920 }, { "epoch": 19.96, "learning_rate": 0.001, "loss": 2.5945, "step": 103932 }, { "epoch": 19.96, "learning_rate": 0.001, "loss": 2.5927, "step": 103944 }, { "epoch": 19.96, "learning_rate": 0.001, "loss": 2.5941, "step": 103956 }, { "epoch": 19.96, "learning_rate": 0.001, "loss": 2.5943, "step": 103968 }, { "epoch": 19.97, "learning_rate": 0.001, "loss": 2.5984, "step": 103980 }, { "epoch": 19.97, "learning_rate": 0.001, "loss": 2.6047, "step": 103992 }, { "epoch": 19.97, "learning_rate": 0.001, "loss": 2.6054, "step": 104004 }, { "epoch": 19.97, "learning_rate": 0.001, "loss": 2.6002, "step": 104016 }, { "epoch": 19.97, "learning_rate": 0.001, "loss": 2.5994, "step": 104028 }, { "epoch": 19.98, "learning_rate": 0.001, "loss": 2.6086, "step": 104040 }, { "epoch": 19.98, "learning_rate": 0.001, "loss": 2.5946, "step": 104052 }, { "epoch": 19.98, "learning_rate": 0.001, "loss": 2.5961, "step": 104064 }, { "epoch": 19.98, "learning_rate": 0.001, "loss": 2.597, "step": 104076 }, { "epoch": 19.99, "learning_rate": 0.001, "loss": 2.6066, "step": 104088 }, { "epoch": 19.99, "learning_rate": 0.001, "loss": 2.6019, "step": 104100 }, { "epoch": 19.99, "learning_rate": 0.001, "loss": 2.5918, "step": 104112 }, { "epoch": 19.99, "learning_rate": 0.001, "loss": 2.596, "step": 104124 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 2.5922, "step": 104136 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 2.6008, "step": 104148 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 2.5896, "step": 104160 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 2.5917, "step": 104172 }, { "epoch": 20.0, "learning_rate": 0.001, "loss": 2.5753, "step": 104184 }, { "epoch": 20.01, "learning_rate": 0.001, "loss": 2.5787, "step": 104196 }, { "epoch": 20.01, "learning_rate": 0.001, "loss": 2.5833, "step": 104208 }, { "epoch": 20.01, "learning_rate": 0.001, "loss": 2.581, "step": 104220 }, { "epoch": 20.01, "learning_rate": 0.001, "loss": 2.5944, "step": 104232 }, { "epoch": 20.02, "learning_rate": 0.001, "loss": 2.5791, "step": 104244 }, { "epoch": 20.02, "learning_rate": 0.001, "loss": 2.5846, "step": 104256 }, { "epoch": 20.02, "learning_rate": 0.001, "loss": 2.5908, "step": 104268 }, { "epoch": 20.02, "learning_rate": 0.001, "loss": 2.5882, "step": 104280 }, { "epoch": 20.03, "learning_rate": 0.001, "loss": 2.594, "step": 104292 }, { "epoch": 20.03, "learning_rate": 0.001, "loss": 2.5724, "step": 104304 }, { "epoch": 20.03, "learning_rate": 0.001, "loss": 2.5755, "step": 104316 }, { "epoch": 20.03, "learning_rate": 0.001, "loss": 2.5702, "step": 104328 }, { "epoch": 20.03, "learning_rate": 0.001, "loss": 2.5751, "step": 104340 }, { "epoch": 20.04, "learning_rate": 0.001, "loss": 2.5809, "step": 104352 }, { "epoch": 20.04, "learning_rate": 0.001, "loss": 2.5806, "step": 104364 }, { "epoch": 20.04, "eval_ag_news_accuracy": 0.31740625, "eval_ag_news_bleu_score": 4.883588666029329, "eval_ag_news_bleu_score_sem": 0.15723113542537812, "eval_ag_news_emb_cos_sim": 0.8079344034194946, "eval_ag_news_emb_cos_sim_sem": 0.007645888105775129, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5977940559387207, "eval_ag_news_n_ngrams_match_1": 14.032, "eval_ag_news_n_ngrams_match_2": 3.154, "eval_ag_news_n_ngrams_match_3": 0.89, "eval_ag_news_num_pred_words": 46.416, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.51758976727882, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3469472651954559, "eval_ag_news_runtime": 11.2562, "eval_ag_news_samples_per_second": 44.42, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.3525941620996267, "eval_ag_news_token_set_f1_sem": 0.004374450540016678, "eval_ag_news_token_set_precision": 0.3365449646854891, "eval_ag_news_token_set_recall": 0.38861080053122043, "eval_ag_news_true_num_tokens": 56.09375, "step": 104375 }, { "epoch": 20.04, "eval_anthropic_toxic_prompts_accuracy": 0.11059375, "eval_anthropic_toxic_prompts_bleu_score": 2.9586549768073547, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11195752254001154, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6649320721626282, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008099481050527975, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3028931617736816, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.094, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.834, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654, "eval_anthropic_toxic_prompts_num_pred_words": 47.154, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.191193752290268, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21091802648932717, "eval_anthropic_toxic_prompts_runtime": 10.8255, "eval_anthropic_toxic_prompts_samples_per_second": 46.187, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.35448495895375165, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006407197503116182, "eval_anthropic_toxic_prompts_token_set_precision": 0.43185959562365966, "eval_anthropic_toxic_prompts_token_set_recall": 0.32764787968156023, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 104375 }, { "epoch": 20.04, "eval_arxiv_accuracy": 0.34171875, "eval_arxiv_bleu_score": 4.284346862346857, "eval_arxiv_bleu_score_sem": 0.12288335203105014, "eval_arxiv_emb_cos_sim": 0.7581665515899658, "eval_arxiv_emb_cos_sim_sem": 0.009112410748769466, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4507367610931396, "eval_arxiv_n_ngrams_match_1": 15.064, "eval_arxiv_n_ngrams_match_2": 2.904, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 41.05, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.52360912379246, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3551188882625711, "eval_arxiv_runtime": 10.1463, "eval_arxiv_samples_per_second": 49.279, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3484516683462858, "eval_arxiv_token_set_f1_sem": 0.004204622078277267, "eval_arxiv_token_set_precision": 0.30191216204063465, "eval_arxiv_token_set_recall": 0.429590681575259, "eval_arxiv_true_num_tokens": 64.0, "step": 104375 }, { "epoch": 20.04, "eval_python_code_alpaca_accuracy": 0.1565625, "eval_python_code_alpaca_bleu_score": 4.4566096427278215, "eval_python_code_alpaca_bleu_score_sem": 0.1318644154584627, "eval_python_code_alpaca_emb_cos_sim": 0.739457368850708, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011631139343005893, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9366791248321533, "eval_python_code_alpaca_n_ngrams_match_1": 9.678, "eval_python_code_alpaca_n_ngrams_match_2": 2.822, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 43.132, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.85313333657401, "eval_python_code_alpaca_pred_num_tokens": 62.9921875, "eval_python_code_alpaca_rouge_score": 0.3266450614411218, "eval_python_code_alpaca_runtime": 10.9889, "eval_python_code_alpaca_samples_per_second": 45.5, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4623365543211727, "eval_python_code_alpaca_token_set_f1_sem": 0.00579049073921156, "eval_python_code_alpaca_token_set_precision": 0.5247487697138802, "eval_python_code_alpaca_token_set_recall": 0.43969554453997806, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 104375 }, { "epoch": 20.04, "eval_wikibio_accuracy": 0.32203125, "eval_wikibio_bleu_score": 5.693586258426564, "eval_wikibio_bleu_score_sem": 0.21401222196896194, "eval_wikibio_emb_cos_sim": 0.7210272550582886, "eval_wikibio_emb_cos_sim_sem": 0.011277541771978667, "eval_wikibio_emb_top1_equal": 0.09375, "eval_wikibio_emb_top1_equal_sem": 0.025864720141013958, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7538135051727295, "eval_wikibio_n_ngrams_match_1": 9.782, "eval_wikibio_n_ngrams_match_2": 3.264, "eval_wikibio_n_ngrams_match_3": 1.184, "eval_wikibio_num_pred_words": 35.648, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.68354594788247, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.33653755581064637, "eval_wikibio_runtime": 10.6279, "eval_wikibio_samples_per_second": 47.046, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.30889792740431826, "eval_wikibio_token_set_f1_sem": 0.006045139571051343, "eval_wikibio_token_set_precision": 0.3135824512940988, "eval_wikibio_token_set_recall": 0.32193176092263764, "eval_wikibio_true_num_tokens": 61.1328125, "step": 104375 }, { "epoch": 20.04, "eval_nq_accuracy": 0.52159375, "eval_nq_bleu_score": 11.51127286391578, "eval_nq_bleu_score_sem": 0.4610427434499334, "eval_nq_emb_cos_sim": 0.8304370641708374, "eval_nq_emb_cos_sim_sem": 0.006957488482523013, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2188475131988525, "eval_nq_n_ngrams_match_1": 22.922, "eval_nq_n_ngrams_match_2": 8.428, "eval_nq_n_ngrams_match_3": 3.796, "eval_nq_num_pred_words": 48.972, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.196725650944487, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4410085262379106, "eval_nq_runtime": 10.6301, "eval_nq_samples_per_second": 47.036, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.45868090157178976, "eval_nq_token_set_f1_sem": 0.004903544929834385, "eval_nq_token_set_precision": 0.4157491712440606, "eval_nq_token_set_recall": 0.5192416185417322, "eval_nq_true_num_tokens": 64.0, "step": 104375 }, { "epoch": 20.04, "learning_rate": 0.001, "loss": 2.5943, "step": 104376 }, { "epoch": 20.04, "learning_rate": 0.001, "loss": 2.5822, "step": 104388 }, { "epoch": 20.05, "learning_rate": 0.001, "loss": 2.5842, "step": 104400 }, { "epoch": 20.05, "learning_rate": 0.001, "loss": 2.5877, "step": 104412 }, { "epoch": 20.05, "learning_rate": 0.001, "loss": 2.5825, "step": 104424 }, { "epoch": 20.05, "learning_rate": 0.001, "loss": 2.5847, "step": 104436 }, { "epoch": 20.06, "learning_rate": 0.001, "loss": 2.5892, "step": 104448 }, { "epoch": 20.06, "learning_rate": 0.001, "loss": 2.5888, "step": 104460 }, { "epoch": 20.06, "learning_rate": 0.001, "loss": 2.5753, "step": 104472 }, { "epoch": 20.06, "learning_rate": 0.001, "loss": 2.5832, "step": 104484 }, { "epoch": 20.06, "learning_rate": 0.001, "loss": 2.5793, "step": 104496 }, { "epoch": 20.07, "learning_rate": 0.001, "loss": 2.5809, "step": 104508 }, { "epoch": 20.07, "learning_rate": 0.001, "loss": 2.5828, "step": 104520 }, { "epoch": 20.07, "learning_rate": 0.001, "loss": 2.5648, "step": 104532 }, { "epoch": 20.07, "learning_rate": 0.001, "loss": 2.5822, "step": 104544 }, { "epoch": 20.08, "learning_rate": 0.001, "loss": 2.5809, "step": 104556 }, { "epoch": 20.08, "learning_rate": 0.001, "loss": 2.5846, "step": 104568 }, { "epoch": 20.08, "learning_rate": 0.001, "loss": 2.5818, "step": 104580 }, { "epoch": 20.08, "learning_rate": 0.001, "loss": 2.5852, "step": 104592 }, { "epoch": 20.09, "learning_rate": 0.001, "loss": 2.5708, "step": 104604 }, { "epoch": 20.09, "learning_rate": 0.001, "loss": 2.5804, "step": 104616 }, { "epoch": 20.09, "learning_rate": 0.001, "loss": 2.5734, "step": 104628 }, { "epoch": 20.09, "learning_rate": 0.001, "loss": 2.59, "step": 104640 }, { "epoch": 20.09, "learning_rate": 0.001, "loss": 2.5806, "step": 104652 }, { "epoch": 20.1, "learning_rate": 0.001, "loss": 2.5679, "step": 104664 }, { "epoch": 20.1, "learning_rate": 0.001, "loss": 2.5897, "step": 104676 }, { "epoch": 20.1, "learning_rate": 0.001, "loss": 2.5846, "step": 104688 }, { "epoch": 20.1, "learning_rate": 0.001, "loss": 2.5961, "step": 104700 }, { "epoch": 20.11, "learning_rate": 0.001, "loss": 2.582, "step": 104712 }, { "epoch": 20.11, "learning_rate": 0.001, "loss": 2.5898, "step": 104724 }, { "epoch": 20.11, "learning_rate": 0.001, "loss": 2.5806, "step": 104736 }, { "epoch": 20.11, "learning_rate": 0.001, "loss": 2.5939, "step": 104748 }, { "epoch": 20.12, "learning_rate": 0.001, "loss": 2.5822, "step": 104760 }, { "epoch": 20.12, "learning_rate": 0.001, "loss": 2.5928, "step": 104772 }, { "epoch": 20.12, "learning_rate": 0.001, "loss": 2.5863, "step": 104784 }, { "epoch": 20.12, "learning_rate": 0.001, "loss": 2.5867, "step": 104796 }, { "epoch": 20.12, "learning_rate": 0.001, "loss": 2.5942, "step": 104808 }, { "epoch": 20.13, "learning_rate": 0.001, "loss": 2.5794, "step": 104820 }, { "epoch": 20.13, "learning_rate": 0.001, "loss": 2.5802, "step": 104832 }, { "epoch": 20.13, "learning_rate": 0.001, "loss": 2.5813, "step": 104844 }, { "epoch": 20.13, "learning_rate": 0.001, "loss": 2.5888, "step": 104856 }, { "epoch": 20.14, "learning_rate": 0.001, "loss": 2.5831, "step": 104868 }, { "epoch": 20.14, "learning_rate": 0.001, "loss": 2.5878, "step": 104880 }, { "epoch": 20.14, "learning_rate": 0.001, "loss": 2.5879, "step": 104892 }, { "epoch": 20.14, "learning_rate": 0.001, "loss": 2.5904, "step": 104904 }, { "epoch": 20.15, "learning_rate": 0.001, "loss": 2.5825, "step": 104916 }, { "epoch": 20.15, "learning_rate": 0.001, "loss": 2.5799, "step": 104928 }, { "epoch": 20.15, "learning_rate": 0.001, "loss": 2.5824, "step": 104940 }, { "epoch": 20.15, "learning_rate": 0.001, "loss": 2.5853, "step": 104952 }, { "epoch": 20.15, "learning_rate": 0.001, "loss": 2.5834, "step": 104964 }, { "epoch": 20.16, "learning_rate": 0.001, "loss": 2.5789, "step": 104976 }, { "epoch": 20.16, "learning_rate": 0.001, "loss": 2.5852, "step": 104988 }, { "epoch": 20.16, "learning_rate": 0.001, "loss": 2.5932, "step": 105000 }, { "epoch": 20.16, "eval_ag_news_accuracy": 0.319, "eval_ag_news_bleu_score": 4.605429809274004, "eval_ag_news_bleu_score_sem": 0.14796464979030508, "eval_ag_news_emb_cos_sim": 0.7997758388519287, "eval_ag_news_emb_cos_sim_sem": 0.008311400014652305, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.59835147857666, "eval_ag_news_n_ngrams_match_1": 13.786, "eval_ag_news_n_ngrams_match_2": 2.982, "eval_ag_news_n_ngrams_match_3": 0.814, "eval_ag_news_num_pred_words": 46.552, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.53795117292509, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3422152509367118, "eval_ag_news_runtime": 10.3613, "eval_ag_news_samples_per_second": 48.256, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3448444164429131, "eval_ag_news_token_set_f1_sem": 0.004375114955950184, "eval_ag_news_token_set_precision": 0.3272199511481177, "eval_ag_news_token_set_recall": 0.383210601694993, "eval_ag_news_true_num_tokens": 56.09375, "step": 105000 }, { "epoch": 20.16, "eval_anthropic_toxic_prompts_accuracy": 0.1111875, "eval_anthropic_toxic_prompts_bleu_score": 2.913957418262643, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11354240886446086, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6618475914001465, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008768023581230888, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.27228045463562, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.968, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.81, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.664, "eval_anthropic_toxic_prompts_num_pred_words": 48.572, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.37140962678605, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20260705874664958, "eval_anthropic_toxic_prompts_runtime": 10.2024, "eval_anthropic_toxic_prompts_samples_per_second": 49.008, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.35556414073326476, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006566697430293288, "eval_anthropic_toxic_prompts_token_set_precision": 0.4219906214910666, "eval_anthropic_toxic_prompts_token_set_recall": 0.3388754652470632, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 105000 }, { "epoch": 20.16, "eval_arxiv_accuracy": 0.34475, "eval_arxiv_bleu_score": 4.208309160536787, "eval_arxiv_bleu_score_sem": 0.12308498120915533, "eval_arxiv_emb_cos_sim": 0.7577486634254456, "eval_arxiv_emb_cos_sim_sem": 0.006845360743326828, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.426072359085083, "eval_arxiv_n_ngrams_match_1": 14.822, "eval_arxiv_n_ngrams_match_2": 2.89, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 40.15, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.755608229957517, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3521257509215361, "eval_arxiv_runtime": 10.2874, "eval_arxiv_samples_per_second": 48.603, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.34755364501066693, "eval_arxiv_token_set_f1_sem": 0.004244639601194786, "eval_arxiv_token_set_precision": 0.29681714022247974, "eval_arxiv_token_set_recall": 0.439418931454802, "eval_arxiv_true_num_tokens": 64.0, "step": 105000 }, { "epoch": 20.16, "eval_python_code_alpaca_accuracy": 0.15621875, "eval_python_code_alpaca_bleu_score": 4.263972644966267, "eval_python_code_alpaca_bleu_score_sem": 0.13122937793061254, "eval_python_code_alpaca_emb_cos_sim": 0.7547144293785095, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009145565830568364, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.944267511367798, "eval_python_code_alpaca_n_ngrams_match_1": 9.67, "eval_python_code_alpaca_n_ngrams_match_2": 2.85, "eval_python_code_alpaca_n_ngrams_match_3": 0.91, "eval_python_code_alpaca_num_pred_words": 46.294, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.996742391121284, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3192542589726646, "eval_python_code_alpaca_runtime": 10.4067, "eval_python_code_alpaca_samples_per_second": 48.046, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.47121015489205725, "eval_python_code_alpaca_token_set_f1_sem": 0.005519328262978709, "eval_python_code_alpaca_token_set_precision": 0.5266123175871972, "eval_python_code_alpaca_token_set_recall": 0.4544299950246113, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 105000 }, { "epoch": 20.16, "eval_wikibio_accuracy": 0.32009375, "eval_wikibio_bleu_score": 5.812003918938086, "eval_wikibio_bleu_score_sem": 0.2018049464266148, "eval_wikibio_emb_cos_sim": 0.733515739440918, "eval_wikibio_emb_cos_sim_sem": 0.01020074642735301, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.782012701034546, "eval_wikibio_n_ngrams_match_1": 10.268, "eval_wikibio_n_ngrams_match_2": 3.452, "eval_wikibio_n_ngrams_match_3": 1.258, "eval_wikibio_num_pred_words": 37.706, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.90431913631204, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35194199644964386, "eval_wikibio_runtime": 10.1156, "eval_wikibio_samples_per_second": 49.428, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3190888122402507, "eval_wikibio_token_set_f1_sem": 0.005576558664879203, "eval_wikibio_token_set_precision": 0.33124807985049276, "eval_wikibio_token_set_recall": 0.321457263469454, "eval_wikibio_true_num_tokens": 61.1328125, "step": 105000 }, { "epoch": 20.16, "eval_nq_accuracy": 0.52303125, "eval_nq_bleu_score": 11.609777055328264, "eval_nq_bleu_score_sem": 0.4788533672022871, "eval_nq_emb_cos_sim": 0.8259741067886353, "eval_nq_emb_cos_sim_sem": 0.007167097931057849, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2192955017089844, "eval_nq_n_ngrams_match_1": 22.978, "eval_nq_n_ngrams_match_2": 8.426, "eval_nq_n_ngrams_match_3": 3.912, "eval_nq_num_pred_words": 49.236, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.200846601367244, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4417233456326265, "eval_nq_runtime": 11.0886, "eval_nq_samples_per_second": 45.092, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.45956217890785706, "eval_nq_token_set_f1_sem": 0.005096092215346017, "eval_nq_token_set_precision": 0.41619126249112176, "eval_nq_token_set_recall": 0.5219188661556968, "eval_nq_true_num_tokens": 64.0, "step": 105000 }, { "epoch": 20.16, "learning_rate": 0.001, "loss": 2.5869, "step": 105012 }, { "epoch": 20.17, "learning_rate": 0.001, "loss": 2.5809, "step": 105024 }, { "epoch": 20.17, "learning_rate": 0.001, "loss": 2.5819, "step": 105036 }, { "epoch": 20.17, "learning_rate": 0.001, "loss": 2.5961, "step": 105048 }, { "epoch": 20.17, "learning_rate": 0.001, "loss": 2.5904, "step": 105060 }, { "epoch": 20.18, "learning_rate": 0.001, "loss": 2.5849, "step": 105072 }, { "epoch": 20.18, "learning_rate": 0.001, "loss": 2.5872, "step": 105084 }, { "epoch": 20.18, "learning_rate": 0.001, "loss": 2.5938, "step": 105096 }, { "epoch": 20.18, "learning_rate": 0.001, "loss": 2.5827, "step": 105108 }, { "epoch": 20.18, "learning_rate": 0.001, "loss": 2.5817, "step": 105120 }, { "epoch": 20.19, "learning_rate": 0.001, "loss": 2.5864, "step": 105132 }, { "epoch": 20.19, "learning_rate": 0.001, "loss": 2.5787, "step": 105144 }, { "epoch": 20.19, "learning_rate": 0.001, "loss": 2.5811, "step": 105156 }, { "epoch": 20.19, "learning_rate": 0.001, "loss": 2.5943, "step": 105168 }, { "epoch": 20.2, "learning_rate": 0.001, "loss": 2.5767, "step": 105180 }, { "epoch": 20.2, "learning_rate": 0.001, "loss": 2.5862, "step": 105192 }, { "epoch": 20.2, "learning_rate": 0.001, "loss": 2.5905, "step": 105204 }, { "epoch": 20.2, "learning_rate": 0.001, "loss": 2.5866, "step": 105216 }, { "epoch": 20.21, "learning_rate": 0.001, "loss": 2.5946, "step": 105228 }, { "epoch": 20.21, "learning_rate": 0.001, "loss": 2.5888, "step": 105240 }, { "epoch": 20.21, "learning_rate": 0.001, "loss": 2.5823, "step": 105252 }, { "epoch": 20.21, "learning_rate": 0.001, "loss": 2.5978, "step": 105264 }, { "epoch": 20.21, "learning_rate": 0.001, "loss": 2.5971, "step": 105276 }, { "epoch": 20.22, "learning_rate": 0.001, "loss": 2.5861, "step": 105288 }, { "epoch": 20.22, "learning_rate": 0.001, "loss": 2.5929, "step": 105300 }, { "epoch": 20.22, "learning_rate": 0.001, "loss": 2.584, "step": 105312 }, { "epoch": 20.22, "learning_rate": 0.001, "loss": 2.5924, "step": 105324 }, { "epoch": 20.23, "learning_rate": 0.001, "loss": 2.5857, "step": 105336 }, { "epoch": 20.23, "learning_rate": 0.001, "loss": 2.5836, "step": 105348 }, { "epoch": 20.23, "learning_rate": 0.001, "loss": 2.5951, "step": 105360 }, { "epoch": 20.23, "learning_rate": 0.001, "loss": 2.59, "step": 105372 }, { "epoch": 20.24, "learning_rate": 0.001, "loss": 2.5871, "step": 105384 }, { "epoch": 20.24, "learning_rate": 0.001, "loss": 2.5871, "step": 105396 }, { "epoch": 20.24, "learning_rate": 0.001, "loss": 2.5813, "step": 105408 }, { "epoch": 20.24, "learning_rate": 0.001, "loss": 2.5915, "step": 105420 }, { "epoch": 20.24, "learning_rate": 0.001, "loss": 2.581, "step": 105432 }, { "epoch": 20.25, "learning_rate": 0.001, "loss": 2.5691, "step": 105444 }, { "epoch": 20.25, "learning_rate": 0.001, "loss": 2.5891, "step": 105456 }, { "epoch": 20.25, "learning_rate": 0.001, "loss": 2.5906, "step": 105468 }, { "epoch": 20.25, "learning_rate": 0.001, "loss": 2.5941, "step": 105480 }, { "epoch": 20.26, "learning_rate": 0.001, "loss": 2.5834, "step": 105492 }, { "epoch": 20.26, "learning_rate": 0.001, "loss": 2.5892, "step": 105504 }, { "epoch": 20.26, "learning_rate": 0.001, "loss": 2.5869, "step": 105516 }, { "epoch": 20.26, "learning_rate": 0.001, "loss": 2.5923, "step": 105528 }, { "epoch": 20.26, "learning_rate": 0.001, "loss": 2.592, "step": 105540 }, { "epoch": 20.27, "learning_rate": 0.001, "loss": 2.5774, "step": 105552 }, { "epoch": 20.27, "learning_rate": 0.001, "loss": 2.5837, "step": 105564 }, { "epoch": 20.27, "learning_rate": 0.001, "loss": 2.5849, "step": 105576 }, { "epoch": 20.27, "learning_rate": 0.001, "loss": 2.5905, "step": 105588 }, { "epoch": 20.28, "learning_rate": 0.001, "loss": 2.5853, "step": 105600 }, { "epoch": 20.28, "learning_rate": 0.001, "loss": 2.5835, "step": 105612 }, { "epoch": 20.28, "learning_rate": 0.001, "loss": 2.5939, "step": 105624 }, { "epoch": 20.28, "eval_ag_news_accuracy": 0.3175, "eval_ag_news_bleu_score": 4.686586673134966, "eval_ag_news_bleu_score_sem": 0.1487195148458065, "eval_ag_news_emb_cos_sim": 0.8074089884757996, "eval_ag_news_emb_cos_sim_sem": 0.006657484580644438, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.597015380859375, "eval_ag_news_n_ngrams_match_1": 13.786, "eval_ag_news_n_ngrams_match_2": 2.988, "eval_ag_news_n_ngrams_match_3": 0.862, "eval_ag_news_num_pred_words": 46.744, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.48916549824047, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34167332181654797, "eval_ag_news_runtime": 10.2887, "eval_ag_news_samples_per_second": 48.597, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3476416951360769, "eval_ag_news_token_set_f1_sem": 0.004279736414009697, "eval_ag_news_token_set_precision": 0.33120236761040905, "eval_ag_news_token_set_recall": 0.3804365707214545, "eval_ag_news_true_num_tokens": 56.09375, "step": 105625 }, { "epoch": 20.28, "eval_anthropic_toxic_prompts_accuracy": 0.11240625, "eval_anthropic_toxic_prompts_bleu_score": 3.114800385741145, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13139770303858317, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6710171699523926, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00954596681832243, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2607951164245605, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.89, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, "eval_anthropic_toxic_prompts_num_pred_words": 47.83, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.07025779392604, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2103906065837262, "eval_anthropic_toxic_prompts_runtime": 10.1147, "eval_anthropic_toxic_prompts_samples_per_second": 49.433, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.35165873890380994, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006430648021044267, "eval_anthropic_toxic_prompts_token_set_precision": 0.43630196424075235, "eval_anthropic_toxic_prompts_token_set_recall": 0.3222309206911361, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 105625 }, { "epoch": 20.28, "eval_arxiv_accuracy": 0.342125, "eval_arxiv_bleu_score": 4.163515466711427, "eval_arxiv_bleu_score_sem": 0.11898672585469931, "eval_arxiv_emb_cos_sim": 0.7568680644035339, "eval_arxiv_emb_cos_sim_sem": 0.008085180756537129, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4533305168151855, "eval_arxiv_n_ngrams_match_1": 14.824, "eval_arxiv_n_ngrams_match_2": 2.832, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 40.538, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.60547979569949, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3536696118573205, "eval_arxiv_runtime": 10.7478, "eval_arxiv_samples_per_second": 46.521, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.347638054299798, "eval_arxiv_token_set_f1_sem": 0.004396368419476976, "eval_arxiv_token_set_precision": 0.29900263163404556, "eval_arxiv_token_set_recall": 0.4334344934610621, "eval_arxiv_true_num_tokens": 64.0, "step": 105625 }, { "epoch": 20.28, "eval_python_code_alpaca_accuracy": 0.15675, "eval_python_code_alpaca_bleu_score": 4.312690157876314, "eval_python_code_alpaca_bleu_score_sem": 0.13894895255538506, "eval_python_code_alpaca_emb_cos_sim": 0.7477904558181763, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01032910465449294, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9216020107269287, "eval_python_code_alpaca_n_ngrams_match_1": 9.634, "eval_python_code_alpaca_n_ngrams_match_2": 2.784, "eval_python_code_alpaca_n_ngrams_match_3": 0.908, "eval_python_code_alpaca_num_pred_words": 44.446, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.571014606394616, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32147926756059025, "eval_python_code_alpaca_runtime": 9.9425, "eval_python_code_alpaca_samples_per_second": 50.289, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4657104079206335, "eval_python_code_alpaca_token_set_f1_sem": 0.005738082887467584, "eval_python_code_alpaca_token_set_precision": 0.5243087454582851, "eval_python_code_alpaca_token_set_recall": 0.44338081823921743, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 105625 }, { "epoch": 20.28, "eval_wikibio_accuracy": 0.31696875, "eval_wikibio_bleu_score": 5.670991468345321, "eval_wikibio_bleu_score_sem": 0.2037453600528457, "eval_wikibio_emb_cos_sim": 0.745564341545105, "eval_wikibio_emb_cos_sim_sem": 0.008491318325500232, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8296010494232178, "eval_wikibio_n_ngrams_match_1": 10.294, "eval_wikibio_n_ngrams_match_2": 3.33, "eval_wikibio_n_ngrams_match_3": 1.178, "eval_wikibio_num_pred_words": 37.974, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 46.04416522322324, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3546732297518005, "eval_wikibio_runtime": 10.1032, "eval_wikibio_samples_per_second": 49.489, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3230028201685258, "eval_wikibio_token_set_f1_sem": 0.0051683189323545615, "eval_wikibio_token_set_precision": 0.3327636165973762, "eval_wikibio_token_set_recall": 0.3281494835127084, "eval_wikibio_true_num_tokens": 61.1328125, "step": 105625 }, { "epoch": 20.28, "eval_nq_accuracy": 0.522875, "eval_nq_bleu_score": 11.763845667243404, "eval_nq_bleu_score_sem": 0.47547163346351584, "eval_nq_emb_cos_sim": 0.826331615447998, "eval_nq_emb_cos_sim_sem": 0.007203581955344599, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.218851327896118, "eval_nq_n_ngrams_match_1": 23.094, "eval_nq_n_ngrams_match_2": 8.532, "eval_nq_n_ngrams_match_3": 3.958, "eval_nq_num_pred_words": 49.09, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.196760733735596, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44519123317655285, "eval_nq_runtime": 11.085, "eval_nq_samples_per_second": 45.106, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4613269563111501, "eval_nq_token_set_f1_sem": 0.004909965814098997, "eval_nq_token_set_precision": 0.41814444380315813, "eval_nq_token_set_recall": 0.522397844153114, "eval_nq_true_num_tokens": 64.0, "step": 105625 }, { "epoch": 20.28, "learning_rate": 0.001, "loss": 2.5818, "step": 105636 }, { "epoch": 20.29, "learning_rate": 0.001, "loss": 2.5856, "step": 105648 }, { "epoch": 20.29, "learning_rate": 0.001, "loss": 2.5941, "step": 105660 }, { "epoch": 20.29, "learning_rate": 0.001, "loss": 2.5907, "step": 105672 }, { "epoch": 20.29, "learning_rate": 0.001, "loss": 2.5939, "step": 105684 }, { "epoch": 20.29, "learning_rate": 0.001, "loss": 2.5896, "step": 105696 }, { "epoch": 20.3, "learning_rate": 0.001, "loss": 2.5944, "step": 105708 }, { "epoch": 20.3, "learning_rate": 0.001, "loss": 2.5898, "step": 105720 }, { "epoch": 20.3, "learning_rate": 0.001, "loss": 2.5856, "step": 105732 }, { "epoch": 20.3, "learning_rate": 0.001, "loss": 2.6003, "step": 105744 }, { "epoch": 20.31, "learning_rate": 0.001, "loss": 2.5906, "step": 105756 }, { "epoch": 20.31, "learning_rate": 0.001, "loss": 2.5973, "step": 105768 }, { "epoch": 20.31, "learning_rate": 0.001, "loss": 2.595, "step": 105780 }, { "epoch": 20.31, "learning_rate": 0.001, "loss": 2.587, "step": 105792 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 2.5896, "step": 105804 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 2.5922, "step": 105816 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 2.5965, "step": 105828 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 2.5847, "step": 105840 }, { "epoch": 20.32, "learning_rate": 0.001, "loss": 2.5941, "step": 105852 }, { "epoch": 20.33, "learning_rate": 0.001, "loss": 2.5966, "step": 105864 }, { "epoch": 20.33, "learning_rate": 0.001, "loss": 2.5882, "step": 105876 }, { "epoch": 20.33, "learning_rate": 0.001, "loss": 2.5902, "step": 105888 }, { "epoch": 20.33, "learning_rate": 0.001, "loss": 2.5898, "step": 105900 }, { "epoch": 20.34, "learning_rate": 0.001, "loss": 2.5848, "step": 105912 }, { "epoch": 20.34, "learning_rate": 0.001, "loss": 2.5793, "step": 105924 }, { "epoch": 20.34, "learning_rate": 0.001, "loss": 2.5832, "step": 105936 }, { "epoch": 20.34, "learning_rate": 0.001, "loss": 2.5817, "step": 105948 }, { "epoch": 20.35, "learning_rate": 0.001, "loss": 2.5984, "step": 105960 }, { "epoch": 20.35, "learning_rate": 0.001, "loss": 2.5899, "step": 105972 }, { "epoch": 20.35, "learning_rate": 0.001, "loss": 2.5905, "step": 105984 }, { "epoch": 20.35, "learning_rate": 0.001, "loss": 2.5802, "step": 105996 }, { "epoch": 20.35, "learning_rate": 0.001, "loss": 2.5768, "step": 106008 }, { "epoch": 20.36, "learning_rate": 0.001, "loss": 2.5909, "step": 106020 }, { "epoch": 20.36, "learning_rate": 0.001, "loss": 2.5904, "step": 106032 }, { "epoch": 20.36, "learning_rate": 0.001, "loss": 2.5936, "step": 106044 }, { "epoch": 20.36, "learning_rate": 0.001, "loss": 2.5874, "step": 106056 }, { "epoch": 20.37, "learning_rate": 0.001, "loss": 2.5966, "step": 106068 }, { "epoch": 20.37, "learning_rate": 0.001, "loss": 2.5919, "step": 106080 }, { "epoch": 20.37, "learning_rate": 0.001, "loss": 2.5902, "step": 106092 }, { "epoch": 20.37, "learning_rate": 0.001, "loss": 2.5898, "step": 106104 }, { "epoch": 20.38, "learning_rate": 0.001, "loss": 2.5805, "step": 106116 }, { "epoch": 20.38, "learning_rate": 0.001, "loss": 2.5824, "step": 106128 }, { "epoch": 20.38, "learning_rate": 0.001, "loss": 2.589, "step": 106140 }, { "epoch": 20.38, "learning_rate": 0.001, "loss": 2.5888, "step": 106152 }, { "epoch": 20.38, "learning_rate": 0.001, "loss": 2.603, "step": 106164 }, { "epoch": 20.39, "learning_rate": 0.001, "loss": 2.5964, "step": 106176 }, { "epoch": 20.39, "learning_rate": 0.001, "loss": 2.5786, "step": 106188 }, { "epoch": 20.39, "learning_rate": 0.001, "loss": 2.5899, "step": 106200 }, { "epoch": 20.39, "learning_rate": 0.001, "loss": 2.5817, "step": 106212 }, { "epoch": 20.4, "learning_rate": 0.001, "loss": 2.5818, "step": 106224 }, { "epoch": 20.4, "learning_rate": 0.001, "loss": 2.591, "step": 106236 }, { "epoch": 20.4, "learning_rate": 0.001, "loss": 2.5945, "step": 106248 }, { "epoch": 20.4, "eval_ag_news_accuracy": 0.317875, "eval_ag_news_bleu_score": 4.736162124506833, "eval_ag_news_bleu_score_sem": 0.15106853552264363, "eval_ag_news_emb_cos_sim": 0.8027890920639038, "eval_ag_news_emb_cos_sim_sem": 0.008061560740669313, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.6067285537719727, "eval_ag_news_n_ngrams_match_1": 13.786, "eval_ag_news_n_ngrams_match_2": 3.026, "eval_ag_news_n_ngrams_match_3": 0.85, "eval_ag_news_num_pred_words": 46.112, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.84531795725257, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3414733496759852, "eval_ag_news_runtime": 10.4907, "eval_ag_news_samples_per_second": 47.661, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3464649564691012, "eval_ag_news_token_set_f1_sem": 0.004412410021202581, "eval_ag_news_token_set_precision": 0.3279196359557733, "eval_ag_news_token_set_recall": 0.38701578998733127, "eval_ag_news_true_num_tokens": 56.09375, "step": 106250 }, { "epoch": 20.4, "eval_anthropic_toxic_prompts_accuracy": 0.1120625, "eval_anthropic_toxic_prompts_bleu_score": 2.9651580586416264, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1149846847651953, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625226736068726, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009664726759258509, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.280329704284668, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.05, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.654, "eval_anthropic_toxic_prompts_num_pred_words": 47.72, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.584536290618793, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20726464934653505, "eval_anthropic_toxic_prompts_runtime": 9.909, "eval_anthropic_toxic_prompts_samples_per_second": 50.459, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3539139198354194, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006704180090698138, "eval_anthropic_toxic_prompts_token_set_precision": 0.42860452362979795, "eval_anthropic_toxic_prompts_token_set_recall": 0.32741245372358746, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 106250 }, { "epoch": 20.4, "eval_arxiv_accuracy": 0.34215625, "eval_arxiv_bleu_score": 4.24142969224101, "eval_arxiv_bleu_score_sem": 0.12202159091311132, "eval_arxiv_emb_cos_sim": 0.7643013596534729, "eval_arxiv_emb_cos_sim_sem": 0.007691076771368395, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.461731433868408, "eval_arxiv_n_ngrams_match_1": 14.774, "eval_arxiv_n_ngrams_match_2": 2.89, "eval_arxiv_n_ngrams_match_3": 0.652, "eval_arxiv_num_pred_words": 40.804, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.87211322440449, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3517843858240757, "eval_arxiv_runtime": 10.4051, "eval_arxiv_samples_per_second": 48.053, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.34460872298111217, "eval_arxiv_token_set_f1_sem": 0.004201232769472353, "eval_arxiv_token_set_precision": 0.296133870411427, "eval_arxiv_token_set_recall": 0.43056774661485525, "eval_arxiv_true_num_tokens": 64.0, "step": 106250 }, { "epoch": 20.4, "eval_python_code_alpaca_accuracy": 0.1545, "eval_python_code_alpaca_bleu_score": 4.290810821574007, "eval_python_code_alpaca_bleu_score_sem": 0.1388669003342831, "eval_python_code_alpaca_emb_cos_sim": 0.739973247051239, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011939318416406566, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9438319206237793, "eval_python_code_alpaca_n_ngrams_match_1": 9.598, "eval_python_code_alpaca_n_ngrams_match_2": 2.76, "eval_python_code_alpaca_n_ngrams_match_3": 0.892, "eval_python_code_alpaca_num_pred_words": 44.62, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.988469387921825, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3161371891457895, "eval_python_code_alpaca_runtime": 9.7482, "eval_python_code_alpaca_samples_per_second": 51.292, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4660896763141984, "eval_python_code_alpaca_token_set_f1_sem": 0.0059043821743581665, "eval_python_code_alpaca_token_set_precision": 0.5185077422429903, "eval_python_code_alpaca_token_set_recall": 0.4496648271016924, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 106250 }, { "epoch": 20.4, "eval_wikibio_accuracy": 0.31709375, "eval_wikibio_bleu_score": 5.758761685748959, "eval_wikibio_bleu_score_sem": 0.2014654036850103, "eval_wikibio_emb_cos_sim": 0.7358646392822266, "eval_wikibio_emb_cos_sim_sem": 0.00875286256956811, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8096470832824707, "eval_wikibio_n_ngrams_match_1": 9.998, "eval_wikibio_n_ngrams_match_2": 3.342, "eval_wikibio_n_ngrams_match_3": 1.188, "eval_wikibio_num_pred_words": 36.488, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.134507333056575, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3507836800002612, "eval_wikibio_runtime": 10.5957, "eval_wikibio_samples_per_second": 47.189, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.31732368184979565, "eval_wikibio_token_set_f1_sem": 0.005412909304453982, "eval_wikibio_token_set_precision": 0.3238138680832841, "eval_wikibio_token_set_recall": 0.32640364788147386, "eval_wikibio_true_num_tokens": 61.1328125, "step": 106250 }, { "epoch": 20.4, "eval_nq_accuracy": 0.52278125, "eval_nq_bleu_score": 11.533538808764156, "eval_nq_bleu_score_sem": 0.45828506881051434, "eval_nq_emb_cos_sim": 0.8285806179046631, "eval_nq_emb_cos_sim_sem": 0.007730273999028035, "eval_nq_emb_top1_equal": 0.203125, "eval_nq_emb_top1_equal_sem": 0.03570055125142555, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2204086780548096, "eval_nq_n_ngrams_match_1": 22.95, "eval_nq_n_ngrams_match_2": 8.362, "eval_nq_n_ngrams_match_3": 3.852, "eval_nq_num_pred_words": 49.132, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.211094468949115, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44288603975113794, "eval_nq_runtime": 10.4301, "eval_nq_samples_per_second": 47.938, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4580814147836532, "eval_nq_token_set_f1_sem": 0.00479774175121098, "eval_nq_token_set_precision": 0.412922008068103, "eval_nq_token_set_recall": 0.5225166517959134, "eval_nq_true_num_tokens": 64.0, "step": 106250 }, { "epoch": 20.4, "learning_rate": 0.001, "loss": 2.5854, "step": 106260 }, { "epoch": 20.41, "learning_rate": 0.001, "loss": 2.5925, "step": 106272 }, { "epoch": 20.41, "learning_rate": 0.001, "loss": 2.5855, "step": 106284 }, { "epoch": 20.41, "learning_rate": 0.001, "loss": 2.5898, "step": 106296 }, { "epoch": 20.41, "learning_rate": 0.001, "loss": 2.5913, "step": 106308 }, { "epoch": 20.41, "learning_rate": 0.001, "loss": 2.5859, "step": 106320 }, { "epoch": 20.42, "learning_rate": 0.001, "loss": 2.5956, "step": 106332 }, { "epoch": 20.42, "learning_rate": 0.001, "loss": 2.5959, "step": 106344 }, { "epoch": 20.42, "learning_rate": 0.001, "loss": 2.5855, "step": 106356 }, { "epoch": 20.42, "learning_rate": 0.001, "loss": 2.5942, "step": 106368 }, { "epoch": 20.43, "learning_rate": 0.001, "loss": 2.5867, "step": 106380 }, { "epoch": 20.43, "learning_rate": 0.001, "loss": 2.5854, "step": 106392 }, { "epoch": 20.43, "learning_rate": 0.001, "loss": 2.583, "step": 106404 }, { "epoch": 20.43, "learning_rate": 0.001, "loss": 2.5894, "step": 106416 }, { "epoch": 20.44, "learning_rate": 0.001, "loss": 2.592, "step": 106428 }, { "epoch": 20.44, "learning_rate": 0.001, "loss": 2.5789, "step": 106440 }, { "epoch": 20.44, "learning_rate": 0.001, "loss": 2.5939, "step": 106452 }, { "epoch": 20.44, "learning_rate": 0.001, "loss": 2.5895, "step": 106464 }, { "epoch": 20.44, "learning_rate": 0.001, "loss": 2.5906, "step": 106476 }, { "epoch": 20.45, "learning_rate": 0.001, "loss": 2.5825, "step": 106488 }, { "epoch": 20.45, "learning_rate": 0.001, "loss": 2.5841, "step": 106500 }, { "epoch": 20.45, "learning_rate": 0.001, "loss": 2.5712, "step": 106512 }, { "epoch": 20.45, "learning_rate": 0.001, "loss": 2.5879, "step": 106524 }, { "epoch": 20.46, "learning_rate": 0.001, "loss": 2.5967, "step": 106536 }, { "epoch": 20.46, "learning_rate": 0.001, "loss": 2.5826, "step": 106548 }, { "epoch": 20.46, "learning_rate": 0.001, "loss": 2.5897, "step": 106560 }, { "epoch": 20.46, "learning_rate": 0.001, "loss": 2.5877, "step": 106572 }, { "epoch": 20.47, "learning_rate": 0.001, "loss": 2.5886, "step": 106584 }, { "epoch": 20.47, "learning_rate": 0.001, "loss": 2.5966, "step": 106596 }, { "epoch": 20.47, "learning_rate": 0.001, "loss": 2.5851, "step": 106608 }, { "epoch": 20.47, "learning_rate": 0.001, "loss": 2.5808, "step": 106620 }, { "epoch": 20.47, "learning_rate": 0.001, "loss": 2.5872, "step": 106632 }, { "epoch": 20.48, "learning_rate": 0.001, "loss": 2.5821, "step": 106644 }, { "epoch": 20.48, "learning_rate": 0.001, "loss": 2.578, "step": 106656 }, { "epoch": 20.48, "learning_rate": 0.001, "loss": 2.5917, "step": 106668 }, { "epoch": 20.48, "learning_rate": 0.001, "loss": 2.5848, "step": 106680 }, { "epoch": 20.49, "learning_rate": 0.001, "loss": 2.5872, "step": 106692 }, { "epoch": 20.49, "learning_rate": 0.001, "loss": 2.5827, "step": 106704 }, { "epoch": 20.49, "learning_rate": 0.001, "loss": 2.5903, "step": 106716 }, { "epoch": 20.49, "learning_rate": 0.001, "loss": 2.5829, "step": 106728 }, { "epoch": 20.5, "learning_rate": 0.001, "loss": 2.5805, "step": 106740 }, { "epoch": 20.5, "learning_rate": 0.001, "loss": 2.5795, "step": 106752 }, { "epoch": 20.5, "learning_rate": 0.001, "loss": 2.5953, "step": 106764 }, { "epoch": 20.5, "learning_rate": 0.001, "loss": 2.5886, "step": 106776 }, { "epoch": 20.5, "learning_rate": 0.001, "loss": 2.5843, "step": 106788 }, { "epoch": 20.51, "learning_rate": 0.001, "loss": 2.5922, "step": 106800 }, { "epoch": 20.51, "learning_rate": 0.001, "loss": 2.5953, "step": 106812 }, { "epoch": 20.51, "learning_rate": 0.001, "loss": 2.5925, "step": 106824 }, { "epoch": 20.51, "learning_rate": 0.001, "loss": 2.5862, "step": 106836 }, { "epoch": 20.52, "learning_rate": 0.001, "loss": 2.5953, "step": 106848 }, { "epoch": 20.52, "learning_rate": 0.001, "loss": 2.5869, "step": 106860 }, { "epoch": 20.52, "learning_rate": 0.001, "loss": 2.5853, "step": 106872 }, { "epoch": 20.52, "eval_ag_news_accuracy": 0.31903125, "eval_ag_news_bleu_score": 4.675808205117197, "eval_ag_news_bleu_score_sem": 0.15038055364200864, "eval_ag_news_emb_cos_sim": 0.8060142397880554, "eval_ag_news_emb_cos_sim_sem": 0.007473165710765929, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.589247465133667, "eval_ag_news_n_ngrams_match_1": 13.778, "eval_ag_news_n_ngrams_match_2": 2.988, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 46.4, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.206818778255965, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34461262568055817, "eval_ag_news_runtime": 10.2854, "eval_ag_news_samples_per_second": 48.613, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3460400386277784, "eval_ag_news_token_set_f1_sem": 0.00443891591014686, "eval_ag_news_token_set_precision": 0.32815925631245246, "eval_ag_news_token_set_recall": 0.38706170670097445, "eval_ag_news_true_num_tokens": 56.09375, "step": 106875 }, { "epoch": 20.52, "eval_anthropic_toxic_prompts_accuracy": 0.1120625, "eval_anthropic_toxic_prompts_bleu_score": 2.8172890296624455, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10069188346220201, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6546214818954468, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010074594324314826, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2812349796295166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.96, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.748, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.59, "eval_anthropic_toxic_prompts_num_pred_words": 47.62, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.60861351249027, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2046918798357004, "eval_anthropic_toxic_prompts_runtime": 9.9743, "eval_anthropic_toxic_prompts_samples_per_second": 50.129, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3521623057039035, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006321514142783254, "eval_anthropic_toxic_prompts_token_set_precision": 0.42174827340097226, "eval_anthropic_toxic_prompts_token_set_recall": 0.33479658803808365, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 106875 }, { "epoch": 20.52, "eval_arxiv_accuracy": 0.344125, "eval_arxiv_bleu_score": 4.155494291056252, "eval_arxiv_bleu_score_sem": 0.11624197610517291, "eval_arxiv_emb_cos_sim": 0.7548593282699585, "eval_arxiv_emb_cos_sim_sem": 0.007868334562404072, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4369235038757324, "eval_arxiv_n_ngrams_match_1": 14.814, "eval_arxiv_n_ngrams_match_2": 2.87, "eval_arxiv_n_ngrams_match_3": 0.586, "eval_arxiv_num_pred_words": 40.154, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.091159050744086, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3534161748848369, "eval_arxiv_runtime": 10.6045, "eval_arxiv_samples_per_second": 47.15, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.34797830157383214, "eval_arxiv_token_set_f1_sem": 0.004419123468664785, "eval_arxiv_token_set_precision": 0.29702121770489404, "eval_arxiv_token_set_recall": 0.43770806121837064, "eval_arxiv_true_num_tokens": 64.0, "step": 106875 }, { "epoch": 20.52, "eval_python_code_alpaca_accuracy": 0.1551875, "eval_python_code_alpaca_bleu_score": 4.343087852374023, "eval_python_code_alpaca_bleu_score_sem": 0.1395949619055419, "eval_python_code_alpaca_emb_cos_sim": 0.7138265371322632, "eval_python_code_alpaca_emb_cos_sim_sem": 0.012877403214226806, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.942326545715332, "eval_python_code_alpaca_n_ngrams_match_1": 9.302, "eval_python_code_alpaca_n_ngrams_match_2": 2.702, "eval_python_code_alpaca_n_ngrams_match_3": 0.87, "eval_python_code_alpaca_num_pred_words": 42.856, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.95990612715752, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3169222548597542, "eval_python_code_alpaca_runtime": 9.9616, "eval_python_code_alpaca_samples_per_second": 50.193, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4682792194392736, "eval_python_code_alpaca_token_set_f1_sem": 0.006167622203396601, "eval_python_code_alpaca_token_set_precision": 0.5021414070720427, "eval_python_code_alpaca_token_set_recall": 0.46914912025549066, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 106875 }, { "epoch": 20.52, "eval_wikibio_accuracy": 0.322125, "eval_wikibio_bleu_score": 6.068244902810261, "eval_wikibio_bleu_score_sem": 0.20719131144981617, "eval_wikibio_emb_cos_sim": 0.746246337890625, "eval_wikibio_emb_cos_sim_sem": 0.00842967526683051, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.750788688659668, "eval_wikibio_n_ngrams_match_1": 10.336, "eval_wikibio_n_ngrams_match_2": 3.472, "eval_wikibio_n_ngrams_match_3": 1.216, "eval_wikibio_num_pred_words": 36.718, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.554631123400554, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36182565201179606, "eval_wikibio_runtime": 10.0603, "eval_wikibio_samples_per_second": 49.7, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3272347824625536, "eval_wikibio_token_set_f1_sem": 0.0051785986877144075, "eval_wikibio_token_set_precision": 0.3355020336165487, "eval_wikibio_token_set_recall": 0.33522756420869765, "eval_wikibio_true_num_tokens": 61.1328125, "step": 106875 }, { "epoch": 20.52, "eval_nq_accuracy": 0.52378125, "eval_nq_bleu_score": 11.616922851142785, "eval_nq_bleu_score_sem": 0.4802084653288616, "eval_nq_emb_cos_sim": 0.8299338221549988, "eval_nq_emb_cos_sim_sem": 0.007294561854755936, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2151927947998047, "eval_nq_n_ngrams_match_1": 22.876, "eval_nq_n_ngrams_match_2": 8.382, "eval_nq_n_ngrams_match_3": 3.87, "eval_nq_num_pred_words": 48.586, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.16317555391938, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44475224986269324, "eval_nq_runtime": 10.4687, "eval_nq_samples_per_second": 47.761, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4599874957344267, "eval_nq_token_set_f1_sem": 0.004955143824878148, "eval_nq_token_set_precision": 0.41495361764274546, "eval_nq_token_set_recall": 0.5260386940884074, "eval_nq_true_num_tokens": 64.0, "step": 106875 }, { "epoch": 20.52, "learning_rate": 0.001, "loss": 2.5879, "step": 106884 }, { "epoch": 20.53, "learning_rate": 0.001, "loss": 2.5934, "step": 106896 }, { "epoch": 20.53, "learning_rate": 0.001, "loss": 2.5952, "step": 106908 }, { "epoch": 20.53, "learning_rate": 0.001, "loss": 2.586, "step": 106920 }, { "epoch": 20.53, "learning_rate": 0.001, "loss": 2.5832, "step": 106932 }, { "epoch": 20.53, "learning_rate": 0.001, "loss": 2.5819, "step": 106944 }, { "epoch": 20.54, "learning_rate": 0.001, "loss": 2.5924, "step": 106956 }, { "epoch": 20.54, "learning_rate": 0.001, "loss": 2.5899, "step": 106968 }, { "epoch": 20.54, "learning_rate": 0.001, "loss": 2.5951, "step": 106980 }, { "epoch": 20.54, "learning_rate": 0.001, "loss": 2.5969, "step": 106992 }, { "epoch": 20.55, "learning_rate": 0.001, "loss": 2.5901, "step": 107004 }, { "epoch": 20.55, "learning_rate": 0.001, "loss": 2.5886, "step": 107016 }, { "epoch": 20.55, "learning_rate": 0.001, "loss": 2.5925, "step": 107028 }, { "epoch": 20.55, "learning_rate": 0.001, "loss": 2.5873, "step": 107040 }, { "epoch": 20.56, "learning_rate": 0.001, "loss": 2.5922, "step": 107052 }, { "epoch": 20.56, "learning_rate": 0.001, "loss": 2.5842, "step": 107064 }, { "epoch": 20.56, "learning_rate": 0.001, "loss": 2.5924, "step": 107076 }, { "epoch": 20.56, "learning_rate": 0.001, "loss": 2.6001, "step": 107088 }, { "epoch": 20.56, "learning_rate": 0.001, "loss": 2.5928, "step": 107100 }, { "epoch": 20.57, "learning_rate": 0.001, "loss": 2.5878, "step": 107112 }, { "epoch": 20.57, "learning_rate": 0.001, "loss": 2.6031, "step": 107124 }, { "epoch": 20.57, "learning_rate": 0.001, "loss": 2.601, "step": 107136 }, { "epoch": 20.57, "learning_rate": 0.001, "loss": 2.5832, "step": 107148 }, { "epoch": 20.58, "learning_rate": 0.001, "loss": 2.589, "step": 107160 }, { "epoch": 20.58, "learning_rate": 0.001, "loss": 2.5807, "step": 107172 }, { "epoch": 20.58, "learning_rate": 0.001, "loss": 2.5893, "step": 107184 }, { "epoch": 20.58, "learning_rate": 0.001, "loss": 2.5943, "step": 107196 }, { "epoch": 20.59, "learning_rate": 0.001, "loss": 2.5915, "step": 107208 }, { "epoch": 20.59, "learning_rate": 0.001, "loss": 2.5866, "step": 107220 }, { "epoch": 20.59, "learning_rate": 0.001, "loss": 2.5802, "step": 107232 }, { "epoch": 20.59, "learning_rate": 0.001, "loss": 2.5905, "step": 107244 }, { "epoch": 20.59, "learning_rate": 0.001, "loss": 2.5936, "step": 107256 }, { "epoch": 20.6, "learning_rate": 0.001, "loss": 2.5892, "step": 107268 }, { "epoch": 20.6, "learning_rate": 0.001, "loss": 2.5819, "step": 107280 }, { "epoch": 20.6, "learning_rate": 0.001, "loss": 2.5936, "step": 107292 }, { "epoch": 20.6, "learning_rate": 0.001, "loss": 2.5881, "step": 107304 }, { "epoch": 20.61, "learning_rate": 0.001, "loss": 2.5856, "step": 107316 }, { "epoch": 20.61, "learning_rate": 0.001, "loss": 2.606, "step": 107328 }, { "epoch": 20.61, "learning_rate": 0.001, "loss": 2.5822, "step": 107340 }, { "epoch": 20.61, "learning_rate": 0.001, "loss": 2.5896, "step": 107352 }, { "epoch": 20.62, "learning_rate": 0.001, "loss": 2.5825, "step": 107364 }, { "epoch": 20.62, "learning_rate": 0.001, "loss": 2.593, "step": 107376 }, { "epoch": 20.62, "learning_rate": 0.001, "loss": 2.5874, "step": 107388 }, { "epoch": 20.62, "learning_rate": 0.001, "loss": 2.5918, "step": 107400 }, { "epoch": 20.62, "learning_rate": 0.001, "loss": 2.5892, "step": 107412 }, { "epoch": 20.63, "learning_rate": 0.001, "loss": 2.5901, "step": 107424 }, { "epoch": 20.63, "learning_rate": 0.001, "loss": 2.5821, "step": 107436 }, { "epoch": 20.63, "learning_rate": 0.001, "loss": 2.5889, "step": 107448 }, { "epoch": 20.63, "learning_rate": 0.001, "loss": 2.5887, "step": 107460 }, { "epoch": 20.64, "learning_rate": 0.001, "loss": 2.5858, "step": 107472 }, { "epoch": 20.64, "learning_rate": 0.001, "loss": 2.5912, "step": 107484 }, { "epoch": 20.64, "learning_rate": 0.001, "loss": 2.5882, "step": 107496 }, { "epoch": 20.64, "eval_ag_news_accuracy": 0.319375, "eval_ag_news_bleu_score": 4.688954603981253, "eval_ag_news_bleu_score_sem": 0.15133146469219155, "eval_ag_news_emb_cos_sim": 0.8037905097007751, "eval_ag_news_emb_cos_sim_sem": 0.007028073258522125, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.585221290588379, "eval_ag_news_n_ngrams_match_1": 13.93, "eval_ag_news_n_ngrams_match_2": 3.008, "eval_ag_news_n_ngrams_match_3": 0.83, "eval_ag_news_num_pred_words": 46.68, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.06133687042489, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34443414735216493, "eval_ag_news_runtime": 10.4247, "eval_ag_news_samples_per_second": 47.963, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3470642592983945, "eval_ag_news_token_set_f1_sem": 0.004297789329807443, "eval_ag_news_token_set_precision": 0.33060344019138604, "eval_ag_news_token_set_recall": 0.38159832091759044, "eval_ag_news_true_num_tokens": 56.09375, "step": 107500 }, { "epoch": 20.64, "eval_anthropic_toxic_prompts_accuracy": 0.112875, "eval_anthropic_toxic_prompts_bleu_score": 3.091999051385931, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11981511911373215, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6684356927871704, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009511060008132297, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.27815580368042, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.128, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.85, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.69, "eval_anthropic_toxic_prompts_num_pred_words": 47.868, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.526806922801747, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2102893138981995, "eval_anthropic_toxic_prompts_runtime": 10.4973, "eval_anthropic_toxic_prompts_samples_per_second": 47.631, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.3549976083694851, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006362206553091603, "eval_anthropic_toxic_prompts_token_set_precision": 0.4326036946265551, "eval_anthropic_toxic_prompts_token_set_recall": 0.32971289703117035, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 107500 }, { "epoch": 20.64, "eval_arxiv_accuracy": 0.34278125, "eval_arxiv_bleu_score": 4.419931285345419, "eval_arxiv_bleu_score_sem": 0.1269135370474574, "eval_arxiv_emb_cos_sim": 0.7733661532402039, "eval_arxiv_emb_cos_sim_sem": 0.0072253679247576835, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4513282775878906, "eval_arxiv_n_ngrams_match_1": 15.244, "eval_arxiv_n_ngrams_match_2": 3.008, "eval_arxiv_n_ngrams_match_3": 0.686, "eval_arxiv_num_pred_words": 41.026, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.542261374576412, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36047886469912493, "eval_arxiv_runtime": 10.3496, "eval_arxiv_samples_per_second": 48.311, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35798543081434503, "eval_arxiv_token_set_f1_sem": 0.004153517692732146, "eval_arxiv_token_set_precision": 0.3075819086243378, "eval_arxiv_token_set_recall": 0.4432304342214834, "eval_arxiv_true_num_tokens": 64.0, "step": 107500 }, { "epoch": 20.64, "eval_python_code_alpaca_accuracy": 0.155375, "eval_python_code_alpaca_bleu_score": 4.291386664688456, "eval_python_code_alpaca_bleu_score_sem": 0.13161837014713682, "eval_python_code_alpaca_emb_cos_sim": 0.7419006824493408, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010992546757981525, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9321656227111816, "eval_python_code_alpaca_n_ngrams_match_1": 9.58, "eval_python_code_alpaca_n_ngrams_match_2": 2.736, "eval_python_code_alpaca_n_ngrams_match_3": 0.854, "eval_python_code_alpaca_num_pred_words": 43.546, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.768231425882394, "eval_python_code_alpaca_pred_num_tokens": 62.9921875, "eval_python_code_alpaca_rouge_score": 0.3210950863801495, "eval_python_code_alpaca_runtime": 10.7033, "eval_python_code_alpaca_samples_per_second": 46.715, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.4624423383131976, "eval_python_code_alpaca_token_set_f1_sem": 0.005773163030078458, "eval_python_code_alpaca_token_set_precision": 0.5200071395801114, "eval_python_code_alpaca_token_set_recall": 0.44217830970271715, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 107500 }, { "epoch": 20.64, "eval_wikibio_accuracy": 0.3189375, "eval_wikibio_bleu_score": 5.968765219251967, "eval_wikibio_bleu_score_sem": 0.21871732595582424, "eval_wikibio_emb_cos_sim": 0.7466143369674683, "eval_wikibio_emb_cos_sim_sem": 0.0092694782237349, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8147342205047607, "eval_wikibio_n_ngrams_match_1": 10.254, "eval_wikibio_n_ngrams_match_2": 3.424, "eval_wikibio_n_ngrams_match_3": 1.224, "eval_wikibio_num_pred_words": 37.238, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.364697774076674, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3580095555832107, "eval_wikibio_runtime": 10.1188, "eval_wikibio_samples_per_second": 49.413, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32120994433856687, "eval_wikibio_token_set_f1_sem": 0.005277880820177232, "eval_wikibio_token_set_precision": 0.33301637481149027, "eval_wikibio_token_set_recall": 0.32323062131427566, "eval_wikibio_true_num_tokens": 61.1328125, "step": 107500 }, { "epoch": 20.64, "eval_nq_accuracy": 0.52453125, "eval_nq_bleu_score": 11.616328402222152, "eval_nq_bleu_score_sem": 0.47293824110718763, "eval_nq_emb_cos_sim": 0.8306170701980591, "eval_nq_emb_cos_sim_sem": 0.007003467401525859, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2154154777526855, "eval_nq_n_ngrams_match_1": 23.128, "eval_nq_n_ngrams_match_2": 8.504, "eval_nq_n_ngrams_match_3": 3.892, "eval_nq_num_pred_words": 49.308, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.165216264116747, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44480744966270735, "eval_nq_runtime": 10.5489, "eval_nq_samples_per_second": 47.398, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.46189889954581576, "eval_nq_token_set_f1_sem": 0.005182516616622513, "eval_nq_token_set_precision": 0.4203626065249666, "eval_nq_token_set_recall": 0.5212598925502242, "eval_nq_true_num_tokens": 64.0, "step": 107500 }, { "epoch": 20.64, "learning_rate": 0.001, "loss": 2.5877, "step": 107508 }, { "epoch": 20.65, "learning_rate": 0.001, "loss": 2.5879, "step": 107520 }, { "epoch": 20.65, "learning_rate": 0.001, "loss": 2.5912, "step": 107532 }, { "epoch": 20.65, "learning_rate": 0.001, "loss": 2.595, "step": 107544 }, { "epoch": 20.65, "learning_rate": 0.001, "loss": 2.5856, "step": 107556 }, { "epoch": 20.65, "learning_rate": 0.001, "loss": 2.5915, "step": 107568 }, { "epoch": 20.66, "learning_rate": 0.001, "loss": 2.5841, "step": 107580 }, { "epoch": 20.66, "learning_rate": 0.001, "loss": 2.5879, "step": 107592 }, { "epoch": 20.66, "learning_rate": 0.001, "loss": 2.5928, "step": 107604 }, { "epoch": 20.66, "learning_rate": 0.001, "loss": 2.5936, "step": 107616 }, { "epoch": 20.67, "learning_rate": 0.001, "loss": 2.5895, "step": 107628 }, { "epoch": 20.67, "learning_rate": 0.001, "loss": 2.585, "step": 107640 }, { "epoch": 20.67, "learning_rate": 0.001, "loss": 2.5858, "step": 107652 }, { "epoch": 20.67, "learning_rate": 0.001, "loss": 2.5929, "step": 107664 }, { "epoch": 20.68, "learning_rate": 0.001, "loss": 2.5916, "step": 107676 }, { "epoch": 20.68, "learning_rate": 0.001, "loss": 2.5987, "step": 107688 }, { "epoch": 20.68, "learning_rate": 0.001, "loss": 2.5939, "step": 107700 }, { "epoch": 20.68, "learning_rate": 0.001, "loss": 2.5904, "step": 107712 }, { "epoch": 20.68, "learning_rate": 0.001, "loss": 2.5786, "step": 107724 }, { "epoch": 20.69, "learning_rate": 0.001, "loss": 2.5896, "step": 107736 }, { "epoch": 20.69, "learning_rate": 0.001, "loss": 2.5952, "step": 107748 }, { "epoch": 20.69, "learning_rate": 0.001, "loss": 2.5965, "step": 107760 }, { "epoch": 20.69, "learning_rate": 0.001, "loss": 2.5948, "step": 107772 }, { "epoch": 20.7, "learning_rate": 0.001, "loss": 2.5819, "step": 107784 }, { "epoch": 20.7, "learning_rate": 0.001, "loss": 2.5911, "step": 107796 }, { "epoch": 20.7, "learning_rate": 0.001, "loss": 2.6014, "step": 107808 }, { "epoch": 20.7, "learning_rate": 0.001, "loss": 2.5986, "step": 107820 }, { "epoch": 20.71, "learning_rate": 0.001, "loss": 2.6021, "step": 107832 }, { "epoch": 20.71, "learning_rate": 0.001, "loss": 2.5943, "step": 107844 }, { "epoch": 20.71, "learning_rate": 0.001, "loss": 2.5956, "step": 107856 }, { "epoch": 20.71, "learning_rate": 0.001, "loss": 2.5827, "step": 107868 }, { "epoch": 20.71, "learning_rate": 0.001, "loss": 2.614, "step": 107880 }, { "epoch": 20.72, "learning_rate": 0.001, "loss": 2.588, "step": 107892 }, { "epoch": 20.72, "learning_rate": 0.001, "loss": 2.6008, "step": 107904 }, { "epoch": 20.72, "learning_rate": 0.001, "loss": 2.5895, "step": 107916 }, { "epoch": 20.72, "learning_rate": 0.001, "loss": 2.6044, "step": 107928 }, { "epoch": 20.73, "learning_rate": 0.001, "loss": 2.5922, "step": 107940 }, { "epoch": 20.73, "learning_rate": 0.001, "loss": 2.5917, "step": 107952 }, { "epoch": 20.73, "learning_rate": 0.001, "loss": 2.5961, "step": 107964 }, { "epoch": 20.73, "learning_rate": 0.001, "loss": 2.5929, "step": 107976 }, { "epoch": 20.74, "learning_rate": 0.001, "loss": 2.5928, "step": 107988 }, { "epoch": 20.74, "learning_rate": 0.001, "loss": 2.5919, "step": 108000 }, { "epoch": 20.74, "learning_rate": 0.001, "loss": 2.5973, "step": 108012 }, { "epoch": 20.74, "learning_rate": 0.001, "loss": 2.6022, "step": 108024 }, { "epoch": 20.74, "learning_rate": 0.001, "loss": 2.5978, "step": 108036 }, { "epoch": 20.75, "learning_rate": 0.001, "loss": 2.5983, "step": 108048 }, { "epoch": 20.75, "learning_rate": 0.001, "loss": 2.5856, "step": 108060 }, { "epoch": 20.75, "learning_rate": 0.001, "loss": 2.5956, "step": 108072 }, { "epoch": 20.75, "learning_rate": 0.001, "loss": 2.5829, "step": 108084 }, { "epoch": 20.76, "learning_rate": 0.001, "loss": 2.5904, "step": 108096 }, { "epoch": 20.76, "learning_rate": 0.001, "loss": 2.5794, "step": 108108 }, { "epoch": 20.76, "learning_rate": 0.001, "loss": 2.5955, "step": 108120 }, { "epoch": 20.76, "eval_ag_news_accuracy": 0.31875, "eval_ag_news_bleu_score": 4.778080978524433, "eval_ag_news_bleu_score_sem": 0.15357152358327375, "eval_ag_news_emb_cos_sim": 0.8031768798828125, "eval_ag_news_emb_cos_sim_sem": 0.007985539021212296, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5966758728027344, "eval_ag_news_n_ngrams_match_1": 13.998, "eval_ag_news_n_ngrams_match_2": 2.974, "eval_ag_news_n_ngrams_match_3": 0.87, "eval_ag_news_num_pred_words": 47.01, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.47677923531072, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34382825639216164, "eval_ag_news_runtime": 11.2987, "eval_ag_news_samples_per_second": 44.253, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.34927534209835465, "eval_ag_news_token_set_f1_sem": 0.004317128705614198, "eval_ag_news_token_set_precision": 0.33452105991346714, "eval_ag_news_token_set_recall": 0.38007046870232786, "eval_ag_news_true_num_tokens": 56.09375, "step": 108125 }, { "epoch": 20.76, "eval_anthropic_toxic_prompts_accuracy": 0.1129375, "eval_anthropic_toxic_prompts_bleu_score": 2.888986175307003, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12211752060455751, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6607291102409363, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009518338242034522, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2888426780700684, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.996, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.782, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.626, "eval_anthropic_toxic_prompts_num_pred_words": 47.776, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.81181579078046, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20619539152934796, "eval_anthropic_toxic_prompts_runtime": 10.0288, "eval_anthropic_toxic_prompts_samples_per_second": 49.857, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.34977827459647454, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591544267040966, "eval_anthropic_toxic_prompts_token_set_precision": 0.42263909504860236, "eval_anthropic_toxic_prompts_token_set_recall": 0.32634371458448963, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 108125 }, { "epoch": 20.76, "eval_arxiv_accuracy": 0.344, "eval_arxiv_bleu_score": 4.3282999216840325, "eval_arxiv_bleu_score_sem": 0.12465486673948048, "eval_arxiv_emb_cos_sim": 0.7621566653251648, "eval_arxiv_emb_cos_sim_sem": 0.007000118044158258, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4395155906677246, "eval_arxiv_n_ngrams_match_1": 14.954, "eval_arxiv_n_ngrams_match_2": 2.938, "eval_arxiv_n_ngrams_match_3": 0.652, "eval_arxiv_num_pred_words": 40.494, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.171854573184007, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35776408224509265, "eval_arxiv_runtime": 10.5554, "eval_arxiv_samples_per_second": 47.369, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3503111790453682, "eval_arxiv_token_set_f1_sem": 0.004244257589276047, "eval_arxiv_token_set_precision": 0.30012455740087235, "eval_arxiv_token_set_recall": 0.43698676108896617, "eval_arxiv_true_num_tokens": 64.0, "step": 108125 }, { "epoch": 20.76, "eval_python_code_alpaca_accuracy": 0.154875, "eval_python_code_alpaca_bleu_score": 4.26654464775827, "eval_python_code_alpaca_bleu_score_sem": 0.13383782823754412, "eval_python_code_alpaca_emb_cos_sim": 0.7454551458358765, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009964508073093386, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.925708532333374, "eval_python_code_alpaca_n_ngrams_match_1": 9.53, "eval_python_code_alpaca_n_ngrams_match_2": 2.72, "eval_python_code_alpaca_n_ngrams_match_3": 0.854, "eval_python_code_alpaca_num_pred_words": 43.246, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.647433680026715, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3191635715121748, "eval_python_code_alpaca_runtime": 9.9373, "eval_python_code_alpaca_samples_per_second": 50.316, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4636949319137289, "eval_python_code_alpaca_token_set_f1_sem": 0.0060427583230633726, "eval_python_code_alpaca_token_set_precision": 0.5167051788675627, "eval_python_code_alpaca_token_set_recall": 0.44488502996542006, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 108125 }, { "epoch": 20.76, "eval_wikibio_accuracy": 0.31890625, "eval_wikibio_bleu_score": 6.028696025100964, "eval_wikibio_bleu_score_sem": 0.1973861884727308, "eval_wikibio_emb_cos_sim": 0.7399263978004456, "eval_wikibio_emb_cos_sim_sem": 0.00879182403829389, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8208425045013428, "eval_wikibio_n_ngrams_match_1": 10.466, "eval_wikibio_n_ngrams_match_2": 3.5, "eval_wikibio_n_ngrams_match_3": 1.236, "eval_wikibio_num_pred_words": 36.89, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.64264626143456, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3669505356921922, "eval_wikibio_runtime": 10.1421, "eval_wikibio_samples_per_second": 49.299, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.33002352040099997, "eval_wikibio_token_set_f1_sem": 0.004958464144499835, "eval_wikibio_token_set_precision": 0.3410126613469393, "eval_wikibio_token_set_recall": 0.3333282468945071, "eval_wikibio_true_num_tokens": 61.1328125, "step": 108125 }, { "epoch": 20.76, "eval_nq_accuracy": 0.52240625, "eval_nq_bleu_score": 11.666710848272931, "eval_nq_bleu_score_sem": 0.47769848330662973, "eval_nq_emb_cos_sim": 0.8327779173851013, "eval_nq_emb_cos_sim_sem": 0.006809925121896786, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.215294122695923, "eval_nq_n_ngrams_match_1": 23.212, "eval_nq_n_ngrams_match_2": 8.394, "eval_nq_n_ngrams_match_3": 3.826, "eval_nq_num_pred_words": 49.362, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.164104086262341, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4465019938306465, "eval_nq_runtime": 10.4987, "eval_nq_samples_per_second": 47.625, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.462062337946061, "eval_nq_token_set_f1_sem": 0.004770450278674321, "eval_nq_token_set_precision": 0.4180090124924598, "eval_nq_token_set_recall": 0.5230308971480412, "eval_nq_true_num_tokens": 64.0, "step": 108125 }, { "epoch": 20.76, "learning_rate": 0.001, "loss": 2.594, "step": 108132 }, { "epoch": 20.76, "learning_rate": 0.001, "loss": 2.5848, "step": 108144 }, { "epoch": 20.77, "learning_rate": 0.001, "loss": 2.5946, "step": 108156 }, { "epoch": 20.77, "learning_rate": 0.001, "loss": 2.5957, "step": 108168 }, { "epoch": 20.77, "learning_rate": 0.001, "loss": 2.5875, "step": 108180 }, { "epoch": 20.77, "learning_rate": 0.001, "loss": 2.6001, "step": 108192 }, { "epoch": 20.78, "learning_rate": 0.001, "loss": 2.5924, "step": 108204 }, { "epoch": 20.78, "learning_rate": 0.001, "loss": 2.5955, "step": 108216 }, { "epoch": 20.78, "learning_rate": 0.001, "loss": 2.5963, "step": 108228 }, { "epoch": 20.78, "learning_rate": 0.001, "loss": 2.5909, "step": 108240 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 2.5954, "step": 108252 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 2.5976, "step": 108264 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 2.5986, "step": 108276 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 2.5904, "step": 108288 }, { "epoch": 20.79, "learning_rate": 0.001, "loss": 2.5883, "step": 108300 }, { "epoch": 20.8, "learning_rate": 0.001, "loss": 2.5968, "step": 108312 }, { "epoch": 20.8, "learning_rate": 0.001, "loss": 2.5805, "step": 108324 }, { "epoch": 20.8, "learning_rate": 0.001, "loss": 2.5921, "step": 108336 }, { "epoch": 20.8, "learning_rate": 0.001, "loss": 2.5888, "step": 108348 }, { "epoch": 20.81, "learning_rate": 0.001, "loss": 2.5838, "step": 108360 }, { "epoch": 20.81, "learning_rate": 0.001, "loss": 2.5947, "step": 108372 }, { "epoch": 20.81, "learning_rate": 0.001, "loss": 2.5857, "step": 108384 }, { "epoch": 20.81, "learning_rate": 0.001, "loss": 2.5777, "step": 108396 }, { "epoch": 20.82, "learning_rate": 0.001, "loss": 2.5895, "step": 108408 }, { "epoch": 20.82, "learning_rate": 0.001, "loss": 2.591, "step": 108420 }, { "epoch": 20.82, "learning_rate": 0.001, "loss": 2.5936, "step": 108432 }, { "epoch": 20.82, "learning_rate": 0.001, "loss": 2.5923, "step": 108444 }, { "epoch": 20.82, "learning_rate": 0.001, "loss": 2.5931, "step": 108456 }, { "epoch": 20.83, "learning_rate": 0.001, "loss": 2.59, "step": 108468 }, { "epoch": 20.83, "learning_rate": 0.001, "loss": 2.5901, "step": 108480 }, { "epoch": 20.83, "learning_rate": 0.001, "loss": 2.6029, "step": 108492 }, { "epoch": 20.83, "learning_rate": 0.001, "loss": 2.5962, "step": 108504 }, { "epoch": 20.84, "learning_rate": 0.001, "loss": 2.5937, "step": 108516 }, { "epoch": 20.84, "learning_rate": 0.001, "loss": 2.5955, "step": 108528 }, { "epoch": 20.84, "learning_rate": 0.001, "loss": 2.5903, "step": 108540 }, { "epoch": 20.84, "learning_rate": 0.001, "loss": 2.5835, "step": 108552 }, { "epoch": 20.85, "learning_rate": 0.001, "loss": 2.5881, "step": 108564 }, { "epoch": 20.85, "learning_rate": 0.001, "loss": 2.5987, "step": 108576 }, { "epoch": 20.85, "learning_rate": 0.001, "loss": 2.597, "step": 108588 }, { "epoch": 20.85, "learning_rate": 0.001, "loss": 2.5909, "step": 108600 }, { "epoch": 20.85, "learning_rate": 0.001, "loss": 2.5888, "step": 108612 }, { "epoch": 20.86, "learning_rate": 0.001, "loss": 2.5849, "step": 108624 }, { "epoch": 20.86, "learning_rate": 0.001, "loss": 2.5927, "step": 108636 }, { "epoch": 20.86, "learning_rate": 0.001, "loss": 2.5907, "step": 108648 }, { "epoch": 20.86, "learning_rate": 0.001, "loss": 2.5951, "step": 108660 }, { "epoch": 20.87, "learning_rate": 0.001, "loss": 2.5949, "step": 108672 }, { "epoch": 20.87, "learning_rate": 0.001, "loss": 2.5944, "step": 108684 }, { "epoch": 20.87, "learning_rate": 0.001, "loss": 2.5932, "step": 108696 }, { "epoch": 20.87, "learning_rate": 0.001, "loss": 2.5971, "step": 108708 }, { "epoch": 20.88, "learning_rate": 0.001, "loss": 2.5941, "step": 108720 }, { "epoch": 20.88, "learning_rate": 0.001, "loss": 2.5927, "step": 108732 }, { "epoch": 20.88, "learning_rate": 0.001, "loss": 2.592, "step": 108744 }, { "epoch": 20.88, "eval_ag_news_accuracy": 0.31871875, "eval_ag_news_bleu_score": 4.873370447428463, "eval_ag_news_bleu_score_sem": 0.15098036946663992, "eval_ag_news_emb_cos_sim": 0.8129647970199585, "eval_ag_news_emb_cos_sim_sem": 0.006730886610048699, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5923500061035156, "eval_ag_news_n_ngrams_match_1": 14.048, "eval_ag_news_n_ngrams_match_2": 3.15, "eval_ag_news_n_ngrams_match_3": 0.902, "eval_ag_news_num_pred_words": 46.618, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.31932635634103, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3484963346129579, "eval_ag_news_runtime": 10.5771, "eval_ag_news_samples_per_second": 47.272, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3504828600643719, "eval_ag_news_token_set_f1_sem": 0.0043611171316939555, "eval_ag_news_token_set_precision": 0.33493982325945754, "eval_ag_news_token_set_recall": 0.3827705621636813, "eval_ag_news_true_num_tokens": 56.09375, "step": 108750 }, { "epoch": 20.88, "eval_anthropic_toxic_prompts_accuracy": 0.112625, "eval_anthropic_toxic_prompts_bleu_score": 2.9707463777623873, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11054395141998996, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6669542193412781, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00893271766014327, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.27938175201416, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.036, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.776, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.634, "eval_anthropic_toxic_prompts_num_pred_words": 46.762, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.559347359919986, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2097143809114018, "eval_anthropic_toxic_prompts_runtime": 9.9603, "eval_anthropic_toxic_prompts_samples_per_second": 50.199, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.35240630743276874, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006275436050904172, "eval_anthropic_toxic_prompts_token_set_precision": 0.42992016538312583, "eval_anthropic_toxic_prompts_token_set_recall": 0.32449227795144736, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 108750 }, { "epoch": 20.88, "eval_arxiv_accuracy": 0.348125, "eval_arxiv_bleu_score": 4.400249783797118, "eval_arxiv_bleu_score_sem": 0.12567661947030506, "eval_arxiv_emb_cos_sim": 0.7580965757369995, "eval_arxiv_emb_cos_sim_sem": 0.007996936451011953, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4323618412017822, "eval_arxiv_n_ngrams_match_1": 14.994, "eval_arxiv_n_ngrams_match_2": 2.956, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.414, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.949654664029552, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35805403599114105, "eval_arxiv_runtime": 10.3656, "eval_arxiv_samples_per_second": 48.236, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.349453693463863, "eval_arxiv_token_set_f1_sem": 0.0042896941451422265, "eval_arxiv_token_set_precision": 0.30031323232159085, "eval_arxiv_token_set_recall": 0.43436994637610643, "eval_arxiv_true_num_tokens": 64.0, "step": 108750 }, { "epoch": 20.88, "eval_python_code_alpaca_accuracy": 0.15665625, "eval_python_code_alpaca_bleu_score": 4.371294824286917, "eval_python_code_alpaca_bleu_score_sem": 0.1365779538529853, "eval_python_code_alpaca_emb_cos_sim": 0.7505536079406738, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00804314981185964, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9335525035858154, "eval_python_code_alpaca_n_ngrams_match_1": 9.668, "eval_python_code_alpaca_n_ngrams_match_2": 2.73, "eval_python_code_alpaca_n_ngrams_match_3": 0.848, "eval_python_code_alpaca_num_pred_words": 43.104, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.794278785214857, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32916010935591244, "eval_python_code_alpaca_runtime": 11.1488, "eval_python_code_alpaca_samples_per_second": 44.848, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.46798239105152967, "eval_python_code_alpaca_token_set_f1_sem": 0.005458406793191103, "eval_python_code_alpaca_token_set_precision": 0.5250172107024541, "eval_python_code_alpaca_token_set_recall": 0.44627157475698254, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 108750 }, { "epoch": 20.88, "eval_wikibio_accuracy": 0.32478125, "eval_wikibio_bleu_score": 6.379616694657736, "eval_wikibio_bleu_score_sem": 0.23158062316896835, "eval_wikibio_emb_cos_sim": 0.7379165887832642, "eval_wikibio_emb_cos_sim_sem": 0.008865217506619548, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7640633583068848, "eval_wikibio_n_ngrams_match_1": 10.296, "eval_wikibio_n_ngrams_match_2": 3.59, "eval_wikibio_n_ngrams_match_3": 1.354, "eval_wikibio_num_pred_words": 36.804, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.12329586112143, "eval_wikibio_pred_num_tokens": 62.9921875, "eval_wikibio_rouge_score": 0.3595443509799587, "eval_wikibio_runtime": 10.0145, "eval_wikibio_samples_per_second": 49.928, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32544783434585106, "eval_wikibio_token_set_f1_sem": 0.005209346479870393, "eval_wikibio_token_set_precision": 0.33508837052885965, "eval_wikibio_token_set_recall": 0.3302157900476604, "eval_wikibio_true_num_tokens": 61.1328125, "step": 108750 }, { "epoch": 20.88, "eval_nq_accuracy": 0.524625, "eval_nq_bleu_score": 11.509977937884306, "eval_nq_bleu_score_sem": 0.4798335647255485, "eval_nq_emb_cos_sim": 0.8304468989372253, "eval_nq_emb_cos_sim_sem": 0.007206013960212579, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2169437408447266, "eval_nq_n_ngrams_match_1": 22.826, "eval_nq_n_ngrams_match_2": 8.26, "eval_nq_n_ngrams_match_3": 3.77, "eval_nq_num_pred_words": 49.15, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.179233834403153, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44084884462137924, "eval_nq_runtime": 11.6632, "eval_nq_samples_per_second": 42.87, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4570505234090874, "eval_nq_token_set_f1_sem": 0.004903078945843605, "eval_nq_token_set_precision": 0.4148243655339545, "eval_nq_token_set_recall": 0.5166141597770819, "eval_nq_true_num_tokens": 64.0, "step": 108750 }, { "epoch": 20.88, "learning_rate": 0.001, "loss": 2.5986, "step": 108756 }, { "epoch": 20.88, "learning_rate": 0.001, "loss": 2.5852, "step": 108768 }, { "epoch": 20.89, "learning_rate": 0.001, "loss": 2.5871, "step": 108780 }, { "epoch": 20.89, "learning_rate": 0.001, "loss": 2.6143, "step": 108792 }, { "epoch": 20.89, "learning_rate": 0.001, "loss": 2.5976, "step": 108804 }, { "epoch": 20.89, "learning_rate": 0.001, "loss": 2.5864, "step": 108816 }, { "epoch": 20.9, "learning_rate": 0.001, "loss": 2.5953, "step": 108828 }, { "epoch": 20.9, "learning_rate": 0.001, "loss": 2.5959, "step": 108840 }, { "epoch": 20.9, "learning_rate": 0.001, "loss": 2.5949, "step": 108852 }, { "epoch": 20.9, "learning_rate": 0.001, "loss": 2.5964, "step": 108864 }, { "epoch": 20.91, "learning_rate": 0.001, "loss": 2.5975, "step": 108876 }, { "epoch": 20.91, "learning_rate": 0.001, "loss": 2.6015, "step": 108888 }, { "epoch": 20.91, "learning_rate": 0.001, "loss": 2.5889, "step": 108900 }, { "epoch": 20.91, "learning_rate": 0.001, "loss": 2.5898, "step": 108912 }, { "epoch": 20.91, "learning_rate": 0.001, "loss": 2.5955, "step": 108924 }, { "epoch": 20.92, "learning_rate": 0.001, "loss": 2.5904, "step": 108936 }, { "epoch": 20.92, "learning_rate": 0.001, "loss": 2.5965, "step": 108948 }, { "epoch": 20.92, "learning_rate": 0.001, "loss": 2.5903, "step": 108960 }, { "epoch": 20.92, "learning_rate": 0.001, "loss": 2.5975, "step": 108972 }, { "epoch": 20.93, "learning_rate": 0.001, "loss": 2.5952, "step": 108984 }, { "epoch": 20.93, "learning_rate": 0.001, "loss": 2.5972, "step": 108996 }, { "epoch": 20.93, "learning_rate": 0.001, "loss": 2.5893, "step": 109008 }, { "epoch": 20.93, "learning_rate": 0.001, "loss": 2.5887, "step": 109020 }, { "epoch": 20.94, "learning_rate": 0.001, "loss": 2.5864, "step": 109032 }, { "epoch": 20.94, "learning_rate": 0.001, "loss": 2.5989, "step": 109044 }, { "epoch": 20.94, "learning_rate": 0.001, "loss": 2.5898, "step": 109056 }, { "epoch": 20.94, "learning_rate": 0.001, "loss": 2.5829, "step": 109068 }, { "epoch": 20.94, "learning_rate": 0.001, "loss": 2.5828, "step": 109080 }, { "epoch": 20.95, "learning_rate": 0.001, "loss": 2.5965, "step": 109092 }, { "epoch": 20.95, "learning_rate": 0.001, "loss": 2.5896, "step": 109104 }, { "epoch": 20.95, "learning_rate": 0.001, "loss": 2.5889, "step": 109116 }, { "epoch": 20.95, "learning_rate": 0.001, "loss": 2.5948, "step": 109128 }, { "epoch": 20.96, "learning_rate": 0.001, "loss": 2.5994, "step": 109140 }, { "epoch": 20.96, "learning_rate": 0.001, "loss": 2.5854, "step": 109152 }, { "epoch": 20.96, "learning_rate": 0.001, "loss": 2.6009, "step": 109164 }, { "epoch": 20.96, "learning_rate": 0.001, "loss": 2.5927, "step": 109176 }, { "epoch": 20.97, "learning_rate": 0.001, "loss": 2.5894, "step": 109188 }, { "epoch": 20.97, "learning_rate": 0.001, "loss": 2.5915, "step": 109200 }, { "epoch": 20.97, "learning_rate": 0.001, "loss": 2.5844, "step": 109212 }, { "epoch": 20.97, "learning_rate": 0.001, "loss": 2.5824, "step": 109224 }, { "epoch": 20.97, "learning_rate": 0.001, "loss": 2.5924, "step": 109236 }, { "epoch": 20.98, "learning_rate": 0.001, "loss": 2.5927, "step": 109248 }, { "epoch": 20.98, "learning_rate": 0.001, "loss": 2.5919, "step": 109260 }, { "epoch": 20.98, "learning_rate": 0.001, "loss": 2.5869, "step": 109272 }, { "epoch": 20.98, "learning_rate": 0.001, "loss": 2.5921, "step": 109284 }, { "epoch": 20.99, "learning_rate": 0.001, "loss": 2.5836, "step": 109296 }, { "epoch": 20.99, "learning_rate": 0.001, "loss": 2.5932, "step": 109308 }, { "epoch": 20.99, "learning_rate": 0.001, "loss": 2.5817, "step": 109320 }, { "epoch": 20.99, "learning_rate": 0.001, "loss": 2.5863, "step": 109332 }, { "epoch": 21.0, "learning_rate": 0.001, "loss": 2.5813, "step": 109344 }, { "epoch": 21.0, "learning_rate": 0.001, "loss": 2.5905, "step": 109356 }, { "epoch": 21.0, "learning_rate": 0.001, "loss": 2.6043, "step": 109368 }, { "epoch": 21.0, "eval_ag_news_accuracy": 0.3196875, "eval_ag_news_bleu_score": 4.730510371239236, "eval_ag_news_bleu_score_sem": 0.15493241022512916, "eval_ag_news_emb_cos_sim": 0.8005703091621399, "eval_ag_news_emb_cos_sim_sem": 0.0073144084105930115, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5906450748443604, "eval_ag_news_n_ngrams_match_1": 13.804, "eval_ag_news_n_ngrams_match_2": 3.012, "eval_ag_news_n_ngrams_match_3": 0.822, "eval_ag_news_num_pred_words": 45.954, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.2574571578766, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34282707021054426, "eval_ag_news_runtime": 10.4718, "eval_ag_news_samples_per_second": 47.747, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34755710795872863, "eval_ag_news_token_set_f1_sem": 0.004478178591860787, "eval_ag_news_token_set_precision": 0.3275949553581574, "eval_ag_news_token_set_recall": 0.39171616518682495, "eval_ag_news_true_num_tokens": 56.09375, "step": 109375 }, { "epoch": 21.0, "eval_anthropic_toxic_prompts_accuracy": 0.1121875, "eval_anthropic_toxic_prompts_bleu_score": 3.008519808706273, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10933591519156527, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6628186702728271, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00969850482416196, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2805299758911133, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.672, "eval_anthropic_toxic_prompts_num_pred_words": 47.604, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.589860951579723, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20973456911770016, "eval_anthropic_toxic_prompts_runtime": 10.3079, "eval_anthropic_toxic_prompts_samples_per_second": 48.506, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.35876685150393867, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00638851690340122, "eval_anthropic_toxic_prompts_token_set_precision": 0.4328364387192545, "eval_anthropic_toxic_prompts_token_set_recall": 0.3353883399722931, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 109375 }, { "epoch": 21.0, "eval_arxiv_accuracy": 0.347125, "eval_arxiv_bleu_score": 4.19404361346, "eval_arxiv_bleu_score_sem": 0.11947950330485461, "eval_arxiv_emb_cos_sim": 0.7596526741981506, "eval_arxiv_emb_cos_sim_sem": 0.006617105924137477, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4264883995056152, "eval_arxiv_n_ngrams_match_1": 14.846, "eval_arxiv_n_ngrams_match_2": 2.856, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 40.36, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.768406468246848, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35357518687409834, "eval_arxiv_runtime": 10.4697, "eval_arxiv_samples_per_second": 47.757, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3498007105998447, "eval_arxiv_token_set_f1_sem": 0.00410151461643203, "eval_arxiv_token_set_precision": 0.2999278934988312, "eval_arxiv_token_set_recall": 0.4382757289031304, "eval_arxiv_true_num_tokens": 64.0, "step": 109375 }, { "epoch": 21.0, "eval_python_code_alpaca_accuracy": 0.154625, "eval_python_code_alpaca_bleu_score": 4.113769871700874, "eval_python_code_alpaca_bleu_score_sem": 0.12810463664688804, "eval_python_code_alpaca_emb_cos_sim": 0.7502652406692505, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009955928555243387, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9653851985931396, "eval_python_code_alpaca_n_ngrams_match_1": 9.708, "eval_python_code_alpaca_n_ngrams_match_2": 2.726, "eval_python_code_alpaca_n_ngrams_match_3": 0.82, "eval_python_code_alpaca_num_pred_words": 44.962, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 19.402175492932308, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32136878047698314, "eval_python_code_alpaca_runtime": 10.3949, "eval_python_code_alpaca_samples_per_second": 48.101, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4684747450950481, "eval_python_code_alpaca_token_set_f1_sem": 0.005246657581335209, "eval_python_code_alpaca_token_set_precision": 0.5298615094570119, "eval_python_code_alpaca_token_set_recall": 0.4440887552401949, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 109375 }, { "epoch": 21.0, "eval_wikibio_accuracy": 0.32084375, "eval_wikibio_bleu_score": 5.700113263717459, "eval_wikibio_bleu_score_sem": 0.19907505100010206, "eval_wikibio_emb_cos_sim": 0.7308796644210815, "eval_wikibio_emb_cos_sim_sem": 0.009892154446545834, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7788538932800293, "eval_wikibio_n_ngrams_match_1": 9.648, "eval_wikibio_n_ngrams_match_2": 3.236, "eval_wikibio_n_ngrams_match_3": 1.146, "eval_wikibio_num_pred_words": 35.132, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.76585264222076, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3387349133127342, "eval_wikibio_runtime": 10.5438, "eval_wikibio_samples_per_second": 47.421, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.30832923022014436, "eval_wikibio_token_set_f1_sem": 0.005774482559036823, "eval_wikibio_token_set_precision": 0.31246205377761976, "eval_wikibio_token_set_recall": 0.3205897710855152, "eval_wikibio_true_num_tokens": 61.1328125, "step": 109375 }, { "epoch": 21.0, "eval_nq_accuracy": 0.5234375, "eval_nq_bleu_score": 11.772185945630039, "eval_nq_bleu_score_sem": 0.4861501353722335, "eval_nq_emb_cos_sim": 0.828109622001648, "eval_nq_emb_cos_sim_sem": 0.007422867398335226, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.216416120529175, "eval_nq_n_ngrams_match_1": 22.762, "eval_nq_n_ngrams_match_2": 8.378, "eval_nq_n_ngrams_match_3": 3.934, "eval_nq_num_pred_words": 49.014, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.174391961598474, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4390581071380426, "eval_nq_runtime": 10.6081, "eval_nq_samples_per_second": 47.134, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.45611953282382467, "eval_nq_token_set_f1_sem": 0.005094695798645029, "eval_nq_token_set_precision": 0.41226724652556246, "eval_nq_token_set_recall": 0.5193156242655752, "eval_nq_true_num_tokens": 64.0, "step": 109375 }, { "epoch": 21.0, "learning_rate": 0.001, "loss": 2.5761, "step": 109380 }, { "epoch": 21.0, "learning_rate": 0.001, "loss": 2.5736, "step": 109392 }, { "epoch": 21.01, "learning_rate": 0.001, "loss": 2.5761, "step": 109404 }, { "epoch": 21.01, "learning_rate": 0.001, "loss": 2.5678, "step": 109416 }, { "epoch": 21.01, "learning_rate": 0.001, "loss": 2.5752, "step": 109428 }, { "epoch": 21.01, "learning_rate": 0.001, "loss": 2.5707, "step": 109440 }, { "epoch": 21.02, "learning_rate": 0.001, "loss": 2.5797, "step": 109452 }, { "epoch": 21.02, "learning_rate": 0.001, "loss": 2.5759, "step": 109464 }, { "epoch": 21.02, "learning_rate": 0.001, "loss": 2.5779, "step": 109476 }, { "epoch": 21.02, "learning_rate": 0.001, "loss": 2.5827, "step": 109488 }, { "epoch": 21.03, "learning_rate": 0.001, "loss": 2.5625, "step": 109500 }, { "epoch": 21.03, "learning_rate": 0.001, "loss": 2.5727, "step": 109512 }, { "epoch": 21.03, "learning_rate": 0.001, "loss": 2.5668, "step": 109524 }, { "epoch": 21.03, "learning_rate": 0.001, "loss": 2.5755, "step": 109536 }, { "epoch": 21.03, "learning_rate": 0.001, "loss": 2.5776, "step": 109548 }, { "epoch": 21.04, "learning_rate": 0.001, "loss": 2.5773, "step": 109560 }, { "epoch": 21.04, "learning_rate": 0.001, "loss": 2.5795, "step": 109572 }, { "epoch": 21.04, "learning_rate": 0.001, "loss": 2.5734, "step": 109584 }, { "epoch": 21.04, "learning_rate": 0.001, "loss": 2.5727, "step": 109596 }, { "epoch": 21.05, "learning_rate": 0.001, "loss": 2.5815, "step": 109608 }, { "epoch": 21.05, "learning_rate": 0.001, "loss": 2.5819, "step": 109620 }, { "epoch": 21.05, "learning_rate": 0.001, "loss": 2.5762, "step": 109632 }, { "epoch": 21.05, "learning_rate": 0.001, "loss": 2.5624, "step": 109644 }, { "epoch": 21.06, "learning_rate": 0.001, "loss": 2.575, "step": 109656 }, { "epoch": 21.06, "learning_rate": 0.001, "loss": 2.5547, "step": 109668 }, { "epoch": 21.06, "learning_rate": 0.001, "loss": 2.5775, "step": 109680 }, { "epoch": 21.06, "learning_rate": 0.001, "loss": 2.5729, "step": 109692 }, { "epoch": 21.06, "learning_rate": 0.001, "loss": 2.5743, "step": 109704 }, { "epoch": 21.07, "learning_rate": 0.001, "loss": 2.5728, "step": 109716 }, { "epoch": 21.07, "learning_rate": 0.001, "loss": 2.5842, "step": 109728 }, { "epoch": 21.07, "learning_rate": 0.001, "loss": 2.572, "step": 109740 }, { "epoch": 21.07, "learning_rate": 0.001, "loss": 2.5823, "step": 109752 }, { "epoch": 21.08, "learning_rate": 0.001, "loss": 2.576, "step": 109764 }, { "epoch": 21.08, "learning_rate": 0.001, "loss": 2.57, "step": 109776 }, { "epoch": 21.08, "learning_rate": 0.001, "loss": 2.5748, "step": 109788 }, { "epoch": 21.08, "learning_rate": 0.001, "loss": 2.5776, "step": 109800 }, { "epoch": 21.09, "learning_rate": 0.001, "loss": 2.5847, "step": 109812 }, { "epoch": 21.09, "learning_rate": 0.001, "loss": 2.5664, "step": 109824 }, { "epoch": 21.09, "learning_rate": 0.001, "loss": 2.5742, "step": 109836 }, { "epoch": 21.09, "learning_rate": 0.001, "loss": 2.5701, "step": 109848 }, { "epoch": 21.09, "learning_rate": 0.001, "loss": 2.5723, "step": 109860 }, { "epoch": 21.1, "learning_rate": 0.001, "loss": 2.5624, "step": 109872 }, { "epoch": 21.1, "learning_rate": 0.001, "loss": 2.575, "step": 109884 }, { "epoch": 21.1, "learning_rate": 0.001, "loss": 2.5749, "step": 109896 }, { "epoch": 21.1, "learning_rate": 0.001, "loss": 2.5818, "step": 109908 }, { "epoch": 21.11, "learning_rate": 0.001, "loss": 2.5769, "step": 109920 }, { "epoch": 21.11, "learning_rate": 0.001, "loss": 2.5793, "step": 109932 }, { "epoch": 21.11, "learning_rate": 0.001, "loss": 2.5702, "step": 109944 }, { "epoch": 21.11, "learning_rate": 0.001, "loss": 2.5792, "step": 109956 }, { "epoch": 21.12, "learning_rate": 0.001, "loss": 2.5654, "step": 109968 }, { "epoch": 21.12, "learning_rate": 0.001, "loss": 2.5888, "step": 109980 }, { "epoch": 21.12, "learning_rate": 0.001, "loss": 2.5736, "step": 109992 }, { "epoch": 21.12, "eval_ag_news_accuracy": 0.31775, "eval_ag_news_bleu_score": 4.702167748990216, "eval_ag_news_bleu_score_sem": 0.15081791411931167, "eval_ag_news_emb_cos_sim": 0.8027331829071045, "eval_ag_news_emb_cos_sim_sem": 0.0073558143965324245, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.606444835662842, "eval_ag_news_n_ngrams_match_1": 13.806, "eval_ag_news_n_ngrams_match_2": 2.974, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.518, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.83486575612089, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34237616129242276, "eval_ag_news_runtime": 12.0025, "eval_ag_news_samples_per_second": 41.658, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3466558310744491, "eval_ag_news_token_set_f1_sem": 0.00435949853196166, "eval_ag_news_token_set_precision": 0.3306004090612312, "eval_ag_news_token_set_recall": 0.3809390945975503, "eval_ag_news_true_num_tokens": 56.09375, "step": 110000 }, { "epoch": 21.12, "eval_anthropic_toxic_prompts_accuracy": 0.11384375, "eval_anthropic_toxic_prompts_bleu_score": 2.9259785094080306, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10997722118238093, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6563278436660767, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010086220269058753, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.265777349472046, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.826, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.642, "eval_anthropic_toxic_prompts_num_pred_words": 48.292, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.20046999828643, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20985560656279834, "eval_anthropic_toxic_prompts_runtime": 17.799, "eval_anthropic_toxic_prompts_samples_per_second": 28.091, "eval_anthropic_toxic_prompts_steps_per_second": 0.056, "eval_anthropic_toxic_prompts_token_set_f1": 0.35484068968376375, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006543849288808072, "eval_anthropic_toxic_prompts_token_set_precision": 0.42912887243858927, "eval_anthropic_toxic_prompts_token_set_recall": 0.32790463746217585, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 110000 }, { "epoch": 21.12, "eval_arxiv_accuracy": 0.34540625, "eval_arxiv_bleu_score": 4.261046904853632, "eval_arxiv_bleu_score_sem": 0.1238057453247579, "eval_arxiv_emb_cos_sim": 0.7614809274673462, "eval_arxiv_emb_cos_sim_sem": 0.006998047580225161, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4448392391204834, "eval_arxiv_n_ngrams_match_1": 14.952, "eval_arxiv_n_ngrams_match_2": 2.87, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 40.082, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.338245077852708, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.356331577107159, "eval_arxiv_runtime": 17.2601, "eval_arxiv_samples_per_second": 28.969, "eval_arxiv_steps_per_second": 0.058, "eval_arxiv_token_set_f1": 0.3496846274647558, "eval_arxiv_token_set_f1_sem": 0.004071850335532273, "eval_arxiv_token_set_precision": 0.3018160043666443, "eval_arxiv_token_set_recall": 0.4319014158568833, "eval_arxiv_true_num_tokens": 64.0, "step": 110000 }, { "epoch": 21.12, "eval_python_code_alpaca_accuracy": 0.15865625, "eval_python_code_alpaca_bleu_score": 4.456097075892867, "eval_python_code_alpaca_bleu_score_sem": 0.13793966607153263, "eval_python_code_alpaca_emb_cos_sim": 0.7517263889312744, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008530632115537784, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9022841453552246, "eval_python_code_alpaca_n_ngrams_match_1": 9.718, "eval_python_code_alpaca_n_ngrams_match_2": 2.858, "eval_python_code_alpaca_n_ngrams_match_3": 0.932, "eval_python_code_alpaca_num_pred_words": 44.3, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.215705205457805, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3246414445098242, "eval_python_code_alpaca_runtime": 12.3718, "eval_python_code_alpaca_samples_per_second": 40.415, "eval_python_code_alpaca_steps_per_second": 0.081, "eval_python_code_alpaca_token_set_f1": 0.4695769726186936, "eval_python_code_alpaca_token_set_f1_sem": 0.005498479940050691, "eval_python_code_alpaca_token_set_precision": 0.5315452489571844, "eval_python_code_alpaca_token_set_recall": 0.4434547686631533, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 110000 }, { "epoch": 21.12, "eval_wikibio_accuracy": 0.32053125, "eval_wikibio_bleu_score": 6.233939655874294, "eval_wikibio_bleu_score_sem": 0.21826220314593694, "eval_wikibio_emb_cos_sim": 0.7423006892204285, "eval_wikibio_emb_cos_sim_sem": 0.00883132814769425, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8176321983337402, "eval_wikibio_n_ngrams_match_1": 10.396, "eval_wikibio_n_ngrams_match_2": 3.524, "eval_wikibio_n_ngrams_match_3": 1.282, "eval_wikibio_num_pred_words": 36.378, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.49635433920703, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3626337127284205, "eval_wikibio_runtime": 15.1044, "eval_wikibio_samples_per_second": 33.103, "eval_wikibio_steps_per_second": 0.066, "eval_wikibio_token_set_f1": 0.32924534421193435, "eval_wikibio_token_set_f1_sem": 0.005255782938895232, "eval_wikibio_token_set_precision": 0.33773896736532916, "eval_wikibio_token_set_recall": 0.3390088046755665, "eval_wikibio_true_num_tokens": 61.1328125, "step": 110000 }, { "epoch": 21.12, "eval_nq_accuracy": 0.525, "eval_nq_bleu_score": 11.387482124332884, "eval_nq_bleu_score_sem": 0.4816751599444313, "eval_nq_emb_cos_sim": 0.826781153678894, "eval_nq_emb_cos_sim_sem": 0.007380899015385994, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2170486450195312, "eval_nq_n_ngrams_match_1": 22.874, "eval_nq_n_ngrams_match_2": 8.34, "eval_nq_n_ngrams_match_3": 3.754, "eval_nq_num_pred_words": 49.292, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.180196824863867, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.441090070517958, "eval_nq_runtime": 11.5755, "eval_nq_samples_per_second": 43.195, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4568296963756491, "eval_nq_token_set_f1_sem": 0.005050259369300929, "eval_nq_token_set_precision": 0.4139612552603517, "eval_nq_token_set_recall": 0.5175484476628561, "eval_nq_true_num_tokens": 64.0, "step": 110000 }, { "epoch": 21.12, "learning_rate": 0.001, "loss": 2.57, "step": 110004 }, { "epoch": 21.12, "learning_rate": 0.001, "loss": 2.5801, "step": 110016 }, { "epoch": 21.13, "learning_rate": 0.001, "loss": 2.5897, "step": 110028 }, { "epoch": 21.13, "learning_rate": 0.001, "loss": 2.5614, "step": 110040 }, { "epoch": 21.13, "learning_rate": 0.001, "loss": 2.5782, "step": 110052 }, { "epoch": 21.13, "learning_rate": 0.001, "loss": 2.5854, "step": 110064 }, { "epoch": 21.14, "learning_rate": 0.001, "loss": 2.5781, "step": 110076 }, { "epoch": 21.14, "learning_rate": 0.001, "loss": 2.5763, "step": 110088 }, { "epoch": 21.14, "learning_rate": 0.001, "loss": 2.5847, "step": 110100 }, { "epoch": 21.14, "learning_rate": 0.001, "loss": 2.5859, "step": 110112 }, { "epoch": 21.15, "learning_rate": 0.001, "loss": 2.5768, "step": 110124 }, { "epoch": 21.15, "learning_rate": 0.001, "loss": 2.5742, "step": 110136 }, { "epoch": 21.15, "learning_rate": 0.001, "loss": 2.5826, "step": 110148 }, { "epoch": 21.15, "learning_rate": 0.001, "loss": 2.5816, "step": 110160 }, { "epoch": 21.15, "learning_rate": 0.001, "loss": 2.5789, "step": 110172 }, { "epoch": 21.16, "learning_rate": 0.001, "loss": 2.5779, "step": 110184 }, { "epoch": 21.16, "learning_rate": 0.001, "loss": 2.5816, "step": 110196 }, { "epoch": 21.16, "learning_rate": 0.001, "loss": 2.5784, "step": 110208 }, { "epoch": 21.16, "learning_rate": 0.001, "loss": 2.581, "step": 110220 }, { "epoch": 21.17, "learning_rate": 0.001, "loss": 2.581, "step": 110232 }, { "epoch": 21.17, "learning_rate": 0.001, "loss": 2.5775, "step": 110244 }, { "epoch": 21.17, "learning_rate": 0.001, "loss": 2.5828, "step": 110256 }, { "epoch": 21.17, "learning_rate": 0.001, "loss": 2.5739, "step": 110268 }, { "epoch": 21.18, "learning_rate": 0.001, "loss": 2.5747, "step": 110280 }, { "epoch": 21.18, "learning_rate": 0.001, "loss": 2.5673, "step": 110292 }, { "epoch": 21.18, "learning_rate": 0.001, "loss": 2.5828, "step": 110304 }, { "epoch": 21.18, "learning_rate": 0.001, "loss": 2.5812, "step": 110316 }, { "epoch": 21.18, "learning_rate": 0.001, "loss": 2.5835, "step": 110328 }, { "epoch": 21.19, "learning_rate": 0.001, "loss": 2.5749, "step": 110340 }, { "epoch": 21.19, "learning_rate": 0.001, "loss": 2.5905, "step": 110352 }, { "epoch": 21.19, "learning_rate": 0.001, "loss": 2.5875, "step": 110364 }, { "epoch": 21.19, "learning_rate": 0.001, "loss": 2.5772, "step": 110376 }, { "epoch": 21.2, "learning_rate": 0.001, "loss": 2.5816, "step": 110388 }, { "epoch": 21.2, "learning_rate": 0.001, "loss": 2.5726, "step": 110400 }, { "epoch": 21.2, "learning_rate": 0.001, "loss": 2.5642, "step": 110412 }, { "epoch": 21.2, "learning_rate": 0.001, "loss": 2.587, "step": 110424 }, { "epoch": 21.21, "learning_rate": 0.001, "loss": 2.5754, "step": 110436 }, { "epoch": 21.21, "learning_rate": 0.001, "loss": 2.5873, "step": 110448 }, { "epoch": 21.21, "learning_rate": 0.001, "loss": 2.5834, "step": 110460 }, { "epoch": 21.21, "learning_rate": 0.001, "loss": 2.5799, "step": 110472 }, { "epoch": 21.21, "learning_rate": 0.001, "loss": 2.5873, "step": 110484 }, { "epoch": 21.22, "learning_rate": 0.001, "loss": 2.5767, "step": 110496 }, { "epoch": 21.22, "learning_rate": 0.001, "loss": 2.5701, "step": 110508 }, { "epoch": 21.22, "learning_rate": 0.001, "loss": 2.5679, "step": 110520 }, { "epoch": 21.22, "learning_rate": 0.001, "loss": 2.58, "step": 110532 }, { "epoch": 21.23, "learning_rate": 0.001, "loss": 2.5881, "step": 110544 }, { "epoch": 21.23, "learning_rate": 0.001, "loss": 2.5787, "step": 110556 }, { "epoch": 21.23, "learning_rate": 0.001, "loss": 2.5892, "step": 110568 }, { "epoch": 21.23, "learning_rate": 0.001, "loss": 2.5783, "step": 110580 }, { "epoch": 21.24, "learning_rate": 0.001, "loss": 2.5766, "step": 110592 }, { "epoch": 21.24, "learning_rate": 0.001, "loss": 2.5782, "step": 110604 }, { "epoch": 21.24, "learning_rate": 0.001, "loss": 2.5824, "step": 110616 }, { "epoch": 21.24, "eval_ag_news_accuracy": 0.31928125, "eval_ag_news_bleu_score": 4.685418248974973, "eval_ag_news_bleu_score_sem": 0.15151162648658734, "eval_ag_news_emb_cos_sim": 0.7950277328491211, "eval_ag_news_emb_cos_sim_sem": 0.008317835311584313, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5835213661193848, "eval_ag_news_n_ngrams_match_1": 13.664, "eval_ag_news_n_ngrams_match_2": 2.99, "eval_ag_news_n_ngrams_match_3": 0.86, "eval_ag_news_num_pred_words": 46.146, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.000087395983975, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3398218563210976, "eval_ag_news_runtime": 11.5823, "eval_ag_news_samples_per_second": 43.169, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3415182222972952, "eval_ag_news_token_set_f1_sem": 0.004371714021688004, "eval_ag_news_token_set_precision": 0.32511685373685956, "eval_ag_news_token_set_recall": 0.378285218091242, "eval_ag_news_true_num_tokens": 56.09375, "step": 110625 }, { "epoch": 21.24, "eval_anthropic_toxic_prompts_accuracy": 0.11253125, "eval_anthropic_toxic_prompts_bleu_score": 3.0056202436140365, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1178465298981663, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6523705720901489, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009578207181799738, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.289741277694702, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.99, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.82, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 47.168, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.835919706648728, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20736422846400374, "eval_anthropic_toxic_prompts_runtime": 10.1856, "eval_anthropic_toxic_prompts_samples_per_second": 49.089, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.352508551236691, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065424098014101335, "eval_anthropic_toxic_prompts_token_set_precision": 0.42643162192414086, "eval_anthropic_toxic_prompts_token_set_recall": 0.330258278791112, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 110625 }, { "epoch": 21.24, "eval_arxiv_accuracy": 0.348, "eval_arxiv_bleu_score": 4.053475405061131, "eval_arxiv_bleu_score_sem": 0.12518156857898258, "eval_arxiv_emb_cos_sim": 0.7488660216331482, "eval_arxiv_emb_cos_sim_sem": 0.00953834416564253, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.408355951309204, "eval_arxiv_n_ngrams_match_1": 14.282, "eval_arxiv_n_ngrams_match_2": 2.682, "eval_arxiv_n_ngrams_match_3": 0.59, "eval_arxiv_num_pred_words": 39.0, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.2155276036984, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3455712802885348, "eval_arxiv_runtime": 10.6547, "eval_arxiv_samples_per_second": 46.928, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3395123837532162, "eval_arxiv_token_set_f1_sem": 0.00433835253524641, "eval_arxiv_token_set_precision": 0.2898921560301158, "eval_arxiv_token_set_recall": 0.4321801776109789, "eval_arxiv_true_num_tokens": 64.0, "step": 110625 }, { "epoch": 21.24, "eval_python_code_alpaca_accuracy": 0.15646875, "eval_python_code_alpaca_bleu_score": 4.400363717217963, "eval_python_code_alpaca_bleu_score_sem": 0.13434013825950306, "eval_python_code_alpaca_emb_cos_sim": 0.741919755935669, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010611431904688242, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.937800884246826, "eval_python_code_alpaca_n_ngrams_match_1": 9.572, "eval_python_code_alpaca_n_ngrams_match_2": 2.742, "eval_python_code_alpaca_n_ngrams_match_3": 0.85, "eval_python_code_alpaca_num_pred_words": 42.246, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.87429388269237, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32680303249898707, "eval_python_code_alpaca_runtime": 11.6033, "eval_python_code_alpaca_samples_per_second": 43.091, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.46202769887690065, "eval_python_code_alpaca_token_set_f1_sem": 0.005539596575597648, "eval_python_code_alpaca_token_set_precision": 0.5201054141111333, "eval_python_code_alpaca_token_set_recall": 0.43946663686839127, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 110625 }, { "epoch": 21.24, "eval_wikibio_accuracy": 0.32128125, "eval_wikibio_bleu_score": 5.852205602286266, "eval_wikibio_bleu_score_sem": 0.2096834733090805, "eval_wikibio_emb_cos_sim": 0.7330366969108582, "eval_wikibio_emb_cos_sim_sem": 0.010082510687845285, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.784851551055908, "eval_wikibio_n_ngrams_match_1": 9.952, "eval_wikibio_n_ngrams_match_2": 3.3, "eval_wikibio_n_ngrams_match_3": 1.194, "eval_wikibio_num_pred_words": 36.088, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.02913399513809, "eval_wikibio_pred_num_tokens": 62.9921875, "eval_wikibio_rouge_score": 0.34965874710505507, "eval_wikibio_runtime": 10.0436, "eval_wikibio_samples_per_second": 49.783, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3158548127246789, "eval_wikibio_token_set_f1_sem": 0.005649844857506648, "eval_wikibio_token_set_precision": 0.32367832119585815, "eval_wikibio_token_set_recall": 0.3247660459846288, "eval_wikibio_true_num_tokens": 61.1328125, "step": 110625 }, { "epoch": 21.24, "eval_nq_accuracy": 0.5248125, "eval_nq_bleu_score": 11.168789323686935, "eval_nq_bleu_score_sem": 0.4613882408217578, "eval_nq_emb_cos_sim": 0.8255674839019775, "eval_nq_emb_cos_sim_sem": 0.0077005044945446756, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.212205648422241, "eval_nq_n_ngrams_match_1": 22.85, "eval_nq_n_ngrams_match_2": 8.21, "eval_nq_n_ngrams_match_3": 3.702, "eval_nq_num_pred_words": 48.866, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.135844648287291, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4429615894331756, "eval_nq_runtime": 11.097, "eval_nq_samples_per_second": 45.057, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4566688510209377, "eval_nq_token_set_f1_sem": 0.0049535964522480795, "eval_nq_token_set_precision": 0.41578913599685846, "eval_nq_token_set_recall": 0.51470473217856, "eval_nq_true_num_tokens": 64.0, "step": 110625 }, { "epoch": 21.24, "learning_rate": 0.001, "loss": 2.5765, "step": 110628 }, { "epoch": 21.24, "learning_rate": 0.001, "loss": 2.5714, "step": 110640 }, { "epoch": 21.25, "learning_rate": 0.001, "loss": 2.5741, "step": 110652 }, { "epoch": 21.25, "learning_rate": 0.001, "loss": 2.5745, "step": 110664 }, { "epoch": 21.25, "learning_rate": 0.001, "loss": 2.5694, "step": 110676 }, { "epoch": 21.25, "learning_rate": 0.001, "loss": 2.5861, "step": 110688 }, { "epoch": 21.26, "learning_rate": 0.001, "loss": 2.5782, "step": 110700 }, { "epoch": 21.26, "learning_rate": 0.001, "loss": 2.5834, "step": 110712 }, { "epoch": 21.26, "learning_rate": 0.001, "loss": 2.5797, "step": 110724 }, { "epoch": 21.26, "learning_rate": 0.001, "loss": 2.5811, "step": 110736 }, { "epoch": 21.26, "learning_rate": 0.001, "loss": 2.5876, "step": 110748 }, { "epoch": 21.27, "learning_rate": 0.001, "loss": 2.5897, "step": 110760 }, { "epoch": 21.27, "learning_rate": 0.001, "loss": 2.5867, "step": 110772 }, { "epoch": 21.27, "learning_rate": 0.001, "loss": 2.5766, "step": 110784 }, { "epoch": 21.27, "learning_rate": 0.001, "loss": 2.5843, "step": 110796 }, { "epoch": 21.28, "learning_rate": 0.001, "loss": 2.5802, "step": 110808 }, { "epoch": 21.28, "learning_rate": 0.001, "loss": 2.5878, "step": 110820 }, { "epoch": 21.28, "learning_rate": 0.001, "loss": 2.5864, "step": 110832 }, { "epoch": 21.28, "learning_rate": 0.001, "loss": 2.5873, "step": 110844 }, { "epoch": 21.29, "learning_rate": 0.001, "loss": 2.5892, "step": 110856 }, { "epoch": 21.29, "learning_rate": 0.001, "loss": 2.5789, "step": 110868 }, { "epoch": 21.29, "learning_rate": 0.001, "loss": 2.5909, "step": 110880 }, { "epoch": 21.29, "learning_rate": 0.001, "loss": 2.5746, "step": 110892 }, { "epoch": 21.29, "learning_rate": 0.001, "loss": 2.5847, "step": 110904 }, { "epoch": 21.3, "learning_rate": 0.001, "loss": 2.5885, "step": 110916 }, { "epoch": 21.3, "learning_rate": 0.001, "loss": 2.5822, "step": 110928 }, { "epoch": 21.3, "learning_rate": 0.001, "loss": 2.5815, "step": 110940 }, { "epoch": 21.3, "learning_rate": 0.001, "loss": 2.5913, "step": 110952 }, { "epoch": 21.31, "learning_rate": 0.001, "loss": 2.5797, "step": 110964 }, { "epoch": 21.31, "learning_rate": 0.001, "loss": 2.5866, "step": 110976 }, { "epoch": 21.31, "learning_rate": 0.001, "loss": 2.5775, "step": 110988 }, { "epoch": 21.31, "learning_rate": 0.001, "loss": 2.5872, "step": 111000 }, { "epoch": 21.32, "learning_rate": 0.001, "loss": 2.5768, "step": 111012 }, { "epoch": 21.32, "learning_rate": 0.001, "loss": 2.5792, "step": 111024 }, { "epoch": 21.32, "learning_rate": 0.001, "loss": 2.5824, "step": 111036 }, { "epoch": 21.32, "learning_rate": 0.001, "loss": 2.5778, "step": 111048 }, { "epoch": 21.32, "learning_rate": 0.001, "loss": 2.5777, "step": 111060 }, { "epoch": 21.33, "learning_rate": 0.001, "loss": 2.5829, "step": 111072 }, { "epoch": 21.33, "learning_rate": 0.001, "loss": 2.5787, "step": 111084 }, { "epoch": 21.33, "learning_rate": 0.001, "loss": 2.5757, "step": 111096 }, { "epoch": 21.33, "learning_rate": 0.001, "loss": 2.5702, "step": 111108 }, { "epoch": 21.34, "learning_rate": 0.001, "loss": 2.5759, "step": 111120 }, { "epoch": 21.34, "learning_rate": 0.001, "loss": 2.5758, "step": 111132 }, { "epoch": 21.34, "learning_rate": 0.001, "loss": 2.5878, "step": 111144 }, { "epoch": 21.34, "learning_rate": 0.001, "loss": 2.5754, "step": 111156 }, { "epoch": 21.35, "learning_rate": 0.001, "loss": 2.5835, "step": 111168 }, { "epoch": 21.35, "learning_rate": 0.001, "loss": 2.5759, "step": 111180 }, { "epoch": 21.35, "learning_rate": 0.001, "loss": 2.5918, "step": 111192 }, { "epoch": 21.35, "learning_rate": 0.001, "loss": 2.5711, "step": 111204 }, { "epoch": 21.35, "learning_rate": 0.001, "loss": 2.5851, "step": 111216 }, { "epoch": 21.36, "learning_rate": 0.001, "loss": 2.5819, "step": 111228 }, { "epoch": 21.36, "learning_rate": 0.001, "loss": 2.5836, "step": 111240 }, { "epoch": 21.36, "eval_ag_news_accuracy": 0.31834375, "eval_ag_news_bleu_score": 4.866329716281025, "eval_ag_news_bleu_score_sem": 0.15617882672746772, "eval_ag_news_emb_cos_sim": 0.802810788154602, "eval_ag_news_emb_cos_sim_sem": 0.00814371021677311, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.584831476211548, "eval_ag_news_n_ngrams_match_1": 13.862, "eval_ag_news_n_ngrams_match_2": 3.072, "eval_ag_news_n_ngrams_match_3": 0.916, "eval_ag_news_num_pred_words": 46.536, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.047282382363775, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3443373521212875, "eval_ag_news_runtime": 13.1096, "eval_ag_news_samples_per_second": 38.14, "eval_ag_news_steps_per_second": 0.076, "eval_ag_news_token_set_f1": 0.3475438675235002, "eval_ag_news_token_set_f1_sem": 0.004396316955014515, "eval_ag_news_token_set_precision": 0.3325826833379386, "eval_ag_news_token_set_recall": 0.38045849206885424, "eval_ag_news_true_num_tokens": 56.09375, "step": 111250 }, { "epoch": 21.36, "eval_anthropic_toxic_prompts_accuracy": 0.1129375, "eval_anthropic_toxic_prompts_bleu_score": 3.1110791230873875, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11863615150761044, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728488206863403, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009503023130956877, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.27882981300354, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, "eval_anthropic_toxic_prompts_num_pred_words": 46.688, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.54469226474684, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21070630427718445, "eval_anthropic_toxic_prompts_runtime": 9.8523, "eval_anthropic_toxic_prompts_samples_per_second": 50.75, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3504519479027751, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006408439586069482, "eval_anthropic_toxic_prompts_token_set_precision": 0.42777988175727255, "eval_anthropic_toxic_prompts_token_set_recall": 0.3232340079842992, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 111250 }, { "epoch": 21.36, "eval_arxiv_accuracy": 0.3461875, "eval_arxiv_bleu_score": 4.272763842617122, "eval_arxiv_bleu_score_sem": 0.12094276008984688, "eval_arxiv_emb_cos_sim": 0.7589943408966064, "eval_arxiv_emb_cos_sim_sem": 0.007845967756734946, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4353740215301514, "eval_arxiv_n_ngrams_match_1": 15.04, "eval_arxiv_n_ngrams_match_2": 2.916, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 40.826, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.043021152734035, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.3568400506653353, "eval_arxiv_runtime": 10.5306, "eval_arxiv_samples_per_second": 47.481, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3511031462298726, "eval_arxiv_token_set_f1_sem": 0.004124125656696252, "eval_arxiv_token_set_precision": 0.3039353808198982, "eval_arxiv_token_set_recall": 0.43306408840869004, "eval_arxiv_true_num_tokens": 64.0, "step": 111250 }, { "epoch": 21.36, "eval_python_code_alpaca_accuracy": 0.15834375, "eval_python_code_alpaca_bleu_score": 4.475306735052449, "eval_python_code_alpaca_bleu_score_sem": 0.14627040540837266, "eval_python_code_alpaca_emb_cos_sim": 0.7522677183151245, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007894331265401199, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9292328357696533, "eval_python_code_alpaca_n_ngrams_match_1": 9.78, "eval_python_code_alpaca_n_ngrams_match_2": 2.85, "eval_python_code_alpaca_n_ngrams_match_3": 0.922, "eval_python_code_alpaca_num_pred_words": 43.71, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.713268838016493, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3265031362519286, "eval_python_code_alpaca_runtime": 10.2115, "eval_python_code_alpaca_samples_per_second": 48.964, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.46749664919678147, "eval_python_code_alpaca_token_set_f1_sem": 0.005420524718216274, "eval_python_code_alpaca_token_set_precision": 0.5282148022006583, "eval_python_code_alpaca_token_set_recall": 0.4401149781164481, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 111250 }, { "epoch": 21.36, "eval_wikibio_accuracy": 0.31890625, "eval_wikibio_bleu_score": 5.774908833709817, "eval_wikibio_bleu_score_sem": 0.20948880929675023, "eval_wikibio_emb_cos_sim": 0.7310364842414856, "eval_wikibio_emb_cos_sim_sem": 0.010684866861317904, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.8123817443847656, "eval_wikibio_n_ngrams_match_1": 10.01, "eval_wikibio_n_ngrams_match_2": 3.342, "eval_wikibio_n_ngrams_match_3": 1.172, "eval_wikibio_num_pred_words": 36.324, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 45.258103834879456, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3515645100503063, "eval_wikibio_runtime": 10.0767, "eval_wikibio_samples_per_second": 49.619, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3168065314978394, "eval_wikibio_token_set_f1_sem": 0.005617337118590732, "eval_wikibio_token_set_precision": 0.32493484859713717, "eval_wikibio_token_set_recall": 0.32456879125337457, "eval_wikibio_true_num_tokens": 61.1328125, "step": 111250 }, { "epoch": 21.36, "eval_nq_accuracy": 0.52415625, "eval_nq_bleu_score": 11.733117241066784, "eval_nq_bleu_score_sem": 0.4855792083687693, "eval_nq_emb_cos_sim": 0.8233238458633423, "eval_nq_emb_cos_sim_sem": 0.007838283859415804, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.216200113296509, "eval_nq_n_ngrams_match_1": 22.878, "eval_nq_n_ngrams_match_2": 8.394, "eval_nq_n_ngrams_match_3": 3.946, "eval_nq_num_pred_words": 49.062, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.172410440598595, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4420714674237494, "eval_nq_runtime": 10.3619, "eval_nq_samples_per_second": 48.254, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.45572913082868083, "eval_nq_token_set_f1_sem": 0.005075308348249063, "eval_nq_token_set_precision": 0.4142225872774345, "eval_nq_token_set_recall": 0.5157395076313773, "eval_nq_true_num_tokens": 64.0, "step": 111250 }, { "epoch": 21.36, "learning_rate": 0.001, "loss": 2.5865, "step": 111252 }, { "epoch": 21.36, "learning_rate": 0.001, "loss": 2.5788, "step": 111264 }, { "epoch": 21.37, "learning_rate": 0.001, "loss": 2.5664, "step": 111276 }, { "epoch": 21.37, "learning_rate": 0.001, "loss": 2.5769, "step": 111288 }, { "epoch": 21.37, "learning_rate": 0.001, "loss": 2.5829, "step": 111300 }, { "epoch": 21.37, "learning_rate": 0.001, "loss": 2.5878, "step": 111312 }, { "epoch": 21.38, "learning_rate": 0.001, "loss": 2.5852, "step": 111324 }, { "epoch": 21.38, "learning_rate": 0.001, "loss": 2.5806, "step": 111336 }, { "epoch": 21.38, "learning_rate": 0.001, "loss": 2.5756, "step": 111348 }, { "epoch": 21.38, "learning_rate": 0.001, "loss": 2.586, "step": 111360 }, { "epoch": 21.38, "learning_rate": 0.001, "loss": 2.5797, "step": 111372 }, { "epoch": 21.39, "learning_rate": 0.001, "loss": 2.5869, "step": 111384 }, { "epoch": 21.39, "learning_rate": 0.001, "loss": 2.5878, "step": 111396 }, { "epoch": 21.39, "learning_rate": 0.001, "loss": 2.5771, "step": 111408 }, { "epoch": 21.39, "learning_rate": 0.001, "loss": 2.5814, "step": 111420 }, { "epoch": 21.4, "learning_rate": 0.001, "loss": 2.5852, "step": 111432 }, { "epoch": 21.4, "learning_rate": 0.001, "loss": 2.5805, "step": 111444 }, { "epoch": 21.4, "learning_rate": 0.001, "loss": 2.5863, "step": 111456 }, { "epoch": 21.4, "learning_rate": 0.001, "loss": 2.5783, "step": 111468 }, { "epoch": 21.41, "learning_rate": 0.001, "loss": 2.5803, "step": 111480 }, { "epoch": 21.41, "learning_rate": 0.001, "loss": 2.5868, "step": 111492 }, { "epoch": 21.41, "learning_rate": 0.001, "loss": 2.5826, "step": 111504 }, { "epoch": 21.41, "learning_rate": 0.001, "loss": 2.5856, "step": 111516 }, { "epoch": 21.41, "learning_rate": 0.001, "loss": 2.5765, "step": 111528 }, { "epoch": 21.42, "learning_rate": 0.001, "loss": 2.5833, "step": 111540 }, { "epoch": 21.42, "learning_rate": 0.001, "loss": 2.5737, "step": 111552 }, { "epoch": 21.42, "learning_rate": 0.001, "loss": 2.5764, "step": 111564 }, { "epoch": 21.42, "learning_rate": 0.001, "loss": 2.5821, "step": 111576 }, { "epoch": 21.43, "learning_rate": 0.001, "loss": 2.5835, "step": 111588 }, { "epoch": 21.43, "learning_rate": 0.001, "loss": 2.5808, "step": 111600 }, { "epoch": 21.43, "learning_rate": 0.001, "loss": 2.5848, "step": 111612 }, { "epoch": 21.43, "learning_rate": 0.001, "loss": 2.5826, "step": 111624 }, { "epoch": 21.44, "learning_rate": 0.001, "loss": 2.5913, "step": 111636 }, { "epoch": 21.44, "learning_rate": 0.001, "loss": 2.5878, "step": 111648 }, { "epoch": 21.44, "learning_rate": 0.001, "loss": 2.5825, "step": 111660 }, { "epoch": 21.44, "learning_rate": 0.001, "loss": 2.5819, "step": 111672 }, { "epoch": 21.44, "learning_rate": 0.001, "loss": 2.5842, "step": 111684 }, { "epoch": 21.45, "learning_rate": 0.001, "loss": 2.5928, "step": 111696 }, { "epoch": 21.45, "learning_rate": 0.001, "loss": 2.5843, "step": 111708 }, { "epoch": 21.45, "learning_rate": 0.001, "loss": 2.574, "step": 111720 }, { "epoch": 21.45, "learning_rate": 0.001, "loss": 2.5885, "step": 111732 }, { "epoch": 21.46, "learning_rate": 0.001, "loss": 2.5913, "step": 111744 }, { "epoch": 21.46, "learning_rate": 0.001, "loss": 2.5859, "step": 111756 }, { "epoch": 21.46, "learning_rate": 0.001, "loss": 2.5841, "step": 111768 }, { "epoch": 21.46, "learning_rate": 0.001, "loss": 2.582, "step": 111780 }, { "epoch": 21.47, "learning_rate": 0.001, "loss": 2.5822, "step": 111792 }, { "epoch": 21.47, "learning_rate": 0.001, "loss": 2.5875, "step": 111804 }, { "epoch": 21.47, "learning_rate": 0.001, "loss": 2.5754, "step": 111816 }, { "epoch": 21.47, "learning_rate": 0.001, "loss": 2.5811, "step": 111828 }, { "epoch": 21.47, "learning_rate": 0.001, "loss": 2.5802, "step": 111840 }, { "epoch": 21.48, "learning_rate": 0.001, "loss": 2.5843, "step": 111852 }, { "epoch": 21.48, "learning_rate": 0.001, "loss": 2.5703, "step": 111864 }, { "epoch": 21.48, "eval_ag_news_accuracy": 0.317875, "eval_ag_news_bleu_score": 4.700494953317216, "eval_ag_news_bleu_score_sem": 0.15883812852175397, "eval_ag_news_emb_cos_sim": 0.8048298954963684, "eval_ag_news_emb_cos_sim_sem": 0.0070206251201368405, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.590454339981079, "eval_ag_news_n_ngrams_match_1": 13.818, "eval_ag_news_n_ngrams_match_2": 3.026, "eval_ag_news_n_ngrams_match_3": 0.852, "eval_ag_news_num_pred_words": 46.816, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.25054225622004, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34322967174665137, "eval_ag_news_runtime": 10.4165, "eval_ag_news_samples_per_second": 48.001, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3462083173163345, "eval_ag_news_token_set_f1_sem": 0.004433211647136026, "eval_ag_news_token_set_precision": 0.33192278867157937, "eval_ag_news_token_set_recall": 0.38017136136788854, "eval_ag_news_true_num_tokens": 56.09375, "step": 111875 }, { "epoch": 21.48, "eval_anthropic_toxic_prompts_accuracy": 0.11278125, "eval_anthropic_toxic_prompts_bleu_score": 3.1011487188661198, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1175005576017215, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6694596409797668, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009040785472447741, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.287785530090332, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.074, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.854, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 47.102, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.78348671053744, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20937319846717908, "eval_anthropic_toxic_prompts_runtime": 10.1695, "eval_anthropic_toxic_prompts_samples_per_second": 49.167, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3501391823374047, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062402111439853955, "eval_anthropic_toxic_prompts_token_set_precision": 0.4291342180395337, "eval_anthropic_toxic_prompts_token_set_recall": 0.32489331562091195, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 111875 }, { "epoch": 21.48, "eval_arxiv_accuracy": 0.3473125, "eval_arxiv_bleu_score": 4.2278988098088925, "eval_arxiv_bleu_score_sem": 0.11929453568295216, "eval_arxiv_emb_cos_sim": 0.7575228214263916, "eval_arxiv_emb_cos_sim_sem": 0.007924757338759015, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.435016632080078, "eval_arxiv_n_ngrams_match_1": 14.93, "eval_arxiv_n_ngrams_match_2": 2.83, "eval_arxiv_n_ngrams_match_3": 0.634, "eval_arxiv_num_pred_words": 41.004, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.03192868675887, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3542914729304386, "eval_arxiv_runtime": 10.2452, "eval_arxiv_samples_per_second": 48.803, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3481463723415783, "eval_arxiv_token_set_f1_sem": 0.004256780699375363, "eval_arxiv_token_set_precision": 0.3014698302217242, "eval_arxiv_token_set_recall": 0.4282530571108426, "eval_arxiv_true_num_tokens": 64.0, "step": 111875 }, { "epoch": 21.48, "eval_python_code_alpaca_accuracy": 0.1600625, "eval_python_code_alpaca_bleu_score": 4.5413277231968205, "eval_python_code_alpaca_bleu_score_sem": 0.14724763818708514, "eval_python_code_alpaca_emb_cos_sim": 0.7540905475616455, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00882398274225221, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.900806188583374, "eval_python_code_alpaca_n_ngrams_match_1": 9.906, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 0.954, "eval_python_code_alpaca_num_pred_words": 44.42, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.188803065592246, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32853446970023503, "eval_python_code_alpaca_runtime": 10.4058, "eval_python_code_alpaca_samples_per_second": 48.05, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.47538907576823586, "eval_python_code_alpaca_token_set_f1_sem": 0.005781947445105544, "eval_python_code_alpaca_token_set_precision": 0.5416553829993164, "eval_python_code_alpaca_token_set_recall": 0.4463131670101364, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 111875 }, { "epoch": 21.48, "eval_wikibio_accuracy": 0.3231875, "eval_wikibio_bleu_score": 6.025139888412912, "eval_wikibio_bleu_score_sem": 0.21193780008115048, "eval_wikibio_emb_cos_sim": 0.7234340906143188, "eval_wikibio_emb_cos_sim_sem": 0.010278749707234549, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.770280361175537, "eval_wikibio_n_ngrams_match_1": 9.996, "eval_wikibio_n_ngrams_match_2": 3.374, "eval_wikibio_n_ngrams_match_3": 1.238, "eval_wikibio_num_pred_words": 35.894, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.39222862687157, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3498012550354983, "eval_wikibio_runtime": 10.3669, "eval_wikibio_samples_per_second": 48.231, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.32076553825869053, "eval_wikibio_token_set_f1_sem": 0.005478533508575749, "eval_wikibio_token_set_precision": 0.3264525553093949, "eval_wikibio_token_set_recall": 0.3308082398231639, "eval_wikibio_true_num_tokens": 61.1328125, "step": 111875 }, { "epoch": 21.48, "eval_nq_accuracy": 0.52659375, "eval_nq_bleu_score": 11.441439523851317, "eval_nq_bleu_score_sem": 0.4981648400823459, "eval_nq_emb_cos_sim": 0.8239423036575317, "eval_nq_emb_cos_sim_sem": 0.007759101126320471, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2113282680511475, "eval_nq_n_ngrams_match_1": 22.888, "eval_nq_n_ngrams_match_2": 8.378, "eval_nq_n_ngrams_match_3": 3.826, "eval_nq_num_pred_words": 49.072, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.127832552861125, "eval_nq_pred_num_tokens": 62.9921875, "eval_nq_rouge_score": 0.44206409795837986, "eval_nq_runtime": 10.6945, "eval_nq_samples_per_second": 46.753, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.45777626861500753, "eval_nq_token_set_f1_sem": 0.005109313056666424, "eval_nq_token_set_precision": 0.4145768546797821, "eval_nq_token_set_recall": 0.5208834102521768, "eval_nq_true_num_tokens": 64.0, "step": 111875 }, { "epoch": 21.48, "learning_rate": 0.001, "loss": 2.5837, "step": 111876 }, { "epoch": 21.48, "learning_rate": 0.001, "loss": 2.576, "step": 111888 }, { "epoch": 21.49, "learning_rate": 0.001, "loss": 2.5789, "step": 111900 }, { "epoch": 21.49, "learning_rate": 0.001, "loss": 2.5881, "step": 111912 }, { "epoch": 21.49, "learning_rate": 0.001, "loss": 2.5722, "step": 111924 }, { "epoch": 21.49, "learning_rate": 0.001, "loss": 2.5845, "step": 111936 }, { "epoch": 21.5, "learning_rate": 0.001, "loss": 2.571, "step": 111948 }, { "epoch": 21.5, "learning_rate": 0.001, "loss": 2.5748, "step": 111960 }, { "epoch": 21.5, "learning_rate": 0.001, "loss": 2.5785, "step": 111972 }, { "epoch": 21.5, "learning_rate": 0.001, "loss": 2.579, "step": 111984 }, { "epoch": 21.5, "learning_rate": 0.001, "loss": 2.5712, "step": 111996 }, { "epoch": 21.51, "learning_rate": 0.001, "loss": 2.5877, "step": 112008 }, { "epoch": 21.51, "learning_rate": 0.001, "loss": 2.5856, "step": 112020 }, { "epoch": 21.51, "learning_rate": 0.001, "loss": 2.58, "step": 112032 }, { "epoch": 21.51, "learning_rate": 0.001, "loss": 2.5785, "step": 112044 }, { "epoch": 21.52, "learning_rate": 0.001, "loss": 2.5707, "step": 112056 }, { "epoch": 21.52, "learning_rate": 0.001, "loss": 2.5829, "step": 112068 }, { "epoch": 21.52, "learning_rate": 0.001, "loss": 2.5756, "step": 112080 }, { "epoch": 21.52, "learning_rate": 0.001, "loss": 2.5809, "step": 112092 }, { "epoch": 21.53, "learning_rate": 0.001, "loss": 2.573, "step": 112104 }, { "epoch": 21.53, "learning_rate": 0.001, "loss": 2.5768, "step": 112116 }, { "epoch": 21.53, "learning_rate": 0.001, "loss": 2.577, "step": 112128 }, { "epoch": 21.53, "learning_rate": 0.001, "loss": 2.5834, "step": 112140 }, { "epoch": 21.53, "learning_rate": 0.001, "loss": 2.5806, "step": 112152 }, { "epoch": 21.54, "learning_rate": 0.001, "loss": 2.5782, "step": 112164 }, { "epoch": 21.54, "learning_rate": 0.001, "loss": 2.5751, "step": 112176 }, { "epoch": 21.54, "learning_rate": 0.001, "loss": 2.5813, "step": 112188 }, { "epoch": 21.54, "learning_rate": 0.001, "loss": 2.5899, "step": 112200 }, { "epoch": 21.55, "learning_rate": 0.001, "loss": 2.5828, "step": 112212 }, { "epoch": 21.55, "learning_rate": 0.001, "loss": 2.5876, "step": 112224 }, { "epoch": 21.55, "learning_rate": 0.001, "loss": 2.5753, "step": 112236 }, { "epoch": 21.55, "learning_rate": 0.001, "loss": 2.5745, "step": 112248 }, { "epoch": 21.56, "learning_rate": 0.001, "loss": 2.5688, "step": 112260 }, { "epoch": 21.56, "learning_rate": 0.001, "loss": 2.5768, "step": 112272 }, { "epoch": 21.56, "learning_rate": 0.001, "loss": 2.5815, "step": 112284 }, { "epoch": 21.56, "learning_rate": 0.001, "loss": 2.5783, "step": 112296 }, { "epoch": 21.56, "learning_rate": 0.001, "loss": 2.5795, "step": 112308 }, { "epoch": 21.57, "learning_rate": 0.001, "loss": 2.5839, "step": 112320 }, { "epoch": 21.57, "learning_rate": 0.001, "loss": 2.5815, "step": 112332 }, { "epoch": 21.57, "learning_rate": 0.001, "loss": 2.5808, "step": 112344 }, { "epoch": 21.57, "learning_rate": 0.001, "loss": 2.5842, "step": 112356 }, { "epoch": 21.58, "learning_rate": 0.001, "loss": 2.5867, "step": 112368 }, { "epoch": 21.58, "learning_rate": 0.001, "loss": 2.5754, "step": 112380 }, { "epoch": 21.58, "learning_rate": 0.001, "loss": 2.582, "step": 112392 }, { "epoch": 21.58, "learning_rate": 0.001, "loss": 2.5732, "step": 112404 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 2.5794, "step": 112416 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 2.575, "step": 112428 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 2.5865, "step": 112440 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 2.5934, "step": 112452 }, { "epoch": 21.59, "learning_rate": 0.001, "loss": 2.5797, "step": 112464 }, { "epoch": 21.6, "learning_rate": 0.001, "loss": 2.5865, "step": 112476 }, { "epoch": 21.6, "learning_rate": 0.001, "loss": 2.5858, "step": 112488 }, { "epoch": 21.6, "learning_rate": 0.001, "loss": 2.5847, "step": 112500 }, { "epoch": 21.6, "eval_ag_news_accuracy": 0.31659375, "eval_ag_news_bleu_score": 4.542593252956022, "eval_ag_news_bleu_score_sem": 0.15056853216180663, "eval_ag_news_emb_cos_sim": 0.8039220571517944, "eval_ag_news_emb_cos_sim_sem": 0.007440447902520332, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5941953659057617, "eval_ag_news_n_ngrams_match_1": 13.644, "eval_ag_news_n_ngrams_match_2": 2.894, "eval_ag_news_n_ngrams_match_3": 0.762, "eval_ag_news_num_pred_words": 46.446, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.386410459360135, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34007294930622134, "eval_ag_news_runtime": 10.5067, "eval_ag_news_samples_per_second": 47.589, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34305723136402444, "eval_ag_news_token_set_f1_sem": 0.004487632643678576, "eval_ag_news_token_set_precision": 0.32657992058290375, "eval_ag_news_token_set_recall": 0.37782337585284304, "eval_ag_news_true_num_tokens": 56.09375, "step": 112500 }, { "epoch": 21.6, "eval_anthropic_toxic_prompts_accuracy": 0.113375, "eval_anthropic_toxic_prompts_bleu_score": 2.8859277667501018, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1045962308953717, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6659275889396667, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00853308519383002, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2842369079589844, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 5.964, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.71, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.61, "eval_anthropic_toxic_prompts_num_pred_words": 46.702, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.688610676001964, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20716561635871236, "eval_anthropic_toxic_prompts_runtime": 13.8982, "eval_anthropic_toxic_prompts_samples_per_second": 35.976, "eval_anthropic_toxic_prompts_steps_per_second": 0.072, "eval_anthropic_toxic_prompts_token_set_f1": 0.34519600134113004, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063488696199608145, "eval_anthropic_toxic_prompts_token_set_precision": 0.4228826040609721, "eval_anthropic_toxic_prompts_token_set_recall": 0.31747264310184414, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 112500 }, { "epoch": 21.6, "eval_arxiv_accuracy": 0.3473125, "eval_arxiv_bleu_score": 4.129073917253408, "eval_arxiv_bleu_score_sem": 0.11209592716180401, "eval_arxiv_emb_cos_sim": 0.7514575123786926, "eval_arxiv_emb_cos_sim_sem": 0.0076645861599902824, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.429063081741333, "eval_arxiv_n_ngrams_match_1": 14.658, "eval_arxiv_n_ngrams_match_2": 2.774, "eval_arxiv_n_ngrams_match_3": 0.6, "eval_arxiv_num_pred_words": 40.286, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.84772740708994, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3507662362923506, "eval_arxiv_runtime": 10.5315, "eval_arxiv_samples_per_second": 47.477, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.34238324366268114, "eval_arxiv_token_set_f1_sem": 0.004075691673402702, "eval_arxiv_token_set_precision": 0.2943535155839137, "eval_arxiv_token_set_recall": 0.4264493475804481, "eval_arxiv_true_num_tokens": 64.0, "step": 112500 }, { "epoch": 21.6, "eval_python_code_alpaca_accuracy": 0.15915625, "eval_python_code_alpaca_bleu_score": 4.598021978450534, "eval_python_code_alpaca_bleu_score_sem": 0.14576880316475482, "eval_python_code_alpaca_emb_cos_sim": 0.7366642951965332, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010637163872793159, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.916404962539673, "eval_python_code_alpaca_n_ngrams_match_1": 9.61, "eval_python_code_alpaca_n_ngrams_match_2": 2.83, "eval_python_code_alpaca_n_ngrams_match_3": 0.954, "eval_python_code_alpaca_num_pred_words": 42.024, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.474750509842, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3280987002626782, "eval_python_code_alpaca_runtime": 9.9969, "eval_python_code_alpaca_samples_per_second": 50.016, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4624031093236953, "eval_python_code_alpaca_token_set_f1_sem": 0.0060482280178062115, "eval_python_code_alpaca_token_set_precision": 0.5211010530049532, "eval_python_code_alpaca_token_set_recall": 0.43874641900649025, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 112500 }, { "epoch": 21.6, "eval_wikibio_accuracy": 0.3219375, "eval_wikibio_bleu_score": 5.9554492938360895, "eval_wikibio_bleu_score_sem": 0.2327490139601978, "eval_wikibio_emb_cos_sim": 0.7273236513137817, "eval_wikibio_emb_cos_sim_sem": 0.01112296952215398, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7361032962799072, "eval_wikibio_n_ngrams_match_1": 9.732, "eval_wikibio_n_ngrams_match_2": 3.274, "eval_wikibio_n_ngrams_match_3": 1.2, "eval_wikibio_num_pred_words": 35.486, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.93426596962284, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34392227813541165, "eval_wikibio_runtime": 10.0523, "eval_wikibio_samples_per_second": 49.74, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31246454278590186, "eval_wikibio_token_set_f1_sem": 0.005588013104639214, "eval_wikibio_token_set_precision": 0.3165813151304886, "eval_wikibio_token_set_recall": 0.3276856118047234, "eval_wikibio_true_num_tokens": 61.1328125, "step": 112500 }, { "epoch": 21.6, "eval_nq_accuracy": 0.52353125, "eval_nq_bleu_score": 11.57150796177806, "eval_nq_bleu_score_sem": 0.4883282667698482, "eval_nq_emb_cos_sim": 0.8306972980499268, "eval_nq_emb_cos_sim_sem": 0.007129528625421943, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.215700149536133, "eval_nq_n_ngrams_match_1": 22.702, "eval_nq_n_ngrams_match_2": 8.34, "eval_nq_n_ngrams_match_3": 3.852, "eval_nq_num_pred_words": 48.866, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.167825713977082, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4385532255405237, "eval_nq_runtime": 11.5807, "eval_nq_samples_per_second": 43.175, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4535784028235237, "eval_nq_token_set_f1_sem": 0.005143847754891399, "eval_nq_token_set_precision": 0.41012864452621106, "eval_nq_token_set_recall": 0.5164164402228356, "eval_nq_true_num_tokens": 64.0, "step": 112500 }, { "epoch": 21.6, "learning_rate": 0.001, "loss": 2.5782, "step": 112512 }, { "epoch": 21.61, "learning_rate": 0.001, "loss": 2.5839, "step": 112524 }, { "epoch": 21.61, "learning_rate": 0.001, "loss": 2.5852, "step": 112536 }, { "epoch": 21.61, "learning_rate": 0.001, "loss": 2.5787, "step": 112548 }, { "epoch": 21.61, "learning_rate": 0.001, "loss": 2.5757, "step": 112560 }, { "epoch": 21.62, "learning_rate": 0.001, "loss": 2.5795, "step": 112572 }, { "epoch": 21.62, "learning_rate": 0.001, "loss": 2.5719, "step": 112584 }, { "epoch": 21.62, "learning_rate": 0.001, "loss": 2.5807, "step": 112596 }, { "epoch": 21.62, "learning_rate": 0.001, "loss": 2.5901, "step": 112608 }, { "epoch": 21.62, "learning_rate": 0.001, "loss": 2.5776, "step": 112620 }, { "epoch": 21.63, "learning_rate": 0.001, "loss": 2.5867, "step": 112632 }, { "epoch": 21.63, "learning_rate": 0.001, "loss": 2.5821, "step": 112644 }, { "epoch": 21.63, "learning_rate": 0.001, "loss": 2.572, "step": 112656 }, { "epoch": 21.63, "learning_rate": 0.001, "loss": 2.5736, "step": 112668 }, { "epoch": 21.64, "learning_rate": 0.001, "loss": 2.58, "step": 112680 }, { "epoch": 21.64, "learning_rate": 0.001, "loss": 2.5768, "step": 112692 }, { "epoch": 21.64, "learning_rate": 0.001, "loss": 2.5928, "step": 112704 }, { "epoch": 21.64, "learning_rate": 0.001, "loss": 2.5799, "step": 112716 }, { "epoch": 21.65, "learning_rate": 0.001, "loss": 2.5816, "step": 112728 }, { "epoch": 21.65, "learning_rate": 0.001, "loss": 2.5803, "step": 112740 }, { "epoch": 21.65, "learning_rate": 0.001, "loss": 2.5876, "step": 112752 }, { "epoch": 21.65, "learning_rate": 0.001, "loss": 2.5861, "step": 112764 }, { "epoch": 21.65, "learning_rate": 0.001, "loss": 2.5914, "step": 112776 }, { "epoch": 21.66, "learning_rate": 0.001, "loss": 2.59, "step": 112788 }, { "epoch": 21.66, "learning_rate": 0.001, "loss": 2.5785, "step": 112800 }, { "epoch": 21.66, "learning_rate": 0.001, "loss": 2.5744, "step": 112812 }, { "epoch": 21.66, "learning_rate": 0.001, "loss": 2.5801, "step": 112824 }, { "epoch": 21.67, "learning_rate": 0.001, "loss": 2.5859, "step": 112836 }, { "epoch": 21.67, "learning_rate": 0.001, "loss": 2.5836, "step": 112848 }, { "epoch": 21.67, "learning_rate": 0.001, "loss": 2.5768, "step": 112860 }, { "epoch": 21.67, "learning_rate": 0.001, "loss": 2.5874, "step": 112872 }, { "epoch": 21.68, "learning_rate": 0.001, "loss": 2.5806, "step": 112884 }, { "epoch": 21.68, "learning_rate": 0.001, "loss": 2.5837, "step": 112896 }, { "epoch": 21.68, "learning_rate": 0.001, "loss": 2.591, "step": 112908 }, { "epoch": 21.68, "learning_rate": 0.001, "loss": 2.5713, "step": 112920 }, { "epoch": 21.68, "learning_rate": 0.001, "loss": 2.5813, "step": 112932 }, { "epoch": 21.69, "learning_rate": 0.001, "loss": 2.5774, "step": 112944 }, { "epoch": 21.69, "learning_rate": 0.001, "loss": 2.5837, "step": 112956 }, { "epoch": 21.69, "learning_rate": 0.001, "loss": 2.5795, "step": 112968 }, { "epoch": 21.69, "learning_rate": 0.001, "loss": 2.5656, "step": 112980 }, { "epoch": 21.7, "learning_rate": 0.001, "loss": 2.5881, "step": 112992 }, { "epoch": 21.7, "learning_rate": 0.001, "loss": 2.5787, "step": 113004 }, { "epoch": 21.7, "learning_rate": 0.001, "loss": 2.5916, "step": 113016 }, { "epoch": 21.7, "learning_rate": 0.001, "loss": 2.5933, "step": 113028 }, { "epoch": 21.71, "learning_rate": 0.001, "loss": 2.5796, "step": 113040 }, { "epoch": 21.71, "learning_rate": 0.001, "loss": 2.5783, "step": 113052 }, { "epoch": 21.71, "learning_rate": 0.001, "loss": 2.5743, "step": 113064 }, { "epoch": 21.71, "learning_rate": 0.001, "loss": 2.576, "step": 113076 }, { "epoch": 21.71, "learning_rate": 0.001, "loss": 2.5817, "step": 113088 }, { "epoch": 21.72, "learning_rate": 0.001, "loss": 2.588, "step": 113100 }, { "epoch": 21.72, "learning_rate": 0.001, "loss": 2.5824, "step": 113112 }, { "epoch": 21.72, "learning_rate": 0.001, "loss": 2.5866, "step": 113124 }, { "epoch": 21.72, "eval_ag_news_accuracy": 0.3189375, "eval_ag_news_bleu_score": 4.6172591221544845, "eval_ag_news_bleu_score_sem": 0.15087499742524083, "eval_ag_news_emb_cos_sim": 0.81067955493927, "eval_ag_news_emb_cos_sim_sem": 0.007655286852993058, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.589211940765381, "eval_ag_news_n_ngrams_match_1": 14.002, "eval_ag_news_n_ngrams_match_2": 2.968, "eval_ag_news_n_ngrams_match_3": 0.798, "eval_ag_news_num_pred_words": 46.7, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.2055325767371, "eval_ag_news_pred_num_tokens": 62.8671875, "eval_ag_news_rouge_score": 0.3444437879338732, "eval_ag_news_runtime": 10.721, "eval_ag_news_samples_per_second": 46.637, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3472914339932452, "eval_ag_news_token_set_f1_sem": 0.004401285825883925, "eval_ag_news_token_set_precision": 0.3326362956793719, "eval_ag_news_token_set_recall": 0.3793297658040768, "eval_ag_news_true_num_tokens": 56.09375, "step": 113125 }, { "epoch": 21.72, "eval_anthropic_toxic_prompts_accuracy": 0.1140625, "eval_anthropic_toxic_prompts_bleu_score": 3.0357400856539316, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11563018913891956, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675912618637085, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008846460841943186, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.266468048095703, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.078, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.856, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696, "eval_anthropic_toxic_prompts_num_pred_words": 47.242, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.218572877950766, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2089497499670066, "eval_anthropic_toxic_prompts_runtime": 9.9702, "eval_anthropic_toxic_prompts_samples_per_second": 50.15, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.350794366431882, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006364557450579552, "eval_anthropic_toxic_prompts_token_set_precision": 0.4298825004927714, "eval_anthropic_toxic_prompts_token_set_recall": 0.3202640670747388, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 113125 }, { "epoch": 21.72, "eval_arxiv_accuracy": 0.3450625, "eval_arxiv_bleu_score": 4.427283609380793, "eval_arxiv_bleu_score_sem": 0.1201839663356153, "eval_arxiv_emb_cos_sim": 0.7643361687660217, "eval_arxiv_emb_cos_sim_sem": 0.007089860538439095, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4291634559631348, "eval_arxiv_n_ngrams_match_1": 15.162, "eval_arxiv_n_ngrams_match_2": 3.006, "eval_arxiv_n_ngrams_match_3": 0.678, "eval_arxiv_num_pred_words": 40.802, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.850823879123165, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36038550233441047, "eval_arxiv_runtime": 10.2204, "eval_arxiv_samples_per_second": 48.922, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3540935244536806, "eval_arxiv_token_set_f1_sem": 0.004124901185254456, "eval_arxiv_token_set_precision": 0.30606504947354496, "eval_arxiv_token_set_recall": 0.43679745600451936, "eval_arxiv_true_num_tokens": 64.0, "step": 113125 }, { "epoch": 21.72, "eval_python_code_alpaca_accuracy": 0.15503125, "eval_python_code_alpaca_bleu_score": 4.456264498652783, "eval_python_code_alpaca_bleu_score_sem": 0.14173129545961294, "eval_python_code_alpaca_emb_cos_sim": 0.7401628494262695, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00912275121463381, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9323348999023438, "eval_python_code_alpaca_n_ngrams_match_1": 9.608, "eval_python_code_alpaca_n_ngrams_match_2": 2.708, "eval_python_code_alpaca_n_ngrams_match_3": 0.868, "eval_python_code_alpaca_num_pred_words": 42.276, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.771408728296073, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33434563887115953, "eval_python_code_alpaca_runtime": 9.9309, "eval_python_code_alpaca_samples_per_second": 50.348, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.46630211155272705, "eval_python_code_alpaca_token_set_f1_sem": 0.005577378640235889, "eval_python_code_alpaca_token_set_precision": 0.524713249797721, "eval_python_code_alpaca_token_set_recall": 0.4413909157763783, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 113125 }, { "epoch": 21.72, "eval_wikibio_accuracy": 0.32284375, "eval_wikibio_bleu_score": 6.075281154920749, "eval_wikibio_bleu_score_sem": 0.20602332021792208, "eval_wikibio_emb_cos_sim": 0.7318436503410339, "eval_wikibio_emb_cos_sim_sem": 0.009524317167622497, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7428438663482666, "eval_wikibio_n_ngrams_match_1": 10.158, "eval_wikibio_n_ngrams_match_2": 3.434, "eval_wikibio_n_ngrams_match_3": 1.204, "eval_wikibio_num_pred_words": 35.708, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.217881619390496, "eval_wikibio_pred_num_tokens": 62.9609375, "eval_wikibio_rouge_score": 0.3573053501457158, "eval_wikibio_runtime": 10.1174, "eval_wikibio_samples_per_second": 49.42, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3272909505723658, "eval_wikibio_token_set_f1_sem": 0.0051825386297305685, "eval_wikibio_token_set_precision": 0.3313673683276995, "eval_wikibio_token_set_recall": 0.3390362766352453, "eval_wikibio_true_num_tokens": 61.1328125, "step": 113125 }, { "epoch": 21.72, "eval_nq_accuracy": 0.5245625, "eval_nq_bleu_score": 11.483588269998057, "eval_nq_bleu_score_sem": 0.4788229567031315, "eval_nq_emb_cos_sim": 0.828566312789917, "eval_nq_emb_cos_sim_sem": 0.007296827247863397, "eval_nq_emb_top1_equal": 0.3671875, "eval_nq_emb_top1_equal_sem": 0.04277397517748991, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2093870639801025, "eval_nq_n_ngrams_match_1": 22.964, "eval_nq_n_ngrams_match_2": 8.378, "eval_nq_n_ngrams_match_3": 3.866, "eval_nq_num_pred_words": 49.022, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.110130754110362, "eval_nq_pred_num_tokens": 62.9609375, "eval_nq_rouge_score": 0.4429517913784128, "eval_nq_runtime": 10.5228, "eval_nq_samples_per_second": 47.516, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4580778356921578, "eval_nq_token_set_f1_sem": 0.005034516547220299, "eval_nq_token_set_precision": 0.4159114277974363, "eval_nq_token_set_recall": 0.5185378573581648, "eval_nq_true_num_tokens": 64.0, "step": 113125 }, { "epoch": 21.72, "learning_rate": 0.001, "loss": 2.5793, "step": 113136 }, { "epoch": 21.73, "learning_rate": 0.001, "loss": 2.5851, "step": 113148 }, { "epoch": 21.73, "learning_rate": 0.001, "loss": 2.583, "step": 113160 }, { "epoch": 21.73, "learning_rate": 0.001, "loss": 2.5801, "step": 113172 }, { "epoch": 21.73, "learning_rate": 0.001, "loss": 2.5904, "step": 113184 }, { "epoch": 21.74, "learning_rate": 0.001, "loss": 2.5819, "step": 113196 }, { "epoch": 21.74, "learning_rate": 0.001, "loss": 2.5787, "step": 113208 }, { "epoch": 21.74, "learning_rate": 0.001, "loss": 2.5799, "step": 113220 }, { "epoch": 21.74, "learning_rate": 0.001, "loss": 2.5871, "step": 113232 }, { "epoch": 21.74, "learning_rate": 0.001, "loss": 2.5889, "step": 113244 }, { "epoch": 21.75, "learning_rate": 0.001, "loss": 2.578, "step": 113256 }, { "epoch": 21.75, "learning_rate": 0.001, "loss": 2.5901, "step": 113268 }, { "epoch": 21.75, "learning_rate": 0.001, "loss": 2.5802, "step": 113280 }, { "epoch": 21.75, "learning_rate": 0.001, "loss": 2.5794, "step": 113292 }, { "epoch": 21.76, "learning_rate": 0.001, "loss": 2.5797, "step": 113304 }, { "epoch": 21.76, "learning_rate": 0.001, "loss": 2.5762, "step": 113316 }, { "epoch": 21.76, "learning_rate": 0.001, "loss": 2.5802, "step": 113328 }, { "epoch": 21.76, "learning_rate": 0.001, "loss": 2.5804, "step": 113340 }, { "epoch": 21.76, "learning_rate": 0.001, "loss": 2.5808, "step": 113352 }, { "epoch": 21.77, "learning_rate": 0.001, "loss": 2.5651, "step": 113364 }, { "epoch": 21.77, "learning_rate": 0.001, "loss": 2.5766, "step": 113376 }, { "epoch": 21.77, "learning_rate": 0.001, "loss": 2.5875, "step": 113388 }, { "epoch": 21.77, "learning_rate": 0.001, "loss": 2.5784, "step": 113400 }, { "epoch": 21.78, "learning_rate": 0.001, "loss": 2.5804, "step": 113412 }, { "epoch": 21.78, "learning_rate": 0.001, "loss": 2.5923, "step": 113424 }, { "epoch": 21.78, "learning_rate": 0.001, "loss": 2.5892, "step": 113436 }, { "epoch": 21.78, "learning_rate": 0.001, "loss": 2.5819, "step": 113448 }, { "epoch": 21.79, "learning_rate": 0.001, "loss": 2.575, "step": 113460 }, { "epoch": 21.79, "learning_rate": 0.001, "loss": 2.5757, "step": 113472 }, { "epoch": 21.79, "learning_rate": 0.001, "loss": 2.5769, "step": 113484 }, { "epoch": 21.79, "learning_rate": 0.001, "loss": 2.5841, "step": 113496 }, { "epoch": 21.79, "learning_rate": 0.001, "loss": 2.5903, "step": 113508 }, { "epoch": 21.8, "learning_rate": 0.001, "loss": 2.5828, "step": 113520 }, { "epoch": 21.8, "learning_rate": 0.001, "loss": 2.5777, "step": 113532 }, { "epoch": 21.8, "learning_rate": 0.001, "loss": 2.5859, "step": 113544 }, { "epoch": 21.8, "learning_rate": 0.001, "loss": 2.5857, "step": 113556 }, { "epoch": 21.81, "learning_rate": 0.001, "loss": 2.579, "step": 113568 }, { "epoch": 21.81, "learning_rate": 0.001, "loss": 2.5841, "step": 113580 }, { "epoch": 21.81, "learning_rate": 0.001, "loss": 2.5785, "step": 113592 }, { "epoch": 21.81, "learning_rate": 0.001, "loss": 2.5804, "step": 113604 }, { "epoch": 21.82, "learning_rate": 0.001, "loss": 2.5844, "step": 113616 }, { "epoch": 21.82, "learning_rate": 0.001, "loss": 2.5825, "step": 113628 }, { "epoch": 21.82, "learning_rate": 0.001, "loss": 2.5868, "step": 113640 }, { "epoch": 21.82, "learning_rate": 0.001, "loss": 2.5841, "step": 113652 }, { "epoch": 21.82, "learning_rate": 0.001, "loss": 2.5928, "step": 113664 }, { "epoch": 21.83, "learning_rate": 0.001, "loss": 2.5829, "step": 113676 }, { "epoch": 21.83, "learning_rate": 0.001, "loss": 2.5827, "step": 113688 }, { "epoch": 21.83, "learning_rate": 0.001, "loss": 2.5828, "step": 113700 }, { "epoch": 21.83, "learning_rate": 0.001, "loss": 2.5889, "step": 113712 }, { "epoch": 21.84, "learning_rate": 0.001, "loss": 2.5815, "step": 113724 }, { "epoch": 21.84, "learning_rate": 0.001, "loss": 2.5881, "step": 113736 }, { "epoch": 21.84, "learning_rate": 0.001, "loss": 2.5848, "step": 113748 }, { "epoch": 21.84, "eval_ag_news_accuracy": 0.31846875, "eval_ag_news_bleu_score": 4.679012024183408, "eval_ag_news_bleu_score_sem": 0.15355513120994704, "eval_ag_news_emb_cos_sim": 0.8066794276237488, "eval_ag_news_emb_cos_sim_sem": 0.00664133114240813, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5819995403289795, "eval_ag_news_n_ngrams_match_1": 13.88, "eval_ag_news_n_ngrams_match_2": 3.024, "eval_ag_news_n_ngrams_match_3": 0.83, "eval_ag_news_num_pred_words": 46.786, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.945343200657504, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3421566200228286, "eval_ag_news_runtime": 10.5682, "eval_ag_news_samples_per_second": 47.312, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34573068272148605, "eval_ag_news_token_set_f1_sem": 0.004417048525383739, "eval_ag_news_token_set_precision": 0.33024695872557813, "eval_ag_news_token_set_recall": 0.37763815965764136, "eval_ag_news_true_num_tokens": 56.09375, "step": 113750 }, { "epoch": 21.84, "eval_anthropic_toxic_prompts_accuracy": 0.11309375, "eval_anthropic_toxic_prompts_bleu_score": 2.9948367131837115, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1284381535585758, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6698676943778992, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00935611897338314, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.3021464347839355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.106, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.848, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 48.15, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 27.170896933066505, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2081280578046948, "eval_anthropic_toxic_prompts_runtime": 10.0333, "eval_anthropic_toxic_prompts_samples_per_second": 49.834, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3511642363875221, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006447759182461809, "eval_anthropic_toxic_prompts_token_set_precision": 0.4289451355331152, "eval_anthropic_toxic_prompts_token_set_recall": 0.32736353587670647, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 113750 }, { "epoch": 21.84, "eval_arxiv_accuracy": 0.3448125, "eval_arxiv_bleu_score": 4.083958030466865, "eval_arxiv_bleu_score_sem": 0.11825863123114955, "eval_arxiv_emb_cos_sim": 0.7606527209281921, "eval_arxiv_emb_cos_sim_sem": 0.006978115413602048, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4391751289367676, "eval_arxiv_n_ngrams_match_1": 14.782, "eval_arxiv_n_ngrams_match_2": 2.802, "eval_arxiv_n_ngrams_match_3": 0.562, "eval_arxiv_num_pred_words": 40.082, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.16124355604401, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35442063763571935, "eval_arxiv_runtime": 10.36, "eval_arxiv_samples_per_second": 48.262, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.34785914848926547, "eval_arxiv_token_set_f1_sem": 0.004248403809319432, "eval_arxiv_token_set_precision": 0.29804952986992483, "eval_arxiv_token_set_recall": 0.43695701385419244, "eval_arxiv_true_num_tokens": 64.0, "step": 113750 }, { "epoch": 21.84, "eval_python_code_alpaca_accuracy": 0.1583125, "eval_python_code_alpaca_bleu_score": 4.42816420078413, "eval_python_code_alpaca_bleu_score_sem": 0.14064540216470575, "eval_python_code_alpaca_emb_cos_sim": 0.7494914531707764, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008940605706259886, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9011332988739014, "eval_python_code_alpaca_n_ngrams_match_1": 9.548, "eval_python_code_alpaca_n_ngrams_match_2": 2.82, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 43.736, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.194753783464844, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.324992975679671, "eval_python_code_alpaca_runtime": 10.4937, "eval_python_code_alpaca_samples_per_second": 47.648, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.46851271096077945, "eval_python_code_alpaca_token_set_f1_sem": 0.005361809575827445, "eval_python_code_alpaca_token_set_precision": 0.5237733636941833, "eval_python_code_alpaca_token_set_recall": 0.4470571152903056, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 113750 }, { "epoch": 21.84, "eval_wikibio_accuracy": 0.3195625, "eval_wikibio_bleu_score": 5.633856736686618, "eval_wikibio_bleu_score_sem": 0.1939018278312027, "eval_wikibio_emb_cos_sim": 0.7343205809593201, "eval_wikibio_emb_cos_sim_sem": 0.009796639809652527, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.778465509414673, "eval_wikibio_n_ngrams_match_1": 10.0, "eval_wikibio_n_ngrams_match_2": 3.276, "eval_wikibio_n_ngrams_match_3": 1.128, "eval_wikibio_num_pred_words": 36.438, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.74885799163862, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34729071016340013, "eval_wikibio_runtime": 10.2246, "eval_wikibio_samples_per_second": 48.902, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.31660626453372026, "eval_wikibio_token_set_f1_sem": 0.005326242510062092, "eval_wikibio_token_set_precision": 0.3257330726863965, "eval_wikibio_token_set_recall": 0.3250680312480864, "eval_wikibio_true_num_tokens": 61.1328125, "step": 113750 }, { "epoch": 21.84, "eval_nq_accuracy": 0.5249375, "eval_nq_bleu_score": 11.618509505421393, "eval_nq_bleu_score_sem": 0.47727477421093933, "eval_nq_emb_cos_sim": 0.8236499428749084, "eval_nq_emb_cos_sim_sem": 0.008470734858358725, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.208742141723633, "eval_nq_n_ngrams_match_1": 22.912, "eval_nq_n_ngrams_match_2": 8.388, "eval_nq_n_ngrams_match_3": 3.882, "eval_nq_num_pred_words": 49.054, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.104257322184747, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4416679699544236, "eval_nq_runtime": 10.8646, "eval_nq_samples_per_second": 46.021, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4579354744572694, "eval_nq_token_set_f1_sem": 0.005138784443154923, "eval_nq_token_set_precision": 0.41480579463748113, "eval_nq_token_set_recall": 0.5218710305350167, "eval_nq_true_num_tokens": 64.0, "step": 113750 }, { "epoch": 21.84, "learning_rate": 0.001, "loss": 2.5823, "step": 113760 }, { "epoch": 21.85, "learning_rate": 0.001, "loss": 2.5877, "step": 113772 }, { "epoch": 21.85, "learning_rate": 0.001, "loss": 2.5873, "step": 113784 }, { "epoch": 21.85, "learning_rate": 0.001, "loss": 2.5923, "step": 113796 }, { "epoch": 21.85, "learning_rate": 0.001, "loss": 2.5882, "step": 113808 }, { "epoch": 21.85, "learning_rate": 0.001, "loss": 2.5896, "step": 113820 }, { "epoch": 21.86, "learning_rate": 0.001, "loss": 2.5859, "step": 113832 }, { "epoch": 21.86, "learning_rate": 0.001, "loss": 2.5798, "step": 113844 }, { "epoch": 21.86, "learning_rate": 0.001, "loss": 2.5787, "step": 113856 }, { "epoch": 21.86, "learning_rate": 0.001, "loss": 2.5871, "step": 113868 }, { "epoch": 21.87, "learning_rate": 0.001, "loss": 2.5927, "step": 113880 }, { "epoch": 21.87, "learning_rate": 0.001, "loss": 2.58, "step": 113892 }, { "epoch": 21.87, "learning_rate": 0.001, "loss": 2.5899, "step": 113904 }, { "epoch": 21.87, "learning_rate": 0.001, "loss": 2.5821, "step": 113916 }, { "epoch": 21.88, "learning_rate": 0.001, "loss": 2.587, "step": 113928 }, { "epoch": 21.88, "learning_rate": 0.001, "loss": 2.5925, "step": 113940 }, { "epoch": 21.88, "learning_rate": 0.001, "loss": 2.5779, "step": 113952 }, { "epoch": 21.88, "learning_rate": 0.001, "loss": 2.5882, "step": 113964 }, { "epoch": 21.88, "learning_rate": 0.001, "loss": 2.5791, "step": 113976 }, { "epoch": 21.89, "learning_rate": 0.001, "loss": 2.5859, "step": 113988 }, { "epoch": 21.89, "learning_rate": 0.001, "loss": 2.5756, "step": 114000 }, { "epoch": 21.89, "learning_rate": 0.001, "loss": 2.5958, "step": 114012 }, { "epoch": 21.89, "learning_rate": 0.001, "loss": 2.5881, "step": 114024 }, { "epoch": 21.9, "learning_rate": 0.001, "loss": 2.5822, "step": 114036 }, { "epoch": 21.9, "learning_rate": 0.001, "loss": 2.5813, "step": 114048 }, { "epoch": 21.9, "learning_rate": 0.001, "loss": 2.5839, "step": 114060 }, { "epoch": 21.9, "learning_rate": 0.001, "loss": 2.5888, "step": 114072 }, { "epoch": 21.91, "learning_rate": 0.001, "loss": 2.5752, "step": 114084 }, { "epoch": 21.91, "learning_rate": 0.001, "loss": 2.584, "step": 114096 }, { "epoch": 21.91, "learning_rate": 0.001, "loss": 2.5825, "step": 114108 }, { "epoch": 21.91, "learning_rate": 0.001, "loss": 2.5895, "step": 114120 }, { "epoch": 21.91, "learning_rate": 0.001, "loss": 2.5892, "step": 114132 }, { "epoch": 21.92, "learning_rate": 0.001, "loss": 2.5817, "step": 114144 }, { "epoch": 21.92, "learning_rate": 0.001, "loss": 2.5888, "step": 114156 }, { "epoch": 21.92, "learning_rate": 0.001, "loss": 2.5812, "step": 114168 }, { "epoch": 21.92, "learning_rate": 0.001, "loss": 2.5858, "step": 114180 }, { "epoch": 21.93, "learning_rate": 0.001, "loss": 2.5889, "step": 114192 }, { "epoch": 21.93, "learning_rate": 0.001, "loss": 2.5748, "step": 114204 }, { "epoch": 21.93, "learning_rate": 0.001, "loss": 2.5693, "step": 114216 }, { "epoch": 21.93, "learning_rate": 0.001, "loss": 2.577, "step": 114228 }, { "epoch": 21.94, "learning_rate": 0.001, "loss": 2.5798, "step": 114240 }, { "epoch": 21.94, "learning_rate": 0.001, "loss": 2.5747, "step": 114252 }, { "epoch": 21.94, "learning_rate": 0.001, "loss": 2.5838, "step": 114264 }, { "epoch": 21.94, "learning_rate": 0.001, "loss": 2.5734, "step": 114276 }, { "epoch": 21.94, "learning_rate": 0.001, "loss": 2.5847, "step": 114288 }, { "epoch": 21.95, "learning_rate": 0.001, "loss": 2.5845, "step": 114300 }, { "epoch": 21.95, "learning_rate": 0.001, "loss": 2.5797, "step": 114312 }, { "epoch": 21.95, "learning_rate": 0.001, "loss": 2.5792, "step": 114324 }, { "epoch": 21.95, "learning_rate": 0.001, "loss": 2.5834, "step": 114336 }, { "epoch": 21.96, "learning_rate": 0.001, "loss": 2.5787, "step": 114348 }, { "epoch": 21.96, "learning_rate": 0.001, "loss": 2.5846, "step": 114360 }, { "epoch": 21.96, "learning_rate": 0.001, "loss": 2.5857, "step": 114372 }, { "epoch": 21.96, "eval_ag_news_accuracy": 0.32103125, "eval_ag_news_bleu_score": 4.784761900490624, "eval_ag_news_bleu_score_sem": 0.15790721125078844, "eval_ag_news_emb_cos_sim": 0.8021398782730103, "eval_ag_news_emb_cos_sim_sem": 0.007815886760699312, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5692100524902344, "eval_ag_news_n_ngrams_match_1": 13.88, "eval_ag_news_n_ngrams_match_2": 3.058, "eval_ag_news_n_ngrams_match_3": 0.904, "eval_ag_news_num_pred_words": 46.168, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.4885479858758, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34493416482914646, "eval_ag_news_runtime": 10.9115, "eval_ag_news_samples_per_second": 45.823, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.34895214995523965, "eval_ag_news_token_set_f1_sem": 0.004623837165741866, "eval_ag_news_token_set_precision": 0.3312751277647174, "eval_ag_news_token_set_recall": 0.38771738803002936, "eval_ag_news_true_num_tokens": 56.09375, "step": 114375 }, { "epoch": 21.96, "eval_anthropic_toxic_prompts_accuracy": 0.113, "eval_anthropic_toxic_prompts_bleu_score": 2.906965262187682, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1102732317782224, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673775851726532, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008469590966548358, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.29018497467041, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.806, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.622, "eval_anthropic_toxic_prompts_num_pred_words": 47.648, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.84782936500722, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21085188577868202, "eval_anthropic_toxic_prompts_runtime": 9.8936, "eval_anthropic_toxic_prompts_samples_per_second": 50.538, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.34963139807541843, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006226400424160567, "eval_anthropic_toxic_prompts_token_set_precision": 0.43294736046057675, "eval_anthropic_toxic_prompts_token_set_recall": 0.31569602391112955, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 114375 }, { "epoch": 21.96, "eval_arxiv_accuracy": 0.344625, "eval_arxiv_bleu_score": 4.2328438586134, "eval_arxiv_bleu_score_sem": 0.11858662820281565, "eval_arxiv_emb_cos_sim": 0.7667374014854431, "eval_arxiv_emb_cos_sim_sem": 0.007132719932107627, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.42853045463562, "eval_arxiv_n_ngrams_match_1": 14.93, "eval_arxiv_n_ngrams_match_2": 2.928, "eval_arxiv_n_ngrams_match_3": 0.616, "eval_arxiv_num_pred_words": 40.052, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.83130144616762, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35780127197904593, "eval_arxiv_runtime": 11.2411, "eval_arxiv_samples_per_second": 44.48, "eval_arxiv_steps_per_second": 0.089, "eval_arxiv_token_set_f1": 0.3496929306406429, "eval_arxiv_token_set_f1_sem": 0.004551344344202184, "eval_arxiv_token_set_precision": 0.30148368624472965, "eval_arxiv_token_set_recall": 0.43757430553172916, "eval_arxiv_true_num_tokens": 64.0, "step": 114375 }, { "epoch": 21.96, "eval_python_code_alpaca_accuracy": 0.15815625, "eval_python_code_alpaca_bleu_score": 4.63821649394017, "eval_python_code_alpaca_bleu_score_sem": 0.1505187540499886, "eval_python_code_alpaca_emb_cos_sim": 0.7470142245292664, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010516779961100394, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.92989182472229, "eval_python_code_alpaca_n_ngrams_match_1": 9.732, "eval_python_code_alpaca_n_ngrams_match_2": 2.828, "eval_python_code_alpaca_n_ngrams_match_3": 0.922, "eval_python_code_alpaca_num_pred_words": 42.168, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.72560473961348, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3322823791242405, "eval_python_code_alpaca_runtime": 9.9436, "eval_python_code_alpaca_samples_per_second": 50.284, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4716686909205951, "eval_python_code_alpaca_token_set_f1_sem": 0.005666082812173575, "eval_python_code_alpaca_token_set_precision": 0.5285477863338287, "eval_python_code_alpaca_token_set_recall": 0.4490487275601845, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 114375 }, { "epoch": 21.96, "eval_wikibio_accuracy": 0.32621875, "eval_wikibio_bleu_score": 5.937238016981768, "eval_wikibio_bleu_score_sem": 0.2151262452784868, "eval_wikibio_emb_cos_sim": 0.7314822673797607, "eval_wikibio_emb_cos_sim_sem": 0.01095206866125386, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.725485324859619, "eval_wikibio_n_ngrams_match_1": 10.116, "eval_wikibio_n_ngrams_match_2": 3.346, "eval_wikibio_n_ngrams_match_3": 1.168, "eval_wikibio_num_pred_words": 36.184, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.491364649902714, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3566688101601526, "eval_wikibio_runtime": 10.5295, "eval_wikibio_samples_per_second": 47.485, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.31979976001231264, "eval_wikibio_token_set_f1_sem": 0.005290779582982193, "eval_wikibio_token_set_precision": 0.3291476561237887, "eval_wikibio_token_set_recall": 0.32477579009700175, "eval_wikibio_true_num_tokens": 61.1328125, "step": 114375 }, { "epoch": 21.96, "eval_nq_accuracy": 0.52428125, "eval_nq_bleu_score": 11.417613425987117, "eval_nq_bleu_score_sem": 0.4654381066058178, "eval_nq_emb_cos_sim": 0.8305596709251404, "eval_nq_emb_cos_sim_sem": 0.007424187780073731, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.205254316329956, "eval_nq_n_ngrams_match_1": 22.996, "eval_nq_n_ngrams_match_2": 8.338, "eval_nq_n_ngrams_match_3": 3.748, "eval_nq_num_pred_words": 48.95, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.072558574289197, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.444935522803793, "eval_nq_runtime": 10.5871, "eval_nq_samples_per_second": 47.227, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4591798232474928, "eval_nq_token_set_f1_sem": 0.004980554517841983, "eval_nq_token_set_precision": 0.4171823812040491, "eval_nq_token_set_recall": 0.5191321275823937, "eval_nq_true_num_tokens": 64.0, "step": 114375 }, { "epoch": 21.96, "learning_rate": 0.001, "loss": 2.577, "step": 114384 }, { "epoch": 21.97, "learning_rate": 0.001, "loss": 2.5815, "step": 114396 }, { "epoch": 21.97, "learning_rate": 0.001, "loss": 2.5853, "step": 114408 }, { "epoch": 21.97, "learning_rate": 0.001, "loss": 2.5877, "step": 114420 }, { "epoch": 21.97, "learning_rate": 0.001, "loss": 2.5869, "step": 114432 }, { "epoch": 21.97, "learning_rate": 0.001, "loss": 2.5762, "step": 114444 }, { "epoch": 21.98, "learning_rate": 0.001, "loss": 2.5729, "step": 114456 }, { "epoch": 21.98, "learning_rate": 0.001, "loss": 2.5837, "step": 114468 }, { "epoch": 21.98, "learning_rate": 0.001, "loss": 2.5772, "step": 114480 }, { "epoch": 21.98, "learning_rate": 0.001, "loss": 2.5802, "step": 114492 }, { "epoch": 21.99, "learning_rate": 0.001, "loss": 2.5837, "step": 114504 }, { "epoch": 21.99, "learning_rate": 0.001, "loss": 2.5747, "step": 114516 }, { "epoch": 21.99, "learning_rate": 0.001, "loss": 2.5857, "step": 114528 }, { "epoch": 21.99, "learning_rate": 0.001, "loss": 2.5865, "step": 114540 }, { "epoch": 22.0, "learning_rate": 0.001, "loss": 2.5844, "step": 114552 }, { "epoch": 22.0, "learning_rate": 0.001, "loss": 2.5852, "step": 114564 }, { "epoch": 22.0, "learning_rate": 0.001, "loss": 2.5845, "step": 114576 }, { "epoch": 22.0, "learning_rate": 0.001, "loss": 2.5774, "step": 114588 }, { "epoch": 22.0, "learning_rate": 0.001, "loss": 2.5665, "step": 114600 }, { "epoch": 22.01, "learning_rate": 0.001, "loss": 2.5669, "step": 114612 }, { "epoch": 22.01, "learning_rate": 0.001, "loss": 2.5675, "step": 114624 }, { "epoch": 22.01, "learning_rate": 0.001, "loss": 2.5661, "step": 114636 }, { "epoch": 22.01, "learning_rate": 0.001, "loss": 2.5628, "step": 114648 }, { "epoch": 22.02, "learning_rate": 0.001, "loss": 2.5549, "step": 114660 }, { "epoch": 22.02, "learning_rate": 0.001, "loss": 2.5688, "step": 114672 }, { "epoch": 22.02, "learning_rate": 0.001, "loss": 2.5604, "step": 114684 }, { "epoch": 22.02, "learning_rate": 0.001, "loss": 2.5664, "step": 114696 }, { "epoch": 22.03, "learning_rate": 0.001, "loss": 2.5629, "step": 114708 }, { "epoch": 22.03, "learning_rate": 0.001, "loss": 2.5669, "step": 114720 }, { "epoch": 22.03, "learning_rate": 0.001, "loss": 2.567, "step": 114732 }, { "epoch": 22.03, "learning_rate": 0.001, "loss": 2.5585, "step": 114744 }, { "epoch": 22.03, "learning_rate": 0.001, "loss": 2.5689, "step": 114756 }, { "epoch": 22.04, "learning_rate": 0.001, "loss": 2.5636, "step": 114768 }, { "epoch": 22.04, "learning_rate": 0.001, "loss": 2.566, "step": 114780 }, { "epoch": 22.04, "learning_rate": 0.001, "loss": 2.5557, "step": 114792 }, { "epoch": 22.04, "learning_rate": 0.001, "loss": 2.5716, "step": 114804 }, { "epoch": 22.05, "learning_rate": 0.001, "loss": 2.5717, "step": 114816 }, { "epoch": 22.05, "learning_rate": 0.001, "loss": 2.575, "step": 114828 }, { "epoch": 22.05, "learning_rate": 0.001, "loss": 2.5569, "step": 114840 }, { "epoch": 22.05, "learning_rate": 0.001, "loss": 2.5656, "step": 114852 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 2.5566, "step": 114864 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 2.5607, "step": 114876 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 2.5646, "step": 114888 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 2.573, "step": 114900 }, { "epoch": 22.06, "learning_rate": 0.001, "loss": 2.5651, "step": 114912 }, { "epoch": 22.07, "learning_rate": 0.001, "loss": 2.5649, "step": 114924 }, { "epoch": 22.07, "learning_rate": 0.001, "loss": 2.5532, "step": 114936 }, { "epoch": 22.07, "learning_rate": 0.001, "loss": 2.564, "step": 114948 }, { "epoch": 22.07, "learning_rate": 0.001, "loss": 2.5804, "step": 114960 }, { "epoch": 22.08, "learning_rate": 0.001, "loss": 2.5794, "step": 114972 }, { "epoch": 22.08, "learning_rate": 0.001, "loss": 2.5696, "step": 114984 }, { "epoch": 22.08, "learning_rate": 0.001, "loss": 2.5688, "step": 114996 }, { "epoch": 22.08, "eval_ag_news_accuracy": 0.32159375, "eval_ag_news_bleu_score": 4.7238797361331475, "eval_ag_news_bleu_score_sem": 0.15886998226372415, "eval_ag_news_emb_cos_sim": 0.8122814893722534, "eval_ag_news_emb_cos_sim_sem": 0.006870719316695718, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.582092761993408, "eval_ag_news_n_ngrams_match_1": 13.96, "eval_ag_news_n_ngrams_match_2": 3.05, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.922, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.94869424157151, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.344847234060312, "eval_ag_news_runtime": 10.3756, "eval_ag_news_samples_per_second": 48.19, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.34868912365276444, "eval_ag_news_token_set_f1_sem": 0.0044073167591970296, "eval_ag_news_token_set_precision": 0.3332341760416378, "eval_ag_news_token_set_recall": 0.38273018711868484, "eval_ag_news_true_num_tokens": 56.09375, "step": 115000 }, { "epoch": 22.08, "eval_anthropic_toxic_prompts_accuracy": 0.11371875, "eval_anthropic_toxic_prompts_bleu_score": 3.0856486725872965, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11414719573617015, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6693856120109558, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010228048365564959, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.284191370010376, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.168, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 47.408, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.6873953590923, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875, "eval_anthropic_toxic_prompts_rouge_score": 0.21133468214565337, "eval_anthropic_toxic_prompts_runtime": 9.9183, "eval_anthropic_toxic_prompts_samples_per_second": 50.412, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3543421457270459, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006548411078583062, "eval_anthropic_toxic_prompts_token_set_precision": 0.433028727004398, "eval_anthropic_toxic_prompts_token_set_recall": 0.32447431356186524, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 115000 }, { "epoch": 22.08, "eval_arxiv_accuracy": 0.345, "eval_arxiv_bleu_score": 4.332887674090163, "eval_arxiv_bleu_score_sem": 0.1219026245576228, "eval_arxiv_emb_cos_sim": 0.7662792205810547, "eval_arxiv_emb_cos_sim_sem": 0.006597461541464352, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.428192377090454, "eval_arxiv_n_ngrams_match_1": 14.974, "eval_arxiv_n_ngrams_match_2": 2.902, "eval_arxiv_n_ngrams_match_3": 0.664, "eval_arxiv_num_pred_words": 40.288, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.820879837215674, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3596958667814193, "eval_arxiv_runtime": 10.7978, "eval_arxiv_samples_per_second": 46.306, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.35176024393843747, "eval_arxiv_token_set_f1_sem": 0.004080474562135315, "eval_arxiv_token_set_precision": 0.3026004170903108, "eval_arxiv_token_set_recall": 0.4365537162085265, "eval_arxiv_true_num_tokens": 64.0, "step": 115000 }, { "epoch": 22.08, "eval_python_code_alpaca_accuracy": 0.15759375, "eval_python_code_alpaca_bleu_score": 4.592611588898136, "eval_python_code_alpaca_bleu_score_sem": 0.1515017279756423, "eval_python_code_alpaca_emb_cos_sim": 0.7440839409828186, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010540835274887988, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9287099838256836, "eval_python_code_alpaca_n_ngrams_match_1": 9.696, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 0.98, "eval_python_code_alpaca_num_pred_words": 43.064, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.703487126442802, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3278541879311717, "eval_python_code_alpaca_runtime": 9.8815, "eval_python_code_alpaca_samples_per_second": 50.6, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.46579912369083176, "eval_python_code_alpaca_token_set_f1_sem": 0.005772838048274028, "eval_python_code_alpaca_token_set_precision": 0.5238379719629166, "eval_python_code_alpaca_token_set_recall": 0.4414236591984351, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 115000 }, { "epoch": 22.08, "eval_wikibio_accuracy": 0.32103125, "eval_wikibio_bleu_score": 5.988426808877446, "eval_wikibio_bleu_score_sem": 0.20765197765749627, "eval_wikibio_emb_cos_sim": 0.7449048757553101, "eval_wikibio_emb_cos_sim_sem": 0.008843595514788952, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7470011711120605, "eval_wikibio_n_ngrams_match_1": 10.144, "eval_wikibio_n_ngrams_match_2": 3.364, "eval_wikibio_n_ngrams_match_3": 1.208, "eval_wikibio_num_pred_words": 35.814, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.393759555544186, "eval_wikibio_pred_num_tokens": 62.9921875, "eval_wikibio_rouge_score": 0.35676634076127417, "eval_wikibio_runtime": 10.2449, "eval_wikibio_samples_per_second": 48.805, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3230679948872902, "eval_wikibio_token_set_f1_sem": 0.005287042962957077, "eval_wikibio_token_set_precision": 0.32933225668108, "eval_wikibio_token_set_recall": 0.33355628885650834, "eval_wikibio_true_num_tokens": 61.1328125, "step": 115000 }, { "epoch": 22.08, "eval_nq_accuracy": 0.526, "eval_nq_bleu_score": 11.709103363297908, "eval_nq_bleu_score_sem": 0.4913395272078558, "eval_nq_emb_cos_sim": 0.8308370113372803, "eval_nq_emb_cos_sim_sem": 0.007315120955750648, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2016963958740234, "eval_nq_n_ngrams_match_1": 22.848, "eval_nq_n_ngrams_match_2": 8.432, "eval_nq_n_ngrams_match_3": 3.956, "eval_nq_num_pred_words": 49.092, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.0403364883505, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44107554008541505, "eval_nq_runtime": 11.058, "eval_nq_samples_per_second": 45.216, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.45775410017236257, "eval_nq_token_set_f1_sem": 0.0050027542273605105, "eval_nq_token_set_precision": 0.4144863007077681, "eval_nq_token_set_recall": 0.519926324232164, "eval_nq_true_num_tokens": 64.0, "step": 115000 }, { "epoch": 22.08, "learning_rate": 0.001, "loss": 2.5619, "step": 115008 }, { "epoch": 22.09, "learning_rate": 0.001, "loss": 2.5885, "step": 115020 }, { "epoch": 22.09, "learning_rate": 0.001, "loss": 2.5661, "step": 115032 }, { "epoch": 22.09, "learning_rate": 0.001, "loss": 2.5653, "step": 115044 }, { "epoch": 22.09, "learning_rate": 0.001, "loss": 2.5769, "step": 115056 }, { "epoch": 22.09, "learning_rate": 0.001, "loss": 2.5717, "step": 115068 }, { "epoch": 22.1, "learning_rate": 0.001, "loss": 2.5765, "step": 115080 }, { "epoch": 22.1, "learning_rate": 0.001, "loss": 2.5641, "step": 115092 }, { "epoch": 22.1, "learning_rate": 0.001, "loss": 2.5689, "step": 115104 }, { "epoch": 22.1, "learning_rate": 0.001, "loss": 2.5675, "step": 115116 }, { "epoch": 22.11, "learning_rate": 0.001, "loss": 2.5652, "step": 115128 }, { "epoch": 22.11, "learning_rate": 0.001, "loss": 2.5565, "step": 115140 }, { "epoch": 22.11, "learning_rate": 0.001, "loss": 2.5763, "step": 115152 }, { "epoch": 22.11, "learning_rate": 0.001, "loss": 2.565, "step": 115164 }, { "epoch": 22.12, "learning_rate": 0.001, "loss": 2.5714, "step": 115176 }, { "epoch": 22.12, "learning_rate": 0.001, "loss": 2.57, "step": 115188 }, { "epoch": 22.12, "learning_rate": 0.001, "loss": 2.5684, "step": 115200 }, { "epoch": 22.12, "learning_rate": 0.001, "loss": 2.5679, "step": 115212 }, { "epoch": 22.12, "learning_rate": 0.001, "loss": 2.5705, "step": 115224 }, { "epoch": 22.13, "learning_rate": 0.001, "loss": 2.5732, "step": 115236 }, { "epoch": 22.13, "learning_rate": 0.001, "loss": 2.5709, "step": 115248 }, { "epoch": 22.13, "learning_rate": 0.001, "loss": 2.572, "step": 115260 }, { "epoch": 22.13, "learning_rate": 0.001, "loss": 2.5621, "step": 115272 }, { "epoch": 22.14, "learning_rate": 0.001, "loss": 2.5707, "step": 115284 }, { "epoch": 22.14, "learning_rate": 0.001, "loss": 2.5704, "step": 115296 }, { "epoch": 22.14, "learning_rate": 0.001, "loss": 2.5687, "step": 115308 }, { "epoch": 22.14, "learning_rate": 0.001, "loss": 2.5854, "step": 115320 }, { "epoch": 22.15, "learning_rate": 0.001, "loss": 2.5709, "step": 115332 }, { "epoch": 22.15, "learning_rate": 0.001, "loss": 2.582, "step": 115344 }, { "epoch": 22.15, "learning_rate": 0.001, "loss": 2.5622, "step": 115356 }, { "epoch": 22.15, "learning_rate": 0.001, "loss": 2.5746, "step": 115368 }, { "epoch": 22.15, "learning_rate": 0.001, "loss": 2.5702, "step": 115380 }, { "epoch": 22.16, "learning_rate": 0.001, "loss": 2.5684, "step": 115392 }, { "epoch": 22.16, "learning_rate": 0.001, "loss": 2.5657, "step": 115404 }, { "epoch": 22.16, "learning_rate": 0.001, "loss": 2.5677, "step": 115416 }, { "epoch": 22.16, "learning_rate": 0.001, "loss": 2.5675, "step": 115428 }, { "epoch": 22.17, "learning_rate": 0.001, "loss": 2.5735, "step": 115440 }, { "epoch": 22.17, "learning_rate": 0.001, "loss": 2.5652, "step": 115452 }, { "epoch": 22.17, "learning_rate": 0.001, "loss": 2.5661, "step": 115464 }, { "epoch": 22.17, "learning_rate": 0.001, "loss": 2.5771, "step": 115476 }, { "epoch": 22.18, "learning_rate": 0.001, "loss": 2.5651, "step": 115488 }, { "epoch": 22.18, "learning_rate": 0.001, "loss": 2.5718, "step": 115500 }, { "epoch": 22.18, "learning_rate": 0.001, "loss": 2.5596, "step": 115512 }, { "epoch": 22.18, "learning_rate": 0.001, "loss": 2.5677, "step": 115524 }, { "epoch": 22.18, "learning_rate": 0.001, "loss": 2.5818, "step": 115536 }, { "epoch": 22.19, "learning_rate": 0.001, "loss": 2.5791, "step": 115548 }, { "epoch": 22.19, "learning_rate": 0.001, "loss": 2.5753, "step": 115560 }, { "epoch": 22.19, "learning_rate": 0.001, "loss": 2.566, "step": 115572 }, { "epoch": 22.19, "learning_rate": 0.001, "loss": 2.5741, "step": 115584 }, { "epoch": 22.2, "learning_rate": 0.001, "loss": 2.5733, "step": 115596 }, { "epoch": 22.2, "learning_rate": 0.001, "loss": 2.5716, "step": 115608 }, { "epoch": 22.2, "learning_rate": 0.001, "loss": 2.5712, "step": 115620 }, { "epoch": 22.2, "eval_ag_news_accuracy": 0.3208125, "eval_ag_news_bleu_score": 4.8383610674653665, "eval_ag_news_bleu_score_sem": 0.15287793482284692, "eval_ag_news_emb_cos_sim": 0.8092503547668457, "eval_ag_news_emb_cos_sim_sem": 0.007799535100566101, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.58366060256958, "eval_ag_news_n_ngrams_match_1": 13.95, "eval_ag_news_n_ngrams_match_2": 3.148, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.736, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 36.00510026933896, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3456168995319642, "eval_ag_news_runtime": 10.4292, "eval_ag_news_samples_per_second": 47.942, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.34918508997128606, "eval_ag_news_token_set_f1_sem": 0.004444407429988588, "eval_ag_news_token_set_precision": 0.33353074725350246, "eval_ag_news_token_set_recall": 0.38303365382193366, "eval_ag_news_true_num_tokens": 56.09375, "step": 115625 }, { "epoch": 22.2, "eval_anthropic_toxic_prompts_accuracy": 0.112625, "eval_anthropic_toxic_prompts_bleu_score": 3.119223642899152, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109346857277077, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6737870573997498, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009638637860030266, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2874598503112793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.124, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.888, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708, "eval_anthropic_toxic_prompts_num_pred_words": 47.138, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.77476529077541, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21176920879582883, "eval_anthropic_toxic_prompts_runtime": 9.9911, "eval_anthropic_toxic_prompts_samples_per_second": 50.044, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3504143829702812, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006300040865617722, "eval_anthropic_toxic_prompts_token_set_precision": 0.4360810895301237, "eval_anthropic_toxic_prompts_token_set_recall": 0.31946068739308475, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 115625 }, { "epoch": 22.2, "eval_arxiv_accuracy": 0.34375, "eval_arxiv_bleu_score": 4.309186753701695, "eval_arxiv_bleu_score_sem": 0.12135217228988915, "eval_arxiv_emb_cos_sim": 0.771766185760498, "eval_arxiv_emb_cos_sim_sem": 0.006912421647663724, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4498324394226074, "eval_arxiv_n_ngrams_match_1": 15.26, "eval_arxiv_n_ngrams_match_2": 2.978, "eval_arxiv_n_ngrams_match_3": 0.656, "eval_arxiv_num_pred_words": 41.552, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.49511452701101, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35855996161871206, "eval_arxiv_runtime": 10.2964, "eval_arxiv_samples_per_second": 48.561, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35263050234596155, "eval_arxiv_token_set_f1_sem": 0.004315855523778874, "eval_arxiv_token_set_precision": 0.3070109782381153, "eval_arxiv_token_set_recall": 0.43150787570777377, "eval_arxiv_true_num_tokens": 64.0, "step": 115625 }, { "epoch": 22.2, "eval_python_code_alpaca_accuracy": 0.15634375, "eval_python_code_alpaca_bleu_score": 4.648977876646551, "eval_python_code_alpaca_bleu_score_sem": 0.15743110360638743, "eval_python_code_alpaca_emb_cos_sim": 0.7463724613189697, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00996857787815272, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9322311878204346, "eval_python_code_alpaca_n_ngrams_match_1": 9.614, "eval_python_code_alpaca_n_ngrams_match_2": 2.872, "eval_python_code_alpaca_n_ngrams_match_3": 0.986, "eval_python_code_alpaca_num_pred_words": 42.91, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.76946200736748, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.327085223619867, "eval_python_code_alpaca_runtime": 10.07, "eval_python_code_alpaca_samples_per_second": 49.652, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4624767135484843, "eval_python_code_alpaca_token_set_f1_sem": 0.00585234104111037, "eval_python_code_alpaca_token_set_precision": 0.5209808810214367, "eval_python_code_alpaca_token_set_recall": 0.4415241516758691, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 115625 }, { "epoch": 22.2, "eval_wikibio_accuracy": 0.31828125, "eval_wikibio_bleu_score": 5.894394280844617, "eval_wikibio_bleu_score_sem": 0.21338569848021935, "eval_wikibio_emb_cos_sim": 0.7446073293685913, "eval_wikibio_emb_cos_sim_sem": 0.00900049176818231, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.77712345123291, "eval_wikibio_n_ngrams_match_1": 10.312, "eval_wikibio_n_ngrams_match_2": 3.436, "eval_wikibio_n_ngrams_match_3": 1.212, "eval_wikibio_num_pred_words": 37.434, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.69018385968419, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35711947377654774, "eval_wikibio_runtime": 10.0381, "eval_wikibio_samples_per_second": 49.81, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3225553033294478, "eval_wikibio_token_set_f1_sem": 0.005162530664144254, "eval_wikibio_token_set_precision": 0.3326185636482031, "eval_wikibio_token_set_recall": 0.32612116799793156, "eval_wikibio_true_num_tokens": 61.1328125, "step": 115625 }, { "epoch": 22.2, "eval_nq_accuracy": 0.5248125, "eval_nq_bleu_score": 11.671781350251909, "eval_nq_bleu_score_sem": 0.5040887017446044, "eval_nq_emb_cos_sim": 0.8326694369316101, "eval_nq_emb_cos_sim_sem": 0.006603992985894525, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.204833984375, "eval_nq_n_ngrams_match_1": 22.932, "eval_nq_n_ngrams_match_2": 8.464, "eval_nq_n_ngrams_match_3": 3.904, "eval_nq_num_pred_words": 48.994, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.068745889360002, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44350902934874165, "eval_nq_runtime": 10.3744, "eval_nq_samples_per_second": 48.196, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.45723941450951905, "eval_nq_token_set_f1_sem": 0.005266574943958842, "eval_nq_token_set_precision": 0.4147737792573533, "eval_nq_token_set_recall": 0.5175651205144508, "eval_nq_true_num_tokens": 64.0, "step": 115625 }, { "epoch": 22.2, "learning_rate": 0.001, "loss": 2.572, "step": 115632 }, { "epoch": 22.21, "learning_rate": 0.001, "loss": 2.5713, "step": 115644 }, { "epoch": 22.21, "learning_rate": 0.001, "loss": 2.5732, "step": 115656 }, { "epoch": 22.21, "learning_rate": 0.001, "loss": 2.5675, "step": 115668 }, { "epoch": 22.21, "learning_rate": 0.001, "loss": 2.5721, "step": 115680 }, { "epoch": 22.21, "learning_rate": 0.001, "loss": 2.5739, "step": 115692 }, { "epoch": 22.22, "learning_rate": 0.001, "loss": 2.5784, "step": 115704 }, { "epoch": 22.22, "learning_rate": 0.001, "loss": 2.5696, "step": 115716 }, { "epoch": 22.22, "learning_rate": 0.001, "loss": 2.5825, "step": 115728 }, { "epoch": 22.22, "learning_rate": 0.001, "loss": 2.5727, "step": 115740 }, { "epoch": 22.23, "learning_rate": 0.001, "loss": 2.5766, "step": 115752 }, { "epoch": 22.23, "learning_rate": 0.001, "loss": 2.5828, "step": 115764 }, { "epoch": 22.23, "learning_rate": 0.001, "loss": 2.5728, "step": 115776 }, { "epoch": 22.23, "learning_rate": 0.001, "loss": 2.578, "step": 115788 }, { "epoch": 22.24, "learning_rate": 0.001, "loss": 2.5744, "step": 115800 }, { "epoch": 22.24, "learning_rate": 0.001, "loss": 2.5778, "step": 115812 }, { "epoch": 22.24, "learning_rate": 0.001, "loss": 2.5717, "step": 115824 }, { "epoch": 22.24, "learning_rate": 0.001, "loss": 2.5735, "step": 115836 }, { "epoch": 22.24, "learning_rate": 0.001, "loss": 2.5716, "step": 115848 }, { "epoch": 22.25, "learning_rate": 0.001, "loss": 2.5655, "step": 115860 }, { "epoch": 22.25, "learning_rate": 0.001, "loss": 2.5875, "step": 115872 }, { "epoch": 22.25, "learning_rate": 0.001, "loss": 2.5812, "step": 115884 }, { "epoch": 22.25, "learning_rate": 0.001, "loss": 2.5792, "step": 115896 }, { "epoch": 22.26, "learning_rate": 0.001, "loss": 2.5745, "step": 115908 }, { "epoch": 22.26, "learning_rate": 0.001, "loss": 2.5741, "step": 115920 }, { "epoch": 22.26, "learning_rate": 0.001, "loss": 2.579, "step": 115932 }, { "epoch": 22.26, "learning_rate": 0.001, "loss": 2.569, "step": 115944 }, { "epoch": 22.26, "learning_rate": 0.001, "loss": 2.5792, "step": 115956 }, { "epoch": 22.27, "learning_rate": 0.001, "loss": 2.5788, "step": 115968 }, { "epoch": 22.27, "learning_rate": 0.001, "loss": 2.5793, "step": 115980 }, { "epoch": 22.27, "learning_rate": 0.001, "loss": 2.5835, "step": 115992 }, { "epoch": 22.27, "learning_rate": 0.001, "loss": 2.5798, "step": 116004 }, { "epoch": 22.28, "learning_rate": 0.001, "loss": 2.5644, "step": 116016 }, { "epoch": 22.28, "learning_rate": 0.001, "loss": 2.5875, "step": 116028 }, { "epoch": 22.28, "learning_rate": 0.001, "loss": 2.572, "step": 116040 }, { "epoch": 22.28, "learning_rate": 0.001, "loss": 2.5772, "step": 116052 }, { "epoch": 22.29, "learning_rate": 0.001, "loss": 2.5715, "step": 116064 }, { "epoch": 22.29, "learning_rate": 0.001, "loss": 2.5716, "step": 116076 }, { "epoch": 22.29, "learning_rate": 0.001, "loss": 2.5705, "step": 116088 }, { "epoch": 22.29, "learning_rate": 0.001, "loss": 2.5782, "step": 116100 }, { "epoch": 22.29, "learning_rate": 0.001, "loss": 2.5703, "step": 116112 }, { "epoch": 22.3, "learning_rate": 0.001, "loss": 2.5742, "step": 116124 }, { "epoch": 22.3, "learning_rate": 0.001, "loss": 2.5743, "step": 116136 }, { "epoch": 22.3, "learning_rate": 0.001, "loss": 2.5815, "step": 116148 }, { "epoch": 22.3, "learning_rate": 0.001, "loss": 2.571, "step": 116160 }, { "epoch": 22.31, "learning_rate": 0.001, "loss": 2.577, "step": 116172 }, { "epoch": 22.31, "learning_rate": 0.001, "loss": 2.5721, "step": 116184 }, { "epoch": 22.31, "learning_rate": 0.001, "loss": 2.5739, "step": 116196 }, { "epoch": 22.31, "learning_rate": 0.001, "loss": 2.5846, "step": 116208 }, { "epoch": 22.32, "learning_rate": 0.001, "loss": 2.5596, "step": 116220 }, { "epoch": 22.32, "learning_rate": 0.001, "loss": 2.585, "step": 116232 }, { "epoch": 22.32, "learning_rate": 0.001, "loss": 2.5813, "step": 116244 }, { "epoch": 22.32, "eval_ag_news_accuracy": 0.32046875, "eval_ag_news_bleu_score": 4.820447644939628, "eval_ag_news_bleu_score_sem": 0.15686168539058712, "eval_ag_news_emb_cos_sim": 0.806586503982544, "eval_ag_news_emb_cos_sim_sem": 0.0072461252476492995, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5799827575683594, "eval_ag_news_n_ngrams_match_1": 13.988, "eval_ag_news_n_ngrams_match_2": 3.094, "eval_ag_news_n_ngrams_match_3": 0.878, "eval_ag_news_num_pred_words": 47.312, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.87292230531959, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34703636655467196, "eval_ag_news_runtime": 11.0111, "eval_ag_news_samples_per_second": 45.409, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.3476490374831776, "eval_ag_news_token_set_f1_sem": 0.004285788192314468, "eval_ag_news_token_set_precision": 0.33428771966129356, "eval_ag_news_token_set_recall": 0.3771505388639559, "eval_ag_news_true_num_tokens": 56.09375, "step": 116250 }, { "epoch": 22.32, "eval_anthropic_toxic_prompts_accuracy": 0.11371875, "eval_anthropic_toxic_prompts_bleu_score": 3.016444840418683, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11264137508637179, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.659735918045044, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009070119679011324, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2803163528442383, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.08, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.83, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.666, "eval_anthropic_toxic_prompts_num_pred_words": 47.496, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.584181351135637, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21058227510647462, "eval_anthropic_toxic_prompts_runtime": 9.968, "eval_anthropic_toxic_prompts_samples_per_second": 50.16, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3537007728632725, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064541196798620975, "eval_anthropic_toxic_prompts_token_set_precision": 0.43178570591401677, "eval_anthropic_toxic_prompts_token_set_recall": 0.32573036917127296, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 116250 }, { "epoch": 22.32, "eval_arxiv_accuracy": 0.34390625, "eval_arxiv_bleu_score": 4.185872758914649, "eval_arxiv_bleu_score_sem": 0.11666933817858623, "eval_arxiv_emb_cos_sim": 0.7674313187599182, "eval_arxiv_emb_cos_sim_sem": 0.006895747959266109, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4447033405303955, "eval_arxiv_n_ngrams_match_1": 14.88, "eval_arxiv_n_ngrams_match_2": 2.884, "eval_arxiv_n_ngrams_match_3": 0.598, "eval_arxiv_num_pred_words": 40.786, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.333986543901734, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35431787802701264, "eval_arxiv_runtime": 10.2408, "eval_arxiv_samples_per_second": 48.824, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.349699944822291, "eval_arxiv_token_set_f1_sem": 0.004142739999432971, "eval_arxiv_token_set_precision": 0.3010176361794418, "eval_arxiv_token_set_recall": 0.4346348579163179, "eval_arxiv_true_num_tokens": 64.0, "step": 116250 }, { "epoch": 22.32, "eval_python_code_alpaca_accuracy": 0.1589375, "eval_python_code_alpaca_bleu_score": 4.361097722848697, "eval_python_code_alpaca_bleu_score_sem": 0.139940458071686, "eval_python_code_alpaca_emb_cos_sim": 0.7433726787567139, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009855297686067565, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9113898277282715, "eval_python_code_alpaca_n_ngrams_match_1": 9.498, "eval_python_code_alpaca_n_ngrams_match_2": 2.722, "eval_python_code_alpaca_n_ngrams_match_3": 0.878, "eval_python_code_alpaca_num_pred_words": 43.142, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.382329092074873, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3251099019183774, "eval_python_code_alpaca_runtime": 10.0262, "eval_python_code_alpaca_samples_per_second": 49.869, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.47102202508762725, "eval_python_code_alpaca_token_set_f1_sem": 0.006076332489951335, "eval_python_code_alpaca_token_set_precision": 0.5173190498229226, "eval_python_code_alpaca_token_set_recall": 0.45752735963416313, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 116250 }, { "epoch": 22.32, "eval_wikibio_accuracy": 0.32015625, "eval_wikibio_bleu_score": 5.82156297278923, "eval_wikibio_bleu_score_sem": 0.19800734201388231, "eval_wikibio_emb_cos_sim": 0.737400472164154, "eval_wikibio_emb_cos_sim_sem": 0.009803398451937954, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7829058170318604, "eval_wikibio_n_ngrams_match_1": 10.12, "eval_wikibio_n_ngrams_match_2": 3.38, "eval_wikibio_n_ngrams_match_3": 1.214, "eval_wikibio_num_pred_words": 36.704, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.94354830157379, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3495130925947355, "eval_wikibio_runtime": 10.5039, "eval_wikibio_samples_per_second": 47.601, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.31841487571440225, "eval_wikibio_token_set_f1_sem": 0.005544290379750777, "eval_wikibio_token_set_precision": 0.32771621765992043, "eval_wikibio_token_set_recall": 0.3259368214359848, "eval_wikibio_true_num_tokens": 61.1328125, "step": 116250 }, { "epoch": 22.32, "eval_nq_accuracy": 0.52646875, "eval_nq_bleu_score": 11.665484202363233, "eval_nq_bleu_score_sem": 0.47570836770547587, "eval_nq_emb_cos_sim": 0.8337132930755615, "eval_nq_emb_cos_sim_sem": 0.00710044269013787, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.207057476043701, "eval_nq_n_ngrams_match_1": 22.818, "eval_nq_n_ngrams_match_2": 8.358, "eval_nq_n_ngrams_match_3": 3.894, "eval_nq_num_pred_words": 49.104, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.088932604470212, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4389140926684925, "eval_nq_runtime": 10.596, "eval_nq_samples_per_second": 47.188, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4552582167522271, "eval_nq_token_set_f1_sem": 0.005076647761822282, "eval_nq_token_set_precision": 0.41312373732761054, "eval_nq_token_set_recall": 0.5167785659627977, "eval_nq_true_num_tokens": 64.0, "step": 116250 }, { "epoch": 22.32, "learning_rate": 0.001, "loss": 2.5794, "step": 116256 }, { "epoch": 22.32, "learning_rate": 0.001, "loss": 2.581, "step": 116268 }, { "epoch": 22.33, "learning_rate": 0.001, "loss": 2.5765, "step": 116280 }, { "epoch": 22.33, "learning_rate": 0.001, "loss": 2.5763, "step": 116292 }, { "epoch": 22.33, "learning_rate": 0.001, "loss": 2.5699, "step": 116304 }, { "epoch": 22.33, "learning_rate": 0.001, "loss": 2.5696, "step": 116316 }, { "epoch": 22.34, "learning_rate": 0.001, "loss": 2.5773, "step": 116328 }, { "epoch": 22.34, "learning_rate": 0.001, "loss": 2.5768, "step": 116340 }, { "epoch": 22.34, "learning_rate": 0.001, "loss": 2.5749, "step": 116352 }, { "epoch": 22.34, "learning_rate": 0.001, "loss": 2.5717, "step": 116364 }, { "epoch": 22.35, "learning_rate": 0.001, "loss": 2.5635, "step": 116376 }, { "epoch": 22.35, "learning_rate": 0.001, "loss": 2.5787, "step": 116388 }, { "epoch": 22.35, "learning_rate": 0.001, "loss": 2.5808, "step": 116400 }, { "epoch": 22.35, "learning_rate": 0.001, "loss": 2.5753, "step": 116412 }, { "epoch": 22.35, "learning_rate": 0.001, "loss": 2.5804, "step": 116424 }, { "epoch": 22.36, "learning_rate": 0.001, "loss": 2.5772, "step": 116436 }, { "epoch": 22.36, "learning_rate": 0.001, "loss": 2.5725, "step": 116448 }, { "epoch": 22.36, "learning_rate": 0.001, "loss": 2.5729, "step": 116460 }, { "epoch": 22.36, "learning_rate": 0.001, "loss": 2.5789, "step": 116472 }, { "epoch": 22.37, "learning_rate": 0.001, "loss": 2.5738, "step": 116484 }, { "epoch": 22.37, "learning_rate": 0.001, "loss": 2.5786, "step": 116496 }, { "epoch": 22.37, "learning_rate": 0.001, "loss": 2.5698, "step": 116508 }, { "epoch": 22.37, "learning_rate": 0.001, "loss": 2.5788, "step": 116520 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 2.5683, "step": 116532 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 2.5652, "step": 116544 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 2.5766, "step": 116556 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 2.5675, "step": 116568 }, { "epoch": 22.38, "learning_rate": 0.001, "loss": 2.5803, "step": 116580 }, { "epoch": 22.39, "learning_rate": 0.001, "loss": 2.5794, "step": 116592 }, { "epoch": 22.39, "learning_rate": 0.001, "loss": 2.5782, "step": 116604 }, { "epoch": 22.39, "learning_rate": 0.001, "loss": 2.576, "step": 116616 }, { "epoch": 22.39, "learning_rate": 0.001, "loss": 2.5761, "step": 116628 }, { "epoch": 22.4, "learning_rate": 0.001, "loss": 2.5839, "step": 116640 }, { "epoch": 22.4, "learning_rate": 0.001, "loss": 2.5761, "step": 116652 }, { "epoch": 22.4, "learning_rate": 0.001, "loss": 2.5694, "step": 116664 }, { "epoch": 22.4, "learning_rate": 0.001, "loss": 2.5778, "step": 116676 }, { "epoch": 22.41, "learning_rate": 0.001, "loss": 2.572, "step": 116688 }, { "epoch": 22.41, "learning_rate": 0.001, "loss": 2.5723, "step": 116700 }, { "epoch": 22.41, "learning_rate": 0.001, "loss": 2.567, "step": 116712 }, { "epoch": 22.41, "learning_rate": 0.001, "loss": 2.5848, "step": 116724 }, { "epoch": 22.41, "learning_rate": 0.001, "loss": 2.5751, "step": 116736 }, { "epoch": 22.42, "learning_rate": 0.001, "loss": 2.5731, "step": 116748 }, { "epoch": 22.42, "learning_rate": 0.001, "loss": 2.5731, "step": 116760 }, { "epoch": 22.42, "learning_rate": 0.001, "loss": 2.5788, "step": 116772 }, { "epoch": 22.42, "learning_rate": 0.001, "loss": 2.5796, "step": 116784 }, { "epoch": 22.43, "learning_rate": 0.001, "loss": 2.5725, "step": 116796 }, { "epoch": 22.43, "learning_rate": 0.001, "loss": 2.5704, "step": 116808 }, { "epoch": 22.43, "learning_rate": 0.001, "loss": 2.5763, "step": 116820 }, { "epoch": 22.43, "learning_rate": 0.001, "loss": 2.5799, "step": 116832 }, { "epoch": 22.44, "learning_rate": 0.001, "loss": 2.5776, "step": 116844 }, { "epoch": 22.44, "learning_rate": 0.001, "loss": 2.5648, "step": 116856 }, { "epoch": 22.44, "learning_rate": 0.001, "loss": 2.5747, "step": 116868 }, { "epoch": 22.44, "eval_ag_news_accuracy": 0.32215625, "eval_ag_news_bleu_score": 4.856594314520586, "eval_ag_news_bleu_score_sem": 0.1598958726882593, "eval_ag_news_emb_cos_sim": 0.8080046772956848, "eval_ag_news_emb_cos_sim_sem": 0.007112505908435098, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.582242250442505, "eval_ag_news_n_ngrams_match_1": 14.19, "eval_ag_news_n_ngrams_match_2": 3.144, "eval_ag_news_n_ngrams_match_3": 0.898, "eval_ag_news_num_pred_words": 46.968, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.954068557809826, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3498322419113007, "eval_ag_news_runtime": 10.5398, "eval_ag_news_samples_per_second": 47.439, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3531902275364227, "eval_ag_news_token_set_f1_sem": 0.004257954253477402, "eval_ag_news_token_set_precision": 0.33842815531173076, "eval_ag_news_token_set_recall": 0.38570623803257226, "eval_ag_news_true_num_tokens": 56.09375, "step": 116875 }, { "epoch": 22.44, "eval_anthropic_toxic_prompts_accuracy": 0.11390625, "eval_anthropic_toxic_prompts_bleu_score": 3.108279336497537, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11760437814455241, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6740961670875549, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008986192132735974, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.275770425796509, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, "eval_anthropic_toxic_prompts_num_pred_words": 47.352, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.463605873398034, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21285200780460556, "eval_anthropic_toxic_prompts_runtime": 11.5867, "eval_anthropic_toxic_prompts_samples_per_second": 43.153, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.3545118763706072, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066613576403408, "eval_anthropic_toxic_prompts_token_set_precision": 0.43568408752204074, "eval_anthropic_toxic_prompts_token_set_recall": 0.3227649481386179, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 116875 }, { "epoch": 22.44, "eval_arxiv_accuracy": 0.34609375, "eval_arxiv_bleu_score": 4.264890275315649, "eval_arxiv_bleu_score_sem": 0.11801758505991256, "eval_arxiv_emb_cos_sim": 0.7583524584770203, "eval_arxiv_emb_cos_sim_sem": 0.008303041767756989, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4266624450683594, "eval_arxiv_n_ngrams_match_1": 14.852, "eval_arxiv_n_ngrams_match_2": 2.94, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 40.196, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.77376203890849, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3538376492635257, "eval_arxiv_runtime": 10.2673, "eval_arxiv_samples_per_second": 48.698, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3502531526681534, "eval_arxiv_token_set_f1_sem": 0.004261380431489897, "eval_arxiv_token_set_precision": 0.29970747966491473, "eval_arxiv_token_set_recall": 0.44259492888542024, "eval_arxiv_true_num_tokens": 64.0, "step": 116875 }, { "epoch": 22.44, "eval_python_code_alpaca_accuracy": 0.16034375, "eval_python_code_alpaca_bleu_score": 4.434277339316382, "eval_python_code_alpaca_bleu_score_sem": 0.133387632881832, "eval_python_code_alpaca_emb_cos_sim": 0.7473664283752441, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010136398726389004, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.890942096710205, "eval_python_code_alpaca_n_ngrams_match_1": 9.644, "eval_python_code_alpaca_n_ngrams_match_2": 2.768, "eval_python_code_alpaca_n_ngrams_match_3": 0.86, "eval_python_code_alpaca_num_pred_words": 42.488, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.01026902678664, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3293255267957039, "eval_python_code_alpaca_runtime": 10.0451, "eval_python_code_alpaca_samples_per_second": 49.776, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.469584089644997, "eval_python_code_alpaca_token_set_f1_sem": 0.005865255611461517, "eval_python_code_alpaca_token_set_precision": 0.5238862778191908, "eval_python_code_alpaca_token_set_recall": 0.453314890228134, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 116875 }, { "epoch": 22.44, "eval_wikibio_accuracy": 0.321375, "eval_wikibio_bleu_score": 5.818789102742222, "eval_wikibio_bleu_score_sem": 0.20686515782275233, "eval_wikibio_emb_cos_sim": 0.7244291305541992, "eval_wikibio_emb_cos_sim_sem": 0.009842413262768883, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7834208011627197, "eval_wikibio_n_ngrams_match_1": 10.086, "eval_wikibio_n_ngrams_match_2": 3.376, "eval_wikibio_n_ngrams_match_3": 1.19, "eval_wikibio_num_pred_words": 36.738, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.966184359707846, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3511563437729036, "eval_wikibio_runtime": 10.0888, "eval_wikibio_samples_per_second": 49.56, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31948827103476674, "eval_wikibio_token_set_f1_sem": 0.005409714146322703, "eval_wikibio_token_set_precision": 0.32987331494848554, "eval_wikibio_token_set_recall": 0.3282077246133953, "eval_wikibio_true_num_tokens": 61.1328125, "step": 116875 }, { "epoch": 22.44, "eval_nq_accuracy": 0.52575, "eval_nq_bleu_score": 11.58256673813752, "eval_nq_bleu_score_sem": 0.464927684853318, "eval_nq_emb_cos_sim": 0.8292089700698853, "eval_nq_emb_cos_sim_sem": 0.007278743615674191, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.205331563949585, "eval_nq_n_ngrams_match_1": 22.92, "eval_nq_n_ngrams_match_2": 8.388, "eval_nq_n_ngrams_match_3": 3.848, "eval_nq_num_pred_words": 49.1, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.073259434912563, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44242775204923657, "eval_nq_runtime": 10.5388, "eval_nq_samples_per_second": 47.444, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4578089833272444, "eval_nq_token_set_f1_sem": 0.004977922166256775, "eval_nq_token_set_precision": 0.41615849446692316, "eval_nq_token_set_recall": 0.5172310825982276, "eval_nq_true_num_tokens": 64.0, "step": 116875 }, { "epoch": 22.44, "learning_rate": 0.001, "loss": 2.587, "step": 116880 }, { "epoch": 22.44, "learning_rate": 0.001, "loss": 2.5801, "step": 116892 }, { "epoch": 22.45, "learning_rate": 0.001, "loss": 2.5726, "step": 116904 }, { "epoch": 22.45, "learning_rate": 0.001, "loss": 2.5827, "step": 116916 }, { "epoch": 22.45, "learning_rate": 0.001, "loss": 2.5747, "step": 116928 }, { "epoch": 22.45, "learning_rate": 0.001, "loss": 2.5661, "step": 116940 }, { "epoch": 22.46, "learning_rate": 0.001, "loss": 2.5701, "step": 116952 }, { "epoch": 22.46, "learning_rate": 0.001, "loss": 2.5714, "step": 116964 }, { "epoch": 22.46, "learning_rate": 0.001, "loss": 2.5804, "step": 116976 }, { "epoch": 22.46, "learning_rate": 0.001, "loss": 2.5831, "step": 116988 }, { "epoch": 22.47, "learning_rate": 0.001, "loss": 2.5924, "step": 117000 }, { "epoch": 22.47, "learning_rate": 0.001, "loss": 2.5755, "step": 117012 }, { "epoch": 22.47, "learning_rate": 0.001, "loss": 2.5724, "step": 117024 }, { "epoch": 22.47, "learning_rate": 0.001, "loss": 2.5715, "step": 117036 }, { "epoch": 22.47, "learning_rate": 0.001, "loss": 2.5769, "step": 117048 }, { "epoch": 22.48, "learning_rate": 0.001, "loss": 2.5776, "step": 117060 }, { "epoch": 22.48, "learning_rate": 0.001, "loss": 2.5742, "step": 117072 }, { "epoch": 22.48, "learning_rate": 0.001, "loss": 2.5741, "step": 117084 }, { "epoch": 22.48, "learning_rate": 0.001, "loss": 2.5765, "step": 117096 }, { "epoch": 22.49, "learning_rate": 0.001, "loss": 2.5719, "step": 117108 }, { "epoch": 22.49, "learning_rate": 0.001, "loss": 2.5707, "step": 117120 }, { "epoch": 22.49, "learning_rate": 0.001, "loss": 2.5737, "step": 117132 }, { "epoch": 22.49, "learning_rate": 0.001, "loss": 2.5713, "step": 117144 }, { "epoch": 22.5, "learning_rate": 0.001, "loss": 2.5804, "step": 117156 }, { "epoch": 22.5, "learning_rate": 0.001, "loss": 2.5727, "step": 117168 }, { "epoch": 22.5, "learning_rate": 0.001, "loss": 2.5753, "step": 117180 }, { "epoch": 22.5, "learning_rate": 0.001, "loss": 2.5686, "step": 117192 }, { "epoch": 22.5, "learning_rate": 0.001, "loss": 2.5804, "step": 117204 }, { "epoch": 22.51, "learning_rate": 0.001, "loss": 2.5776, "step": 117216 }, { "epoch": 22.51, "learning_rate": 0.001, "loss": 2.5714, "step": 117228 }, { "epoch": 22.51, "learning_rate": 0.001, "loss": 2.5648, "step": 117240 }, { "epoch": 22.51, "learning_rate": 0.001, "loss": 2.5706, "step": 117252 }, { "epoch": 22.52, "learning_rate": 0.001, "loss": 2.5773, "step": 117264 }, { "epoch": 22.52, "learning_rate": 0.001, "loss": 2.5811, "step": 117276 }, { "epoch": 22.52, "learning_rate": 0.001, "loss": 2.5717, "step": 117288 }, { "epoch": 22.52, "learning_rate": 0.001, "loss": 2.5746, "step": 117300 }, { "epoch": 22.53, "learning_rate": 0.001, "loss": 2.5784, "step": 117312 }, { "epoch": 22.53, "learning_rate": 0.001, "loss": 2.5681, "step": 117324 }, { "epoch": 22.53, "learning_rate": 0.001, "loss": 2.577, "step": 117336 }, { "epoch": 22.53, "learning_rate": 0.001, "loss": 2.5697, "step": 117348 }, { "epoch": 22.53, "learning_rate": 0.001, "loss": 2.5727, "step": 117360 }, { "epoch": 22.54, "learning_rate": 0.001, "loss": 2.5821, "step": 117372 }, { "epoch": 22.54, "learning_rate": 0.001, "loss": 2.5609, "step": 117384 }, { "epoch": 22.54, "learning_rate": 0.001, "loss": 2.5816, "step": 117396 }, { "epoch": 22.54, "learning_rate": 0.001, "loss": 2.5865, "step": 117408 }, { "epoch": 22.55, "learning_rate": 0.001, "loss": 2.5806, "step": 117420 }, { "epoch": 22.55, "learning_rate": 0.001, "loss": 2.5813, "step": 117432 }, { "epoch": 22.55, "learning_rate": 0.001, "loss": 2.5662, "step": 117444 }, { "epoch": 22.55, "learning_rate": 0.001, "loss": 2.5839, "step": 117456 }, { "epoch": 22.56, "learning_rate": 0.001, "loss": 2.5809, "step": 117468 }, { "epoch": 22.56, "learning_rate": 0.001, "loss": 2.5772, "step": 117480 }, { "epoch": 22.56, "learning_rate": 0.001, "loss": 2.5753, "step": 117492 }, { "epoch": 22.56, "eval_ag_news_accuracy": 0.3210625, "eval_ag_news_bleu_score": 4.751313847520546, "eval_ag_news_bleu_score_sem": 0.15357630148775284, "eval_ag_news_emb_cos_sim": 0.8032387495040894, "eval_ag_news_emb_cos_sim_sem": 0.007929164977150944, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5686678886413574, "eval_ag_news_n_ngrams_match_1": 14.074, "eval_ag_news_n_ngrams_match_2": 3.066, "eval_ag_news_n_ngrams_match_3": 0.866, "eval_ag_news_num_pred_words": 46.748, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.469312592947226, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3459832340936717, "eval_ag_news_runtime": 10.5136, "eval_ag_news_samples_per_second": 47.557, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3496529264097084, "eval_ag_news_token_set_f1_sem": 0.004607486911821557, "eval_ag_news_token_set_precision": 0.33522189722840523, "eval_ag_news_token_set_recall": 0.37955676522116066, "eval_ag_news_true_num_tokens": 56.09375, "step": 117500 }, { "epoch": 22.56, "eval_anthropic_toxic_prompts_accuracy": 0.111875, "eval_anthropic_toxic_prompts_bleu_score": 3.0575816694187474, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11457866858044319, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.656312108039856, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009500901300552155, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2598910331726074, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.138, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706, "eval_anthropic_toxic_prompts_num_pred_words": 47.722, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.04669876174836, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21074573188862467, "eval_anthropic_toxic_prompts_runtime": 9.8163, "eval_anthropic_toxic_prompts_samples_per_second": 50.936, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3523485744391414, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006537204923483806, "eval_anthropic_toxic_prompts_token_set_precision": 0.43178132247169376, "eval_anthropic_toxic_prompts_token_set_recall": 0.324154576788585, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 117500 }, { "epoch": 22.56, "eval_arxiv_accuracy": 0.34596875, "eval_arxiv_bleu_score": 4.404601112288306, "eval_arxiv_bleu_score_sem": 0.1209762459064129, "eval_arxiv_emb_cos_sim": 0.757699728012085, "eval_arxiv_emb_cos_sim_sem": 0.007478353065855309, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4282429218292236, "eval_arxiv_n_ngrams_match_1": 15.358, "eval_arxiv_n_ngrams_match_2": 3.054, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 40.7, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.822437709906495, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36524552461244275, "eval_arxiv_runtime": 12.4637, "eval_arxiv_samples_per_second": 40.116, "eval_arxiv_steps_per_second": 0.08, "eval_arxiv_token_set_f1": 0.3607402806172713, "eval_arxiv_token_set_f1_sem": 0.00425810251188602, "eval_arxiv_token_set_precision": 0.3110062180736336, "eval_arxiv_token_set_recall": 0.4480569332203351, "eval_arxiv_true_num_tokens": 64.0, "step": 117500 }, { "epoch": 22.56, "eval_python_code_alpaca_accuracy": 0.15896875, "eval_python_code_alpaca_bleu_score": 4.48379496275208, "eval_python_code_alpaca_bleu_score_sem": 0.1473345045875255, "eval_python_code_alpaca_emb_cos_sim": 0.7411110401153564, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010823657750553294, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8746705055236816, "eval_python_code_alpaca_n_ngrams_match_1": 9.514, "eval_python_code_alpaca_n_ngrams_match_2": 2.762, "eval_python_code_alpaca_n_ngrams_match_3": 0.904, "eval_python_code_alpaca_num_pred_words": 42.514, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.719584654212152, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32422284766209397, "eval_python_code_alpaca_runtime": 10.0745, "eval_python_code_alpaca_samples_per_second": 49.63, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.46409111519362245, "eval_python_code_alpaca_token_set_f1_sem": 0.005765989168419287, "eval_python_code_alpaca_token_set_precision": 0.5161061677503002, "eval_python_code_alpaca_token_set_recall": 0.4461122420485951, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 117500 }, { "epoch": 22.56, "eval_wikibio_accuracy": 0.322625, "eval_wikibio_bleu_score": 5.890929405674165, "eval_wikibio_bleu_score_sem": 0.21081142732845903, "eval_wikibio_emb_cos_sim": 0.7339615821838379, "eval_wikibio_emb_cos_sim_sem": 0.010007291075598992, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7456119060516357, "eval_wikibio_n_ngrams_match_1": 9.922, "eval_wikibio_n_ngrams_match_2": 3.346, "eval_wikibio_n_ngrams_match_3": 1.226, "eval_wikibio_num_pred_words": 35.99, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.334904278869544, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34904511274310346, "eval_wikibio_runtime": 10.1967, "eval_wikibio_samples_per_second": 49.036, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.31552256679354507, "eval_wikibio_token_set_f1_sem": 0.005655064206334394, "eval_wikibio_token_set_precision": 0.32342106186795944, "eval_wikibio_token_set_recall": 0.3260212041482342, "eval_wikibio_true_num_tokens": 61.1328125, "step": 117500 }, { "epoch": 22.56, "eval_nq_accuracy": 0.52715625, "eval_nq_bleu_score": 11.389046873894975, "eval_nq_bleu_score_sem": 0.47600020163262186, "eval_nq_emb_cos_sim": 0.8283698558807373, "eval_nq_emb_cos_sim_sem": 0.007455267788127975, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1999282836914062, "eval_nq_n_ngrams_match_1": 22.87, "eval_nq_n_ngrams_match_2": 8.262, "eval_nq_n_ngrams_match_3": 3.766, "eval_nq_num_pred_words": 48.834, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.024366281989233, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44442499810861613, "eval_nq_runtime": 10.5316, "eval_nq_samples_per_second": 47.476, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4579732896661942, "eval_nq_token_set_f1_sem": 0.005053132762283063, "eval_nq_token_set_precision": 0.41472750289136123, "eval_nq_token_set_recall": 0.5204610951023767, "eval_nq_true_num_tokens": 64.0, "step": 117500 }, { "epoch": 22.56, "learning_rate": 0.001, "loss": 2.5692, "step": 117504 }, { "epoch": 22.56, "learning_rate": 0.001, "loss": 2.5689, "step": 117516 }, { "epoch": 22.57, "learning_rate": 0.001, "loss": 2.5768, "step": 117528 }, { "epoch": 22.57, "learning_rate": 0.001, "loss": 2.5948, "step": 117540 }, { "epoch": 22.57, "learning_rate": 0.001, "loss": 2.5832, "step": 117552 }, { "epoch": 22.57, "learning_rate": 0.001, "loss": 2.5831, "step": 117564 }, { "epoch": 22.58, "learning_rate": 0.001, "loss": 2.5871, "step": 117576 }, { "epoch": 22.58, "learning_rate": 0.001, "loss": 2.5762, "step": 117588 }, { "epoch": 22.58, "learning_rate": 0.001, "loss": 2.5775, "step": 117600 }, { "epoch": 22.58, "learning_rate": 0.001, "loss": 2.595, "step": 117612 }, { "epoch": 22.59, "learning_rate": 0.001, "loss": 2.5849, "step": 117624 }, { "epoch": 22.59, "learning_rate": 0.001, "loss": 2.5846, "step": 117636 }, { "epoch": 22.59, "learning_rate": 0.001, "loss": 2.5769, "step": 117648 }, { "epoch": 22.59, "learning_rate": 0.001, "loss": 2.5681, "step": 117660 }, { "epoch": 22.59, "learning_rate": 0.001, "loss": 2.5813, "step": 117672 }, { "epoch": 22.6, "learning_rate": 0.001, "loss": 2.5852, "step": 117684 }, { "epoch": 22.6, "learning_rate": 0.001, "loss": 2.5845, "step": 117696 }, { "epoch": 22.6, "learning_rate": 0.001, "loss": 2.5787, "step": 117708 }, { "epoch": 22.6, "learning_rate": 0.001, "loss": 2.5801, "step": 117720 }, { "epoch": 22.61, "learning_rate": 0.001, "loss": 2.5731, "step": 117732 }, { "epoch": 22.61, "learning_rate": 0.001, "loss": 2.5777, "step": 117744 }, { "epoch": 22.61, "learning_rate": 0.001, "loss": 2.5701, "step": 117756 }, { "epoch": 22.61, "learning_rate": 0.001, "loss": 2.5764, "step": 117768 }, { "epoch": 22.62, "learning_rate": 0.001, "loss": 2.5819, "step": 117780 }, { "epoch": 22.62, "learning_rate": 0.001, "loss": 2.5736, "step": 117792 }, { "epoch": 22.62, "learning_rate": 0.001, "loss": 2.5746, "step": 117804 }, { "epoch": 22.62, "learning_rate": 0.001, "loss": 2.5852, "step": 117816 }, { "epoch": 22.62, "learning_rate": 0.001, "loss": 2.582, "step": 117828 }, { "epoch": 22.63, "learning_rate": 0.001, "loss": 2.5686, "step": 117840 }, { "epoch": 22.63, "learning_rate": 0.001, "loss": 2.5661, "step": 117852 }, { "epoch": 22.63, "learning_rate": 0.001, "loss": 2.5786, "step": 117864 }, { "epoch": 22.63, "learning_rate": 0.001, "loss": 2.5764, "step": 117876 }, { "epoch": 22.64, "learning_rate": 0.001, "loss": 2.5814, "step": 117888 }, { "epoch": 22.64, "learning_rate": 0.001, "loss": 2.5788, "step": 117900 }, { "epoch": 22.64, "learning_rate": 0.001, "loss": 2.5834, "step": 117912 }, { "epoch": 22.64, "learning_rate": 0.001, "loss": 2.5875, "step": 117924 }, { "epoch": 22.65, "learning_rate": 0.001, "loss": 2.5765, "step": 117936 }, { "epoch": 22.65, "learning_rate": 0.001, "loss": 2.5888, "step": 117948 }, { "epoch": 22.65, "learning_rate": 0.001, "loss": 2.5736, "step": 117960 }, { "epoch": 22.65, "learning_rate": 0.001, "loss": 2.5772, "step": 117972 }, { "epoch": 22.65, "learning_rate": 0.001, "loss": 2.5726, "step": 117984 }, { "epoch": 22.66, "learning_rate": 0.001, "loss": 2.5873, "step": 117996 }, { "epoch": 22.66, "learning_rate": 0.001, "loss": 2.5891, "step": 118008 }, { "epoch": 22.66, "learning_rate": 0.001, "loss": 2.5699, "step": 118020 }, { "epoch": 22.66, "learning_rate": 0.001, "loss": 2.5778, "step": 118032 }, { "epoch": 22.67, "learning_rate": 0.001, "loss": 2.5731, "step": 118044 }, { "epoch": 22.67, "learning_rate": 0.001, "loss": 2.5779, "step": 118056 }, { "epoch": 22.67, "learning_rate": 0.001, "loss": 2.5681, "step": 118068 }, { "epoch": 22.67, "learning_rate": 0.001, "loss": 2.581, "step": 118080 }, { "epoch": 22.68, "learning_rate": 0.001, "loss": 2.5785, "step": 118092 }, { "epoch": 22.68, "learning_rate": 0.001, "loss": 2.5797, "step": 118104 }, { "epoch": 22.68, "learning_rate": 0.001, "loss": 2.5667, "step": 118116 }, { "epoch": 22.68, "eval_ag_news_accuracy": 0.3206875, "eval_ag_news_bleu_score": 4.840546160324322, "eval_ag_news_bleu_score_sem": 0.15717995941611823, "eval_ag_news_emb_cos_sim": 0.8073430061340332, "eval_ag_news_emb_cos_sim_sem": 0.007231543755153546, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.572967767715454, "eval_ag_news_n_ngrams_match_1": 13.846, "eval_ag_news_n_ngrams_match_2": 3.076, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.692, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.622154713765795, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3436576714142795, "eval_ag_news_runtime": 11.5141, "eval_ag_news_samples_per_second": 43.425, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.34528038605404987, "eval_ag_news_token_set_f1_sem": 0.004380625888641731, "eval_ag_news_token_set_precision": 0.32926774330513486, "eval_ag_news_token_set_recall": 0.37960968660890493, "eval_ag_news_true_num_tokens": 56.09375, "step": 118125 }, { "epoch": 22.68, "eval_anthropic_toxic_prompts_accuracy": 0.11371875, "eval_anthropic_toxic_prompts_bleu_score": 3.1456554674371224, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1219128571149683, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6706522107124329, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009408666873853113, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2667617797851562, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.28, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.956, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 47.594, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.226275234815024, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2149039943129452, "eval_anthropic_toxic_prompts_runtime": 9.9279, "eval_anthropic_toxic_prompts_samples_per_second": 50.363, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35698278637877773, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006647900886517928, "eval_anthropic_toxic_prompts_token_set_precision": 0.4450859525827544, "eval_anthropic_toxic_prompts_token_set_recall": 0.3228706847834766, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 118125 }, { "epoch": 22.68, "eval_arxiv_accuracy": 0.3433125, "eval_arxiv_bleu_score": 4.323463857586938, "eval_arxiv_bleu_score_sem": 0.12050510206508024, "eval_arxiv_emb_cos_sim": 0.7659394145011902, "eval_arxiv_emb_cos_sim_sem": 0.006534922862263774, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.44219708442688, "eval_arxiv_n_ngrams_match_1": 14.92, "eval_arxiv_n_ngrams_match_2": 2.932, "eval_arxiv_n_ngrams_match_3": 0.666, "eval_arxiv_num_pred_words": 39.906, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.25555387610984, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3572795954756781, "eval_arxiv_runtime": 10.3216, "eval_arxiv_samples_per_second": 48.442, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.34989747195975485, "eval_arxiv_token_set_f1_sem": 0.004190585429222164, "eval_arxiv_token_set_precision": 0.30072492311625165, "eval_arxiv_token_set_recall": 0.4362196950083837, "eval_arxiv_true_num_tokens": 64.0, "step": 118125 }, { "epoch": 22.68, "eval_python_code_alpaca_accuracy": 0.15675, "eval_python_code_alpaca_bleu_score": 4.395425837996771, "eval_python_code_alpaca_bleu_score_sem": 0.14135301310053985, "eval_python_code_alpaca_emb_cos_sim": 0.7417300343513489, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009967324667954029, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8914570808410645, "eval_python_code_alpaca_n_ngrams_match_1": 9.434, "eval_python_code_alpaca_n_ngrams_match_2": 2.678, "eval_python_code_alpaca_n_ngrams_match_3": 0.878, "eval_python_code_alpaca_num_pred_words": 42.928, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.01954641817758, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32670841440180876, "eval_python_code_alpaca_runtime": 10.7363, "eval_python_code_alpaca_samples_per_second": 46.571, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.46292960382541193, "eval_python_code_alpaca_token_set_f1_sem": 0.005693270406237749, "eval_python_code_alpaca_token_set_precision": 0.5117938139124939, "eval_python_code_alpaca_token_set_recall": 0.44617442796793894, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 118125 }, { "epoch": 22.68, "eval_wikibio_accuracy": 0.31978125, "eval_wikibio_bleu_score": 5.703654430766345, "eval_wikibio_bleu_score_sem": 0.2065429911401896, "eval_wikibio_emb_cos_sim": 0.7414698600769043, "eval_wikibio_emb_cos_sim_sem": 0.008842983726941036, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.762153387069702, "eval_wikibio_n_ngrams_match_1": 10.058, "eval_wikibio_n_ngrams_match_2": 3.268, "eval_wikibio_n_ngrams_match_3": 1.158, "eval_wikibio_num_pred_words": 36.9, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.04101021299951, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3478530142638878, "eval_wikibio_runtime": 10.0028, "eval_wikibio_samples_per_second": 49.986, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3132170005221746, "eval_wikibio_token_set_f1_sem": 0.005376814575488786, "eval_wikibio_token_set_precision": 0.32415168566383196, "eval_wikibio_token_set_recall": 0.3170845095153825, "eval_wikibio_true_num_tokens": 61.1328125, "step": 118125 }, { "epoch": 22.68, "eval_nq_accuracy": 0.52753125, "eval_nq_bleu_score": 11.697604780616196, "eval_nq_bleu_score_sem": 0.4794663338808472, "eval_nq_emb_cos_sim": 0.8264201879501343, "eval_nq_emb_cos_sim_sem": 0.006818167464958697, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.19657564163208, "eval_nq_n_ngrams_match_1": 23.102, "eval_nq_n_ngrams_match_2": 8.454, "eval_nq_n_ngrams_match_3": 3.878, "eval_nq_num_pred_words": 49.11, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.994161473281862, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45084280812714295, "eval_nq_runtime": 10.4674, "eval_nq_samples_per_second": 47.767, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46362520961188725, "eval_nq_token_set_f1_sem": 0.004846504152292297, "eval_nq_token_set_precision": 0.42061919822566746, "eval_nq_token_set_recall": 0.5258279988193315, "eval_nq_true_num_tokens": 64.0, "step": 118125 }, { "epoch": 22.68, "learning_rate": 0.001, "loss": 2.5705, "step": 118128 }, { "epoch": 22.68, "learning_rate": 0.001, "loss": 2.5819, "step": 118140 }, { "epoch": 22.69, "learning_rate": 0.001, "loss": 2.576, "step": 118152 }, { "epoch": 22.69, "learning_rate": 0.001, "loss": 2.5664, "step": 118164 }, { "epoch": 22.69, "learning_rate": 0.001, "loss": 2.5682, "step": 118176 }, { "epoch": 22.69, "learning_rate": 0.001, "loss": 2.5805, "step": 118188 }, { "epoch": 22.7, "learning_rate": 0.001, "loss": 2.5669, "step": 118200 }, { "epoch": 22.7, "learning_rate": 0.001, "loss": 2.5746, "step": 118212 }, { "epoch": 22.7, "learning_rate": 0.001, "loss": 2.5832, "step": 118224 }, { "epoch": 22.7, "learning_rate": 0.001, "loss": 2.5787, "step": 118236 }, { "epoch": 22.71, "learning_rate": 0.001, "loss": 2.5704, "step": 118248 }, { "epoch": 22.71, "learning_rate": 0.001, "loss": 2.5865, "step": 118260 }, { "epoch": 22.71, "learning_rate": 0.001, "loss": 2.5731, "step": 118272 }, { "epoch": 22.71, "learning_rate": 0.001, "loss": 2.5729, "step": 118284 }, { "epoch": 22.71, "learning_rate": 0.001, "loss": 2.5694, "step": 118296 }, { "epoch": 22.72, "learning_rate": 0.001, "loss": 2.5623, "step": 118308 }, { "epoch": 22.72, "learning_rate": 0.001, "loss": 2.5731, "step": 118320 }, { "epoch": 22.72, "learning_rate": 0.001, "loss": 2.5768, "step": 118332 }, { "epoch": 22.72, "learning_rate": 0.001, "loss": 2.582, "step": 118344 }, { "epoch": 22.73, "learning_rate": 0.001, "loss": 2.5817, "step": 118356 }, { "epoch": 22.73, "learning_rate": 0.001, "loss": 2.5828, "step": 118368 }, { "epoch": 22.73, "learning_rate": 0.001, "loss": 2.5793, "step": 118380 }, { "epoch": 22.73, "learning_rate": 0.001, "loss": 2.5846, "step": 118392 }, { "epoch": 22.74, "learning_rate": 0.001, "loss": 2.5848, "step": 118404 }, { "epoch": 22.74, "learning_rate": 0.001, "loss": 2.5921, "step": 118416 }, { "epoch": 22.74, "learning_rate": 0.001, "loss": 2.5953, "step": 118428 }, { "epoch": 22.74, "learning_rate": 0.001, "loss": 2.5871, "step": 118440 }, { "epoch": 22.74, "learning_rate": 0.001, "loss": 2.5764, "step": 118452 }, { "epoch": 22.75, "learning_rate": 0.001, "loss": 2.5792, "step": 118464 }, { "epoch": 22.75, "learning_rate": 0.001, "loss": 2.574, "step": 118476 }, { "epoch": 22.75, "learning_rate": 0.001, "loss": 2.5756, "step": 118488 }, { "epoch": 22.75, "learning_rate": 0.001, "loss": 2.5909, "step": 118500 }, { "epoch": 22.76, "learning_rate": 0.001, "loss": 2.5847, "step": 118512 }, { "epoch": 22.76, "learning_rate": 0.001, "loss": 2.5727, "step": 118524 }, { "epoch": 22.76, "learning_rate": 0.001, "loss": 2.5742, "step": 118536 }, { "epoch": 22.76, "learning_rate": 0.001, "loss": 2.5797, "step": 118548 }, { "epoch": 22.76, "learning_rate": 0.001, "loss": 2.5793, "step": 118560 }, { "epoch": 22.77, "learning_rate": 0.001, "loss": 2.5854, "step": 118572 }, { "epoch": 22.77, "learning_rate": 0.001, "loss": 2.5686, "step": 118584 }, { "epoch": 22.77, "learning_rate": 0.001, "loss": 2.5709, "step": 118596 }, { "epoch": 22.77, "learning_rate": 0.001, "loss": 2.5798, "step": 118608 }, { "epoch": 22.78, "learning_rate": 0.001, "loss": 2.5761, "step": 118620 }, { "epoch": 22.78, "learning_rate": 0.001, "loss": 2.5614, "step": 118632 }, { "epoch": 22.78, "learning_rate": 0.001, "loss": 2.578, "step": 118644 }, { "epoch": 22.78, "learning_rate": 0.001, "loss": 2.5774, "step": 118656 }, { "epoch": 22.79, "learning_rate": 0.001, "loss": 2.5811, "step": 118668 }, { "epoch": 22.79, "learning_rate": 0.001, "loss": 2.5817, "step": 118680 }, { "epoch": 22.79, "learning_rate": 0.001, "loss": 2.565, "step": 118692 }, { "epoch": 22.79, "learning_rate": 0.001, "loss": 2.5874, "step": 118704 }, { "epoch": 22.79, "learning_rate": 0.001, "loss": 2.5805, "step": 118716 }, { "epoch": 22.8, "learning_rate": 0.001, "loss": 2.5766, "step": 118728 }, { "epoch": 22.8, "learning_rate": 0.001, "loss": 2.5735, "step": 118740 }, { "epoch": 22.8, "eval_ag_news_accuracy": 0.32065625, "eval_ag_news_bleu_score": 4.728675119766207, "eval_ag_news_bleu_score_sem": 0.16108176959669468, "eval_ag_news_emb_cos_sim": 0.8163679838180542, "eval_ag_news_emb_cos_sim_sem": 0.006003713861800085, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.574566125869751, "eval_ag_news_n_ngrams_match_1": 13.892, "eval_ag_news_n_ngrams_match_2": 2.978, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.704, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.679137202307444, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34591344792147, "eval_ag_news_runtime": 10.4948, "eval_ag_news_samples_per_second": 47.643, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3456728662053108, "eval_ag_news_token_set_f1_sem": 0.004355684349898715, "eval_ag_news_token_set_precision": 0.3312698500081311, "eval_ag_news_token_set_recall": 0.37825539516319207, "eval_ag_news_true_num_tokens": 56.09375, "step": 118750 }, { "epoch": 22.8, "eval_anthropic_toxic_prompts_accuracy": 0.11421875, "eval_anthropic_toxic_prompts_bleu_score": 3.131417167315212, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109858112678264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736284494400024, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008596035463348087, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2645761966705322, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 47.016, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.16901812338232, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21344392776445575, "eval_anthropic_toxic_prompts_runtime": 10.1688, "eval_anthropic_toxic_prompts_samples_per_second": 49.17, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3597367554367178, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00646303489294724, "eval_anthropic_toxic_prompts_token_set_precision": 0.43975985437102044, "eval_anthropic_toxic_prompts_token_set_recall": 0.33009692371145005, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 118750 }, { "epoch": 22.8, "eval_arxiv_accuracy": 0.345625, "eval_arxiv_bleu_score": 4.224076713499098, "eval_arxiv_bleu_score_sem": 0.11849153849914, "eval_arxiv_emb_cos_sim": 0.7639448642730713, "eval_arxiv_emb_cos_sim_sem": 0.0067609057029005834, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4284024238586426, "eval_arxiv_n_ngrams_match_1": 14.902, "eval_arxiv_n_ngrams_match_2": 2.866, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 40.04, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.82735434336895, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35825075103107074, "eval_arxiv_runtime": 10.2552, "eval_arxiv_samples_per_second": 48.756, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.34873992789830865, "eval_arxiv_token_set_f1_sem": 0.004124133935509216, "eval_arxiv_token_set_precision": 0.30042025713647386, "eval_arxiv_token_set_recall": 0.43556948019261876, "eval_arxiv_true_num_tokens": 64.0, "step": 118750 }, { "epoch": 22.8, "eval_python_code_alpaca_accuracy": 0.15934375, "eval_python_code_alpaca_bleu_score": 4.528718327545159, "eval_python_code_alpaca_bleu_score_sem": 0.14783233518395464, "eval_python_code_alpaca_emb_cos_sim": 0.7302837371826172, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010019767859847232, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.881324529647827, "eval_python_code_alpaca_n_ngrams_match_1": 9.378, "eval_python_code_alpaca_n_ngrams_match_2": 2.678, "eval_python_code_alpaca_n_ngrams_match_3": 0.828, "eval_python_code_alpaca_num_pred_words": 40.482, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.83788434598612, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3297159932945782, "eval_python_code_alpaca_runtime": 9.7742, "eval_python_code_alpaca_samples_per_second": 51.155, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4643577157387954, "eval_python_code_alpaca_token_set_f1_sem": 0.005818559465057156, "eval_python_code_alpaca_token_set_precision": 0.5103981416177292, "eval_python_code_alpaca_token_set_recall": 0.4486524542190428, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 118750 }, { "epoch": 22.8, "eval_wikibio_accuracy": 0.32240625, "eval_wikibio_bleu_score": 6.114362821110541, "eval_wikibio_bleu_score_sem": 0.21814553975399284, "eval_wikibio_emb_cos_sim": 0.744574785232544, "eval_wikibio_emb_cos_sim_sem": 0.010106713515602666, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.728107213973999, "eval_wikibio_n_ngrams_match_1": 10.178, "eval_wikibio_n_ngrams_match_2": 3.412, "eval_wikibio_n_ngrams_match_3": 1.284, "eval_wikibio_num_pred_words": 36.718, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.60029314403496, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3616905131558259, "eval_wikibio_runtime": 10.0604, "eval_wikibio_samples_per_second": 49.7, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32173470242274055, "eval_wikibio_token_set_f1_sem": 0.005348885462502014, "eval_wikibio_token_set_precision": 0.3317864882124454, "eval_wikibio_token_set_recall": 0.3267995582635393, "eval_wikibio_true_num_tokens": 61.1328125, "step": 118750 }, { "epoch": 22.8, "eval_nq_accuracy": 0.526625, "eval_nq_bleu_score": 11.24020788009808, "eval_nq_bleu_score_sem": 0.45684858157587976, "eval_nq_emb_cos_sim": 0.8305935263633728, "eval_nq_emb_cos_sim_sem": 0.006763945544349432, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2016873359680176, "eval_nq_n_ngrams_match_1": 22.814, "eval_nq_n_ngrams_match_2": 8.208, "eval_nq_n_ngrams_match_3": 3.658, "eval_nq_num_pred_words": 48.924, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.040254584122676, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4419139402057335, "eval_nq_runtime": 11.0143, "eval_nq_samples_per_second": 45.396, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.45243742822093835, "eval_nq_token_set_f1_sem": 0.004836848923290419, "eval_nq_token_set_precision": 0.41248264634775594, "eval_nq_token_set_recall": 0.5081416178754598, "eval_nq_true_num_tokens": 64.0, "step": 118750 }, { "epoch": 22.8, "learning_rate": 0.001, "loss": 2.5823, "step": 118752 }, { "epoch": 22.8, "learning_rate": 0.001, "loss": 2.5781, "step": 118764 }, { "epoch": 22.81, "learning_rate": 0.001, "loss": 2.5796, "step": 118776 }, { "epoch": 22.81, "learning_rate": 0.001, "loss": 2.5777, "step": 118788 }, { "epoch": 22.81, "learning_rate": 0.001, "loss": 2.5743, "step": 118800 }, { "epoch": 22.81, "learning_rate": 0.001, "loss": 2.5755, "step": 118812 }, { "epoch": 22.82, "learning_rate": 0.001, "loss": 2.5896, "step": 118824 }, { "epoch": 22.82, "learning_rate": 0.001, "loss": 2.5788, "step": 118836 }, { "epoch": 22.82, "learning_rate": 0.001, "loss": 2.5772, "step": 118848 }, { "epoch": 22.82, "learning_rate": 0.001, "loss": 2.5739, "step": 118860 }, { "epoch": 22.82, "learning_rate": 0.001, "loss": 2.595, "step": 118872 }, { "epoch": 22.83, "learning_rate": 0.001, "loss": 2.5723, "step": 118884 }, { "epoch": 22.83, "learning_rate": 0.001, "loss": 2.5808, "step": 118896 }, { "epoch": 22.83, "learning_rate": 0.001, "loss": 2.5746, "step": 118908 }, { "epoch": 22.83, "learning_rate": 0.001, "loss": 2.587, "step": 118920 }, { "epoch": 22.84, "learning_rate": 0.001, "loss": 2.5827, "step": 118932 }, { "epoch": 22.84, "learning_rate": 0.001, "loss": 2.5806, "step": 118944 }, { "epoch": 22.84, "learning_rate": 0.001, "loss": 2.5825, "step": 118956 }, { "epoch": 22.84, "learning_rate": 0.001, "loss": 2.5759, "step": 118968 }, { "epoch": 22.85, "learning_rate": 0.001, "loss": 2.5761, "step": 118980 }, { "epoch": 22.85, "learning_rate": 0.001, "loss": 2.5824, "step": 118992 }, { "epoch": 22.85, "learning_rate": 0.001, "loss": 2.5812, "step": 119004 }, { "epoch": 22.85, "learning_rate": 0.001, "loss": 2.5834, "step": 119016 }, { "epoch": 22.85, "learning_rate": 0.001, "loss": 2.5884, "step": 119028 }, { "epoch": 22.86, "learning_rate": 0.001, "loss": 2.5742, "step": 119040 }, { "epoch": 22.86, "learning_rate": 0.001, "loss": 2.575, "step": 119052 }, { "epoch": 22.86, "learning_rate": 0.001, "loss": 2.5901, "step": 119064 }, { "epoch": 22.86, "learning_rate": 0.001, "loss": 2.5791, "step": 119076 }, { "epoch": 22.87, "learning_rate": 0.001, "loss": 2.581, "step": 119088 }, { "epoch": 22.87, "learning_rate": 0.001, "loss": 2.5897, "step": 119100 }, { "epoch": 22.87, "learning_rate": 0.001, "loss": 2.586, "step": 119112 }, { "epoch": 22.87, "learning_rate": 0.001, "loss": 2.575, "step": 119124 }, { "epoch": 22.88, "learning_rate": 0.001, "loss": 2.582, "step": 119136 }, { "epoch": 22.88, "learning_rate": 0.001, "loss": 2.5738, "step": 119148 }, { "epoch": 22.88, "learning_rate": 0.001, "loss": 2.5726, "step": 119160 }, { "epoch": 22.88, "learning_rate": 0.001, "loss": 2.5768, "step": 119172 }, { "epoch": 22.88, "learning_rate": 0.001, "loss": 2.5766, "step": 119184 }, { "epoch": 22.89, "learning_rate": 0.001, "loss": 2.5749, "step": 119196 }, { "epoch": 22.89, "learning_rate": 0.001, "loss": 2.5836, "step": 119208 }, { "epoch": 22.89, "learning_rate": 0.001, "loss": 2.575, "step": 119220 }, { "epoch": 22.89, "learning_rate": 0.001, "loss": 2.5735, "step": 119232 }, { "epoch": 22.9, "learning_rate": 0.001, "loss": 2.5829, "step": 119244 }, { "epoch": 22.9, "learning_rate": 0.001, "loss": 2.5734, "step": 119256 }, { "epoch": 22.9, "learning_rate": 0.001, "loss": 2.5789, "step": 119268 }, { "epoch": 22.9, "learning_rate": 0.001, "loss": 2.5763, "step": 119280 }, { "epoch": 22.91, "learning_rate": 0.001, "loss": 2.5862, "step": 119292 }, { "epoch": 22.91, "learning_rate": 0.001, "loss": 2.5742, "step": 119304 }, { "epoch": 22.91, "learning_rate": 0.001, "loss": 2.5789, "step": 119316 }, { "epoch": 22.91, "learning_rate": 0.001, "loss": 2.5865, "step": 119328 }, { "epoch": 22.91, "learning_rate": 0.001, "loss": 2.5805, "step": 119340 }, { "epoch": 22.92, "learning_rate": 0.001, "loss": 2.5752, "step": 119352 }, { "epoch": 22.92, "learning_rate": 0.001, "loss": 2.5752, "step": 119364 }, { "epoch": 22.92, "eval_ag_news_accuracy": 0.321375, "eval_ag_news_bleu_score": 4.771542609334364, "eval_ag_news_bleu_score_sem": 0.1470501125581788, "eval_ag_news_emb_cos_sim": 0.8067134022712708, "eval_ag_news_emb_cos_sim_sem": 0.0069122174988662495, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5727336406707764, "eval_ag_news_n_ngrams_match_1": 14.04, "eval_ag_news_n_ngrams_match_2": 3.122, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.576, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.61381558020405, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.350032673989199, "eval_ag_news_runtime": 10.4763, "eval_ag_news_samples_per_second": 47.727, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35110158428638666, "eval_ag_news_token_set_f1_sem": 0.004342113085911917, "eval_ag_news_token_set_precision": 0.3353990444657584, "eval_ag_news_token_set_recall": 0.3850773854745541, "eval_ag_news_true_num_tokens": 56.09375, "step": 119375 }, { "epoch": 22.92, "eval_anthropic_toxic_prompts_accuracy": 0.113875, "eval_anthropic_toxic_prompts_bleu_score": 2.9140668966136736, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10368118999703983, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6709904670715332, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008316619638422576, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.250877618789673, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.64, "eval_anthropic_toxic_prompts_num_pred_words": 47.954, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.81298393905257, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21121607654629718, "eval_anthropic_toxic_prompts_runtime": 10.0005, "eval_anthropic_toxic_prompts_samples_per_second": 49.998, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3553615145707035, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553119473347054, "eval_anthropic_toxic_prompts_token_set_precision": 0.4315260440881228, "eval_anthropic_toxic_prompts_token_set_recall": 0.33003881224782194, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 119375 }, { "epoch": 22.92, "eval_arxiv_accuracy": 0.346, "eval_arxiv_bleu_score": 4.169312491926533, "eval_arxiv_bleu_score_sem": 0.12186592289341049, "eval_arxiv_emb_cos_sim": 0.7528259754180908, "eval_arxiv_emb_cos_sim_sem": 0.008360341725396564, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4216580390930176, "eval_arxiv_n_ngrams_match_1": 14.818, "eval_arxiv_n_ngrams_match_2": 2.87, "eval_arxiv_n_ngrams_match_3": 0.6, "eval_arxiv_num_pred_words": 39.986, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.62014234853063, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3585396308505666, "eval_arxiv_runtime": 10.2628, "eval_arxiv_samples_per_second": 48.719, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.34880182526485354, "eval_arxiv_token_set_f1_sem": 0.004492715906354398, "eval_arxiv_token_set_precision": 0.30025934747677413, "eval_arxiv_token_set_recall": 0.4359924377589185, "eval_arxiv_true_num_tokens": 64.0, "step": 119375 }, { "epoch": 22.92, "eval_python_code_alpaca_accuracy": 0.1598125, "eval_python_code_alpaca_bleu_score": 4.579480170502158, "eval_python_code_alpaca_bleu_score_sem": 0.1406896219823957, "eval_python_code_alpaca_emb_cos_sim": 0.7514224648475647, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008423041089457092, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.883908748626709, "eval_python_code_alpaca_n_ngrams_match_1": 9.574, "eval_python_code_alpaca_n_ngrams_match_2": 2.806, "eval_python_code_alpaca_n_ngrams_match_3": 0.932, "eval_python_code_alpaca_num_pred_words": 42.76, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.88404095896687, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3308781466127945, "eval_python_code_alpaca_runtime": 10.1648, "eval_python_code_alpaca_samples_per_second": 49.189, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4759927518667449, "eval_python_code_alpaca_token_set_f1_sem": 0.005466086633814408, "eval_python_code_alpaca_token_set_precision": 0.5201817886890249, "eval_python_code_alpaca_token_set_recall": 0.4620434358846748, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 119375 }, { "epoch": 22.92, "eval_wikibio_accuracy": 0.32278125, "eval_wikibio_bleu_score": 5.836553740050067, "eval_wikibio_bleu_score_sem": 0.20921846966863697, "eval_wikibio_emb_cos_sim": 0.7484660148620605, "eval_wikibio_emb_cos_sim_sem": 0.009859827155193469, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7701869010925293, "eval_wikibio_n_ngrams_match_1": 10.068, "eval_wikibio_n_ngrams_match_2": 3.32, "eval_wikibio_n_ngrams_match_3": 1.218, "eval_wikibio_num_pred_words": 36.876, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.38817337508725, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3521909885771981, "eval_wikibio_runtime": 10.2816, "eval_wikibio_samples_per_second": 48.631, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3162605130662639, "eval_wikibio_token_set_f1_sem": 0.005442885509959944, "eval_wikibio_token_set_precision": 0.32671022151088314, "eval_wikibio_token_set_recall": 0.3207715336894786, "eval_wikibio_true_num_tokens": 61.1328125, "step": 119375 }, { "epoch": 22.92, "eval_nq_accuracy": 0.5265, "eval_nq_bleu_score": 11.763675240452047, "eval_nq_bleu_score_sem": 0.4788024395816035, "eval_nq_emb_cos_sim": 0.832360029220581, "eval_nq_emb_cos_sim_sem": 0.007437167033781562, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1988203525543213, "eval_nq_n_ngrams_match_1": 23.04, "eval_nq_n_ngrams_match_2": 8.436, "eval_nq_n_ngrams_match_3": 3.916, "eval_nq_num_pred_words": 49.28, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.014373442304269, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44567249350959226, "eval_nq_runtime": 10.8519, "eval_nq_samples_per_second": 46.075, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4603255991639862, "eval_nq_token_set_f1_sem": 0.0047959763576717876, "eval_nq_token_set_precision": 0.41883655406372927, "eval_nq_token_set_recall": 0.5179694841072207, "eval_nq_true_num_tokens": 64.0, "step": 119375 }, { "epoch": 22.92, "learning_rate": 0.001, "loss": 2.5796, "step": 119376 }, { "epoch": 22.92, "learning_rate": 0.001, "loss": 2.5771, "step": 119388 }, { "epoch": 22.93, "learning_rate": 0.001, "loss": 2.5732, "step": 119400 }, { "epoch": 22.93, "learning_rate": 0.001, "loss": 2.5744, "step": 119412 }, { "epoch": 22.93, "learning_rate": 0.001, "loss": 2.572, "step": 119424 }, { "epoch": 22.93, "learning_rate": 0.001, "loss": 2.5666, "step": 119436 }, { "epoch": 22.94, "learning_rate": 0.001, "loss": 2.5765, "step": 119448 }, { "epoch": 22.94, "learning_rate": 0.001, "loss": 2.5787, "step": 119460 }, { "epoch": 22.94, "learning_rate": 0.001, "loss": 2.5852, "step": 119472 }, { "epoch": 22.94, "learning_rate": 0.001, "loss": 2.5824, "step": 119484 }, { "epoch": 22.94, "learning_rate": 0.001, "loss": 2.5933, "step": 119496 }, { "epoch": 22.95, "learning_rate": 0.001, "loss": 2.5745, "step": 119508 }, { "epoch": 22.95, "learning_rate": 0.001, "loss": 2.5734, "step": 119520 }, { "epoch": 22.95, "learning_rate": 0.001, "loss": 2.5817, "step": 119532 }, { "epoch": 22.95, "learning_rate": 0.001, "loss": 2.5809, "step": 119544 }, { "epoch": 22.96, "learning_rate": 0.001, "loss": 2.5706, "step": 119556 }, { "epoch": 22.96, "learning_rate": 0.001, "loss": 2.5852, "step": 119568 }, { "epoch": 22.96, "learning_rate": 0.001, "loss": 2.5814, "step": 119580 }, { "epoch": 22.96, "learning_rate": 0.001, "loss": 2.5785, "step": 119592 }, { "epoch": 22.97, "learning_rate": 0.001, "loss": 2.5801, "step": 119604 }, { "epoch": 22.97, "learning_rate": 0.001, "loss": 2.5882, "step": 119616 }, { "epoch": 22.97, "learning_rate": 0.001, "loss": 2.5963, "step": 119628 }, { "epoch": 22.97, "learning_rate": 0.001, "loss": 2.5927, "step": 119640 }, { "epoch": 22.97, "learning_rate": 0.001, "loss": 2.582, "step": 119652 }, { "epoch": 22.98, "learning_rate": 0.001, "loss": 2.5758, "step": 119664 }, { "epoch": 22.98, "learning_rate": 0.001, "loss": 2.5813, "step": 119676 }, { "epoch": 22.98, "learning_rate": 0.001, "loss": 2.5811, "step": 119688 }, { "epoch": 22.98, "learning_rate": 0.001, "loss": 2.5777, "step": 119700 }, { "epoch": 22.99, "learning_rate": 0.001, "loss": 2.5873, "step": 119712 }, { "epoch": 22.99, "learning_rate": 0.001, "loss": 2.5855, "step": 119724 }, { "epoch": 22.99, "learning_rate": 0.001, "loss": 2.5834, "step": 119736 }, { "epoch": 22.99, "learning_rate": 0.001, "loss": 2.5833, "step": 119748 }, { "epoch": 23.0, "learning_rate": 0.001, "loss": 2.5716, "step": 119760 }, { "epoch": 23.0, "learning_rate": 0.001, "loss": 2.5811, "step": 119772 }, { "epoch": 23.0, "learning_rate": 0.001, "loss": 2.5834, "step": 119784 }, { "epoch": 23.0, "learning_rate": 0.001, "loss": 2.5674, "step": 119796 }, { "epoch": 23.0, "learning_rate": 0.001, "loss": 2.5731, "step": 119808 }, { "epoch": 23.01, "learning_rate": 0.001, "loss": 2.5821, "step": 119820 }, { "epoch": 23.01, "learning_rate": 0.001, "loss": 2.5688, "step": 119832 }, { "epoch": 23.01, "learning_rate": 0.001, "loss": 2.5624, "step": 119844 }, { "epoch": 23.01, "learning_rate": 0.001, "loss": 2.561, "step": 119856 }, { "epoch": 23.02, "learning_rate": 0.001, "loss": 2.5653, "step": 119868 }, { "epoch": 23.02, "learning_rate": 0.001, "loss": 2.5612, "step": 119880 }, { "epoch": 23.02, "learning_rate": 0.001, "loss": 2.558, "step": 119892 }, { "epoch": 23.02, "learning_rate": 0.001, "loss": 2.5653, "step": 119904 }, { "epoch": 23.03, "learning_rate": 0.001, "loss": 2.5666, "step": 119916 }, { "epoch": 23.03, "learning_rate": 0.001, "loss": 2.5713, "step": 119928 }, { "epoch": 23.03, "learning_rate": 0.001, "loss": 2.5655, "step": 119940 }, { "epoch": 23.03, "learning_rate": 0.001, "loss": 2.565, "step": 119952 }, { "epoch": 23.03, "learning_rate": 0.001, "loss": 2.5557, "step": 119964 }, { "epoch": 23.04, "learning_rate": 0.001, "loss": 2.5703, "step": 119976 }, { "epoch": 23.04, "learning_rate": 0.001, "loss": 2.5661, "step": 119988 }, { "epoch": 23.04, "learning_rate": 0.001, "loss": 2.5767, "step": 120000 }, { "epoch": 23.04, "eval_ag_news_accuracy": 0.32053125, "eval_ag_news_bleu_score": 4.7507281973693924, "eval_ag_news_bleu_score_sem": 0.15242972130870316, "eval_ag_news_emb_cos_sim": 0.80669105052948, "eval_ag_news_emb_cos_sim_sem": 0.007559986243064847, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.565901517868042, "eval_ag_news_n_ngrams_match_1": 13.914, "eval_ag_news_n_ngrams_match_2": 3.038, "eval_ag_news_n_ngrams_match_3": 0.852, "eval_ag_news_num_pred_words": 46.496, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.371326918082865, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3452257342075885, "eval_ag_news_runtime": 10.4969, "eval_ag_news_samples_per_second": 47.633, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.348363302530887, "eval_ag_news_token_set_f1_sem": 0.004266929603596635, "eval_ag_news_token_set_precision": 0.3315074464636364, "eval_ag_news_token_set_recall": 0.3834548713420894, "eval_ag_news_true_num_tokens": 56.09375, "step": 120000 }, { "epoch": 23.04, "eval_anthropic_toxic_prompts_accuracy": 0.11303125, "eval_anthropic_toxic_prompts_bleu_score": 3.113714452848769, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1162551914196674, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6651376485824585, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009246945464849764, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2804033756256104, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.256, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716, "eval_anthropic_toxic_prompts_num_pred_words": 47.78, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.586494881200764, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21111165987552732, "eval_anthropic_toxic_prompts_runtime": 10.5815, "eval_anthropic_toxic_prompts_samples_per_second": 47.252, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.35751448452746515, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00674997818020014, "eval_anthropic_toxic_prompts_token_set_precision": 0.43784531949414346, "eval_anthropic_toxic_prompts_token_set_recall": 0.3280085808062943, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 120000 }, { "epoch": 23.04, "eval_arxiv_accuracy": 0.344625, "eval_arxiv_bleu_score": 4.261845359047497, "eval_arxiv_bleu_score_sem": 0.12152648390963264, "eval_arxiv_emb_cos_sim": 0.7591594457626343, "eval_arxiv_emb_cos_sim_sem": 0.007763274322867557, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.435774564743042, "eval_arxiv_n_ngrams_match_1": 14.84, "eval_arxiv_n_ngrams_match_2": 2.964, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 40.468, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.05545771468835, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.352156605920514, "eval_arxiv_runtime": 10.3125, "eval_arxiv_samples_per_second": 48.485, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.34750629453071064, "eval_arxiv_token_set_f1_sem": 0.00433831106840747, "eval_arxiv_token_set_precision": 0.299259449773176, "eval_arxiv_token_set_recall": 0.4366267198668744, "eval_arxiv_true_num_tokens": 64.0, "step": 120000 }, { "epoch": 23.04, "eval_python_code_alpaca_accuracy": 0.159, "eval_python_code_alpaca_bleu_score": 4.6542100413563645, "eval_python_code_alpaca_bleu_score_sem": 0.14633888693296002, "eval_python_code_alpaca_emb_cos_sim": 0.7513779401779175, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009774954597549165, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.913936138153076, "eval_python_code_alpaca_n_ngrams_match_1": 9.72, "eval_python_code_alpaca_n_ngrams_match_2": 2.964, "eval_python_code_alpaca_n_ngrams_match_3": 1.012, "eval_python_code_alpaca_num_pred_words": 43.806, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.429195851610917, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.327765670838694, "eval_python_code_alpaca_runtime": 9.9441, "eval_python_code_alpaca_samples_per_second": 50.281, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.47418106912384855, "eval_python_code_alpaca_token_set_f1_sem": 0.005703871103965279, "eval_python_code_alpaca_token_set_precision": 0.5300462592300176, "eval_python_code_alpaca_token_set_recall": 0.4539878431310463, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 120000 }, { "epoch": 23.04, "eval_wikibio_accuracy": 0.31715625, "eval_wikibio_bleu_score": 5.991048425709009, "eval_wikibio_bleu_score_sem": 0.2059768237378832, "eval_wikibio_emb_cos_sim": 0.7435378432273865, "eval_wikibio_emb_cos_sim_sem": 0.008606466149810027, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.795231580734253, "eval_wikibio_n_ngrams_match_1": 10.554, "eval_wikibio_n_ngrams_match_2": 3.528, "eval_wikibio_n_ngrams_match_3": 1.27, "eval_wikibio_num_pred_words": 37.76, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.4885379009417, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3675820865797001, "eval_wikibio_runtime": 10.3052, "eval_wikibio_samples_per_second": 48.519, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3280854906650871, "eval_wikibio_token_set_f1_sem": 0.005002807537994431, "eval_wikibio_token_set_precision": 0.3409086432007072, "eval_wikibio_token_set_recall": 0.3285461415335112, "eval_wikibio_true_num_tokens": 61.1328125, "step": 120000 }, { "epoch": 23.04, "eval_nq_accuracy": 0.52459375, "eval_nq_bleu_score": 11.678230486295876, "eval_nq_bleu_score_sem": 0.4793581572895105, "eval_nq_emb_cos_sim": 0.8322374820709229, "eval_nq_emb_cos_sim_sem": 0.006763545149288578, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.202526569366455, "eval_nq_n_ngrams_match_1": 23.18, "eval_nq_n_ngrams_match_2": 8.478, "eval_nq_n_ngrams_match_3": 3.898, "eval_nq_num_pred_words": 49.23, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.04784465217387, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44818501565287217, "eval_nq_runtime": 10.6443, "eval_nq_samples_per_second": 46.974, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.46243034750089573, "eval_nq_token_set_f1_sem": 0.004945985124560686, "eval_nq_token_set_precision": 0.42029379092839253, "eval_nq_token_set_recall": 0.5201694917058951, "eval_nq_true_num_tokens": 64.0, "step": 120000 }, { "epoch": 23.04, "learning_rate": 0.001, "loss": 2.5671, "step": 120012 }, { "epoch": 23.05, "learning_rate": 0.001, "loss": 2.5756, "step": 120024 }, { "epoch": 23.05, "learning_rate": 0.001, "loss": 2.5749, "step": 120036 }, { "epoch": 23.05, "learning_rate": 0.001, "loss": 2.5689, "step": 120048 }, { "epoch": 23.05, "learning_rate": 0.001, "loss": 2.5666, "step": 120060 }, { "epoch": 23.06, "learning_rate": 0.001, "loss": 2.5685, "step": 120072 }, { "epoch": 23.06, "learning_rate": 0.001, "loss": 2.5659, "step": 120084 }, { "epoch": 23.06, "learning_rate": 0.001, "loss": 2.5561, "step": 120096 }, { "epoch": 23.06, "learning_rate": 0.001, "loss": 2.5586, "step": 120108 }, { "epoch": 23.06, "learning_rate": 0.001, "loss": 2.564, "step": 120120 }, { "epoch": 23.07, "learning_rate": 0.001, "loss": 2.5722, "step": 120132 }, { "epoch": 23.07, "learning_rate": 0.001, "loss": 2.5543, "step": 120144 }, { "epoch": 23.07, "learning_rate": 0.001, "loss": 2.5677, "step": 120156 }, { "epoch": 23.07, "learning_rate": 0.001, "loss": 2.5642, "step": 120168 }, { "epoch": 23.08, "learning_rate": 0.001, "loss": 2.5627, "step": 120180 }, { "epoch": 23.08, "learning_rate": 0.001, "loss": 2.5589, "step": 120192 }, { "epoch": 23.08, "learning_rate": 0.001, "loss": 2.5596, "step": 120204 }, { "epoch": 23.08, "learning_rate": 0.001, "loss": 2.5606, "step": 120216 }, { "epoch": 23.09, "learning_rate": 0.001, "loss": 2.5619, "step": 120228 }, { "epoch": 23.09, "learning_rate": 0.001, "loss": 2.5598, "step": 120240 }, { "epoch": 23.09, "learning_rate": 0.001, "loss": 2.5602, "step": 120252 }, { "epoch": 23.09, "learning_rate": 0.001, "loss": 2.5619, "step": 120264 }, { "epoch": 23.09, "learning_rate": 0.001, "loss": 2.565, "step": 120276 }, { "epoch": 23.1, "learning_rate": 0.001, "loss": 2.5646, "step": 120288 }, { "epoch": 23.1, "learning_rate": 0.001, "loss": 2.5607, "step": 120300 }, { "epoch": 23.1, "learning_rate": 0.001, "loss": 2.5649, "step": 120312 }, { "epoch": 23.1, "learning_rate": 0.001, "loss": 2.5681, "step": 120324 }, { "epoch": 23.11, "learning_rate": 0.001, "loss": 2.5767, "step": 120336 }, { "epoch": 23.11, "learning_rate": 0.001, "loss": 2.5597, "step": 120348 }, { "epoch": 23.11, "learning_rate": 0.001, "loss": 2.5638, "step": 120360 }, { "epoch": 23.11, "learning_rate": 0.001, "loss": 2.562, "step": 120372 }, { "epoch": 23.12, "learning_rate": 0.001, "loss": 2.5537, "step": 120384 }, { "epoch": 23.12, "learning_rate": 0.001, "loss": 2.5656, "step": 120396 }, { "epoch": 23.12, "learning_rate": 0.001, "loss": 2.5665, "step": 120408 }, { "epoch": 23.12, "learning_rate": 0.001, "loss": 2.5719, "step": 120420 }, { "epoch": 23.12, "learning_rate": 0.001, "loss": 2.5597, "step": 120432 }, { "epoch": 23.13, "learning_rate": 0.001, "loss": 2.5744, "step": 120444 }, { "epoch": 23.13, "learning_rate": 0.001, "loss": 2.5757, "step": 120456 }, { "epoch": 23.13, "learning_rate": 0.001, "loss": 2.5696, "step": 120468 }, { "epoch": 23.13, "learning_rate": 0.001, "loss": 2.5744, "step": 120480 }, { "epoch": 23.14, "learning_rate": 0.001, "loss": 2.5655, "step": 120492 }, { "epoch": 23.14, "learning_rate": 0.001, "loss": 2.5745, "step": 120504 }, { "epoch": 23.14, "learning_rate": 0.001, "loss": 2.5791, "step": 120516 }, { "epoch": 23.14, "learning_rate": 0.001, "loss": 2.5749, "step": 120528 }, { "epoch": 23.15, "learning_rate": 0.001, "loss": 2.5625, "step": 120540 }, { "epoch": 23.15, "learning_rate": 0.001, "loss": 2.5671, "step": 120552 }, { "epoch": 23.15, "learning_rate": 0.001, "loss": 2.5619, "step": 120564 }, { "epoch": 23.15, "learning_rate": 0.001, "loss": 2.5704, "step": 120576 }, { "epoch": 23.15, "learning_rate": 0.001, "loss": 2.5576, "step": 120588 }, { "epoch": 23.16, "learning_rate": 0.001, "loss": 2.5642, "step": 120600 }, { "epoch": 23.16, "learning_rate": 0.001, "loss": 2.5652, "step": 120612 }, { "epoch": 23.16, "learning_rate": 0.001, "loss": 2.5587, "step": 120624 }, { "epoch": 23.16, "eval_ag_news_accuracy": 0.32090625, "eval_ag_news_bleu_score": 4.875310752354433, "eval_ag_news_bleu_score_sem": 0.15870330089120316, "eval_ag_news_emb_cos_sim": 0.8108322620391846, "eval_ag_news_emb_cos_sim_sem": 0.006963277747292689, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.574097156524658, "eval_ag_news_n_ngrams_match_1": 13.874, "eval_ag_news_n_ngrams_match_2": 3.092, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 46.214, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.662408703583345, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3481154534190848, "eval_ag_news_runtime": 10.3417, "eval_ag_news_samples_per_second": 48.348, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3509548927974077, "eval_ag_news_token_set_f1_sem": 0.004224702097278793, "eval_ag_news_token_set_precision": 0.3324007832530405, "eval_ag_news_token_set_recall": 0.38889750745929447, "eval_ag_news_true_num_tokens": 56.09375, "step": 120625 }, { "epoch": 23.16, "eval_anthropic_toxic_prompts_accuracy": 0.1135625, "eval_anthropic_toxic_prompts_bleu_score": 3.1167111312432585, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12071792740159332, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6717588901519775, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00930365141534314, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2766811847686768, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.134, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 46.46, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.487718818757042, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2133445207598026, "eval_anthropic_toxic_prompts_runtime": 10.3808, "eval_anthropic_toxic_prompts_samples_per_second": 48.166, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.354542771347837, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006912398442934433, "eval_anthropic_toxic_prompts_token_set_precision": 0.4321732971310753, "eval_anthropic_toxic_prompts_token_set_recall": 0.3273676779963287, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 120625 }, { "epoch": 23.16, "eval_arxiv_accuracy": 0.3468125, "eval_arxiv_bleu_score": 4.2025908255961895, "eval_arxiv_bleu_score_sem": 0.12251858305805556, "eval_arxiv_emb_cos_sim": 0.7643671631813049, "eval_arxiv_emb_cos_sim_sem": 0.00761582159771867, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.42813777923584, "eval_arxiv_n_ngrams_match_1": 14.838, "eval_arxiv_n_ngrams_match_2": 2.868, "eval_arxiv_n_ngrams_match_3": 0.606, "eval_arxiv_num_pred_words": 39.864, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.81919712923578, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35313480072736525, "eval_arxiv_runtime": 10.2601, "eval_arxiv_samples_per_second": 48.732, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3484620285783303, "eval_arxiv_token_set_f1_sem": 0.004427070345154373, "eval_arxiv_token_set_precision": 0.2986761384584464, "eval_arxiv_token_set_recall": 0.4373637670702221, "eval_arxiv_true_num_tokens": 64.0, "step": 120625 }, { "epoch": 23.16, "eval_python_code_alpaca_accuracy": 0.16203125, "eval_python_code_alpaca_bleu_score": 4.638753400852363, "eval_python_code_alpaca_bleu_score_sem": 0.13829019574635984, "eval_python_code_alpaca_emb_cos_sim": 0.7511048316955566, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009468016222001531, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.892890214920044, "eval_python_code_alpaca_n_ngrams_match_1": 9.924, "eval_python_code_alpaca_n_ngrams_match_2": 2.908, "eval_python_code_alpaca_n_ngrams_match_3": 0.962, "eval_python_code_alpaca_num_pred_words": 43.19, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.045389358012933, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3369124215961945, "eval_python_code_alpaca_runtime": 9.7862, "eval_python_code_alpaca_samples_per_second": 51.092, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4799662341383265, "eval_python_code_alpaca_token_set_f1_sem": 0.0054365720163412545, "eval_python_code_alpaca_token_set_precision": 0.5410514231032056, "eval_python_code_alpaca_token_set_recall": 0.454211479875216, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 120625 }, { "epoch": 23.16, "eval_wikibio_accuracy": 0.32409375, "eval_wikibio_bleu_score": 5.569470526262075, "eval_wikibio_bleu_score_sem": 0.20195718563972753, "eval_wikibio_emb_cos_sim": 0.7310576438903809, "eval_wikibio_emb_cos_sim_sem": 0.009878558136634023, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7472598552703857, "eval_wikibio_n_ngrams_match_1": 9.926, "eval_wikibio_n_ngrams_match_2": 3.158, "eval_wikibio_n_ngrams_match_3": 1.088, "eval_wikibio_num_pred_words": 35.772, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.40472756811744, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3441917984004594, "eval_wikibio_runtime": 10.0326, "eval_wikibio_samples_per_second": 49.838, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3155579849330876, "eval_wikibio_token_set_f1_sem": 0.005554274563882959, "eval_wikibio_token_set_precision": 0.3203731255075236, "eval_wikibio_token_set_recall": 0.32894732292271944, "eval_wikibio_true_num_tokens": 61.1328125, "step": 120625 }, { "epoch": 23.16, "eval_nq_accuracy": 0.52390625, "eval_nq_bleu_score": 11.227191944524384, "eval_nq_bleu_score_sem": 0.47124730409144033, "eval_nq_emb_cos_sim": 0.8317947387695312, "eval_nq_emb_cos_sim_sem": 0.006556944590610716, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.2020928859710693, "eval_nq_n_ngrams_match_1": 22.656, "eval_nq_n_ngrams_match_2": 8.128, "eval_nq_n_ngrams_match_3": 3.706, "eval_nq_num_pred_words": 48.528, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 9.04392160292634, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44103413952042203, "eval_nq_runtime": 10.6069, "eval_nq_samples_per_second": 47.139, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.45492710419806515, "eval_nq_token_set_f1_sem": 0.00474131462453347, "eval_nq_token_set_precision": 0.41196937239271425, "eval_nq_token_set_recall": 0.5173019347620817, "eval_nq_true_num_tokens": 64.0, "step": 120625 }, { "epoch": 23.16, "learning_rate": 0.001, "loss": 2.5675, "step": 120636 }, { "epoch": 23.17, "learning_rate": 0.001, "loss": 2.5613, "step": 120648 }, { "epoch": 23.17, "learning_rate": 0.001, "loss": 2.5778, "step": 120660 }, { "epoch": 23.17, "learning_rate": 0.001, "loss": 2.576, "step": 120672 }, { "epoch": 23.17, "learning_rate": 0.001, "loss": 2.5641, "step": 120684 }, { "epoch": 23.18, "learning_rate": 0.001, "loss": 2.5651, "step": 120696 }, { "epoch": 23.18, "learning_rate": 0.001, "loss": 2.5758, "step": 120708 }, { "epoch": 23.18, "learning_rate": 0.001, "loss": 2.5608, "step": 120720 }, { "epoch": 23.18, "learning_rate": 0.001, "loss": 2.5698, "step": 120732 }, { "epoch": 23.18, "learning_rate": 0.001, "loss": 2.5683, "step": 120744 }, { "epoch": 23.19, "learning_rate": 0.001, "loss": 2.5695, "step": 120756 }, { "epoch": 23.19, "learning_rate": 0.001, "loss": 2.5659, "step": 120768 }, { "epoch": 23.19, "learning_rate": 0.001, "loss": 2.5724, "step": 120780 }, { "epoch": 23.19, "learning_rate": 0.001, "loss": 2.5586, "step": 120792 }, { "epoch": 23.2, "learning_rate": 0.001, "loss": 2.5641, "step": 120804 }, { "epoch": 23.2, "learning_rate": 0.001, "loss": 2.5628, "step": 120816 }, { "epoch": 23.2, "learning_rate": 0.001, "loss": 2.5653, "step": 120828 }, { "epoch": 23.2, "learning_rate": 0.001, "loss": 2.5511, "step": 120840 }, { "epoch": 23.21, "learning_rate": 0.001, "loss": 2.5661, "step": 120852 }, { "epoch": 23.21, "learning_rate": 0.001, "loss": 2.5727, "step": 120864 }, { "epoch": 23.21, "learning_rate": 0.001, "loss": 2.5671, "step": 120876 }, { "epoch": 23.21, "learning_rate": 0.001, "loss": 2.5669, "step": 120888 }, { "epoch": 23.21, "learning_rate": 0.001, "loss": 2.5628, "step": 120900 }, { "epoch": 23.22, "learning_rate": 0.001, "loss": 2.5627, "step": 120912 }, { "epoch": 23.22, "learning_rate": 0.001, "loss": 2.5655, "step": 120924 }, { "epoch": 23.22, "learning_rate": 0.001, "loss": 2.5648, "step": 120936 }, { "epoch": 23.22, "learning_rate": 0.001, "loss": 2.5616, "step": 120948 }, { "epoch": 23.23, "learning_rate": 0.001, "loss": 2.5678, "step": 120960 }, { "epoch": 23.23, "learning_rate": 0.001, "loss": 2.5638, "step": 120972 }, { "epoch": 23.23, "learning_rate": 0.001, "loss": 2.5736, "step": 120984 }, { "epoch": 23.23, "learning_rate": 0.001, "loss": 2.5688, "step": 120996 }, { "epoch": 23.24, "learning_rate": 0.001, "loss": 2.578, "step": 121008 }, { "epoch": 23.24, "learning_rate": 0.001, "loss": 2.5703, "step": 121020 }, { "epoch": 23.24, "learning_rate": 0.001, "loss": 2.5617, "step": 121032 }, { "epoch": 23.24, "learning_rate": 0.001, "loss": 2.5669, "step": 121044 }, { "epoch": 23.24, "learning_rate": 0.001, "loss": 2.5687, "step": 121056 }, { "epoch": 23.25, "learning_rate": 0.001, "loss": 2.573, "step": 121068 }, { "epoch": 23.25, "learning_rate": 0.001, "loss": 2.5774, "step": 121080 }, { "epoch": 23.25, "learning_rate": 0.001, "loss": 2.5608, "step": 121092 }, { "epoch": 23.25, "learning_rate": 0.001, "loss": 2.558, "step": 121104 }, { "epoch": 23.26, "learning_rate": 0.001, "loss": 2.5554, "step": 121116 }, { "epoch": 23.26, "learning_rate": 0.001, "loss": 2.559, "step": 121128 }, { "epoch": 23.26, "learning_rate": 0.001, "loss": 2.5653, "step": 121140 }, { "epoch": 23.26, "learning_rate": 0.001, "loss": 2.5792, "step": 121152 }, { "epoch": 23.26, "learning_rate": 0.001, "loss": 2.5722, "step": 121164 }, { "epoch": 23.27, "learning_rate": 0.001, "loss": 2.5624, "step": 121176 }, { "epoch": 23.27, "learning_rate": 0.001, "loss": 2.5718, "step": 121188 }, { "epoch": 23.27, "learning_rate": 0.001, "loss": 2.5699, "step": 121200 }, { "epoch": 23.27, "learning_rate": 0.001, "loss": 2.5543, "step": 121212 }, { "epoch": 23.28, "learning_rate": 0.001, "loss": 2.5606, "step": 121224 }, { "epoch": 23.28, "learning_rate": 0.001, "loss": 2.5585, "step": 121236 }, { "epoch": 23.28, "learning_rate": 0.001, "loss": 2.5577, "step": 121248 }, { "epoch": 23.28, "eval_ag_news_accuracy": 0.32103125, "eval_ag_news_bleu_score": 4.874165419045654, "eval_ag_news_bleu_score_sem": 0.16579289932605318, "eval_ag_news_emb_cos_sim": 0.8063075542449951, "eval_ag_news_emb_cos_sim_sem": 0.0073833902892596935, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5741777420043945, "eval_ag_news_n_ngrams_match_1": 13.988, "eval_ag_news_n_ngrams_match_2": 3.042, "eval_ag_news_n_ngrams_match_3": 0.898, "eval_ag_news_num_pred_words": 46.472, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.665282691696575, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3462235945244524, "eval_ag_news_runtime": 10.5762, "eval_ag_news_samples_per_second": 47.276, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3477193815814541, "eval_ag_news_token_set_f1_sem": 0.004257447670961566, "eval_ag_news_token_set_precision": 0.33329777494106055, "eval_ag_news_token_set_recall": 0.3792672712927153, "eval_ag_news_true_num_tokens": 56.09375, "step": 121250 }, { "epoch": 23.28, "eval_anthropic_toxic_prompts_accuracy": 0.1154375, "eval_anthropic_toxic_prompts_bleu_score": 3.080957011351216, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11632108037906756, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728988885879517, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008611370330760974, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2483327388763428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.112, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.66, "eval_anthropic_toxic_prompts_num_pred_words": 46.816, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.747376511643456, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21199609188909496, "eval_anthropic_toxic_prompts_runtime": 11.388, "eval_anthropic_toxic_prompts_samples_per_second": 43.906, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.3494584830314751, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006463487341986004, "eval_anthropic_toxic_prompts_token_set_precision": 0.4294979965929602, "eval_anthropic_toxic_prompts_token_set_recall": 0.32281078115399486, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 121250 }, { "epoch": 23.28, "eval_arxiv_accuracy": 0.34340625, "eval_arxiv_bleu_score": 4.359020347480952, "eval_arxiv_bleu_score_sem": 0.1150764754668123, "eval_arxiv_emb_cos_sim": 0.7614631056785583, "eval_arxiv_emb_cos_sim_sem": 0.007802540039156027, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.431478977203369, "eval_arxiv_n_ngrams_match_1": 15.258, "eval_arxiv_n_ngrams_match_2": 2.94, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 40.914, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.9223423864507, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.362370790399975, "eval_arxiv_runtime": 10.3257, "eval_arxiv_samples_per_second": 48.423, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3565735073656416, "eval_arxiv_token_set_f1_sem": 0.004100316361615648, "eval_arxiv_token_set_precision": 0.3087606879210232, "eval_arxiv_token_set_recall": 0.44003770890626887, "eval_arxiv_true_num_tokens": 64.0, "step": 121250 }, { "epoch": 23.28, "eval_python_code_alpaca_accuracy": 0.15809375, "eval_python_code_alpaca_bleu_score": 4.582500605666104, "eval_python_code_alpaca_bleu_score_sem": 0.1499009498163755, "eval_python_code_alpaca_emb_cos_sim": 0.7578281164169312, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008202336483051763, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.906625270843506, "eval_python_code_alpaca_n_ngrams_match_1": 9.756, "eval_python_code_alpaca_n_ngrams_match_2": 2.848, "eval_python_code_alpaca_n_ngrams_match_3": 0.948, "eval_python_code_alpaca_num_pred_words": 43.654, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.294953757110324, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3298523791182037, "eval_python_code_alpaca_runtime": 10.1033, "eval_python_code_alpaca_samples_per_second": 49.489, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4767302242072028, "eval_python_code_alpaca_token_set_f1_sem": 0.005600595670791987, "eval_python_code_alpaca_token_set_precision": 0.5336928393342181, "eval_python_code_alpaca_token_set_recall": 0.4521975005902411, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 121250 }, { "epoch": 23.28, "eval_wikibio_accuracy": 0.320875, "eval_wikibio_bleu_score": 5.741545255088878, "eval_wikibio_bleu_score_sem": 0.2083526776120858, "eval_wikibio_emb_cos_sim": 0.7482487559318542, "eval_wikibio_emb_cos_sim_sem": 0.008251312438110439, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7810096740722656, "eval_wikibio_n_ngrams_match_1": 10.218, "eval_wikibio_n_ngrams_match_2": 3.324, "eval_wikibio_n_ngrams_match_3": 1.166, "eval_wikibio_num_pred_words": 37.406, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.860303998332995, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35141834315454623, "eval_wikibio_runtime": 10.3546, "eval_wikibio_samples_per_second": 48.288, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.321003867132244, "eval_wikibio_token_set_f1_sem": 0.005373908280963026, "eval_wikibio_token_set_precision": 0.3313451305113394, "eval_wikibio_token_set_recall": 0.324060558482982, "eval_wikibio_true_num_tokens": 61.1328125, "step": 121250 }, { "epoch": 23.28, "eval_nq_accuracy": 0.526125, "eval_nq_bleu_score": 11.430813609193399, "eval_nq_bleu_score_sem": 0.4650151959951594, "eval_nq_emb_cos_sim": 0.8315334320068359, "eval_nq_emb_cos_sim_sem": 0.006737074952790213, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.192634344100952, "eval_nq_n_ngrams_match_1": 23.026, "eval_nq_n_ngrams_match_2": 8.414, "eval_nq_n_ngrams_match_3": 3.802, "eval_nq_num_pred_words": 48.648, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.958782572058137, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44587342125133683, "eval_nq_runtime": 11.7181, "eval_nq_samples_per_second": 42.669, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.4608285823908761, "eval_nq_token_set_f1_sem": 0.004752347148498197, "eval_nq_token_set_precision": 0.4192158216426424, "eval_nq_token_set_recall": 0.5196408978945384, "eval_nq_true_num_tokens": 64.0, "step": 121250 }, { "epoch": 23.28, "learning_rate": 0.001, "loss": 2.5655, "step": 121260 }, { "epoch": 23.29, "learning_rate": 0.001, "loss": 2.5754, "step": 121272 }, { "epoch": 23.29, "learning_rate": 0.001, "loss": 2.5687, "step": 121284 }, { "epoch": 23.29, "learning_rate": 0.001, "loss": 2.5704, "step": 121296 }, { "epoch": 23.29, "learning_rate": 0.001, "loss": 2.5732, "step": 121308 }, { "epoch": 23.29, "learning_rate": 0.001, "loss": 2.5705, "step": 121320 }, { "epoch": 23.3, "learning_rate": 0.001, "loss": 2.5746, "step": 121332 }, { "epoch": 23.3, "learning_rate": 0.001, "loss": 2.5717, "step": 121344 }, { "epoch": 23.3, "learning_rate": 0.001, "loss": 2.5609, "step": 121356 }, { "epoch": 23.3, "learning_rate": 0.001, "loss": 2.5697, "step": 121368 }, { "epoch": 23.31, "learning_rate": 0.001, "loss": 2.572, "step": 121380 }, { "epoch": 23.31, "learning_rate": 0.001, "loss": 2.5605, "step": 121392 }, { "epoch": 23.31, "learning_rate": 0.001, "loss": 2.5694, "step": 121404 }, { "epoch": 23.31, "learning_rate": 0.001, "loss": 2.5755, "step": 121416 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 2.5704, "step": 121428 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 2.5567, "step": 121440 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 2.566, "step": 121452 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 2.5668, "step": 121464 }, { "epoch": 23.32, "learning_rate": 0.001, "loss": 2.5637, "step": 121476 }, { "epoch": 23.33, "learning_rate": 0.001, "loss": 2.5668, "step": 121488 }, { "epoch": 23.33, "learning_rate": 0.001, "loss": 2.5697, "step": 121500 }, { "epoch": 23.33, "learning_rate": 0.001, "loss": 2.5559, "step": 121512 }, { "epoch": 23.33, "learning_rate": 0.001, "loss": 2.5673, "step": 121524 }, { "epoch": 23.34, "learning_rate": 0.001, "loss": 2.5755, "step": 121536 }, { "epoch": 23.34, "learning_rate": 0.001, "loss": 2.5661, "step": 121548 }, { "epoch": 23.34, "learning_rate": 0.001, "loss": 2.5586, "step": 121560 }, { "epoch": 23.34, "learning_rate": 0.001, "loss": 2.5684, "step": 121572 }, { "epoch": 23.35, "learning_rate": 0.001, "loss": 2.5758, "step": 121584 }, { "epoch": 23.35, "learning_rate": 0.001, "loss": 2.5697, "step": 121596 }, { "epoch": 23.35, "learning_rate": 0.001, "loss": 2.5658, "step": 121608 }, { "epoch": 23.35, "learning_rate": 0.001, "loss": 2.5653, "step": 121620 }, { "epoch": 23.35, "learning_rate": 0.001, "loss": 2.5694, "step": 121632 }, { "epoch": 23.36, "learning_rate": 0.001, "loss": 2.5652, "step": 121644 }, { "epoch": 23.36, "learning_rate": 0.001, "loss": 2.5707, "step": 121656 }, { "epoch": 23.36, "learning_rate": 0.001, "loss": 2.564, "step": 121668 }, { "epoch": 23.36, "learning_rate": 0.001, "loss": 2.5677, "step": 121680 }, { "epoch": 23.37, "learning_rate": 0.001, "loss": 2.5742, "step": 121692 }, { "epoch": 23.37, "learning_rate": 0.001, "loss": 2.5502, "step": 121704 }, { "epoch": 23.37, "learning_rate": 0.001, "loss": 2.558, "step": 121716 }, { "epoch": 23.37, "learning_rate": 0.001, "loss": 2.5613, "step": 121728 }, { "epoch": 23.38, "learning_rate": 0.001, "loss": 2.5584, "step": 121740 }, { "epoch": 23.38, "learning_rate": 0.001, "loss": 2.5678, "step": 121752 }, { "epoch": 23.38, "learning_rate": 0.001, "loss": 2.5728, "step": 121764 }, { "epoch": 23.38, "learning_rate": 0.001, "loss": 2.5637, "step": 121776 }, { "epoch": 23.38, "learning_rate": 0.001, "loss": 2.5793, "step": 121788 }, { "epoch": 23.39, "learning_rate": 0.001, "loss": 2.5786, "step": 121800 }, { "epoch": 23.39, "learning_rate": 0.001, "loss": 2.5618, "step": 121812 }, { "epoch": 23.39, "learning_rate": 0.001, "loss": 2.5686, "step": 121824 }, { "epoch": 23.39, "learning_rate": 0.001, "loss": 2.5706, "step": 121836 }, { "epoch": 23.4, "learning_rate": 0.001, "loss": 2.5766, "step": 121848 }, { "epoch": 23.4, "learning_rate": 0.001, "loss": 2.5661, "step": 121860 }, { "epoch": 23.4, "learning_rate": 0.001, "loss": 2.558, "step": 121872 }, { "epoch": 23.4, "eval_ag_news_accuracy": 0.3189375, "eval_ag_news_bleu_score": 4.642633663633252, "eval_ag_news_bleu_score_sem": 0.14945202633130197, "eval_ag_news_emb_cos_sim": 0.8092349767684937, "eval_ag_news_emb_cos_sim_sem": 0.007305152567533314, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.57832670211792, "eval_ag_news_n_ngrams_match_1": 14.132, "eval_ag_news_n_ngrams_match_2": 3.068, "eval_ag_news_n_ngrams_match_3": 0.834, "eval_ag_news_num_pred_words": 47.356, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.813563920766775, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34903784453151987, "eval_ag_news_runtime": 10.6779, "eval_ag_news_samples_per_second": 46.826, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35103969289557596, "eval_ag_news_token_set_f1_sem": 0.004404011331696912, "eval_ag_news_token_set_precision": 0.33822467424658614, "eval_ag_news_token_set_recall": 0.38167712725139696, "eval_ag_news_true_num_tokens": 56.09375, "step": 121875 }, { "epoch": 23.4, "eval_anthropic_toxic_prompts_accuracy": 0.11371875, "eval_anthropic_toxic_prompts_bleu_score": 3.1588372411166685, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12305615999424686, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.676432728767395, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009129692932292457, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.26751971244812, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.186, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 46.768, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.246160520345136, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21396210678798863, "eval_anthropic_toxic_prompts_runtime": 11.7323, "eval_anthropic_toxic_prompts_samples_per_second": 42.618, "eval_anthropic_toxic_prompts_steps_per_second": 0.085, "eval_anthropic_toxic_prompts_token_set_f1": 0.35061784063451074, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006703209074744799, "eval_anthropic_toxic_prompts_token_set_precision": 0.4321739548990694, "eval_anthropic_toxic_prompts_token_set_recall": 0.3203556620142261, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 121875 }, { "epoch": 23.4, "eval_arxiv_accuracy": 0.34728125, "eval_arxiv_bleu_score": 4.294976414606507, "eval_arxiv_bleu_score_sem": 0.1203371534208929, "eval_arxiv_emb_cos_sim": 0.7646492719650269, "eval_arxiv_emb_cos_sim_sem": 0.0066429834305786585, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.412419319152832, "eval_arxiv_n_ngrams_match_1": 15.07, "eval_arxiv_n_ngrams_match_2": 2.896, "eval_arxiv_n_ngrams_match_3": 0.646, "eval_arxiv_num_pred_words": 40.938, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.33855418880258, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35841171502222924, "eval_arxiv_runtime": 10.2995, "eval_arxiv_samples_per_second": 48.546, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3524439937035807, "eval_arxiv_token_set_f1_sem": 0.004223507030155764, "eval_arxiv_token_set_precision": 0.30447165293398804, "eval_arxiv_token_set_recall": 0.4330600411975926, "eval_arxiv_true_num_tokens": 64.0, "step": 121875 }, { "epoch": 23.4, "eval_python_code_alpaca_accuracy": 0.15903125, "eval_python_code_alpaca_bleu_score": 4.596467146569537, "eval_python_code_alpaca_bleu_score_sem": 0.14537870616202808, "eval_python_code_alpaca_emb_cos_sim": 0.7548423409461975, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008688273182769672, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.910658359527588, "eval_python_code_alpaca_n_ngrams_match_1": 9.848, "eval_python_code_alpaca_n_ngrams_match_2": 2.936, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 43.812, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.368887919384022, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3309596211758795, "eval_python_code_alpaca_runtime": 10.0746, "eval_python_code_alpaca_samples_per_second": 49.63, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.47304201350047886, "eval_python_code_alpaca_token_set_f1_sem": 0.005512177902522732, "eval_python_code_alpaca_token_set_precision": 0.5375731385328762, "eval_python_code_alpaca_token_set_recall": 0.4422129079418682, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 121875 }, { "epoch": 23.4, "eval_wikibio_accuracy": 0.32171875, "eval_wikibio_bleu_score": 5.802899531108984, "eval_wikibio_bleu_score_sem": 0.21133635337679493, "eval_wikibio_emb_cos_sim": 0.73765629529953, "eval_wikibio_emb_cos_sim_sem": 0.010046967723660443, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7330172061920166, "eval_wikibio_n_ngrams_match_1": 10.124, "eval_wikibio_n_ngrams_match_2": 3.332, "eval_wikibio_n_ngrams_match_3": 1.216, "eval_wikibio_num_pred_words": 36.964, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.80505253177776, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34796721431657673, "eval_wikibio_runtime": 10.1114, "eval_wikibio_samples_per_second": 49.449, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31746722919500114, "eval_wikibio_token_set_f1_sem": 0.005486546495886831, "eval_wikibio_token_set_precision": 0.32854201146966994, "eval_wikibio_token_set_recall": 0.3231797425642653, "eval_wikibio_true_num_tokens": 61.1328125, "step": 121875 }, { "epoch": 23.4, "eval_nq_accuracy": 0.52665625, "eval_nq_bleu_score": 11.756796526191959, "eval_nq_bleu_score_sem": 0.4660388153848274, "eval_nq_emb_cos_sim": 0.8276510834693909, "eval_nq_emb_cos_sim_sem": 0.0071159738038141344, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1940035820007324, "eval_nq_n_ngrams_match_1": 23.012, "eval_nq_n_ngrams_match_2": 8.534, "eval_nq_n_ngrams_match_3": 3.946, "eval_nq_num_pred_words": 49.302, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.971057678544408, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4433672046503808, "eval_nq_runtime": 10.4057, "eval_nq_samples_per_second": 48.051, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4572500635910518, "eval_nq_token_set_f1_sem": 0.004937699731814166, "eval_nq_token_set_precision": 0.416285840345419, "eval_nq_token_set_recall": 0.5161096093213172, "eval_nq_true_num_tokens": 64.0, "step": 121875 }, { "epoch": 23.4, "learning_rate": 0.001, "loss": 2.5653, "step": 121884 }, { "epoch": 23.41, "learning_rate": 0.001, "loss": 2.5632, "step": 121896 }, { "epoch": 23.41, "learning_rate": 0.001, "loss": 2.5515, "step": 121908 }, { "epoch": 23.41, "learning_rate": 0.001, "loss": 2.5783, "step": 121920 }, { "epoch": 23.41, "learning_rate": 0.001, "loss": 2.5752, "step": 121932 }, { "epoch": 23.41, "learning_rate": 0.001, "loss": 2.57, "step": 121944 }, { "epoch": 23.42, "learning_rate": 0.001, "loss": 2.5663, "step": 121956 }, { "epoch": 23.42, "learning_rate": 0.001, "loss": 2.571, "step": 121968 }, { "epoch": 23.42, "learning_rate": 0.001, "loss": 2.5697, "step": 121980 }, { "epoch": 23.42, "learning_rate": 0.001, "loss": 2.565, "step": 121992 }, { "epoch": 23.43, "learning_rate": 0.001, "loss": 2.5656, "step": 122004 }, { "epoch": 23.43, "learning_rate": 0.001, "loss": 2.573, "step": 122016 }, { "epoch": 23.43, "learning_rate": 0.001, "loss": 2.5609, "step": 122028 }, { "epoch": 23.43, "learning_rate": 0.001, "loss": 2.5709, "step": 122040 }, { "epoch": 23.44, "learning_rate": 0.001, "loss": 2.5578, "step": 122052 }, { "epoch": 23.44, "learning_rate": 0.001, "loss": 2.5665, "step": 122064 }, { "epoch": 23.44, "learning_rate": 0.001, "loss": 2.5652, "step": 122076 }, { "epoch": 23.44, "learning_rate": 0.001, "loss": 2.5662, "step": 122088 }, { "epoch": 23.44, "learning_rate": 0.001, "loss": 2.5716, "step": 122100 }, { "epoch": 23.45, "learning_rate": 0.001, "loss": 2.5626, "step": 122112 }, { "epoch": 23.45, "learning_rate": 0.001, "loss": 2.5694, "step": 122124 }, { "epoch": 23.45, "learning_rate": 0.001, "loss": 2.5634, "step": 122136 }, { "epoch": 23.45, "learning_rate": 0.001, "loss": 2.562, "step": 122148 }, { "epoch": 23.46, "learning_rate": 0.001, "loss": 2.5662, "step": 122160 }, { "epoch": 23.46, "learning_rate": 0.001, "loss": 2.5712, "step": 122172 }, { "epoch": 23.46, "learning_rate": 0.001, "loss": 2.5738, "step": 122184 }, { "epoch": 23.46, "learning_rate": 0.001, "loss": 2.5748, "step": 122196 }, { "epoch": 23.47, "learning_rate": 0.001, "loss": 2.5705, "step": 122208 }, { "epoch": 23.47, "learning_rate": 0.001, "loss": 2.5589, "step": 122220 }, { "epoch": 23.47, "learning_rate": 0.001, "loss": 2.5607, "step": 122232 }, { "epoch": 23.47, "learning_rate": 0.001, "loss": 2.5661, "step": 122244 }, { "epoch": 23.47, "learning_rate": 0.001, "loss": 2.5759, "step": 122256 }, { "epoch": 23.48, "learning_rate": 0.001, "loss": 2.5815, "step": 122268 }, { "epoch": 23.48, "learning_rate": 0.001, "loss": 2.5632, "step": 122280 }, { "epoch": 23.48, "learning_rate": 0.001, "loss": 2.58, "step": 122292 }, { "epoch": 23.48, "learning_rate": 0.001, "loss": 2.5718, "step": 122304 }, { "epoch": 23.49, "learning_rate": 0.001, "loss": 2.561, "step": 122316 }, { "epoch": 23.49, "learning_rate": 0.001, "loss": 2.5761, "step": 122328 }, { "epoch": 23.49, "learning_rate": 0.001, "loss": 2.5772, "step": 122340 }, { "epoch": 23.49, "learning_rate": 0.001, "loss": 2.565, "step": 122352 }, { "epoch": 23.5, "learning_rate": 0.001, "loss": 2.5659, "step": 122364 }, { "epoch": 23.5, "learning_rate": 0.001, "loss": 2.5697, "step": 122376 }, { "epoch": 23.5, "learning_rate": 0.001, "loss": 2.5577, "step": 122388 }, { "epoch": 23.5, "learning_rate": 0.001, "loss": 2.5647, "step": 122400 }, { "epoch": 23.5, "learning_rate": 0.001, "loss": 2.5657, "step": 122412 }, { "epoch": 23.51, "learning_rate": 0.001, "loss": 2.575, "step": 122424 }, { "epoch": 23.51, "learning_rate": 0.001, "loss": 2.5589, "step": 122436 }, { "epoch": 23.51, "learning_rate": 0.001, "loss": 2.5652, "step": 122448 }, { "epoch": 23.51, "learning_rate": 0.001, "loss": 2.565, "step": 122460 }, { "epoch": 23.52, "learning_rate": 0.001, "loss": 2.5777, "step": 122472 }, { "epoch": 23.52, "learning_rate": 0.001, "loss": 2.5693, "step": 122484 }, { "epoch": 23.52, "learning_rate": 0.001, "loss": 2.5762, "step": 122496 }, { "epoch": 23.52, "eval_ag_news_accuracy": 0.3204375, "eval_ag_news_bleu_score": 4.8349354317944595, "eval_ag_news_bleu_score_sem": 0.15351510065098548, "eval_ag_news_emb_cos_sim": 0.8132928609848022, "eval_ag_news_emb_cos_sim_sem": 0.006314245256183273, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5733981132507324, "eval_ag_news_n_ngrams_match_1": 14.014, "eval_ag_news_n_ngrams_match_2": 3.078, "eval_ag_news_n_ngrams_match_3": 0.898, "eval_ag_news_num_pred_words": 46.854, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.63748784804016, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3475771176116358, "eval_ag_news_runtime": 10.7206, "eval_ag_news_samples_per_second": 46.639, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.35014314843507793, "eval_ag_news_token_set_f1_sem": 0.004463124716538842, "eval_ag_news_token_set_precision": 0.3359607803155184, "eval_ag_news_token_set_recall": 0.3821531054917202, "eval_ag_news_true_num_tokens": 56.09375, "step": 122500 }, { "epoch": 23.52, "eval_anthropic_toxic_prompts_accuracy": 0.11328125, "eval_anthropic_toxic_prompts_bleu_score": 3.1850523961142883, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11906180560670668, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681453287601471, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008138155393848276, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2276039123535156, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.278, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.968, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754, "eval_anthropic_toxic_prompts_num_pred_words": 47.624, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.21915720750061, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21616496563371101, "eval_anthropic_toxic_prompts_runtime": 9.7901, "eval_anthropic_toxic_prompts_samples_per_second": 51.072, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.36249006687763446, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00663312401101885, "eval_anthropic_toxic_prompts_token_set_precision": 0.44388364175908684, "eval_anthropic_toxic_prompts_token_set_recall": 0.33359599349921426, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 122500 }, { "epoch": 23.52, "eval_arxiv_accuracy": 0.34746875, "eval_arxiv_bleu_score": 4.362487455202465, "eval_arxiv_bleu_score_sem": 0.13176923200403984, "eval_arxiv_emb_cos_sim": 0.7627640962600708, "eval_arxiv_emb_cos_sim_sem": 0.007787284855628606, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.426496744155884, "eval_arxiv_n_ngrams_match_1": 15.17, "eval_arxiv_n_ngrams_match_2": 3.056, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 40.82, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.768663220909403, "eval_arxiv_pred_num_tokens": 62.9765625, "eval_arxiv_rouge_score": 0.3599718993054766, "eval_arxiv_runtime": 10.253, "eval_arxiv_samples_per_second": 48.766, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3546398122566314, "eval_arxiv_token_set_f1_sem": 0.004311476017662968, "eval_arxiv_token_set_precision": 0.30654908122137453, "eval_arxiv_token_set_recall": 0.44160247311300843, "eval_arxiv_true_num_tokens": 64.0, "step": 122500 }, { "epoch": 23.52, "eval_python_code_alpaca_accuracy": 0.16140625, "eval_python_code_alpaca_bleu_score": 4.511677052583327, "eval_python_code_alpaca_bleu_score_sem": 0.13317167967921642, "eval_python_code_alpaca_emb_cos_sim": 0.758931577205658, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007892412925249444, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.876638889312744, "eval_python_code_alpaca_n_ngrams_match_1": 9.92, "eval_python_code_alpaca_n_ngrams_match_2": 3.0, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 45.226, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.75449794750196, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32749275849401843, "eval_python_code_alpaca_runtime": 13.2672, "eval_python_code_alpaca_samples_per_second": 37.687, "eval_python_code_alpaca_steps_per_second": 0.075, "eval_python_code_alpaca_token_set_f1": 0.48482474430756134, "eval_python_code_alpaca_token_set_f1_sem": 0.005587493634600956, "eval_python_code_alpaca_token_set_precision": 0.5450426199491449, "eval_python_code_alpaca_token_set_recall": 0.4568639943124936, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 122500 }, { "epoch": 23.52, "eval_wikibio_accuracy": 0.32209375, "eval_wikibio_bleu_score": 6.204530654614644, "eval_wikibio_bleu_score_sem": 0.20980407647689625, "eval_wikibio_emb_cos_sim": 0.7364906668663025, "eval_wikibio_emb_cos_sim_sem": 0.010211961440323956, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.744091272354126, "eval_wikibio_n_ngrams_match_1": 10.388, "eval_wikibio_n_ngrams_match_2": 3.546, "eval_wikibio_n_ngrams_match_3": 1.314, "eval_wikibio_num_pred_words": 36.864, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.27057731810975, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35843491332223465, "eval_wikibio_runtime": 10.0817, "eval_wikibio_samples_per_second": 49.595, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32614690938554636, "eval_wikibio_token_set_f1_sem": 0.0051561823668839135, "eval_wikibio_token_set_precision": 0.33595573731791706, "eval_wikibio_token_set_recall": 0.33118152361646486, "eval_wikibio_true_num_tokens": 61.1328125, "step": 122500 }, { "epoch": 23.52, "eval_nq_accuracy": 0.52534375, "eval_nq_bleu_score": 11.67496100472067, "eval_nq_bleu_score_sem": 0.47042914886520376, "eval_nq_emb_cos_sim": 0.8323712348937988, "eval_nq_emb_cos_sim_sem": 0.007359014264296713, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1965255737304688, "eval_nq_n_ngrams_match_1": 23.072, "eval_nq_n_ngrams_match_2": 8.49, "eval_nq_n_ngrams_match_3": 3.906, "eval_nq_num_pred_words": 49.244, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.993711165763212, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44471343700422467, "eval_nq_runtime": 11.1069, "eval_nq_samples_per_second": 45.017, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4604667802683219, "eval_nq_token_set_f1_sem": 0.004934073570951141, "eval_nq_token_set_precision": 0.4176394182970839, "eval_nq_token_set_recall": 0.5210676704936616, "eval_nq_true_num_tokens": 64.0, "step": 122500 }, { "epoch": 23.52, "learning_rate": 0.001, "loss": 2.568, "step": 122508 }, { "epoch": 23.53, "learning_rate": 0.001, "loss": 2.57, "step": 122520 }, { "epoch": 23.53, "learning_rate": 0.001, "loss": 2.5625, "step": 122532 }, { "epoch": 23.53, "learning_rate": 0.001, "loss": 2.5689, "step": 122544 }, { "epoch": 23.53, "learning_rate": 0.001, "loss": 2.5823, "step": 122556 }, { "epoch": 23.53, "learning_rate": 0.001, "loss": 2.5677, "step": 122568 }, { "epoch": 23.54, "learning_rate": 0.001, "loss": 2.5654, "step": 122580 }, { "epoch": 23.54, "learning_rate": 0.001, "loss": 2.5704, "step": 122592 }, { "epoch": 23.54, "learning_rate": 0.001, "loss": 2.5589, "step": 122604 }, { "epoch": 23.54, "learning_rate": 0.001, "loss": 2.5692, "step": 122616 }, { "epoch": 23.55, "learning_rate": 0.001, "loss": 2.5647, "step": 122628 }, { "epoch": 23.55, "learning_rate": 0.001, "loss": 2.5752, "step": 122640 }, { "epoch": 23.55, "learning_rate": 0.001, "loss": 2.5806, "step": 122652 }, { "epoch": 23.55, "learning_rate": 0.001, "loss": 2.5698, "step": 122664 }, { "epoch": 23.56, "learning_rate": 0.001, "loss": 2.5703, "step": 122676 }, { "epoch": 23.56, "learning_rate": 0.001, "loss": 2.5732, "step": 122688 }, { "epoch": 23.56, "learning_rate": 0.001, "loss": 2.5745, "step": 122700 }, { "epoch": 23.56, "learning_rate": 0.001, "loss": 2.5723, "step": 122712 }, { "epoch": 23.56, "learning_rate": 0.001, "loss": 2.5597, "step": 122724 }, { "epoch": 23.57, "learning_rate": 0.001, "loss": 2.5713, "step": 122736 }, { "epoch": 23.57, "learning_rate": 0.001, "loss": 2.5765, "step": 122748 }, { "epoch": 23.57, "learning_rate": 0.001, "loss": 2.5762, "step": 122760 }, { "epoch": 23.57, "learning_rate": 0.001, "loss": 2.5772, "step": 122772 }, { "epoch": 23.58, "learning_rate": 0.001, "loss": 2.5867, "step": 122784 }, { "epoch": 23.58, "learning_rate": 0.001, "loss": 2.5579, "step": 122796 }, { "epoch": 23.58, "learning_rate": 0.001, "loss": 2.573, "step": 122808 }, { "epoch": 23.58, "learning_rate": 0.001, "loss": 2.5622, "step": 122820 }, { "epoch": 23.59, "learning_rate": 0.001, "loss": 2.5716, "step": 122832 }, { "epoch": 23.59, "learning_rate": 0.001, "loss": 2.5748, "step": 122844 }, { "epoch": 23.59, "learning_rate": 0.001, "loss": 2.5699, "step": 122856 }, { "epoch": 23.59, "learning_rate": 0.001, "loss": 2.5571, "step": 122868 }, { "epoch": 23.59, "learning_rate": 0.001, "loss": 2.5729, "step": 122880 }, { "epoch": 23.6, "learning_rate": 0.001, "loss": 2.5652, "step": 122892 }, { "epoch": 23.6, "learning_rate": 0.001, "loss": 2.5665, "step": 122904 }, { "epoch": 23.6, "learning_rate": 0.001, "loss": 2.5704, "step": 122916 }, { "epoch": 23.6, "learning_rate": 0.001, "loss": 2.56, "step": 122928 }, { "epoch": 23.61, "learning_rate": 0.001, "loss": 2.5726, "step": 122940 }, { "epoch": 23.61, "learning_rate": 0.001, "loss": 2.5578, "step": 122952 }, { "epoch": 23.61, "learning_rate": 0.001, "loss": 2.5705, "step": 122964 }, { "epoch": 23.61, "learning_rate": 0.001, "loss": 2.5657, "step": 122976 }, { "epoch": 23.62, "learning_rate": 0.001, "loss": 2.5717, "step": 122988 }, { "epoch": 23.62, "learning_rate": 0.001, "loss": 2.5759, "step": 123000 }, { "epoch": 23.62, "learning_rate": 0.001, "loss": 2.5643, "step": 123012 }, { "epoch": 23.62, "learning_rate": 0.001, "loss": 2.5615, "step": 123024 }, { "epoch": 23.62, "learning_rate": 0.001, "loss": 2.5697, "step": 123036 }, { "epoch": 23.63, "learning_rate": 0.001, "loss": 2.5629, "step": 123048 }, { "epoch": 23.63, "learning_rate": 0.001, "loss": 2.5687, "step": 123060 }, { "epoch": 23.63, "learning_rate": 0.001, "loss": 2.5693, "step": 123072 }, { "epoch": 23.63, "learning_rate": 0.001, "loss": 2.5585, "step": 123084 }, { "epoch": 23.64, "learning_rate": 0.001, "loss": 2.5586, "step": 123096 }, { "epoch": 23.64, "learning_rate": 0.001, "loss": 2.5693, "step": 123108 }, { "epoch": 23.64, "learning_rate": 0.001, "loss": 2.5683, "step": 123120 }, { "epoch": 23.64, "eval_ag_news_accuracy": 0.32153125, "eval_ag_news_bleu_score": 4.7287873492146195, "eval_ag_news_bleu_score_sem": 0.14790544014679205, "eval_ag_news_emb_cos_sim": 0.809762716293335, "eval_ag_news_emb_cos_sim_sem": 0.006641599170022878, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.560532808303833, "eval_ag_news_n_ngrams_match_1": 13.782, "eval_ag_news_n_ngrams_match_2": 3.006, "eval_ag_news_n_ngrams_match_3": 0.828, "eval_ag_news_num_pred_words": 45.906, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.18193738056804, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3446016510997211, "eval_ag_news_runtime": 10.4482, "eval_ag_news_samples_per_second": 47.855, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3472491714521868, "eval_ag_news_token_set_f1_sem": 0.004338237047170675, "eval_ag_news_token_set_precision": 0.3303722738410303, "eval_ag_news_token_set_recall": 0.38102018685853756, "eval_ag_news_true_num_tokens": 56.09375, "step": 123125 }, { "epoch": 23.64, "eval_anthropic_toxic_prompts_accuracy": 0.1146875, "eval_anthropic_toxic_prompts_bleu_score": 3.1873214004287562, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13109050829733274, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6590409278869629, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010291505714354174, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.222557306289673, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.886, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706, "eval_anthropic_toxic_prompts_num_pred_words": 46.23, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.092206659947507, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21532330665313854, "eval_anthropic_toxic_prompts_runtime": 9.9067, "eval_anthropic_toxic_prompts_samples_per_second": 50.471, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3518883630743679, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063797285877650505, "eval_anthropic_toxic_prompts_token_set_precision": 0.43790742145145567, "eval_anthropic_toxic_prompts_token_set_recall": 0.319679910672875, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 123125 }, { "epoch": 23.64, "eval_arxiv_accuracy": 0.34678125, "eval_arxiv_bleu_score": 4.255156246553696, "eval_arxiv_bleu_score_sem": 0.12239349455577901, "eval_arxiv_emb_cos_sim": 0.7600793242454529, "eval_arxiv_emb_cos_sim_sem": 0.007320094283874934, "eval_arxiv_emb_top1_equal": 0.34375, "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4352352619171143, "eval_arxiv_n_ngrams_match_1": 15.038, "eval_arxiv_n_ngrams_match_2": 2.934, "eval_arxiv_n_ngrams_match_3": 0.622, "eval_arxiv_num_pred_words": 40.334, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 31.038713933972293, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36074641046381883, "eval_arxiv_runtime": 10.7035, "eval_arxiv_samples_per_second": 46.714, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.35192481147901955, "eval_arxiv_token_set_f1_sem": 0.004068077998896871, "eval_arxiv_token_set_precision": 0.30415173156387404, "eval_arxiv_token_set_recall": 0.4370254780126012, "eval_arxiv_true_num_tokens": 64.0, "step": 123125 }, { "epoch": 23.64, "eval_python_code_alpaca_accuracy": 0.15871875, "eval_python_code_alpaca_bleu_score": 4.427311801675563, "eval_python_code_alpaca_bleu_score_sem": 0.1414208501910256, "eval_python_code_alpaca_emb_cos_sim": 0.7463688254356384, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009315223359437411, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9006664752960205, "eval_python_code_alpaca_n_ngrams_match_1": 9.67, "eval_python_code_alpaca_n_ngrams_match_2": 2.802, "eval_python_code_alpaca_n_ngrams_match_3": 0.914, "eval_python_code_alpaca_num_pred_words": 43.166, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.186262025635585, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3284907309805889, "eval_python_code_alpaca_runtime": 10.0174, "eval_python_code_alpaca_samples_per_second": 49.913, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4642666563319816, "eval_python_code_alpaca_token_set_f1_sem": 0.005637965100670752, "eval_python_code_alpaca_token_set_precision": 0.5255050098361054, "eval_python_code_alpaca_token_set_recall": 0.4426584371830135, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 123125 }, { "epoch": 23.64, "eval_wikibio_accuracy": 0.320125, "eval_wikibio_bleu_score": 5.9093723264471825, "eval_wikibio_bleu_score_sem": 0.2161196680840932, "eval_wikibio_emb_cos_sim": 0.7459827065467834, "eval_wikibio_emb_cos_sim_sem": 0.008466136242059481, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7835049629211426, "eval_wikibio_n_ngrams_match_1": 10.186, "eval_wikibio_n_ngrams_match_2": 3.388, "eval_wikibio_n_ngrams_match_3": 1.228, "eval_wikibio_num_pred_words": 36.43, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.96988478680974, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3542634594162949, "eval_wikibio_runtime": 10.1012, "eval_wikibio_samples_per_second": 49.499, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32083178432983245, "eval_wikibio_token_set_f1_sem": 0.005317237659460588, "eval_wikibio_token_set_precision": 0.33244088826435114, "eval_wikibio_token_set_recall": 0.3245069040968402, "eval_wikibio_true_num_tokens": 61.1328125, "step": 123125 }, { "epoch": 23.64, "eval_nq_accuracy": 0.52409375, "eval_nq_bleu_score": 11.675484314510431, "eval_nq_bleu_score_sem": 0.47187692937762726, "eval_nq_emb_cos_sim": 0.835478663444519, "eval_nq_emb_cos_sim_sem": 0.006884316943696545, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1956288814544678, "eval_nq_n_ngrams_match_1": 22.874, "eval_nq_n_ngrams_match_2": 8.4, "eval_nq_n_ngrams_match_3": 3.918, "eval_nq_num_pred_words": 48.88, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.985650189076178, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44354355937317347, "eval_nq_runtime": 10.5147, "eval_nq_samples_per_second": 47.553, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4577481427591343, "eval_nq_token_set_f1_sem": 0.005029344854526479, "eval_nq_token_set_precision": 0.4164451514692618, "eval_nq_token_set_recall": 0.51582719441119, "eval_nq_true_num_tokens": 64.0, "step": 123125 }, { "epoch": 23.64, "learning_rate": 0.001, "loss": 2.568, "step": 123132 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 2.5756, "step": 123144 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 2.5696, "step": 123156 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 2.5622, "step": 123168 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 2.5646, "step": 123180 }, { "epoch": 23.65, "learning_rate": 0.001, "loss": 2.5682, "step": 123192 }, { "epoch": 23.66, "learning_rate": 0.001, "loss": 2.5642, "step": 123204 }, { "epoch": 23.66, "learning_rate": 0.001, "loss": 2.5674, "step": 123216 }, { "epoch": 23.66, "learning_rate": 0.001, "loss": 2.5673, "step": 123228 }, { "epoch": 23.66, "learning_rate": 0.001, "loss": 2.5609, "step": 123240 }, { "epoch": 23.67, "learning_rate": 0.001, "loss": 2.5662, "step": 123252 }, { "epoch": 23.67, "learning_rate": 0.001, "loss": 2.5659, "step": 123264 }, { "epoch": 23.67, "learning_rate": 0.001, "loss": 2.5743, "step": 123276 }, { "epoch": 23.67, "learning_rate": 0.001, "loss": 2.5591, "step": 123288 }, { "epoch": 23.68, "learning_rate": 0.001, "loss": 2.5695, "step": 123300 }, { "epoch": 23.68, "learning_rate": 0.001, "loss": 2.5607, "step": 123312 }, { "epoch": 23.68, "learning_rate": 0.001, "loss": 2.5819, "step": 123324 }, { "epoch": 23.68, "learning_rate": 0.001, "loss": 2.5752, "step": 123336 }, { "epoch": 23.68, "learning_rate": 0.001, "loss": 2.5687, "step": 123348 }, { "epoch": 23.69, "learning_rate": 0.001, "loss": 2.5663, "step": 123360 }, { "epoch": 23.69, "learning_rate": 0.001, "loss": 2.5846, "step": 123372 }, { "epoch": 23.69, "learning_rate": 0.001, "loss": 2.5851, "step": 123384 }, { "epoch": 23.69, "learning_rate": 0.001, "loss": 2.5678, "step": 123396 }, { "epoch": 23.7, "learning_rate": 0.001, "loss": 2.5721, "step": 123408 }, { "epoch": 23.7, "learning_rate": 0.001, "loss": 2.5692, "step": 123420 }, { "epoch": 23.7, "learning_rate": 0.001, "loss": 2.5655, "step": 123432 }, { "epoch": 23.7, "learning_rate": 0.001, "loss": 2.5676, "step": 123444 }, { "epoch": 23.71, "learning_rate": 0.001, "loss": 2.5739, "step": 123456 }, { "epoch": 23.71, "learning_rate": 0.001, "loss": 2.5666, "step": 123468 }, { "epoch": 23.71, "learning_rate": 0.001, "loss": 2.5643, "step": 123480 }, { "epoch": 23.71, "learning_rate": 0.001, "loss": 2.5635, "step": 123492 }, { "epoch": 23.71, "learning_rate": 0.001, "loss": 2.5627, "step": 123504 }, { "epoch": 23.72, "learning_rate": 0.001, "loss": 2.576, "step": 123516 }, { "epoch": 23.72, "learning_rate": 0.001, "loss": 2.5699, "step": 123528 }, { "epoch": 23.72, "learning_rate": 0.001, "loss": 2.5556, "step": 123540 }, { "epoch": 23.72, "learning_rate": 0.001, "loss": 2.5746, "step": 123552 }, { "epoch": 23.73, "learning_rate": 0.001, "loss": 2.5663, "step": 123564 }, { "epoch": 23.73, "learning_rate": 0.001, "loss": 2.5679, "step": 123576 }, { "epoch": 23.73, "learning_rate": 0.001, "loss": 2.5601, "step": 123588 }, { "epoch": 23.73, "learning_rate": 0.001, "loss": 2.5626, "step": 123600 }, { "epoch": 23.74, "learning_rate": 0.001, "loss": 2.5697, "step": 123612 }, { "epoch": 23.74, "learning_rate": 0.001, "loss": 2.5734, "step": 123624 }, { "epoch": 23.74, "learning_rate": 0.001, "loss": 2.5623, "step": 123636 }, { "epoch": 23.74, "learning_rate": 0.001, "loss": 2.5698, "step": 123648 }, { "epoch": 23.74, "learning_rate": 0.001, "loss": 2.5628, "step": 123660 }, { "epoch": 23.75, "learning_rate": 0.001, "loss": 2.5712, "step": 123672 }, { "epoch": 23.75, "learning_rate": 0.001, "loss": 2.5729, "step": 123684 }, { "epoch": 23.75, "learning_rate": 0.001, "loss": 2.5662, "step": 123696 }, { "epoch": 23.75, "learning_rate": 0.001, "loss": 2.5706, "step": 123708 }, { "epoch": 23.76, "learning_rate": 0.001, "loss": 2.5614, "step": 123720 }, { "epoch": 23.76, "learning_rate": 0.001, "loss": 2.5663, "step": 123732 }, { "epoch": 23.76, "learning_rate": 0.001, "loss": 2.5601, "step": 123744 }, { "epoch": 23.76, "eval_ag_news_accuracy": 0.3225625, "eval_ag_news_bleu_score": 4.80911523662197, "eval_ag_news_bleu_score_sem": 0.1505158013521019, "eval_ag_news_emb_cos_sim": 0.8057507276535034, "eval_ag_news_emb_cos_sim_sem": 0.007469187443393206, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5592167377471924, "eval_ag_news_n_ngrams_match_1": 13.954, "eval_ag_news_n_ngrams_match_2": 3.088, "eval_ag_news_n_ngrams_match_3": 0.886, "eval_ag_news_num_pred_words": 46.306, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.13566592358565, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3499271773864675, "eval_ag_news_runtime": 10.5718, "eval_ag_news_samples_per_second": 47.295, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3505775578588199, "eval_ag_news_token_set_f1_sem": 0.00446683082499616, "eval_ag_news_token_set_precision": 0.3341955923310428, "eval_ag_news_token_set_recall": 0.3857662798047629, "eval_ag_news_true_num_tokens": 56.09375, "step": 123750 }, { "epoch": 23.76, "eval_anthropic_toxic_prompts_accuracy": 0.1138125, "eval_anthropic_toxic_prompts_bleu_score": 3.0856771276902344, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12505592710514374, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6681952476501465, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009632829497469858, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2448768615722656, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.13, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686, "eval_anthropic_toxic_prompts_num_pred_words": 46.93, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.65855031214567, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21177156658064994, "eval_anthropic_toxic_prompts_runtime": 9.8606, "eval_anthropic_toxic_prompts_samples_per_second": 50.707, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3542165080733708, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006705805135088895, "eval_anthropic_toxic_prompts_token_set_precision": 0.43358594079213353, "eval_anthropic_toxic_prompts_token_set_recall": 0.32403467838846783, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 123750 }, { "epoch": 23.76, "eval_arxiv_accuracy": 0.34634375, "eval_arxiv_bleu_score": 4.323527819434282, "eval_arxiv_bleu_score_sem": 0.12690310676739336, "eval_arxiv_emb_cos_sim": 0.7569823265075684, "eval_arxiv_emb_cos_sim_sem": 0.007879461330410948, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.416527032852173, "eval_arxiv_n_ngrams_match_1": 14.87, "eval_arxiv_n_ngrams_match_2": 2.876, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.04, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.46343259022963, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35730537813127394, "eval_arxiv_runtime": 10.7288, "eval_arxiv_samples_per_second": 46.604, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.35056932094447046, "eval_arxiv_token_set_f1_sem": 0.004136622701862446, "eval_arxiv_token_set_precision": 0.3013940086194369, "eval_arxiv_token_set_recall": 0.4352885692743516, "eval_arxiv_true_num_tokens": 64.0, "step": 123750 }, { "epoch": 23.76, "eval_python_code_alpaca_accuracy": 0.16028125, "eval_python_code_alpaca_bleu_score": 4.6366772816008766, "eval_python_code_alpaca_bleu_score_sem": 0.14784878135350046, "eval_python_code_alpaca_emb_cos_sim": 0.7635290622711182, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007488291819569785, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8861355781555176, "eval_python_code_alpaca_n_ngrams_match_1": 9.85, "eval_python_code_alpaca_n_ngrams_match_2": 2.9, "eval_python_code_alpaca_n_ngrams_match_3": 0.932, "eval_python_code_alpaca_num_pred_words": 43.502, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.92391004382131, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3353624760878188, "eval_python_code_alpaca_runtime": 10.0937, "eval_python_code_alpaca_samples_per_second": 49.536, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4776360116933921, "eval_python_code_alpaca_token_set_f1_sem": 0.005389846905568517, "eval_python_code_alpaca_token_set_precision": 0.5378696106686256, "eval_python_code_alpaca_token_set_recall": 0.4511554207872903, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 123750 }, { "epoch": 23.76, "eval_wikibio_accuracy": 0.32315625, "eval_wikibio_bleu_score": 5.888987682259996, "eval_wikibio_bleu_score_sem": 0.2005888805823427, "eval_wikibio_emb_cos_sim": 0.7536141276359558, "eval_wikibio_emb_cos_sim_sem": 0.008231543590525305, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.739074945449829, "eval_wikibio_n_ngrams_match_1": 10.212, "eval_wikibio_n_ngrams_match_2": 3.414, "eval_wikibio_n_ngrams_match_3": 1.21, "eval_wikibio_num_pred_words": 37.138, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.05906523425962, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.357356145874208, "eval_wikibio_runtime": 10.0528, "eval_wikibio_samples_per_second": 49.737, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32031816975269517, "eval_wikibio_token_set_f1_sem": 0.005141235283817227, "eval_wikibio_token_set_precision": 0.331091781388553, "eval_wikibio_token_set_recall": 0.326432029902187, "eval_wikibio_true_num_tokens": 61.1328125, "step": 123750 }, { "epoch": 23.76, "eval_nq_accuracy": 0.52696875, "eval_nq_bleu_score": 11.512729350208728, "eval_nq_bleu_score_sem": 0.4686555999273394, "eval_nq_emb_cos_sim": 0.826134204864502, "eval_nq_emb_cos_sim_sem": 0.0073975634841604975, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1931984424591064, "eval_nq_n_ngrams_match_1": 22.952, "eval_nq_n_ngrams_match_2": 8.41, "eval_nq_n_ngrams_match_3": 3.804, "eval_nq_num_pred_words": 48.844, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.963837632239624, "eval_nq_pred_num_tokens": 62.9921875, "eval_nq_rouge_score": 0.44703954207752905, "eval_nq_runtime": 10.7547, "eval_nq_samples_per_second": 46.491, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4609720776791791, "eval_nq_token_set_f1_sem": 0.004923906721364817, "eval_nq_token_set_precision": 0.4177849225975492, "eval_nq_token_set_recall": 0.5238332287599875, "eval_nq_true_num_tokens": 64.0, "step": 123750 }, { "epoch": 23.76, "learning_rate": 0.001, "loss": 2.5682, "step": 123756 }, { "epoch": 23.76, "learning_rate": 0.001, "loss": 2.5746, "step": 123768 }, { "epoch": 23.77, "learning_rate": 0.001, "loss": 2.5595, "step": 123780 }, { "epoch": 23.77, "learning_rate": 0.001, "loss": 2.5702, "step": 123792 }, { "epoch": 23.77, "learning_rate": 0.001, "loss": 2.5694, "step": 123804 }, { "epoch": 23.77, "learning_rate": 0.001, "loss": 2.5686, "step": 123816 }, { "epoch": 23.78, "learning_rate": 0.001, "loss": 2.5638, "step": 123828 }, { "epoch": 23.78, "learning_rate": 0.001, "loss": 2.5677, "step": 123840 }, { "epoch": 23.78, "learning_rate": 0.001, "loss": 2.5593, "step": 123852 }, { "epoch": 23.78, "learning_rate": 0.001, "loss": 2.5647, "step": 123864 }, { "epoch": 23.79, "learning_rate": 0.001, "loss": 2.5577, "step": 123876 }, { "epoch": 23.79, "learning_rate": 0.001, "loss": 2.5725, "step": 123888 }, { "epoch": 23.79, "learning_rate": 0.001, "loss": 2.5622, "step": 123900 }, { "epoch": 23.79, "learning_rate": 0.001, "loss": 2.5652, "step": 123912 }, { "epoch": 23.79, "learning_rate": 0.001, "loss": 2.5678, "step": 123924 }, { "epoch": 23.8, "learning_rate": 0.001, "loss": 2.5684, "step": 123936 }, { "epoch": 23.8, "learning_rate": 0.001, "loss": 2.5557, "step": 123948 }, { "epoch": 23.8, "learning_rate": 0.001, "loss": 2.5616, "step": 123960 }, { "epoch": 23.8, "learning_rate": 0.001, "loss": 2.569, "step": 123972 }, { "epoch": 23.81, "learning_rate": 0.001, "loss": 2.558, "step": 123984 }, { "epoch": 23.81, "learning_rate": 0.001, "loss": 2.5703, "step": 123996 }, { "epoch": 23.81, "learning_rate": 0.001, "loss": 2.5633, "step": 124008 }, { "epoch": 23.81, "learning_rate": 0.001, "loss": 2.5713, "step": 124020 }, { "epoch": 23.82, "learning_rate": 0.001, "loss": 2.5782, "step": 124032 }, { "epoch": 23.82, "learning_rate": 0.001, "loss": 2.5581, "step": 124044 }, { "epoch": 23.82, "learning_rate": 0.001, "loss": 2.5647, "step": 124056 }, { "epoch": 23.82, "learning_rate": 0.001, "loss": 2.5773, "step": 124068 }, { "epoch": 23.82, "learning_rate": 0.001, "loss": 2.5589, "step": 124080 }, { "epoch": 23.83, "learning_rate": 0.001, "loss": 2.5624, "step": 124092 }, { "epoch": 23.83, "learning_rate": 0.001, "loss": 2.5612, "step": 124104 }, { "epoch": 23.83, "learning_rate": 0.001, "loss": 2.5672, "step": 124116 }, { "epoch": 23.83, "learning_rate": 0.001, "loss": 2.5567, "step": 124128 }, { "epoch": 23.84, "learning_rate": 0.001, "loss": 2.5658, "step": 124140 }, { "epoch": 23.84, "learning_rate": 0.001, "loss": 2.5625, "step": 124152 }, { "epoch": 23.84, "learning_rate": 0.001, "loss": 2.5605, "step": 124164 }, { "epoch": 23.84, "learning_rate": 0.001, "loss": 2.5666, "step": 124176 }, { "epoch": 23.85, "learning_rate": 0.001, "loss": 2.5646, "step": 124188 }, { "epoch": 23.85, "learning_rate": 0.001, "loss": 2.5505, "step": 124200 }, { "epoch": 23.85, "learning_rate": 0.001, "loss": 2.5662, "step": 124212 }, { "epoch": 23.85, "learning_rate": 0.001, "loss": 2.5705, "step": 124224 }, { "epoch": 23.85, "learning_rate": 0.001, "loss": 2.5672, "step": 124236 }, { "epoch": 23.86, "learning_rate": 0.001, "loss": 2.5709, "step": 124248 }, { "epoch": 23.86, "learning_rate": 0.001, "loss": 2.5598, "step": 124260 }, { "epoch": 23.86, "learning_rate": 0.001, "loss": 2.5641, "step": 124272 }, { "epoch": 23.86, "learning_rate": 0.001, "loss": 2.5646, "step": 124284 }, { "epoch": 23.87, "learning_rate": 0.001, "loss": 2.5621, "step": 124296 }, { "epoch": 23.87, "learning_rate": 0.001, "loss": 2.5594, "step": 124308 }, { "epoch": 23.87, "learning_rate": 0.001, "loss": 2.5746, "step": 124320 }, { "epoch": 23.87, "learning_rate": 0.001, "loss": 2.5699, "step": 124332 }, { "epoch": 23.88, "learning_rate": 0.001, "loss": 2.5613, "step": 124344 }, { "epoch": 23.88, "learning_rate": 0.001, "loss": 2.5679, "step": 124356 }, { "epoch": 23.88, "learning_rate": 0.001, "loss": 2.574, "step": 124368 }, { "epoch": 23.88, "eval_ag_news_accuracy": 0.32234375, "eval_ag_news_bleu_score": 4.616771394251265, "eval_ag_news_bleu_score_sem": 0.14110397244622383, "eval_ag_news_emb_cos_sim": 0.8145469427108765, "eval_ag_news_emb_cos_sim_sem": 0.0076602694007404555, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5534090995788574, "eval_ag_news_n_ngrams_match_1": 14.004, "eval_ag_news_n_ngrams_match_2": 3.006, "eval_ag_news_n_ngrams_match_3": 0.79, "eval_ag_news_num_pred_words": 46.486, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.93220208316065, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3487744780063735, "eval_ag_news_runtime": 11.7613, "eval_ag_news_samples_per_second": 42.512, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.34724649744661784, "eval_ag_news_token_set_f1_sem": 0.004423211016564372, "eval_ag_news_token_set_precision": 0.3315374911371905, "eval_ag_news_token_set_recall": 0.3822301581506783, "eval_ag_news_true_num_tokens": 56.09375, "step": 124375 }, { "epoch": 23.88, "eval_anthropic_toxic_prompts_accuracy": 0.1145, "eval_anthropic_toxic_prompts_bleu_score": 3.022738651020741, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11094835640131087, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6775563955307007, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009782226246006309, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.211892604827881, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.688, "eval_anthropic_toxic_prompts_num_pred_words": 47.204, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.82602764957878, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21259204565192613, "eval_anthropic_toxic_prompts_runtime": 9.7955, "eval_anthropic_toxic_prompts_samples_per_second": 51.044, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3537173268028458, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006601718683755981, "eval_anthropic_toxic_prompts_token_set_precision": 0.4370176657236675, "eval_anthropic_toxic_prompts_token_set_recall": 0.32217223297445924, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 124375 }, { "epoch": 23.88, "eval_arxiv_accuracy": 0.3485625, "eval_arxiv_bleu_score": 4.317926980658027, "eval_arxiv_bleu_score_sem": 0.13168929178770322, "eval_arxiv_emb_cos_sim": 0.7690600752830505, "eval_arxiv_emb_cos_sim_sem": 0.006692548782971978, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.393745183944702, "eval_arxiv_n_ngrams_match_1": 15.034, "eval_arxiv_n_ngrams_match_2": 2.898, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.74, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.777265031789824, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3554842152864838, "eval_arxiv_runtime": 10.212, "eval_arxiv_samples_per_second": 48.962, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35387775035000996, "eval_arxiv_token_set_f1_sem": 0.0042370892249581765, "eval_arxiv_token_set_precision": 0.3028047192261118, "eval_arxiv_token_set_recall": 0.4465013605988915, "eval_arxiv_true_num_tokens": 64.0, "step": 124375 }, { "epoch": 23.88, "eval_python_code_alpaca_accuracy": 0.1601875, "eval_python_code_alpaca_bleu_score": 4.572525873858984, "eval_python_code_alpaca_bleu_score_sem": 0.14368911937320733, "eval_python_code_alpaca_emb_cos_sim": 0.7642950415611267, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007458241116581517, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8737761974334717, "eval_python_code_alpaca_n_ngrams_match_1": 9.884, "eval_python_code_alpaca_n_ngrams_match_2": 2.918, "eval_python_code_alpaca_n_ngrams_match_3": 0.972, "eval_python_code_alpaca_num_pred_words": 44.026, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.703744970135237, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33411647224285534, "eval_python_code_alpaca_runtime": 9.9246, "eval_python_code_alpaca_samples_per_second": 50.38, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4754079026373771, "eval_python_code_alpaca_token_set_f1_sem": 0.005461751353869362, "eval_python_code_alpaca_token_set_precision": 0.5384331032914961, "eval_python_code_alpaca_token_set_recall": 0.4463353592644423, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 124375 }, { "epoch": 23.88, "eval_wikibio_accuracy": 0.32553125, "eval_wikibio_bleu_score": 6.029396731199813, "eval_wikibio_bleu_score_sem": 0.22558600661844924, "eval_wikibio_emb_cos_sim": 0.745415210723877, "eval_wikibio_emb_cos_sim_sem": 0.00891533669494408, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.705982208251953, "eval_wikibio_n_ngrams_match_1": 10.118, "eval_wikibio_n_ngrams_match_2": 3.346, "eval_wikibio_n_ngrams_match_3": 1.212, "eval_wikibio_num_pred_words": 36.508, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.689993743439125, "eval_wikibio_pred_num_tokens": 62.9765625, "eval_wikibio_rouge_score": 0.35712811986607584, "eval_wikibio_runtime": 9.8765, "eval_wikibio_samples_per_second": 50.625, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32024821143729626, "eval_wikibio_token_set_f1_sem": 0.005385370485516734, "eval_wikibio_token_set_precision": 0.3303363399386787, "eval_wikibio_token_set_recall": 0.32362681929192155, "eval_wikibio_true_num_tokens": 61.1328125, "step": 124375 }, { "epoch": 23.88, "eval_nq_accuracy": 0.52834375, "eval_nq_bleu_score": 11.565532244966526, "eval_nq_bleu_score_sem": 0.477540407621757, "eval_nq_emb_cos_sim": 0.8256431221961975, "eval_nq_emb_cos_sim_sem": 0.0073839790280498295, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1905767917633057, "eval_nq_n_ngrams_match_1": 22.94, "eval_nq_n_ngrams_match_2": 8.388, "eval_nq_n_ngrams_match_3": 3.882, "eval_nq_num_pred_words": 49.17, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.940368358634883, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4452944172525074, "eval_nq_runtime": 10.3096, "eval_nq_samples_per_second": 48.498, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4601880192052505, "eval_nq_token_set_f1_sem": 0.00493523955955324, "eval_nq_token_set_precision": 0.4178062410098839, "eval_nq_token_set_recall": 0.5191908223833593, "eval_nq_true_num_tokens": 64.0, "step": 124375 }, { "epoch": 23.88, "learning_rate": 0.001, "loss": 2.572, "step": 124380 }, { "epoch": 23.88, "learning_rate": 0.001, "loss": 2.5604, "step": 124392 }, { "epoch": 23.89, "learning_rate": 0.001, "loss": 2.5591, "step": 124404 }, { "epoch": 23.89, "learning_rate": 0.001, "loss": 2.5682, "step": 124416 }, { "epoch": 23.89, "learning_rate": 0.001, "loss": 2.5727, "step": 124428 }, { "epoch": 23.89, "learning_rate": 0.001, "loss": 2.5738, "step": 124440 }, { "epoch": 23.9, "learning_rate": 0.001, "loss": 2.5661, "step": 124452 }, { "epoch": 23.9, "learning_rate": 0.001, "loss": 2.5638, "step": 124464 }, { "epoch": 23.9, "learning_rate": 0.001, "loss": 2.563, "step": 124476 }, { "epoch": 23.9, "learning_rate": 0.001, "loss": 2.5683, "step": 124488 }, { "epoch": 23.91, "learning_rate": 0.001, "loss": 2.569, "step": 124500 }, { "epoch": 23.91, "learning_rate": 0.001, "loss": 2.5746, "step": 124512 }, { "epoch": 23.91, "learning_rate": 0.001, "loss": 2.5711, "step": 124524 }, { "epoch": 23.91, "learning_rate": 0.001, "loss": 2.5689, "step": 124536 }, { "epoch": 23.91, "learning_rate": 0.001, "loss": 2.5742, "step": 124548 }, { "epoch": 23.92, "learning_rate": 0.001, "loss": 2.5704, "step": 124560 }, { "epoch": 23.92, "learning_rate": 0.001, "loss": 2.5727, "step": 124572 }, { "epoch": 23.92, "learning_rate": 0.001, "loss": 2.5565, "step": 124584 }, { "epoch": 23.92, "learning_rate": 0.001, "loss": 2.5661, "step": 124596 }, { "epoch": 23.93, "learning_rate": 0.001, "loss": 2.5704, "step": 124608 }, { "epoch": 23.93, "learning_rate": 0.001, "loss": 2.5619, "step": 124620 }, { "epoch": 23.93, "learning_rate": 0.001, "loss": 2.5582, "step": 124632 }, { "epoch": 23.93, "learning_rate": 0.001, "loss": 2.567, "step": 124644 }, { "epoch": 23.94, "learning_rate": 0.001, "loss": 2.5537, "step": 124656 }, { "epoch": 23.94, "learning_rate": 0.001, "loss": 2.5605, "step": 124668 }, { "epoch": 23.94, "learning_rate": 0.001, "loss": 2.5627, "step": 124680 }, { "epoch": 23.94, "learning_rate": 0.001, "loss": 2.5539, "step": 124692 }, { "epoch": 23.94, "learning_rate": 0.001, "loss": 2.5644, "step": 124704 }, { "epoch": 23.95, "learning_rate": 0.001, "loss": 2.5704, "step": 124716 }, { "epoch": 23.95, "learning_rate": 0.001, "loss": 2.5654, "step": 124728 }, { "epoch": 23.95, "learning_rate": 0.001, "loss": 2.5584, "step": 124740 }, { "epoch": 23.95, "learning_rate": 0.001, "loss": 2.5628, "step": 124752 }, { "epoch": 23.96, "learning_rate": 0.001, "loss": 2.5751, "step": 124764 }, { "epoch": 23.96, "learning_rate": 0.001, "loss": 2.5751, "step": 124776 }, { "epoch": 23.96, "learning_rate": 0.001, "loss": 2.5658, "step": 124788 }, { "epoch": 23.96, "learning_rate": 0.001, "loss": 2.5625, "step": 124800 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 2.575, "step": 124812 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 2.5581, "step": 124824 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 2.5606, "step": 124836 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 2.5639, "step": 124848 }, { "epoch": 23.97, "learning_rate": 0.001, "loss": 2.5748, "step": 124860 }, { "epoch": 23.98, "learning_rate": 0.001, "loss": 2.5665, "step": 124872 }, { "epoch": 23.98, "learning_rate": 0.001, "loss": 2.5652, "step": 124884 }, { "epoch": 23.98, "learning_rate": 0.001, "loss": 2.5679, "step": 124896 }, { "epoch": 23.98, "learning_rate": 0.001, "loss": 2.5649, "step": 124908 }, { "epoch": 23.99, "learning_rate": 0.001, "loss": 2.5701, "step": 124920 }, { "epoch": 23.99, "learning_rate": 0.001, "loss": 2.5732, "step": 124932 }, { "epoch": 23.99, "learning_rate": 0.001, "loss": 2.5717, "step": 124944 }, { "epoch": 23.99, "learning_rate": 0.001, "loss": 2.5672, "step": 124956 }, { "epoch": 24.0, "learning_rate": 0.001, "loss": 2.5645, "step": 124968 }, { "epoch": 24.0, "learning_rate": 0.001, "loss": 2.5644, "step": 124980 }, { "epoch": 24.0, "learning_rate": 0.001, "loss": 2.5666, "step": 124992 }, { "epoch": 24.0, "eval_ag_news_accuracy": 0.32259375, "eval_ag_news_bleu_score": 4.842368892698552, "eval_ag_news_bleu_score_sem": 0.1488780661338448, "eval_ag_news_emb_cos_sim": 0.8147686719894409, "eval_ag_news_emb_cos_sim_sem": 0.006900436796162947, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.559001922607422, "eval_ag_news_n_ngrams_match_1": 14.002, "eval_ag_news_n_ngrams_match_2": 3.146, "eval_ag_news_n_ngrams_match_3": 0.862, "eval_ag_news_num_pred_words": 46.428, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.128119061218506, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3476293266622733, "eval_ag_news_runtime": 10.414, "eval_ag_news_samples_per_second": 48.012, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35248336170187944, "eval_ag_news_token_set_f1_sem": 0.004469338978748432, "eval_ag_news_token_set_precision": 0.3357517603114475, "eval_ag_news_token_set_recall": 0.38874662374982394, "eval_ag_news_true_num_tokens": 56.09375, "step": 125000 }, { "epoch": 24.0, "eval_anthropic_toxic_prompts_accuracy": 0.1151875, "eval_anthropic_toxic_prompts_bleu_score": 3.0859499989740744, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10905266666044433, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6744695901870728, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009016824989069298, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2217483520507812, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.196, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.93, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.69, "eval_anthropic_toxic_prompts_num_pred_words": 47.306, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.071916421050688, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21347986523836338, "eval_anthropic_toxic_prompts_runtime": 10.1658, "eval_anthropic_toxic_prompts_samples_per_second": 49.184, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3619602511688941, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006730535266363549, "eval_anthropic_toxic_prompts_token_set_precision": 0.436739302416979, "eval_anthropic_toxic_prompts_token_set_recall": 0.3369953895690069, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 125000 }, { "epoch": 24.0, "eval_arxiv_accuracy": 0.348, "eval_arxiv_bleu_score": 4.24697386896977, "eval_arxiv_bleu_score_sem": 0.12353276395941543, "eval_arxiv_emb_cos_sim": 0.7643498182296753, "eval_arxiv_emb_cos_sim_sem": 0.006785813831826014, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4126248359680176, "eval_arxiv_n_ngrams_match_1": 14.73, "eval_arxiv_n_ngrams_match_2": 2.87, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 39.556, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.344789912587892, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3547536572828171, "eval_arxiv_runtime": 10.7939, "eval_arxiv_samples_per_second": 46.322, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.34956129651660545, "eval_arxiv_token_set_f1_sem": 0.004269246417330083, "eval_arxiv_token_set_precision": 0.29812225878968807, "eval_arxiv_token_set_recall": 0.4441866052858707, "eval_arxiv_true_num_tokens": 64.0, "step": 125000 }, { "epoch": 24.0, "eval_python_code_alpaca_accuracy": 0.1606875, "eval_python_code_alpaca_bleu_score": 4.505426017777299, "eval_python_code_alpaca_bleu_score_sem": 0.14411098878649153, "eval_python_code_alpaca_emb_cos_sim": 0.7475378513336182, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0115773441592378, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8584771156311035, "eval_python_code_alpaca_n_ngrams_match_1": 9.712, "eval_python_code_alpaca_n_ngrams_match_2": 2.864, "eval_python_code_alpaca_n_ngrams_match_3": 0.956, "eval_python_code_alpaca_num_pred_words": 43.454, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.434955288060664, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3282754389076918, "eval_python_code_alpaca_runtime": 10.0518, "eval_python_code_alpaca_samples_per_second": 49.742, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.47499117136433716, "eval_python_code_alpaca_token_set_f1_sem": 0.006080313457766385, "eval_python_code_alpaca_token_set_precision": 0.5285453966285859, "eval_python_code_alpaca_token_set_recall": 0.4507578398211699, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 125000 }, { "epoch": 24.0, "eval_wikibio_accuracy": 0.319125, "eval_wikibio_bleu_score": 5.8327788162543035, "eval_wikibio_bleu_score_sem": 0.21357066509556064, "eval_wikibio_emb_cos_sim": 0.7230526208877563, "eval_wikibio_emb_cos_sim_sem": 0.01157876134701898, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.780616521835327, "eval_wikibio_n_ngrams_match_1": 10.08, "eval_wikibio_n_ngrams_match_2": 3.404, "eval_wikibio_n_ngrams_match_3": 1.224, "eval_wikibio_num_pred_words": 36.83, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 43.84306361097375, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35287446014234436, "eval_wikibio_runtime": 9.9376, "eval_wikibio_samples_per_second": 50.314, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.3160235209487705, "eval_wikibio_token_set_f1_sem": 0.005651608281351785, "eval_wikibio_token_set_precision": 0.326063145200404, "eval_wikibio_token_set_recall": 0.3222274321290726, "eval_wikibio_true_num_tokens": 61.1328125, "step": 125000 }, { "epoch": 24.0, "eval_nq_accuracy": 0.526, "eval_nq_bleu_score": 11.934141509486224, "eval_nq_bleu_score_sem": 0.48057588393319084, "eval_nq_emb_cos_sim": 0.8391439914703369, "eval_nq_emb_cos_sim_sem": 0.006317563661958997, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1919448375701904, "eval_nq_n_ngrams_match_1": 23.008, "eval_nq_n_ngrams_match_2": 8.51, "eval_nq_n_ngrams_match_3": 4.036, "eval_nq_num_pred_words": 48.806, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.95260756206653, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4488428481016429, "eval_nq_runtime": 10.3918, "eval_nq_samples_per_second": 48.115, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46076825900826884, "eval_nq_token_set_f1_sem": 0.004848203609598437, "eval_nq_token_set_precision": 0.4170604278959615, "eval_nq_token_set_recall": 0.5216247249282614, "eval_nq_true_num_tokens": 64.0, "step": 125000 }, { "epoch": 24.0, "learning_rate": 0.001, "loss": 2.5511, "step": 125004 }, { "epoch": 24.0, "learning_rate": 0.001, "loss": 2.5528, "step": 125016 }, { "epoch": 24.01, "learning_rate": 0.001, "loss": 2.5507, "step": 125028 }, { "epoch": 24.01, "learning_rate": 0.001, "loss": 2.5555, "step": 125040 }, { "epoch": 24.01, "learning_rate": 0.001, "loss": 2.5484, "step": 125052 }, { "epoch": 24.01, "learning_rate": 0.001, "loss": 2.5503, "step": 125064 }, { "epoch": 24.02, "learning_rate": 0.001, "loss": 2.5504, "step": 125076 }, { "epoch": 24.02, "learning_rate": 0.001, "loss": 2.5529, "step": 125088 }, { "epoch": 24.02, "learning_rate": 0.001, "loss": 2.5475, "step": 125100 }, { "epoch": 24.02, "learning_rate": 0.001, "loss": 2.554, "step": 125112 }, { "epoch": 24.03, "learning_rate": 0.001, "loss": 2.5523, "step": 125124 }, { "epoch": 24.03, "learning_rate": 0.001, "loss": 2.5418, "step": 125136 }, { "epoch": 24.03, "learning_rate": 0.001, "loss": 2.5473, "step": 125148 }, { "epoch": 24.03, "learning_rate": 0.001, "loss": 2.5567, "step": 125160 }, { "epoch": 24.03, "learning_rate": 0.001, "loss": 2.5555, "step": 125172 }, { "epoch": 24.04, "learning_rate": 0.001, "loss": 2.5669, "step": 125184 }, { "epoch": 24.04, "learning_rate": 0.001, "loss": 2.5535, "step": 125196 }, { "epoch": 24.04, "learning_rate": 0.001, "loss": 2.5501, "step": 125208 }, { "epoch": 24.04, "learning_rate": 0.001, "loss": 2.5535, "step": 125220 }, { "epoch": 24.05, "learning_rate": 0.001, "loss": 2.5534, "step": 125232 }, { "epoch": 24.05, "learning_rate": 0.001, "loss": 2.5501, "step": 125244 }, { "epoch": 24.05, "learning_rate": 0.001, "loss": 2.5645, "step": 125256 }, { "epoch": 24.05, "learning_rate": 0.001, "loss": 2.5464, "step": 125268 }, { "epoch": 24.06, "learning_rate": 0.001, "loss": 2.5487, "step": 125280 }, { "epoch": 24.06, "learning_rate": 0.001, "loss": 2.5486, "step": 125292 }, { "epoch": 24.06, "learning_rate": 0.001, "loss": 2.5476, "step": 125304 }, { "epoch": 24.06, "learning_rate": 0.001, "loss": 2.5545, "step": 125316 }, { "epoch": 24.06, "learning_rate": 0.001, "loss": 2.5599, "step": 125328 }, { "epoch": 24.07, "learning_rate": 0.001, "loss": 2.5502, "step": 125340 }, { "epoch": 24.07, "learning_rate": 0.001, "loss": 2.5468, "step": 125352 }, { "epoch": 24.07, "learning_rate": 0.001, "loss": 2.5612, "step": 125364 }, { "epoch": 24.07, "learning_rate": 0.001, "loss": 2.5554, "step": 125376 }, { "epoch": 24.08, "learning_rate": 0.001, "loss": 2.5581, "step": 125388 }, { "epoch": 24.08, "learning_rate": 0.001, "loss": 2.5493, "step": 125400 }, { "epoch": 24.08, "learning_rate": 0.001, "loss": 2.5549, "step": 125412 }, { "epoch": 24.08, "learning_rate": 0.001, "loss": 2.5563, "step": 125424 }, { "epoch": 24.09, "learning_rate": 0.001, "loss": 2.5495, "step": 125436 }, { "epoch": 24.09, "learning_rate": 0.001, "loss": 2.5482, "step": 125448 }, { "epoch": 24.09, "learning_rate": 0.001, "loss": 2.5608, "step": 125460 }, { "epoch": 24.09, "learning_rate": 0.001, "loss": 2.5598, "step": 125472 }, { "epoch": 24.09, "learning_rate": 0.001, "loss": 2.545, "step": 125484 }, { "epoch": 24.1, "learning_rate": 0.001, "loss": 2.5632, "step": 125496 }, { "epoch": 24.1, "learning_rate": 0.001, "loss": 2.5478, "step": 125508 }, { "epoch": 24.1, "learning_rate": 0.001, "loss": 2.557, "step": 125520 }, { "epoch": 24.1, "learning_rate": 0.001, "loss": 2.5501, "step": 125532 }, { "epoch": 24.11, "learning_rate": 0.001, "loss": 2.5575, "step": 125544 }, { "epoch": 24.11, "learning_rate": 0.001, "loss": 2.5653, "step": 125556 }, { "epoch": 24.11, "learning_rate": 0.001, "loss": 2.5526, "step": 125568 }, { "epoch": 24.11, "learning_rate": 0.001, "loss": 2.5608, "step": 125580 }, { "epoch": 24.12, "learning_rate": 0.001, "loss": 2.5504, "step": 125592 }, { "epoch": 24.12, "learning_rate": 0.001, "loss": 2.5412, "step": 125604 }, { "epoch": 24.12, "learning_rate": 0.001, "loss": 2.5538, "step": 125616 }, { "epoch": 24.12, "eval_ag_news_accuracy": 0.32103125, "eval_ag_news_bleu_score": 4.784798295141622, "eval_ag_news_bleu_score_sem": 0.15427868540256148, "eval_ag_news_emb_cos_sim": 0.809669017791748, "eval_ag_news_emb_cos_sim_sem": 0.006527537944151255, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.568237543106079, "eval_ag_news_n_ngrams_match_1": 13.996, "eval_ag_news_n_ngrams_match_2": 3.092, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.874, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.45405181657247, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3442876372138963, "eval_ag_news_runtime": 10.5068, "eval_ag_news_samples_per_second": 47.588, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34793921692200586, "eval_ag_news_token_set_f1_sem": 0.004441752454298163, "eval_ag_news_token_set_precision": 0.33362705228095707, "eval_ag_news_token_set_recall": 0.38219241731187903, "eval_ag_news_true_num_tokens": 56.09375, "step": 125625 }, { "epoch": 24.12, "eval_anthropic_toxic_prompts_accuracy": 0.11428125, "eval_anthropic_toxic_prompts_bleu_score": 3.1322644439317546, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11562268752709737, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6842227578163147, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008113772124895707, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2619502544403076, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.25, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.95, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 47.448, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.10038993982719, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2141068707903946, "eval_anthropic_toxic_prompts_runtime": 10.0407, "eval_anthropic_toxic_prompts_samples_per_second": 49.798, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3561577536101551, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006522907534362738, "eval_anthropic_toxic_prompts_token_set_precision": 0.4431010390258805, "eval_anthropic_toxic_prompts_token_set_recall": 0.3242378106576544, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 125625 }, { "epoch": 24.12, "eval_arxiv_accuracy": 0.3483125, "eval_arxiv_bleu_score": 4.498381604918457, "eval_arxiv_bleu_score_sem": 0.13068081532478143, "eval_arxiv_emb_cos_sim": 0.768142580986023, "eval_arxiv_emb_cos_sim_sem": 0.007198875996290555, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4143869876861572, "eval_arxiv_n_ngrams_match_1": 15.354, "eval_arxiv_n_ngrams_match_2": 3.072, "eval_arxiv_n_ngrams_match_3": 0.724, "eval_arxiv_num_pred_words": 41.068, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.398309176951866, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36447736348630155, "eval_arxiv_runtime": 10.26, "eval_arxiv_samples_per_second": 48.733, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.36006858998921565, "eval_arxiv_token_set_f1_sem": 0.0041938916836464805, "eval_arxiv_token_set_precision": 0.3108529444811535, "eval_arxiv_token_set_recall": 0.44454270881214014, "eval_arxiv_true_num_tokens": 64.0, "step": 125625 }, { "epoch": 24.12, "eval_python_code_alpaca_accuracy": 0.16021875, "eval_python_code_alpaca_bleu_score": 4.505963201119526, "eval_python_code_alpaca_bleu_score_sem": 0.13186304346315086, "eval_python_code_alpaca_emb_cos_sim": 0.7627312541007996, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00802290022678368, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9129910469055176, "eval_python_code_alpaca_n_ngrams_match_1": 9.9, "eval_python_code_alpaca_n_ngrams_match_2": 2.914, "eval_python_code_alpaca_n_ngrams_match_3": 0.912, "eval_python_code_alpaca_num_pred_words": 42.998, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.411786807775307, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3383525506797753, "eval_python_code_alpaca_runtime": 9.655, "eval_python_code_alpaca_samples_per_second": 51.787, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.4830843485849761, "eval_python_code_alpaca_token_set_f1_sem": 0.005552455657271146, "eval_python_code_alpaca_token_set_precision": 0.5403298221735762, "eval_python_code_alpaca_token_set_recall": 0.4588261703302595, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 125625 }, { "epoch": 24.12, "eval_wikibio_accuracy": 0.32375, "eval_wikibio_bleu_score": 6.29947775891366, "eval_wikibio_bleu_score_sem": 0.23459476458042533, "eval_wikibio_emb_cos_sim": 0.7467377185821533, "eval_wikibio_emb_cos_sim_sem": 0.008959534909597342, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7882604598999023, "eval_wikibio_n_ngrams_match_1": 10.436, "eval_wikibio_n_ngrams_match_2": 3.57, "eval_wikibio_n_ngrams_match_3": 1.36, "eval_wikibio_num_pred_words": 37.06, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 44.17948141413601, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.363930063425463, "eval_wikibio_runtime": 10.4021, "eval_wikibio_samples_per_second": 48.067, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3277827524552823, "eval_wikibio_token_set_f1_sem": 0.004963359708103471, "eval_wikibio_token_set_precision": 0.3390005311193062, "eval_wikibio_token_set_recall": 0.3305002920753566, "eval_wikibio_true_num_tokens": 61.1328125, "step": 125625 }, { "epoch": 24.12, "eval_nq_accuracy": 0.5286875, "eval_nq_bleu_score": 11.848439929969501, "eval_nq_bleu_score_sem": 0.482556589758612, "eval_nq_emb_cos_sim": 0.8347615003585815, "eval_nq_emb_cos_sim_sem": 0.00660855538224583, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1916778087615967, "eval_nq_n_ngrams_match_1": 23.158, "eval_nq_n_ngrams_match_2": 8.594, "eval_nq_n_ngrams_match_3": 3.97, "eval_nq_num_pred_words": 49.124, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.950217277087102, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44821869646121165, "eval_nq_runtime": 11.3716, "eval_nq_samples_per_second": 43.969, "eval_nq_steps_per_second": 0.088, "eval_nq_token_set_f1": 0.462282850068921, "eval_nq_token_set_f1_sem": 0.004955139364647121, "eval_nq_token_set_precision": 0.42033641053801074, "eval_nq_token_set_recall": 0.5212534612496856, "eval_nq_true_num_tokens": 64.0, "step": 125625 }, { "epoch": 24.12, "learning_rate": 0.001, "loss": 2.5563, "step": 125628 }, { "epoch": 24.12, "learning_rate": 0.001, "loss": 2.5562, "step": 125640 }, { "epoch": 24.13, "learning_rate": 0.001, "loss": 2.5577, "step": 125652 }, { "epoch": 24.13, "learning_rate": 0.001, "loss": 2.5601, "step": 125664 }, { "epoch": 24.13, "learning_rate": 0.001, "loss": 2.547, "step": 125676 }, { "epoch": 24.13, "learning_rate": 0.001, "loss": 2.559, "step": 125688 }, { "epoch": 24.14, "learning_rate": 0.001, "loss": 2.5496, "step": 125700 }, { "epoch": 24.14, "learning_rate": 0.001, "loss": 2.5583, "step": 125712 }, { "epoch": 24.14, "learning_rate": 0.001, "loss": 2.5508, "step": 125724 }, { "epoch": 24.14, "learning_rate": 0.001, "loss": 2.5603, "step": 125736 }, { "epoch": 24.15, "learning_rate": 0.001, "loss": 2.5611, "step": 125748 }, { "epoch": 24.15, "learning_rate": 0.001, "loss": 2.5584, "step": 125760 }, { "epoch": 24.15, "learning_rate": 0.001, "loss": 2.5513, "step": 125772 }, { "epoch": 24.15, "learning_rate": 0.001, "loss": 2.5561, "step": 125784 }, { "epoch": 24.15, "learning_rate": 0.001, "loss": 2.5501, "step": 125796 }, { "epoch": 24.16, "learning_rate": 0.001, "loss": 2.553, "step": 125808 }, { "epoch": 24.16, "learning_rate": 0.001, "loss": 2.5583, "step": 125820 }, { "epoch": 24.16, "learning_rate": 0.001, "loss": 2.5551, "step": 125832 }, { "epoch": 24.16, "learning_rate": 0.001, "loss": 2.5508, "step": 125844 }, { "epoch": 24.17, "learning_rate": 0.001, "loss": 2.5529, "step": 125856 }, { "epoch": 24.17, "learning_rate": 0.001, "loss": 2.561, "step": 125868 }, { "epoch": 24.17, "learning_rate": 0.001, "loss": 2.5617, "step": 125880 }, { "epoch": 24.17, "learning_rate": 0.001, "loss": 2.5544, "step": 125892 }, { "epoch": 24.18, "learning_rate": 0.001, "loss": 2.5477, "step": 125904 }, { "epoch": 24.18, "learning_rate": 0.001, "loss": 2.5626, "step": 125916 }, { "epoch": 24.18, "learning_rate": 0.001, "loss": 2.5392, "step": 125928 }, { "epoch": 24.18, "learning_rate": 0.001, "loss": 2.5496, "step": 125940 }, { "epoch": 24.18, "learning_rate": 0.001, "loss": 2.5596, "step": 125952 }, { "epoch": 24.19, "learning_rate": 0.001, "loss": 2.554, "step": 125964 }, { "epoch": 24.19, "learning_rate": 0.001, "loss": 2.5482, "step": 125976 }, { "epoch": 24.19, "learning_rate": 0.001, "loss": 2.5463, "step": 125988 }, { "epoch": 24.19, "learning_rate": 0.001, "loss": 2.551, "step": 126000 }, { "epoch": 24.2, "learning_rate": 0.001, "loss": 2.5644, "step": 126012 }, { "epoch": 24.2, "learning_rate": 0.001, "loss": 2.5487, "step": 126024 }, { "epoch": 24.2, "learning_rate": 0.001, "loss": 2.548, "step": 126036 }, { "epoch": 24.2, "learning_rate": 0.001, "loss": 2.5485, "step": 126048 }, { "epoch": 24.21, "learning_rate": 0.001, "loss": 2.5527, "step": 126060 }, { "epoch": 24.21, "learning_rate": 0.001, "loss": 2.5403, "step": 126072 }, { "epoch": 24.21, "learning_rate": 0.001, "loss": 2.5546, "step": 126084 }, { "epoch": 24.21, "learning_rate": 0.001, "loss": 2.5591, "step": 126096 }, { "epoch": 24.21, "learning_rate": 0.001, "loss": 2.5497, "step": 126108 }, { "epoch": 24.22, "learning_rate": 0.001, "loss": 2.5585, "step": 126120 }, { "epoch": 24.22, "learning_rate": 0.001, "loss": 2.5528, "step": 126132 }, { "epoch": 24.22, "learning_rate": 0.001, "loss": 2.5662, "step": 126144 }, { "epoch": 24.22, "learning_rate": 0.001, "loss": 2.5569, "step": 126156 }, { "epoch": 24.23, "learning_rate": 0.001, "loss": 2.5605, "step": 126168 }, { "epoch": 24.23, "learning_rate": 0.001, "loss": 2.556, "step": 126180 }, { "epoch": 24.23, "learning_rate": 0.001, "loss": 2.5561, "step": 126192 }, { "epoch": 24.23, "learning_rate": 0.001, "loss": 2.5465, "step": 126204 }, { "epoch": 24.24, "learning_rate": 0.001, "loss": 2.56, "step": 126216 }, { "epoch": 24.24, "learning_rate": 0.001, "loss": 2.5483, "step": 126228 }, { "epoch": 24.24, "learning_rate": 0.001, "loss": 2.556, "step": 126240 }, { "epoch": 24.24, "eval_ag_news_accuracy": 0.320375, "eval_ag_news_bleu_score": 4.775344895117515, "eval_ag_news_bleu_score_sem": 0.1503726080535789, "eval_ag_news_emb_cos_sim": 0.813957929611206, "eval_ag_news_emb_cos_sim_sem": 0.006901063071990038, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.56335186958313, "eval_ag_news_n_ngrams_match_1": 14.102, "eval_ag_news_n_ngrams_match_2": 3.164, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 47.192, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.281257346728154, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34860669216545304, "eval_ag_news_runtime": 10.2215, "eval_ag_news_samples_per_second": 48.917, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3508809194480405, "eval_ag_news_token_set_f1_sem": 0.004608580168012796, "eval_ag_news_token_set_precision": 0.3370039889105722, "eval_ag_news_token_set_recall": 0.38289193839935337, "eval_ag_news_true_num_tokens": 56.09375, "step": 126250 }, { "epoch": 24.24, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.183296997527721, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11751172369579264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759467720985413, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009322279663840447, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2364230155944824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 47.458, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.442551177908204, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21737512716610657, "eval_anthropic_toxic_prompts_runtime": 10.5639, "eval_anthropic_toxic_prompts_samples_per_second": 47.331, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.35706090076941505, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006248248098035702, "eval_anthropic_toxic_prompts_token_set_precision": 0.44598438796709156, "eval_anthropic_toxic_prompts_token_set_recall": 0.322629626383217, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 126250 }, { "epoch": 24.24, "eval_arxiv_accuracy": 0.34590625, "eval_arxiv_bleu_score": 4.370220563565557, "eval_arxiv_bleu_score_sem": 0.12244450527849082, "eval_arxiv_emb_cos_sim": 0.7648048400878906, "eval_arxiv_emb_cos_sim_sem": 0.007851811680698786, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.40950608253479, "eval_arxiv_n_ngrams_match_1": 15.444, "eval_arxiv_n_ngrams_match_2": 3.028, "eval_arxiv_n_ngrams_match_3": 0.646, "eval_arxiv_num_pred_words": 40.934, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.250299417734496, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36743317306610207, "eval_arxiv_runtime": 10.3059, "eval_arxiv_samples_per_second": 48.516, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3612732390786656, "eval_arxiv_token_set_f1_sem": 0.0041916572078898855, "eval_arxiv_token_set_precision": 0.31239754699987654, "eval_arxiv_token_set_recall": 0.4463468093924475, "eval_arxiv_true_num_tokens": 64.0, "step": 126250 }, { "epoch": 24.24, "eval_python_code_alpaca_accuracy": 0.1619375, "eval_python_code_alpaca_bleu_score": 4.515389946211663, "eval_python_code_alpaca_bleu_score_sem": 0.14363464753390234, "eval_python_code_alpaca_emb_cos_sim": 0.7711158990859985, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007478067257538845, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8851685523986816, "eval_python_code_alpaca_n_ngrams_match_1": 9.818, "eval_python_code_alpaca_n_ngrams_match_2": 2.838, "eval_python_code_alpaca_n_ngrams_match_3": 0.942, "eval_python_code_alpaca_num_pred_words": 44.356, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.90658553911693, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33062087935042067, "eval_python_code_alpaca_runtime": 10.0448, "eval_python_code_alpaca_samples_per_second": 49.777, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4738106631009328, "eval_python_code_alpaca_token_set_f1_sem": 0.005270960318232269, "eval_python_code_alpaca_token_set_precision": 0.5344894127527231, "eval_python_code_alpaca_token_set_recall": 0.4441386550305618, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 126250 }, { "epoch": 24.24, "eval_wikibio_accuracy": 0.31928125, "eval_wikibio_bleu_score": 6.161294640055921, "eval_wikibio_bleu_score_sem": 0.22319173794094715, "eval_wikibio_emb_cos_sim": 0.7410797476768494, "eval_wikibio_emb_cos_sim_sem": 0.00849212174979997, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7522311210632324, "eval_wikibio_n_ngrams_match_1": 10.232, "eval_wikibio_n_ngrams_match_2": 3.546, "eval_wikibio_n_ngrams_match_3": 1.31, "eval_wikibio_num_pred_words": 36.764, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.61605759336966, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3602616735370881, "eval_wikibio_runtime": 10.0047, "eval_wikibio_samples_per_second": 49.976, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32419194600123336, "eval_wikibio_token_set_f1_sem": 0.005199649131062626, "eval_wikibio_token_set_precision": 0.3324473577542092, "eval_wikibio_token_set_recall": 0.33253550773248336, "eval_wikibio_true_num_tokens": 61.1328125, "step": 126250 }, { "epoch": 24.24, "eval_nq_accuracy": 0.52671875, "eval_nq_bleu_score": 11.72408680291379, "eval_nq_bleu_score_sem": 0.48437731077461954, "eval_nq_emb_cos_sim": 0.8301180005073547, "eval_nq_emb_cos_sim_sem": 0.006678577102690616, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.191635847091675, "eval_nq_n_ngrams_match_1": 23.17, "eval_nq_n_ngrams_match_2": 8.48, "eval_nq_n_ngrams_match_3": 3.878, "eval_nq_num_pred_words": 49.504, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.949841718903572, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4478242322044885, "eval_nq_runtime": 11.5305, "eval_nq_samples_per_second": 43.363, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.4605346009442573, "eval_nq_token_set_f1_sem": 0.004921763449679784, "eval_nq_token_set_precision": 0.42052989951206965, "eval_nq_token_set_recall": 0.5158344925083073, "eval_nq_true_num_tokens": 64.0, "step": 126250 }, { "epoch": 24.24, "learning_rate": 0.001, "loss": 2.5555, "step": 126252 }, { "epoch": 24.24, "learning_rate": 0.001, "loss": 2.5494, "step": 126264 }, { "epoch": 24.25, "learning_rate": 0.001, "loss": 2.5587, "step": 126276 }, { "epoch": 24.25, "learning_rate": 0.001, "loss": 2.563, "step": 126288 }, { "epoch": 24.25, "learning_rate": 0.001, "loss": 2.5574, "step": 126300 }, { "epoch": 24.25, "learning_rate": 0.001, "loss": 2.5598, "step": 126312 }, { "epoch": 24.26, "learning_rate": 0.001, "loss": 2.557, "step": 126324 }, { "epoch": 24.26, "learning_rate": 0.001, "loss": 2.5709, "step": 126336 }, { "epoch": 24.26, "learning_rate": 0.001, "loss": 2.5587, "step": 126348 }, { "epoch": 24.26, "learning_rate": 0.001, "loss": 2.5574, "step": 126360 }, { "epoch": 24.26, "learning_rate": 0.001, "loss": 2.5513, "step": 126372 }, { "epoch": 24.27, "learning_rate": 0.001, "loss": 2.5608, "step": 126384 }, { "epoch": 24.27, "learning_rate": 0.001, "loss": 2.5503, "step": 126396 }, { "epoch": 24.27, "learning_rate": 0.001, "loss": 2.5539, "step": 126408 }, { "epoch": 24.27, "learning_rate": 0.001, "loss": 2.5437, "step": 126420 }, { "epoch": 24.28, "learning_rate": 0.001, "loss": 2.5554, "step": 126432 }, { "epoch": 24.28, "learning_rate": 0.001, "loss": 2.5532, "step": 126444 }, { "epoch": 24.28, "learning_rate": 0.001, "loss": 2.5624, "step": 126456 }, { "epoch": 24.28, "learning_rate": 0.001, "loss": 2.5596, "step": 126468 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 2.5547, "step": 126480 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 2.5608, "step": 126492 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 2.5542, "step": 126504 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 2.567, "step": 126516 }, { "epoch": 24.29, "learning_rate": 0.001, "loss": 2.5639, "step": 126528 }, { "epoch": 24.3, "learning_rate": 0.001, "loss": 2.567, "step": 126540 }, { "epoch": 24.3, "learning_rate": 0.001, "loss": 2.5605, "step": 126552 }, { "epoch": 24.3, "learning_rate": 0.001, "loss": 2.557, "step": 126564 }, { "epoch": 24.3, "learning_rate": 0.001, "loss": 2.5639, "step": 126576 }, { "epoch": 24.31, "learning_rate": 0.001, "loss": 2.5517, "step": 126588 }, { "epoch": 24.31, "learning_rate": 0.001, "loss": 2.5523, "step": 126600 }, { "epoch": 24.31, "learning_rate": 0.001, "loss": 2.5569, "step": 126612 }, { "epoch": 24.31, "learning_rate": 0.001, "loss": 2.5504, "step": 126624 }, { "epoch": 24.32, "learning_rate": 0.001, "loss": 2.5577, "step": 126636 }, { "epoch": 24.32, "learning_rate": 0.001, "loss": 2.549, "step": 126648 }, { "epoch": 24.32, "learning_rate": 0.001, "loss": 2.5628, "step": 126660 }, { "epoch": 24.32, "learning_rate": 0.001, "loss": 2.553, "step": 126672 }, { "epoch": 24.32, "learning_rate": 0.001, "loss": 2.5443, "step": 126684 }, { "epoch": 24.33, "learning_rate": 0.001, "loss": 2.5611, "step": 126696 }, { "epoch": 24.33, "learning_rate": 0.001, "loss": 2.5607, "step": 126708 }, { "epoch": 24.33, "learning_rate": 0.001, "loss": 2.5643, "step": 126720 }, { "epoch": 24.33, "learning_rate": 0.001, "loss": 2.5524, "step": 126732 }, { "epoch": 24.34, "learning_rate": 0.001, "loss": 2.5363, "step": 126744 }, { "epoch": 24.34, "learning_rate": 0.001, "loss": 2.5578, "step": 126756 }, { "epoch": 24.34, "learning_rate": 0.001, "loss": 2.5579, "step": 126768 }, { "epoch": 24.34, "learning_rate": 0.001, "loss": 2.5561, "step": 126780 }, { "epoch": 24.35, "learning_rate": 0.001, "loss": 2.561, "step": 126792 }, { "epoch": 24.35, "learning_rate": 0.001, "loss": 2.5596, "step": 126804 }, { "epoch": 24.35, "learning_rate": 0.001, "loss": 2.5511, "step": 126816 }, { "epoch": 24.35, "learning_rate": 0.001, "loss": 2.5632, "step": 126828 }, { "epoch": 24.35, "learning_rate": 0.001, "loss": 2.5554, "step": 126840 }, { "epoch": 24.36, "learning_rate": 0.001, "loss": 2.5511, "step": 126852 }, { "epoch": 24.36, "learning_rate": 0.001, "loss": 2.5623, "step": 126864 }, { "epoch": 24.36, "eval_ag_news_accuracy": 0.322625, "eval_ag_news_bleu_score": 4.8517890698492945, "eval_ag_news_bleu_score_sem": 0.1522933907373371, "eval_ag_news_emb_cos_sim": 0.8152967095375061, "eval_ag_news_emb_cos_sim_sem": 0.0068136669717911436, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5554556846618652, "eval_ag_news_n_ngrams_match_1": 14.124, "eval_ag_news_n_ngrams_match_2": 3.106, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 46.47, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.003767013741154, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35211771820722176, "eval_ag_news_runtime": 10.42, "eval_ag_news_samples_per_second": 47.985, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35229496268430777, "eval_ag_news_token_set_f1_sem": 0.004466539590259143, "eval_ag_news_token_set_precision": 0.3383977395442717, "eval_ag_news_token_set_recall": 0.3810278056279813, "eval_ag_news_true_num_tokens": 56.09375, "step": 126875 }, { "epoch": 24.36, "eval_anthropic_toxic_prompts_accuracy": 0.1158125, "eval_anthropic_toxic_prompts_bleu_score": 3.149471515981467, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1190929971631158, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771558523178101, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008211656863473242, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02147948148198014, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.233881950378418, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 47.226, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.37798206800531, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21729543515758332, "eval_anthropic_toxic_prompts_runtime": 10.1479, "eval_anthropic_toxic_prompts_samples_per_second": 49.271, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.35704811484097193, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006284680811508397, "eval_anthropic_toxic_prompts_token_set_precision": 0.4458421497684926, "eval_anthropic_toxic_prompts_token_set_recall": 0.3231182740517771, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 126875 }, { "epoch": 24.36, "eval_arxiv_accuracy": 0.346875, "eval_arxiv_bleu_score": 4.3466297534459635, "eval_arxiv_bleu_score_sem": 0.12563718015316847, "eval_arxiv_emb_cos_sim": 0.7638865113258362, "eval_arxiv_emb_cos_sim_sem": 0.007166739682845506, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4086461067199707, "eval_arxiv_n_ngrams_match_1": 15.086, "eval_arxiv_n_ngrams_match_2": 2.978, "eval_arxiv_n_ngrams_match_3": 0.642, "eval_arxiv_num_pred_words": 40.306, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.224296074569896, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36123106928975923, "eval_arxiv_runtime": 10.4356, "eval_arxiv_samples_per_second": 47.913, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35605520173792715, "eval_arxiv_token_set_f1_sem": 0.004142081506871111, "eval_arxiv_token_set_precision": 0.3057803339486993, "eval_arxiv_token_set_recall": 0.44311153278851845, "eval_arxiv_true_num_tokens": 64.0, "step": 126875 }, { "epoch": 24.36, "eval_python_code_alpaca_accuracy": 0.160625, "eval_python_code_alpaca_bleu_score": 4.30087033516261, "eval_python_code_alpaca_bleu_score_sem": 0.13042133780117723, "eval_python_code_alpaca_emb_cos_sim": 0.7505527138710022, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00803445307308722, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8764841556549072, "eval_python_code_alpaca_n_ngrams_match_1": 9.74, "eval_python_code_alpaca_n_ngrams_match_2": 2.774, "eval_python_code_alpaca_n_ngrams_match_3": 0.866, "eval_python_code_alpaca_num_pred_words": 44.03, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.751750941624103, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3260038877090582, "eval_python_code_alpaca_runtime": 10.7463, "eval_python_code_alpaca_samples_per_second": 46.528, "eval_python_code_alpaca_steps_per_second": 0.093, "eval_python_code_alpaca_token_set_f1": 0.47522250597941507, "eval_python_code_alpaca_token_set_f1_sem": 0.005458049414288956, "eval_python_code_alpaca_token_set_precision": 0.5245003194265568, "eval_python_code_alpaca_token_set_recall": 0.46036931571758516, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 126875 }, { "epoch": 24.36, "eval_wikibio_accuracy": 0.3223125, "eval_wikibio_bleu_score": 5.974463342667476, "eval_wikibio_bleu_score_sem": 0.2150361480818903, "eval_wikibio_emb_cos_sim": 0.7507862448692322, "eval_wikibio_emb_cos_sim_sem": 0.008929076567558633, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7396738529205322, "eval_wikibio_n_ngrams_match_1": 9.938, "eval_wikibio_n_ngrams_match_2": 3.368, "eval_wikibio_n_ngrams_match_3": 1.23, "eval_wikibio_num_pred_words": 35.982, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.08426226723169, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3550537204340967, "eval_wikibio_runtime": 10.2427, "eval_wikibio_samples_per_second": 48.815, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3207254466103453, "eval_wikibio_token_set_f1_sem": 0.0054076916010678076, "eval_wikibio_token_set_precision": 0.32816329139207967, "eval_wikibio_token_set_recall": 0.32734481181794106, "eval_wikibio_true_num_tokens": 61.1328125, "step": 126875 }, { "epoch": 24.36, "eval_nq_accuracy": 0.52746875, "eval_nq_bleu_score": 11.540913833744037, "eval_nq_bleu_score_sem": 0.45454573049351493, "eval_nq_emb_cos_sim": 0.8343319892883301, "eval_nq_emb_cos_sim_sem": 0.006692799029885011, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1918694972991943, "eval_nq_n_ngrams_match_1": 23.134, "eval_nq_n_ngrams_match_2": 8.408, "eval_nq_n_ngrams_match_3": 3.828, "eval_nq_num_pred_words": 48.936, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.951933095594246, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4484575503789313, "eval_nq_runtime": 10.5333, "eval_nq_samples_per_second": 47.469, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4613356012270699, "eval_nq_token_set_f1_sem": 0.004893589728379912, "eval_nq_token_set_precision": 0.4203849298189492, "eval_nq_token_set_recall": 0.5178627023143768, "eval_nq_true_num_tokens": 64.0, "step": 126875 }, { "epoch": 24.36, "learning_rate": 0.001, "loss": 2.5562, "step": 126876 }, { "epoch": 24.36, "learning_rate": 0.001, "loss": 2.5638, "step": 126888 }, { "epoch": 24.37, "learning_rate": 0.001, "loss": 2.5577, "step": 126900 }, { "epoch": 24.37, "learning_rate": 0.001, "loss": 2.5579, "step": 126912 }, { "epoch": 24.37, "learning_rate": 0.001, "loss": 2.5597, "step": 126924 }, { "epoch": 24.37, "learning_rate": 0.001, "loss": 2.5638, "step": 126936 }, { "epoch": 24.38, "learning_rate": 0.001, "loss": 2.5625, "step": 126948 }, { "epoch": 24.38, "learning_rate": 0.001, "loss": 2.564, "step": 126960 }, { "epoch": 24.38, "learning_rate": 0.001, "loss": 2.5637, "step": 126972 }, { "epoch": 24.38, "learning_rate": 0.001, "loss": 2.5598, "step": 126984 }, { "epoch": 24.38, "learning_rate": 0.001, "loss": 2.5647, "step": 126996 }, { "epoch": 24.39, "learning_rate": 0.001, "loss": 2.5537, "step": 127008 }, { "epoch": 24.39, "learning_rate": 0.001, "loss": 2.5536, "step": 127020 }, { "epoch": 24.39, "learning_rate": 0.001, "loss": 2.5602, "step": 127032 }, { "epoch": 24.39, "learning_rate": 0.001, "loss": 2.5693, "step": 127044 }, { "epoch": 24.4, "learning_rate": 0.001, "loss": 2.5591, "step": 127056 }, { "epoch": 24.4, "learning_rate": 0.001, "loss": 2.5621, "step": 127068 }, { "epoch": 24.4, "learning_rate": 0.001, "loss": 2.5521, "step": 127080 }, { "epoch": 24.4, "learning_rate": 0.001, "loss": 2.5578, "step": 127092 }, { "epoch": 24.41, "learning_rate": 0.001, "loss": 2.5605, "step": 127104 }, { "epoch": 24.41, "learning_rate": 0.001, "loss": 2.5435, "step": 127116 }, { "epoch": 24.41, "learning_rate": 0.001, "loss": 2.5597, "step": 127128 }, { "epoch": 24.41, "learning_rate": 0.001, "loss": 2.5629, "step": 127140 }, { "epoch": 24.41, "learning_rate": 0.001, "loss": 2.5765, "step": 127152 }, { "epoch": 24.42, "learning_rate": 0.001, "loss": 2.5616, "step": 127164 }, { "epoch": 24.42, "learning_rate": 0.001, "loss": 2.5583, "step": 127176 }, { "epoch": 24.42, "learning_rate": 0.001, "loss": 2.5643, "step": 127188 }, { "epoch": 24.42, "learning_rate": 0.001, "loss": 2.5734, "step": 127200 }, { "epoch": 24.43, "learning_rate": 0.001, "loss": 2.555, "step": 127212 }, { "epoch": 24.43, "learning_rate": 0.001, "loss": 2.5562, "step": 127224 }, { "epoch": 24.43, "learning_rate": 0.001, "loss": 2.569, "step": 127236 }, { "epoch": 24.43, "learning_rate": 0.001, "loss": 2.5769, "step": 127248 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 2.5634, "step": 127260 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 2.5638, "step": 127272 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 2.5647, "step": 127284 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 2.5585, "step": 127296 }, { "epoch": 24.44, "learning_rate": 0.001, "loss": 2.5491, "step": 127308 }, { "epoch": 24.45, "learning_rate": 0.001, "loss": 2.5604, "step": 127320 }, { "epoch": 24.45, "learning_rate": 0.001, "loss": 2.5643, "step": 127332 }, { "epoch": 24.45, "learning_rate": 0.001, "loss": 2.572, "step": 127344 }, { "epoch": 24.45, "learning_rate": 0.001, "loss": 2.5779, "step": 127356 }, { "epoch": 24.46, "learning_rate": 0.001, "loss": 2.556, "step": 127368 }, { "epoch": 24.46, "learning_rate": 0.001, "loss": 2.5708, "step": 127380 }, { "epoch": 24.46, "learning_rate": 0.001, "loss": 2.5754, "step": 127392 }, { "epoch": 24.46, "learning_rate": 0.001, "loss": 2.5524, "step": 127404 }, { "epoch": 24.47, "learning_rate": 0.001, "loss": 2.5581, "step": 127416 }, { "epoch": 24.47, "learning_rate": 0.001, "loss": 2.5617, "step": 127428 }, { "epoch": 24.47, "learning_rate": 0.001, "loss": 2.5626, "step": 127440 }, { "epoch": 24.47, "learning_rate": 0.001, "loss": 2.5585, "step": 127452 }, { "epoch": 24.47, "learning_rate": 0.001, "loss": 2.5667, "step": 127464 }, { "epoch": 24.48, "learning_rate": 0.001, "loss": 2.5585, "step": 127476 }, { "epoch": 24.48, "learning_rate": 0.001, "loss": 2.5702, "step": 127488 }, { "epoch": 24.48, "learning_rate": 0.001, "loss": 2.5611, "step": 127500 }, { "epoch": 24.48, "eval_ag_news_accuracy": 0.32275, "eval_ag_news_bleu_score": 4.639986234548955, "eval_ag_news_bleu_score_sem": 0.15093575212638385, "eval_ag_news_emb_cos_sim": 0.8081047534942627, "eval_ag_news_emb_cos_sim_sem": 0.007535462045587571, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.562582492828369, "eval_ag_news_n_ngrams_match_1": 13.852, "eval_ag_news_n_ngrams_match_2": 3.042, "eval_ag_news_n_ngrams_match_3": 0.848, "eval_ag_news_num_pred_words": 46.446, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.25412320697353, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3421019167867688, "eval_ag_news_runtime": 10.3419, "eval_ag_news_samples_per_second": 48.347, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3455803030196758, "eval_ag_news_token_set_f1_sem": 0.00467891002648212, "eval_ag_news_token_set_precision": 0.33008748740328303, "eval_ag_news_token_set_recall": 0.37960448336621844, "eval_ag_news_true_num_tokens": 56.09375, "step": 127500 }, { "epoch": 24.48, "eval_anthropic_toxic_prompts_accuracy": 0.114125, "eval_anthropic_toxic_prompts_bleu_score": 3.1050762676535277, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11241941847580554, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736693382263184, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008920013678039775, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2447471618652344, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.266, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.874, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 47.3, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.655222621492257, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875, "eval_anthropic_toxic_prompts_rouge_score": 0.21439864626368288, "eval_anthropic_toxic_prompts_runtime": 9.9393, "eval_anthropic_toxic_prompts_samples_per_second": 50.305, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3553538130328317, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065486978584849975, "eval_anthropic_toxic_prompts_token_set_precision": 0.4437251640051009, "eval_anthropic_toxic_prompts_token_set_recall": 0.3215271663954234, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 127500 }, { "epoch": 24.48, "eval_arxiv_accuracy": 0.34778125, "eval_arxiv_bleu_score": 4.448812431247174, "eval_arxiv_bleu_score_sem": 0.12925630353778725, "eval_arxiv_emb_cos_sim": 0.7683203220367432, "eval_arxiv_emb_cos_sim_sem": 0.007284435415856686, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.406280755996704, "eval_arxiv_n_ngrams_match_1": 15.096, "eval_arxiv_n_ngrams_match_2": 2.99, "eval_arxiv_n_ngrams_match_3": 0.71, "eval_arxiv_num_pred_words": 40.876, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.152889498080985, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35627068013546237, "eval_arxiv_runtime": 10.0932, "eval_arxiv_samples_per_second": 49.538, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3528760660577213, "eval_arxiv_token_set_f1_sem": 0.004272846584088692, "eval_arxiv_token_set_precision": 0.30536737720033885, "eval_arxiv_token_set_recall": 0.43913473403758196, "eval_arxiv_true_num_tokens": 64.0, "step": 127500 }, { "epoch": 24.48, "eval_python_code_alpaca_accuracy": 0.1595, "eval_python_code_alpaca_bleu_score": 4.365213762966829, "eval_python_code_alpaca_bleu_score_sem": 0.13607054543843916, "eval_python_code_alpaca_emb_cos_sim": 0.7403180003166199, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010975971851260109, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8966422080993652, "eval_python_code_alpaca_n_ngrams_match_1": 9.822, "eval_python_code_alpaca_n_ngrams_match_2": 2.798, "eval_python_code_alpaca_n_ngrams_match_3": 0.896, "eval_python_code_alpaca_num_pred_words": 43.694, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.113222711365232, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32386381280394927, "eval_python_code_alpaca_runtime": 9.9343, "eval_python_code_alpaca_samples_per_second": 50.33, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.46753360571179126, "eval_python_code_alpaca_token_set_f1_sem": 0.005739783072673743, "eval_python_code_alpaca_token_set_precision": 0.5370473728284986, "eval_python_code_alpaca_token_set_recall": 0.4377501185475591, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 127500 }, { "epoch": 24.48, "eval_wikibio_accuracy": 0.32396875, "eval_wikibio_bleu_score": 6.1000228062214825, "eval_wikibio_bleu_score_sem": 0.23407866633719634, "eval_wikibio_emb_cos_sim": 0.7507628798484802, "eval_wikibio_emb_cos_sim_sem": 0.00895993069484664, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7186765670776367, "eval_wikibio_n_ngrams_match_1": 10.184, "eval_wikibio_n_ngrams_match_2": 3.444, "eval_wikibio_n_ngrams_match_3": 1.314, "eval_wikibio_num_pred_words": 37.072, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.209819571760974, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.356131583206422, "eval_wikibio_runtime": 10.9648, "eval_wikibio_samples_per_second": 45.6, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3199903781561186, "eval_wikibio_token_set_f1_sem": 0.005533735553196984, "eval_wikibio_token_set_precision": 0.33100406067427135, "eval_wikibio_token_set_recall": 0.32680066525775153, "eval_wikibio_true_num_tokens": 61.1328125, "step": 127500 }, { "epoch": 24.48, "eval_nq_accuracy": 0.52690625, "eval_nq_bleu_score": 11.758171062058048, "eval_nq_bleu_score_sem": 0.4951452956106227, "eval_nq_emb_cos_sim": 0.8322650194168091, "eval_nq_emb_cos_sim_sem": 0.006576643632478003, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1930091381073, "eval_nq_n_ngrams_match_1": 23.302, "eval_nq_n_ngrams_match_2": 8.636, "eval_nq_n_ngrams_match_3": 3.942, "eval_nq_num_pred_words": 49.612, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.962140899371478, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4474533939548328, "eval_nq_runtime": 10.3442, "eval_nq_samples_per_second": 48.336, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.46476623759754593, "eval_nq_token_set_f1_sem": 0.004884904240429779, "eval_nq_token_set_precision": 0.4220830535152562, "eval_nq_token_set_recall": 0.5242566794244469, "eval_nq_true_num_tokens": 64.0, "step": 127500 }, { "epoch": 24.48, "learning_rate": 0.001, "loss": 2.5632, "step": 127512 }, { "epoch": 24.49, "learning_rate": 0.001, "loss": 2.56, "step": 127524 }, { "epoch": 24.49, "learning_rate": 0.001, "loss": 2.5641, "step": 127536 }, { "epoch": 24.49, "learning_rate": 0.001, "loss": 2.5613, "step": 127548 }, { "epoch": 24.49, "learning_rate": 0.001, "loss": 2.5545, "step": 127560 }, { "epoch": 24.5, "learning_rate": 0.001, "loss": 2.5666, "step": 127572 }, { "epoch": 24.5, "learning_rate": 0.001, "loss": 2.5643, "step": 127584 }, { "epoch": 24.5, "learning_rate": 0.001, "loss": 2.5573, "step": 127596 }, { "epoch": 24.5, "learning_rate": 0.001, "loss": 2.557, "step": 127608 }, { "epoch": 24.5, "learning_rate": 0.001, "loss": 2.5619, "step": 127620 }, { "epoch": 24.51, "learning_rate": 0.001, "loss": 2.5564, "step": 127632 }, { "epoch": 24.51, "learning_rate": 0.001, "loss": 2.5662, "step": 127644 }, { "epoch": 24.51, "learning_rate": 0.001, "loss": 2.5552, "step": 127656 }, { "epoch": 24.51, "learning_rate": 0.001, "loss": 2.5669, "step": 127668 }, { "epoch": 24.52, "learning_rate": 0.001, "loss": 2.5679, "step": 127680 }, { "epoch": 24.52, "learning_rate": 0.001, "loss": 2.5652, "step": 127692 }, { "epoch": 24.52, "learning_rate": 0.001, "loss": 2.5624, "step": 127704 }, { "epoch": 24.52, "learning_rate": 0.001, "loss": 2.5621, "step": 127716 }, { "epoch": 24.53, "learning_rate": 0.001, "loss": 2.5698, "step": 127728 }, { "epoch": 24.53, "learning_rate": 0.001, "loss": 2.5542, "step": 127740 }, { "epoch": 24.53, "learning_rate": 0.001, "loss": 2.5687, "step": 127752 }, { "epoch": 24.53, "learning_rate": 0.001, "loss": 2.5659, "step": 127764 }, { "epoch": 24.53, "learning_rate": 0.001, "loss": 2.5643, "step": 127776 }, { "epoch": 24.54, "learning_rate": 0.001, "loss": 2.5698, "step": 127788 }, { "epoch": 24.54, "learning_rate": 0.001, "loss": 2.5541, "step": 127800 }, { "epoch": 24.54, "learning_rate": 0.001, "loss": 2.5662, "step": 127812 }, { "epoch": 24.54, "learning_rate": 0.001, "loss": 2.5659, "step": 127824 }, { "epoch": 24.55, "learning_rate": 0.001, "loss": 2.5532, "step": 127836 }, { "epoch": 24.55, "learning_rate": 0.001, "loss": 2.5513, "step": 127848 }, { "epoch": 24.55, "learning_rate": 0.001, "loss": 2.5729, "step": 127860 }, { "epoch": 24.55, "learning_rate": 0.001, "loss": 2.5559, "step": 127872 }, { "epoch": 24.56, "learning_rate": 0.001, "loss": 2.5557, "step": 127884 }, { "epoch": 24.56, "learning_rate": 0.001, "loss": 2.5519, "step": 127896 }, { "epoch": 24.56, "learning_rate": 0.001, "loss": 2.5519, "step": 127908 }, { "epoch": 24.56, "learning_rate": 0.001, "loss": 2.566, "step": 127920 }, { "epoch": 24.56, "learning_rate": 0.001, "loss": 2.5733, "step": 127932 }, { "epoch": 24.57, "learning_rate": 0.001, "loss": 2.5615, "step": 127944 }, { "epoch": 24.57, "learning_rate": 0.001, "loss": 2.5655, "step": 127956 }, { "epoch": 24.57, "learning_rate": 0.001, "loss": 2.565, "step": 127968 }, { "epoch": 24.57, "learning_rate": 0.001, "loss": 2.5528, "step": 127980 }, { "epoch": 24.58, "learning_rate": 0.001, "loss": 2.5627, "step": 127992 }, { "epoch": 24.58, "learning_rate": 0.001, "loss": 2.574, "step": 128004 }, { "epoch": 24.58, "learning_rate": 0.001, "loss": 2.5644, "step": 128016 }, { "epoch": 24.58, "learning_rate": 0.001, "loss": 2.5635, "step": 128028 }, { "epoch": 24.59, "learning_rate": 0.001, "loss": 2.569, "step": 128040 }, { "epoch": 24.59, "learning_rate": 0.001, "loss": 2.5648, "step": 128052 }, { "epoch": 24.59, "learning_rate": 0.001, "loss": 2.56, "step": 128064 }, { "epoch": 24.59, "learning_rate": 0.001, "loss": 2.5599, "step": 128076 }, { "epoch": 24.59, "learning_rate": 0.001, "loss": 2.5594, "step": 128088 }, { "epoch": 24.6, "learning_rate": 0.001, "loss": 2.5635, "step": 128100 }, { "epoch": 24.6, "learning_rate": 0.001, "loss": 2.5661, "step": 128112 }, { "epoch": 24.6, "learning_rate": 0.001, "loss": 2.5576, "step": 128124 }, { "epoch": 24.6, "eval_ag_news_accuracy": 0.3214375, "eval_ag_news_bleu_score": 4.750501466268349, "eval_ag_news_bleu_score_sem": 0.15019038992967793, "eval_ag_news_emb_cos_sim": 0.8111311793327332, "eval_ag_news_emb_cos_sim_sem": 0.006907176340657641, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5570576190948486, "eval_ag_news_n_ngrams_match_1": 14.06, "eval_ag_news_n_ngrams_match_2": 3.07, "eval_ag_news_n_ngrams_match_3": 0.872, "eval_ag_news_num_pred_words": 46.858, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.05988569062406, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34809493488834936, "eval_ag_news_runtime": 10.653, "eval_ag_news_samples_per_second": 46.935, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3494369119117738, "eval_ag_news_token_set_f1_sem": 0.004413964753067884, "eval_ag_news_token_set_precision": 0.33502623632371903, "eval_ag_news_token_set_recall": 0.3817980944693631, "eval_ag_news_true_num_tokens": 56.09375, "step": 128125 }, { "epoch": 24.6, "eval_anthropic_toxic_prompts_accuracy": 0.1135625, "eval_anthropic_toxic_prompts_bleu_score": 2.9947039622433658, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10692995469104294, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674341082572937, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0093943527504276, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2263243198394775, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.226, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.808, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.63, "eval_anthropic_toxic_prompts_num_pred_words": 46.472, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.18690760030586, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2144519553837049, "eval_anthropic_toxic_prompts_runtime": 9.8168, "eval_anthropic_toxic_prompts_samples_per_second": 50.933, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35419134220481474, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006446299903105918, "eval_anthropic_toxic_prompts_token_set_precision": 0.4402190910751929, "eval_anthropic_toxic_prompts_token_set_recall": 0.3204095056859719, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 128125 }, { "epoch": 24.6, "eval_arxiv_accuracy": 0.34721875, "eval_arxiv_bleu_score": 4.285557501452188, "eval_arxiv_bleu_score_sem": 0.12125013896119925, "eval_arxiv_emb_cos_sim": 0.7653764486312866, "eval_arxiv_emb_cos_sim_sem": 0.007201370562886794, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3980510234832764, "eval_arxiv_n_ngrams_match_1": 15.098, "eval_arxiv_n_ngrams_match_2": 2.94, "eval_arxiv_n_ngrams_match_3": 0.654, "eval_arxiv_num_pred_words": 40.84, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.90575759256494, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3596928326263683, "eval_arxiv_runtime": 10.3729, "eval_arxiv_samples_per_second": 48.202, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35664676180739174, "eval_arxiv_token_set_f1_sem": 0.004113132817146726, "eval_arxiv_token_set_precision": 0.3063978328796045, "eval_arxiv_token_set_recall": 0.4440181599383096, "eval_arxiv_true_num_tokens": 64.0, "step": 128125 }, { "epoch": 24.6, "eval_python_code_alpaca_accuracy": 0.15984375, "eval_python_code_alpaca_bleu_score": 4.527798796290013, "eval_python_code_alpaca_bleu_score_sem": 0.13816159101012032, "eval_python_code_alpaca_emb_cos_sim": 0.7694908380508423, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0074370432274140615, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8988540172576904, "eval_python_code_alpaca_n_ngrams_match_1": 9.874, "eval_python_code_alpaca_n_ngrams_match_2": 2.842, "eval_python_code_alpaca_n_ngrams_match_3": 0.916, "eval_python_code_alpaca_num_pred_words": 43.818, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.15333004177462, "eval_python_code_alpaca_pred_num_tokens": 62.796875, "eval_python_code_alpaca_rouge_score": 0.33195087861643935, "eval_python_code_alpaca_runtime": 10.3324, "eval_python_code_alpaca_samples_per_second": 48.391, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.47935385745143244, "eval_python_code_alpaca_token_set_f1_sem": 0.005417704378028998, "eval_python_code_alpaca_token_set_precision": 0.5396073459899511, "eval_python_code_alpaca_token_set_recall": 0.4510487647260865, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 128125 }, { "epoch": 24.6, "eval_wikibio_accuracy": 0.324375, "eval_wikibio_bleu_score": 5.886994198339176, "eval_wikibio_bleu_score_sem": 0.22194314839117812, "eval_wikibio_emb_cos_sim": 0.7401241064071655, "eval_wikibio_emb_cos_sim_sem": 0.008463849775527764, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.706007719039917, "eval_wikibio_n_ngrams_match_1": 10.028, "eval_wikibio_n_ngrams_match_2": 3.322, "eval_wikibio_n_ngrams_match_3": 1.196, "eval_wikibio_num_pred_words": 35.996, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.69103179048241, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.351261898116098, "eval_wikibio_runtime": 10.0533, "eval_wikibio_samples_per_second": 49.735, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3166181934378616, "eval_wikibio_token_set_f1_sem": 0.005451295581210154, "eval_wikibio_token_set_precision": 0.3263197820828759, "eval_wikibio_token_set_recall": 0.3229714542426315, "eval_wikibio_true_num_tokens": 61.1328125, "step": 128125 }, { "epoch": 24.6, "eval_nq_accuracy": 0.5265, "eval_nq_bleu_score": 11.506156015401555, "eval_nq_bleu_score_sem": 0.46702609026551933, "eval_nq_emb_cos_sim": 0.8306975960731506, "eval_nq_emb_cos_sim_sem": 0.007131957337567383, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1885643005371094, "eval_nq_n_ngrams_match_1": 22.91, "eval_nq_n_ngrams_match_2": 8.414, "eval_nq_n_ngrams_match_3": 3.848, "eval_nq_num_pred_words": 49.1, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.922394038401565, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44169443578995443, "eval_nq_runtime": 10.5133, "eval_nq_samples_per_second": 47.559, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4594846132417338, "eval_nq_token_set_f1_sem": 0.004880251654589268, "eval_nq_token_set_precision": 0.41670072674744696, "eval_nq_token_set_recall": 0.5202501300338103, "eval_nq_true_num_tokens": 64.0, "step": 128125 }, { "epoch": 24.6, "learning_rate": 0.001, "loss": 2.5607, "step": 128136 }, { "epoch": 24.61, "learning_rate": 0.001, "loss": 2.5523, "step": 128148 }, { "epoch": 24.61, "learning_rate": 0.001, "loss": 2.5671, "step": 128160 }, { "epoch": 24.61, "learning_rate": 0.001, "loss": 2.5498, "step": 128172 }, { "epoch": 24.61, "learning_rate": 0.001, "loss": 2.5572, "step": 128184 }, { "epoch": 24.62, "learning_rate": 0.001, "loss": 2.569, "step": 128196 }, { "epoch": 24.62, "learning_rate": 0.001, "loss": 2.5602, "step": 128208 }, { "epoch": 24.62, "learning_rate": 0.001, "loss": 2.5538, "step": 128220 }, { "epoch": 24.62, "learning_rate": 0.001, "loss": 2.5505, "step": 128232 }, { "epoch": 24.62, "learning_rate": 0.001, "loss": 2.568, "step": 128244 }, { "epoch": 24.63, "learning_rate": 0.001, "loss": 2.5673, "step": 128256 }, { "epoch": 24.63, "learning_rate": 0.001, "loss": 2.5617, "step": 128268 }, { "epoch": 24.63, "learning_rate": 0.001, "loss": 2.5574, "step": 128280 }, { "epoch": 24.63, "learning_rate": 0.001, "loss": 2.5679, "step": 128292 }, { "epoch": 24.64, "learning_rate": 0.001, "loss": 2.5544, "step": 128304 }, { "epoch": 24.64, "learning_rate": 0.001, "loss": 2.5668, "step": 128316 }, { "epoch": 24.64, "learning_rate": 0.001, "loss": 2.5593, "step": 128328 }, { "epoch": 24.64, "learning_rate": 0.001, "loss": 2.5677, "step": 128340 }, { "epoch": 24.65, "learning_rate": 0.001, "loss": 2.5686, "step": 128352 }, { "epoch": 24.65, "learning_rate": 0.001, "loss": 2.5564, "step": 128364 }, { "epoch": 24.65, "learning_rate": 0.001, "loss": 2.558, "step": 128376 }, { "epoch": 24.65, "learning_rate": 0.001, "loss": 2.5616, "step": 128388 }, { "epoch": 24.65, "learning_rate": 0.001, "loss": 2.5562, "step": 128400 }, { "epoch": 24.66, "learning_rate": 0.001, "loss": 2.5519, "step": 128412 }, { "epoch": 24.66, "learning_rate": 0.001, "loss": 2.5656, "step": 128424 }, { "epoch": 24.66, "learning_rate": 0.001, "loss": 2.5659, "step": 128436 }, { "epoch": 24.66, "learning_rate": 0.001, "loss": 2.5625, "step": 128448 }, { "epoch": 24.67, "learning_rate": 0.001, "loss": 2.5607, "step": 128460 }, { "epoch": 24.67, "learning_rate": 0.001, "loss": 2.5608, "step": 128472 }, { "epoch": 24.67, "learning_rate": 0.001, "loss": 2.5542, "step": 128484 }, { "epoch": 24.67, "learning_rate": 0.001, "loss": 2.5677, "step": 128496 }, { "epoch": 24.68, "learning_rate": 0.001, "loss": 2.5654, "step": 128508 }, { "epoch": 24.68, "learning_rate": 0.001, "loss": 2.5713, "step": 128520 }, { "epoch": 24.68, "learning_rate": 0.001, "loss": 2.551, "step": 128532 }, { "epoch": 24.68, "learning_rate": 0.001, "loss": 2.5571, "step": 128544 }, { "epoch": 24.68, "learning_rate": 0.001, "loss": 2.562, "step": 128556 }, { "epoch": 24.69, "learning_rate": 0.001, "loss": 2.5591, "step": 128568 }, { "epoch": 24.69, "learning_rate": 0.001, "loss": 2.5585, "step": 128580 }, { "epoch": 24.69, "learning_rate": 0.001, "loss": 2.5585, "step": 128592 }, { "epoch": 24.69, "learning_rate": 0.001, "loss": 2.5445, "step": 128604 }, { "epoch": 24.7, "learning_rate": 0.001, "loss": 2.5632, "step": 128616 }, { "epoch": 24.7, "learning_rate": 0.001, "loss": 2.5553, "step": 128628 }, { "epoch": 24.7, "learning_rate": 0.001, "loss": 2.5568, "step": 128640 }, { "epoch": 24.7, "learning_rate": 0.001, "loss": 2.5587, "step": 128652 }, { "epoch": 24.71, "learning_rate": 0.001, "loss": 2.5671, "step": 128664 }, { "epoch": 24.71, "learning_rate": 0.001, "loss": 2.5626, "step": 128676 }, { "epoch": 24.71, "learning_rate": 0.001, "loss": 2.5567, "step": 128688 }, { "epoch": 24.71, "learning_rate": 0.001, "loss": 2.5482, "step": 128700 }, { "epoch": 24.71, "learning_rate": 0.001, "loss": 2.56, "step": 128712 }, { "epoch": 24.72, "learning_rate": 0.001, "loss": 2.5585, "step": 128724 }, { "epoch": 24.72, "learning_rate": 0.001, "loss": 2.5686, "step": 128736 }, { "epoch": 24.72, "learning_rate": 0.001, "loss": 2.5624, "step": 128748 }, { "epoch": 24.72, "eval_ag_news_accuracy": 0.3225, "eval_ag_news_bleu_score": 4.8594125617269714, "eval_ag_news_bleu_score_sem": 0.1493270373992517, "eval_ag_news_emb_cos_sim": 0.8203473091125488, "eval_ag_news_emb_cos_sim_sem": 0.006253855407712242, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5523531436920166, "eval_ag_news_n_ngrams_match_1": 14.236, "eval_ag_news_n_ngrams_match_2": 3.112, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.576, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.89533468732818, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3520427300955037, "eval_ag_news_runtime": 10.3902, "eval_ag_news_samples_per_second": 48.122, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35754788054873177, "eval_ag_news_token_set_f1_sem": 0.00438505286786259, "eval_ag_news_token_set_precision": 0.34227245238066745, "eval_ag_news_token_set_recall": 0.3899300240692165, "eval_ag_news_true_num_tokens": 56.09375, "step": 128750 }, { "epoch": 24.72, "eval_anthropic_toxic_prompts_accuracy": 0.11378125, "eval_anthropic_toxic_prompts_bleu_score": 3.1841452691344037, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11833967806378883, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6728819608688354, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008648650535357875, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2363362312316895, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716, "eval_anthropic_toxic_prompts_num_pred_words": 47.174, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.440343258124244, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2175659295386394, "eval_anthropic_toxic_prompts_runtime": 9.8006, "eval_anthropic_toxic_prompts_samples_per_second": 51.017, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3627906442378746, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526858455145603, "eval_anthropic_toxic_prompts_token_set_precision": 0.4439766026920582, "eval_anthropic_toxic_prompts_token_set_recall": 0.3322530444952403, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 128750 }, { "epoch": 24.72, "eval_arxiv_accuracy": 0.3476875, "eval_arxiv_bleu_score": 4.388585308371022, "eval_arxiv_bleu_score_sem": 0.12655224546827296, "eval_arxiv_emb_cos_sim": 0.7760187387466431, "eval_arxiv_emb_cos_sim_sem": 0.006352286079686895, "eval_arxiv_emb_top1_equal": 0.3984375, "eval_arxiv_emb_top1_equal_sem": 0.04344287990767221, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.40493106842041, "eval_arxiv_n_ngrams_match_1": 15.22, "eval_arxiv_n_ngrams_match_2": 2.974, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.964, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.112219969488624, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3626776385572391, "eval_arxiv_runtime": 10.2292, "eval_arxiv_samples_per_second": 48.88, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35554001998337925, "eval_arxiv_token_set_f1_sem": 0.004236998849753365, "eval_arxiv_token_set_precision": 0.30881461791339043, "eval_arxiv_token_set_recall": 0.43578627201146425, "eval_arxiv_true_num_tokens": 64.0, "step": 128750 }, { "epoch": 24.72, "eval_python_code_alpaca_accuracy": 0.16084375, "eval_python_code_alpaca_bleu_score": 4.623035325589885, "eval_python_code_alpaca_bleu_score_sem": 0.13889948220375697, "eval_python_code_alpaca_emb_cos_sim": 0.7599724531173706, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00829261108129505, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8961777687072754, "eval_python_code_alpaca_n_ngrams_match_1": 9.97, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 1.004, "eval_python_code_alpaca_num_pred_words": 43.95, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.10481217046481, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33946354671892975, "eval_python_code_alpaca_runtime": 10.4155, "eval_python_code_alpaca_samples_per_second": 48.005, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.47794335008136624, "eval_python_code_alpaca_token_set_f1_sem": 0.005361574417404339, "eval_python_code_alpaca_token_set_precision": 0.548465843970291, "eval_python_code_alpaca_token_set_recall": 0.4426105243920704, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 128750 }, { "epoch": 24.72, "eval_wikibio_accuracy": 0.3264375, "eval_wikibio_bleu_score": 6.027325001403926, "eval_wikibio_bleu_score_sem": 0.21576941232633043, "eval_wikibio_emb_cos_sim": 0.7406089305877686, "eval_wikibio_emb_cos_sim_sem": 0.009288847993348196, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7055041790008545, "eval_wikibio_n_ngrams_match_1": 10.224, "eval_wikibio_n_ngrams_match_2": 3.424, "eval_wikibio_n_ngrams_match_3": 1.244, "eval_wikibio_num_pred_words": 36.738, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.670547384537244, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36090803844712627, "eval_wikibio_runtime": 10.1752, "eval_wikibio_samples_per_second": 49.139, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.32459411007854344, "eval_wikibio_token_set_f1_sem": 0.00525416512902063, "eval_wikibio_token_set_precision": 0.3334788419930351, "eval_wikibio_token_set_recall": 0.33203200127954646, "eval_wikibio_true_num_tokens": 61.1328125, "step": 128750 }, { "epoch": 24.72, "eval_nq_accuracy": 0.52678125, "eval_nq_bleu_score": 12.045270072228162, "eval_nq_bleu_score_sem": 0.4974524232186081, "eval_nq_emb_cos_sim": 0.8337074518203735, "eval_nq_emb_cos_sim_sem": 0.007259930974714649, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1854989528656006, "eval_nq_n_ngrams_match_1": 23.206, "eval_nq_n_ngrams_match_2": 8.638, "eval_nq_n_ngrams_match_3": 4.086, "eval_nq_num_pred_words": 49.276, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.895085674809298, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.446872530303096, "eval_nq_runtime": 10.4633, "eval_nq_samples_per_second": 47.786, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46272120122428845, "eval_nq_token_set_f1_sem": 0.005098550388567427, "eval_nq_token_set_precision": 0.42145287599355963, "eval_nq_token_set_recall": 0.5210570214353234, "eval_nq_true_num_tokens": 64.0, "step": 128750 }, { "epoch": 24.72, "learning_rate": 0.001, "loss": 2.5584, "step": 128760 }, { "epoch": 24.73, "learning_rate": 0.001, "loss": 2.565, "step": 128772 }, { "epoch": 24.73, "learning_rate": 0.001, "loss": 2.5529, "step": 128784 }, { "epoch": 24.73, "learning_rate": 0.001, "loss": 2.5602, "step": 128796 }, { "epoch": 24.73, "learning_rate": 0.001, "loss": 2.5514, "step": 128808 }, { "epoch": 24.74, "learning_rate": 0.001, "loss": 2.5536, "step": 128820 }, { "epoch": 24.74, "learning_rate": 0.001, "loss": 2.5646, "step": 128832 }, { "epoch": 24.74, "learning_rate": 0.001, "loss": 2.5542, "step": 128844 }, { "epoch": 24.74, "learning_rate": 0.001, "loss": 2.5537, "step": 128856 }, { "epoch": 24.74, "learning_rate": 0.001, "loss": 2.5499, "step": 128868 }, { "epoch": 24.75, "learning_rate": 0.001, "loss": 2.5547, "step": 128880 }, { "epoch": 24.75, "learning_rate": 0.001, "loss": 2.5523, "step": 128892 }, { "epoch": 24.75, "learning_rate": 0.001, "loss": 2.5522, "step": 128904 }, { "epoch": 24.75, "learning_rate": 0.001, "loss": 2.5629, "step": 128916 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 2.561, "step": 128928 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 2.5636, "step": 128940 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 2.5586, "step": 128952 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 2.5664, "step": 128964 }, { "epoch": 24.76, "learning_rate": 0.001, "loss": 2.5452, "step": 128976 }, { "epoch": 24.77, "learning_rate": 0.001, "loss": 2.5504, "step": 128988 }, { "epoch": 24.77, "learning_rate": 0.001, "loss": 2.5518, "step": 129000 }, { "epoch": 24.77, "learning_rate": 0.001, "loss": 2.5479, "step": 129012 }, { "epoch": 24.77, "learning_rate": 0.001, "loss": 2.5624, "step": 129024 }, { "epoch": 24.78, "learning_rate": 0.001, "loss": 2.5631, "step": 129036 }, { "epoch": 24.78, "learning_rate": 0.001, "loss": 2.5556, "step": 129048 }, { "epoch": 24.78, "learning_rate": 0.001, "loss": 2.5669, "step": 129060 }, { "epoch": 24.78, "learning_rate": 0.001, "loss": 2.5637, "step": 129072 }, { "epoch": 24.79, "learning_rate": 0.001, "loss": 2.5682, "step": 129084 }, { "epoch": 24.79, "learning_rate": 0.001, "loss": 2.5614, "step": 129096 }, { "epoch": 24.79, "learning_rate": 0.001, "loss": 2.5716, "step": 129108 }, { "epoch": 24.79, "learning_rate": 0.001, "loss": 2.5651, "step": 129120 }, { "epoch": 24.79, "learning_rate": 0.001, "loss": 2.5562, "step": 129132 }, { "epoch": 24.8, "learning_rate": 0.001, "loss": 2.5691, "step": 129144 }, { "epoch": 24.8, "learning_rate": 0.001, "loss": 2.5581, "step": 129156 }, { "epoch": 24.8, "learning_rate": 0.001, "loss": 2.5634, "step": 129168 }, { "epoch": 24.8, "learning_rate": 0.001, "loss": 2.5591, "step": 129180 }, { "epoch": 24.81, "learning_rate": 0.001, "loss": 2.5588, "step": 129192 }, { "epoch": 24.81, "learning_rate": 0.001, "loss": 2.5587, "step": 129204 }, { "epoch": 24.81, "learning_rate": 0.001, "loss": 2.5596, "step": 129216 }, { "epoch": 24.81, "learning_rate": 0.001, "loss": 2.5607, "step": 129228 }, { "epoch": 24.82, "learning_rate": 0.001, "loss": 2.555, "step": 129240 }, { "epoch": 24.82, "learning_rate": 0.001, "loss": 2.5719, "step": 129252 }, { "epoch": 24.82, "learning_rate": 0.001, "loss": 2.5566, "step": 129264 }, { "epoch": 24.82, "learning_rate": 0.001, "loss": 2.563, "step": 129276 }, { "epoch": 24.82, "learning_rate": 0.001, "loss": 2.5532, "step": 129288 }, { "epoch": 24.83, "learning_rate": 0.001, "loss": 2.5645, "step": 129300 }, { "epoch": 24.83, "learning_rate": 0.001, "loss": 2.5639, "step": 129312 }, { "epoch": 24.83, "learning_rate": 0.001, "loss": 2.5668, "step": 129324 }, { "epoch": 24.83, "learning_rate": 0.001, "loss": 2.5592, "step": 129336 }, { "epoch": 24.84, "learning_rate": 0.001, "loss": 2.5652, "step": 129348 }, { "epoch": 24.84, "learning_rate": 0.001, "loss": 2.5657, "step": 129360 }, { "epoch": 24.84, "learning_rate": 0.001, "loss": 2.5548, "step": 129372 }, { "epoch": 24.84, "eval_ag_news_accuracy": 0.324625, "eval_ag_news_bleu_score": 4.611841310972994, "eval_ag_news_bleu_score_sem": 0.139095623438631, "eval_ag_news_emb_cos_sim": 0.8163087368011475, "eval_ag_news_emb_cos_sim_sem": 0.00677469299071971, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5461506843566895, "eval_ag_news_n_ngrams_match_1": 14.162, "eval_ag_news_n_ngrams_match_2": 2.99, "eval_ag_news_n_ngrams_match_3": 0.796, "eval_ag_news_num_pred_words": 46.91, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.67956762786532, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3496774758012094, "eval_ag_news_runtime": 10.3696, "eval_ag_news_samples_per_second": 48.218, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3550150146183312, "eval_ag_news_token_set_f1_sem": 0.004226825967887237, "eval_ag_news_token_set_precision": 0.3405986662742473, "eval_ag_news_token_set_recall": 0.38454226864413726, "eval_ag_news_true_num_tokens": 56.09375, "step": 129375 }, { "epoch": 24.84, "eval_anthropic_toxic_prompts_accuracy": 0.1145, "eval_anthropic_toxic_prompts_bleu_score": 3.1030712903873607, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11636478023507556, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6733030080795288, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008804373920982517, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.243640661239624, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.914, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 47.766, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.626850801224577, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21522968185035563, "eval_anthropic_toxic_prompts_runtime": 10.1647, "eval_anthropic_toxic_prompts_samples_per_second": 49.19, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3619918156189409, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006453628770575891, "eval_anthropic_toxic_prompts_token_set_precision": 0.44416642940755013, "eval_anthropic_toxic_prompts_token_set_recall": 0.33056648737153715, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 129375 }, { "epoch": 24.84, "eval_arxiv_accuracy": 0.34715625, "eval_arxiv_bleu_score": 4.37437253227214, "eval_arxiv_bleu_score_sem": 0.12190013051498984, "eval_arxiv_emb_cos_sim": 0.7696950435638428, "eval_arxiv_emb_cos_sim_sem": 0.006782770697654626, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.39719557762146, "eval_arxiv_n_ngrams_match_1": 15.26, "eval_arxiv_n_ngrams_match_2": 2.968, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 41.218, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.880185775199994, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3615217581165149, "eval_arxiv_runtime": 10.3273, "eval_arxiv_samples_per_second": 48.415, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3550864412908191, "eval_arxiv_token_set_f1_sem": 0.004074105658219401, "eval_arxiv_token_set_precision": 0.30747585101477637, "eval_arxiv_token_set_recall": 0.4341225104184517, "eval_arxiv_true_num_tokens": 64.0, "step": 129375 }, { "epoch": 24.84, "eval_python_code_alpaca_accuracy": 0.161, "eval_python_code_alpaca_bleu_score": 4.656605964320858, "eval_python_code_alpaca_bleu_score_sem": 0.14850754928690157, "eval_python_code_alpaca_emb_cos_sim": 0.7533272504806519, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007777214393012535, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8644797801971436, "eval_python_code_alpaca_n_ngrams_match_1": 9.974, "eval_python_code_alpaca_n_ngrams_match_2": 2.98, "eval_python_code_alpaca_n_ngrams_match_3": 0.994, "eval_python_code_alpaca_num_pred_words": 44.032, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.539926213814688, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3328447913721537, "eval_python_code_alpaca_runtime": 10.4122, "eval_python_code_alpaca_samples_per_second": 48.021, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.47994598316547554, "eval_python_code_alpaca_token_set_f1_sem": 0.005217274632447154, "eval_python_code_alpaca_token_set_precision": 0.5455145607356641, "eval_python_code_alpaca_token_set_recall": 0.4496675056044883, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 129375 }, { "epoch": 24.84, "eval_wikibio_accuracy": 0.32353125, "eval_wikibio_bleu_score": 6.030243942921439, "eval_wikibio_bleu_score_sem": 0.20807651743859762, "eval_wikibio_emb_cos_sim": 0.7456989288330078, "eval_wikibio_emb_cos_sim_sem": 0.008595102305780276, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.722168445587158, "eval_wikibio_n_ngrams_match_1": 10.16, "eval_wikibio_n_ngrams_match_2": 3.412, "eval_wikibio_n_ngrams_match_3": 1.234, "eval_wikibio_num_pred_words": 36.298, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.353970787899534, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35719515166510757, "eval_wikibio_runtime": 9.8344, "eval_wikibio_samples_per_second": 50.842, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.32339913754212296, "eval_wikibio_token_set_f1_sem": 0.00524644653849838, "eval_wikibio_token_set_precision": 0.3304765342446991, "eval_wikibio_token_set_recall": 0.3320025909337566, "eval_wikibio_true_num_tokens": 61.1328125, "step": 129375 }, { "epoch": 24.84, "eval_nq_accuracy": 0.52859375, "eval_nq_bleu_score": 11.836564485734353, "eval_nq_bleu_score_sem": 0.48477659779615984, "eval_nq_emb_cos_sim": 0.834022045135498, "eval_nq_emb_cos_sim_sem": 0.0076715871466546464, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1840269565582275, "eval_nq_n_ngrams_match_1": 23.274, "eval_nq_n_ngrams_match_2": 8.558, "eval_nq_n_ngrams_match_3": 3.924, "eval_nq_num_pred_words": 49.214, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.882001773631808, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4511581413013653, "eval_nq_runtime": 10.7005, "eval_nq_samples_per_second": 46.727, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.46504651157551585, "eval_nq_token_set_f1_sem": 0.0049702645665558485, "eval_nq_token_set_precision": 0.4232878176653709, "eval_nq_token_set_recall": 0.5234713107335722, "eval_nq_true_num_tokens": 64.0, "step": 129375 }, { "epoch": 24.84, "learning_rate": 0.001, "loss": 2.5641, "step": 129384 }, { "epoch": 24.85, "learning_rate": 0.001, "loss": 2.5639, "step": 129396 }, { "epoch": 24.85, "learning_rate": 0.001, "loss": 2.5586, "step": 129408 }, { "epoch": 24.85, "learning_rate": 0.001, "loss": 2.558, "step": 129420 }, { "epoch": 24.85, "learning_rate": 0.001, "loss": 2.5631, "step": 129432 }, { "epoch": 24.85, "learning_rate": 0.001, "loss": 2.5656, "step": 129444 }, { "epoch": 24.86, "learning_rate": 0.001, "loss": 2.5572, "step": 129456 }, { "epoch": 24.86, "learning_rate": 0.001, "loss": 2.5584, "step": 129468 }, { "epoch": 24.86, "learning_rate": 0.001, "loss": 2.5651, "step": 129480 }, { "epoch": 24.86, "learning_rate": 0.001, "loss": 2.5589, "step": 129492 }, { "epoch": 24.87, "learning_rate": 0.001, "loss": 2.5496, "step": 129504 }, { "epoch": 24.87, "learning_rate": 0.001, "loss": 2.5509, "step": 129516 }, { "epoch": 24.87, "learning_rate": 0.001, "loss": 2.5593, "step": 129528 }, { "epoch": 24.87, "learning_rate": 0.001, "loss": 2.5632, "step": 129540 }, { "epoch": 24.88, "learning_rate": 0.001, "loss": 2.5487, "step": 129552 }, { "epoch": 24.88, "learning_rate": 0.001, "loss": 2.5619, "step": 129564 }, { "epoch": 24.88, "learning_rate": 0.001, "loss": 2.5577, "step": 129576 }, { "epoch": 24.88, "learning_rate": 0.001, "loss": 2.5547, "step": 129588 }, { "epoch": 24.88, "learning_rate": 0.001, "loss": 2.5693, "step": 129600 }, { "epoch": 24.89, "learning_rate": 0.001, "loss": 2.5627, "step": 129612 }, { "epoch": 24.89, "learning_rate": 0.001, "loss": 2.5679, "step": 129624 }, { "epoch": 24.89, "learning_rate": 0.001, "loss": 2.554, "step": 129636 }, { "epoch": 24.89, "learning_rate": 0.001, "loss": 2.5694, "step": 129648 }, { "epoch": 24.9, "learning_rate": 0.001, "loss": 2.55, "step": 129660 }, { "epoch": 24.9, "learning_rate": 0.001, "loss": 2.5591, "step": 129672 }, { "epoch": 24.9, "learning_rate": 0.001, "loss": 2.5471, "step": 129684 }, { "epoch": 24.9, "learning_rate": 0.001, "loss": 2.569, "step": 129696 }, { "epoch": 24.91, "learning_rate": 0.001, "loss": 2.5593, "step": 129708 }, { "epoch": 24.91, "learning_rate": 0.001, "loss": 2.5584, "step": 129720 }, { "epoch": 24.91, "learning_rate": 0.001, "loss": 2.5597, "step": 129732 }, { "epoch": 24.91, "learning_rate": 0.001, "loss": 2.5599, "step": 129744 }, { "epoch": 24.91, "learning_rate": 0.001, "loss": 2.5616, "step": 129756 }, { "epoch": 24.92, "learning_rate": 0.001, "loss": 2.5573, "step": 129768 }, { "epoch": 24.92, "learning_rate": 0.001, "loss": 2.5482, "step": 129780 }, { "epoch": 24.92, "learning_rate": 0.001, "loss": 2.558, "step": 129792 }, { "epoch": 24.92, "learning_rate": 0.001, "loss": 2.5573, "step": 129804 }, { "epoch": 24.93, "learning_rate": 0.001, "loss": 2.5674, "step": 129816 }, { "epoch": 24.93, "learning_rate": 0.001, "loss": 2.5661, "step": 129828 }, { "epoch": 24.93, "learning_rate": 0.001, "loss": 2.5579, "step": 129840 }, { "epoch": 24.93, "learning_rate": 0.001, "loss": 2.5613, "step": 129852 }, { "epoch": 24.94, "learning_rate": 0.001, "loss": 2.5582, "step": 129864 }, { "epoch": 24.94, "learning_rate": 0.001, "loss": 2.5575, "step": 129876 }, { "epoch": 24.94, "learning_rate": 0.001, "loss": 2.563, "step": 129888 }, { "epoch": 24.94, "learning_rate": 0.001, "loss": 2.5615, "step": 129900 }, { "epoch": 24.94, "learning_rate": 0.001, "loss": 2.5595, "step": 129912 }, { "epoch": 24.95, "learning_rate": 0.001, "loss": 2.5586, "step": 129924 }, { "epoch": 24.95, "learning_rate": 0.001, "loss": 2.5616, "step": 129936 }, { "epoch": 24.95, "learning_rate": 0.001, "loss": 2.558, "step": 129948 }, { "epoch": 24.95, "learning_rate": 0.001, "loss": 2.5461, "step": 129960 }, { "epoch": 24.96, "learning_rate": 0.001, "loss": 2.5536, "step": 129972 }, { "epoch": 24.96, "learning_rate": 0.001, "loss": 2.5641, "step": 129984 }, { "epoch": 24.96, "learning_rate": 0.001, "loss": 2.5567, "step": 129996 }, { "epoch": 24.96, "eval_ag_news_accuracy": 0.32228125, "eval_ag_news_bleu_score": 4.8569670206542535, "eval_ag_news_bleu_score_sem": 0.15385616976491615, "eval_ag_news_emb_cos_sim": 0.8157208561897278, "eval_ag_news_emb_cos_sim_sem": 0.006540289341459325, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5547828674316406, "eval_ag_news_n_ngrams_match_1": 14.062, "eval_ag_news_n_ngrams_match_2": 3.154, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.91, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.980223797200544, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.346820653733372, "eval_ag_news_runtime": 10.3574, "eval_ag_news_samples_per_second": 48.275, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3499038006623323, "eval_ag_news_token_set_f1_sem": 0.004577170389225165, "eval_ag_news_token_set_precision": 0.33433701648938513, "eval_ag_news_token_set_recall": 0.3822383014388413, "eval_ag_news_true_num_tokens": 56.09375, "step": 130000 }, { "epoch": 24.96, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.075671100653126, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11603741744058574, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6625092029571533, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009459481485178075, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.23411226272583, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.152, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.87, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, "eval_anthropic_toxic_prompts_num_pred_words": 47.548, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.383827603751666, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.20985994770471703, "eval_anthropic_toxic_prompts_runtime": 9.7717, "eval_anthropic_toxic_prompts_samples_per_second": 51.168, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3590743707530839, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544998491800654, "eval_anthropic_toxic_prompts_token_set_precision": 0.4327766721258468, "eval_anthropic_toxic_prompts_token_set_recall": 0.3336060762924124, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 130000 }, { "epoch": 24.96, "eval_arxiv_accuracy": 0.34746875, "eval_arxiv_bleu_score": 4.20879397179608, "eval_arxiv_bleu_score_sem": 0.11361532866047358, "eval_arxiv_emb_cos_sim": 0.7663363218307495, "eval_arxiv_emb_cos_sim_sem": 0.007776095525893472, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4092836380004883, "eval_arxiv_n_ngrams_match_1": 15.126, "eval_arxiv_n_ngrams_match_2": 2.9, "eval_arxiv_n_ngrams_match_3": 0.598, "eval_arxiv_num_pred_words": 41.1, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.243571152328705, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35988053960868455, "eval_arxiv_runtime": 11.026, "eval_arxiv_samples_per_second": 45.347, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.35151394019497023, "eval_arxiv_token_set_f1_sem": 0.00427413125255946, "eval_arxiv_token_set_precision": 0.30549235783273615, "eval_arxiv_token_set_recall": 0.43096732321731296, "eval_arxiv_true_num_tokens": 64.0, "step": 130000 }, { "epoch": 24.96, "eval_python_code_alpaca_accuracy": 0.16109375, "eval_python_code_alpaca_bleu_score": 4.513227663224373, "eval_python_code_alpaca_bleu_score_sem": 0.13573887174341479, "eval_python_code_alpaca_emb_cos_sim": 0.7477669715881348, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009026385738236192, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8647093772888184, "eval_python_code_alpaca_n_ngrams_match_1": 9.784, "eval_python_code_alpaca_n_ngrams_match_2": 2.868, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 43.394, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.54395379220402, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32830788366778396, "eval_python_code_alpaca_runtime": 9.877, "eval_python_code_alpaca_samples_per_second": 50.623, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4799839217138252, "eval_python_code_alpaca_token_set_f1_sem": 0.005614848629865109, "eval_python_code_alpaca_token_set_precision": 0.5326985761324451, "eval_python_code_alpaca_token_set_recall": 0.4626581472235106, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 130000 }, { "epoch": 24.96, "eval_wikibio_accuracy": 0.3193125, "eval_wikibio_bleu_score": 5.905620036352893, "eval_wikibio_bleu_score_sem": 0.20810696312487797, "eval_wikibio_emb_cos_sim": 0.7314857244491577, "eval_wikibio_emb_cos_sim_sem": 0.011306030407394, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7453036308288574, "eval_wikibio_n_ngrams_match_1": 9.974, "eval_wikibio_n_ngrams_match_2": 3.396, "eval_wikibio_n_ngrams_match_3": 1.258, "eval_wikibio_num_pred_words": 36.23, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.32185548823443, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34956803516986734, "eval_wikibio_runtime": 9.7783, "eval_wikibio_samples_per_second": 51.133, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.31749773947651205, "eval_wikibio_token_set_f1_sem": 0.00559183095783198, "eval_wikibio_token_set_precision": 0.32478569732171025, "eval_wikibio_token_set_recall": 0.32668020057657265, "eval_wikibio_true_num_tokens": 61.1328125, "step": 130000 }, { "epoch": 24.96, "eval_nq_accuracy": 0.52759375, "eval_nq_bleu_score": 11.705387817943874, "eval_nq_bleu_score_sem": 0.4739122759241288, "eval_nq_emb_cos_sim": 0.8311482667922974, "eval_nq_emb_cos_sim_sem": 0.007096436763895811, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1851646900177, "eval_nq_n_ngrams_match_1": 23.072, "eval_nq_n_ngrams_match_2": 8.552, "eval_nq_n_ngrams_match_3": 3.956, "eval_nq_num_pred_words": 49.61, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.892112875015261, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4450731379087566, "eval_nq_runtime": 10.4336, "eval_nq_samples_per_second": 47.922, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.461650529745682, "eval_nq_token_set_f1_sem": 0.004983275319338765, "eval_nq_token_set_precision": 0.4196844084014667, "eval_nq_token_set_recall": 0.5218515770661769, "eval_nq_true_num_tokens": 64.0, "step": 130000 }, { "epoch": 24.96, "learning_rate": 0.001, "loss": 2.5577, "step": 130008 }, { "epoch": 24.97, "learning_rate": 0.001, "loss": 2.5603, "step": 130020 }, { "epoch": 24.97, "learning_rate": 0.001, "loss": 2.5549, "step": 130032 }, { "epoch": 24.97, "learning_rate": 0.001, "loss": 2.5665, "step": 130044 }, { "epoch": 24.97, "learning_rate": 0.001, "loss": 2.5489, "step": 130056 }, { "epoch": 24.97, "learning_rate": 0.001, "loss": 2.5564, "step": 130068 }, { "epoch": 24.98, "learning_rate": 0.001, "loss": 2.5632, "step": 130080 }, { "epoch": 24.98, "learning_rate": 0.001, "loss": 2.5452, "step": 130092 }, { "epoch": 24.98, "learning_rate": 0.001, "loss": 2.567, "step": 130104 }, { "epoch": 24.98, "learning_rate": 0.001, "loss": 2.5579, "step": 130116 }, { "epoch": 24.99, "learning_rate": 0.001, "loss": 2.5676, "step": 130128 }, { "epoch": 24.99, "learning_rate": 0.001, "loss": 2.5604, "step": 130140 }, { "epoch": 24.99, "learning_rate": 0.001, "loss": 2.5604, "step": 130152 }, { "epoch": 24.99, "learning_rate": 0.001, "loss": 2.5575, "step": 130164 }, { "epoch": 25.0, "learning_rate": 0.001, "loss": 2.555, "step": 130176 }, { "epoch": 25.0, "learning_rate": 0.001, "loss": 2.5601, "step": 130188 }, { "epoch": 25.0, "learning_rate": 0.001, "loss": 2.5628, "step": 130200 }, { "epoch": 25.0, "learning_rate": 0.001, "loss": 2.5459, "step": 130212 }, { "epoch": 25.0, "learning_rate": 0.001, "loss": 2.5412, "step": 130224 }, { "epoch": 25.01, "learning_rate": 0.001, "loss": 2.5395, "step": 130236 }, { "epoch": 25.01, "learning_rate": 0.001, "loss": 2.536, "step": 130248 }, { "epoch": 25.01, "learning_rate": 0.001, "loss": 2.5427, "step": 130260 }, { "epoch": 25.01, "learning_rate": 0.001, "loss": 2.5468, "step": 130272 }, { "epoch": 25.02, "learning_rate": 0.001, "loss": 2.5456, "step": 130284 }, { "epoch": 25.02, "learning_rate": 0.001, "loss": 2.5434, "step": 130296 }, { "epoch": 25.02, "learning_rate": 0.001, "loss": 2.5341, "step": 130308 }, { "epoch": 25.02, "learning_rate": 0.001, "loss": 2.5488, "step": 130320 }, { "epoch": 25.03, "learning_rate": 0.001, "loss": 2.5425, "step": 130332 }, { "epoch": 25.03, "learning_rate": 0.001, "loss": 2.5423, "step": 130344 }, { "epoch": 25.03, "learning_rate": 0.001, "loss": 2.5374, "step": 130356 }, { "epoch": 25.03, "learning_rate": 0.001, "loss": 2.5421, "step": 130368 }, { "epoch": 25.03, "learning_rate": 0.001, "loss": 2.5467, "step": 130380 }, { "epoch": 25.04, "learning_rate": 0.001, "loss": 2.5406, "step": 130392 }, { "epoch": 25.04, "learning_rate": 0.001, "loss": 2.5408, "step": 130404 }, { "epoch": 25.04, "learning_rate": 0.001, "loss": 2.5479, "step": 130416 }, { "epoch": 25.04, "learning_rate": 0.001, "loss": 2.5436, "step": 130428 }, { "epoch": 25.05, "learning_rate": 0.001, "loss": 2.5413, "step": 130440 }, { "epoch": 25.05, "learning_rate": 0.001, "loss": 2.5492, "step": 130452 }, { "epoch": 25.05, "learning_rate": 0.001, "loss": 2.5499, "step": 130464 }, { "epoch": 25.05, "learning_rate": 0.001, "loss": 2.5383, "step": 130476 }, { "epoch": 25.06, "learning_rate": 0.001, "loss": 2.553, "step": 130488 }, { "epoch": 25.06, "learning_rate": 0.001, "loss": 2.5459, "step": 130500 }, { "epoch": 25.06, "learning_rate": 0.001, "loss": 2.5339, "step": 130512 }, { "epoch": 25.06, "learning_rate": 0.001, "loss": 2.5441, "step": 130524 }, { "epoch": 25.06, "learning_rate": 0.001, "loss": 2.5518, "step": 130536 }, { "epoch": 25.07, "learning_rate": 0.001, "loss": 2.5448, "step": 130548 }, { "epoch": 25.07, "learning_rate": 0.001, "loss": 2.5415, "step": 130560 }, { "epoch": 25.07, "learning_rate": 0.001, "loss": 2.5471, "step": 130572 }, { "epoch": 25.07, "learning_rate": 0.001, "loss": 2.5495, "step": 130584 }, { "epoch": 25.08, "learning_rate": 0.001, "loss": 2.5428, "step": 130596 }, { "epoch": 25.08, "learning_rate": 0.001, "loss": 2.5415, "step": 130608 }, { "epoch": 25.08, "learning_rate": 0.001, "loss": 2.5393, "step": 130620 }, { "epoch": 25.08, "eval_ag_news_accuracy": 0.3236875, "eval_ag_news_bleu_score": 4.827984117861617, "eval_ag_news_bleu_score_sem": 0.15220740571278935, "eval_ag_news_emb_cos_sim": 0.8090826272964478, "eval_ag_news_emb_cos_sim_sem": 0.007669002359460815, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5572659969329834, "eval_ag_news_n_ngrams_match_1": 14.072, "eval_ag_news_n_ngrams_match_2": 3.15, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.744, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 35.06719215503571, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34900649881407964, "eval_ag_news_runtime": 10.6427, "eval_ag_news_samples_per_second": 46.981, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35075826680282174, "eval_ag_news_token_set_f1_sem": 0.004355908259580651, "eval_ag_news_token_set_precision": 0.3363891934917141, "eval_ag_news_token_set_recall": 0.38339458619399286, "eval_ag_news_true_num_tokens": 56.09375, "step": 130625 }, { "epoch": 25.08, "eval_anthropic_toxic_prompts_accuracy": 0.11515625, "eval_anthropic_toxic_prompts_bleu_score": 3.1666231315005953, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1254007163647972, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742832660675049, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009745622366619106, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2476534843444824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.272, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.916, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.71, "eval_anthropic_toxic_prompts_num_pred_words": 46.444, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.729893427868554, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21501414096227137, "eval_anthropic_toxic_prompts_runtime": 9.6679, "eval_anthropic_toxic_prompts_samples_per_second": 51.717, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3578723173352975, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006666168942377207, "eval_anthropic_toxic_prompts_token_set_precision": 0.43911005805304826, "eval_anthropic_toxic_prompts_token_set_recall": 0.32638078974927826, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 130625 }, { "epoch": 25.08, "eval_arxiv_accuracy": 0.34775, "eval_arxiv_bleu_score": 4.346147228921791, "eval_arxiv_bleu_score_sem": 0.12009390396867743, "eval_arxiv_emb_cos_sim": 0.7748437523841858, "eval_arxiv_emb_cos_sim_sem": 0.00682109140257414, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4118895530700684, "eval_arxiv_n_ngrams_match_1": 15.442, "eval_arxiv_n_ngrams_match_2": 3.056, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 41.424, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.322486108331084, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.365002423676854, "eval_arxiv_runtime": 10.2155, "eval_arxiv_samples_per_second": 48.945, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35929546926595846, "eval_arxiv_token_set_f1_sem": 0.004223968695648756, "eval_arxiv_token_set_precision": 0.31240521488793094, "eval_arxiv_token_set_recall": 0.4388999656542089, "eval_arxiv_true_num_tokens": 64.0, "step": 130625 }, { "epoch": 25.08, "eval_python_code_alpaca_accuracy": 0.16159375, "eval_python_code_alpaca_bleu_score": 4.700190184608433, "eval_python_code_alpaca_bleu_score_sem": 0.14783404668100003, "eval_python_code_alpaca_emb_cos_sim": 0.7699594497680664, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008064217967759024, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.882859468460083, "eval_python_code_alpaca_n_ngrams_match_1": 10.07, "eval_python_code_alpaca_n_ngrams_match_2": 2.972, "eval_python_code_alpaca_n_ngrams_match_3": 1.042, "eval_python_code_alpaca_num_pred_words": 44.262, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.865285431112, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33557557881595146, "eval_python_code_alpaca_runtime": 10.2308, "eval_python_code_alpaca_samples_per_second": 48.872, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.48789019025988795, "eval_python_code_alpaca_token_set_f1_sem": 0.005465768917660974, "eval_python_code_alpaca_token_set_precision": 0.5520485122298064, "eval_python_code_alpaca_token_set_recall": 0.4612636806224925, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 130625 }, { "epoch": 25.08, "eval_wikibio_accuracy": 0.3235, "eval_wikibio_bleu_score": 5.884867750372959, "eval_wikibio_bleu_score_sem": 0.204063171310663, "eval_wikibio_emb_cos_sim": 0.7450792789459229, "eval_wikibio_emb_cos_sim_sem": 0.009133146339692318, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7214627265930176, "eval_wikibio_n_ngrams_match_1": 9.944, "eval_wikibio_n_ngrams_match_2": 3.312, "eval_wikibio_n_ngrams_match_3": 1.164, "eval_wikibio_num_pred_words": 35.418, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.32479680076062, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3543727444295229, "eval_wikibio_runtime": 11.0114, "eval_wikibio_samples_per_second": 45.408, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3176341096998218, "eval_wikibio_token_set_f1_sem": 0.0055624397332888, "eval_wikibio_token_set_precision": 0.3237973147406713, "eval_wikibio_token_set_recall": 0.32794535105596967, "eval_wikibio_true_num_tokens": 61.1328125, "step": 130625 }, { "epoch": 25.08, "eval_nq_accuracy": 0.528625, "eval_nq_bleu_score": 11.67742032842143, "eval_nq_bleu_score_sem": 0.4638451677287935, "eval_nq_emb_cos_sim": 0.8356366157531738, "eval_nq_emb_cos_sim_sem": 0.006841456891483506, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1816728115081787, "eval_nq_n_ngrams_match_1": 23.31, "eval_nq_n_ngrams_match_2": 8.57, "eval_nq_n_ngrams_match_3": 3.844, "eval_nq_num_pred_words": 49.382, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.86111684584193, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4483338264516282, "eval_nq_runtime": 10.3559, "eval_nq_samples_per_second": 48.282, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4641876513952255, "eval_nq_token_set_f1_sem": 0.00501728527338106, "eval_nq_token_set_precision": 0.42385992188355665, "eval_nq_token_set_recall": 0.5197076134214403, "eval_nq_true_num_tokens": 64.0, "step": 130625 }, { "epoch": 25.08, "learning_rate": 0.001, "loss": 2.5493, "step": 130632 }, { "epoch": 25.09, "learning_rate": 0.001, "loss": 2.5411, "step": 130644 }, { "epoch": 25.09, "learning_rate": 0.001, "loss": 2.5431, "step": 130656 }, { "epoch": 25.09, "learning_rate": 0.001, "loss": 2.548, "step": 130668 }, { "epoch": 25.09, "learning_rate": 0.001, "loss": 2.5458, "step": 130680 }, { "epoch": 25.09, "learning_rate": 0.001, "loss": 2.5407, "step": 130692 }, { "epoch": 25.1, "learning_rate": 0.001, "loss": 2.5408, "step": 130704 }, { "epoch": 25.1, "learning_rate": 0.001, "loss": 2.5428, "step": 130716 }, { "epoch": 25.1, "learning_rate": 0.001, "loss": 2.5384, "step": 130728 }, { "epoch": 25.1, "learning_rate": 0.001, "loss": 2.5457, "step": 130740 }, { "epoch": 25.11, "learning_rate": 0.001, "loss": 2.5332, "step": 130752 }, { "epoch": 25.11, "learning_rate": 0.001, "loss": 2.5523, "step": 130764 }, { "epoch": 25.11, "learning_rate": 0.001, "loss": 2.5439, "step": 130776 }, { "epoch": 25.11, "learning_rate": 0.001, "loss": 2.5418, "step": 130788 }, { "epoch": 25.12, "learning_rate": 0.001, "loss": 2.5474, "step": 130800 }, { "epoch": 25.12, "learning_rate": 0.001, "loss": 2.5523, "step": 130812 }, { "epoch": 25.12, "learning_rate": 0.001, "loss": 2.5497, "step": 130824 }, { "epoch": 25.12, "learning_rate": 0.001, "loss": 2.5476, "step": 130836 }, { "epoch": 25.12, "learning_rate": 0.001, "loss": 2.5469, "step": 130848 }, { "epoch": 25.13, "learning_rate": 0.001, "loss": 2.54, "step": 130860 }, { "epoch": 25.13, "learning_rate": 0.001, "loss": 2.5478, "step": 130872 }, { "epoch": 25.13, "learning_rate": 0.001, "loss": 2.5469, "step": 130884 }, { "epoch": 25.13, "learning_rate": 0.001, "loss": 2.5347, "step": 130896 }, { "epoch": 25.14, "learning_rate": 0.001, "loss": 2.565, "step": 130908 }, { "epoch": 25.14, "learning_rate": 0.001, "loss": 2.5384, "step": 130920 }, { "epoch": 25.14, "learning_rate": 0.001, "loss": 2.5416, "step": 130932 }, { "epoch": 25.14, "learning_rate": 0.001, "loss": 2.5386, "step": 130944 }, { "epoch": 25.15, "learning_rate": 0.001, "loss": 2.544, "step": 130956 }, { "epoch": 25.15, "learning_rate": 0.001, "loss": 2.545, "step": 130968 }, { "epoch": 25.15, "learning_rate": 0.001, "loss": 2.5456, "step": 130980 }, { "epoch": 25.15, "learning_rate": 0.001, "loss": 2.5493, "step": 130992 }, { "epoch": 25.15, "learning_rate": 0.001, "loss": 2.5358, "step": 131004 }, { "epoch": 25.16, "learning_rate": 0.001, "loss": 2.5487, "step": 131016 }, { "epoch": 25.16, "learning_rate": 0.001, "loss": 2.5443, "step": 131028 }, { "epoch": 25.16, "learning_rate": 0.001, "loss": 2.5406, "step": 131040 }, { "epoch": 25.16, "learning_rate": 0.001, "loss": 2.546, "step": 131052 }, { "epoch": 25.17, "learning_rate": 0.001, "loss": 2.5387, "step": 131064 }, { "epoch": 25.17, "learning_rate": 0.001, "loss": 2.551, "step": 131076 }, { "epoch": 25.17, "learning_rate": 0.001, "loss": 2.5438, "step": 131088 }, { "epoch": 25.17, "learning_rate": 0.001, "loss": 2.5598, "step": 131100 }, { "epoch": 25.18, "learning_rate": 0.001, "loss": 2.5459, "step": 131112 }, { "epoch": 25.18, "learning_rate": 0.001, "loss": 2.5462, "step": 131124 }, { "epoch": 25.18, "learning_rate": 0.001, "loss": 2.5589, "step": 131136 }, { "epoch": 25.18, "learning_rate": 0.001, "loss": 2.5446, "step": 131148 }, { "epoch": 25.18, "learning_rate": 0.001, "loss": 2.5502, "step": 131160 }, { "epoch": 25.19, "learning_rate": 0.001, "loss": 2.5457, "step": 131172 }, { "epoch": 25.19, "learning_rate": 0.001, "loss": 2.5419, "step": 131184 }, { "epoch": 25.19, "learning_rate": 0.001, "loss": 2.5449, "step": 131196 }, { "epoch": 25.19, "learning_rate": 0.001, "loss": 2.5444, "step": 131208 }, { "epoch": 25.2, "learning_rate": 0.001, "loss": 2.5546, "step": 131220 }, { "epoch": 25.2, "learning_rate": 0.001, "loss": 2.5532, "step": 131232 }, { "epoch": 25.2, "learning_rate": 0.001, "loss": 2.5495, "step": 131244 }, { "epoch": 25.2, "eval_ag_news_accuracy": 0.32171875, "eval_ag_news_bleu_score": 4.805631448252006, "eval_ag_news_bleu_score_sem": 0.15731309758821782, "eval_ag_news_emb_cos_sim": 0.8115901947021484, "eval_ag_news_emb_cos_sim_sem": 0.007044800947569394, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5540506839752197, "eval_ag_news_n_ngrams_match_1": 14.088, "eval_ag_news_n_ngrams_match_2": 3.062, "eval_ag_news_n_ngrams_match_3": 0.91, "eval_ag_news_num_pred_words": 46.798, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.95462123006617, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3487181900166695, "eval_ag_news_runtime": 10.2804, "eval_ag_news_samples_per_second": 48.636, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3539297425194083, "eval_ag_news_token_set_f1_sem": 0.004664780213336639, "eval_ag_news_token_set_precision": 0.3368518935516282, "eval_ag_news_token_set_recall": 0.39110883270092806, "eval_ag_news_true_num_tokens": 56.09375, "step": 131250 }, { "epoch": 25.2, "eval_anthropic_toxic_prompts_accuracy": 0.11459375, "eval_anthropic_toxic_prompts_bleu_score": 3.1142799442175075, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1180712984255266, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6735135316848755, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0087461631962884, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2482686042785645, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, "eval_anthropic_toxic_prompts_num_pred_words": 47.684, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.74572526695856, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21633015064721026, "eval_anthropic_toxic_prompts_runtime": 10.6216, "eval_anthropic_toxic_prompts_samples_per_second": 47.074, "eval_anthropic_toxic_prompts_steps_per_second": 0.094, "eval_anthropic_toxic_prompts_token_set_f1": 0.3698789348153006, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006514893728509565, "eval_anthropic_toxic_prompts_token_set_precision": 0.44766389158759734, "eval_anthropic_toxic_prompts_token_set_recall": 0.34046532730912593, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 131250 }, { "epoch": 25.2, "eval_arxiv_accuracy": 0.34784375, "eval_arxiv_bleu_score": 4.252698195553729, "eval_arxiv_bleu_score_sem": 0.125878825147803, "eval_arxiv_emb_cos_sim": 0.7599482536315918, "eval_arxiv_emb_cos_sim_sem": 0.008686516186022375, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.402564764022827, "eval_arxiv_n_ngrams_match_1": 14.614, "eval_arxiv_n_ngrams_match_2": 2.878, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 39.636, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.04104952962554, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35258337465492606, "eval_arxiv_runtime": 10.1102, "eval_arxiv_samples_per_second": 49.455, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3471174740892369, "eval_arxiv_token_set_f1_sem": 0.004402086723114342, "eval_arxiv_token_set_precision": 0.29591355807088987, "eval_arxiv_token_set_recall": 0.4433432911664083, "eval_arxiv_true_num_tokens": 64.0, "step": 131250 }, { "epoch": 25.2, "eval_python_code_alpaca_accuracy": 0.16275, "eval_python_code_alpaca_bleu_score": 4.569554837567205, "eval_python_code_alpaca_bleu_score_sem": 0.13904510355918395, "eval_python_code_alpaca_emb_cos_sim": 0.7596986293792725, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008280630181061321, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8721566200256348, "eval_python_code_alpaca_n_ngrams_match_1": 9.79, "eval_python_code_alpaca_n_ngrams_match_2": 2.822, "eval_python_code_alpaca_n_ngrams_match_3": 0.936, "eval_python_code_alpaca_num_pred_words": 43.292, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.675095590953486, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3358129209246936, "eval_python_code_alpaca_runtime": 9.8984, "eval_python_code_alpaca_samples_per_second": 50.513, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4789527117372141, "eval_python_code_alpaca_token_set_f1_sem": 0.005252355757502807, "eval_python_code_alpaca_token_set_precision": 0.5335525771130952, "eval_python_code_alpaca_token_set_recall": 0.4589934698895055, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 131250 }, { "epoch": 25.2, "eval_wikibio_accuracy": 0.32084375, "eval_wikibio_bleu_score": 5.760687101937728, "eval_wikibio_bleu_score_sem": 0.19543921355987715, "eval_wikibio_emb_cos_sim": 0.7488494515419006, "eval_wikibio_emb_cos_sim_sem": 0.009071617867766984, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.730569362640381, "eval_wikibio_n_ngrams_match_1": 10.176, "eval_wikibio_n_ngrams_match_2": 3.314, "eval_wikibio_n_ngrams_match_3": 1.156, "eval_wikibio_num_pred_words": 36.604, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.70284544802238, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3533984380167211, "eval_wikibio_runtime": 11.2338, "eval_wikibio_samples_per_second": 44.509, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3214486717882299, "eval_wikibio_token_set_f1_sem": 0.0053460076038565115, "eval_wikibio_token_set_precision": 0.3303092102677939, "eval_wikibio_token_set_recall": 0.32763994857562584, "eval_wikibio_true_num_tokens": 61.1328125, "step": 131250 }, { "epoch": 25.2, "eval_nq_accuracy": 0.52728125, "eval_nq_bleu_score": 11.561191462132927, "eval_nq_bleu_score_sem": 0.4662156399093546, "eval_nq_emb_cos_sim": 0.8296736478805542, "eval_nq_emb_cos_sim_sem": 0.008866525376012402, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1852967739105225, "eval_nq_n_ngrams_match_1": 23.062, "eval_nq_n_ngrams_match_2": 8.428, "eval_nq_n_ngrams_match_3": 3.824, "eval_nq_num_pred_words": 49.076, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.893287457469212, "eval_nq_pred_num_tokens": 62.9921875, "eval_nq_rouge_score": 0.44907523634455804, "eval_nq_runtime": 10.4274, "eval_nq_samples_per_second": 47.951, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46337964098634604, "eval_nq_token_set_f1_sem": 0.004823173527116388, "eval_nq_token_set_precision": 0.4205088953532891, "eval_nq_token_set_recall": 0.5258761476364512, "eval_nq_true_num_tokens": 64.0, "step": 131250 }, { "epoch": 25.2, "learning_rate": 0.001, "loss": 2.5443, "step": 131256 }, { "epoch": 25.21, "learning_rate": 0.001, "loss": 2.5586, "step": 131268 }, { "epoch": 25.21, "learning_rate": 0.001, "loss": 2.5533, "step": 131280 }, { "epoch": 25.21, "learning_rate": 0.001, "loss": 2.5457, "step": 131292 }, { "epoch": 25.21, "learning_rate": 0.001, "loss": 2.5498, "step": 131304 }, { "epoch": 25.21, "learning_rate": 0.001, "loss": 2.5542, "step": 131316 }, { "epoch": 25.22, "learning_rate": 0.001, "loss": 2.5348, "step": 131328 }, { "epoch": 25.22, "learning_rate": 0.001, "loss": 2.5387, "step": 131340 }, { "epoch": 25.22, "learning_rate": 0.001, "loss": 2.5455, "step": 131352 }, { "epoch": 25.22, "learning_rate": 0.001, "loss": 2.5472, "step": 131364 }, { "epoch": 25.23, "learning_rate": 0.001, "loss": 2.5423, "step": 131376 }, { "epoch": 25.23, "learning_rate": 0.001, "loss": 2.5479, "step": 131388 }, { "epoch": 25.23, "learning_rate": 0.001, "loss": 2.5509, "step": 131400 }, { "epoch": 25.23, "learning_rate": 0.001, "loss": 2.5557, "step": 131412 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 2.549, "step": 131424 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 2.5403, "step": 131436 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 2.5495, "step": 131448 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 2.5548, "step": 131460 }, { "epoch": 25.24, "learning_rate": 0.001, "loss": 2.5526, "step": 131472 }, { "epoch": 25.25, "learning_rate": 0.001, "loss": 2.5512, "step": 131484 }, { "epoch": 25.25, "learning_rate": 0.001, "loss": 2.549, "step": 131496 }, { "epoch": 25.25, "learning_rate": 0.001, "loss": 2.5532, "step": 131508 }, { "epoch": 25.25, "learning_rate": 0.001, "loss": 2.5473, "step": 131520 }, { "epoch": 25.26, "learning_rate": 0.001, "loss": 2.5534, "step": 131532 }, { "epoch": 25.26, "learning_rate": 0.001, "loss": 2.5431, "step": 131544 }, { "epoch": 25.26, "learning_rate": 0.001, "loss": 2.551, "step": 131556 }, { "epoch": 25.26, "learning_rate": 0.001, "loss": 2.5491, "step": 131568 }, { "epoch": 25.26, "learning_rate": 0.001, "loss": 2.5426, "step": 131580 }, { "epoch": 25.27, "learning_rate": 0.001, "loss": 2.551, "step": 131592 }, { "epoch": 25.27, "learning_rate": 0.001, "loss": 2.5327, "step": 131604 }, { "epoch": 25.27, "learning_rate": 0.001, "loss": 2.5533, "step": 131616 }, { "epoch": 25.27, "learning_rate": 0.001, "loss": 2.5523, "step": 131628 }, { "epoch": 25.28, "learning_rate": 0.001, "loss": 2.5453, "step": 131640 }, { "epoch": 25.28, "learning_rate": 0.001, "loss": 2.5584, "step": 131652 }, { "epoch": 25.28, "learning_rate": 0.001, "loss": 2.5561, "step": 131664 }, { "epoch": 25.28, "learning_rate": 0.001, "loss": 2.5506, "step": 131676 }, { "epoch": 25.29, "learning_rate": 0.001, "loss": 2.5584, "step": 131688 }, { "epoch": 25.29, "learning_rate": 0.001, "loss": 2.5578, "step": 131700 }, { "epoch": 25.29, "learning_rate": 0.001, "loss": 2.5476, "step": 131712 }, { "epoch": 25.29, "learning_rate": 0.001, "loss": 2.5587, "step": 131724 }, { "epoch": 25.29, "learning_rate": 0.001, "loss": 2.5574, "step": 131736 }, { "epoch": 25.3, "learning_rate": 0.001, "loss": 2.5491, "step": 131748 }, { "epoch": 25.3, "learning_rate": 0.001, "loss": 2.5475, "step": 131760 }, { "epoch": 25.3, "learning_rate": 0.001, "loss": 2.5471, "step": 131772 }, { "epoch": 25.3, "learning_rate": 0.001, "loss": 2.5556, "step": 131784 }, { "epoch": 25.31, "learning_rate": 0.001, "loss": 2.5436, "step": 131796 }, { "epoch": 25.31, "learning_rate": 0.001, "loss": 2.5512, "step": 131808 }, { "epoch": 25.31, "learning_rate": 0.001, "loss": 2.551, "step": 131820 }, { "epoch": 25.31, "learning_rate": 0.001, "loss": 2.5514, "step": 131832 }, { "epoch": 25.32, "learning_rate": 0.001, "loss": 2.552, "step": 131844 }, { "epoch": 25.32, "learning_rate": 0.001, "loss": 2.5532, "step": 131856 }, { "epoch": 25.32, "learning_rate": 0.001, "loss": 2.5392, "step": 131868 }, { "epoch": 25.32, "eval_ag_news_accuracy": 0.3233125, "eval_ag_news_bleu_score": 4.914054492516313, "eval_ag_news_bleu_score_sem": 0.15882457708417028, "eval_ag_news_emb_cos_sim": 0.8165811896324158, "eval_ag_news_emb_cos_sim_sem": 0.006833821067913446, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5426907539367676, "eval_ag_news_n_ngrams_match_1": 14.02, "eval_ag_news_n_ngrams_match_2": 3.194, "eval_ag_news_n_ngrams_match_3": 0.924, "eval_ag_news_num_pred_words": 46.3, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.55978607429364, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3476716971761488, "eval_ag_news_runtime": 10.1809, "eval_ag_news_samples_per_second": 49.112, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3514273899288549, "eval_ag_news_token_set_f1_sem": 0.004484274728591805, "eval_ag_news_token_set_precision": 0.33579671758364776, "eval_ag_news_token_set_recall": 0.38635494547837046, "eval_ag_news_true_num_tokens": 56.09375, "step": 131875 }, { "epoch": 25.32, "eval_anthropic_toxic_prompts_accuracy": 0.11375, "eval_anthropic_toxic_prompts_bleu_score": 3.085019595742232, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11654913967861044, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6766707897186279, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008607759531223705, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.253098726272583, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.142, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.85, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676, "eval_anthropic_toxic_prompts_num_pred_words": 46.986, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.87038106983766, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21420840518099032, "eval_anthropic_toxic_prompts_runtime": 9.6824, "eval_anthropic_toxic_prompts_samples_per_second": 51.64, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3577461416490362, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006384952942136405, "eval_anthropic_toxic_prompts_token_set_precision": 0.4404934531102299, "eval_anthropic_toxic_prompts_token_set_recall": 0.3272516559822401, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 131875 }, { "epoch": 25.32, "eval_arxiv_accuracy": 0.34528125, "eval_arxiv_bleu_score": 4.265711408339266, "eval_arxiv_bleu_score_sem": 0.12273177469112136, "eval_arxiv_emb_cos_sim": 0.7665755748748779, "eval_arxiv_emb_cos_sim_sem": 0.00850778523092234, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4275219440460205, "eval_arxiv_n_ngrams_match_1": 15.034, "eval_arxiv_n_ngrams_match_2": 2.91, "eval_arxiv_n_ngrams_match_3": 0.61, "eval_arxiv_num_pred_words": 40.14, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.800223426058317, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36384411351929746, "eval_arxiv_runtime": 10.0873, "eval_arxiv_samples_per_second": 49.567, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3560836132591048, "eval_arxiv_token_set_f1_sem": 0.004144378183015886, "eval_arxiv_token_set_precision": 0.30640336475178753, "eval_arxiv_token_set_recall": 0.4413355803588468, "eval_arxiv_true_num_tokens": 64.0, "step": 131875 }, { "epoch": 25.32, "eval_python_code_alpaca_accuracy": 0.16096875, "eval_python_code_alpaca_bleu_score": 4.849660229188227, "eval_python_code_alpaca_bleu_score_sem": 0.15057476290733582, "eval_python_code_alpaca_emb_cos_sim": 0.7644432783126831, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00885730706998966, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.857652187347412, "eval_python_code_alpaca_n_ngrams_match_1": 9.896, "eval_python_code_alpaca_n_ngrams_match_2": 2.948, "eval_python_code_alpaca_n_ngrams_match_3": 1.032, "eval_python_code_alpaca_num_pred_words": 42.368, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.420578630989453, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34201870244315064, "eval_python_code_alpaca_runtime": 9.6805, "eval_python_code_alpaca_samples_per_second": 51.65, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4788301706153694, "eval_python_code_alpaca_token_set_f1_sem": 0.005446574107318026, "eval_python_code_alpaca_token_set_precision": 0.5419286879636789, "eval_python_code_alpaca_token_set_recall": 0.45094325898384224, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 131875 }, { "epoch": 25.32, "eval_wikibio_accuracy": 0.32046875, "eval_wikibio_bleu_score": 6.115802189788201, "eval_wikibio_bleu_score_sem": 0.2152826429351957, "eval_wikibio_emb_cos_sim": 0.7278193235397339, "eval_wikibio_emb_cos_sim_sem": 0.010483447072284342, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7603769302368164, "eval_wikibio_n_ngrams_match_1": 10.044, "eval_wikibio_n_ngrams_match_2": 3.428, "eval_wikibio_n_ngrams_match_3": 1.27, "eval_wikibio_num_pred_words": 35.51, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.96461759050041, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3536068608678047, "eval_wikibio_runtime": 9.9175, "eval_wikibio_samples_per_second": 50.416, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.31950334286278564, "eval_wikibio_token_set_f1_sem": 0.0054629541468927826, "eval_wikibio_token_set_precision": 0.3268275953142922, "eval_wikibio_token_set_recall": 0.32785432442488327, "eval_wikibio_true_num_tokens": 61.1328125, "step": 131875 }, { "epoch": 25.32, "eval_nq_accuracy": 0.52884375, "eval_nq_bleu_score": 11.720867354101722, "eval_nq_bleu_score_sem": 0.47696404351242483, "eval_nq_emb_cos_sim": 0.8370710611343384, "eval_nq_emb_cos_sim_sem": 0.00703817862399712, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.183683395385742, "eval_nq_n_ngrams_match_1": 22.91, "eval_nq_n_ngrams_match_2": 8.512, "eval_nq_n_ngrams_match_3": 3.944, "eval_nq_num_pred_words": 48.704, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.878950786818756, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4457343118349473, "eval_nq_runtime": 11.5829, "eval_nq_samples_per_second": 43.167, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4591167797022442, "eval_nq_token_set_f1_sem": 0.004982949629145637, "eval_nq_token_set_precision": 0.4157857532761781, "eval_nq_token_set_recall": 0.5211131689654769, "eval_nq_true_num_tokens": 64.0, "step": 131875 }, { "epoch": 25.32, "learning_rate": 0.001, "loss": 2.5571, "step": 131880 }, { "epoch": 25.32, "learning_rate": 0.001, "loss": 2.5511, "step": 131892 }, { "epoch": 25.33, "learning_rate": 0.001, "loss": 2.5515, "step": 131904 }, { "epoch": 25.33, "learning_rate": 0.001, "loss": 2.5486, "step": 131916 }, { "epoch": 25.33, "learning_rate": 0.001, "loss": 2.5511, "step": 131928 }, { "epoch": 25.33, "learning_rate": 0.001, "loss": 2.5485, "step": 131940 }, { "epoch": 25.34, "learning_rate": 0.001, "loss": 2.5545, "step": 131952 }, { "epoch": 25.34, "learning_rate": 0.001, "loss": 2.5389, "step": 131964 }, { "epoch": 25.34, "learning_rate": 0.001, "loss": 2.5436, "step": 131976 }, { "epoch": 25.34, "learning_rate": 0.001, "loss": 2.559, "step": 131988 }, { "epoch": 25.35, "learning_rate": 0.001, "loss": 2.5575, "step": 132000 }, { "epoch": 25.35, "learning_rate": 0.001, "loss": 2.5597, "step": 132012 }, { "epoch": 25.35, "learning_rate": 0.001, "loss": 2.5486, "step": 132024 }, { "epoch": 25.35, "learning_rate": 0.001, "loss": 2.5592, "step": 132036 }, { "epoch": 25.35, "learning_rate": 0.001, "loss": 2.5445, "step": 132048 }, { "epoch": 25.36, "learning_rate": 0.001, "loss": 2.5414, "step": 132060 }, { "epoch": 25.36, "learning_rate": 0.001, "loss": 2.5517, "step": 132072 }, { "epoch": 25.36, "learning_rate": 0.001, "loss": 2.5482, "step": 132084 }, { "epoch": 25.36, "learning_rate": 0.001, "loss": 2.5486, "step": 132096 }, { "epoch": 25.37, "learning_rate": 0.001, "loss": 2.5469, "step": 132108 }, { "epoch": 25.37, "learning_rate": 0.001, "loss": 2.5631, "step": 132120 }, { "epoch": 25.37, "learning_rate": 0.001, "loss": 2.5572, "step": 132132 }, { "epoch": 25.37, "learning_rate": 0.001, "loss": 2.546, "step": 132144 }, { "epoch": 25.38, "learning_rate": 0.001, "loss": 2.5429, "step": 132156 }, { "epoch": 25.38, "learning_rate": 0.001, "loss": 2.5437, "step": 132168 }, { "epoch": 25.38, "learning_rate": 0.001, "loss": 2.5443, "step": 132180 }, { "epoch": 25.38, "learning_rate": 0.001, "loss": 2.5482, "step": 132192 }, { "epoch": 25.38, "learning_rate": 0.001, "loss": 2.5437, "step": 132204 }, { "epoch": 25.39, "learning_rate": 0.001, "loss": 2.5454, "step": 132216 }, { "epoch": 25.39, "learning_rate": 0.001, "loss": 2.551, "step": 132228 }, { "epoch": 25.39, "learning_rate": 0.001, "loss": 2.553, "step": 132240 }, { "epoch": 25.39, "learning_rate": 0.001, "loss": 2.555, "step": 132252 }, { "epoch": 25.4, "learning_rate": 0.001, "loss": 2.5459, "step": 132264 }, { "epoch": 25.4, "learning_rate": 0.001, "loss": 2.553, "step": 132276 }, { "epoch": 25.4, "learning_rate": 0.001, "loss": 2.545, "step": 132288 }, { "epoch": 25.4, "learning_rate": 0.001, "loss": 2.5485, "step": 132300 }, { "epoch": 25.41, "learning_rate": 0.001, "loss": 2.5493, "step": 132312 }, { "epoch": 25.41, "learning_rate": 0.001, "loss": 2.5455, "step": 132324 }, { "epoch": 25.41, "learning_rate": 0.001, "loss": 2.5535, "step": 132336 }, { "epoch": 25.41, "learning_rate": 0.001, "loss": 2.5464, "step": 132348 }, { "epoch": 25.41, "learning_rate": 0.001, "loss": 2.5491, "step": 132360 }, { "epoch": 25.42, "learning_rate": 0.001, "loss": 2.5601, "step": 132372 }, { "epoch": 25.42, "learning_rate": 0.001, "loss": 2.5565, "step": 132384 }, { "epoch": 25.42, "learning_rate": 0.001, "loss": 2.5552, "step": 132396 }, { "epoch": 25.42, "learning_rate": 0.001, "loss": 2.5515, "step": 132408 }, { "epoch": 25.43, "learning_rate": 0.001, "loss": 2.552, "step": 132420 }, { "epoch": 25.43, "learning_rate": 0.001, "loss": 2.552, "step": 132432 }, { "epoch": 25.43, "learning_rate": 0.001, "loss": 2.553, "step": 132444 }, { "epoch": 25.43, "learning_rate": 0.001, "loss": 2.542, "step": 132456 }, { "epoch": 25.44, "learning_rate": 0.001, "loss": 2.5554, "step": 132468 }, { "epoch": 25.44, "learning_rate": 0.001, "loss": 2.5429, "step": 132480 }, { "epoch": 25.44, "learning_rate": 0.001, "loss": 2.5423, "step": 132492 }, { "epoch": 25.44, "eval_ag_news_accuracy": 0.322875, "eval_ag_news_bleu_score": 4.7698868245981245, "eval_ag_news_bleu_score_sem": 0.15752912959053497, "eval_ag_news_emb_cos_sim": 0.8068827390670776, "eval_ag_news_emb_cos_sim_sem": 0.007690376080011901, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5438425540924072, "eval_ag_news_n_ngrams_match_1": 13.864, "eval_ag_news_n_ngrams_match_2": 3.016, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.124, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.59961497433629, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3477881285632682, "eval_ag_news_runtime": 10.7334, "eval_ag_news_samples_per_second": 46.584, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3470764898765906, "eval_ag_news_token_set_f1_sem": 0.0044581620414478424, "eval_ag_news_token_set_precision": 0.3307261801418196, "eval_ag_news_token_set_recall": 0.3841999551202603, "eval_ag_news_true_num_tokens": 56.09375, "step": 132500 }, { "epoch": 25.44, "eval_anthropic_toxic_prompts_accuracy": 0.11340625, "eval_anthropic_toxic_prompts_bleu_score": 3.089625103414488, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11688325696314535, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669338583946228, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00898543283091827, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.279155731201172, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.098, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.864, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.688, "eval_anthropic_toxic_prompts_num_pred_words": 47.358, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.553345072983696, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2102696532187595, "eval_anthropic_toxic_prompts_runtime": 10.1677, "eval_anthropic_toxic_prompts_samples_per_second": 49.176, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.36356602580647374, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562408310480911, "eval_anthropic_toxic_prompts_token_set_precision": 0.431138789570558, "eval_anthropic_toxic_prompts_token_set_recall": 0.34125571557806916, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 132500 }, { "epoch": 25.44, "eval_arxiv_accuracy": 0.34915625, "eval_arxiv_bleu_score": 4.351104214910977, "eval_arxiv_bleu_score_sem": 0.1201916460933994, "eval_arxiv_emb_cos_sim": 0.7684433460235596, "eval_arxiv_emb_cos_sim_sem": 0.007544569057588464, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.400456428527832, "eval_arxiv_n_ngrams_match_1": 15.238, "eval_arxiv_n_ngrams_match_2": 2.966, "eval_arxiv_n_ngrams_match_3": 0.652, "eval_arxiv_num_pred_words": 40.916, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.977779639109926, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3641389821534941, "eval_arxiv_runtime": 10.3876, "eval_arxiv_samples_per_second": 48.134, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3555969960165585, "eval_arxiv_token_set_f1_sem": 0.003999183443474901, "eval_arxiv_token_set_precision": 0.3098904766935479, "eval_arxiv_token_set_recall": 0.4295684457700368, "eval_arxiv_true_num_tokens": 64.0, "step": 132500 }, { "epoch": 25.44, "eval_python_code_alpaca_accuracy": 0.15925, "eval_python_code_alpaca_bleu_score": 4.565926151490827, "eval_python_code_alpaca_bleu_score_sem": 0.15168405190953407, "eval_python_code_alpaca_emb_cos_sim": 0.7494837045669556, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009453031700266892, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9003562927246094, "eval_python_code_alpaca_n_ngrams_match_1": 9.74, "eval_python_code_alpaca_n_ngrams_match_2": 2.772, "eval_python_code_alpaca_n_ngrams_match_3": 0.936, "eval_python_code_alpaca_num_pred_words": 43.198, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.18062183890515, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3312216624541677, "eval_python_code_alpaca_runtime": 11.5417, "eval_python_code_alpaca_samples_per_second": 43.321, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.4819077003065002, "eval_python_code_alpaca_token_set_f1_sem": 0.005277984873226256, "eval_python_code_alpaca_token_set_precision": 0.5340350551546666, "eval_python_code_alpaca_token_set_recall": 0.45880801977434044, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 132500 }, { "epoch": 25.44, "eval_wikibio_accuracy": 0.32434375, "eval_wikibio_bleu_score": 6.027446613976045, "eval_wikibio_bleu_score_sem": 0.22882628857670487, "eval_wikibio_emb_cos_sim": 0.7307478785514832, "eval_wikibio_emb_cos_sim_sem": 0.00914073277242428, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.695962429046631, "eval_wikibio_n_ngrams_match_1": 9.754, "eval_wikibio_n_ngrams_match_2": 3.346, "eval_wikibio_n_ngrams_match_3": 1.246, "eval_wikibio_num_pred_words": 35.07, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.28432474113627, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34753822265327605, "eval_wikibio_runtime": 10.1668, "eval_wikibio_samples_per_second": 49.18, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3134439116195845, "eval_wikibio_token_set_f1_sem": 0.005674375377611694, "eval_wikibio_token_set_precision": 0.3178393349170109, "eval_wikibio_token_set_recall": 0.32681329408075077, "eval_wikibio_true_num_tokens": 61.1328125, "step": 132500 }, { "epoch": 25.44, "eval_nq_accuracy": 0.52809375, "eval_nq_bleu_score": 11.53092895746729, "eval_nq_bleu_score_sem": 0.4843067273193696, "eval_nq_emb_cos_sim": 0.8284263014793396, "eval_nq_emb_cos_sim_sem": 0.007505142656440047, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1828134059906006, "eval_nq_n_ngrams_match_1": 22.816, "eval_nq_n_ngrams_match_2": 8.256, "eval_nq_n_ngrams_match_3": 3.816, "eval_nq_num_pred_words": 48.766, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.871229552977024, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44225201895672694, "eval_nq_runtime": 10.5392, "eval_nq_samples_per_second": 47.442, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.456554878112556, "eval_nq_token_set_f1_sem": 0.004977605428584864, "eval_nq_token_set_precision": 0.41275398990833295, "eval_nq_token_set_recall": 0.520890101964246, "eval_nq_true_num_tokens": 64.0, "step": 132500 }, { "epoch": 25.44, "learning_rate": 0.001, "loss": 2.544, "step": 132504 }, { "epoch": 25.44, "learning_rate": 0.001, "loss": 2.5508, "step": 132516 }, { "epoch": 25.45, "learning_rate": 0.001, "loss": 2.5597, "step": 132528 }, { "epoch": 25.45, "learning_rate": 0.001, "loss": 2.5504, "step": 132540 }, { "epoch": 25.45, "learning_rate": 0.001, "loss": 2.5359, "step": 132552 }, { "epoch": 25.45, "learning_rate": 0.001, "loss": 2.5543, "step": 132564 }, { "epoch": 25.46, "learning_rate": 0.001, "loss": 2.541, "step": 132576 }, { "epoch": 25.46, "learning_rate": 0.001, "loss": 2.5508, "step": 132588 }, { "epoch": 25.46, "learning_rate": 0.001, "loss": 2.5478, "step": 132600 }, { "epoch": 25.46, "learning_rate": 0.001, "loss": 2.5501, "step": 132612 }, { "epoch": 25.47, "learning_rate": 0.001, "loss": 2.5553, "step": 132624 }, { "epoch": 25.47, "learning_rate": 0.001, "loss": 2.5632, "step": 132636 }, { "epoch": 25.47, "learning_rate": 0.001, "loss": 2.5462, "step": 132648 }, { "epoch": 25.47, "learning_rate": 0.001, "loss": 2.5589, "step": 132660 }, { "epoch": 25.47, "learning_rate": 0.001, "loss": 2.5519, "step": 132672 }, { "epoch": 25.48, "learning_rate": 0.001, "loss": 2.5515, "step": 132684 }, { "epoch": 25.48, "learning_rate": 0.001, "loss": 2.5512, "step": 132696 }, { "epoch": 25.48, "learning_rate": 0.001, "loss": 2.5441, "step": 132708 }, { "epoch": 25.48, "learning_rate": 0.001, "loss": 2.5476, "step": 132720 }, { "epoch": 25.49, "learning_rate": 0.001, "loss": 2.5537, "step": 132732 }, { "epoch": 25.49, "learning_rate": 0.001, "loss": 2.5441, "step": 132744 }, { "epoch": 25.49, "learning_rate": 0.001, "loss": 2.5513, "step": 132756 }, { "epoch": 25.49, "learning_rate": 0.001, "loss": 2.5519, "step": 132768 }, { "epoch": 25.5, "learning_rate": 0.001, "loss": 2.5369, "step": 132780 }, { "epoch": 25.5, "learning_rate": 0.001, "loss": 2.5437, "step": 132792 }, { "epoch": 25.5, "learning_rate": 0.001, "loss": 2.5556, "step": 132804 }, { "epoch": 25.5, "learning_rate": 0.001, "loss": 2.554, "step": 132816 }, { "epoch": 25.5, "learning_rate": 0.001, "loss": 2.5416, "step": 132828 }, { "epoch": 25.51, "learning_rate": 0.001, "loss": 2.5463, "step": 132840 }, { "epoch": 25.51, "learning_rate": 0.001, "loss": 2.5556, "step": 132852 }, { "epoch": 25.51, "learning_rate": 0.001, "loss": 2.5537, "step": 132864 }, { "epoch": 25.51, "learning_rate": 0.001, "loss": 2.5487, "step": 132876 }, { "epoch": 25.52, "learning_rate": 0.001, "loss": 2.5504, "step": 132888 }, { "epoch": 25.52, "learning_rate": 0.001, "loss": 2.5531, "step": 132900 }, { "epoch": 25.52, "learning_rate": 0.001, "loss": 2.541, "step": 132912 }, { "epoch": 25.52, "learning_rate": 0.001, "loss": 2.5554, "step": 132924 }, { "epoch": 25.53, "learning_rate": 0.001, "loss": 2.5534, "step": 132936 }, { "epoch": 25.53, "learning_rate": 0.001, "loss": 2.5548, "step": 132948 }, { "epoch": 25.53, "learning_rate": 0.001, "loss": 2.5521, "step": 132960 }, { "epoch": 25.53, "learning_rate": 0.001, "loss": 2.5491, "step": 132972 }, { "epoch": 25.53, "learning_rate": 0.001, "loss": 2.5503, "step": 132984 }, { "epoch": 25.54, "learning_rate": 0.001, "loss": 2.5521, "step": 132996 }, { "epoch": 25.54, "learning_rate": 0.001, "loss": 2.5532, "step": 133008 }, { "epoch": 25.54, "learning_rate": 0.001, "loss": 2.5467, "step": 133020 }, { "epoch": 25.54, "learning_rate": 0.001, "loss": 2.5509, "step": 133032 }, { "epoch": 25.55, "learning_rate": 0.001, "loss": 2.5609, "step": 133044 }, { "epoch": 25.55, "learning_rate": 0.001, "loss": 2.5425, "step": 133056 }, { "epoch": 25.55, "learning_rate": 0.001, "loss": 2.555, "step": 133068 }, { "epoch": 25.55, "learning_rate": 0.001, "loss": 2.555, "step": 133080 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 2.5527, "step": 133092 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 2.5519, "step": 133104 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 2.5608, "step": 133116 }, { "epoch": 25.56, "eval_ag_news_accuracy": 0.32365625, "eval_ag_news_bleu_score": 4.855899871609612, "eval_ag_news_bleu_score_sem": 0.15629417852463312, "eval_ag_news_emb_cos_sim": 0.80861496925354, "eval_ag_news_emb_cos_sim_sem": 0.007653365220118765, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5360374450683594, "eval_ag_news_n_ngrams_match_1": 14.054, "eval_ag_news_n_ngrams_match_2": 3.138, "eval_ag_news_n_ngrams_match_3": 0.848, "eval_ag_news_num_pred_words": 46.054, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.33061237022698, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35103040909370664, "eval_ag_news_runtime": 10.5655, "eval_ag_news_samples_per_second": 47.324, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3513876953873009, "eval_ag_news_token_set_f1_sem": 0.004428657807683609, "eval_ag_news_token_set_precision": 0.33518842970883794, "eval_ag_news_token_set_recall": 0.38715639119014816, "eval_ag_news_true_num_tokens": 56.09375, "step": 133125 }, { "epoch": 25.56, "eval_anthropic_toxic_prompts_accuracy": 0.11446875, "eval_anthropic_toxic_prompts_bleu_score": 3.1903966164722344, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12270087868482252, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759926080703735, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008792292263439066, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.229008674621582, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.164, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.926, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 46.776, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.254609022834476, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.21487705897450765, "eval_anthropic_toxic_prompts_runtime": 10.453, "eval_anthropic_toxic_prompts_samples_per_second": 47.833, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.3626626272298002, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006788346091085388, "eval_anthropic_toxic_prompts_token_set_precision": 0.4399205435156726, "eval_anthropic_toxic_prompts_token_set_recall": 0.3366358630871735, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 133125 }, { "epoch": 25.56, "eval_arxiv_accuracy": 0.34753125, "eval_arxiv_bleu_score": 4.314217105866603, "eval_arxiv_bleu_score_sem": 0.12240891262977042, "eval_arxiv_emb_cos_sim": 0.7705197334289551, "eval_arxiv_emb_cos_sim_sem": 0.006838412440223102, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3973591327667236, "eval_arxiv_n_ngrams_match_1": 15.024, "eval_arxiv_n_ngrams_match_2": 2.908, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 40.222, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.885073232998504, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36388975262662504, "eval_arxiv_runtime": 10.3797, "eval_arxiv_samples_per_second": 48.171, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35392897313565735, "eval_arxiv_token_set_f1_sem": 0.004024507589042814, "eval_arxiv_token_set_precision": 0.3062499404385056, "eval_arxiv_token_set_recall": 0.4355530940608332, "eval_arxiv_true_num_tokens": 64.0, "step": 133125 }, { "epoch": 25.56, "eval_python_code_alpaca_accuracy": 0.16053125, "eval_python_code_alpaca_bleu_score": 4.788908634479267, "eval_python_code_alpaca_bleu_score_sem": 0.1513937749102946, "eval_python_code_alpaca_emb_cos_sim": 0.7552530765533447, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008792524071105876, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8843941688537598, "eval_python_code_alpaca_n_ngrams_match_1": 9.996, "eval_python_code_alpaca_n_ngrams_match_2": 3.004, "eval_python_code_alpaca_n_ngrams_match_3": 1.004, "eval_python_code_alpaca_num_pred_words": 43.416, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.892724341564026, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34000495503536365, "eval_python_code_alpaca_runtime": 10.0607, "eval_python_code_alpaca_samples_per_second": 49.698, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.48455450275954537, "eval_python_code_alpaca_token_set_f1_sem": 0.005331723089022349, "eval_python_code_alpaca_token_set_precision": 0.5457429878672126, "eval_python_code_alpaca_token_set_recall": 0.4585016187534243, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 133125 }, { "epoch": 25.56, "eval_wikibio_accuracy": 0.32196875, "eval_wikibio_bleu_score": 6.062982074780387, "eval_wikibio_bleu_score_sem": 0.2025494902855392, "eval_wikibio_emb_cos_sim": 0.744976282119751, "eval_wikibio_emb_cos_sim_sem": 0.010711892869381007, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7191264629364014, "eval_wikibio_n_ngrams_match_1": 10.364, "eval_wikibio_n_ngrams_match_2": 3.566, "eval_wikibio_n_ngrams_match_3": 1.288, "eval_wikibio_num_pred_words": 36.894, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.22836387011547, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3639462322648924, "eval_wikibio_runtime": 10.6158, "eval_wikibio_samples_per_second": 47.1, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.32465973139475823, "eval_wikibio_token_set_f1_sem": 0.005354998232170549, "eval_wikibio_token_set_precision": 0.33592074502898145, "eval_wikibio_token_set_recall": 0.3281273416307827, "eval_wikibio_true_num_tokens": 61.1328125, "step": 133125 }, { "epoch": 25.56, "eval_nq_accuracy": 0.52859375, "eval_nq_bleu_score": 11.766816965162063, "eval_nq_bleu_score_sem": 0.4753187977408753, "eval_nq_emb_cos_sim": 0.8320537805557251, "eval_nq_emb_cos_sim_sem": 0.00716613118772013, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1797211170196533, "eval_nq_n_ngrams_match_1": 23.184, "eval_nq_n_ngrams_match_2": 8.492, "eval_nq_n_ngrams_match_3": 3.916, "eval_nq_num_pred_words": 49.2, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.843839518448318, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4488687975217286, "eval_nq_runtime": 10.4915, "eval_nq_samples_per_second": 47.658, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4619329690907166, "eval_nq_token_set_f1_sem": 0.005041814777617996, "eval_nq_token_set_precision": 0.42067824182411534, "eval_nq_token_set_recall": 0.52084940326046, "eval_nq_true_num_tokens": 64.0, "step": 133125 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 2.5465, "step": 133128 }, { "epoch": 25.56, "learning_rate": 0.001, "loss": 2.5595, "step": 133140 }, { "epoch": 25.57, "learning_rate": 0.001, "loss": 2.5514, "step": 133152 }, { "epoch": 25.57, "learning_rate": 0.001, "loss": 2.5485, "step": 133164 }, { "epoch": 25.57, "learning_rate": 0.001, "loss": 2.5525, "step": 133176 }, { "epoch": 25.57, "learning_rate": 0.001, "loss": 2.5533, "step": 133188 }, { "epoch": 25.58, "learning_rate": 0.001, "loss": 2.5619, "step": 133200 }, { "epoch": 25.58, "learning_rate": 0.001, "loss": 2.5421, "step": 133212 }, { "epoch": 25.58, "learning_rate": 0.001, "loss": 2.5536, "step": 133224 }, { "epoch": 25.58, "learning_rate": 0.001, "loss": 2.5509, "step": 133236 }, { "epoch": 25.59, "learning_rate": 0.001, "loss": 2.5476, "step": 133248 }, { "epoch": 25.59, "learning_rate": 0.001, "loss": 2.5514, "step": 133260 }, { "epoch": 25.59, "learning_rate": 0.001, "loss": 2.5602, "step": 133272 }, { "epoch": 25.59, "learning_rate": 0.001, "loss": 2.5443, "step": 133284 }, { "epoch": 25.59, "learning_rate": 0.001, "loss": 2.5605, "step": 133296 }, { "epoch": 25.6, "learning_rate": 0.001, "loss": 2.5518, "step": 133308 }, { "epoch": 25.6, "learning_rate": 0.001, "loss": 2.5418, "step": 133320 }, { "epoch": 25.6, "learning_rate": 0.001, "loss": 2.5683, "step": 133332 }, { "epoch": 25.6, "learning_rate": 0.001, "loss": 2.557, "step": 133344 }, { "epoch": 25.61, "learning_rate": 0.001, "loss": 2.5578, "step": 133356 }, { "epoch": 25.61, "learning_rate": 0.001, "loss": 2.553, "step": 133368 }, { "epoch": 25.61, "learning_rate": 0.001, "loss": 2.5526, "step": 133380 }, { "epoch": 25.61, "learning_rate": 0.001, "loss": 2.5452, "step": 133392 }, { "epoch": 25.62, "learning_rate": 0.001, "loss": 2.556, "step": 133404 }, { "epoch": 25.62, "learning_rate": 0.001, "loss": 2.5482, "step": 133416 }, { "epoch": 25.62, "learning_rate": 0.001, "loss": 2.5575, "step": 133428 }, { "epoch": 25.62, "learning_rate": 0.001, "loss": 2.557, "step": 133440 }, { "epoch": 25.62, "learning_rate": 0.001, "loss": 2.5483, "step": 133452 }, { "epoch": 25.63, "learning_rate": 0.001, "loss": 2.5489, "step": 133464 }, { "epoch": 25.63, "learning_rate": 0.001, "loss": 2.544, "step": 133476 }, { "epoch": 25.63, "learning_rate": 0.001, "loss": 2.5498, "step": 133488 }, { "epoch": 25.63, "learning_rate": 0.001, "loss": 2.5628, "step": 133500 }, { "epoch": 25.64, "learning_rate": 0.001, "loss": 2.5466, "step": 133512 }, { "epoch": 25.64, "learning_rate": 0.001, "loss": 2.5534, "step": 133524 }, { "epoch": 25.64, "learning_rate": 0.001, "loss": 2.5563, "step": 133536 }, { "epoch": 25.64, "learning_rate": 0.001, "loss": 2.5556, "step": 133548 }, { "epoch": 25.65, "learning_rate": 0.001, "loss": 2.552, "step": 133560 }, { "epoch": 25.65, "learning_rate": 0.001, "loss": 2.5457, "step": 133572 }, { "epoch": 25.65, "learning_rate": 0.001, "loss": 2.5553, "step": 133584 }, { "epoch": 25.65, "learning_rate": 0.001, "loss": 2.545, "step": 133596 }, { "epoch": 25.65, "learning_rate": 0.001, "loss": 2.5456, "step": 133608 }, { "epoch": 25.66, "learning_rate": 0.001, "loss": 2.55, "step": 133620 }, { "epoch": 25.66, "learning_rate": 0.001, "loss": 2.5535, "step": 133632 }, { "epoch": 25.66, "learning_rate": 0.001, "loss": 2.5545, "step": 133644 }, { "epoch": 25.66, "learning_rate": 0.001, "loss": 2.5533, "step": 133656 }, { "epoch": 25.67, "learning_rate": 0.001, "loss": 2.5512, "step": 133668 }, { "epoch": 25.67, "learning_rate": 0.001, "loss": 2.5581, "step": 133680 }, { "epoch": 25.67, "learning_rate": 0.001, "loss": 2.5548, "step": 133692 }, { "epoch": 25.67, "learning_rate": 0.001, "loss": 2.5543, "step": 133704 }, { "epoch": 25.68, "learning_rate": 0.001, "loss": 2.5394, "step": 133716 }, { "epoch": 25.68, "learning_rate": 0.001, "loss": 2.5536, "step": 133728 }, { "epoch": 25.68, "learning_rate": 0.001, "loss": 2.5561, "step": 133740 }, { "epoch": 25.68, "eval_ag_news_accuracy": 0.32309375, "eval_ag_news_bleu_score": 4.888573567916063, "eval_ag_news_bleu_score_sem": 0.15650234872976646, "eval_ag_news_emb_cos_sim": 0.8169499635696411, "eval_ag_news_emb_cos_sim_sem": 0.006926774625215207, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.543426752090454, "eval_ag_news_n_ngrams_match_1": 14.258, "eval_ag_news_n_ngrams_match_2": 3.242, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 46.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.585231375734935, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3526426557351461, "eval_ag_news_runtime": 10.9592, "eval_ag_news_samples_per_second": 45.624, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.3539517565845531, "eval_ag_news_token_set_f1_sem": 0.0043802896680215555, "eval_ag_news_token_set_precision": 0.33947344760386655, "eval_ag_news_token_set_recall": 0.3875487690266719, "eval_ag_news_true_num_tokens": 56.09375, "step": 133750 }, { "epoch": 25.68, "eval_anthropic_toxic_prompts_accuracy": 0.11534375, "eval_anthropic_toxic_prompts_bleu_score": 3.24550285156498, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1275891704996912, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6736080646514893, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008341823453832092, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2241549491882324, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.334, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, "eval_anthropic_toxic_prompts_num_pred_words": 46.882, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.13232708625022, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21755447094292285, "eval_anthropic_toxic_prompts_runtime": 9.9447, "eval_anthropic_toxic_prompts_samples_per_second": 50.278, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.36727662832547525, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006746261540125967, "eval_anthropic_toxic_prompts_token_set_precision": 0.44669465724382945, "eval_anthropic_toxic_prompts_token_set_recall": 0.3390676853152514, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 133750 }, { "epoch": 25.68, "eval_arxiv_accuracy": 0.34921875, "eval_arxiv_bleu_score": 4.2929043807675695, "eval_arxiv_bleu_score_sem": 0.12803835219053986, "eval_arxiv_emb_cos_sim": 0.7631653547286987, "eval_arxiv_emb_cos_sim_sem": 0.008592578104681179, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3955345153808594, "eval_arxiv_n_ngrams_match_1": 15.068, "eval_arxiv_n_ngrams_match_2": 2.97, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 40.42, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.83059412567739, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3608827035761182, "eval_arxiv_runtime": 10.2855, "eval_arxiv_samples_per_second": 48.612, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3567580426630008, "eval_arxiv_token_set_f1_sem": 0.004291661397986758, "eval_arxiv_token_set_precision": 0.3065091526444031, "eval_arxiv_token_set_recall": 0.4455519866199295, "eval_arxiv_true_num_tokens": 64.0, "step": 133750 }, { "epoch": 25.68, "eval_python_code_alpaca_accuracy": 0.16246875, "eval_python_code_alpaca_bleu_score": 4.5237646833939325, "eval_python_code_alpaca_bleu_score_sem": 0.13009332769464257, "eval_python_code_alpaca_emb_cos_sim": 0.7561140656471252, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009422573358228183, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8891215324401855, "eval_python_code_alpaca_n_ngrams_match_1": 9.89, "eval_python_code_alpaca_n_ngrams_match_2": 2.898, "eval_python_code_alpaca_n_ngrams_match_3": 0.926, "eval_python_code_alpaca_num_pred_words": 43.184, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.97751000350488, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3319159991278319, "eval_python_code_alpaca_runtime": 9.9612, "eval_python_code_alpaca_samples_per_second": 50.195, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4817946429770144, "eval_python_code_alpaca_token_set_f1_sem": 0.005398740906617264, "eval_python_code_alpaca_token_set_precision": 0.54204073260342, "eval_python_code_alpaca_token_set_recall": 0.4575423473347422, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 133750 }, { "epoch": 25.68, "eval_wikibio_accuracy": 0.32628125, "eval_wikibio_bleu_score": 6.0395154360742715, "eval_wikibio_bleu_score_sem": 0.2209590227537619, "eval_wikibio_emb_cos_sim": 0.7338325381278992, "eval_wikibio_emb_cos_sim_sem": 0.010597537274114315, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7107479572296143, "eval_wikibio_n_ngrams_match_1": 9.85, "eval_wikibio_n_ngrams_match_2": 3.338, "eval_wikibio_n_ngrams_match_3": 1.248, "eval_wikibio_num_pred_words": 35.674, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.88437485741616, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34899591740478264, "eval_wikibio_runtime": 13.1109, "eval_wikibio_samples_per_second": 38.136, "eval_wikibio_steps_per_second": 0.076, "eval_wikibio_token_set_f1": 0.3153780916314267, "eval_wikibio_token_set_f1_sem": 0.005680015025179767, "eval_wikibio_token_set_precision": 0.32243100120275797, "eval_wikibio_token_set_recall": 0.32583361109637027, "eval_wikibio_true_num_tokens": 61.1328125, "step": 133750 }, { "epoch": 25.68, "eval_nq_accuracy": 0.52840625, "eval_nq_bleu_score": 11.779300615308497, "eval_nq_bleu_score_sem": 0.4967497881099778, "eval_nq_emb_cos_sim": 0.8297677040100098, "eval_nq_emb_cos_sim_sem": 0.007091601730118202, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1775362491607666, "eval_nq_n_ngrams_match_1": 23.152, "eval_nq_n_ngrams_match_2": 8.516, "eval_nq_n_ngrams_match_3": 3.914, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.824537991056944, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44897050359226154, "eval_nq_runtime": 10.4495, "eval_nq_samples_per_second": 47.849, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46081202162357, "eval_nq_token_set_f1_sem": 0.005103763159529631, "eval_nq_token_set_precision": 0.41988725971033675, "eval_nq_token_set_recall": 0.5174729913996847, "eval_nq_true_num_tokens": 64.0, "step": 133750 }, { "epoch": 25.68, "learning_rate": 0.001, "loss": 2.5561, "step": 133752 }, { "epoch": 25.68, "learning_rate": 0.001, "loss": 2.5492, "step": 133764 }, { "epoch": 25.69, "learning_rate": 0.001, "loss": 2.5532, "step": 133776 }, { "epoch": 25.69, "learning_rate": 0.001, "loss": 2.5429, "step": 133788 }, { "epoch": 25.69, "learning_rate": 0.001, "loss": 2.5546, "step": 133800 }, { "epoch": 25.69, "learning_rate": 0.001, "loss": 2.5462, "step": 133812 }, { "epoch": 25.7, "learning_rate": 0.001, "loss": 2.5595, "step": 133824 }, { "epoch": 25.7, "learning_rate": 0.001, "loss": 2.5498, "step": 133836 }, { "epoch": 25.7, "learning_rate": 0.001, "loss": 2.5456, "step": 133848 }, { "epoch": 25.7, "learning_rate": 0.001, "loss": 2.5598, "step": 133860 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 2.5627, "step": 133872 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 2.5553, "step": 133884 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 2.5495, "step": 133896 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 2.5589, "step": 133908 }, { "epoch": 25.71, "learning_rate": 0.001, "loss": 2.5566, "step": 133920 }, { "epoch": 25.72, "learning_rate": 0.001, "loss": 2.5512, "step": 133932 }, { "epoch": 25.72, "learning_rate": 0.001, "loss": 2.5611, "step": 133944 }, { "epoch": 25.72, "learning_rate": 0.001, "loss": 2.5531, "step": 133956 }, { "epoch": 25.72, "learning_rate": 0.001, "loss": 2.5518, "step": 133968 }, { "epoch": 25.73, "learning_rate": 0.001, "loss": 2.5496, "step": 133980 }, { "epoch": 25.73, "learning_rate": 0.001, "loss": 2.5587, "step": 133992 }, { "epoch": 25.73, "learning_rate": 0.001, "loss": 2.5533, "step": 134004 }, { "epoch": 25.73, "learning_rate": 0.001, "loss": 2.5594, "step": 134016 }, { "epoch": 25.74, "learning_rate": 0.001, "loss": 2.5548, "step": 134028 }, { "epoch": 25.74, "learning_rate": 0.001, "loss": 2.5538, "step": 134040 }, { "epoch": 25.74, "learning_rate": 0.001, "loss": 2.543, "step": 134052 }, { "epoch": 25.74, "learning_rate": 0.001, "loss": 2.5592, "step": 134064 }, { "epoch": 25.74, "learning_rate": 0.001, "loss": 2.5569, "step": 134076 }, { "epoch": 25.75, "learning_rate": 0.001, "loss": 2.5549, "step": 134088 }, { "epoch": 25.75, "learning_rate": 0.001, "loss": 2.5505, "step": 134100 }, { "epoch": 25.75, "learning_rate": 0.001, "loss": 2.5561, "step": 134112 }, { "epoch": 25.75, "learning_rate": 0.001, "loss": 2.5574, "step": 134124 }, { "epoch": 25.76, "learning_rate": 0.001, "loss": 2.5496, "step": 134136 }, { "epoch": 25.76, "learning_rate": 0.001, "loss": 2.5486, "step": 134148 }, { "epoch": 25.76, "learning_rate": 0.001, "loss": 2.5599, "step": 134160 }, { "epoch": 25.76, "learning_rate": 0.001, "loss": 2.5481, "step": 134172 }, { "epoch": 25.76, "learning_rate": 0.001, "loss": 2.5656, "step": 134184 }, { "epoch": 25.77, "learning_rate": 0.001, "loss": 2.5588, "step": 134196 }, { "epoch": 25.77, "learning_rate": 0.001, "loss": 2.5411, "step": 134208 }, { "epoch": 25.77, "learning_rate": 0.001, "loss": 2.5507, "step": 134220 }, { "epoch": 25.77, "learning_rate": 0.001, "loss": 2.5548, "step": 134232 }, { "epoch": 25.78, "learning_rate": 0.001, "loss": 2.5514, "step": 134244 }, { "epoch": 25.78, "learning_rate": 0.001, "loss": 2.5507, "step": 134256 }, { "epoch": 25.78, "learning_rate": 0.001, "loss": 2.557, "step": 134268 }, { "epoch": 25.78, "learning_rate": 0.001, "loss": 2.5576, "step": 134280 }, { "epoch": 25.79, "learning_rate": 0.001, "loss": 2.5559, "step": 134292 }, { "epoch": 25.79, "learning_rate": 0.001, "loss": 2.5536, "step": 134304 }, { "epoch": 25.79, "learning_rate": 0.001, "loss": 2.5482, "step": 134316 }, { "epoch": 25.79, "learning_rate": 0.001, "loss": 2.5566, "step": 134328 }, { "epoch": 25.79, "learning_rate": 0.001, "loss": 2.5672, "step": 134340 }, { "epoch": 25.8, "learning_rate": 0.001, "loss": 2.5651, "step": 134352 }, { "epoch": 25.8, "learning_rate": 0.001, "loss": 2.556, "step": 134364 }, { "epoch": 25.8, "eval_ag_news_accuracy": 0.3225625, "eval_ag_news_bleu_score": 4.936972664786717, "eval_ag_news_bleu_score_sem": 0.16388769154047406, "eval_ag_news_emb_cos_sim": 0.8119990825653076, "eval_ag_news_emb_cos_sim_sem": 0.0071752744196689625, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5403366088867188, "eval_ag_news_n_ngrams_match_1": 14.07, "eval_ag_news_n_ngrams_match_2": 3.112, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.376, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.47852301502045, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3486201575707193, "eval_ag_news_runtime": 10.4267, "eval_ag_news_samples_per_second": 47.954, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35000945644344394, "eval_ag_news_token_set_f1_sem": 0.004259476031331064, "eval_ag_news_token_set_precision": 0.33525634181367797, "eval_ag_news_token_set_recall": 0.3812157984813743, "eval_ag_news_true_num_tokens": 56.09375, "step": 134375 }, { "epoch": 25.8, "eval_anthropic_toxic_prompts_accuracy": 0.1145, "eval_anthropic_toxic_prompts_bleu_score": 2.9194052884215744, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10985525394116184, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6746144890785217, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009121698860510055, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2397563457489014, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.25, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.786, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.624, "eval_anthropic_toxic_prompts_num_pred_words": 47.65, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.52750110537512, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21325897167956348, "eval_anthropic_toxic_prompts_runtime": 10.0563, "eval_anthropic_toxic_prompts_samples_per_second": 49.72, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3547631983238783, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066454033441821735, "eval_anthropic_toxic_prompts_token_set_precision": 0.43760832773443115, "eval_anthropic_toxic_prompts_token_set_recall": 0.3234539542376684, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 134375 }, { "epoch": 25.8, "eval_arxiv_accuracy": 0.34865625, "eval_arxiv_bleu_score": 4.359990279945595, "eval_arxiv_bleu_score_sem": 0.12775837319983965, "eval_arxiv_emb_cos_sim": 0.7650954127311707, "eval_arxiv_emb_cos_sim_sem": 0.008417891929949175, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.400831460952759, "eval_arxiv_n_ngrams_match_1": 15.302, "eval_arxiv_n_ngrams_match_2": 2.956, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.93, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.989024386942628, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.362906544609063, "eval_arxiv_runtime": 10.2873, "eval_arxiv_samples_per_second": 48.603, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35605140419661274, "eval_arxiv_token_set_f1_sem": 0.004243659280433926, "eval_arxiv_token_set_precision": 0.30981840333793964, "eval_arxiv_token_set_recall": 0.43443289292970755, "eval_arxiv_true_num_tokens": 64.0, "step": 134375 }, { "epoch": 25.8, "eval_python_code_alpaca_accuracy": 0.16234375, "eval_python_code_alpaca_bleu_score": 4.628174647388354, "eval_python_code_alpaca_bleu_score_sem": 0.14117937250506507, "eval_python_code_alpaca_emb_cos_sim": 0.7622000575065613, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007454475559084872, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.89320969581604, "eval_python_code_alpaca_n_ngrams_match_1": 10.036, "eval_python_code_alpaca_n_ngrams_match_2": 3.036, "eval_python_code_alpaca_n_ngrams_match_3": 1.026, "eval_python_code_alpaca_num_pred_words": 44.668, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.0511554362005, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.330449537463229, "eval_python_code_alpaca_runtime": 14.1576, "eval_python_code_alpaca_samples_per_second": 35.317, "eval_python_code_alpaca_steps_per_second": 0.071, "eval_python_code_alpaca_token_set_f1": 0.488121423386487, "eval_python_code_alpaca_token_set_f1_sem": 0.005540800217105159, "eval_python_code_alpaca_token_set_precision": 0.5517540567682055, "eval_python_code_alpaca_token_set_recall": 0.4590513954978373, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 134375 }, { "epoch": 25.8, "eval_wikibio_accuracy": 0.32578125, "eval_wikibio_bleu_score": 5.709751126467004, "eval_wikibio_bleu_score_sem": 0.20657633570043119, "eval_wikibio_emb_cos_sim": 0.7230824828147888, "eval_wikibio_emb_cos_sim_sem": 0.010519697971815265, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6777310371398926, "eval_wikibio_n_ngrams_match_1": 9.974, "eval_wikibio_n_ngrams_match_2": 3.304, "eval_wikibio_n_ngrams_match_3": 1.17, "eval_wikibio_num_pred_words": 36.46, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.556539853376734, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3462751251601449, "eval_wikibio_runtime": 10.258, "eval_wikibio_samples_per_second": 48.742, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3137252096935702, "eval_wikibio_token_set_f1_sem": 0.0056553984606375176, "eval_wikibio_token_set_precision": 0.3214622151294059, "eval_wikibio_token_set_recall": 0.3245149902879184, "eval_wikibio_true_num_tokens": 61.1328125, "step": 134375 }, { "epoch": 25.8, "eval_nq_accuracy": 0.52803125, "eval_nq_bleu_score": 11.514893952271498, "eval_nq_bleu_score_sem": 0.4745440691142446, "eval_nq_emb_cos_sim": 0.8224170207977295, "eval_nq_emb_cos_sim_sem": 0.008182158679327173, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.180854082107544, "eval_nq_n_ngrams_match_1": 23.15, "eval_nq_n_ngrams_match_2": 8.46, "eval_nq_n_ngrams_match_3": 3.842, "eval_nq_num_pred_words": 49.18, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.85386495802975, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44760593975440643, "eval_nq_runtime": 10.3444, "eval_nq_samples_per_second": 48.335, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4628755583514812, "eval_nq_token_set_f1_sem": 0.005065217356340255, "eval_nq_token_set_precision": 0.4200706643271948, "eval_nq_token_set_recall": 0.5255489521942965, "eval_nq_true_num_tokens": 64.0, "step": 134375 }, { "epoch": 25.8, "learning_rate": 0.001, "loss": 2.5603, "step": 134376 }, { "epoch": 25.8, "learning_rate": 0.001, "loss": 2.5637, "step": 134388 }, { "epoch": 25.81, "learning_rate": 0.001, "loss": 2.5573, "step": 134400 }, { "epoch": 25.81, "learning_rate": 0.001, "loss": 2.5497, "step": 134412 }, { "epoch": 25.81, "learning_rate": 0.001, "loss": 2.5634, "step": 134424 }, { "epoch": 25.81, "learning_rate": 0.001, "loss": 2.5543, "step": 134436 }, { "epoch": 25.82, "learning_rate": 0.001, "loss": 2.5503, "step": 134448 }, { "epoch": 25.82, "learning_rate": 0.001, "loss": 2.5453, "step": 134460 }, { "epoch": 25.82, "learning_rate": 0.001, "loss": 2.5472, "step": 134472 }, { "epoch": 25.82, "learning_rate": 0.001, "loss": 2.554, "step": 134484 }, { "epoch": 25.82, "learning_rate": 0.001, "loss": 2.5515, "step": 134496 }, { "epoch": 25.83, "learning_rate": 0.001, "loss": 2.5576, "step": 134508 }, { "epoch": 25.83, "learning_rate": 0.001, "loss": 2.5541, "step": 134520 }, { "epoch": 25.83, "learning_rate": 0.001, "loss": 2.5467, "step": 134532 }, { "epoch": 25.83, "learning_rate": 0.001, "loss": 2.5605, "step": 134544 }, { "epoch": 25.84, "learning_rate": 0.001, "loss": 2.5378, "step": 134556 }, { "epoch": 25.84, "learning_rate": 0.001, "loss": 2.5471, "step": 134568 }, { "epoch": 25.84, "learning_rate": 0.001, "loss": 2.5549, "step": 134580 }, { "epoch": 25.84, "learning_rate": 0.001, "loss": 2.5548, "step": 134592 }, { "epoch": 25.85, "learning_rate": 0.001, "loss": 2.5609, "step": 134604 }, { "epoch": 25.85, "learning_rate": 0.001, "loss": 2.5601, "step": 134616 }, { "epoch": 25.85, "learning_rate": 0.001, "loss": 2.5621, "step": 134628 }, { "epoch": 25.85, "learning_rate": 0.001, "loss": 2.5489, "step": 134640 }, { "epoch": 25.85, "learning_rate": 0.001, "loss": 2.5566, "step": 134652 }, { "epoch": 25.86, "learning_rate": 0.001, "loss": 2.553, "step": 134664 }, { "epoch": 25.86, "learning_rate": 0.001, "loss": 2.5633, "step": 134676 }, { "epoch": 25.86, "learning_rate": 0.001, "loss": 2.5627, "step": 134688 }, { "epoch": 25.86, "learning_rate": 0.001, "loss": 2.5493, "step": 134700 }, { "epoch": 25.87, "learning_rate": 0.001, "loss": 2.5442, "step": 134712 }, { "epoch": 25.87, "learning_rate": 0.001, "loss": 2.5463, "step": 134724 }, { "epoch": 25.87, "learning_rate": 0.001, "loss": 2.5586, "step": 134736 }, { "epoch": 25.87, "learning_rate": 0.001, "loss": 2.5439, "step": 134748 }, { "epoch": 25.88, "learning_rate": 0.001, "loss": 2.5519, "step": 134760 }, { "epoch": 25.88, "learning_rate": 0.001, "loss": 2.5455, "step": 134772 }, { "epoch": 25.88, "learning_rate": 0.001, "loss": 2.5573, "step": 134784 }, { "epoch": 25.88, "learning_rate": 0.001, "loss": 2.5517, "step": 134796 }, { "epoch": 25.88, "learning_rate": 0.001, "loss": 2.5482, "step": 134808 }, { "epoch": 25.89, "learning_rate": 0.001, "loss": 2.5477, "step": 134820 }, { "epoch": 25.89, "learning_rate": 0.001, "loss": 2.5526, "step": 134832 }, { "epoch": 25.89, "learning_rate": 0.001, "loss": 2.5526, "step": 134844 }, { "epoch": 25.89, "learning_rate": 0.001, "loss": 2.5628, "step": 134856 }, { "epoch": 25.9, "learning_rate": 0.001, "loss": 2.5582, "step": 134868 }, { "epoch": 25.9, "learning_rate": 0.001, "loss": 2.5622, "step": 134880 }, { "epoch": 25.9, "learning_rate": 0.001, "loss": 2.5539, "step": 134892 }, { "epoch": 25.9, "learning_rate": 0.001, "loss": 2.5589, "step": 134904 }, { "epoch": 25.91, "learning_rate": 0.001, "loss": 2.5581, "step": 134916 }, { "epoch": 25.91, "learning_rate": 0.001, "loss": 2.5435, "step": 134928 }, { "epoch": 25.91, "learning_rate": 0.001, "loss": 2.5573, "step": 134940 }, { "epoch": 25.91, "learning_rate": 0.001, "loss": 2.5522, "step": 134952 }, { "epoch": 25.91, "learning_rate": 0.001, "loss": 2.5552, "step": 134964 }, { "epoch": 25.92, "learning_rate": 0.001, "loss": 2.5626, "step": 134976 }, { "epoch": 25.92, "learning_rate": 0.001, "loss": 2.5491, "step": 134988 }, { "epoch": 25.92, "learning_rate": 0.001, "loss": 2.552, "step": 135000 }, { "epoch": 25.92, "eval_ag_news_accuracy": 0.324625, "eval_ag_news_bleu_score": 4.8215466061035945, "eval_ag_news_bleu_score_sem": 0.15782144989987337, "eval_ag_news_emb_cos_sim": 0.8168601989746094, "eval_ag_news_emb_cos_sim_sem": 0.005857727056725968, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5389437675476074, "eval_ag_news_n_ngrams_match_1": 14.146, "eval_ag_news_n_ngrams_match_2": 3.096, "eval_ag_news_n_ngrams_match_3": 0.892, "eval_ag_news_num_pred_words": 46.884, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.43053333161943, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3530551643132303, "eval_ag_news_runtime": 10.4201, "eval_ag_news_samples_per_second": 47.984, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.34989439806463696, "eval_ag_news_token_set_f1_sem": 0.004337027871653999, "eval_ag_news_token_set_precision": 0.33528392211218166, "eval_ag_news_token_set_recall": 0.38263716450838275, "eval_ag_news_true_num_tokens": 56.09375, "step": 135000 }, { "epoch": 25.92, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.248820541653724, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12388414549316776, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6773720979690552, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008940576071757028, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2112584114074707, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.392, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 47.088, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.81028813766348, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2209994635794737, "eval_anthropic_toxic_prompts_runtime": 10.3619, "eval_anthropic_toxic_prompts_samples_per_second": 48.254, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3608351765559386, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006268429188372862, "eval_anthropic_toxic_prompts_token_set_precision": 0.452080697008561, "eval_anthropic_toxic_prompts_token_set_recall": 0.3260422245809032, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 135000 }, { "epoch": 25.92, "eval_arxiv_accuracy": 0.34821875, "eval_arxiv_bleu_score": 4.414458346154399, "eval_arxiv_bleu_score_sem": 0.12981745992836125, "eval_arxiv_emb_cos_sim": 0.7740839719772339, "eval_arxiv_emb_cos_sim_sem": 0.007271887508801583, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3873417377471924, "eval_arxiv_n_ngrams_match_1": 15.364, "eval_arxiv_n_ngrams_match_2": 3.0, "eval_arxiv_n_ngrams_match_3": 0.68, "eval_arxiv_num_pred_words": 40.734, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.587197111563878, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3670790227490169, "eval_arxiv_runtime": 13.9243, "eval_arxiv_samples_per_second": 35.908, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.3582978618413922, "eval_arxiv_token_set_f1_sem": 0.004188978589808033, "eval_arxiv_token_set_precision": 0.3103418486459011, "eval_arxiv_token_set_recall": 0.44124318320252637, "eval_arxiv_true_num_tokens": 64.0, "step": 135000 }, { "epoch": 25.92, "eval_python_code_alpaca_accuracy": 0.16065625, "eval_python_code_alpaca_bleu_score": 4.612681237207016, "eval_python_code_alpaca_bleu_score_sem": 0.142013135243516, "eval_python_code_alpaca_emb_cos_sim": 0.7625421285629272, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00792974778904051, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8573057651519775, "eval_python_code_alpaca_n_ngrams_match_1": 10.184, "eval_python_code_alpaca_n_ngrams_match_2": 2.998, "eval_python_code_alpaca_n_ngrams_match_3": 1.014, "eval_python_code_alpaca_num_pred_words": 44.598, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.41454480108101, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3390603081545783, "eval_python_code_alpaca_runtime": 10.2266, "eval_python_code_alpaca_samples_per_second": 48.892, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4834815272456369, "eval_python_code_alpaca_token_set_f1_sem": 0.00544191619967506, "eval_python_code_alpaca_token_set_precision": 0.5563364195786793, "eval_python_code_alpaca_token_set_recall": 0.447065452883681, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 135000 }, { "epoch": 25.92, "eval_wikibio_accuracy": 0.3250625, "eval_wikibio_bleu_score": 5.7693631875653395, "eval_wikibio_bleu_score_sem": 0.19686744688937188, "eval_wikibio_emb_cos_sim": 0.7337939739227295, "eval_wikibio_emb_cos_sim_sem": 0.00984931546775705, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7203829288482666, "eval_wikibio_n_ngrams_match_1": 10.166, "eval_wikibio_n_ngrams_match_2": 3.396, "eval_wikibio_n_ngrams_match_3": 1.176, "eval_wikibio_num_pred_words": 36.68, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.28019846129939, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35702674164601056, "eval_wikibio_runtime": 10.0444, "eval_wikibio_samples_per_second": 49.779, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32086572052134377, "eval_wikibio_token_set_f1_sem": 0.005320723673554833, "eval_wikibio_token_set_precision": 0.32908280801527406, "eval_wikibio_token_set_recall": 0.3299145150312103, "eval_wikibio_true_num_tokens": 61.1328125, "step": 135000 }, { "epoch": 25.92, "eval_nq_accuracy": 0.53034375, "eval_nq_bleu_score": 11.852041455321265, "eval_nq_bleu_score_sem": 0.48372725850744325, "eval_nq_emb_cos_sim": 0.8295139074325562, "eval_nq_emb_cos_sim_sem": 0.007649390245468581, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1738548278808594, "eval_nq_n_ngrams_match_1": 23.166, "eval_nq_n_ngrams_match_2": 8.558, "eval_nq_n_ngrams_match_3": 3.972, "eval_nq_num_pred_words": 49.102, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.792110874672815, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4505863833185626, "eval_nq_runtime": 10.5048, "eval_nq_samples_per_second": 47.597, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4626684146917264, "eval_nq_token_set_f1_sem": 0.005149669122032737, "eval_nq_token_set_precision": 0.42074960115249344, "eval_nq_token_set_recall": 0.5229615161500419, "eval_nq_true_num_tokens": 64.0, "step": 135000 }, { "epoch": 25.92, "learning_rate": 0.001, "loss": 2.546, "step": 135012 }, { "epoch": 25.93, "learning_rate": 0.001, "loss": 2.5593, "step": 135024 }, { "epoch": 25.93, "learning_rate": 0.001, "loss": 2.548, "step": 135036 }, { "epoch": 25.93, "learning_rate": 0.001, "loss": 2.556, "step": 135048 }, { "epoch": 25.93, "learning_rate": 0.001, "loss": 2.5558, "step": 135060 }, { "epoch": 25.94, "learning_rate": 0.001, "loss": 2.5578, "step": 135072 }, { "epoch": 25.94, "learning_rate": 0.001, "loss": 2.5459, "step": 135084 }, { "epoch": 25.94, "learning_rate": 0.001, "loss": 2.5426, "step": 135096 }, { "epoch": 25.94, "learning_rate": 0.001, "loss": 2.5539, "step": 135108 }, { "epoch": 25.94, "learning_rate": 0.001, "loss": 2.5569, "step": 135120 }, { "epoch": 25.95, "learning_rate": 0.001, "loss": 2.5658, "step": 135132 }, { "epoch": 25.95, "learning_rate": 0.001, "loss": 2.5496, "step": 135144 }, { "epoch": 25.95, "learning_rate": 0.001, "loss": 2.56, "step": 135156 }, { "epoch": 25.95, "learning_rate": 0.001, "loss": 2.5504, "step": 135168 }, { "epoch": 25.96, "learning_rate": 0.001, "loss": 2.5573, "step": 135180 }, { "epoch": 25.96, "learning_rate": 0.001, "loss": 2.564, "step": 135192 }, { "epoch": 25.96, "learning_rate": 0.001, "loss": 2.5605, "step": 135204 }, { "epoch": 25.96, "learning_rate": 0.001, "loss": 2.5438, "step": 135216 }, { "epoch": 25.97, "learning_rate": 0.001, "loss": 2.5501, "step": 135228 }, { "epoch": 25.97, "learning_rate": 0.001, "loss": 2.5478, "step": 135240 }, { "epoch": 25.97, "learning_rate": 0.001, "loss": 2.5581, "step": 135252 }, { "epoch": 25.97, "learning_rate": 0.001, "loss": 2.5461, "step": 135264 }, { "epoch": 25.97, "learning_rate": 0.001, "loss": 2.5541, "step": 135276 }, { "epoch": 25.98, "learning_rate": 0.001, "loss": 2.5541, "step": 135288 }, { "epoch": 25.98, "learning_rate": 0.001, "loss": 2.5516, "step": 135300 }, { "epoch": 25.98, "learning_rate": 0.001, "loss": 2.554, "step": 135312 }, { "epoch": 25.98, "learning_rate": 0.001, "loss": 2.5474, "step": 135324 }, { "epoch": 25.99, "learning_rate": 0.001, "loss": 2.5568, "step": 135336 }, { "epoch": 25.99, "learning_rate": 0.001, "loss": 2.5468, "step": 135348 }, { "epoch": 25.99, "learning_rate": 0.001, "loss": 2.5571, "step": 135360 }, { "epoch": 25.99, "learning_rate": 0.001, "loss": 2.5516, "step": 135372 }, { "epoch": 26.0, "learning_rate": 0.001, "loss": 2.5514, "step": 135384 }, { "epoch": 26.0, "learning_rate": 0.001, "loss": 2.5586, "step": 135396 }, { "epoch": 26.0, "learning_rate": 0.001, "loss": 2.5505, "step": 135408 }, { "epoch": 26.0, "learning_rate": 0.001, "loss": 2.5353, "step": 135420 }, { "epoch": 26.0, "learning_rate": 0.001, "loss": 2.5458, "step": 135432 }, { "epoch": 26.01, "learning_rate": 0.001, "loss": 2.5356, "step": 135444 }, { "epoch": 26.01, "learning_rate": 0.001, "loss": 2.5402, "step": 135456 }, { "epoch": 26.01, "learning_rate": 0.001, "loss": 2.5342, "step": 135468 }, { "epoch": 26.01, "learning_rate": 0.001, "loss": 2.5409, "step": 135480 }, { "epoch": 26.02, "learning_rate": 0.001, "loss": 2.5358, "step": 135492 }, { "epoch": 26.02, "learning_rate": 0.001, "loss": 2.5438, "step": 135504 }, { "epoch": 26.02, "learning_rate": 0.001, "loss": 2.5355, "step": 135516 }, { "epoch": 26.02, "learning_rate": 0.001, "loss": 2.5304, "step": 135528 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 2.5335, "step": 135540 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 2.5385, "step": 135552 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 2.531, "step": 135564 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 2.5355, "step": 135576 }, { "epoch": 26.03, "learning_rate": 0.001, "loss": 2.5331, "step": 135588 }, { "epoch": 26.04, "learning_rate": 0.001, "loss": 2.54, "step": 135600 }, { "epoch": 26.04, "learning_rate": 0.001, "loss": 2.5499, "step": 135612 }, { "epoch": 26.04, "learning_rate": 0.001, "loss": 2.5415, "step": 135624 }, { "epoch": 26.04, "eval_ag_news_accuracy": 0.32403125, "eval_ag_news_bleu_score": 4.590910691753977, "eval_ag_news_bleu_score_sem": 0.14774585171485072, "eval_ag_news_emb_cos_sim": 0.8111287951469421, "eval_ag_news_emb_cos_sim_sem": 0.007094043613153802, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.54253888130188, "eval_ag_news_n_ngrams_match_1": 14.126, "eval_ag_news_n_ngrams_match_2": 3.03, "eval_ag_news_n_ngrams_match_3": 0.778, "eval_ag_news_num_pred_words": 46.774, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.554537787067076, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35369404731794807, "eval_ag_news_runtime": 10.3381, "eval_ag_news_samples_per_second": 48.365, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.35162081455483996, "eval_ag_news_token_set_f1_sem": 0.004197430205313869, "eval_ag_news_token_set_precision": 0.33796050845385917, "eval_ag_news_token_set_recall": 0.3816561790739224, "eval_ag_news_true_num_tokens": 56.09375, "step": 135625 }, { "epoch": 26.04, "eval_anthropic_toxic_prompts_accuracy": 0.1141875, "eval_anthropic_toxic_prompts_bleu_score": 3.2594359282646783, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1246410124995687, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6739163398742676, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00865119844405926, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2369542121887207, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.996, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774, "eval_anthropic_toxic_prompts_num_pred_words": 47.302, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.45606976463838, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2177168882032835, "eval_anthropic_toxic_prompts_runtime": 10.3373, "eval_anthropic_toxic_prompts_samples_per_second": 48.369, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3632432499569225, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064447337418982445, "eval_anthropic_toxic_prompts_token_set_precision": 0.4506778821014783, "eval_anthropic_toxic_prompts_token_set_recall": 0.3275703381121131, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 135625 }, { "epoch": 26.04, "eval_arxiv_accuracy": 0.3488125, "eval_arxiv_bleu_score": 4.344606939093127, "eval_arxiv_bleu_score_sem": 0.12211452841307469, "eval_arxiv_emb_cos_sim": 0.7621860504150391, "eval_arxiv_emb_cos_sim_sem": 0.00938866292587863, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3966190814971924, "eval_arxiv_n_ngrams_match_1": 15.316, "eval_arxiv_n_ngrams_match_2": 2.98, "eval_arxiv_n_ngrams_match_3": 0.658, "eval_arxiv_num_pred_words": 41.302, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.862964928260897, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.362235737094995, "eval_arxiv_runtime": 10.0227, "eval_arxiv_samples_per_second": 49.887, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.35471993227548804, "eval_arxiv_token_set_f1_sem": 0.004285801798175079, "eval_arxiv_token_set_precision": 0.308939019407551, "eval_arxiv_token_set_recall": 0.4333996262695078, "eval_arxiv_true_num_tokens": 64.0, "step": 135625 }, { "epoch": 26.04, "eval_python_code_alpaca_accuracy": 0.16053125, "eval_python_code_alpaca_bleu_score": 4.555752557558127, "eval_python_code_alpaca_bleu_score_sem": 0.13748333417367972, "eval_python_code_alpaca_emb_cos_sim": 0.7590852975845337, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007779874912824785, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8728814125061035, "eval_python_code_alpaca_n_ngrams_match_1": 9.992, "eval_python_code_alpaca_n_ngrams_match_2": 2.926, "eval_python_code_alpaca_n_ngrams_match_3": 0.946, "eval_python_code_alpaca_num_pred_words": 43.998, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.687911011028405, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3393173819108022, "eval_python_code_alpaca_runtime": 9.8497, "eval_python_code_alpaca_samples_per_second": 50.763, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.48191817785688534, "eval_python_code_alpaca_token_set_f1_sem": 0.00522353860919341, "eval_python_code_alpaca_token_set_precision": 0.5477991796605628, "eval_python_code_alpaca_token_set_recall": 0.45174149684965764, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 135625 }, { "epoch": 26.04, "eval_wikibio_accuracy": 0.3250625, "eval_wikibio_bleu_score": 5.935405123280823, "eval_wikibio_bleu_score_sem": 0.21304024124245813, "eval_wikibio_emb_cos_sim": 0.7358399629592896, "eval_wikibio_emb_cos_sim_sem": 0.01074842496741684, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7152676582336426, "eval_wikibio_n_ngrams_match_1": 9.982, "eval_wikibio_n_ngrams_match_2": 3.394, "eval_wikibio_n_ngrams_match_3": 1.236, "eval_wikibio_num_pred_words": 35.552, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.069578224156515, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34920475810298784, "eval_wikibio_runtime": 10.1291, "eval_wikibio_samples_per_second": 49.363, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31497383578099925, "eval_wikibio_token_set_f1_sem": 0.005805803427327527, "eval_wikibio_token_set_precision": 0.32213102834355295, "eval_wikibio_token_set_recall": 0.3237996382968531, "eval_wikibio_true_num_tokens": 61.1328125, "step": 135625 }, { "epoch": 26.04, "eval_nq_accuracy": 0.52965625, "eval_nq_bleu_score": 11.926697426339699, "eval_nq_bleu_score_sem": 0.492345157062038, "eval_nq_emb_cos_sim": 0.8344102501869202, "eval_nq_emb_cos_sim_sem": 0.00695168538831867, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1763744354248047, "eval_nq_n_ngrams_match_1": 23.292, "eval_nq_n_ngrams_match_2": 8.574, "eval_nq_n_ngrams_match_3": 4.026, "eval_nq_num_pred_words": 49.42, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.81429147502952, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45102300446386084, "eval_nq_runtime": 10.9002, "eval_nq_samples_per_second": 45.871, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.46501276606574626, "eval_nq_token_set_f1_sem": 0.005065437908397407, "eval_nq_token_set_precision": 0.42433663644825226, "eval_nq_token_set_recall": 0.5210483141291445, "eval_nq_true_num_tokens": 64.0, "step": 135625 }, { "epoch": 26.04, "learning_rate": 0.001, "loss": 2.5297, "step": 135636 }, { "epoch": 26.05, "learning_rate": 0.001, "loss": 2.5346, "step": 135648 }, { "epoch": 26.05, "learning_rate": 0.001, "loss": 2.5403, "step": 135660 }, { "epoch": 26.05, "learning_rate": 0.001, "loss": 2.5348, "step": 135672 }, { "epoch": 26.05, "learning_rate": 0.001, "loss": 2.5346, "step": 135684 }, { "epoch": 26.06, "learning_rate": 0.001, "loss": 2.5356, "step": 135696 }, { "epoch": 26.06, "learning_rate": 0.001, "loss": 2.5363, "step": 135708 }, { "epoch": 26.06, "learning_rate": 0.001, "loss": 2.538, "step": 135720 }, { "epoch": 26.06, "learning_rate": 0.001, "loss": 2.539, "step": 135732 }, { "epoch": 26.06, "learning_rate": 0.001, "loss": 2.5445, "step": 135744 }, { "epoch": 26.07, "learning_rate": 0.001, "loss": 2.536, "step": 135756 }, { "epoch": 26.07, "learning_rate": 0.001, "loss": 2.537, "step": 135768 }, { "epoch": 26.07, "learning_rate": 0.001, "loss": 2.5311, "step": 135780 }, { "epoch": 26.07, "learning_rate": 0.001, "loss": 2.5385, "step": 135792 }, { "epoch": 26.08, "learning_rate": 0.001, "loss": 2.538, "step": 135804 }, { "epoch": 26.08, "learning_rate": 0.001, "loss": 2.5442, "step": 135816 }, { "epoch": 26.08, "learning_rate": 0.001, "loss": 2.5462, "step": 135828 }, { "epoch": 26.08, "learning_rate": 0.001, "loss": 2.5421, "step": 135840 }, { "epoch": 26.09, "learning_rate": 0.001, "loss": 2.5455, "step": 135852 }, { "epoch": 26.09, "learning_rate": 0.001, "loss": 2.5279, "step": 135864 }, { "epoch": 26.09, "learning_rate": 0.001, "loss": 2.533, "step": 135876 }, { "epoch": 26.09, "learning_rate": 0.001, "loss": 2.5463, "step": 135888 }, { "epoch": 26.09, "learning_rate": 0.001, "loss": 2.535, "step": 135900 }, { "epoch": 26.1, "learning_rate": 0.001, "loss": 2.5455, "step": 135912 }, { "epoch": 26.1, "learning_rate": 0.001, "loss": 2.5316, "step": 135924 }, { "epoch": 26.1, "learning_rate": 0.001, "loss": 2.5401, "step": 135936 }, { "epoch": 26.1, "learning_rate": 0.001, "loss": 2.5417, "step": 135948 }, { "epoch": 26.11, "learning_rate": 0.001, "loss": 2.5312, "step": 135960 }, { "epoch": 26.11, "learning_rate": 0.001, "loss": 2.5389, "step": 135972 }, { "epoch": 26.11, "learning_rate": 0.001, "loss": 2.5459, "step": 135984 }, { "epoch": 26.11, "learning_rate": 0.001, "loss": 2.5362, "step": 135996 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 2.5436, "step": 136008 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 2.5379, "step": 136020 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 2.5434, "step": 136032 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 2.553, "step": 136044 }, { "epoch": 26.12, "learning_rate": 0.001, "loss": 2.5474, "step": 136056 }, { "epoch": 26.13, "learning_rate": 0.001, "loss": 2.5397, "step": 136068 }, { "epoch": 26.13, "learning_rate": 0.001, "loss": 2.5478, "step": 136080 }, { "epoch": 26.13, "learning_rate": 0.001, "loss": 2.5463, "step": 136092 }, { "epoch": 26.13, "learning_rate": 0.001, "loss": 2.5378, "step": 136104 }, { "epoch": 26.14, "learning_rate": 0.001, "loss": 2.5536, "step": 136116 }, { "epoch": 26.14, "learning_rate": 0.001, "loss": 2.534, "step": 136128 }, { "epoch": 26.14, "learning_rate": 0.001, "loss": 2.5429, "step": 136140 }, { "epoch": 26.14, "learning_rate": 0.001, "loss": 2.5315, "step": 136152 }, { "epoch": 26.15, "learning_rate": 0.001, "loss": 2.5361, "step": 136164 }, { "epoch": 26.15, "learning_rate": 0.001, "loss": 2.5358, "step": 136176 }, { "epoch": 26.15, "learning_rate": 0.001, "loss": 2.5411, "step": 136188 }, { "epoch": 26.15, "learning_rate": 0.001, "loss": 2.5442, "step": 136200 }, { "epoch": 26.15, "learning_rate": 0.001, "loss": 2.5377, "step": 136212 }, { "epoch": 26.16, "learning_rate": 0.001, "loss": 2.5326, "step": 136224 }, { "epoch": 26.16, "learning_rate": 0.001, "loss": 2.5373, "step": 136236 }, { "epoch": 26.16, "learning_rate": 0.001, "loss": 2.5399, "step": 136248 }, { "epoch": 26.16, "eval_ag_news_accuracy": 0.32609375, "eval_ag_news_bleu_score": 4.7534053863205346, "eval_ag_news_bleu_score_sem": 0.1436586508635874, "eval_ag_news_emb_cos_sim": 0.8094553351402283, "eval_ag_news_emb_cos_sim_sem": 0.006793587291198242, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5403218269348145, "eval_ag_news_n_ngrams_match_1": 14.188, "eval_ag_news_n_ngrams_match_2": 3.094, "eval_ag_news_n_ngrams_match_3": 0.832, "eval_ag_news_num_pred_words": 46.52, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.478013358918375, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35556657263447833, "eval_ag_news_runtime": 10.7699, "eval_ag_news_samples_per_second": 46.426, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.35001001072982785, "eval_ag_news_token_set_f1_sem": 0.004270852844131644, "eval_ag_news_token_set_precision": 0.33776192722003534, "eval_ag_news_token_set_recall": 0.3751954875034251, "eval_ag_news_true_num_tokens": 56.09375, "step": 136250 }, { "epoch": 26.16, "eval_anthropic_toxic_prompts_accuracy": 0.11446875, "eval_anthropic_toxic_prompts_bleu_score": 3.1740314544838166, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1236341983682375, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6758701801300049, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008016001314518052, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2240450382232666, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.246, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 47.144, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.12956491972731, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21463533868913415, "eval_anthropic_toxic_prompts_runtime": 10.0734, "eval_anthropic_toxic_prompts_samples_per_second": 49.635, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.35637729894753334, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006495450137988693, "eval_anthropic_toxic_prompts_token_set_precision": 0.43936010001946035, "eval_anthropic_toxic_prompts_token_set_recall": 0.3253769992586194, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 136250 }, { "epoch": 26.16, "eval_arxiv_accuracy": 0.34896875, "eval_arxiv_bleu_score": 4.198809583659221, "eval_arxiv_bleu_score_sem": 0.11817792796855849, "eval_arxiv_emb_cos_sim": 0.7708814144134521, "eval_arxiv_emb_cos_sim_sem": 0.0073460185469761815, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.404740571975708, "eval_arxiv_n_ngrams_match_1": 15.02, "eval_arxiv_n_ngrams_match_2": 2.862, "eval_arxiv_n_ngrams_match_3": 0.606, "eval_arxiv_num_pred_words": 40.278, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.106484244977256, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36243692727416343, "eval_arxiv_runtime": 10.4605, "eval_arxiv_samples_per_second": 47.799, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3531461121161466, "eval_arxiv_token_set_f1_sem": 0.004170485006289717, "eval_arxiv_token_set_precision": 0.30482242880959065, "eval_arxiv_token_set_recall": 0.43681992029202804, "eval_arxiv_true_num_tokens": 64.0, "step": 136250 }, { "epoch": 26.16, "eval_python_code_alpaca_accuracy": 0.161125, "eval_python_code_alpaca_bleu_score": 4.553928158502647, "eval_python_code_alpaca_bleu_score_sem": 0.13522952121475498, "eval_python_code_alpaca_emb_cos_sim": 0.7599102258682251, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008040670391787085, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8596158027648926, "eval_python_code_alpaca_n_ngrams_match_1": 10.016, "eval_python_code_alpaca_n_ngrams_match_2": 2.93, "eval_python_code_alpaca_n_ngrams_match_3": 0.954, "eval_python_code_alpaca_num_pred_words": 43.546, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.45481955477148, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3438928807825201, "eval_python_code_alpaca_runtime": 9.6393, "eval_python_code_alpaca_samples_per_second": 51.871, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.4784491578718069, "eval_python_code_alpaca_token_set_f1_sem": 0.005472352866820055, "eval_python_code_alpaca_token_set_precision": 0.5473837131364725, "eval_python_code_alpaca_token_set_recall": 0.44501932904987707, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 136250 }, { "epoch": 26.16, "eval_wikibio_accuracy": 0.32453125, "eval_wikibio_bleu_score": 5.878704931551258, "eval_wikibio_bleu_score_sem": 0.20375367412176457, "eval_wikibio_emb_cos_sim": 0.7443228363990784, "eval_wikibio_emb_cos_sim_sem": 0.0086008046426749, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.736111879348755, "eval_wikibio_n_ngrams_match_1": 10.048, "eval_wikibio_n_ngrams_match_2": 3.29, "eval_wikibio_n_ngrams_match_3": 1.198, "eval_wikibio_num_pred_words": 36.232, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.93462589585937, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35490149540642446, "eval_wikibio_runtime": 9.9723, "eval_wikibio_samples_per_second": 50.139, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3177924991133672, "eval_wikibio_token_set_f1_sem": 0.005372607721016776, "eval_wikibio_token_set_precision": 0.32588086357287527, "eval_wikibio_token_set_recall": 0.3258573969922704, "eval_wikibio_true_num_tokens": 61.1328125, "step": 136250 }, { "epoch": 26.16, "eval_nq_accuracy": 0.52878125, "eval_nq_bleu_score": 11.619591471832347, "eval_nq_bleu_score_sem": 0.4760657884699022, "eval_nq_emb_cos_sim": 0.8333326578140259, "eval_nq_emb_cos_sim_sem": 0.007278220731334853, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1724135875701904, "eval_nq_n_ngrams_match_1": 23.06, "eval_nq_n_ngrams_match_2": 8.314, "eval_nq_n_ngrams_match_3": 3.802, "eval_nq_num_pred_words": 49.046, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.779448457049556, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44818040968729433, "eval_nq_runtime": 10.4038, "eval_nq_samples_per_second": 48.059, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4599892953133288, "eval_nq_token_set_f1_sem": 0.005008508671166199, "eval_nq_token_set_precision": 0.4193728890237574, "eval_nq_token_set_recall": 0.5177488915911149, "eval_nq_true_num_tokens": 64.0, "step": 136250 }, { "epoch": 26.16, "learning_rate": 0.001, "loss": 2.5232, "step": 136260 }, { "epoch": 26.17, "learning_rate": 0.001, "loss": 2.5416, "step": 136272 }, { "epoch": 26.17, "learning_rate": 0.001, "loss": 2.5346, "step": 136284 }, { "epoch": 26.17, "learning_rate": 0.001, "loss": 2.5479, "step": 136296 }, { "epoch": 26.17, "learning_rate": 0.001, "loss": 2.5503, "step": 136308 }, { "epoch": 26.18, "learning_rate": 0.001, "loss": 2.543, "step": 136320 }, { "epoch": 26.18, "learning_rate": 0.001, "loss": 2.5389, "step": 136332 }, { "epoch": 26.18, "learning_rate": 0.001, "loss": 2.5379, "step": 136344 }, { "epoch": 26.18, "learning_rate": 0.001, "loss": 2.5371, "step": 136356 }, { "epoch": 26.18, "learning_rate": 0.001, "loss": 2.5402, "step": 136368 }, { "epoch": 26.19, "learning_rate": 0.001, "loss": 2.547, "step": 136380 }, { "epoch": 26.19, "learning_rate": 0.001, "loss": 2.5392, "step": 136392 }, { "epoch": 26.19, "learning_rate": 0.001, "loss": 2.5382, "step": 136404 }, { "epoch": 26.19, "learning_rate": 0.001, "loss": 2.5393, "step": 136416 }, { "epoch": 26.2, "learning_rate": 0.001, "loss": 2.5394, "step": 136428 }, { "epoch": 26.2, "learning_rate": 0.001, "loss": 2.5422, "step": 136440 }, { "epoch": 26.2, "learning_rate": 0.001, "loss": 2.5372, "step": 136452 }, { "epoch": 26.2, "learning_rate": 0.001, "loss": 2.5409, "step": 136464 }, { "epoch": 26.21, "learning_rate": 0.001, "loss": 2.5433, "step": 136476 }, { "epoch": 26.21, "learning_rate": 0.001, "loss": 2.5402, "step": 136488 }, { "epoch": 26.21, "learning_rate": 0.001, "loss": 2.5476, "step": 136500 }, { "epoch": 26.21, "learning_rate": 0.001, "loss": 2.5464, "step": 136512 }, { "epoch": 26.21, "learning_rate": 0.001, "loss": 2.5333, "step": 136524 }, { "epoch": 26.22, "learning_rate": 0.001, "loss": 2.5323, "step": 136536 }, { "epoch": 26.22, "learning_rate": 0.001, "loss": 2.5328, "step": 136548 }, { "epoch": 26.22, "learning_rate": 0.001, "loss": 2.5398, "step": 136560 }, { "epoch": 26.22, "learning_rate": 0.001, "loss": 2.54, "step": 136572 }, { "epoch": 26.23, "learning_rate": 0.001, "loss": 2.5462, "step": 136584 }, { "epoch": 26.23, "learning_rate": 0.001, "loss": 2.546, "step": 136596 }, { "epoch": 26.23, "learning_rate": 0.001, "loss": 2.5397, "step": 136608 }, { "epoch": 26.23, "learning_rate": 0.001, "loss": 2.5364, "step": 136620 }, { "epoch": 26.24, "learning_rate": 0.001, "loss": 2.5447, "step": 136632 }, { "epoch": 26.24, "learning_rate": 0.001, "loss": 2.5396, "step": 136644 }, { "epoch": 26.24, "learning_rate": 0.001, "loss": 2.5401, "step": 136656 }, { "epoch": 26.24, "learning_rate": 0.001, "loss": 2.5446, "step": 136668 }, { "epoch": 26.24, "learning_rate": 0.001, "loss": 2.5442, "step": 136680 }, { "epoch": 26.25, "learning_rate": 0.001, "loss": 2.5444, "step": 136692 }, { "epoch": 26.25, "learning_rate": 0.001, "loss": 2.5428, "step": 136704 }, { "epoch": 26.25, "learning_rate": 0.001, "loss": 2.5469, "step": 136716 }, { "epoch": 26.25, "learning_rate": 0.001, "loss": 2.5333, "step": 136728 }, { "epoch": 26.26, "learning_rate": 0.001, "loss": 2.5499, "step": 136740 }, { "epoch": 26.26, "learning_rate": 0.001, "loss": 2.5453, "step": 136752 }, { "epoch": 26.26, "learning_rate": 0.001, "loss": 2.5419, "step": 136764 }, { "epoch": 26.26, "learning_rate": 0.001, "loss": 2.5428, "step": 136776 }, { "epoch": 26.26, "learning_rate": 0.001, "loss": 2.5406, "step": 136788 }, { "epoch": 26.27, "learning_rate": 0.001, "loss": 2.5462, "step": 136800 }, { "epoch": 26.27, "learning_rate": 0.001, "loss": 2.5398, "step": 136812 }, { "epoch": 26.27, "learning_rate": 0.001, "loss": 2.5384, "step": 136824 }, { "epoch": 26.27, "learning_rate": 0.001, "loss": 2.5467, "step": 136836 }, { "epoch": 26.28, "learning_rate": 0.001, "loss": 2.5396, "step": 136848 }, { "epoch": 26.28, "learning_rate": 0.001, "loss": 2.5453, "step": 136860 }, { "epoch": 26.28, "learning_rate": 0.001, "loss": 2.5444, "step": 136872 }, { "epoch": 26.28, "eval_ag_news_accuracy": 0.32390625, "eval_ag_news_bleu_score": 5.010337133307245, "eval_ag_news_bleu_score_sem": 0.16365978257351546, "eval_ag_news_emb_cos_sim": 0.8188580870628357, "eval_ag_news_emb_cos_sim_sem": 0.006255775064953011, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5462374687194824, "eval_ag_news_n_ngrams_match_1": 14.176, "eval_ag_news_n_ngrams_match_2": 3.226, "eval_ag_news_n_ngrams_match_3": 0.954, "eval_ag_news_num_pred_words": 46.444, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.68257740264264, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3518776534142356, "eval_ag_news_runtime": 11.4499, "eval_ag_news_samples_per_second": 43.668, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.3537365554646819, "eval_ag_news_token_set_f1_sem": 0.004595980424030815, "eval_ag_news_token_set_precision": 0.33868839092978453, "eval_ag_news_token_set_recall": 0.38467899536128697, "eval_ag_news_true_num_tokens": 56.09375, "step": 136875 }, { "epoch": 26.28, "eval_anthropic_toxic_prompts_accuracy": 0.11384375, "eval_anthropic_toxic_prompts_bleu_score": 3.113344748058637, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11685076588236008, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669208288192749, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009204963252465866, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.23777437210083, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.21, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.894, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718, "eval_anthropic_toxic_prompts_num_pred_words": 47.676, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.476956376589303, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21266123423207317, "eval_anthropic_toxic_prompts_runtime": 9.9588, "eval_anthropic_toxic_prompts_samples_per_second": 50.207, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3591344786901012, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006655155952238647, "eval_anthropic_toxic_prompts_token_set_precision": 0.4441902119053794, "eval_anthropic_toxic_prompts_token_set_recall": 0.32864163490749215, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 136875 }, { "epoch": 26.28, "eval_arxiv_accuracy": 0.34915625, "eval_arxiv_bleu_score": 4.333011328166027, "eval_arxiv_bleu_score_sem": 0.12615184594541862, "eval_arxiv_emb_cos_sim": 0.7579234838485718, "eval_arxiv_emb_cos_sim_sem": 0.007518748844519477, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.38987398147583, "eval_arxiv_n_ngrams_match_1": 15.066, "eval_arxiv_n_ngrams_match_2": 2.938, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 40.424, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.662214046393327, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3604611427623603, "eval_arxiv_runtime": 10.3209, "eval_arxiv_samples_per_second": 48.445, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3548279845397079, "eval_arxiv_token_set_f1_sem": 0.004075179992290366, "eval_arxiv_token_set_precision": 0.3052151464510861, "eval_arxiv_token_set_recall": 0.44324363720376597, "eval_arxiv_true_num_tokens": 64.0, "step": 136875 }, { "epoch": 26.28, "eval_python_code_alpaca_accuracy": 0.163375, "eval_python_code_alpaca_bleu_score": 4.715599250215951, "eval_python_code_alpaca_bleu_score_sem": 0.14824706152068332, "eval_python_code_alpaca_emb_cos_sim": 0.7673449516296387, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008126806037797701, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8577284812927246, "eval_python_code_alpaca_n_ngrams_match_1": 10.16, "eval_python_code_alpaca_n_ngrams_match_2": 3.056, "eval_python_code_alpaca_n_ngrams_match_3": 1.052, "eval_python_code_alpaca_num_pred_words": 44.004, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.421907766364683, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3412971944011274, "eval_python_code_alpaca_runtime": 9.7176, "eval_python_code_alpaca_samples_per_second": 51.453, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.484713177447905, "eval_python_code_alpaca_token_set_f1_sem": 0.005649177262070943, "eval_python_code_alpaca_token_set_precision": 0.5539526744933355, "eval_python_code_alpaca_token_set_recall": 0.4492507534197922, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 136875 }, { "epoch": 26.28, "eval_wikibio_accuracy": 0.3281875, "eval_wikibio_bleu_score": 5.818690450125476, "eval_wikibio_bleu_score_sem": 0.21608202173265756, "eval_wikibio_emb_cos_sim": 0.7250394821166992, "eval_wikibio_emb_cos_sim_sem": 0.01122964056155702, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.692779541015625, "eval_wikibio_n_ngrams_match_1": 9.58, "eval_wikibio_n_ngrams_match_2": 3.244, "eval_wikibio_n_ngrams_match_3": 1.192, "eval_wikibio_num_pred_words": 34.72, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.156308085496704, "eval_wikibio_pred_num_tokens": 62.9921875, "eval_wikibio_rouge_score": 0.33666921256597937, "eval_wikibio_runtime": 9.8949, "eval_wikibio_samples_per_second": 50.531, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.31053074019118176, "eval_wikibio_token_set_f1_sem": 0.005821424953035725, "eval_wikibio_token_set_precision": 0.31318598808710874, "eval_wikibio_token_set_recall": 0.3302876270624791, "eval_wikibio_true_num_tokens": 61.1328125, "step": 136875 }, { "epoch": 26.28, "eval_nq_accuracy": 0.5296875, "eval_nq_bleu_score": 11.965890207913443, "eval_nq_bleu_score_sem": 0.5056489717638976, "eval_nq_emb_cos_sim": 0.8246257901191711, "eval_nq_emb_cos_sim_sem": 0.007787292758162702, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1769216060638428, "eval_nq_n_ngrams_match_1": 23.036, "eval_nq_n_ngrams_match_2": 8.636, "eval_nq_n_ngrams_match_3": 4.038, "eval_nq_num_pred_words": 49.032, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.819115716249794, "eval_nq_pred_num_tokens": 62.96875, "eval_nq_rouge_score": 0.4445829886526741, "eval_nq_runtime": 10.7013, "eval_nq_samples_per_second": 46.723, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4579759030373418, "eval_nq_token_set_f1_sem": 0.005142090637970512, "eval_nq_token_set_precision": 0.4153846846481496, "eval_nq_token_set_recall": 0.5205017468738073, "eval_nq_true_num_tokens": 64.0, "step": 136875 }, { "epoch": 26.28, "learning_rate": 0.001, "loss": 2.5439, "step": 136884 }, { "epoch": 26.29, "learning_rate": 0.001, "loss": 2.5399, "step": 136896 }, { "epoch": 26.29, "learning_rate": 0.001, "loss": 2.541, "step": 136908 }, { "epoch": 26.29, "learning_rate": 0.001, "loss": 2.538, "step": 136920 }, { "epoch": 26.29, "learning_rate": 0.001, "loss": 2.5345, "step": 136932 }, { "epoch": 26.29, "learning_rate": 0.001, "loss": 2.5462, "step": 136944 }, { "epoch": 26.3, "learning_rate": 0.001, "loss": 2.5426, "step": 136956 }, { "epoch": 26.3, "learning_rate": 0.001, "loss": 2.5268, "step": 136968 }, { "epoch": 26.3, "learning_rate": 0.001, "loss": 2.5376, "step": 136980 }, { "epoch": 26.3, "learning_rate": 0.001, "loss": 2.5451, "step": 136992 }, { "epoch": 26.31, "learning_rate": 0.001, "loss": 2.5382, "step": 137004 }, { "epoch": 26.31, "learning_rate": 0.001, "loss": 2.5407, "step": 137016 }, { "epoch": 26.31, "learning_rate": 0.001, "loss": 2.5418, "step": 137028 }, { "epoch": 26.31, "learning_rate": 0.001, "loss": 2.538, "step": 137040 }, { "epoch": 26.32, "learning_rate": 0.001, "loss": 2.5424, "step": 137052 }, { "epoch": 26.32, "learning_rate": 0.001, "loss": 2.543, "step": 137064 }, { "epoch": 26.32, "learning_rate": 0.001, "loss": 2.5444, "step": 137076 }, { "epoch": 26.32, "learning_rate": 0.001, "loss": 2.5366, "step": 137088 }, { "epoch": 26.32, "learning_rate": 0.001, "loss": 2.5438, "step": 137100 }, { "epoch": 26.33, "learning_rate": 0.001, "loss": 2.5408, "step": 137112 }, { "epoch": 26.33, "learning_rate": 0.001, "loss": 2.5392, "step": 137124 }, { "epoch": 26.33, "learning_rate": 0.001, "loss": 2.5473, "step": 137136 }, { "epoch": 26.33, "learning_rate": 0.001, "loss": 2.5439, "step": 137148 }, { "epoch": 26.34, "learning_rate": 0.001, "loss": 2.5435, "step": 137160 }, { "epoch": 26.34, "learning_rate": 0.001, "loss": 2.5485, "step": 137172 }, { "epoch": 26.34, "learning_rate": 0.001, "loss": 2.5399, "step": 137184 }, { "epoch": 26.34, "learning_rate": 0.001, "loss": 2.5395, "step": 137196 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 2.5419, "step": 137208 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 2.542, "step": 137220 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 2.5421, "step": 137232 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 2.5367, "step": 137244 }, { "epoch": 26.35, "learning_rate": 0.001, "loss": 2.5371, "step": 137256 }, { "epoch": 26.36, "learning_rate": 0.001, "loss": 2.5447, "step": 137268 }, { "epoch": 26.36, "learning_rate": 0.001, "loss": 2.5421, "step": 137280 }, { "epoch": 26.36, "learning_rate": 0.001, "loss": 2.5344, "step": 137292 }, { "epoch": 26.36, "learning_rate": 0.001, "loss": 2.5344, "step": 137304 }, { "epoch": 26.37, "learning_rate": 0.001, "loss": 2.5371, "step": 137316 }, { "epoch": 26.37, "learning_rate": 0.001, "loss": 2.5421, "step": 137328 }, { "epoch": 26.37, "learning_rate": 0.001, "loss": 2.5357, "step": 137340 }, { "epoch": 26.37, "learning_rate": 0.001, "loss": 2.5447, "step": 137352 }, { "epoch": 26.38, "learning_rate": 0.001, "loss": 2.547, "step": 137364 }, { "epoch": 26.38, "learning_rate": 0.001, "loss": 2.5404, "step": 137376 }, { "epoch": 26.38, "learning_rate": 0.001, "loss": 2.5455, "step": 137388 }, { "epoch": 26.38, "learning_rate": 0.001, "loss": 2.5404, "step": 137400 }, { "epoch": 26.38, "learning_rate": 0.001, "loss": 2.5405, "step": 137412 }, { "epoch": 26.39, "learning_rate": 0.001, "loss": 2.5457, "step": 137424 }, { "epoch": 26.39, "learning_rate": 0.001, "loss": 2.5587, "step": 137436 }, { "epoch": 26.39, "learning_rate": 0.001, "loss": 2.5451, "step": 137448 }, { "epoch": 26.39, "learning_rate": 0.001, "loss": 2.5477, "step": 137460 }, { "epoch": 26.4, "learning_rate": 0.001, "loss": 2.5497, "step": 137472 }, { "epoch": 26.4, "learning_rate": 0.001, "loss": 2.5451, "step": 137484 }, { "epoch": 26.4, "learning_rate": 0.001, "loss": 2.5452, "step": 137496 }, { "epoch": 26.4, "eval_ag_news_accuracy": 0.3243125, "eval_ag_news_bleu_score": 4.955759470323633, "eval_ag_news_bleu_score_sem": 0.15887039219714075, "eval_ag_news_emb_cos_sim": 0.8114238977432251, "eval_ag_news_emb_cos_sim_sem": 0.007117613579639011, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.533785581588745, "eval_ag_news_n_ngrams_match_1": 14.126, "eval_ag_news_n_ngrams_match_2": 3.226, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.806, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.253391496062406, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35256318161874883, "eval_ag_news_runtime": 10.8308, "eval_ag_news_samples_per_second": 46.164, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.35316782346785075, "eval_ag_news_token_set_f1_sem": 0.004266996819858838, "eval_ag_news_token_set_precision": 0.3376001556690009, "eval_ag_news_token_set_recall": 0.3861518414493657, "eval_ag_news_true_num_tokens": 56.09375, "step": 137500 }, { "epoch": 26.4, "eval_anthropic_toxic_prompts_accuracy": 0.11515625, "eval_anthropic_toxic_prompts_bleu_score": 3.1110553648078065, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11634531628327847, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6803028583526611, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008580157955260626, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2036924362182617, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 47.6, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.62328244546882, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21807319733408637, "eval_anthropic_toxic_prompts_runtime": 10.0044, "eval_anthropic_toxic_prompts_samples_per_second": 49.978, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.36441856595593025, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006599289802246864, "eval_anthropic_toxic_prompts_token_set_precision": 0.4495157000227447, "eval_anthropic_toxic_prompts_token_set_recall": 0.33079276349794706, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 137500 }, { "epoch": 26.4, "eval_arxiv_accuracy": 0.34925, "eval_arxiv_bleu_score": 4.179773553975574, "eval_arxiv_bleu_score_sem": 0.11278790991979552, "eval_arxiv_emb_cos_sim": 0.7635753750801086, "eval_arxiv_emb_cos_sim_sem": 0.007356731090487537, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3825840950012207, "eval_arxiv_n_ngrams_match_1": 15.188, "eval_arxiv_n_ngrams_match_2": 2.888, "eval_arxiv_n_ngrams_match_3": 0.614, "eval_arxiv_num_pred_words": 41.094, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.446766122979263, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35820472439116297, "eval_arxiv_runtime": 10.1665, "eval_arxiv_samples_per_second": 49.181, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35417310242164035, "eval_arxiv_token_set_f1_sem": 0.004257793044281869, "eval_arxiv_token_set_precision": 0.3075897923303374, "eval_arxiv_token_set_recall": 0.43718891607634053, "eval_arxiv_true_num_tokens": 64.0, "step": 137500 }, { "epoch": 26.4, "eval_python_code_alpaca_accuracy": 0.15865625, "eval_python_code_alpaca_bleu_score": 4.717075394164557, "eval_python_code_alpaca_bleu_score_sem": 0.1513849380381095, "eval_python_code_alpaca_emb_cos_sim": 0.7640969753265381, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007780952291639845, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.894763946533203, "eval_python_code_alpaca_n_ngrams_match_1": 10.204, "eval_python_code_alpaca_n_ngrams_match_2": 3.026, "eval_python_code_alpaca_n_ngrams_match_3": 1.064, "eval_python_code_alpaca_num_pred_words": 44.904, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.079233271828627, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3398542276247796, "eval_python_code_alpaca_runtime": 10.1131, "eval_python_code_alpaca_samples_per_second": 49.441, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.485626048143985, "eval_python_code_alpaca_token_set_f1_sem": 0.005575612169828964, "eval_python_code_alpaca_token_set_precision": 0.5573860876167339, "eval_python_code_alpaca_token_set_recall": 0.4494263120640594, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 137500 }, { "epoch": 26.4, "eval_wikibio_accuracy": 0.32596875, "eval_wikibio_bleu_score": 5.881323417754504, "eval_wikibio_bleu_score_sem": 0.23294533006336862, "eval_wikibio_emb_cos_sim": 0.7359535694122314, "eval_wikibio_emb_cos_sim_sem": 0.009895827149266853, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7024004459381104, "eval_wikibio_n_ngrams_match_1": 9.672, "eval_wikibio_n_ngrams_match_2": 3.222, "eval_wikibio_n_ngrams_match_3": 1.182, "eval_wikibio_num_pred_words": 34.98, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.54451255235392, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34343898460069355, "eval_wikibio_runtime": 10.0527, "eval_wikibio_samples_per_second": 49.738, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3096431583939992, "eval_wikibio_token_set_f1_sem": 0.005894352753507593, "eval_wikibio_token_set_precision": 0.3128749873512374, "eval_wikibio_token_set_recall": 0.32681829243231664, "eval_wikibio_true_num_tokens": 61.1328125, "step": 137500 }, { "epoch": 26.4, "eval_nq_accuracy": 0.52903125, "eval_nq_bleu_score": 11.424583829580019, "eval_nq_bleu_score_sem": 0.4727421297078216, "eval_nq_emb_cos_sim": 0.8270809650421143, "eval_nq_emb_cos_sim_sem": 0.008246695382564971, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1732428073883057, "eval_nq_n_ngrams_match_1": 23.002, "eval_nq_n_ngrams_match_2": 8.284, "eval_nq_n_ngrams_match_3": 3.794, "eval_nq_num_pred_words": 49.4, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.786731568935295, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44401144925390756, "eval_nq_runtime": 10.3349, "eval_nq_samples_per_second": 48.38, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4577020947749211, "eval_nq_token_set_f1_sem": 0.004893191246329938, "eval_nq_token_set_precision": 0.41763552281650806, "eval_nq_token_set_recall": 0.5173763896640758, "eval_nq_true_num_tokens": 64.0, "step": 137500 }, { "epoch": 26.4, "learning_rate": 0.001, "loss": 2.5401, "step": 137508 }, { "epoch": 26.41, "learning_rate": 0.001, "loss": 2.5433, "step": 137520 }, { "epoch": 26.41, "learning_rate": 0.001, "loss": 2.5505, "step": 137532 }, { "epoch": 26.41, "learning_rate": 0.001, "loss": 2.5452, "step": 137544 }, { "epoch": 26.41, "learning_rate": 0.001, "loss": 2.5488, "step": 137556 }, { "epoch": 26.41, "learning_rate": 0.001, "loss": 2.5397, "step": 137568 }, { "epoch": 26.42, "learning_rate": 0.001, "loss": 2.5451, "step": 137580 }, { "epoch": 26.42, "learning_rate": 0.001, "loss": 2.5449, "step": 137592 }, { "epoch": 26.42, "learning_rate": 0.001, "loss": 2.549, "step": 137604 }, { "epoch": 26.42, "learning_rate": 0.001, "loss": 2.5439, "step": 137616 }, { "epoch": 26.43, "learning_rate": 0.001, "loss": 2.5341, "step": 137628 }, { "epoch": 26.43, "learning_rate": 0.001, "loss": 2.5488, "step": 137640 }, { "epoch": 26.43, "learning_rate": 0.001, "loss": 2.5511, "step": 137652 }, { "epoch": 26.43, "learning_rate": 0.001, "loss": 2.5514, "step": 137664 }, { "epoch": 26.44, "learning_rate": 0.001, "loss": 2.5403, "step": 137676 }, { "epoch": 26.44, "learning_rate": 0.001, "loss": 2.5415, "step": 137688 }, { "epoch": 26.44, "learning_rate": 0.001, "loss": 2.5406, "step": 137700 }, { "epoch": 26.44, "learning_rate": 0.001, "loss": 2.5468, "step": 137712 }, { "epoch": 26.44, "learning_rate": 0.001, "loss": 2.5468, "step": 137724 }, { "epoch": 26.45, "learning_rate": 0.001, "loss": 2.5479, "step": 137736 }, { "epoch": 26.45, "learning_rate": 0.001, "loss": 2.5518, "step": 137748 }, { "epoch": 26.45, "learning_rate": 0.001, "loss": 2.552, "step": 137760 }, { "epoch": 26.45, "learning_rate": 0.001, "loss": 2.55, "step": 137772 }, { "epoch": 26.46, "learning_rate": 0.001, "loss": 2.5445, "step": 137784 }, { "epoch": 26.46, "learning_rate": 0.001, "loss": 2.5493, "step": 137796 }, { "epoch": 26.46, "learning_rate": 0.001, "loss": 2.5629, "step": 137808 }, { "epoch": 26.46, "learning_rate": 0.001, "loss": 2.547, "step": 137820 }, { "epoch": 26.47, "learning_rate": 0.001, "loss": 2.5418, "step": 137832 }, { "epoch": 26.47, "learning_rate": 0.001, "loss": 2.5432, "step": 137844 }, { "epoch": 26.47, "learning_rate": 0.001, "loss": 2.5512, "step": 137856 }, { "epoch": 26.47, "learning_rate": 0.001, "loss": 2.552, "step": 137868 }, { "epoch": 26.47, "learning_rate": 0.001, "loss": 2.5442, "step": 137880 }, { "epoch": 26.48, "learning_rate": 0.001, "loss": 2.5545, "step": 137892 }, { "epoch": 26.48, "learning_rate": 0.001, "loss": 2.5446, "step": 137904 }, { "epoch": 26.48, "learning_rate": 0.001, "loss": 2.5438, "step": 137916 }, { "epoch": 26.48, "learning_rate": 0.001, "loss": 2.5474, "step": 137928 }, { "epoch": 26.49, "learning_rate": 0.001, "loss": 2.5498, "step": 137940 }, { "epoch": 26.49, "learning_rate": 0.001, "loss": 2.5525, "step": 137952 }, { "epoch": 26.49, "learning_rate": 0.001, "loss": 2.551, "step": 137964 }, { "epoch": 26.49, "learning_rate": 0.001, "loss": 2.5563, "step": 137976 }, { "epoch": 26.5, "learning_rate": 0.001, "loss": 2.5478, "step": 137988 }, { "epoch": 26.5, "learning_rate": 0.001, "loss": 2.5471, "step": 138000 }, { "epoch": 26.5, "learning_rate": 0.001, "loss": 2.5476, "step": 138012 }, { "epoch": 26.5, "learning_rate": 0.001, "loss": 2.5538, "step": 138024 }, { "epoch": 26.5, "learning_rate": 0.001, "loss": 2.5536, "step": 138036 }, { "epoch": 26.51, "learning_rate": 0.001, "loss": 2.5364, "step": 138048 }, { "epoch": 26.51, "learning_rate": 0.001, "loss": 2.5464, "step": 138060 }, { "epoch": 26.51, "learning_rate": 0.001, "loss": 2.5487, "step": 138072 }, { "epoch": 26.51, "learning_rate": 0.001, "loss": 2.5556, "step": 138084 }, { "epoch": 26.52, "learning_rate": 0.001, "loss": 2.55, "step": 138096 }, { "epoch": 26.52, "learning_rate": 0.001, "loss": 2.5457, "step": 138108 }, { "epoch": 26.52, "learning_rate": 0.001, "loss": 2.547, "step": 138120 }, { "epoch": 26.52, "eval_ag_news_accuracy": 0.3253125, "eval_ag_news_bleu_score": 4.959226138590679, "eval_ag_news_bleu_score_sem": 0.16219360894267384, "eval_ag_news_emb_cos_sim": 0.8112995028495789, "eval_ag_news_emb_cos_sim_sem": 0.006759624175288049, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5298244953155518, "eval_ag_news_n_ngrams_match_1": 14.096, "eval_ag_news_n_ngrams_match_2": 3.142, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 46.252, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.11797922409669, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3511656920225139, "eval_ag_news_runtime": 10.2514, "eval_ag_news_samples_per_second": 48.774, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.35243850091046586, "eval_ag_news_token_set_f1_sem": 0.004429363013131422, "eval_ag_news_token_set_precision": 0.3378667578097021, "eval_ag_news_token_set_recall": 0.38483197844049744, "eval_ag_news_true_num_tokens": 56.09375, "step": 138125 }, { "epoch": 26.52, "eval_anthropic_toxic_prompts_accuracy": 0.11390625, "eval_anthropic_toxic_prompts_bleu_score": 3.1131375045066556, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11796619077821163, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6754779815673828, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009643804800239898, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.236358880996704, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.246, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.902, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 46.7, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.44091948244658, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2153345177363887, "eval_anthropic_toxic_prompts_runtime": 9.8146, "eval_anthropic_toxic_prompts_samples_per_second": 50.944, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3587887664654874, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006763941670662588, "eval_anthropic_toxic_prompts_token_set_precision": 0.43860774979640527, "eval_anthropic_toxic_prompts_token_set_recall": 0.3292519758103467, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 138125 }, { "epoch": 26.52, "eval_arxiv_accuracy": 0.34796875, "eval_arxiv_bleu_score": 4.402037482769789, "eval_arxiv_bleu_score_sem": 0.12520143481700993, "eval_arxiv_emb_cos_sim": 0.7637171745300293, "eval_arxiv_emb_cos_sim_sem": 0.0067443913822739015, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.388087272644043, "eval_arxiv_n_ngrams_match_1": 15.148, "eval_arxiv_n_ngrams_match_2": 2.978, "eval_arxiv_n_ngrams_match_3": 0.666, "eval_arxiv_num_pred_words": 40.444, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.609263624166058, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36246474064846745, "eval_arxiv_runtime": 10.7169, "eval_arxiv_samples_per_second": 46.655, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.3544349286576337, "eval_arxiv_token_set_f1_sem": 0.004211841254079962, "eval_arxiv_token_set_precision": 0.30692930093919746, "eval_arxiv_token_set_recall": 0.434815790712515, "eval_arxiv_true_num_tokens": 64.0, "step": 138125 }, { "epoch": 26.52, "eval_python_code_alpaca_accuracy": 0.15709375, "eval_python_code_alpaca_bleu_score": 4.534078637726143, "eval_python_code_alpaca_bleu_score_sem": 0.14233550072533638, "eval_python_code_alpaca_emb_cos_sim": 0.7551637291908264, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00868436933092635, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.92767333984375, "eval_python_code_alpaca_n_ngrams_match_1": 9.72, "eval_python_code_alpaca_n_ngrams_match_2": 2.814, "eval_python_code_alpaca_n_ngrams_match_3": 0.918, "eval_python_code_alpaca_num_pred_words": 42.468, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.684108315271413, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3377472714490662, "eval_python_code_alpaca_runtime": 10.0014, "eval_python_code_alpaca_samples_per_second": 49.993, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4701137754043401, "eval_python_code_alpaca_token_set_f1_sem": 0.005551559877368672, "eval_python_code_alpaca_token_set_precision": 0.5265204169817107, "eval_python_code_alpaca_token_set_recall": 0.4461888894310651, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 138125 }, { "epoch": 26.52, "eval_wikibio_accuracy": 0.3255625, "eval_wikibio_bleu_score": 6.062646065586861, "eval_wikibio_bleu_score_sem": 0.21360257706867236, "eval_wikibio_emb_cos_sim": 0.7402997612953186, "eval_wikibio_emb_cos_sim_sem": 0.010037151459224453, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7152249813079834, "eval_wikibio_n_ngrams_match_1": 10.028, "eval_wikibio_n_ngrams_match_2": 3.35, "eval_wikibio_n_ngrams_match_3": 1.22, "eval_wikibio_num_pred_words": 35.242, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.06782553821968, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35530582903784064, "eval_wikibio_runtime": 10.1063, "eval_wikibio_samples_per_second": 49.474, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3214020396085377, "eval_wikibio_token_set_f1_sem": 0.005528925450837666, "eval_wikibio_token_set_precision": 0.3260291403205192, "eval_wikibio_token_set_recall": 0.33389809075319343, "eval_wikibio_true_num_tokens": 61.1328125, "step": 138125 }, { "epoch": 26.52, "eval_nq_accuracy": 0.52846875, "eval_nq_bleu_score": 11.711828768129264, "eval_nq_bleu_score_sem": 0.4737353012203361, "eval_nq_emb_cos_sim": 0.8314865827560425, "eval_nq_emb_cos_sim_sem": 0.0072692750627384155, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1723036766052246, "eval_nq_n_ngrams_match_1": 23.12, "eval_nq_n_ngrams_match_2": 8.448, "eval_nq_n_ngrams_match_3": 3.892, "eval_nq_num_pred_words": 49.106, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.778483552425545, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44833025644502966, "eval_nq_runtime": 10.3517, "eval_nq_samples_per_second": 48.301, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4626036655039273, "eval_nq_token_set_f1_sem": 0.00496835247379176, "eval_nq_token_set_precision": 0.4207643543177327, "eval_nq_token_set_recall": 0.5210255985059231, "eval_nq_true_num_tokens": 64.0, "step": 138125 }, { "epoch": 26.52, "learning_rate": 0.001, "loss": 2.5507, "step": 138132 }, { "epoch": 26.53, "learning_rate": 0.001, "loss": 2.5414, "step": 138144 }, { "epoch": 26.53, "learning_rate": 0.001, "loss": 2.5539, "step": 138156 }, { "epoch": 26.53, "learning_rate": 0.001, "loss": 2.5416, "step": 138168 }, { "epoch": 26.53, "learning_rate": 0.001, "loss": 2.547, "step": 138180 }, { "epoch": 26.53, "learning_rate": 0.001, "loss": 2.5537, "step": 138192 }, { "epoch": 26.54, "learning_rate": 0.001, "loss": 2.554, "step": 138204 }, { "epoch": 26.54, "learning_rate": 0.001, "loss": 2.5485, "step": 138216 }, { "epoch": 26.54, "learning_rate": 0.001, "loss": 2.5417, "step": 138228 }, { "epoch": 26.54, "learning_rate": 0.001, "loss": 2.5427, "step": 138240 }, { "epoch": 26.55, "learning_rate": 0.001, "loss": 2.5496, "step": 138252 }, { "epoch": 26.55, "learning_rate": 0.001, "loss": 2.5433, "step": 138264 }, { "epoch": 26.55, "learning_rate": 0.001, "loss": 2.536, "step": 138276 }, { "epoch": 26.55, "learning_rate": 0.001, "loss": 2.5441, "step": 138288 }, { "epoch": 26.56, "learning_rate": 0.001, "loss": 2.5376, "step": 138300 }, { "epoch": 26.56, "learning_rate": 0.001, "loss": 2.5416, "step": 138312 }, { "epoch": 26.56, "learning_rate": 0.001, "loss": 2.5455, "step": 138324 }, { "epoch": 26.56, "learning_rate": 0.001, "loss": 2.5445, "step": 138336 }, { "epoch": 26.56, "learning_rate": 0.001, "loss": 2.5456, "step": 138348 }, { "epoch": 26.57, "learning_rate": 0.001, "loss": 2.5496, "step": 138360 }, { "epoch": 26.57, "learning_rate": 0.001, "loss": 2.542, "step": 138372 }, { "epoch": 26.57, "learning_rate": 0.001, "loss": 2.5491, "step": 138384 }, { "epoch": 26.57, "learning_rate": 0.001, "loss": 2.5514, "step": 138396 }, { "epoch": 26.58, "learning_rate": 0.001, "loss": 2.5309, "step": 138408 }, { "epoch": 26.58, "learning_rate": 0.001, "loss": 2.5515, "step": 138420 }, { "epoch": 26.58, "learning_rate": 0.001, "loss": 2.5477, "step": 138432 }, { "epoch": 26.58, "learning_rate": 0.001, "loss": 2.5483, "step": 138444 }, { "epoch": 26.59, "learning_rate": 0.001, "loss": 2.5506, "step": 138456 }, { "epoch": 26.59, "learning_rate": 0.001, "loss": 2.5515, "step": 138468 }, { "epoch": 26.59, "learning_rate": 0.001, "loss": 2.5339, "step": 138480 }, { "epoch": 26.59, "learning_rate": 0.001, "loss": 2.5483, "step": 138492 }, { "epoch": 26.59, "learning_rate": 0.001, "loss": 2.5397, "step": 138504 }, { "epoch": 26.6, "learning_rate": 0.001, "loss": 2.5393, "step": 138516 }, { "epoch": 26.6, "learning_rate": 0.001, "loss": 2.544, "step": 138528 }, { "epoch": 26.6, "learning_rate": 0.001, "loss": 2.544, "step": 138540 }, { "epoch": 26.6, "learning_rate": 0.001, "loss": 2.5533, "step": 138552 }, { "epoch": 26.61, "learning_rate": 0.001, "loss": 2.5442, "step": 138564 }, { "epoch": 26.61, "learning_rate": 0.001, "loss": 2.5556, "step": 138576 }, { "epoch": 26.61, "learning_rate": 0.001, "loss": 2.5411, "step": 138588 }, { "epoch": 26.61, "learning_rate": 0.001, "loss": 2.5463, "step": 138600 }, { "epoch": 26.62, "learning_rate": 0.001, "loss": 2.5481, "step": 138612 }, { "epoch": 26.62, "learning_rate": 0.001, "loss": 2.5403, "step": 138624 }, { "epoch": 26.62, "learning_rate": 0.001, "loss": 2.5405, "step": 138636 }, { "epoch": 26.62, "learning_rate": 0.001, "loss": 2.5518, "step": 138648 }, { "epoch": 26.62, "learning_rate": 0.001, "loss": 2.5555, "step": 138660 }, { "epoch": 26.63, "learning_rate": 0.001, "loss": 2.5544, "step": 138672 }, { "epoch": 26.63, "learning_rate": 0.001, "loss": 2.5502, "step": 138684 }, { "epoch": 26.63, "learning_rate": 0.001, "loss": 2.5447, "step": 138696 }, { "epoch": 26.63, "learning_rate": 0.001, "loss": 2.5487, "step": 138708 }, { "epoch": 26.64, "learning_rate": 0.001, "loss": 2.5577, "step": 138720 }, { "epoch": 26.64, "learning_rate": 0.001, "loss": 2.5487, "step": 138732 }, { "epoch": 26.64, "learning_rate": 0.001, "loss": 2.5519, "step": 138744 }, { "epoch": 26.64, "eval_ag_news_accuracy": 0.3241875, "eval_ag_news_bleu_score": 4.965060862419303, "eval_ag_news_bleu_score_sem": 0.15438728512738223, "eval_ag_news_emb_cos_sim": 0.8131359815597534, "eval_ag_news_emb_cos_sim_sem": 0.0063308952369785995, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5344045162200928, "eval_ag_news_n_ngrams_match_1": 14.184, "eval_ag_news_n_ngrams_match_2": 3.186, "eval_ag_news_n_ngrams_match_3": 0.916, "eval_ag_news_num_pred_words": 47.018, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.27459866855017, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.350500314604809, "eval_ag_news_runtime": 10.4869, "eval_ag_news_samples_per_second": 47.678, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3532376550712028, "eval_ag_news_token_set_f1_sem": 0.004367471002802514, "eval_ag_news_token_set_precision": 0.3378850477441128, "eval_ag_news_token_set_recall": 0.38653801426449513, "eval_ag_news_true_num_tokens": 56.09375, "step": 138750 }, { "epoch": 26.64, "eval_anthropic_toxic_prompts_accuracy": 0.114625, "eval_anthropic_toxic_prompts_bleu_score": 3.11163846657245, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11372678728501974, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804838180541992, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007905600937566306, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2448742389678955, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.156, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 46.87, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.658483020007733, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2151872767905688, "eval_anthropic_toxic_prompts_runtime": 9.8784, "eval_anthropic_toxic_prompts_samples_per_second": 50.616, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3553317607568192, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006635344215942749, "eval_anthropic_toxic_prompts_token_set_precision": 0.43286421188331903, "eval_anthropic_toxic_prompts_token_set_recall": 0.3285166876661079, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 138750 }, { "epoch": 26.64, "eval_arxiv_accuracy": 0.3485625, "eval_arxiv_bleu_score": 4.264796320712743, "eval_arxiv_bleu_score_sem": 0.12075134889061248, "eval_arxiv_emb_cos_sim": 0.771316647529602, "eval_arxiv_emb_cos_sim_sem": 0.00700699917572217, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.387031316757202, "eval_arxiv_n_ngrams_match_1": 15.254, "eval_arxiv_n_ngrams_match_2": 2.882, "eval_arxiv_n_ngrams_match_3": 0.61, "eval_arxiv_num_pred_words": 41.13, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.578014049926754, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36239528489991546, "eval_arxiv_runtime": 10.3113, "eval_arxiv_samples_per_second": 48.49, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3565038852463042, "eval_arxiv_token_set_f1_sem": 0.004005371012824581, "eval_arxiv_token_set_precision": 0.3072828568172363, "eval_arxiv_token_set_recall": 0.4405003398770094, "eval_arxiv_true_num_tokens": 64.0, "step": 138750 }, { "epoch": 26.64, "eval_python_code_alpaca_accuracy": 0.1610625, "eval_python_code_alpaca_bleu_score": 4.459944855256856, "eval_python_code_alpaca_bleu_score_sem": 0.14383230908246283, "eval_python_code_alpaca_emb_cos_sim": 0.7556705474853516, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008313643017246495, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8856201171875, "eval_python_code_alpaca_n_ngrams_match_1": 9.87, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 0.942, "eval_python_code_alpaca_num_pred_words": 44.04, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.91467334858191, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3329802664418746, "eval_python_code_alpaca_runtime": 9.8936, "eval_python_code_alpaca_samples_per_second": 50.538, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4779585230603366, "eval_python_code_alpaca_token_set_f1_sem": 0.005851969260384139, "eval_python_code_alpaca_token_set_precision": 0.537983528265224, "eval_python_code_alpaca_token_set_recall": 0.4513912894177452, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 138750 }, { "epoch": 26.64, "eval_wikibio_accuracy": 0.32840625, "eval_wikibio_bleu_score": 6.195257570122492, "eval_wikibio_bleu_score_sem": 0.22528679215553907, "eval_wikibio_emb_cos_sim": 0.7375030517578125, "eval_wikibio_emb_cos_sim_sem": 0.00922417299576372, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.700056552886963, "eval_wikibio_n_ngrams_match_1": 10.036, "eval_wikibio_n_ngrams_match_2": 3.45, "eval_wikibio_n_ngrams_match_3": 1.262, "eval_wikibio_num_pred_words": 35.798, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.44959183657991, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3554960810057535, "eval_wikibio_runtime": 9.9643, "eval_wikibio_samples_per_second": 50.179, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3254677870560993, "eval_wikibio_token_set_f1_sem": 0.0053984178241856294, "eval_wikibio_token_set_precision": 0.330026286269777, "eval_wikibio_token_set_recall": 0.3390868022910393, "eval_wikibio_true_num_tokens": 61.1328125, "step": 138750 }, { "epoch": 26.64, "eval_nq_accuracy": 0.52890625, "eval_nq_bleu_score": 11.753755750075214, "eval_nq_bleu_score_sem": 0.47813875186557536, "eval_nq_emb_cos_sim": 0.8355936408042908, "eval_nq_emb_cos_sim_sem": 0.0065716689872647015, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1703109741210938, "eval_nq_n_ngrams_match_1": 22.952, "eval_nq_n_ngrams_match_2": 8.528, "eval_nq_n_ngrams_match_3": 3.954, "eval_nq_num_pred_words": 48.99, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.761008063951103, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.446778549457096, "eval_nq_runtime": 10.6875, "eval_nq_samples_per_second": 46.784, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.460552629086646, "eval_nq_token_set_f1_sem": 0.004977581539049686, "eval_nq_token_set_precision": 0.4172179579331786, "eval_nq_token_set_recall": 0.5217868775197729, "eval_nq_true_num_tokens": 64.0, "step": 138750 }, { "epoch": 26.64, "learning_rate": 0.001, "loss": 2.5491, "step": 138756 }, { "epoch": 26.65, "learning_rate": 0.001, "loss": 2.5522, "step": 138768 }, { "epoch": 26.65, "learning_rate": 0.001, "loss": 2.5506, "step": 138780 }, { "epoch": 26.65, "learning_rate": 0.001, "loss": 2.5362, "step": 138792 }, { "epoch": 26.65, "learning_rate": 0.001, "loss": 2.5491, "step": 138804 }, { "epoch": 26.65, "learning_rate": 0.001, "loss": 2.5567, "step": 138816 }, { "epoch": 26.66, "learning_rate": 0.001, "loss": 2.5529, "step": 138828 }, { "epoch": 26.66, "learning_rate": 0.001, "loss": 2.5429, "step": 138840 }, { "epoch": 26.66, "learning_rate": 0.001, "loss": 2.5486, "step": 138852 }, { "epoch": 26.66, "learning_rate": 0.001, "loss": 2.5362, "step": 138864 }, { "epoch": 26.67, "learning_rate": 0.001, "loss": 2.5505, "step": 138876 }, { "epoch": 26.67, "learning_rate": 0.001, "loss": 2.5541, "step": 138888 }, { "epoch": 26.67, "learning_rate": 0.001, "loss": 2.5547, "step": 138900 }, { "epoch": 26.67, "learning_rate": 0.001, "loss": 2.5521, "step": 138912 }, { "epoch": 26.68, "learning_rate": 0.001, "loss": 2.5515, "step": 138924 }, { "epoch": 26.68, "learning_rate": 0.001, "loss": 2.555, "step": 138936 }, { "epoch": 26.68, "learning_rate": 0.001, "loss": 2.5611, "step": 138948 }, { "epoch": 26.68, "learning_rate": 0.001, "loss": 2.542, "step": 138960 }, { "epoch": 26.68, "learning_rate": 0.001, "loss": 2.5484, "step": 138972 }, { "epoch": 26.69, "learning_rate": 0.001, "loss": 2.5556, "step": 138984 }, { "epoch": 26.69, "learning_rate": 0.001, "loss": 2.5476, "step": 138996 }, { "epoch": 26.69, "learning_rate": 0.001, "loss": 2.5415, "step": 139008 }, { "epoch": 26.69, "learning_rate": 0.001, "loss": 2.5627, "step": 139020 }, { "epoch": 26.7, "learning_rate": 0.001, "loss": 2.5465, "step": 139032 }, { "epoch": 26.7, "learning_rate": 0.001, "loss": 2.5441, "step": 139044 }, { "epoch": 26.7, "learning_rate": 0.001, "loss": 2.5523, "step": 139056 }, { "epoch": 26.7, "learning_rate": 0.001, "loss": 2.5503, "step": 139068 }, { "epoch": 26.71, "learning_rate": 0.001, "loss": 2.5396, "step": 139080 }, { "epoch": 26.71, "learning_rate": 0.001, "loss": 2.5561, "step": 139092 }, { "epoch": 26.71, "learning_rate": 0.001, "loss": 2.5472, "step": 139104 }, { "epoch": 26.71, "learning_rate": 0.001, "loss": 2.5483, "step": 139116 }, { "epoch": 26.71, "learning_rate": 0.001, "loss": 2.553, "step": 139128 }, { "epoch": 26.72, "learning_rate": 0.001, "loss": 2.5593, "step": 139140 }, { "epoch": 26.72, "learning_rate": 0.001, "loss": 2.56, "step": 139152 }, { "epoch": 26.72, "learning_rate": 0.001, "loss": 2.5405, "step": 139164 }, { "epoch": 26.72, "learning_rate": 0.001, "loss": 2.5567, "step": 139176 }, { "epoch": 26.73, "learning_rate": 0.001, "loss": 2.5497, "step": 139188 }, { "epoch": 26.73, "learning_rate": 0.001, "loss": 2.5495, "step": 139200 }, { "epoch": 26.73, "learning_rate": 0.001, "loss": 2.556, "step": 139212 }, { "epoch": 26.73, "learning_rate": 0.001, "loss": 2.5561, "step": 139224 }, { "epoch": 26.74, "learning_rate": 0.001, "loss": 2.5497, "step": 139236 }, { "epoch": 26.74, "learning_rate": 0.001, "loss": 2.5454, "step": 139248 }, { "epoch": 26.74, "learning_rate": 0.001, "loss": 2.5604, "step": 139260 }, { "epoch": 26.74, "learning_rate": 0.001, "loss": 2.5453, "step": 139272 }, { "epoch": 26.74, "learning_rate": 0.001, "loss": 2.5448, "step": 139284 }, { "epoch": 26.75, "learning_rate": 0.001, "loss": 2.5598, "step": 139296 }, { "epoch": 26.75, "learning_rate": 0.001, "loss": 2.5642, "step": 139308 }, { "epoch": 26.75, "learning_rate": 0.001, "loss": 2.5485, "step": 139320 }, { "epoch": 26.75, "learning_rate": 0.001, "loss": 2.554, "step": 139332 }, { "epoch": 26.76, "learning_rate": 0.001, "loss": 2.5503, "step": 139344 }, { "epoch": 26.76, "learning_rate": 0.001, "loss": 2.5579, "step": 139356 }, { "epoch": 26.76, "learning_rate": 0.001, "loss": 2.5582, "step": 139368 }, { "epoch": 26.76, "eval_ag_news_accuracy": 0.32209375, "eval_ag_news_bleu_score": 4.883039681182582, "eval_ag_news_bleu_score_sem": 0.15028572684274583, "eval_ag_news_emb_cos_sim": 0.8151203393936157, "eval_ag_news_emb_cos_sim_sem": 0.006459929789328226, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5450551509857178, "eval_ag_news_n_ngrams_match_1": 14.166, "eval_ag_news_n_ngrams_match_2": 3.092, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.15, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.64159580773397, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35509581835061726, "eval_ag_news_runtime": 10.3032, "eval_ag_news_samples_per_second": 48.529, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.35499297414895553, "eval_ag_news_token_set_f1_sem": 0.0044278571931696395, "eval_ag_news_token_set_precision": 0.34008887717726405, "eval_ag_news_token_set_recall": 0.3870041642876613, "eval_ag_news_true_num_tokens": 56.09375, "step": 139375 }, { "epoch": 26.76, "eval_anthropic_toxic_prompts_accuracy": 0.11240625, "eval_anthropic_toxic_prompts_bleu_score": 3.1405064442608244, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11492515895809967, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847192049026489, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009212928348289916, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2757725715637207, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.658, "eval_anthropic_toxic_prompts_num_pred_words": 46.0, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.46366265819675, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22013370095881896, "eval_anthropic_toxic_prompts_runtime": 10.2023, "eval_anthropic_toxic_prompts_samples_per_second": 49.008, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.35495125345294976, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00636732151368483, "eval_anthropic_toxic_prompts_token_set_precision": 0.440083951460424, "eval_anthropic_toxic_prompts_token_set_recall": 0.32399824953124007, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 139375 }, { "epoch": 26.76, "eval_arxiv_accuracy": 0.3483125, "eval_arxiv_bleu_score": 4.266548561097967, "eval_arxiv_bleu_score_sem": 0.11951296949766975, "eval_arxiv_emb_cos_sim": 0.7608402967453003, "eval_arxiv_emb_cos_sim_sem": 0.008398710504065163, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3979198932647705, "eval_arxiv_n_ngrams_match_1": 15.262, "eval_arxiv_n_ngrams_match_2": 2.878, "eval_arxiv_n_ngrams_match_3": 0.626, "eval_arxiv_num_pred_words": 40.288, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.90183630114276, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36535832982275657, "eval_arxiv_runtime": 9.9141, "eval_arxiv_samples_per_second": 50.433, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.3588403458338021, "eval_arxiv_token_set_f1_sem": 0.004079002239419578, "eval_arxiv_token_set_precision": 0.3109485366434201, "eval_arxiv_token_set_recall": 0.4414263435502644, "eval_arxiv_true_num_tokens": 64.0, "step": 139375 }, { "epoch": 26.76, "eval_python_code_alpaca_accuracy": 0.1605, "eval_python_code_alpaca_bleu_score": 4.78814525467256, "eval_python_code_alpaca_bleu_score_sem": 0.15972130891421898, "eval_python_code_alpaca_emb_cos_sim": 0.7554644346237183, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008445594263221976, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.89371919631958, "eval_python_code_alpaca_n_ngrams_match_1": 10.002, "eval_python_code_alpaca_n_ngrams_match_2": 2.926, "eval_python_code_alpaca_n_ngrams_match_3": 0.968, "eval_python_code_alpaca_num_pred_words": 42.432, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.060354852339294, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34485621994811294, "eval_python_code_alpaca_runtime": 9.7111, "eval_python_code_alpaca_samples_per_second": 51.488, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.47755522254118815, "eval_python_code_alpaca_token_set_f1_sem": 0.005190540762148067, "eval_python_code_alpaca_token_set_precision": 0.5450351378485951, "eval_python_code_alpaca_token_set_recall": 0.4466616561824618, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 139375 }, { "epoch": 26.76, "eval_wikibio_accuracy": 0.32546875, "eval_wikibio_bleu_score": 6.012222877556776, "eval_wikibio_bleu_score_sem": 0.19930999058990118, "eval_wikibio_emb_cos_sim": 0.7461388111114502, "eval_wikibio_emb_cos_sim_sem": 0.009180695887346702, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7581255435943604, "eval_wikibio_n_ngrams_match_1": 10.098, "eval_wikibio_n_ngrams_match_2": 3.456, "eval_wikibio_n_ngrams_match_3": 1.252, "eval_wikibio_num_pred_words": 36.29, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.86799643096521, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3561306434892939, "eval_wikibio_runtime": 9.7768, "eval_wikibio_samples_per_second": 51.142, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3219031784383085, "eval_wikibio_token_set_f1_sem": 0.005376487850117779, "eval_wikibio_token_set_precision": 0.3307109163064446, "eval_wikibio_token_set_recall": 0.3301599920682775, "eval_wikibio_true_num_tokens": 61.1328125, "step": 139375 }, { "epoch": 26.76, "eval_nq_accuracy": 0.528625, "eval_nq_bleu_score": 11.653447479118352, "eval_nq_bleu_score_sem": 0.4857224043780406, "eval_nq_emb_cos_sim": 0.8353527784347534, "eval_nq_emb_cos_sim_sem": 0.007095706438036458, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1789746284484863, "eval_nq_n_ngrams_match_1": 22.994, "eval_nq_n_ngrams_match_2": 8.334, "eval_nq_n_ngrams_match_3": 3.854, "eval_nq_num_pred_words": 48.58, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.83724015680304, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44741050847881686, "eval_nq_runtime": 10.2127, "eval_nq_samples_per_second": 48.959, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.46258269156278825, "eval_nq_token_set_f1_sem": 0.004992160416557622, "eval_nq_token_set_precision": 0.41992210252936485, "eval_nq_token_set_recall": 0.5230947265966663, "eval_nq_true_num_tokens": 64.0, "step": 139375 }, { "epoch": 26.76, "learning_rate": 0.001, "loss": 2.5586, "step": 139380 }, { "epoch": 26.76, "learning_rate": 0.001, "loss": 2.554, "step": 139392 }, { "epoch": 26.77, "learning_rate": 0.001, "loss": 2.5552, "step": 139404 }, { "epoch": 26.77, "learning_rate": 0.001, "loss": 2.5342, "step": 139416 }, { "epoch": 26.77, "learning_rate": 0.001, "loss": 2.5443, "step": 139428 }, { "epoch": 26.77, "learning_rate": 0.001, "loss": 2.5549, "step": 139440 }, { "epoch": 26.78, "learning_rate": 0.001, "loss": 2.5427, "step": 139452 }, { "epoch": 26.78, "learning_rate": 0.001, "loss": 2.5544, "step": 139464 }, { "epoch": 26.78, "learning_rate": 0.001, "loss": 2.5491, "step": 139476 }, { "epoch": 26.78, "learning_rate": 0.001, "loss": 2.5588, "step": 139488 }, { "epoch": 26.79, "learning_rate": 0.001, "loss": 2.5473, "step": 139500 }, { "epoch": 26.79, "learning_rate": 0.001, "loss": 2.5523, "step": 139512 }, { "epoch": 26.79, "learning_rate": 0.001, "loss": 2.5438, "step": 139524 }, { "epoch": 26.79, "learning_rate": 0.001, "loss": 2.5566, "step": 139536 }, { "epoch": 26.79, "learning_rate": 0.001, "loss": 2.551, "step": 139548 }, { "epoch": 26.8, "learning_rate": 0.001, "loss": 2.5516, "step": 139560 }, { "epoch": 26.8, "learning_rate": 0.001, "loss": 2.5496, "step": 139572 }, { "epoch": 26.8, "learning_rate": 0.001, "loss": 2.5405, "step": 139584 }, { "epoch": 26.8, "learning_rate": 0.001, "loss": 2.5508, "step": 139596 }, { "epoch": 26.81, "learning_rate": 0.001, "loss": 2.5543, "step": 139608 }, { "epoch": 26.81, "learning_rate": 0.001, "loss": 2.5468, "step": 139620 }, { "epoch": 26.81, "learning_rate": 0.001, "loss": 2.5429, "step": 139632 }, { "epoch": 26.81, "learning_rate": 0.001, "loss": 2.5558, "step": 139644 }, { "epoch": 26.82, "learning_rate": 0.001, "loss": 2.5423, "step": 139656 }, { "epoch": 26.82, "learning_rate": 0.001, "loss": 2.5424, "step": 139668 }, { "epoch": 26.82, "learning_rate": 0.001, "loss": 2.5461, "step": 139680 }, { "epoch": 26.82, "learning_rate": 0.001, "loss": 2.5555, "step": 139692 }, { "epoch": 26.82, "learning_rate": 0.001, "loss": 2.5528, "step": 139704 }, { "epoch": 26.83, "learning_rate": 0.001, "loss": 2.5493, "step": 139716 }, { "epoch": 26.83, "learning_rate": 0.001, "loss": 2.5355, "step": 139728 }, { "epoch": 26.83, "learning_rate": 0.001, "loss": 2.543, "step": 139740 }, { "epoch": 26.83, "learning_rate": 0.001, "loss": 2.5388, "step": 139752 }, { "epoch": 26.84, "learning_rate": 0.001, "loss": 2.5504, "step": 139764 }, { "epoch": 26.84, "learning_rate": 0.001, "loss": 2.5362, "step": 139776 }, { "epoch": 26.84, "learning_rate": 0.001, "loss": 2.5476, "step": 139788 }, { "epoch": 26.84, "learning_rate": 0.001, "loss": 2.5398, "step": 139800 }, { "epoch": 26.85, "learning_rate": 0.001, "loss": 2.5437, "step": 139812 }, { "epoch": 26.85, "learning_rate": 0.001, "loss": 2.5526, "step": 139824 }, { "epoch": 26.85, "learning_rate": 0.001, "loss": 2.5568, "step": 139836 }, { "epoch": 26.85, "learning_rate": 0.001, "loss": 2.5453, "step": 139848 }, { "epoch": 26.85, "learning_rate": 0.001, "loss": 2.549, "step": 139860 }, { "epoch": 26.86, "learning_rate": 0.001, "loss": 2.5398, "step": 139872 }, { "epoch": 26.86, "learning_rate": 0.001, "loss": 2.5418, "step": 139884 }, { "epoch": 26.86, "learning_rate": 0.001, "loss": 2.543, "step": 139896 }, { "epoch": 26.86, "learning_rate": 0.001, "loss": 2.5413, "step": 139908 }, { "epoch": 26.87, "learning_rate": 0.001, "loss": 2.5333, "step": 139920 }, { "epoch": 26.87, "learning_rate": 0.001, "loss": 2.5454, "step": 139932 }, { "epoch": 26.87, "learning_rate": 0.001, "loss": 2.541, "step": 139944 }, { "epoch": 26.87, "learning_rate": 0.001, "loss": 2.5565, "step": 139956 }, { "epoch": 26.88, "learning_rate": 0.001, "loss": 2.5508, "step": 139968 }, { "epoch": 26.88, "learning_rate": 0.001, "loss": 2.5456, "step": 139980 }, { "epoch": 26.88, "learning_rate": 0.001, "loss": 2.5482, "step": 139992 }, { "epoch": 26.88, "eval_ag_news_accuracy": 0.32378125, "eval_ag_news_bleu_score": 4.859562710872875, "eval_ag_news_bleu_score_sem": 0.15112587920874196, "eval_ag_news_emb_cos_sim": 0.8180899620056152, "eval_ag_news_emb_cos_sim_sem": 0.006294373675654894, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5315823554992676, "eval_ag_news_n_ngrams_match_1": 14.054, "eval_ag_news_n_ngrams_match_2": 3.044, "eval_ag_news_n_ngrams_match_3": 0.906, "eval_ag_news_num_pred_words": 46.664, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.178006605738226, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35150963551075565, "eval_ag_news_runtime": 10.3828, "eval_ag_news_samples_per_second": 48.157, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.34922680013912466, "eval_ag_news_token_set_f1_sem": 0.0043831397881787856, "eval_ag_news_token_set_precision": 0.3355236480530844, "eval_ag_news_token_set_recall": 0.3815039976767892, "eval_ag_news_true_num_tokens": 56.09375, "step": 140000 }, { "epoch": 26.88, "eval_anthropic_toxic_prompts_accuracy": 0.114875, "eval_anthropic_toxic_prompts_bleu_score": 3.2388205154238707, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12482345241526657, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881364583969116, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007808975336088033, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.246290922164917, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.386, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784, "eval_anthropic_toxic_prompts_num_pred_words": 47.948, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.6948587221024, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21760573167571967, "eval_anthropic_toxic_prompts_runtime": 11.0266, "eval_anthropic_toxic_prompts_samples_per_second": 45.345, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3639004416417083, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006608106759269167, "eval_anthropic_toxic_prompts_token_set_precision": 0.45031523922665767, "eval_anthropic_toxic_prompts_token_set_recall": 0.33321317066671063, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 140000 }, { "epoch": 26.88, "eval_arxiv_accuracy": 0.349375, "eval_arxiv_bleu_score": 4.381076378331517, "eval_arxiv_bleu_score_sem": 0.12439305276843189, "eval_arxiv_emb_cos_sim": 0.7646592259407043, "eval_arxiv_emb_cos_sim_sem": 0.009558706361829355, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4013900756835938, "eval_arxiv_n_ngrams_match_1": 15.042, "eval_arxiv_n_ngrams_match_2": 2.982, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 40.172, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.005781377643714, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3626276623035773, "eval_arxiv_runtime": 10.7392, "eval_arxiv_samples_per_second": 46.558, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.3564813725332108, "eval_arxiv_token_set_f1_sem": 0.004310774975927624, "eval_arxiv_token_set_precision": 0.308464528726972, "eval_arxiv_token_set_recall": 0.4394898763358817, "eval_arxiv_true_num_tokens": 64.0, "step": 140000 }, { "epoch": 26.88, "eval_python_code_alpaca_accuracy": 0.1616875, "eval_python_code_alpaca_bleu_score": 4.819738200854623, "eval_python_code_alpaca_bleu_score_sem": 0.14958318953026548, "eval_python_code_alpaca_emb_cos_sim": 0.7647327184677124, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008782805929801575, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8652584552764893, "eval_python_code_alpaca_n_ngrams_match_1": 10.142, "eval_python_code_alpaca_n_ngrams_match_2": 3.054, "eval_python_code_alpaca_n_ngrams_match_3": 1.074, "eval_python_code_alpaca_num_pred_words": 44.41, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.55358943616595, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33987289061179404, "eval_python_code_alpaca_runtime": 9.7905, "eval_python_code_alpaca_samples_per_second": 51.07, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.48364748544866415, "eval_python_code_alpaca_token_set_f1_sem": 0.005537410214425081, "eval_python_code_alpaca_token_set_precision": 0.5552013014529864, "eval_python_code_alpaca_token_set_recall": 0.44523628866971093, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 140000 }, { "epoch": 26.88, "eval_wikibio_accuracy": 0.322875, "eval_wikibio_bleu_score": 5.694965861715147, "eval_wikibio_bleu_score_sem": 0.2083526008429106, "eval_wikibio_emb_cos_sim": 0.749782919883728, "eval_wikibio_emb_cos_sim_sem": 0.008007438260280754, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.720547914505005, "eval_wikibio_n_ngrams_match_1": 9.652, "eval_wikibio_n_ngrams_match_2": 3.232, "eval_wikibio_n_ngrams_match_3": 1.164, "eval_wikibio_num_pred_words": 35.384, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.287009663812725, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34065599092161936, "eval_wikibio_runtime": 12.0605, "eval_wikibio_samples_per_second": 41.458, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.3070669243097482, "eval_wikibio_token_set_f1_sem": 0.0058401314040885275, "eval_wikibio_token_set_precision": 0.3140311115333913, "eval_wikibio_token_set_recall": 0.32070890763575616, "eval_wikibio_true_num_tokens": 61.1328125, "step": 140000 }, { "epoch": 26.88, "eval_nq_accuracy": 0.52925, "eval_nq_bleu_score": 11.72532536143017, "eval_nq_bleu_score_sem": 0.47604284476268044, "eval_nq_emb_cos_sim": 0.8359426259994507, "eval_nq_emb_cos_sim_sem": 0.006911591881583665, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1711294651031494, "eval_nq_n_ngrams_match_1": 23.012, "eval_nq_n_ngrams_match_2": 8.508, "eval_nq_n_ngrams_match_3": 3.934, "eval_nq_num_pred_words": 48.744, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.768181805466043, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4488636174950027, "eval_nq_runtime": 10.4438, "eval_nq_samples_per_second": 47.875, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4615851902336044, "eval_nq_token_set_f1_sem": 0.00505296348586497, "eval_nq_token_set_precision": 0.4199677988052723, "eval_nq_token_set_recall": 0.5200451270792383, "eval_nq_true_num_tokens": 64.0, "step": 140000 }, { "epoch": 26.88, "learning_rate": 0.001, "loss": 2.5484, "step": 140004 }, { "epoch": 26.88, "learning_rate": 0.001, "loss": 2.5477, "step": 140016 }, { "epoch": 26.89, "learning_rate": 0.001, "loss": 2.5517, "step": 140028 }, { "epoch": 26.89, "learning_rate": 0.001, "loss": 2.5516, "step": 140040 }, { "epoch": 26.89, "learning_rate": 0.001, "loss": 2.5549, "step": 140052 }, { "epoch": 26.89, "learning_rate": 0.001, "loss": 2.5489, "step": 140064 }, { "epoch": 26.9, "learning_rate": 0.001, "loss": 2.5383, "step": 140076 }, { "epoch": 26.9, "learning_rate": 0.001, "loss": 2.5481, "step": 140088 }, { "epoch": 26.9, "learning_rate": 0.001, "loss": 2.5492, "step": 140100 }, { "epoch": 26.9, "learning_rate": 0.001, "loss": 2.5442, "step": 140112 }, { "epoch": 26.91, "learning_rate": 0.001, "loss": 2.5473, "step": 140124 }, { "epoch": 26.91, "learning_rate": 0.001, "loss": 2.5434, "step": 140136 }, { "epoch": 26.91, "learning_rate": 0.001, "loss": 2.5552, "step": 140148 }, { "epoch": 26.91, "learning_rate": 0.001, "loss": 2.5501, "step": 140160 }, { "epoch": 26.91, "learning_rate": 0.001, "loss": 2.5493, "step": 140172 }, { "epoch": 26.92, "learning_rate": 0.001, "loss": 2.5458, "step": 140184 }, { "epoch": 26.92, "learning_rate": 0.001, "loss": 2.5494, "step": 140196 }, { "epoch": 26.92, "learning_rate": 0.001, "loss": 2.5465, "step": 140208 }, { "epoch": 26.92, "learning_rate": 0.001, "loss": 2.5515, "step": 140220 }, { "epoch": 26.93, "learning_rate": 0.001, "loss": 2.561, "step": 140232 }, { "epoch": 26.93, "learning_rate": 0.001, "loss": 2.5533, "step": 140244 }, { "epoch": 26.93, "learning_rate": 0.001, "loss": 2.5517, "step": 140256 }, { "epoch": 26.93, "learning_rate": 0.001, "loss": 2.5517, "step": 140268 }, { "epoch": 26.94, "learning_rate": 0.001, "loss": 2.5561, "step": 140280 }, { "epoch": 26.94, "learning_rate": 0.001, "loss": 2.5512, "step": 140292 }, { "epoch": 26.94, "learning_rate": 0.001, "loss": 2.5598, "step": 140304 }, { "epoch": 26.94, "learning_rate": 0.001, "loss": 2.5486, "step": 140316 }, { "epoch": 26.94, "learning_rate": 0.001, "loss": 2.5558, "step": 140328 }, { "epoch": 26.95, "learning_rate": 0.001, "loss": 2.5441, "step": 140340 }, { "epoch": 26.95, "learning_rate": 0.001, "loss": 2.5553, "step": 140352 }, { "epoch": 26.95, "learning_rate": 0.001, "loss": 2.5537, "step": 140364 }, { "epoch": 26.95, "learning_rate": 0.001, "loss": 2.55, "step": 140376 }, { "epoch": 26.96, "learning_rate": 0.001, "loss": 2.5406, "step": 140388 }, { "epoch": 26.96, "learning_rate": 0.001, "loss": 2.5496, "step": 140400 }, { "epoch": 26.96, "learning_rate": 0.001, "loss": 2.5626, "step": 140412 }, { "epoch": 26.96, "learning_rate": 0.001, "loss": 2.5524, "step": 140424 }, { "epoch": 26.97, "learning_rate": 0.001, "loss": 2.551, "step": 140436 }, { "epoch": 26.97, "learning_rate": 0.001, "loss": 2.543, "step": 140448 }, { "epoch": 26.97, "learning_rate": 0.001, "loss": 2.5405, "step": 140460 }, { "epoch": 26.97, "learning_rate": 0.001, "loss": 2.5509, "step": 140472 }, { "epoch": 26.97, "learning_rate": 0.001, "loss": 2.5466, "step": 140484 }, { "epoch": 26.98, "learning_rate": 0.001, "loss": 2.5416, "step": 140496 }, { "epoch": 26.98, "learning_rate": 0.001, "loss": 2.5553, "step": 140508 }, { "epoch": 26.98, "learning_rate": 0.001, "loss": 2.5424, "step": 140520 }, { "epoch": 26.98, "learning_rate": 0.001, "loss": 2.5325, "step": 140532 }, { "epoch": 26.99, "learning_rate": 0.001, "loss": 2.5561, "step": 140544 }, { "epoch": 26.99, "learning_rate": 0.001, "loss": 2.5481, "step": 140556 }, { "epoch": 26.99, "learning_rate": 0.001, "loss": 2.5513, "step": 140568 }, { "epoch": 26.99, "learning_rate": 0.001, "loss": 2.5445, "step": 140580 }, { "epoch": 27.0, "learning_rate": 0.001, "loss": 2.5494, "step": 140592 }, { "epoch": 27.0, "learning_rate": 0.001, "loss": 2.5503, "step": 140604 }, { "epoch": 27.0, "learning_rate": 0.001, "loss": 2.5365, "step": 140616 }, { "epoch": 27.0, "eval_ag_news_accuracy": 0.32353125, "eval_ag_news_bleu_score": 4.739044514090154, "eval_ag_news_bleu_score_sem": 0.140377611948644, "eval_ag_news_emb_cos_sim": 0.8107103705406189, "eval_ag_news_emb_cos_sim_sem": 0.006744443407290032, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.527357339859009, "eval_ag_news_n_ngrams_match_1": 14.024, "eval_ag_news_n_ngrams_match_2": 3.058, "eval_ag_news_n_ngrams_match_3": 0.844, "eval_ag_news_num_pred_words": 46.328, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.03390861576159, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3512726980769755, "eval_ag_news_runtime": 12.4669, "eval_ag_news_samples_per_second": 40.106, "eval_ag_news_steps_per_second": 0.08, "eval_ag_news_token_set_f1": 0.3512999233288417, "eval_ag_news_token_set_f1_sem": 0.004330093257944045, "eval_ag_news_token_set_precision": 0.33668663614916, "eval_ag_news_token_set_recall": 0.38277024613918653, "eval_ag_news_true_num_tokens": 56.09375, "step": 140625 }, { "epoch": 27.0, "eval_anthropic_toxic_prompts_accuracy": 0.11453125, "eval_anthropic_toxic_prompts_bleu_score": 3.1500712685853585, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12016290631420022, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6760477423667908, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009180294175196833, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2295303344726562, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.896, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 47.33, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.267786775269617, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21460008106013984, "eval_anthropic_toxic_prompts_runtime": 9.7578, "eval_anthropic_toxic_prompts_samples_per_second": 51.241, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3593870896366552, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066447142719551835, "eval_anthropic_toxic_prompts_token_set_precision": 0.44027933446874573, "eval_anthropic_toxic_prompts_token_set_recall": 0.32803647486115683, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 140625 }, { "epoch": 27.0, "eval_arxiv_accuracy": 0.3488125, "eval_arxiv_bleu_score": 4.256183700835488, "eval_arxiv_bleu_score_sem": 0.12197246067995418, "eval_arxiv_emb_cos_sim": 0.7529629468917847, "eval_arxiv_emb_cos_sim_sem": 0.009230361997049743, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4022555351257324, "eval_arxiv_n_ngrams_match_1": 14.99, "eval_arxiv_n_ngrams_match_2": 2.912, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 39.9, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 30.03176140516418, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3601718659795816, "eval_arxiv_runtime": 10.5045, "eval_arxiv_samples_per_second": 47.599, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3523661766441768, "eval_arxiv_token_set_f1_sem": 0.004168593281648541, "eval_arxiv_token_set_precision": 0.30437402121242574, "eval_arxiv_token_set_recall": 0.4396984248189482, "eval_arxiv_true_num_tokens": 64.0, "step": 140625 }, { "epoch": 27.0, "eval_python_code_alpaca_accuracy": 0.162, "eval_python_code_alpaca_bleu_score": 4.855418194740201, "eval_python_code_alpaca_bleu_score_sem": 0.15619118291888343, "eval_python_code_alpaca_emb_cos_sim": 0.7572555541992188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008597182647880994, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.866929531097412, "eval_python_code_alpaca_n_ngrams_match_1": 9.92, "eval_python_code_alpaca_n_ngrams_match_2": 2.95, "eval_python_code_alpaca_n_ngrams_match_3": 1.042, "eval_python_code_alpaca_num_pred_words": 43.09, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.58294733785115, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34126812701357984, "eval_python_code_alpaca_runtime": 9.9135, "eval_python_code_alpaca_samples_per_second": 50.437, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.47922315503559504, "eval_python_code_alpaca_token_set_f1_sem": 0.005561840676598813, "eval_python_code_alpaca_token_set_precision": 0.5407090048895702, "eval_python_code_alpaca_token_set_recall": 0.45118218335335475, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 140625 }, { "epoch": 27.0, "eval_wikibio_accuracy": 0.32640625, "eval_wikibio_bleu_score": 5.992813297432066, "eval_wikibio_bleu_score_sem": 0.2181657710188778, "eval_wikibio_emb_cos_sim": 0.7401313781738281, "eval_wikibio_emb_cos_sim_sem": 0.010332932773717843, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.703239679336548, "eval_wikibio_n_ngrams_match_1": 9.702, "eval_wikibio_n_ngrams_match_2": 3.286, "eval_wikibio_n_ngrams_match_3": 1.218, "eval_wikibio_num_pred_words": 35.008, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.57855314341375, "eval_wikibio_pred_num_tokens": 62.84375, "eval_wikibio_rouge_score": 0.34762803541341447, "eval_wikibio_runtime": 9.8407, "eval_wikibio_samples_per_second": 50.809, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.313201020253811, "eval_wikibio_token_set_f1_sem": 0.005815097802903207, "eval_wikibio_token_set_precision": 0.3179119318721146, "eval_wikibio_token_set_recall": 0.3278201567920565, "eval_wikibio_true_num_tokens": 61.1328125, "step": 140625 }, { "epoch": 27.0, "eval_nq_accuracy": 0.53040625, "eval_nq_bleu_score": 12.193885788459726, "eval_nq_bleu_score_sem": 0.482405015018087, "eval_nq_emb_cos_sim": 0.8364588618278503, "eval_nq_emb_cos_sim_sem": 0.007292546708561509, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.167343854904175, "eval_nq_n_ngrams_match_1": 23.15, "eval_nq_n_ngrams_match_2": 8.62, "eval_nq_n_ngrams_match_3": 4.106, "eval_nq_num_pred_words": 48.85, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.7350516355167, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4494291900578522, "eval_nq_runtime": 10.4137, "eval_nq_samples_per_second": 48.014, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.464658240093137, "eval_nq_token_set_f1_sem": 0.004993405310024307, "eval_nq_token_set_precision": 0.4231963435907049, "eval_nq_token_set_recall": 0.5223170698252272, "eval_nq_true_num_tokens": 64.0, "step": 140625 }, { "epoch": 27.0, "learning_rate": 0.001, "loss": 2.5311, "step": 140628 }, { "epoch": 27.0, "learning_rate": 0.001, "loss": 2.529, "step": 140640 }, { "epoch": 27.01, "learning_rate": 0.001, "loss": 2.5286, "step": 140652 }, { "epoch": 27.01, "learning_rate": 0.001, "loss": 2.5384, "step": 140664 }, { "epoch": 27.01, "learning_rate": 0.001, "loss": 2.536, "step": 140676 }, { "epoch": 27.01, "learning_rate": 0.001, "loss": 2.5431, "step": 140688 }, { "epoch": 27.02, "learning_rate": 0.001, "loss": 2.5391, "step": 140700 }, { "epoch": 27.02, "learning_rate": 0.001, "loss": 2.5378, "step": 140712 }, { "epoch": 27.02, "learning_rate": 0.001, "loss": 2.5335, "step": 140724 }, { "epoch": 27.02, "learning_rate": 0.001, "loss": 2.5409, "step": 140736 }, { "epoch": 27.03, "learning_rate": 0.001, "loss": 2.5218, "step": 140748 }, { "epoch": 27.03, "learning_rate": 0.001, "loss": 2.5328, "step": 140760 }, { "epoch": 27.03, "learning_rate": 0.001, "loss": 2.5367, "step": 140772 }, { "epoch": 27.03, "learning_rate": 0.001, "loss": 2.5361, "step": 140784 }, { "epoch": 27.03, "learning_rate": 0.001, "loss": 2.5281, "step": 140796 }, { "epoch": 27.04, "learning_rate": 0.001, "loss": 2.528, "step": 140808 }, { "epoch": 27.04, "learning_rate": 0.001, "loss": 2.5363, "step": 140820 }, { "epoch": 27.04, "learning_rate": 0.001, "loss": 2.5208, "step": 140832 }, { "epoch": 27.04, "learning_rate": 0.001, "loss": 2.5442, "step": 140844 }, { "epoch": 27.05, "learning_rate": 0.001, "loss": 2.5363, "step": 140856 }, { "epoch": 27.05, "learning_rate": 0.001, "loss": 2.5303, "step": 140868 }, { "epoch": 27.05, "learning_rate": 0.001, "loss": 2.5349, "step": 140880 }, { "epoch": 27.05, "learning_rate": 0.001, "loss": 2.5238, "step": 140892 }, { "epoch": 27.06, "learning_rate": 0.001, "loss": 2.5325, "step": 140904 }, { "epoch": 27.06, "learning_rate": 0.001, "loss": 2.5304, "step": 140916 }, { "epoch": 27.06, "learning_rate": 0.001, "loss": 2.5328, "step": 140928 }, { "epoch": 27.06, "learning_rate": 0.001, "loss": 2.5279, "step": 140940 }, { "epoch": 27.06, "learning_rate": 0.001, "loss": 2.5339, "step": 140952 }, { "epoch": 27.07, "learning_rate": 0.001, "loss": 2.5324, "step": 140964 }, { "epoch": 27.07, "learning_rate": 0.001, "loss": 2.5311, "step": 140976 }, { "epoch": 27.07, "learning_rate": 0.001, "loss": 2.532, "step": 140988 }, { "epoch": 27.07, "learning_rate": 0.001, "loss": 2.5345, "step": 141000 }, { "epoch": 27.08, "learning_rate": 0.001, "loss": 2.539, "step": 141012 }, { "epoch": 27.08, "learning_rate": 0.001, "loss": 2.5322, "step": 141024 }, { "epoch": 27.08, "learning_rate": 0.001, "loss": 2.5354, "step": 141036 }, { "epoch": 27.08, "learning_rate": 0.001, "loss": 2.5352, "step": 141048 }, { "epoch": 27.09, "learning_rate": 0.001, "loss": 2.5369, "step": 141060 }, { "epoch": 27.09, "learning_rate": 0.001, "loss": 2.5339, "step": 141072 }, { "epoch": 27.09, "learning_rate": 0.001, "loss": 2.5358, "step": 141084 }, { "epoch": 27.09, "learning_rate": 0.001, "loss": 2.5207, "step": 141096 }, { "epoch": 27.09, "learning_rate": 0.001, "loss": 2.5478, "step": 141108 }, { "epoch": 27.1, "learning_rate": 0.001, "loss": 2.5338, "step": 141120 }, { "epoch": 27.1, "learning_rate": 0.001, "loss": 2.5389, "step": 141132 }, { "epoch": 27.1, "learning_rate": 0.001, "loss": 2.5428, "step": 141144 }, { "epoch": 27.1, "learning_rate": 0.001, "loss": 2.5366, "step": 141156 }, { "epoch": 27.11, "learning_rate": 0.001, "loss": 2.5397, "step": 141168 }, { "epoch": 27.11, "learning_rate": 0.001, "loss": 2.5494, "step": 141180 }, { "epoch": 27.11, "learning_rate": 0.001, "loss": 2.5334, "step": 141192 }, { "epoch": 27.11, "learning_rate": 0.001, "loss": 2.5343, "step": 141204 }, { "epoch": 27.12, "learning_rate": 0.001, "loss": 2.532, "step": 141216 }, { "epoch": 27.12, "learning_rate": 0.001, "loss": 2.5393, "step": 141228 }, { "epoch": 27.12, "learning_rate": 0.001, "loss": 2.5289, "step": 141240 }, { "epoch": 27.12, "eval_ag_news_accuracy": 0.3264375, "eval_ag_news_bleu_score": 4.9004347070764664, "eval_ag_news_bleu_score_sem": 0.14755183483400694, "eval_ag_news_emb_cos_sim": 0.8155641555786133, "eval_ag_news_emb_cos_sim_sem": 0.0073370346827982815, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5504798889160156, "eval_ag_news_n_ngrams_match_1": 14.286, "eval_ag_news_n_ngrams_match_2": 3.17, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.842, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.8300280220735, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3535168361662875, "eval_ag_news_runtime": 10.0954, "eval_ag_news_samples_per_second": 49.528, "eval_ag_news_steps_per_second": 0.099, "eval_ag_news_token_set_f1": 0.3573181011436177, "eval_ag_news_token_set_f1_sem": 0.004470164752863457, "eval_ag_news_token_set_precision": 0.3391237020509248, "eval_ag_news_token_set_recall": 0.3935938386315709, "eval_ag_news_true_num_tokens": 56.09375, "step": 141250 }, { "epoch": 27.12, "eval_anthropic_toxic_prompts_accuracy": 0.11478125, "eval_anthropic_toxic_prompts_bleu_score": 3.0150833923399283, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10955506768456669, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785062551498413, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008186659172494727, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.249908685684204, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.178, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 48.276, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.78798499746996, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21101062662901304, "eval_anthropic_toxic_prompts_runtime": 9.9024, "eval_anthropic_toxic_prompts_samples_per_second": 50.493, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3707925278993287, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006482853018165835, "eval_anthropic_toxic_prompts_token_set_precision": 0.4461795370414037, "eval_anthropic_toxic_prompts_token_set_recall": 0.34683668237564347, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 141250 }, { "epoch": 27.12, "eval_arxiv_accuracy": 0.34790625, "eval_arxiv_bleu_score": 4.461709294954279, "eval_arxiv_bleu_score_sem": 0.1344275111919054, "eval_arxiv_emb_cos_sim": 0.7651487588882446, "eval_arxiv_emb_cos_sim_sem": 0.008997062068384737, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.398071765899658, "eval_arxiv_n_ngrams_match_1": 15.362, "eval_arxiv_n_ngrams_match_2": 3.046, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 41.014, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.90637791667463, "eval_arxiv_pred_num_tokens": 62.96875, "eval_arxiv_rouge_score": 0.3651099171339527, "eval_arxiv_runtime": 10.1248, "eval_arxiv_samples_per_second": 49.384, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.359213955285717, "eval_arxiv_token_set_f1_sem": 0.004244709962988462, "eval_arxiv_token_set_precision": 0.308466804437646, "eval_arxiv_token_set_recall": 0.44681578820475754, "eval_arxiv_true_num_tokens": 64.0, "step": 141250 }, { "epoch": 27.12, "eval_python_code_alpaca_accuracy": 0.16103125, "eval_python_code_alpaca_bleu_score": 4.446975211810466, "eval_python_code_alpaca_bleu_score_sem": 0.1370287258997162, "eval_python_code_alpaca_emb_cos_sim": 0.7647826671600342, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007976102078412903, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8925230503082275, "eval_python_code_alpaca_n_ngrams_match_1": 10.074, "eval_python_code_alpaca_n_ngrams_match_2": 3.006, "eval_python_code_alpaca_n_ngrams_match_3": 0.998, "eval_python_code_alpaca_num_pred_words": 46.34, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.038764945833506, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32902236652320827, "eval_python_code_alpaca_runtime": 9.8522, "eval_python_code_alpaca_samples_per_second": 50.75, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48857035639300544, "eval_python_code_alpaca_token_set_f1_sem": 0.005201786981010137, "eval_python_code_alpaca_token_set_precision": 0.5519706306998449, "eval_python_code_alpaca_token_set_recall": 0.45941360942084103, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 141250 }, { "epoch": 27.12, "eval_wikibio_accuracy": 0.3249375, "eval_wikibio_bleu_score": 5.880058079662076, "eval_wikibio_bleu_score_sem": 0.21368764081563563, "eval_wikibio_emb_cos_sim": 0.7586035132408142, "eval_wikibio_emb_cos_sim_sem": 0.007264186489325233, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7385756969451904, "eval_wikibio_n_ngrams_match_1": 10.236, "eval_wikibio_n_ngrams_match_2": 3.372, "eval_wikibio_n_ngrams_match_3": 1.204, "eval_wikibio_num_pred_words": 37.052, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.03807254955418, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3581604096255913, "eval_wikibio_runtime": 9.8967, "eval_wikibio_samples_per_second": 50.522, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32323460095912443, "eval_wikibio_token_set_f1_sem": 0.005248446062747598, "eval_wikibio_token_set_precision": 0.331230550101497, "eval_wikibio_token_set_recall": 0.3317681584772462, "eval_wikibio_true_num_tokens": 61.1328125, "step": 141250 }, { "epoch": 27.12, "eval_nq_accuracy": 0.5293125, "eval_nq_bleu_score": 11.888426097655694, "eval_nq_bleu_score_sem": 0.4823268204876117, "eval_nq_emb_cos_sim": 0.8352838158607483, "eval_nq_emb_cos_sim_sem": 0.006663987049116259, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1758615970611572, "eval_nq_n_ngrams_match_1": 23.2, "eval_nq_n_ngrams_match_2": 8.608, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 49.152, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.809772327108014, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44937964306180467, "eval_nq_runtime": 10.6536, "eval_nq_samples_per_second": 46.933, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.46292291547340003, "eval_nq_token_set_f1_sem": 0.004760199512869909, "eval_nq_token_set_precision": 0.41947397455665075, "eval_nq_token_set_recall": 0.5252281568202145, "eval_nq_true_num_tokens": 64.0, "step": 141250 }, { "epoch": 27.12, "learning_rate": 0.001, "loss": 2.5459, "step": 141252 }, { "epoch": 27.12, "learning_rate": 0.001, "loss": 2.5398, "step": 141264 }, { "epoch": 27.13, "learning_rate": 0.001, "loss": 2.5328, "step": 141276 }, { "epoch": 27.13, "learning_rate": 0.001, "loss": 2.5429, "step": 141288 }, { "epoch": 27.13, "learning_rate": 0.001, "loss": 2.5328, "step": 141300 }, { "epoch": 27.13, "learning_rate": 0.001, "loss": 2.5354, "step": 141312 }, { "epoch": 27.14, "learning_rate": 0.001, "loss": 2.5403, "step": 141324 }, { "epoch": 27.14, "learning_rate": 0.001, "loss": 2.5366, "step": 141336 }, { "epoch": 27.14, "learning_rate": 0.001, "loss": 2.5456, "step": 141348 }, { "epoch": 27.14, "learning_rate": 0.001, "loss": 2.5396, "step": 141360 }, { "epoch": 27.15, "learning_rate": 0.001, "loss": 2.5388, "step": 141372 }, { "epoch": 27.15, "learning_rate": 0.001, "loss": 2.5489, "step": 141384 }, { "epoch": 27.15, "learning_rate": 0.001, "loss": 2.5427, "step": 141396 }, { "epoch": 27.15, "learning_rate": 0.001, "loss": 2.5496, "step": 141408 }, { "epoch": 27.15, "learning_rate": 0.001, "loss": 2.5454, "step": 141420 }, { "epoch": 27.16, "learning_rate": 0.001, "loss": 2.541, "step": 141432 }, { "epoch": 27.16, "learning_rate": 0.001, "loss": 2.5397, "step": 141444 }, { "epoch": 27.16, "learning_rate": 0.001, "loss": 2.5312, "step": 141456 }, { "epoch": 27.16, "learning_rate": 0.001, "loss": 2.5406, "step": 141468 }, { "epoch": 27.17, "learning_rate": 0.001, "loss": 2.5426, "step": 141480 }, { "epoch": 27.17, "learning_rate": 0.001, "loss": 2.5333, "step": 141492 }, { "epoch": 27.17, "learning_rate": 0.001, "loss": 2.5457, "step": 141504 }, { "epoch": 27.17, "learning_rate": 0.001, "loss": 2.5288, "step": 141516 }, { "epoch": 27.18, "learning_rate": 0.001, "loss": 2.5406, "step": 141528 }, { "epoch": 27.18, "learning_rate": 0.001, "loss": 2.5397, "step": 141540 }, { "epoch": 27.18, "learning_rate": 0.001, "loss": 2.5436, "step": 141552 }, { "epoch": 27.18, "learning_rate": 0.001, "loss": 2.5539, "step": 141564 }, { "epoch": 27.18, "learning_rate": 0.001, "loss": 2.5377, "step": 141576 }, { "epoch": 27.19, "learning_rate": 0.001, "loss": 2.531, "step": 141588 }, { "epoch": 27.19, "learning_rate": 0.001, "loss": 2.547, "step": 141600 }, { "epoch": 27.19, "learning_rate": 0.001, "loss": 2.5361, "step": 141612 }, { "epoch": 27.19, "learning_rate": 0.001, "loss": 2.5449, "step": 141624 }, { "epoch": 27.2, "learning_rate": 0.001, "loss": 2.5374, "step": 141636 }, { "epoch": 27.2, "learning_rate": 0.001, "loss": 2.5378, "step": 141648 }, { "epoch": 27.2, "learning_rate": 0.001, "loss": 2.5437, "step": 141660 }, { "epoch": 27.2, "learning_rate": 0.001, "loss": 2.5397, "step": 141672 }, { "epoch": 27.21, "learning_rate": 0.001, "loss": 2.5319, "step": 141684 }, { "epoch": 27.21, "learning_rate": 0.001, "loss": 2.5341, "step": 141696 }, { "epoch": 27.21, "learning_rate": 0.001, "loss": 2.5356, "step": 141708 }, { "epoch": 27.21, "learning_rate": 0.001, "loss": 2.5367, "step": 141720 }, { "epoch": 27.21, "learning_rate": 0.001, "loss": 2.5418, "step": 141732 }, { "epoch": 27.22, "learning_rate": 0.001, "loss": 2.54, "step": 141744 }, { "epoch": 27.22, "learning_rate": 0.001, "loss": 2.5363, "step": 141756 }, { "epoch": 27.22, "learning_rate": 0.001, "loss": 2.5414, "step": 141768 }, { "epoch": 27.22, "learning_rate": 0.001, "loss": 2.5373, "step": 141780 }, { "epoch": 27.23, "learning_rate": 0.001, "loss": 2.5378, "step": 141792 }, { "epoch": 27.23, "learning_rate": 0.001, "loss": 2.5368, "step": 141804 }, { "epoch": 27.23, "learning_rate": 0.001, "loss": 2.5447, "step": 141816 }, { "epoch": 27.23, "learning_rate": 0.001, "loss": 2.5422, "step": 141828 }, { "epoch": 27.24, "learning_rate": 0.001, "loss": 2.5388, "step": 141840 }, { "epoch": 27.24, "learning_rate": 0.001, "loss": 2.545, "step": 141852 }, { "epoch": 27.24, "learning_rate": 0.001, "loss": 2.5404, "step": 141864 }, { "epoch": 27.24, "eval_ag_news_accuracy": 0.3230625, "eval_ag_news_bleu_score": 4.695236421046426, "eval_ag_news_bleu_score_sem": 0.14337753510925322, "eval_ag_news_emb_cos_sim": 0.8126242160797119, "eval_ag_news_emb_cos_sim_sem": 0.007243090016012007, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5421786308288574, "eval_ag_news_n_ngrams_match_1": 14.102, "eval_ag_news_n_ngrams_match_2": 3.044, "eval_ag_news_n_ngrams_match_3": 0.852, "eval_ag_news_num_pred_words": 46.33, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.54209174046591, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3511028068599481, "eval_ag_news_runtime": 10.5234, "eval_ag_news_samples_per_second": 47.513, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35126550267062095, "eval_ag_news_token_set_f1_sem": 0.004349084425878091, "eval_ag_news_token_set_precision": 0.3368724276109881, "eval_ag_news_token_set_recall": 0.3818385208659725, "eval_ag_news_true_num_tokens": 56.09375, "step": 141875 }, { "epoch": 27.24, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.1471792051203704, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11996522175800078, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6792980432510376, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009361244425288769, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2164149284362793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.282, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.956, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 47.69, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.93855322728402, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21490718146133306, "eval_anthropic_toxic_prompts_runtime": 9.8643, "eval_anthropic_toxic_prompts_samples_per_second": 50.688, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35637220708491674, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00661980135967938, "eval_anthropic_toxic_prompts_token_set_precision": 0.4446733590317007, "eval_anthropic_toxic_prompts_token_set_recall": 0.32503633152903283, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 141875 }, { "epoch": 27.24, "eval_arxiv_accuracy": 0.34909375, "eval_arxiv_bleu_score": 4.461642500518708, "eval_arxiv_bleu_score_sem": 0.13195831530701474, "eval_arxiv_emb_cos_sim": 0.7710940837860107, "eval_arxiv_emb_cos_sim_sem": 0.007245903976694617, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3970530033111572, "eval_arxiv_n_ngrams_match_1": 15.342, "eval_arxiv_n_ngrams_match_2": 2.986, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 40.674, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.875925932000698, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.366844827069081, "eval_arxiv_runtime": 10.5053, "eval_arxiv_samples_per_second": 47.595, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.35957249144156844, "eval_arxiv_token_set_f1_sem": 0.004022660989498987, "eval_arxiv_token_set_precision": 0.31175704851952657, "eval_arxiv_token_set_recall": 0.43951956309984946, "eval_arxiv_true_num_tokens": 64.0, "step": 141875 }, { "epoch": 27.24, "eval_python_code_alpaca_accuracy": 0.16125, "eval_python_code_alpaca_bleu_score": 4.800524728411466, "eval_python_code_alpaca_bleu_score_sem": 0.15556858641471405, "eval_python_code_alpaca_emb_cos_sim": 0.7585030794143677, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076178196217558895, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.879707098007202, "eval_python_code_alpaca_n_ngrams_match_1": 10.124, "eval_python_code_alpaca_n_ngrams_match_2": 3.06, "eval_python_code_alpaca_n_ngrams_match_3": 1.052, "eval_python_code_alpaca_num_pred_words": 44.164, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.80905610758034, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3399763958064745, "eval_python_code_alpaca_runtime": 9.9431, "eval_python_code_alpaca_samples_per_second": 50.286, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48190383905050427, "eval_python_code_alpaca_token_set_f1_sem": 0.0055441938121901495, "eval_python_code_alpaca_token_set_precision": 0.5517781748117877, "eval_python_code_alpaca_token_set_recall": 0.4495225154600577, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 141875 }, { "epoch": 27.24, "eval_wikibio_accuracy": 0.3265, "eval_wikibio_bleu_score": 5.8613962982352295, "eval_wikibio_bleu_score_sem": 0.20624154148742285, "eval_wikibio_emb_cos_sim": 0.7403441667556763, "eval_wikibio_emb_cos_sim_sem": 0.009352147291455495, "eval_wikibio_emb_top1_equal": 0.1015625, "eval_wikibio_emb_top1_equal_sem": 0.026804565886848545, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7235610485076904, "eval_wikibio_n_ngrams_match_1": 10.048, "eval_wikibio_n_ngrams_match_2": 3.336, "eval_wikibio_n_ngrams_match_3": 1.202, "eval_wikibio_num_pred_words": 35.714, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.411600566779896, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3533449039304873, "eval_wikibio_runtime": 9.8714, "eval_wikibio_samples_per_second": 50.652, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.31918143246822217, "eval_wikibio_token_set_f1_sem": 0.005600280279077625, "eval_wikibio_token_set_precision": 0.32590203181550037, "eval_wikibio_token_set_recall": 0.33026026306714884, "eval_wikibio_true_num_tokens": 61.1328125, "step": 141875 }, { "epoch": 27.24, "eval_nq_accuracy": 0.530625, "eval_nq_bleu_score": 11.923652479195313, "eval_nq_bleu_score_sem": 0.48769293250735835, "eval_nq_emb_cos_sim": 0.8363159894943237, "eval_nq_emb_cos_sim_sem": 0.006774035104756235, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.17130708694458, "eval_nq_n_ngrams_match_1": 23.072, "eval_nq_n_ngrams_match_2": 8.488, "eval_nq_n_ngrams_match_3": 3.974, "eval_nq_num_pred_words": 48.922, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.769739364388474, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4477909038090314, "eval_nq_runtime": 10.318, "eval_nq_samples_per_second": 48.459, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.46216718630629605, "eval_nq_token_set_f1_sem": 0.004894449845837533, "eval_nq_token_set_precision": 0.4203633866577919, "eval_nq_token_set_recall": 0.5202700453028507, "eval_nq_true_num_tokens": 64.0, "step": 141875 }, { "epoch": 27.24, "learning_rate": 0.001, "loss": 2.5375, "step": 141876 }, { "epoch": 27.24, "learning_rate": 0.001, "loss": 2.5352, "step": 141888 }, { "epoch": 27.25, "learning_rate": 0.001, "loss": 2.5383, "step": 141900 }, { "epoch": 27.25, "learning_rate": 0.001, "loss": 2.5336, "step": 141912 }, { "epoch": 27.25, "learning_rate": 0.001, "loss": 2.5414, "step": 141924 }, { "epoch": 27.25, "learning_rate": 0.001, "loss": 2.5426, "step": 141936 }, { "epoch": 27.26, "learning_rate": 0.001, "loss": 2.5395, "step": 141948 }, { "epoch": 27.26, "learning_rate": 0.001, "loss": 2.5364, "step": 141960 }, { "epoch": 27.26, "learning_rate": 0.001, "loss": 2.5461, "step": 141972 }, { "epoch": 27.26, "learning_rate": 0.001, "loss": 2.5383, "step": 141984 }, { "epoch": 27.26, "learning_rate": 0.001, "loss": 2.5307, "step": 141996 }, { "epoch": 27.27, "learning_rate": 0.001, "loss": 2.5458, "step": 142008 }, { "epoch": 27.27, "learning_rate": 0.001, "loss": 2.5281, "step": 142020 }, { "epoch": 27.27, "learning_rate": 0.001, "loss": 2.5347, "step": 142032 }, { "epoch": 27.27, "learning_rate": 0.001, "loss": 2.5357, "step": 142044 }, { "epoch": 27.28, "learning_rate": 0.001, "loss": 2.5377, "step": 142056 }, { "epoch": 27.28, "learning_rate": 0.001, "loss": 2.5383, "step": 142068 }, { "epoch": 27.28, "learning_rate": 0.001, "loss": 2.5344, "step": 142080 }, { "epoch": 27.28, "learning_rate": 0.001, "loss": 2.5379, "step": 142092 }, { "epoch": 27.29, "learning_rate": 0.001, "loss": 2.531, "step": 142104 }, { "epoch": 27.29, "learning_rate": 0.001, "loss": 2.5433, "step": 142116 }, { "epoch": 27.29, "learning_rate": 0.001, "loss": 2.5452, "step": 142128 }, { "epoch": 27.29, "learning_rate": 0.001, "loss": 2.5241, "step": 142140 }, { "epoch": 27.29, "learning_rate": 0.001, "loss": 2.5477, "step": 142152 }, { "epoch": 27.3, "learning_rate": 0.001, "loss": 2.5381, "step": 142164 }, { "epoch": 27.3, "learning_rate": 0.001, "loss": 2.5334, "step": 142176 }, { "epoch": 27.3, "learning_rate": 0.001, "loss": 2.5365, "step": 142188 }, { "epoch": 27.3, "learning_rate": 0.001, "loss": 2.5414, "step": 142200 }, { "epoch": 27.31, "learning_rate": 0.001, "loss": 2.548, "step": 142212 }, { "epoch": 27.31, "learning_rate": 0.001, "loss": 2.5367, "step": 142224 }, { "epoch": 27.31, "learning_rate": 0.001, "loss": 2.5334, "step": 142236 }, { "epoch": 27.31, "learning_rate": 0.001, "loss": 2.5359, "step": 142248 }, { "epoch": 27.32, "learning_rate": 0.001, "loss": 2.5442, "step": 142260 }, { "epoch": 27.32, "learning_rate": 0.001, "loss": 2.5478, "step": 142272 }, { "epoch": 27.32, "learning_rate": 0.001, "loss": 2.5485, "step": 142284 }, { "epoch": 27.32, "learning_rate": 0.001, "loss": 2.5442, "step": 142296 }, { "epoch": 27.32, "learning_rate": 0.001, "loss": 2.5523, "step": 142308 }, { "epoch": 27.33, "learning_rate": 0.001, "loss": 2.5393, "step": 142320 }, { "epoch": 27.33, "learning_rate": 0.001, "loss": 2.5376, "step": 142332 }, { "epoch": 27.33, "learning_rate": 0.001, "loss": 2.5326, "step": 142344 }, { "epoch": 27.33, "learning_rate": 0.001, "loss": 2.5416, "step": 142356 }, { "epoch": 27.34, "learning_rate": 0.001, "loss": 2.5306, "step": 142368 }, { "epoch": 27.34, "learning_rate": 0.001, "loss": 2.5396, "step": 142380 }, { "epoch": 27.34, "learning_rate": 0.001, "loss": 2.5414, "step": 142392 }, { "epoch": 27.34, "learning_rate": 0.001, "loss": 2.5432, "step": 142404 }, { "epoch": 27.35, "learning_rate": 0.001, "loss": 2.538, "step": 142416 }, { "epoch": 27.35, "learning_rate": 0.001, "loss": 2.5303, "step": 142428 }, { "epoch": 27.35, "learning_rate": 0.001, "loss": 2.5378, "step": 142440 }, { "epoch": 27.35, "learning_rate": 0.001, "loss": 2.5344, "step": 142452 }, { "epoch": 27.35, "learning_rate": 0.001, "loss": 2.543, "step": 142464 }, { "epoch": 27.36, "learning_rate": 0.001, "loss": 2.5343, "step": 142476 }, { "epoch": 27.36, "learning_rate": 0.001, "loss": 2.5376, "step": 142488 }, { "epoch": 27.36, "learning_rate": 0.001, "loss": 2.5423, "step": 142500 }, { "epoch": 27.36, "eval_ag_news_accuracy": 0.3239375, "eval_ag_news_bleu_score": 4.830598132312196, "eval_ag_news_bleu_score_sem": 0.1573441424417308, "eval_ag_news_emb_cos_sim": 0.8136233687400818, "eval_ag_news_emb_cos_sim_sem": 0.006709981773187296, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5366628170013428, "eval_ag_news_n_ngrams_match_1": 14.158, "eval_ag_news_n_ngrams_match_2": 3.12, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 47.28, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.35208848622561, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.34865583719920046, "eval_ag_news_runtime": 10.5531, "eval_ag_news_samples_per_second": 47.379, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3506578007699237, "eval_ag_news_token_set_f1_sem": 0.00432896121733213, "eval_ag_news_token_set_precision": 0.33785937442762887, "eval_ag_news_token_set_recall": 0.3771866626859236, "eval_ag_news_true_num_tokens": 56.09375, "step": 142500 }, { "epoch": 27.36, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.018429416369582, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11312213146117575, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.677958607673645, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008252589355911416, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.21828031539917, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.144, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 47.396, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.985116695288514, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21155234687896496, "eval_anthropic_toxic_prompts_runtime": 10.4133, "eval_anthropic_toxic_prompts_samples_per_second": 48.016, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.3547790678122964, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643582847758279, "eval_anthropic_toxic_prompts_token_set_precision": 0.43369618688291867, "eval_anthropic_toxic_prompts_token_set_recall": 0.3269438007734628, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 142500 }, { "epoch": 27.36, "eval_arxiv_accuracy": 0.34953125, "eval_arxiv_bleu_score": 4.32187604679201, "eval_arxiv_bleu_score_sem": 0.1283524302047555, "eval_arxiv_emb_cos_sim": 0.7498331665992737, "eval_arxiv_emb_cos_sim_sem": 0.009119076536279268, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.39444637298584, "eval_arxiv_n_ngrams_match_1": 14.798, "eval_arxiv_n_ngrams_match_2": 2.97, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 39.894, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.79815184565188, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.35563091088651877, "eval_arxiv_runtime": 10.1858, "eval_arxiv_samples_per_second": 49.088, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.34819284414634094, "eval_arxiv_token_set_f1_sem": 0.004589054415412313, "eval_arxiv_token_set_precision": 0.2984303754212457, "eval_arxiv_token_set_recall": 0.4399873959763242, "eval_arxiv_true_num_tokens": 64.0, "step": 142500 }, { "epoch": 27.36, "eval_python_code_alpaca_accuracy": 0.160625, "eval_python_code_alpaca_bleu_score": 4.589698055543078, "eval_python_code_alpaca_bleu_score_sem": 0.14378166776701212, "eval_python_code_alpaca_emb_cos_sim": 0.7577734589576721, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00971669711619498, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.88792085647583, "eval_python_code_alpaca_n_ngrams_match_1": 9.822, "eval_python_code_alpaca_n_ngrams_match_2": 2.89, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 43.624, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.955937792553886, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3282297908130535, "eval_python_code_alpaca_runtime": 10.3509, "eval_python_code_alpaca_samples_per_second": 48.305, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.47276119537101813, "eval_python_code_alpaca_token_set_f1_sem": 0.005933106140040877, "eval_python_code_alpaca_token_set_precision": 0.5343343202297457, "eval_python_code_alpaca_token_set_recall": 0.44413219104114415, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 142500 }, { "epoch": 27.36, "eval_wikibio_accuracy": 0.32396875, "eval_wikibio_bleu_score": 6.13173766393367, "eval_wikibio_bleu_score_sem": 0.21518832836323656, "eval_wikibio_emb_cos_sim": 0.7459526062011719, "eval_wikibio_emb_cos_sim_sem": 0.008536965338071054, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.712768077850342, "eval_wikibio_n_ngrams_match_1": 10.32, "eval_wikibio_n_ngrams_match_2": 3.526, "eval_wikibio_n_ngrams_match_3": 1.28, "eval_wikibio_num_pred_words": 36.85, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.96704970459743, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36472456853502216, "eval_wikibio_runtime": 10.0988, "eval_wikibio_samples_per_second": 49.511, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3260278336768941, "eval_wikibio_token_set_f1_sem": 0.005138854716074928, "eval_wikibio_token_set_precision": 0.33401035182185435, "eval_wikibio_token_set_recall": 0.33341173815211955, "eval_wikibio_true_num_tokens": 61.1328125, "step": 142500 }, { "epoch": 27.36, "eval_nq_accuracy": 0.529, "eval_nq_bleu_score": 11.780356328999263, "eval_nq_bleu_score_sem": 0.47903628345571003, "eval_nq_emb_cos_sim": 0.8332801461219788, "eval_nq_emb_cos_sim_sem": 0.007031384420308262, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.173349618911743, "eval_nq_n_ngrams_match_1": 23.45, "eval_nq_n_ngrams_match_2": 8.572, "eval_nq_n_ngrams_match_3": 3.876, "eval_nq_num_pred_words": 49.566, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.787670143244592, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4521968124155398, "eval_nq_runtime": 10.4245, "eval_nq_samples_per_second": 47.964, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46548585375671925, "eval_nq_token_set_f1_sem": 0.0049039555675616095, "eval_nq_token_set_precision": 0.42552332036072815, "eval_nq_token_set_recall": 0.520016810258735, "eval_nq_true_num_tokens": 64.0, "step": 142500 }, { "epoch": 27.36, "learning_rate": 0.001, "loss": 2.5432, "step": 142512 }, { "epoch": 27.37, "learning_rate": 0.001, "loss": 2.5471, "step": 142524 }, { "epoch": 27.37, "learning_rate": 0.001, "loss": 2.5467, "step": 142536 }, { "epoch": 27.37, "learning_rate": 0.001, "loss": 2.5475, "step": 142548 }, { "epoch": 27.37, "learning_rate": 0.001, "loss": 2.5434, "step": 142560 }, { "epoch": 27.38, "learning_rate": 0.001, "loss": 2.5276, "step": 142572 }, { "epoch": 27.38, "learning_rate": 0.001, "loss": 2.5458, "step": 142584 }, { "epoch": 27.38, "learning_rate": 0.001, "loss": 2.5434, "step": 142596 }, { "epoch": 27.38, "learning_rate": 0.001, "loss": 2.5444, "step": 142608 }, { "epoch": 27.38, "learning_rate": 0.001, "loss": 2.5483, "step": 142620 }, { "epoch": 27.39, "learning_rate": 0.001, "loss": 2.5413, "step": 142632 }, { "epoch": 27.39, "learning_rate": 0.001, "loss": 2.5465, "step": 142644 }, { "epoch": 27.39, "learning_rate": 0.001, "loss": 2.5417, "step": 142656 }, { "epoch": 27.39, "learning_rate": 0.001, "loss": 2.5567, "step": 142668 }, { "epoch": 27.4, "learning_rate": 0.001, "loss": 2.5329, "step": 142680 }, { "epoch": 27.4, "learning_rate": 0.001, "loss": 2.5459, "step": 142692 }, { "epoch": 27.4, "learning_rate": 0.001, "loss": 2.5459, "step": 142704 }, { "epoch": 27.4, "learning_rate": 0.001, "loss": 2.5435, "step": 142716 }, { "epoch": 27.41, "learning_rate": 0.001, "loss": 2.5444, "step": 142728 }, { "epoch": 27.41, "learning_rate": 0.001, "loss": 2.5432, "step": 142740 }, { "epoch": 27.41, "learning_rate": 0.001, "loss": 2.5471, "step": 142752 }, { "epoch": 27.41, "learning_rate": 0.001, "loss": 2.5399, "step": 142764 }, { "epoch": 27.41, "learning_rate": 0.001, "loss": 2.5522, "step": 142776 }, { "epoch": 27.42, "learning_rate": 0.001, "loss": 2.5441, "step": 142788 }, { "epoch": 27.42, "learning_rate": 0.001, "loss": 2.5372, "step": 142800 }, { "epoch": 27.42, "learning_rate": 0.001, "loss": 2.5418, "step": 142812 }, { "epoch": 27.42, "learning_rate": 0.001, "loss": 2.552, "step": 142824 }, { "epoch": 27.43, "learning_rate": 0.001, "loss": 2.5378, "step": 142836 }, { "epoch": 27.43, "learning_rate": 0.001, "loss": 2.5502, "step": 142848 }, { "epoch": 27.43, "learning_rate": 0.001, "loss": 2.5404, "step": 142860 }, { "epoch": 27.43, "learning_rate": 0.001, "loss": 2.536, "step": 142872 }, { "epoch": 27.44, "learning_rate": 0.001, "loss": 2.5402, "step": 142884 }, { "epoch": 27.44, "learning_rate": 0.001, "loss": 2.5466, "step": 142896 }, { "epoch": 27.44, "learning_rate": 0.001, "loss": 2.544, "step": 142908 }, { "epoch": 27.44, "learning_rate": 0.001, "loss": 2.5493, "step": 142920 }, { "epoch": 27.44, "learning_rate": 0.001, "loss": 2.5458, "step": 142932 }, { "epoch": 27.45, "learning_rate": 0.001, "loss": 2.5425, "step": 142944 }, { "epoch": 27.45, "learning_rate": 0.001, "loss": 2.5369, "step": 142956 }, { "epoch": 27.45, "learning_rate": 0.001, "loss": 2.5441, "step": 142968 }, { "epoch": 27.45, "learning_rate": 0.001, "loss": 2.5474, "step": 142980 }, { "epoch": 27.46, "learning_rate": 0.001, "loss": 2.5406, "step": 142992 }, { "epoch": 27.46, "learning_rate": 0.001, "loss": 2.5343, "step": 143004 }, { "epoch": 27.46, "learning_rate": 0.001, "loss": 2.5335, "step": 143016 }, { "epoch": 27.46, "learning_rate": 0.001, "loss": 2.5485, "step": 143028 }, { "epoch": 27.47, "learning_rate": 0.001, "loss": 2.5369, "step": 143040 }, { "epoch": 27.47, "learning_rate": 0.001, "loss": 2.5428, "step": 143052 }, { "epoch": 27.47, "learning_rate": 0.001, "loss": 2.5492, "step": 143064 }, { "epoch": 27.47, "learning_rate": 0.001, "loss": 2.5481, "step": 143076 }, { "epoch": 27.47, "learning_rate": 0.001, "loss": 2.5423, "step": 143088 }, { "epoch": 27.48, "learning_rate": 0.001, "loss": 2.5421, "step": 143100 }, { "epoch": 27.48, "learning_rate": 0.001, "loss": 2.5367, "step": 143112 }, { "epoch": 27.48, "learning_rate": 0.001, "loss": 2.5417, "step": 143124 }, { "epoch": 27.48, "eval_ag_news_accuracy": 0.324125, "eval_ag_news_bleu_score": 5.039687093948509, "eval_ag_news_bleu_score_sem": 0.1665901840628385, "eval_ag_news_emb_cos_sim": 0.8166662454605103, "eval_ag_news_emb_cos_sim_sem": 0.0068301160965115355, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.525768518447876, "eval_ag_news_n_ngrams_match_1": 14.082, "eval_ag_news_n_ngrams_match_2": 3.12, "eval_ag_news_n_ngrams_match_3": 0.928, "eval_ag_news_num_pred_words": 46.436, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.9798777471149, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3510902997573453, "eval_ag_news_runtime": 10.6257, "eval_ag_news_samples_per_second": 47.056, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35099022534293467, "eval_ag_news_token_set_f1_sem": 0.0044793457169557665, "eval_ag_news_token_set_precision": 0.33560578149662795, "eval_ag_news_token_set_recall": 0.3897147862700805, "eval_ag_news_true_num_tokens": 56.09375, "step": 143125 }, { "epoch": 27.48, "eval_anthropic_toxic_prompts_accuracy": 0.115375, "eval_anthropic_toxic_prompts_bleu_score": 3.236108825922928, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11807013059932496, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6814109086990356, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008242399038194893, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2264301776885986, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 47.144, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.189573973296206, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2188053723440092, "eval_anthropic_toxic_prompts_runtime": 10.2191, "eval_anthropic_toxic_prompts_samples_per_second": 48.928, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.36225645245563226, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006332418695622373, "eval_anthropic_toxic_prompts_token_set_precision": 0.44929653816519577, "eval_anthropic_toxic_prompts_token_set_recall": 0.3296031274263888, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 143125 }, { "epoch": 27.48, "eval_arxiv_accuracy": 0.351125, "eval_arxiv_bleu_score": 4.4092280661335534, "eval_arxiv_bleu_score_sem": 0.12279747690110586, "eval_arxiv_emb_cos_sim": 0.7718652486801147, "eval_arxiv_emb_cos_sim_sem": 0.006698258363856188, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3722763061523438, "eval_arxiv_n_ngrams_match_1": 15.386, "eval_arxiv_n_ngrams_match_2": 3.036, "eval_arxiv_n_ngrams_match_3": 0.666, "eval_arxiv_num_pred_words": 40.212, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.144794081242484, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36884531866145703, "eval_arxiv_runtime": 10.7393, "eval_arxiv_samples_per_second": 46.558, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.36054283884661736, "eval_arxiv_token_set_f1_sem": 0.004120284513604176, "eval_arxiv_token_set_precision": 0.3101980172639564, "eval_arxiv_token_set_recall": 0.4501447148878183, "eval_arxiv_true_num_tokens": 64.0, "step": 143125 }, { "epoch": 27.48, "eval_python_code_alpaca_accuracy": 0.1613125, "eval_python_code_alpaca_bleu_score": 4.506491867995508, "eval_python_code_alpaca_bleu_score_sem": 0.15751610811373762, "eval_python_code_alpaca_emb_cos_sim": 0.764640748500824, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008484346973338728, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8803963661193848, "eval_python_code_alpaca_n_ngrams_match_1": 9.812, "eval_python_code_alpaca_n_ngrams_match_2": 2.838, "eval_python_code_alpaca_n_ngrams_match_3": 0.882, "eval_python_code_alpaca_num_pred_words": 43.264, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.821335553492464, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33476666746035166, "eval_python_code_alpaca_runtime": 10.3, "eval_python_code_alpaca_samples_per_second": 48.544, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.4777867146158203, "eval_python_code_alpaca_token_set_f1_sem": 0.005835641091555561, "eval_python_code_alpaca_token_set_precision": 0.5386932988730029, "eval_python_code_alpaca_token_set_recall": 0.45061103365992106, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 143125 }, { "epoch": 27.48, "eval_wikibio_accuracy": 0.325125, "eval_wikibio_bleu_score": 5.578659342138037, "eval_wikibio_bleu_score_sem": 0.20152464322979105, "eval_wikibio_emb_cos_sim": 0.738686203956604, "eval_wikibio_emb_cos_sim_sem": 0.009736943408548401, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.676901340484619, "eval_wikibio_n_ngrams_match_1": 9.53, "eval_wikibio_n_ngrams_match_2": 3.15, "eval_wikibio_n_ngrams_match_3": 1.124, "eval_wikibio_num_pred_words": 35.248, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.523733536094035, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3384667298775853, "eval_wikibio_runtime": 10.3067, "eval_wikibio_samples_per_second": 48.512, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.30206023185653375, "eval_wikibio_token_set_f1_sem": 0.005679007063202068, "eval_wikibio_token_set_precision": 0.3088813719711778, "eval_wikibio_token_set_recall": 0.31404185933962747, "eval_wikibio_true_num_tokens": 61.1328125, "step": 143125 }, { "epoch": 27.48, "eval_nq_accuracy": 0.52903125, "eval_nq_bleu_score": 11.79704300367307, "eval_nq_bleu_score_sem": 0.48651102349765474, "eval_nq_emb_cos_sim": 0.827208936214447, "eval_nq_emb_cos_sim_sem": 0.00765775441926447, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.168199300765991, "eval_nq_n_ngrams_match_1": 23.006, "eval_nq_n_ngrams_match_2": 8.494, "eval_nq_n_ngrams_match_3": 3.974, "eval_nq_num_pred_words": 49.126, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.742527196303948, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4446213040074076, "eval_nq_runtime": 11.7566, "eval_nq_samples_per_second": 42.529, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.45899188065685237, "eval_nq_token_set_f1_sem": 0.004989788793268598, "eval_nq_token_set_precision": 0.41753366194454644, "eval_nq_token_set_recall": 0.5174536315242882, "eval_nq_true_num_tokens": 64.0, "step": 143125 }, { "epoch": 27.48, "learning_rate": 0.001, "loss": 2.5418, "step": 143136 }, { "epoch": 27.49, "learning_rate": 0.001, "loss": 2.5372, "step": 143148 }, { "epoch": 27.49, "learning_rate": 0.001, "loss": 2.5391, "step": 143160 }, { "epoch": 27.49, "learning_rate": 0.001, "loss": 2.5489, "step": 143172 }, { "epoch": 27.49, "learning_rate": 0.001, "loss": 2.5509, "step": 143184 }, { "epoch": 27.5, "learning_rate": 0.001, "loss": 2.5443, "step": 143196 }, { "epoch": 27.5, "learning_rate": 0.001, "loss": 2.5378, "step": 143208 }, { "epoch": 27.5, "learning_rate": 0.001, "loss": 2.5377, "step": 143220 }, { "epoch": 27.5, "learning_rate": 0.001, "loss": 2.536, "step": 143232 }, { "epoch": 27.5, "learning_rate": 0.001, "loss": 2.5349, "step": 143244 }, { "epoch": 27.51, "learning_rate": 0.001, "loss": 2.5462, "step": 143256 }, { "epoch": 27.51, "learning_rate": 0.001, "loss": 2.5391, "step": 143268 }, { "epoch": 27.51, "learning_rate": 0.001, "loss": 2.5503, "step": 143280 }, { "epoch": 27.51, "learning_rate": 0.001, "loss": 2.5478, "step": 143292 }, { "epoch": 27.52, "learning_rate": 0.001, "loss": 2.5254, "step": 143304 }, { "epoch": 27.52, "learning_rate": 0.001, "loss": 2.5476, "step": 143316 }, { "epoch": 27.52, "learning_rate": 0.001, "loss": 2.5332, "step": 143328 }, { "epoch": 27.52, "learning_rate": 0.001, "loss": 2.5392, "step": 143340 }, { "epoch": 27.53, "learning_rate": 0.001, "loss": 2.5393, "step": 143352 }, { "epoch": 27.53, "learning_rate": 0.001, "loss": 2.539, "step": 143364 }, { "epoch": 27.53, "learning_rate": 0.001, "loss": 2.5401, "step": 143376 }, { "epoch": 27.53, "learning_rate": 0.001, "loss": 2.5432, "step": 143388 }, { "epoch": 27.53, "learning_rate": 0.001, "loss": 2.5411, "step": 143400 }, { "epoch": 27.54, "learning_rate": 0.001, "loss": 2.5366, "step": 143412 }, { "epoch": 27.54, "learning_rate": 0.001, "loss": 2.5463, "step": 143424 }, { "epoch": 27.54, "learning_rate": 0.001, "loss": 2.5313, "step": 143436 }, { "epoch": 27.54, "learning_rate": 0.001, "loss": 2.5372, "step": 143448 }, { "epoch": 27.55, "learning_rate": 0.001, "loss": 2.5501, "step": 143460 }, { "epoch": 27.55, "learning_rate": 0.001, "loss": 2.5462, "step": 143472 }, { "epoch": 27.55, "learning_rate": 0.001, "loss": 2.5481, "step": 143484 }, { "epoch": 27.55, "learning_rate": 0.001, "loss": 2.5377, "step": 143496 }, { "epoch": 27.56, "learning_rate": 0.001, "loss": 2.5563, "step": 143508 }, { "epoch": 27.56, "learning_rate": 0.001, "loss": 2.5382, "step": 143520 }, { "epoch": 27.56, "learning_rate": 0.001, "loss": 2.5444, "step": 143532 }, { "epoch": 27.56, "learning_rate": 0.001, "loss": 2.5429, "step": 143544 }, { "epoch": 27.56, "learning_rate": 0.001, "loss": 2.535, "step": 143556 }, { "epoch": 27.57, "learning_rate": 0.001, "loss": 2.5466, "step": 143568 }, { "epoch": 27.57, "learning_rate": 0.001, "loss": 2.5409, "step": 143580 }, { "epoch": 27.57, "learning_rate": 0.001, "loss": 2.5429, "step": 143592 }, { "epoch": 27.57, "learning_rate": 0.001, "loss": 2.5399, "step": 143604 }, { "epoch": 27.58, "learning_rate": 0.001, "loss": 2.5443, "step": 143616 }, { "epoch": 27.58, "learning_rate": 0.001, "loss": 2.5405, "step": 143628 }, { "epoch": 27.58, "learning_rate": 0.001, "loss": 2.5458, "step": 143640 }, { "epoch": 27.58, "learning_rate": 0.001, "loss": 2.5409, "step": 143652 }, { "epoch": 27.59, "learning_rate": 0.001, "loss": 2.5409, "step": 143664 }, { "epoch": 27.59, "learning_rate": 0.001, "loss": 2.5388, "step": 143676 }, { "epoch": 27.59, "learning_rate": 0.001, "loss": 2.5496, "step": 143688 }, { "epoch": 27.59, "learning_rate": 0.001, "loss": 2.5429, "step": 143700 }, { "epoch": 27.59, "learning_rate": 0.001, "loss": 2.5368, "step": 143712 }, { "epoch": 27.6, "learning_rate": 0.001, "loss": 2.5428, "step": 143724 }, { "epoch": 27.6, "learning_rate": 0.001, "loss": 2.539, "step": 143736 }, { "epoch": 27.6, "learning_rate": 0.001, "loss": 2.5431, "step": 143748 }, { "epoch": 27.6, "eval_ag_news_accuracy": 0.325125, "eval_ag_news_bleu_score": 4.9060576397579005, "eval_ag_news_bleu_score_sem": 0.15229328835492612, "eval_ag_news_emb_cos_sim": 0.8132540583610535, "eval_ag_news_emb_cos_sim_sem": 0.008341442815106476, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5347824096679688, "eval_ag_news_n_ngrams_match_1": 14.248, "eval_ag_news_n_ngrams_match_2": 3.162, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 46.202, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.287553262389494, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3531805767359434, "eval_ag_news_runtime": 10.3162, "eval_ag_news_samples_per_second": 48.467, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3534782397664274, "eval_ag_news_token_set_f1_sem": 0.004460881976462208, "eval_ag_news_token_set_precision": 0.34088337323922935, "eval_ag_news_token_set_recall": 0.38200145698709126, "eval_ag_news_true_num_tokens": 56.09375, "step": 143750 }, { "epoch": 27.6, "eval_anthropic_toxic_prompts_accuracy": 0.11678125, "eval_anthropic_toxic_prompts_bleu_score": 3.1628702078780115, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12169665074305872, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6807167530059814, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00788797433526564, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2000010013580322, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704, "eval_anthropic_toxic_prompts_num_pred_words": 46.57, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.532554762967813, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2158291149747981, "eval_anthropic_toxic_prompts_runtime": 9.9051, "eval_anthropic_toxic_prompts_samples_per_second": 50.479, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35263688369545504, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006466724667783063, "eval_anthropic_toxic_prompts_token_set_precision": 0.43821858309757283, "eval_anthropic_toxic_prompts_token_set_recall": 0.3228368894232848, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 143750 }, { "epoch": 27.6, "eval_arxiv_accuracy": 0.34753125, "eval_arxiv_bleu_score": 4.35764468534656, "eval_arxiv_bleu_score_sem": 0.12481431895670242, "eval_arxiv_emb_cos_sim": 0.7613619565963745, "eval_arxiv_emb_cos_sim_sem": 0.00844892583988786, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3958537578582764, "eval_arxiv_n_ngrams_match_1": 15.274, "eval_arxiv_n_ngrams_match_2": 3.006, "eval_arxiv_n_ngrams_match_3": 0.658, "eval_arxiv_num_pred_words": 40.622, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.840118838714492, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36452885880810904, "eval_arxiv_runtime": 10.4698, "eval_arxiv_samples_per_second": 47.756, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35962154370328314, "eval_arxiv_token_set_f1_sem": 0.0041119309525474105, "eval_arxiv_token_set_precision": 0.3111581583150548, "eval_arxiv_token_set_recall": 0.44568854702110317, "eval_arxiv_true_num_tokens": 64.0, "step": 143750 }, { "epoch": 27.6, "eval_python_code_alpaca_accuracy": 0.1625, "eval_python_code_alpaca_bleu_score": 4.567988418321479, "eval_python_code_alpaca_bleu_score_sem": 0.13992635271135734, "eval_python_code_alpaca_emb_cos_sim": 0.7611854672431946, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007823779416627485, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.891040563583374, "eval_python_code_alpaca_n_ngrams_match_1": 10.022, "eval_python_code_alpaca_n_ngrams_match_2": 2.864, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 43.834, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.012042528976817, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3414438237031079, "eval_python_code_alpaca_runtime": 10.2062, "eval_python_code_alpaca_samples_per_second": 48.99, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4802944043537252, "eval_python_code_alpaca_token_set_f1_sem": 0.005362698658037156, "eval_python_code_alpaca_token_set_precision": 0.5526999807207397, "eval_python_code_alpaca_token_set_recall": 0.44429027037711927, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 143750 }, { "epoch": 27.6, "eval_wikibio_accuracy": 0.32371875, "eval_wikibio_bleu_score": 6.038606194427294, "eval_wikibio_bleu_score_sem": 0.20993551884146786, "eval_wikibio_emb_cos_sim": 0.7467552423477173, "eval_wikibio_emb_cos_sim_sem": 0.009053170060464865, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7513339519500732, "eval_wikibio_n_ngrams_match_1": 10.056, "eval_wikibio_n_ngrams_match_2": 3.352, "eval_wikibio_n_ngrams_match_3": 1.204, "eval_wikibio_num_pred_words": 35.754, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.57784092874125, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35741638556115785, "eval_wikibio_runtime": 11.0308, "eval_wikibio_samples_per_second": 45.328, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.32068604088086433, "eval_wikibio_token_set_f1_sem": 0.005247016802490674, "eval_wikibio_token_set_precision": 0.3285627299052317, "eval_wikibio_token_set_recall": 0.3292274279664372, "eval_wikibio_true_num_tokens": 61.1328125, "step": 143750 }, { "epoch": 27.6, "eval_nq_accuracy": 0.5283125, "eval_nq_bleu_score": 11.54522568293295, "eval_nq_bleu_score_sem": 0.4623561804624445, "eval_nq_emb_cos_sim": 0.8352663516998291, "eval_nq_emb_cos_sim_sem": 0.007021329104215874, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1706504821777344, "eval_nq_n_ngrams_match_1": 22.98, "eval_nq_n_ngrams_match_2": 8.36, "eval_nq_n_ngrams_match_3": 3.832, "eval_nq_num_pred_words": 48.572, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.763983001752207, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4483094144303781, "eval_nq_runtime": 10.2471, "eval_nq_samples_per_second": 48.794, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.4625401152356082, "eval_nq_token_set_f1_sem": 0.0049427924543661165, "eval_nq_token_set_precision": 0.42163948046897626, "eval_nq_token_set_recall": 0.5201009496547064, "eval_nq_true_num_tokens": 64.0, "step": 143750 }, { "epoch": 27.6, "learning_rate": 0.001, "loss": 2.535, "step": 143760 }, { "epoch": 27.61, "learning_rate": 0.001, "loss": 2.5446, "step": 143772 }, { "epoch": 27.61, "learning_rate": 0.001, "loss": 2.5501, "step": 143784 }, { "epoch": 27.61, "learning_rate": 0.001, "loss": 2.5471, "step": 143796 }, { "epoch": 27.61, "learning_rate": 0.001, "loss": 2.5464, "step": 143808 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 2.5411, "step": 143820 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 2.5473, "step": 143832 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 2.5378, "step": 143844 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 2.5439, "step": 143856 }, { "epoch": 27.62, "learning_rate": 0.001, "loss": 2.5431, "step": 143868 }, { "epoch": 27.63, "learning_rate": 0.001, "loss": 2.5472, "step": 143880 }, { "epoch": 27.63, "learning_rate": 0.001, "loss": 2.549, "step": 143892 }, { "epoch": 27.63, "learning_rate": 0.001, "loss": 2.5568, "step": 143904 }, { "epoch": 27.63, "learning_rate": 0.001, "loss": 2.5526, "step": 143916 }, { "epoch": 27.64, "learning_rate": 0.001, "loss": 2.5417, "step": 143928 }, { "epoch": 27.64, "learning_rate": 0.001, "loss": 2.5536, "step": 143940 }, { "epoch": 27.64, "learning_rate": 0.001, "loss": 2.5366, "step": 143952 }, { "epoch": 27.64, "learning_rate": 0.001, "loss": 2.5537, "step": 143964 }, { "epoch": 27.65, "learning_rate": 0.001, "loss": 2.5499, "step": 143976 }, { "epoch": 27.65, "learning_rate": 0.001, "loss": 2.5472, "step": 143988 }, { "epoch": 27.65, "learning_rate": 0.001, "loss": 2.5448, "step": 144000 }, { "epoch": 27.65, "learning_rate": 0.001, "loss": 2.5532, "step": 144012 }, { "epoch": 27.65, "learning_rate": 0.001, "loss": 2.552, "step": 144024 }, { "epoch": 27.66, "learning_rate": 0.001, "loss": 2.5466, "step": 144036 }, { "epoch": 27.66, "learning_rate": 0.001, "loss": 2.5372, "step": 144048 }, { "epoch": 27.66, "learning_rate": 0.001, "loss": 2.557, "step": 144060 }, { "epoch": 27.66, "learning_rate": 0.001, "loss": 2.5376, "step": 144072 }, { "epoch": 27.67, "learning_rate": 0.001, "loss": 2.5435, "step": 144084 }, { "epoch": 27.67, "learning_rate": 0.001, "loss": 2.5445, "step": 144096 }, { "epoch": 27.67, "learning_rate": 0.001, "loss": 2.549, "step": 144108 }, { "epoch": 27.67, "learning_rate": 0.001, "loss": 2.5519, "step": 144120 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 2.5433, "step": 144132 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 2.5425, "step": 144144 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 2.5447, "step": 144156 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 2.5457, "step": 144168 }, { "epoch": 27.68, "learning_rate": 0.001, "loss": 2.5481, "step": 144180 }, { "epoch": 27.69, "learning_rate": 0.001, "loss": 2.5447, "step": 144192 }, { "epoch": 27.69, "learning_rate": 0.001, "loss": 2.5478, "step": 144204 }, { "epoch": 27.69, "learning_rate": 0.001, "loss": 2.5503, "step": 144216 }, { "epoch": 27.69, "learning_rate": 0.001, "loss": 2.5407, "step": 144228 }, { "epoch": 27.7, "learning_rate": 0.001, "loss": 2.5409, "step": 144240 }, { "epoch": 27.7, "learning_rate": 0.001, "loss": 2.5457, "step": 144252 }, { "epoch": 27.7, "learning_rate": 0.001, "loss": 2.5453, "step": 144264 }, { "epoch": 27.7, "learning_rate": 0.001, "loss": 2.5446, "step": 144276 }, { "epoch": 27.71, "learning_rate": 0.001, "loss": 2.5502, "step": 144288 }, { "epoch": 27.71, "learning_rate": 0.001, "loss": 2.5403, "step": 144300 }, { "epoch": 27.71, "learning_rate": 0.001, "loss": 2.55, "step": 144312 }, { "epoch": 27.71, "learning_rate": 0.001, "loss": 2.5395, "step": 144324 }, { "epoch": 27.71, "learning_rate": 0.001, "loss": 2.5391, "step": 144336 }, { "epoch": 27.72, "learning_rate": 0.001, "loss": 2.5477, "step": 144348 }, { "epoch": 27.72, "learning_rate": 0.001, "loss": 2.5414, "step": 144360 }, { "epoch": 27.72, "learning_rate": 0.001, "loss": 2.5524, "step": 144372 }, { "epoch": 27.72, "eval_ag_news_accuracy": 0.3240625, "eval_ag_news_bleu_score": 4.970414400051082, "eval_ag_news_bleu_score_sem": 0.15894019644248084, "eval_ag_news_emb_cos_sim": 0.8165663480758667, "eval_ag_news_emb_cos_sim_sem": 0.006890783192220428, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5300419330596924, "eval_ag_news_n_ngrams_match_1": 14.202, "eval_ag_news_n_ngrams_match_2": 3.2, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 46.626, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.12539856712719, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3527152335552278, "eval_ag_news_runtime": 11.2613, "eval_ag_news_samples_per_second": 44.4, "eval_ag_news_steps_per_second": 0.089, "eval_ag_news_token_set_f1": 0.3547670915677933, "eval_ag_news_token_set_f1_sem": 0.004412122266916088, "eval_ag_news_token_set_precision": 0.3410934263404353, "eval_ag_news_token_set_recall": 0.38717581638007587, "eval_ag_news_true_num_tokens": 56.09375, "step": 144375 }, { "epoch": 27.72, "eval_anthropic_toxic_prompts_accuracy": 0.11428125, "eval_anthropic_toxic_prompts_bleu_score": 3.1798131297428873, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12461979207105056, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6775497198104858, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009008732135610698, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.219355583190918, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.942, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 47.154, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.011996835628693, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21389804969470883, "eval_anthropic_toxic_prompts_runtime": 9.8543, "eval_anthropic_toxic_prompts_samples_per_second": 50.739, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35475397955300003, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006658995972486581, "eval_anthropic_toxic_prompts_token_set_precision": 0.43114369295925986, "eval_anthropic_toxic_prompts_token_set_recall": 0.3278570045263657, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 144375 }, { "epoch": 27.72, "eval_arxiv_accuracy": 0.34809375, "eval_arxiv_bleu_score": 4.291331107247671, "eval_arxiv_bleu_score_sem": 0.12796598973892936, "eval_arxiv_emb_cos_sim": 0.7707942128181458, "eval_arxiv_emb_cos_sim_sem": 0.007130301098129807, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.397416830062866, "eval_arxiv_n_ngrams_match_1": 15.254, "eval_arxiv_n_ngrams_match_2": 2.892, "eval_arxiv_n_ngrams_match_3": 0.628, "eval_arxiv_num_pred_words": 41.08, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.886797570663404, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36530406305732177, "eval_arxiv_runtime": 11.3173, "eval_arxiv_samples_per_second": 44.18, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.35487308078448215, "eval_arxiv_token_set_f1_sem": 0.004195730407037573, "eval_arxiv_token_set_precision": 0.3079749247150917, "eval_arxiv_token_set_recall": 0.43428864442882037, "eval_arxiv_true_num_tokens": 64.0, "step": 144375 }, { "epoch": 27.72, "eval_python_code_alpaca_accuracy": 0.1631875, "eval_python_code_alpaca_bleu_score": 4.609435844335552, "eval_python_code_alpaca_bleu_score_sem": 0.1432049999141692, "eval_python_code_alpaca_emb_cos_sim": 0.7601396441459656, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008817652153896974, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8603568077087402, "eval_python_code_alpaca_n_ngrams_match_1": 9.976, "eval_python_code_alpaca_n_ngrams_match_2": 2.898, "eval_python_code_alpaca_n_ngrams_match_3": 1.008, "eval_python_code_alpaca_num_pred_words": 44.25, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.46775845565825, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33343501326855096, "eval_python_code_alpaca_runtime": 10.0348, "eval_python_code_alpaca_samples_per_second": 49.827, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.478446577186757, "eval_python_code_alpaca_token_set_f1_sem": 0.005448696921159396, "eval_python_code_alpaca_token_set_precision": 0.5468609620776019, "eval_python_code_alpaca_token_set_recall": 0.44585232783812784, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 144375 }, { "epoch": 27.72, "eval_wikibio_accuracy": 0.324375, "eval_wikibio_bleu_score": 6.127609120138676, "eval_wikibio_bleu_score_sem": 0.2135397458310362, "eval_wikibio_emb_cos_sim": 0.7458769679069519, "eval_wikibio_emb_cos_sim_sem": 0.0093512154509767, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7127797603607178, "eval_wikibio_n_ngrams_match_1": 10.226, "eval_wikibio_n_ngrams_match_2": 3.474, "eval_wikibio_n_ngrams_match_3": 1.292, "eval_wikibio_num_pred_words": 36.576, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.9675283053763, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3624596865412507, "eval_wikibio_runtime": 10.1097, "eval_wikibio_samples_per_second": 49.457, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3232188252718927, "eval_wikibio_token_set_f1_sem": 0.005172646829244083, "eval_wikibio_token_set_precision": 0.33374889848493866, "eval_wikibio_token_set_recall": 0.3280943778877188, "eval_wikibio_true_num_tokens": 61.1328125, "step": 144375 }, { "epoch": 27.72, "eval_nq_accuracy": 0.53040625, "eval_nq_bleu_score": 11.528264571578472, "eval_nq_bleu_score_sem": 0.4655984014245001, "eval_nq_emb_cos_sim": 0.8405819535255432, "eval_nq_emb_cos_sim_sem": 0.006863966601310863, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.166982412338257, "eval_nq_n_ngrams_match_1": 23.074, "eval_nq_n_ngrams_match_2": 8.422, "eval_nq_n_ngrams_match_3": 3.838, "eval_nq_num_pred_words": 48.786, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.73189498654815, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44792931955147647, "eval_nq_runtime": 10.5458, "eval_nq_samples_per_second": 47.412, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4588423175001224, "eval_nq_token_set_f1_sem": 0.004830897197932589, "eval_nq_token_set_precision": 0.4166175122565744, "eval_nq_token_set_recall": 0.5182190175123669, "eval_nq_true_num_tokens": 64.0, "step": 144375 }, { "epoch": 27.72, "learning_rate": 0.001, "loss": 2.5558, "step": 144384 }, { "epoch": 27.73, "learning_rate": 0.001, "loss": 2.5497, "step": 144396 }, { "epoch": 27.73, "learning_rate": 0.001, "loss": 2.5448, "step": 144408 }, { "epoch": 27.73, "learning_rate": 0.001, "loss": 2.5426, "step": 144420 }, { "epoch": 27.73, "learning_rate": 0.001, "loss": 2.5486, "step": 144432 }, { "epoch": 27.74, "learning_rate": 0.001, "loss": 2.548, "step": 144444 }, { "epoch": 27.74, "learning_rate": 0.001, "loss": 2.5431, "step": 144456 }, { "epoch": 27.74, "learning_rate": 0.001, "loss": 2.5446, "step": 144468 }, { "epoch": 27.74, "learning_rate": 0.001, "loss": 2.5458, "step": 144480 }, { "epoch": 27.74, "learning_rate": 0.001, "loss": 2.5628, "step": 144492 }, { "epoch": 27.75, "learning_rate": 0.001, "loss": 2.5385, "step": 144504 }, { "epoch": 27.75, "learning_rate": 0.001, "loss": 2.5497, "step": 144516 }, { "epoch": 27.75, "learning_rate": 0.001, "loss": 2.5527, "step": 144528 }, { "epoch": 27.75, "learning_rate": 0.001, "loss": 2.55, "step": 144540 }, { "epoch": 27.76, "learning_rate": 0.001, "loss": 2.5433, "step": 144552 }, { "epoch": 27.76, "learning_rate": 0.001, "loss": 2.534, "step": 144564 }, { "epoch": 27.76, "learning_rate": 0.001, "loss": 2.5473, "step": 144576 }, { "epoch": 27.76, "learning_rate": 0.001, "loss": 2.5568, "step": 144588 }, { "epoch": 27.76, "learning_rate": 0.001, "loss": 2.5396, "step": 144600 }, { "epoch": 27.77, "learning_rate": 0.001, "loss": 2.5632, "step": 144612 }, { "epoch": 27.77, "learning_rate": 0.001, "loss": 2.5476, "step": 144624 }, { "epoch": 27.77, "learning_rate": 0.001, "loss": 2.5569, "step": 144636 }, { "epoch": 27.77, "learning_rate": 0.001, "loss": 2.5536, "step": 144648 }, { "epoch": 27.78, "learning_rate": 0.001, "loss": 2.5563, "step": 144660 }, { "epoch": 27.78, "learning_rate": 0.001, "loss": 2.5506, "step": 144672 }, { "epoch": 27.78, "learning_rate": 0.001, "loss": 2.5571, "step": 144684 }, { "epoch": 27.78, "learning_rate": 0.001, "loss": 2.5578, "step": 144696 }, { "epoch": 27.79, "learning_rate": 0.001, "loss": 2.5462, "step": 144708 }, { "epoch": 27.79, "learning_rate": 0.001, "loss": 2.5524, "step": 144720 }, { "epoch": 27.79, "learning_rate": 0.001, "loss": 2.5441, "step": 144732 }, { "epoch": 27.79, "learning_rate": 0.001, "loss": 2.5481, "step": 144744 }, { "epoch": 27.79, "learning_rate": 0.001, "loss": 2.5484, "step": 144756 }, { "epoch": 27.8, "learning_rate": 0.001, "loss": 2.5462, "step": 144768 }, { "epoch": 27.8, "learning_rate": 0.001, "loss": 2.5529, "step": 144780 }, { "epoch": 27.8, "learning_rate": 0.001, "loss": 2.5551, "step": 144792 }, { "epoch": 27.8, "learning_rate": 0.001, "loss": 2.5428, "step": 144804 }, { "epoch": 27.81, "learning_rate": 0.001, "loss": 2.5544, "step": 144816 }, { "epoch": 27.81, "learning_rate": 0.001, "loss": 2.5495, "step": 144828 }, { "epoch": 27.81, "learning_rate": 0.001, "loss": 2.5511, "step": 144840 }, { "epoch": 27.81, "learning_rate": 0.001, "loss": 2.55, "step": 144852 }, { "epoch": 27.82, "learning_rate": 0.001, "loss": 2.5478, "step": 144864 }, { "epoch": 27.82, "learning_rate": 0.001, "loss": 2.5523, "step": 144876 }, { "epoch": 27.82, "learning_rate": 0.001, "loss": 2.5635, "step": 144888 }, { "epoch": 27.82, "learning_rate": 0.001, "loss": 2.5531, "step": 144900 }, { "epoch": 27.82, "learning_rate": 0.001, "loss": 2.5481, "step": 144912 }, { "epoch": 27.83, "learning_rate": 0.001, "loss": 2.5564, "step": 144924 }, { "epoch": 27.83, "learning_rate": 0.001, "loss": 2.5549, "step": 144936 }, { "epoch": 27.83, "learning_rate": 0.001, "loss": 2.5576, "step": 144948 }, { "epoch": 27.83, "learning_rate": 0.001, "loss": 2.5456, "step": 144960 }, { "epoch": 27.84, "learning_rate": 0.001, "loss": 2.5504, "step": 144972 }, { "epoch": 27.84, "learning_rate": 0.001, "loss": 2.5446, "step": 144984 }, { "epoch": 27.84, "learning_rate": 0.001, "loss": 2.5603, "step": 144996 }, { "epoch": 27.84, "eval_ag_news_accuracy": 0.32534375, "eval_ag_news_bleu_score": 4.8675052779507695, "eval_ag_news_bleu_score_sem": 0.15423374050962127, "eval_ag_news_emb_cos_sim": 0.8129054307937622, "eval_ag_news_emb_cos_sim_sem": 0.0077852795876018, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5247530937194824, "eval_ag_news_n_ngrams_match_1": 14.156, "eval_ag_news_n_ngrams_match_2": 3.094, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.428, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.94539125116629, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35092431823047665, "eval_ag_news_runtime": 9.9119, "eval_ag_news_samples_per_second": 50.445, "eval_ag_news_steps_per_second": 0.101, "eval_ag_news_token_set_f1": 0.35503893435791334, "eval_ag_news_token_set_f1_sem": 0.004356091461463944, "eval_ag_news_token_set_precision": 0.3387076756136274, "eval_ag_news_token_set_recall": 0.39054112356255016, "eval_ag_news_true_num_tokens": 56.09375, "step": 145000 }, { "epoch": 27.84, "eval_anthropic_toxic_prompts_accuracy": 0.11528125, "eval_anthropic_toxic_prompts_bleu_score": 3.035062237310198, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11474366489905395, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6745525598526001, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008941784500929176, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2177090644836426, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.108, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.866, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696, "eval_anthropic_toxic_prompts_num_pred_words": 47.24, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.970848000392305, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21239538917101328, "eval_anthropic_toxic_prompts_runtime": 9.97, "eval_anthropic_toxic_prompts_samples_per_second": 50.151, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3601126237014914, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006721048003872662, "eval_anthropic_toxic_prompts_token_set_precision": 0.4323960264713501, "eval_anthropic_toxic_prompts_token_set_recall": 0.3362558132085867, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 145000 }, { "epoch": 27.84, "eval_arxiv_accuracy": 0.35090625, "eval_arxiv_bleu_score": 4.385563568867093, "eval_arxiv_bleu_score_sem": 0.12942509155446114, "eval_arxiv_emb_cos_sim": 0.7661959528923035, "eval_arxiv_emb_cos_sim_sem": 0.007445631964887172, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.37373948097229, "eval_arxiv_n_ngrams_match_1": 15.196, "eval_arxiv_n_ngrams_match_2": 2.962, "eval_arxiv_n_ngrams_match_3": 0.664, "eval_arxiv_num_pred_words": 40.242, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.187469223057615, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36519591969325826, "eval_arxiv_runtime": 10.3585, "eval_arxiv_samples_per_second": 48.27, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.359672278920851, "eval_arxiv_token_set_f1_sem": 0.004138312262634471, "eval_arxiv_token_set_precision": 0.30804023179031814, "eval_arxiv_token_set_recall": 0.4478689257163039, "eval_arxiv_true_num_tokens": 64.0, "step": 145000 }, { "epoch": 27.84, "eval_python_code_alpaca_accuracy": 0.16115625, "eval_python_code_alpaca_bleu_score": 4.675873061055396, "eval_python_code_alpaca_bleu_score_sem": 0.14521065862146887, "eval_python_code_alpaca_emb_cos_sim": 0.7455906867980957, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010485603147003477, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8650741577148438, "eval_python_code_alpaca_n_ngrams_match_1": 9.81, "eval_python_code_alpaca_n_ngrams_match_2": 2.906, "eval_python_code_alpaca_n_ngrams_match_3": 0.992, "eval_python_code_alpaca_num_pred_words": 43.32, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.550354650525446, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3352814709123943, "eval_python_code_alpaca_runtime": 9.9699, "eval_python_code_alpaca_samples_per_second": 50.151, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4718053567366586, "eval_python_code_alpaca_token_set_f1_sem": 0.005538700436273628, "eval_python_code_alpaca_token_set_precision": 0.5345477910229891, "eval_python_code_alpaca_token_set_recall": 0.4471907717852849, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 145000 }, { "epoch": 27.84, "eval_wikibio_accuracy": 0.3265, "eval_wikibio_bleu_score": 5.9709247254810505, "eval_wikibio_bleu_score_sem": 0.22313585042195538, "eval_wikibio_emb_cos_sim": 0.735073447227478, "eval_wikibio_emb_cos_sim_sem": 0.00943841991472377, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6820902824401855, "eval_wikibio_n_ngrams_match_1": 9.778, "eval_wikibio_n_ngrams_match_2": 3.342, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 35.844, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.72935290741116, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34520965329401354, "eval_wikibio_runtime": 9.779, "eval_wikibio_samples_per_second": 51.13, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.31398602630878913, "eval_wikibio_token_set_f1_sem": 0.005793615057936962, "eval_wikibio_token_set_precision": 0.31938491935469054, "eval_wikibio_token_set_recall": 0.3278386988221626, "eval_wikibio_true_num_tokens": 61.1328125, "step": 145000 }, { "epoch": 27.84, "eval_nq_accuracy": 0.52971875, "eval_nq_bleu_score": 12.093362890213317, "eval_nq_bleu_score_sem": 0.5002878423445387, "eval_nq_emb_cos_sim": 0.8364195227622986, "eval_nq_emb_cos_sim_sem": 0.006576766121756488, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1689302921295166, "eval_nq_n_ngrams_match_1": 23.426, "eval_nq_n_ngrams_match_2": 8.724, "eval_nq_n_ngrams_match_3": 4.03, "eval_nq_num_pred_words": 49.194, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.748920244526674, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4542114617646773, "eval_nq_runtime": 10.3945, "eval_nq_samples_per_second": 48.102, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4674915179947641, "eval_nq_token_set_f1_sem": 0.005029040902590511, "eval_nq_token_set_precision": 0.42419503837681494, "eval_nq_token_set_recall": 0.5281744958888587, "eval_nq_true_num_tokens": 64.0, "step": 145000 }, { "epoch": 27.84, "learning_rate": 0.001, "loss": 2.5498, "step": 145008 }, { "epoch": 27.85, "learning_rate": 0.001, "loss": 2.5562, "step": 145020 }, { "epoch": 27.85, "learning_rate": 0.001, "loss": 2.5473, "step": 145032 }, { "epoch": 27.85, "learning_rate": 0.001, "loss": 2.5466, "step": 145044 }, { "epoch": 27.85, "learning_rate": 0.001, "loss": 2.5402, "step": 145056 }, { "epoch": 27.85, "learning_rate": 0.001, "loss": 2.5547, "step": 145068 }, { "epoch": 27.86, "learning_rate": 0.001, "loss": 2.5569, "step": 145080 }, { "epoch": 27.86, "learning_rate": 0.001, "loss": 2.5506, "step": 145092 }, { "epoch": 27.86, "learning_rate": 0.001, "loss": 2.5449, "step": 145104 }, { "epoch": 27.86, "learning_rate": 0.001, "loss": 2.5555, "step": 145116 }, { "epoch": 27.87, "learning_rate": 0.001, "loss": 2.5586, "step": 145128 }, { "epoch": 27.87, "learning_rate": 0.001, "loss": 2.5503, "step": 145140 }, { "epoch": 27.87, "learning_rate": 0.001, "loss": 2.5577, "step": 145152 }, { "epoch": 27.87, "learning_rate": 0.001, "loss": 2.5683, "step": 145164 }, { "epoch": 27.88, "learning_rate": 0.001, "loss": 2.5496, "step": 145176 }, { "epoch": 27.88, "learning_rate": 0.001, "loss": 2.5467, "step": 145188 }, { "epoch": 27.88, "learning_rate": 0.001, "loss": 2.5582, "step": 145200 }, { "epoch": 27.88, "learning_rate": 0.001, "loss": 2.5515, "step": 145212 }, { "epoch": 27.88, "learning_rate": 0.001, "loss": 2.5403, "step": 145224 }, { "epoch": 27.89, "learning_rate": 0.001, "loss": 2.5563, "step": 145236 }, { "epoch": 27.89, "learning_rate": 0.001, "loss": 2.5419, "step": 145248 }, { "epoch": 27.89, "learning_rate": 0.001, "loss": 2.558, "step": 145260 }, { "epoch": 27.89, "learning_rate": 0.001, "loss": 2.5546, "step": 145272 }, { "epoch": 27.9, "learning_rate": 0.001, "loss": 2.5451, "step": 145284 }, { "epoch": 27.9, "learning_rate": 0.001, "loss": 2.5605, "step": 145296 }, { "epoch": 27.9, "learning_rate": 0.001, "loss": 2.5536, "step": 145308 }, { "epoch": 27.9, "learning_rate": 0.001, "loss": 2.5607, "step": 145320 }, { "epoch": 27.91, "learning_rate": 0.001, "loss": 2.5505, "step": 145332 }, { "epoch": 27.91, "learning_rate": 0.001, "loss": 2.5539, "step": 145344 }, { "epoch": 27.91, "learning_rate": 0.001, "loss": 2.549, "step": 145356 }, { "epoch": 27.91, "learning_rate": 0.001, "loss": 2.5487, "step": 145368 }, { "epoch": 27.91, "learning_rate": 0.001, "loss": 2.5566, "step": 145380 }, { "epoch": 27.92, "learning_rate": 0.001, "loss": 2.5503, "step": 145392 }, { "epoch": 27.92, "learning_rate": 0.001, "loss": 2.544, "step": 145404 }, { "epoch": 27.92, "learning_rate": 0.001, "loss": 2.5478, "step": 145416 }, { "epoch": 27.92, "learning_rate": 0.001, "loss": 2.5549, "step": 145428 }, { "epoch": 27.93, "learning_rate": 0.001, "loss": 2.5468, "step": 145440 }, { "epoch": 27.93, "learning_rate": 0.001, "loss": 2.5576, "step": 145452 }, { "epoch": 27.93, "learning_rate": 0.001, "loss": 2.5525, "step": 145464 }, { "epoch": 27.93, "learning_rate": 0.001, "loss": 2.5562, "step": 145476 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 2.5503, "step": 145488 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 2.5526, "step": 145500 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 2.5619, "step": 145512 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 2.5492, "step": 145524 }, { "epoch": 27.94, "learning_rate": 0.001, "loss": 2.5502, "step": 145536 }, { "epoch": 27.95, "learning_rate": 0.001, "loss": 2.5514, "step": 145548 }, { "epoch": 27.95, "learning_rate": 0.001, "loss": 2.5534, "step": 145560 }, { "epoch": 27.95, "learning_rate": 0.001, "loss": 2.5483, "step": 145572 }, { "epoch": 27.95, "learning_rate": 0.001, "loss": 2.544, "step": 145584 }, { "epoch": 27.96, "learning_rate": 0.001, "loss": 2.5498, "step": 145596 }, { "epoch": 27.96, "learning_rate": 0.001, "loss": 2.5485, "step": 145608 }, { "epoch": 27.96, "learning_rate": 0.001, "loss": 2.5429, "step": 145620 }, { "epoch": 27.96, "eval_ag_news_accuracy": 0.323875, "eval_ag_news_bleu_score": 4.910617404866825, "eval_ag_news_bleu_score_sem": 0.1573348701568156, "eval_ag_news_emb_cos_sim": 0.8168294429779053, "eval_ag_news_emb_cos_sim_sem": 0.006366818181344549, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.532271146774292, "eval_ag_news_n_ngrams_match_1": 14.134, "eval_ag_news_n_ngrams_match_2": 3.24, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.722, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.20155622794208, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35125415506204893, "eval_ag_news_runtime": 10.3222, "eval_ag_news_samples_per_second": 48.439, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3545952564019915, "eval_ag_news_token_set_f1_sem": 0.004309820244551049, "eval_ag_news_token_set_precision": 0.34017337087885013, "eval_ag_news_token_set_recall": 0.3882196094332846, "eval_ag_news_true_num_tokens": 56.09375, "step": 145625 }, { "epoch": 27.96, "eval_anthropic_toxic_prompts_accuracy": 0.11434375, "eval_anthropic_toxic_prompts_bleu_score": 3.075192452458553, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11800515664346292, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6746152639389038, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009187364967579043, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2376859188079834, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.146, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 47.318, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.474702955568805, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21287619423670326, "eval_anthropic_toxic_prompts_runtime": 10.041, "eval_anthropic_toxic_prompts_samples_per_second": 49.796, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3576538157638634, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065782782685566655, "eval_anthropic_toxic_prompts_token_set_precision": 0.43505866930394155, "eval_anthropic_toxic_prompts_token_set_recall": 0.32891197715308995, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 145625 }, { "epoch": 27.96, "eval_arxiv_accuracy": 0.34896875, "eval_arxiv_bleu_score": 4.506671232722476, "eval_arxiv_bleu_score_sem": 0.12858575161834154, "eval_arxiv_emb_cos_sim": 0.7645954489707947, "eval_arxiv_emb_cos_sim_sem": 0.006850017970087281, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3764231204986572, "eval_arxiv_n_ngrams_match_1": 15.198, "eval_arxiv_n_ngrams_match_2": 3.082, "eval_arxiv_n_ngrams_match_3": 0.696, "eval_arxiv_num_pred_words": 40.662, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.265903066147143, "eval_arxiv_pred_num_tokens": 62.9140625, "eval_arxiv_rouge_score": 0.3612463677716442, "eval_arxiv_runtime": 10.4782, "eval_arxiv_samples_per_second": 47.718, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3600539066541921, "eval_arxiv_token_set_f1_sem": 0.004119097251032281, "eval_arxiv_token_set_precision": 0.308028916206553, "eval_arxiv_token_set_recall": 0.4500104806797574, "eval_arxiv_true_num_tokens": 64.0, "step": 145625 }, { "epoch": 27.96, "eval_python_code_alpaca_accuracy": 0.16025, "eval_python_code_alpaca_bleu_score": 4.658713891463431, "eval_python_code_alpaca_bleu_score_sem": 0.14818277100256289, "eval_python_code_alpaca_emb_cos_sim": 0.7642186284065247, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007257160477969565, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8961870670318604, "eval_python_code_alpaca_n_ngrams_match_1": 10.036, "eval_python_code_alpaca_n_ngrams_match_2": 2.868, "eval_python_code_alpaca_n_ngrams_match_3": 0.938, "eval_python_code_alpaca_num_pred_words": 43.378, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.104980515667588, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3390717072900312, "eval_python_code_alpaca_runtime": 10.0471, "eval_python_code_alpaca_samples_per_second": 49.766, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4754388621366275, "eval_python_code_alpaca_token_set_f1_sem": 0.005108182121670698, "eval_python_code_alpaca_token_set_precision": 0.5448649434657044, "eval_python_code_alpaca_token_set_recall": 0.4427471484073749, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 145625 }, { "epoch": 27.96, "eval_wikibio_accuracy": 0.32403125, "eval_wikibio_bleu_score": 5.8016650883714735, "eval_wikibio_bleu_score_sem": 0.20469397383022217, "eval_wikibio_emb_cos_sim": 0.7484632134437561, "eval_wikibio_emb_cos_sim_sem": 0.00851143488458558, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7147724628448486, "eval_wikibio_n_ngrams_match_1": 10.152, "eval_wikibio_n_ngrams_match_2": 3.31, "eval_wikibio_n_ngrams_match_3": 1.172, "eval_wikibio_num_pred_words": 36.812, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.049245793078754, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3549690485532082, "eval_wikibio_runtime": 9.9814, "eval_wikibio_samples_per_second": 50.093, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32202872024201085, "eval_wikibio_token_set_f1_sem": 0.005184770073454325, "eval_wikibio_token_set_precision": 0.32994717393404416, "eval_wikibio_token_set_recall": 0.32988277249828724, "eval_wikibio_true_num_tokens": 61.1328125, "step": 145625 }, { "epoch": 27.96, "eval_nq_accuracy": 0.53121875, "eval_nq_bleu_score": 11.808683773069896, "eval_nq_bleu_score_sem": 0.4838169537347454, "eval_nq_emb_cos_sim": 0.8334431648254395, "eval_nq_emb_cos_sim_sem": 0.007487980328017509, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1664879322052, "eval_nq_n_ngrams_match_1": 23.118, "eval_nq_n_ngrams_match_2": 8.606, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 49.156, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.72757830529788, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4468582117136011, "eval_nq_runtime": 10.9956, "eval_nq_samples_per_second": 45.473, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.46168992517648466, "eval_nq_token_set_f1_sem": 0.005032645703840807, "eval_nq_token_set_precision": 0.4185919917686395, "eval_nq_token_set_recall": 0.525047861840401, "eval_nq_true_num_tokens": 64.0, "step": 145625 }, { "epoch": 27.96, "learning_rate": 0.001, "loss": 2.5481, "step": 145632 }, { "epoch": 27.97, "learning_rate": 0.001, "loss": 2.552, "step": 145644 }, { "epoch": 27.97, "learning_rate": 0.001, "loss": 2.5541, "step": 145656 }, { "epoch": 27.97, "learning_rate": 0.001, "loss": 2.547, "step": 145668 }, { "epoch": 27.97, "learning_rate": 0.001, "loss": 2.552, "step": 145680 }, { "epoch": 27.97, "learning_rate": 0.001, "loss": 2.5469, "step": 145692 }, { "epoch": 27.98, "learning_rate": 0.001, "loss": 2.5358, "step": 145704 }, { "epoch": 27.98, "learning_rate": 0.001, "loss": 2.5487, "step": 145716 }, { "epoch": 27.98, "learning_rate": 0.001, "loss": 2.5577, "step": 145728 }, { "epoch": 27.98, "learning_rate": 0.001, "loss": 2.5438, "step": 145740 }, { "epoch": 27.99, "learning_rate": 0.001, "loss": 2.5437, "step": 145752 }, { "epoch": 27.99, "learning_rate": 0.001, "loss": 2.5538, "step": 145764 }, { "epoch": 27.99, "learning_rate": 0.001, "loss": 2.5525, "step": 145776 }, { "epoch": 27.99, "learning_rate": 0.001, "loss": 2.5567, "step": 145788 }, { "epoch": 28.0, "learning_rate": 0.001, "loss": 2.5466, "step": 145800 }, { "epoch": 28.0, "learning_rate": 0.001, "loss": 2.5444, "step": 145812 }, { "epoch": 28.0, "learning_rate": 0.001, "loss": 2.542, "step": 145824 }, { "epoch": 28.0, "learning_rate": 0.001, "loss": 2.5376, "step": 145836 }, { "epoch": 28.0, "learning_rate": 0.001, "loss": 2.543, "step": 145848 }, { "epoch": 28.01, "learning_rate": 0.001, "loss": 2.5368, "step": 145860 }, { "epoch": 28.01, "learning_rate": 0.001, "loss": 2.5326, "step": 145872 }, { "epoch": 28.01, "learning_rate": 0.001, "loss": 2.5253, "step": 145884 }, { "epoch": 28.01, "learning_rate": 0.001, "loss": 2.5445, "step": 145896 }, { "epoch": 28.02, "learning_rate": 0.001, "loss": 2.5289, "step": 145908 }, { "epoch": 28.02, "learning_rate": 0.001, "loss": 2.5287, "step": 145920 }, { "epoch": 28.02, "learning_rate": 0.001, "loss": 2.5278, "step": 145932 }, { "epoch": 28.02, "learning_rate": 0.001, "loss": 2.5389, "step": 145944 }, { "epoch": 28.03, "learning_rate": 0.001, "loss": 2.5286, "step": 145956 }, { "epoch": 28.03, "learning_rate": 0.001, "loss": 2.5295, "step": 145968 }, { "epoch": 28.03, "learning_rate": 0.001, "loss": 2.5298, "step": 145980 }, { "epoch": 28.03, "learning_rate": 0.001, "loss": 2.5454, "step": 145992 }, { "epoch": 28.03, "learning_rate": 0.001, "loss": 2.5301, "step": 146004 }, { "epoch": 28.04, "learning_rate": 0.001, "loss": 2.5394, "step": 146016 }, { "epoch": 28.04, "learning_rate": 0.001, "loss": 2.5387, "step": 146028 }, { "epoch": 28.04, "learning_rate": 0.001, "loss": 2.5298, "step": 146040 }, { "epoch": 28.04, "learning_rate": 0.001, "loss": 2.5337, "step": 146052 }, { "epoch": 28.05, "learning_rate": 0.001, "loss": 2.5358, "step": 146064 }, { "epoch": 28.05, "learning_rate": 0.001, "loss": 2.5263, "step": 146076 }, { "epoch": 28.05, "learning_rate": 0.001, "loss": 2.5469, "step": 146088 }, { "epoch": 28.05, "learning_rate": 0.001, "loss": 2.5361, "step": 146100 }, { "epoch": 28.06, "learning_rate": 0.001, "loss": 2.5337, "step": 146112 }, { "epoch": 28.06, "learning_rate": 0.001, "loss": 2.5439, "step": 146124 }, { "epoch": 28.06, "learning_rate": 0.001, "loss": 2.5221, "step": 146136 }, { "epoch": 28.06, "learning_rate": 0.001, "loss": 2.531, "step": 146148 }, { "epoch": 28.06, "learning_rate": 0.001, "loss": 2.5382, "step": 146160 }, { "epoch": 28.07, "learning_rate": 0.001, "loss": 2.5395, "step": 146172 }, { "epoch": 28.07, "learning_rate": 0.001, "loss": 2.5294, "step": 146184 }, { "epoch": 28.07, "learning_rate": 0.001, "loss": 2.5306, "step": 146196 }, { "epoch": 28.07, "learning_rate": 0.001, "loss": 2.5396, "step": 146208 }, { "epoch": 28.08, "learning_rate": 0.001, "loss": 2.5389, "step": 146220 }, { "epoch": 28.08, "learning_rate": 0.001, "loss": 2.5365, "step": 146232 }, { "epoch": 28.08, "learning_rate": 0.001, "loss": 2.5433, "step": 146244 }, { "epoch": 28.08, "eval_ag_news_accuracy": 0.32346875, "eval_ag_news_bleu_score": 4.88933358549054, "eval_ag_news_bleu_score_sem": 0.1495385200295116, "eval_ag_news_emb_cos_sim": 0.8185994625091553, "eval_ag_news_emb_cos_sim_sem": 0.005962279558446829, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.529163122177124, "eval_ag_news_n_ngrams_match_1": 14.17, "eval_ag_news_n_ngrams_match_2": 3.152, "eval_ag_news_n_ngrams_match_3": 0.91, "eval_ag_news_num_pred_words": 46.824, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.09542196930388, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35328557903614377, "eval_ag_news_runtime": 10.2614, "eval_ag_news_samples_per_second": 48.726, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.35381324579829265, "eval_ag_news_token_set_f1_sem": 0.004506894948881247, "eval_ag_news_token_set_precision": 0.338694606243144, "eval_ag_news_token_set_recall": 0.38574194389262617, "eval_ag_news_true_num_tokens": 56.09375, "step": 146250 }, { "epoch": 28.08, "eval_anthropic_toxic_prompts_accuracy": 0.11525, "eval_anthropic_toxic_prompts_bleu_score": 3.1025764388193147, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11673357009430338, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.688366174697876, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00839087316587566, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.250960111618042, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696, "eval_anthropic_toxic_prompts_num_pred_words": 46.728, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.815113412938306, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2157249287222241, "eval_anthropic_toxic_prompts_runtime": 10.0509, "eval_anthropic_toxic_prompts_samples_per_second": 49.747, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3592458009222363, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00643331194361511, "eval_anthropic_toxic_prompts_token_set_precision": 0.44069401049663676, "eval_anthropic_toxic_prompts_token_set_recall": 0.3308920603658305, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 146250 }, { "epoch": 28.08, "eval_arxiv_accuracy": 0.349375, "eval_arxiv_bleu_score": 4.388583997874758, "eval_arxiv_bleu_score_sem": 0.123463011787757, "eval_arxiv_emb_cos_sim": 0.7765886187553406, "eval_arxiv_emb_cos_sim_sem": 0.006319269950779181, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3801686763763428, "eval_arxiv_n_ngrams_match_1": 15.38, "eval_arxiv_n_ngrams_match_2": 3.004, "eval_arxiv_n_ngrams_match_3": 0.626, "eval_arxiv_num_pred_words": 40.648, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.37572568637919, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3679883820724149, "eval_arxiv_runtime": 10.3047, "eval_arxiv_samples_per_second": 48.521, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.36123990977720516, "eval_arxiv_token_set_f1_sem": 0.004122756118564047, "eval_arxiv_token_set_precision": 0.31281050019102363, "eval_arxiv_token_set_recall": 0.440686418235979, "eval_arxiv_true_num_tokens": 64.0, "step": 146250 }, { "epoch": 28.08, "eval_python_code_alpaca_accuracy": 0.15978125, "eval_python_code_alpaca_bleu_score": 4.743050812663442, "eval_python_code_alpaca_bleu_score_sem": 0.14493420575119303, "eval_python_code_alpaca_emb_cos_sim": 0.7607376575469971, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008173871555238719, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.891065835952759, "eval_python_code_alpaca_n_ngrams_match_1": 9.95, "eval_python_code_alpaca_n_ngrams_match_2": 3.03, "eval_python_code_alpaca_n_ngrams_match_3": 1.046, "eval_python_code_alpaca_num_pred_words": 43.792, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.01249774172111, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33852505803455246, "eval_python_code_alpaca_runtime": 16.3432, "eval_python_code_alpaca_samples_per_second": 30.594, "eval_python_code_alpaca_steps_per_second": 0.061, "eval_python_code_alpaca_token_set_f1": 0.4800213088356204, "eval_python_code_alpaca_token_set_f1_sem": 0.005468617484744028, "eval_python_code_alpaca_token_set_precision": 0.5458710736376982, "eval_python_code_alpaca_token_set_recall": 0.4488237114317898, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 146250 }, { "epoch": 28.08, "eval_wikibio_accuracy": 0.327625, "eval_wikibio_bleu_score": 5.935470071713013, "eval_wikibio_bleu_score_sem": 0.20353583005875175, "eval_wikibio_emb_cos_sim": 0.7535380721092224, "eval_wikibio_emb_cos_sim_sem": 0.008793624498978716, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.669752836227417, "eval_wikibio_n_ngrams_match_1": 10.038, "eval_wikibio_n_ngrams_match_2": 3.384, "eval_wikibio_n_ngrams_match_3": 1.246, "eval_wikibio_num_pred_words": 35.97, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.24220541002032, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3543242286273901, "eval_wikibio_runtime": 10.1275, "eval_wikibio_samples_per_second": 49.371, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3184323590068187, "eval_wikibio_token_set_f1_sem": 0.005480312902743177, "eval_wikibio_token_set_precision": 0.3255292548701312, "eval_wikibio_token_set_recall": 0.331237265513698, "eval_wikibio_true_num_tokens": 61.1328125, "step": 146250 }, { "epoch": 28.08, "eval_nq_accuracy": 0.531375, "eval_nq_bleu_score": 11.423716282248757, "eval_nq_bleu_score_sem": 0.4814025057720387, "eval_nq_emb_cos_sim": 0.8324013352394104, "eval_nq_emb_cos_sim_sem": 0.006954593520865921, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.164207696914673, "eval_nq_n_ngrams_match_1": 23.062, "eval_nq_n_ngrams_match_2": 8.38, "eval_nq_n_ngrams_match_3": 3.776, "eval_nq_num_pred_words": 48.936, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.70770004541319, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44874031225443733, "eval_nq_runtime": 10.539, "eval_nq_samples_per_second": 47.443, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4615451423733168, "eval_nq_token_set_f1_sem": 0.004997725983139397, "eval_nq_token_set_precision": 0.41974679645714585, "eval_nq_token_set_recall": 0.5226533822086087, "eval_nq_true_num_tokens": 64.0, "step": 146250 }, { "epoch": 28.08, "learning_rate": 0.001, "loss": 2.53, "step": 146256 }, { "epoch": 28.09, "learning_rate": 0.001, "loss": 2.5372, "step": 146268 }, { "epoch": 28.09, "learning_rate": 0.001, "loss": 2.5275, "step": 146280 }, { "epoch": 28.09, "learning_rate": 0.001, "loss": 2.5271, "step": 146292 }, { "epoch": 28.09, "learning_rate": 0.001, "loss": 2.5388, "step": 146304 }, { "epoch": 28.09, "learning_rate": 0.001, "loss": 2.5279, "step": 146316 }, { "epoch": 28.1, "learning_rate": 0.001, "loss": 2.5382, "step": 146328 }, { "epoch": 28.1, "learning_rate": 0.001, "loss": 2.5349, "step": 146340 }, { "epoch": 28.1, "learning_rate": 0.001, "loss": 2.5375, "step": 146352 }, { "epoch": 28.1, "learning_rate": 0.001, "loss": 2.5324, "step": 146364 }, { "epoch": 28.11, "learning_rate": 0.001, "loss": 2.5394, "step": 146376 }, { "epoch": 28.11, "learning_rate": 0.001, "loss": 2.549, "step": 146388 }, { "epoch": 28.11, "learning_rate": 0.001, "loss": 2.5452, "step": 146400 }, { "epoch": 28.11, "learning_rate": 0.001, "loss": 2.5375, "step": 146412 }, { "epoch": 28.12, "learning_rate": 0.001, "loss": 2.5402, "step": 146424 }, { "epoch": 28.12, "learning_rate": 0.001, "loss": 2.5434, "step": 146436 }, { "epoch": 28.12, "learning_rate": 0.001, "loss": 2.5311, "step": 146448 }, { "epoch": 28.12, "learning_rate": 0.001, "loss": 2.5248, "step": 146460 }, { "epoch": 28.12, "learning_rate": 0.001, "loss": 2.5379, "step": 146472 }, { "epoch": 28.13, "learning_rate": 0.001, "loss": 2.5291, "step": 146484 }, { "epoch": 28.13, "learning_rate": 0.001, "loss": 2.5298, "step": 146496 }, { "epoch": 28.13, "learning_rate": 0.001, "loss": 2.5357, "step": 146508 }, { "epoch": 28.13, "learning_rate": 0.001, "loss": 2.5321, "step": 146520 }, { "epoch": 28.14, "learning_rate": 0.001, "loss": 2.543, "step": 146532 }, { "epoch": 28.14, "learning_rate": 0.001, "loss": 2.5471, "step": 146544 }, { "epoch": 28.14, "learning_rate": 0.001, "loss": 2.5394, "step": 146556 }, { "epoch": 28.14, "learning_rate": 0.001, "loss": 2.5376, "step": 146568 }, { "epoch": 28.15, "learning_rate": 0.001, "loss": 2.547, "step": 146580 }, { "epoch": 28.15, "learning_rate": 0.001, "loss": 2.5304, "step": 146592 }, { "epoch": 28.15, "learning_rate": 0.001, "loss": 2.5442, "step": 146604 }, { "epoch": 28.15, "learning_rate": 0.001, "loss": 2.5446, "step": 146616 }, { "epoch": 28.15, "learning_rate": 0.001, "loss": 2.5458, "step": 146628 }, { "epoch": 28.16, "learning_rate": 0.001, "loss": 2.5415, "step": 146640 }, { "epoch": 28.16, "learning_rate": 0.001, "loss": 2.5301, "step": 146652 }, { "epoch": 28.16, "learning_rate": 0.001, "loss": 2.5321, "step": 146664 }, { "epoch": 28.16, "learning_rate": 0.001, "loss": 2.5389, "step": 146676 }, { "epoch": 28.17, "learning_rate": 0.001, "loss": 2.5379, "step": 146688 }, { "epoch": 28.17, "learning_rate": 0.001, "loss": 2.5372, "step": 146700 }, { "epoch": 28.17, "learning_rate": 0.001, "loss": 2.5258, "step": 146712 }, { "epoch": 28.17, "learning_rate": 0.001, "loss": 2.538, "step": 146724 }, { "epoch": 28.18, "learning_rate": 0.001, "loss": 2.5345, "step": 146736 }, { "epoch": 28.18, "learning_rate": 0.001, "loss": 2.54, "step": 146748 }, { "epoch": 28.18, "learning_rate": 0.001, "loss": 2.5353, "step": 146760 }, { "epoch": 28.18, "learning_rate": 0.001, "loss": 2.5389, "step": 146772 }, { "epoch": 28.18, "learning_rate": 0.001, "loss": 2.5454, "step": 146784 }, { "epoch": 28.19, "learning_rate": 0.001, "loss": 2.5266, "step": 146796 }, { "epoch": 28.19, "learning_rate": 0.001, "loss": 2.5461, "step": 146808 }, { "epoch": 28.19, "learning_rate": 0.001, "loss": 2.5432, "step": 146820 }, { "epoch": 28.19, "learning_rate": 0.001, "loss": 2.5368, "step": 146832 }, { "epoch": 28.2, "learning_rate": 0.001, "loss": 2.5422, "step": 146844 }, { "epoch": 28.2, "learning_rate": 0.001, "loss": 2.5406, "step": 146856 }, { "epoch": 28.2, "learning_rate": 0.001, "loss": 2.5384, "step": 146868 }, { "epoch": 28.2, "eval_ag_news_accuracy": 0.32515625, "eval_ag_news_bleu_score": 4.727902891925582, "eval_ag_news_bleu_score_sem": 0.13861072140099853, "eval_ag_news_emb_cos_sim": 0.8126072883605957, "eval_ag_news_emb_cos_sim_sem": 0.007029657058063824, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.525362730026245, "eval_ag_news_n_ngrams_match_1": 14.072, "eval_ag_news_n_ngrams_match_2": 3.024, "eval_ag_news_n_ngrams_match_3": 0.85, "eval_ag_news_num_pred_words": 46.598, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.966091903413734, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35073214171794964, "eval_ag_news_runtime": 10.7626, "eval_ag_news_samples_per_second": 46.457, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.35152040309285, "eval_ag_news_token_set_f1_sem": 0.0041023464680715, "eval_ag_news_token_set_precision": 0.33792657206851634, "eval_ag_news_token_set_recall": 0.38003136150867717, "eval_ag_news_true_num_tokens": 56.09375, "step": 146875 }, { "epoch": 28.2, "eval_anthropic_toxic_prompts_accuracy": 0.11578125, "eval_anthropic_toxic_prompts_bleu_score": 3.070931347439393, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1132417458825351, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6800554394721985, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008519421712378395, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2209484577178955, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.694, "eval_anthropic_toxic_prompts_num_pred_words": 47.286, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.05186955594668, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21533283902667258, "eval_anthropic_toxic_prompts_runtime": 10.4803, "eval_anthropic_toxic_prompts_samples_per_second": 47.709, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.359983194227909, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006546728399224668, "eval_anthropic_toxic_prompts_token_set_precision": 0.44333651063025303, "eval_anthropic_toxic_prompts_token_set_recall": 0.32925044382815694, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 146875 }, { "epoch": 28.2, "eval_arxiv_accuracy": 0.35015625, "eval_arxiv_bleu_score": 4.2366391027743555, "eval_arxiv_bleu_score_sem": 0.11839382999027176, "eval_arxiv_emb_cos_sim": 0.7724995017051697, "eval_arxiv_emb_cos_sim_sem": 0.007262037000051178, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.378502130508423, "eval_arxiv_n_ngrams_match_1": 15.242, "eval_arxiv_n_ngrams_match_2": 2.908, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 40.18, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.32681046317229, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3657916958413393, "eval_arxiv_runtime": 11.1698, "eval_arxiv_samples_per_second": 44.764, "eval_arxiv_steps_per_second": 0.09, "eval_arxiv_token_set_f1": 0.3607956551808795, "eval_arxiv_token_set_f1_sem": 0.0042121987330928585, "eval_arxiv_token_set_precision": 0.31093707825153855, "eval_arxiv_token_set_recall": 0.4471758994531477, "eval_arxiv_true_num_tokens": 64.0, "step": 146875 }, { "epoch": 28.2, "eval_python_code_alpaca_accuracy": 0.161125, "eval_python_code_alpaca_bleu_score": 4.484428089151906, "eval_python_code_alpaca_bleu_score_sem": 0.139098361350417, "eval_python_code_alpaca_emb_cos_sim": 0.7598724365234375, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008038805393740479, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.881675958633423, "eval_python_code_alpaca_n_ngrams_match_1": 9.95, "eval_python_code_alpaca_n_ngrams_match_2": 2.842, "eval_python_code_alpaca_n_ngrams_match_3": 0.908, "eval_python_code_alpaca_num_pred_words": 43.848, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.844154197226203, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33786432646369235, "eval_python_code_alpaca_runtime": 9.8973, "eval_python_code_alpaca_samples_per_second": 50.519, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.47817807628207076, "eval_python_code_alpaca_token_set_f1_sem": 0.005347730504199243, "eval_python_code_alpaca_token_set_precision": 0.5428783909201385, "eval_python_code_alpaca_token_set_recall": 0.4472716463987637, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 146875 }, { "epoch": 28.2, "eval_wikibio_accuracy": 0.32734375, "eval_wikibio_bleu_score": 6.081810368842, "eval_wikibio_bleu_score_sem": 0.2254834839098503, "eval_wikibio_emb_cos_sim": 0.7580106258392334, "eval_wikibio_emb_cos_sim_sem": 0.007873820896700075, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6655948162078857, "eval_wikibio_n_ngrams_match_1": 10.034, "eval_wikibio_n_ngrams_match_2": 3.394, "eval_wikibio_n_ngrams_match_3": 1.258, "eval_wikibio_num_pred_words": 35.724, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.079374296430586, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3571696117970976, "eval_wikibio_runtime": 9.9333, "eval_wikibio_samples_per_second": 50.336, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.3173829153408167, "eval_wikibio_token_set_f1_sem": 0.005628348895596099, "eval_wikibio_token_set_precision": 0.3238723275626845, "eval_wikibio_token_set_recall": 0.32692515325967575, "eval_wikibio_true_num_tokens": 61.1328125, "step": 146875 }, { "epoch": 28.2, "eval_nq_accuracy": 0.53075, "eval_nq_bleu_score": 11.848784726225416, "eval_nq_bleu_score_sem": 0.49204298155525433, "eval_nq_emb_cos_sim": 0.8347103595733643, "eval_nq_emb_cos_sim_sem": 0.007278560540300972, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1631009578704834, "eval_nq_n_ngrams_match_1": 23.256, "eval_nq_n_ngrams_match_2": 8.48, "eval_nq_n_ngrams_match_3": 3.942, "eval_nq_num_pred_words": 49.258, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.698068224726995, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4507502952551629, "eval_nq_runtime": 10.5148, "eval_nq_samples_per_second": 47.552, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4648648082013177, "eval_nq_token_set_f1_sem": 0.004916229352266828, "eval_nq_token_set_precision": 0.42294593970219174, "eval_nq_token_set_recall": 0.5240538609663946, "eval_nq_true_num_tokens": 64.0, "step": 146875 }, { "epoch": 28.2, "learning_rate": 0.001, "loss": 2.5305, "step": 146880 }, { "epoch": 28.21, "learning_rate": 0.001, "loss": 2.5389, "step": 146892 }, { "epoch": 28.21, "learning_rate": 0.001, "loss": 2.5447, "step": 146904 }, { "epoch": 28.21, "learning_rate": 0.001, "loss": 2.5401, "step": 146916 }, { "epoch": 28.21, "learning_rate": 0.001, "loss": 2.5379, "step": 146928 }, { "epoch": 28.21, "learning_rate": 0.001, "loss": 2.537, "step": 146940 }, { "epoch": 28.22, "learning_rate": 0.001, "loss": 2.5296, "step": 146952 }, { "epoch": 28.22, "learning_rate": 0.001, "loss": 2.5388, "step": 146964 }, { "epoch": 28.22, "learning_rate": 0.001, "loss": 2.541, "step": 146976 }, { "epoch": 28.22, "learning_rate": 0.001, "loss": 2.5427, "step": 146988 }, { "epoch": 28.23, "learning_rate": 0.001, "loss": 2.5379, "step": 147000 }, { "epoch": 28.23, "learning_rate": 0.001, "loss": 2.535, "step": 147012 }, { "epoch": 28.23, "learning_rate": 0.001, "loss": 2.5426, "step": 147024 }, { "epoch": 28.23, "learning_rate": 0.001, "loss": 2.5405, "step": 147036 }, { "epoch": 28.24, "learning_rate": 0.001, "loss": 2.542, "step": 147048 }, { "epoch": 28.24, "learning_rate": 0.001, "loss": 2.5392, "step": 147060 }, { "epoch": 28.24, "learning_rate": 0.001, "loss": 2.5337, "step": 147072 }, { "epoch": 28.24, "learning_rate": 0.001, "loss": 2.5346, "step": 147084 }, { "epoch": 28.24, "learning_rate": 0.001, "loss": 2.5416, "step": 147096 }, { "epoch": 28.25, "learning_rate": 0.001, "loss": 2.5402, "step": 147108 }, { "epoch": 28.25, "learning_rate": 0.001, "loss": 2.5394, "step": 147120 }, { "epoch": 28.25, "learning_rate": 0.001, "loss": 2.5312, "step": 147132 }, { "epoch": 28.25, "learning_rate": 0.001, "loss": 2.5417, "step": 147144 }, { "epoch": 28.26, "learning_rate": 0.001, "loss": 2.5439, "step": 147156 }, { "epoch": 28.26, "learning_rate": 0.001, "loss": 2.5317, "step": 147168 }, { "epoch": 28.26, "learning_rate": 0.001, "loss": 2.5401, "step": 147180 }, { "epoch": 28.26, "learning_rate": 0.001, "loss": 2.5455, "step": 147192 }, { "epoch": 28.26, "learning_rate": 0.001, "loss": 2.5398, "step": 147204 }, { "epoch": 28.27, "learning_rate": 0.001, "loss": 2.5279, "step": 147216 }, { "epoch": 28.27, "learning_rate": 0.001, "loss": 2.5394, "step": 147228 }, { "epoch": 28.27, "learning_rate": 0.001, "loss": 2.5392, "step": 147240 }, { "epoch": 28.27, "learning_rate": 0.001, "loss": 2.5431, "step": 147252 }, { "epoch": 28.28, "learning_rate": 0.001, "loss": 2.5358, "step": 147264 }, { "epoch": 28.28, "learning_rate": 0.001, "loss": 2.5477, "step": 147276 }, { "epoch": 28.28, "learning_rate": 0.001, "loss": 2.5349, "step": 147288 }, { "epoch": 28.28, "learning_rate": 0.001, "loss": 2.5373, "step": 147300 }, { "epoch": 28.29, "learning_rate": 0.001, "loss": 2.548, "step": 147312 }, { "epoch": 28.29, "learning_rate": 0.001, "loss": 2.5362, "step": 147324 }, { "epoch": 28.29, "learning_rate": 0.001, "loss": 2.5404, "step": 147336 }, { "epoch": 28.29, "learning_rate": 0.001, "loss": 2.5391, "step": 147348 }, { "epoch": 28.29, "learning_rate": 0.001, "loss": 2.536, "step": 147360 }, { "epoch": 28.3, "learning_rate": 0.001, "loss": 2.5408, "step": 147372 }, { "epoch": 28.3, "learning_rate": 0.001, "loss": 2.5478, "step": 147384 }, { "epoch": 28.3, "learning_rate": 0.001, "loss": 2.5416, "step": 147396 }, { "epoch": 28.3, "learning_rate": 0.001, "loss": 2.5438, "step": 147408 }, { "epoch": 28.31, "learning_rate": 0.001, "loss": 2.5457, "step": 147420 }, { "epoch": 28.31, "learning_rate": 0.001, "loss": 2.5439, "step": 147432 }, { "epoch": 28.31, "learning_rate": 0.001, "loss": 2.5456, "step": 147444 }, { "epoch": 28.31, "learning_rate": 0.001, "loss": 2.5409, "step": 147456 }, { "epoch": 28.32, "learning_rate": 0.001, "loss": 2.5323, "step": 147468 }, { "epoch": 28.32, "learning_rate": 0.001, "loss": 2.5328, "step": 147480 }, { "epoch": 28.32, "learning_rate": 0.001, "loss": 2.5508, "step": 147492 }, { "epoch": 28.32, "eval_ag_news_accuracy": 0.32496875, "eval_ag_news_bleu_score": 5.058627120325173, "eval_ag_news_bleu_score_sem": 0.161816408985397, "eval_ag_news_emb_cos_sim": 0.8181792497634888, "eval_ag_news_emb_cos_sim_sem": 0.005908350690143616, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5404701232910156, "eval_ag_news_n_ngrams_match_1": 14.55, "eval_ag_news_n_ngrams_match_2": 3.222, "eval_ag_news_n_ngrams_match_3": 0.952, "eval_ag_news_num_pred_words": 47.02, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.483126701804245, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3596947141738803, "eval_ag_news_runtime": 10.327, "eval_ag_news_samples_per_second": 48.417, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.36122328818709387, "eval_ag_news_token_set_f1_sem": 0.004252778338673336, "eval_ag_news_token_set_precision": 0.3481848570783673, "eval_ag_news_token_set_recall": 0.3883082182604708, "eval_ag_news_true_num_tokens": 56.09375, "step": 147500 }, { "epoch": 28.32, "eval_anthropic_toxic_prompts_accuracy": 0.11565625, "eval_anthropic_toxic_prompts_bleu_score": 3.2617294868236386, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12156621925491215, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6849709749221802, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008578819134275896, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2211756706237793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79, "eval_anthropic_toxic_prompts_num_pred_words": 47.082, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.0575623107355, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22034475016228383, "eval_anthropic_toxic_prompts_runtime": 10.3566, "eval_anthropic_toxic_prompts_samples_per_second": 48.278, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3636098196278902, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006576000253160794, "eval_anthropic_toxic_prompts_token_set_precision": 0.44755966639836103, "eval_anthropic_toxic_prompts_token_set_recall": 0.3314902285564594, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 147500 }, { "epoch": 28.32, "eval_arxiv_accuracy": 0.34671875, "eval_arxiv_bleu_score": 4.491195571076645, "eval_arxiv_bleu_score_sem": 0.13996277436818558, "eval_arxiv_emb_cos_sim": 0.7671834230422974, "eval_arxiv_emb_cos_sim_sem": 0.007037962621398501, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.393876314163208, "eval_arxiv_n_ngrams_match_1": 15.268, "eval_arxiv_n_ngrams_match_2": 3.05, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 41.004, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.781169987083175, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.364461733862252, "eval_arxiv_runtime": 10.216, "eval_arxiv_samples_per_second": 48.943, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35881319233874137, "eval_arxiv_token_set_f1_sem": 0.004121462831889142, "eval_arxiv_token_set_precision": 0.31067587081492953, "eval_arxiv_token_set_recall": 0.44142980851469676, "eval_arxiv_true_num_tokens": 64.0, "step": 147500 }, { "epoch": 28.32, "eval_python_code_alpaca_accuracy": 0.160375, "eval_python_code_alpaca_bleu_score": 4.6133436284216325, "eval_python_code_alpaca_bleu_score_sem": 0.14620112970310353, "eval_python_code_alpaca_emb_cos_sim": 0.7637086510658264, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007533973076455021, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8811099529266357, "eval_python_code_alpaca_n_ngrams_match_1": 10.016, "eval_python_code_alpaca_n_ngrams_match_2": 2.938, "eval_python_code_alpaca_n_ngrams_match_3": 0.972, "eval_python_code_alpaca_num_pred_words": 43.944, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.83405716187716, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33797355622952535, "eval_python_code_alpaca_runtime": 9.9459, "eval_python_code_alpaca_samples_per_second": 50.272, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.478446919358761, "eval_python_code_alpaca_token_set_f1_sem": 0.005270713999190445, "eval_python_code_alpaca_token_set_precision": 0.5429764903453214, "eval_python_code_alpaca_token_set_recall": 0.44789353173329033, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 147500 }, { "epoch": 28.32, "eval_wikibio_accuracy": 0.31890625, "eval_wikibio_bleu_score": 6.062976786744554, "eval_wikibio_bleu_score_sem": 0.2172282879443045, "eval_wikibio_emb_cos_sim": 0.7468626499176025, "eval_wikibio_emb_cos_sim_sem": 0.009506253291768532, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.749068260192871, "eval_wikibio_n_ngrams_match_1": 10.34, "eval_wikibio_n_ngrams_match_2": 3.472, "eval_wikibio_n_ngrams_match_3": 1.292, "eval_wikibio_num_pred_words": 36.772, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.48148186669631, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3615766848430457, "eval_wikibio_runtime": 9.8401, "eval_wikibio_samples_per_second": 50.813, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3238601821085116, "eval_wikibio_token_set_f1_sem": 0.005327155933330291, "eval_wikibio_token_set_precision": 0.33480334333418027, "eval_wikibio_token_set_recall": 0.32762355793268383, "eval_wikibio_true_num_tokens": 61.1328125, "step": 147500 }, { "epoch": 28.32, "eval_nq_accuracy": 0.52946875, "eval_nq_bleu_score": 11.701758716992579, "eval_nq_bleu_score_sem": 0.4791316807894547, "eval_nq_emb_cos_sim": 0.8338937759399414, "eval_nq_emb_cos_sim_sem": 0.0072380205403895555, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1693382263183594, "eval_nq_n_ngrams_match_1": 23.416, "eval_nq_n_ngrams_match_2": 8.568, "eval_nq_n_ngrams_match_3": 3.916, "eval_nq_num_pred_words": 49.158, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.752489956264105, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4502296560816097, "eval_nq_runtime": 10.6182, "eval_nq_samples_per_second": 47.089, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4661927852454617, "eval_nq_token_set_f1_sem": 0.004862939505677727, "eval_nq_token_set_precision": 0.42371133283314993, "eval_nq_token_set_recall": 0.5258215610804338, "eval_nq_true_num_tokens": 64.0, "step": 147500 }, { "epoch": 28.32, "learning_rate": 0.001, "loss": 2.5327, "step": 147504 }, { "epoch": 28.32, "learning_rate": 0.001, "loss": 2.5434, "step": 147516 }, { "epoch": 28.33, "learning_rate": 0.001, "loss": 2.5489, "step": 147528 }, { "epoch": 28.33, "learning_rate": 0.001, "loss": 2.5376, "step": 147540 }, { "epoch": 28.33, "learning_rate": 0.001, "loss": 2.5326, "step": 147552 }, { "epoch": 28.33, "learning_rate": 0.001, "loss": 2.5401, "step": 147564 }, { "epoch": 28.34, "learning_rate": 0.001, "loss": 2.5413, "step": 147576 }, { "epoch": 28.34, "learning_rate": 0.001, "loss": 2.5393, "step": 147588 }, { "epoch": 28.34, "learning_rate": 0.001, "loss": 2.5355, "step": 147600 }, { "epoch": 28.34, "learning_rate": 0.001, "loss": 2.5463, "step": 147612 }, { "epoch": 28.35, "learning_rate": 0.001, "loss": 2.5451, "step": 147624 }, { "epoch": 28.35, "learning_rate": 0.001, "loss": 2.5466, "step": 147636 }, { "epoch": 28.35, "learning_rate": 0.001, "loss": 2.5499, "step": 147648 }, { "epoch": 28.35, "learning_rate": 0.001, "loss": 2.5414, "step": 147660 }, { "epoch": 28.35, "learning_rate": 0.001, "loss": 2.5481, "step": 147672 }, { "epoch": 28.36, "learning_rate": 0.001, "loss": 2.543, "step": 147684 }, { "epoch": 28.36, "learning_rate": 0.001, "loss": 2.5489, "step": 147696 }, { "epoch": 28.36, "learning_rate": 0.001, "loss": 2.5492, "step": 147708 }, { "epoch": 28.36, "learning_rate": 0.001, "loss": 2.5478, "step": 147720 }, { "epoch": 28.37, "learning_rate": 0.001, "loss": 2.5397, "step": 147732 }, { "epoch": 28.37, "learning_rate": 0.001, "loss": 2.5483, "step": 147744 }, { "epoch": 28.37, "learning_rate": 0.001, "loss": 2.5402, "step": 147756 }, { "epoch": 28.37, "learning_rate": 0.001, "loss": 2.553, "step": 147768 }, { "epoch": 28.38, "learning_rate": 0.001, "loss": 2.542, "step": 147780 }, { "epoch": 28.38, "learning_rate": 0.001, "loss": 2.5446, "step": 147792 }, { "epoch": 28.38, "learning_rate": 0.001, "loss": 2.5512, "step": 147804 }, { "epoch": 28.38, "learning_rate": 0.001, "loss": 2.5449, "step": 147816 }, { "epoch": 28.38, "learning_rate": 0.001, "loss": 2.5411, "step": 147828 }, { "epoch": 28.39, "learning_rate": 0.001, "loss": 2.5535, "step": 147840 }, { "epoch": 28.39, "learning_rate": 0.001, "loss": 2.5429, "step": 147852 }, { "epoch": 28.39, "learning_rate": 0.001, "loss": 2.5491, "step": 147864 }, { "epoch": 28.39, "learning_rate": 0.001, "loss": 2.5379, "step": 147876 }, { "epoch": 28.4, "learning_rate": 0.001, "loss": 2.5465, "step": 147888 }, { "epoch": 28.4, "learning_rate": 0.001, "loss": 2.5456, "step": 147900 }, { "epoch": 28.4, "learning_rate": 0.001, "loss": 2.5445, "step": 147912 }, { "epoch": 28.4, "learning_rate": 0.001, "loss": 2.5362, "step": 147924 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 2.5347, "step": 147936 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 2.5442, "step": 147948 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 2.5378, "step": 147960 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 2.5426, "step": 147972 }, { "epoch": 28.41, "learning_rate": 0.001, "loss": 2.5436, "step": 147984 }, { "epoch": 28.42, "learning_rate": 0.001, "loss": 2.537, "step": 147996 }, { "epoch": 28.42, "learning_rate": 0.001, "loss": 2.5419, "step": 148008 }, { "epoch": 28.42, "learning_rate": 0.001, "loss": 2.5344, "step": 148020 }, { "epoch": 28.42, "learning_rate": 0.001, "loss": 2.5437, "step": 148032 }, { "epoch": 28.43, "learning_rate": 0.001, "loss": 2.5447, "step": 148044 }, { "epoch": 28.43, "learning_rate": 0.001, "loss": 2.5472, "step": 148056 }, { "epoch": 28.43, "learning_rate": 0.001, "loss": 2.5486, "step": 148068 }, { "epoch": 28.43, "learning_rate": 0.001, "loss": 2.5423, "step": 148080 }, { "epoch": 28.44, "learning_rate": 0.001, "loss": 2.543, "step": 148092 }, { "epoch": 28.44, "learning_rate": 0.001, "loss": 2.5481, "step": 148104 }, { "epoch": 28.44, "learning_rate": 0.001, "loss": 2.5514, "step": 148116 }, { "epoch": 28.44, "eval_ag_news_accuracy": 0.32521875, "eval_ag_news_bleu_score": 4.919500148084001, "eval_ag_news_bleu_score_sem": 0.15261339154140596, "eval_ag_news_emb_cos_sim": 0.8202201128005981, "eval_ag_news_emb_cos_sim_sem": 0.007474579605824569, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5227763652801514, "eval_ag_news_n_ngrams_match_1": 14.246, "eval_ag_news_n_ngrams_match_2": 3.286, "eval_ag_news_n_ngrams_match_3": 0.95, "eval_ag_news_num_pred_words": 46.996, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.87835670726844, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35314929493138925, "eval_ag_news_runtime": 10.5201, "eval_ag_news_samples_per_second": 47.528, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35612438703738875, "eval_ag_news_token_set_f1_sem": 0.004623295864016999, "eval_ag_news_token_set_precision": 0.3404142533605893, "eval_ag_news_token_set_recall": 0.391822754950611, "eval_ag_news_true_num_tokens": 56.09375, "step": 148125 }, { "epoch": 28.44, "eval_anthropic_toxic_prompts_accuracy": 0.115625, "eval_anthropic_toxic_prompts_bleu_score": 2.975170619532323, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1126888361912147, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6649253368377686, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009802338195280102, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2226529121398926, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.172, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.862, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.668, "eval_anthropic_toxic_prompts_num_pred_words": 47.712, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.094605736380164, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2105401992924088, "eval_anthropic_toxic_prompts_runtime": 11.181, "eval_anthropic_toxic_prompts_samples_per_second": 44.719, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.3511539435015556, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006604996312961682, "eval_anthropic_toxic_prompts_token_set_precision": 0.43285931990358306, "eval_anthropic_toxic_prompts_token_set_recall": 0.32560668529177567, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 148125 }, { "epoch": 28.44, "eval_arxiv_accuracy": 0.35065625, "eval_arxiv_bleu_score": 4.563628620853197, "eval_arxiv_bleu_score_sem": 0.1373218933249007, "eval_arxiv_emb_cos_sim": 0.7701166868209839, "eval_arxiv_emb_cos_sim_sem": 0.007064328109320098, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.383981466293335, "eval_arxiv_n_ngrams_match_1": 15.32, "eval_arxiv_n_ngrams_match_2": 3.11, "eval_arxiv_n_ngrams_match_3": 0.732, "eval_arxiv_num_pred_words": 40.696, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.487942951563884, "eval_arxiv_pred_num_tokens": 62.9609375, "eval_arxiv_rouge_score": 0.36804509052626416, "eval_arxiv_runtime": 10.4142, "eval_arxiv_samples_per_second": 48.011, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3592781073925174, "eval_arxiv_token_set_f1_sem": 0.004238921442030327, "eval_arxiv_token_set_precision": 0.3095947030549729, "eval_arxiv_token_set_recall": 0.4413900551762349, "eval_arxiv_true_num_tokens": 64.0, "step": 148125 }, { "epoch": 28.44, "eval_python_code_alpaca_accuracy": 0.16259375, "eval_python_code_alpaca_bleu_score": 4.484850193966426, "eval_python_code_alpaca_bleu_score_sem": 0.13808648249959496, "eval_python_code_alpaca_emb_cos_sim": 0.7501258850097656, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010587072343337962, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.891721248626709, "eval_python_code_alpaca_n_ngrams_match_1": 9.844, "eval_python_code_alpaca_n_ngrams_match_2": 2.808, "eval_python_code_alpaca_n_ngrams_match_3": 0.93, "eval_python_code_alpaca_num_pred_words": 43.712, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.02430723065215, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33241039651790105, "eval_python_code_alpaca_runtime": 11.3281, "eval_python_code_alpaca_samples_per_second": 44.138, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.4681634642353719, "eval_python_code_alpaca_token_set_f1_sem": 0.00567608342590527, "eval_python_code_alpaca_token_set_precision": 0.539047968387537, "eval_python_code_alpaca_token_set_recall": 0.43849770529127396, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 148125 }, { "epoch": 28.44, "eval_wikibio_accuracy": 0.32546875, "eval_wikibio_bleu_score": 5.936747296165914, "eval_wikibio_bleu_score_sem": 0.20803172398243366, "eval_wikibio_emb_cos_sim": 0.7442708015441895, "eval_wikibio_emb_cos_sim_sem": 0.009910900574510068, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6966350078582764, "eval_wikibio_n_ngrams_match_1": 10.166, "eval_wikibio_n_ngrams_match_2": 3.476, "eval_wikibio_n_ngrams_match_3": 1.278, "eval_wikibio_num_pred_words": 36.714, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.31142823799574, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35653502731892006, "eval_wikibio_runtime": 10.1759, "eval_wikibio_samples_per_second": 49.136, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.32140197713135743, "eval_wikibio_token_set_f1_sem": 0.005499881518304417, "eval_wikibio_token_set_precision": 0.32982016300060796, "eval_wikibio_token_set_recall": 0.3313464747933582, "eval_wikibio_true_num_tokens": 61.1328125, "step": 148125 }, { "epoch": 28.44, "eval_nq_accuracy": 0.530375, "eval_nq_bleu_score": 11.866695437786367, "eval_nq_bleu_score_sem": 0.48121408499178814, "eval_nq_emb_cos_sim": 0.8286600708961487, "eval_nq_emb_cos_sim_sem": 0.007795574613895093, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.168030023574829, "eval_nq_n_ngrams_match_1": 23.178, "eval_nq_n_ngrams_match_2": 8.496, "eval_nq_n_ngrams_match_3": 3.924, "eval_nq_num_pred_words": 49.13, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.741047411106974, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4485457505447752, "eval_nq_runtime": 10.858, "eval_nq_samples_per_second": 46.049, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.462470080542473, "eval_nq_token_set_f1_sem": 0.005146965443933411, "eval_nq_token_set_precision": 0.421093461301019, "eval_nq_token_set_recall": 0.5228317180517345, "eval_nq_true_num_tokens": 64.0, "step": 148125 }, { "epoch": 28.44, "learning_rate": 0.001, "loss": 2.5368, "step": 148128 }, { "epoch": 28.44, "learning_rate": 0.001, "loss": 2.5323, "step": 148140 }, { "epoch": 28.45, "learning_rate": 0.001, "loss": 2.5416, "step": 148152 }, { "epoch": 28.45, "learning_rate": 0.001, "loss": 2.5441, "step": 148164 }, { "epoch": 28.45, "learning_rate": 0.001, "loss": 2.5449, "step": 148176 }, { "epoch": 28.45, "learning_rate": 0.001, "loss": 2.5378, "step": 148188 }, { "epoch": 28.46, "learning_rate": 0.001, "loss": 2.5418, "step": 148200 }, { "epoch": 28.46, "learning_rate": 0.001, "loss": 2.5387, "step": 148212 }, { "epoch": 28.46, "learning_rate": 0.001, "loss": 2.5472, "step": 148224 }, { "epoch": 28.46, "learning_rate": 0.001, "loss": 2.5487, "step": 148236 }, { "epoch": 28.47, "learning_rate": 0.001, "loss": 2.5369, "step": 148248 }, { "epoch": 28.47, "learning_rate": 0.001, "loss": 2.5505, "step": 148260 }, { "epoch": 28.47, "learning_rate": 0.001, "loss": 2.5413, "step": 148272 }, { "epoch": 28.47, "learning_rate": 0.001, "loss": 2.54, "step": 148284 }, { "epoch": 28.47, "learning_rate": 0.001, "loss": 2.5385, "step": 148296 }, { "epoch": 28.48, "learning_rate": 0.001, "loss": 2.5384, "step": 148308 }, { "epoch": 28.48, "learning_rate": 0.001, "loss": 2.5443, "step": 148320 }, { "epoch": 28.48, "learning_rate": 0.001, "loss": 2.5455, "step": 148332 }, { "epoch": 28.48, "learning_rate": 0.001, "loss": 2.5455, "step": 148344 }, { "epoch": 28.49, "learning_rate": 0.001, "loss": 2.5448, "step": 148356 }, { "epoch": 28.49, "learning_rate": 0.001, "loss": 2.543, "step": 148368 }, { "epoch": 28.49, "learning_rate": 0.001, "loss": 2.5431, "step": 148380 }, { "epoch": 28.49, "learning_rate": 0.001, "loss": 2.5455, "step": 148392 }, { "epoch": 28.5, "learning_rate": 0.001, "loss": 2.5331, "step": 148404 }, { "epoch": 28.5, "learning_rate": 0.001, "loss": 2.5472, "step": 148416 }, { "epoch": 28.5, "learning_rate": 0.001, "loss": 2.5414, "step": 148428 }, { "epoch": 28.5, "learning_rate": 0.001, "loss": 2.5378, "step": 148440 }, { "epoch": 28.5, "learning_rate": 0.001, "loss": 2.5417, "step": 148452 }, { "epoch": 28.51, "learning_rate": 0.001, "loss": 2.5471, "step": 148464 }, { "epoch": 28.51, "learning_rate": 0.001, "loss": 2.5436, "step": 148476 }, { "epoch": 28.51, "learning_rate": 0.001, "loss": 2.5466, "step": 148488 }, { "epoch": 28.51, "learning_rate": 0.001, "loss": 2.5328, "step": 148500 }, { "epoch": 28.52, "learning_rate": 0.001, "loss": 2.5387, "step": 148512 }, { "epoch": 28.52, "learning_rate": 0.001, "loss": 2.5368, "step": 148524 }, { "epoch": 28.52, "learning_rate": 0.001, "loss": 2.548, "step": 148536 }, { "epoch": 28.52, "learning_rate": 0.001, "loss": 2.5377, "step": 148548 }, { "epoch": 28.53, "learning_rate": 0.001, "loss": 2.5434, "step": 148560 }, { "epoch": 28.53, "learning_rate": 0.001, "loss": 2.5413, "step": 148572 }, { "epoch": 28.53, "learning_rate": 0.001, "loss": 2.5483, "step": 148584 }, { "epoch": 28.53, "learning_rate": 0.001, "loss": 2.5425, "step": 148596 }, { "epoch": 28.53, "learning_rate": 0.001, "loss": 2.5338, "step": 148608 }, { "epoch": 28.54, "learning_rate": 0.001, "loss": 2.5347, "step": 148620 }, { "epoch": 28.54, "learning_rate": 0.001, "loss": 2.5462, "step": 148632 }, { "epoch": 28.54, "learning_rate": 0.001, "loss": 2.5384, "step": 148644 }, { "epoch": 28.54, "learning_rate": 0.001, "loss": 2.53, "step": 148656 }, { "epoch": 28.55, "learning_rate": 0.001, "loss": 2.5483, "step": 148668 }, { "epoch": 28.55, "learning_rate": 0.001, "loss": 2.5364, "step": 148680 }, { "epoch": 28.55, "learning_rate": 0.001, "loss": 2.5357, "step": 148692 }, { "epoch": 28.55, "learning_rate": 0.001, "loss": 2.5418, "step": 148704 }, { "epoch": 28.56, "learning_rate": 0.001, "loss": 2.5398, "step": 148716 }, { "epoch": 28.56, "learning_rate": 0.001, "loss": 2.541, "step": 148728 }, { "epoch": 28.56, "learning_rate": 0.001, "loss": 2.5538, "step": 148740 }, { "epoch": 28.56, "eval_ag_news_accuracy": 0.3243125, "eval_ag_news_bleu_score": 4.756171917009729, "eval_ag_news_bleu_score_sem": 0.14941271223160832, "eval_ag_news_emb_cos_sim": 0.8182030916213989, "eval_ag_news_emb_cos_sim_sem": 0.006345165237922077, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5314078330993652, "eval_ag_news_n_ngrams_match_1": 14.328, "eval_ag_news_n_ngrams_match_2": 3.112, "eval_ag_news_n_ngrams_match_3": 0.87, "eval_ag_news_num_pred_words": 46.954, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.17204229846926, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3534847715901825, "eval_ag_news_runtime": 10.8852, "eval_ag_news_samples_per_second": 45.934, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.3555322727312971, "eval_ag_news_token_set_f1_sem": 0.00428780652135488, "eval_ag_news_token_set_precision": 0.3410526111294004, "eval_ag_news_token_set_recall": 0.3853175974998572, "eval_ag_news_true_num_tokens": 56.09375, "step": 148750 }, { "epoch": 28.56, "eval_anthropic_toxic_prompts_accuracy": 0.11396875, "eval_anthropic_toxic_prompts_bleu_score": 3.0643367565056243, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1201213060777016, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.675471305847168, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009189501944507447, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.234440803527832, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.156, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 48.598, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.39216859693375, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.208450634279371, "eval_anthropic_toxic_prompts_runtime": 9.8553, "eval_anthropic_toxic_prompts_samples_per_second": 50.734, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35818510019077876, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006421029992889609, "eval_anthropic_toxic_prompts_token_set_precision": 0.428584827742149, "eval_anthropic_toxic_prompts_token_set_recall": 0.3348244185071268, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 148750 }, { "epoch": 28.56, "eval_arxiv_accuracy": 0.34975, "eval_arxiv_bleu_score": 4.367905795039898, "eval_arxiv_bleu_score_sem": 0.12993739532055507, "eval_arxiv_emb_cos_sim": 0.7781081199645996, "eval_arxiv_emb_cos_sim_sem": 0.0067955978275811135, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3815231323242188, "eval_arxiv_n_ngrams_match_1": 15.204, "eval_arxiv_n_ngrams_match_2": 3.012, "eval_arxiv_n_ngrams_match_3": 0.68, "eval_arxiv_num_pred_words": 40.834, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.41554077056017, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3625433074967942, "eval_arxiv_runtime": 10.2468, "eval_arxiv_samples_per_second": 48.796, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35502807828030514, "eval_arxiv_token_set_f1_sem": 0.004236078488294291, "eval_arxiv_token_set_precision": 0.3060704591108825, "eval_arxiv_token_set_recall": 0.4425632761727061, "eval_arxiv_true_num_tokens": 64.0, "step": 148750 }, { "epoch": 28.56, "eval_python_code_alpaca_accuracy": 0.1613125, "eval_python_code_alpaca_bleu_score": 4.502380345139768, "eval_python_code_alpaca_bleu_score_sem": 0.14058627237128596, "eval_python_code_alpaca_emb_cos_sim": 0.7614978551864624, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007785972376424197, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8783135414123535, "eval_python_code_alpaca_n_ngrams_match_1": 10.054, "eval_python_code_alpaca_n_ngrams_match_2": 2.958, "eval_python_code_alpaca_n_ngrams_match_3": 0.934, "eval_python_code_alpaca_num_pred_words": 44.118, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.784255464556985, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33412447209432394, "eval_python_code_alpaca_runtime": 10.0555, "eval_python_code_alpaca_samples_per_second": 49.724, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.47905262775355023, "eval_python_code_alpaca_token_set_f1_sem": 0.005433762017092308, "eval_python_code_alpaca_token_set_precision": 0.5464928925229299, "eval_python_code_alpaca_token_set_recall": 0.4470709755220311, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 148750 }, { "epoch": 28.56, "eval_wikibio_accuracy": 0.32434375, "eval_wikibio_bleu_score": 6.028966990076317, "eval_wikibio_bleu_score_sem": 0.2219807337520827, "eval_wikibio_emb_cos_sim": 0.7519832849502563, "eval_wikibio_emb_cos_sim_sem": 0.009022705132981076, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.739767551422119, "eval_wikibio_n_ngrams_match_1": 10.144, "eval_wikibio_n_ngrams_match_2": 3.454, "eval_wikibio_n_ngrams_match_3": 1.284, "eval_wikibio_num_pred_words": 36.188, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.088205684289775, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3566398404420736, "eval_wikibio_runtime": 10.1372, "eval_wikibio_samples_per_second": 49.323, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32181054103821344, "eval_wikibio_token_set_f1_sem": 0.005481928979619786, "eval_wikibio_token_set_precision": 0.3305455895231806, "eval_wikibio_token_set_recall": 0.3323655225855936, "eval_wikibio_true_num_tokens": 61.1328125, "step": 148750 }, { "epoch": 28.56, "eval_nq_accuracy": 0.531625, "eval_nq_bleu_score": 11.733330475134023, "eval_nq_bleu_score_sem": 0.4758405920195655, "eval_nq_emb_cos_sim": 0.8343696594238281, "eval_nq_emb_cos_sim_sem": 0.006893065707485096, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.166505813598633, "eval_nq_n_ngrams_match_1": 23.33, "eval_nq_n_ngrams_match_2": 8.486, "eval_nq_n_ngrams_match_3": 3.882, "eval_nq_num_pred_words": 49.434, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.727734367954577, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45111523021649913, "eval_nq_runtime": 10.4337, "eval_nq_samples_per_second": 47.921, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4658619928953663, "eval_nq_token_set_f1_sem": 0.004827877671905447, "eval_nq_token_set_precision": 0.4241522771524402, "eval_nq_token_set_recall": 0.5240705834766768, "eval_nq_true_num_tokens": 64.0, "step": 148750 }, { "epoch": 28.56, "learning_rate": 0.001, "loss": 2.5392, "step": 148752 }, { "epoch": 28.56, "learning_rate": 0.001, "loss": 2.5486, "step": 148764 }, { "epoch": 28.57, "learning_rate": 0.001, "loss": 2.5454, "step": 148776 }, { "epoch": 28.57, "learning_rate": 0.001, "loss": 2.5447, "step": 148788 }, { "epoch": 28.57, "learning_rate": 0.001, "loss": 2.5368, "step": 148800 }, { "epoch": 28.57, "learning_rate": 0.001, "loss": 2.5411, "step": 148812 }, { "epoch": 28.58, "learning_rate": 0.001, "loss": 2.5465, "step": 148824 }, { "epoch": 28.58, "learning_rate": 0.001, "loss": 2.541, "step": 148836 }, { "epoch": 28.58, "learning_rate": 0.001, "loss": 2.5267, "step": 148848 }, { "epoch": 28.58, "learning_rate": 0.001, "loss": 2.5353, "step": 148860 }, { "epoch": 28.59, "learning_rate": 0.001, "loss": 2.5268, "step": 148872 }, { "epoch": 28.59, "learning_rate": 0.001, "loss": 2.5306, "step": 148884 }, { "epoch": 28.59, "learning_rate": 0.001, "loss": 2.543, "step": 148896 }, { "epoch": 28.59, "learning_rate": 0.001, "loss": 2.5368, "step": 148908 }, { "epoch": 28.59, "learning_rate": 0.001, "loss": 2.5424, "step": 148920 }, { "epoch": 28.6, "learning_rate": 0.001, "loss": 2.5368, "step": 148932 }, { "epoch": 28.6, "learning_rate": 0.001, "loss": 2.5331, "step": 148944 }, { "epoch": 28.6, "learning_rate": 0.001, "loss": 2.5392, "step": 148956 }, { "epoch": 28.6, "learning_rate": 0.001, "loss": 2.5366, "step": 148968 }, { "epoch": 28.61, "learning_rate": 0.001, "loss": 2.5442, "step": 148980 }, { "epoch": 28.61, "learning_rate": 0.001, "loss": 2.537, "step": 148992 }, { "epoch": 28.61, "learning_rate": 0.001, "loss": 2.5363, "step": 149004 }, { "epoch": 28.61, "learning_rate": 0.001, "loss": 2.5472, "step": 149016 }, { "epoch": 28.62, "learning_rate": 0.001, "loss": 2.5449, "step": 149028 }, { "epoch": 28.62, "learning_rate": 0.001, "loss": 2.5426, "step": 149040 }, { "epoch": 28.62, "learning_rate": 0.001, "loss": 2.5408, "step": 149052 }, { "epoch": 28.62, "learning_rate": 0.001, "loss": 2.5407, "step": 149064 }, { "epoch": 28.62, "learning_rate": 0.001, "loss": 2.5335, "step": 149076 }, { "epoch": 28.63, "learning_rate": 0.001, "loss": 2.5365, "step": 149088 }, { "epoch": 28.63, "learning_rate": 0.001, "loss": 2.535, "step": 149100 }, { "epoch": 28.63, "learning_rate": 0.001, "loss": 2.5454, "step": 149112 }, { "epoch": 28.63, "learning_rate": 0.001, "loss": 2.5414, "step": 149124 }, { "epoch": 28.64, "learning_rate": 0.001, "loss": 2.5452, "step": 149136 }, { "epoch": 28.64, "learning_rate": 0.001, "loss": 2.5364, "step": 149148 }, { "epoch": 28.64, "learning_rate": 0.001, "loss": 2.5458, "step": 149160 }, { "epoch": 28.64, "learning_rate": 0.001, "loss": 2.5461, "step": 149172 }, { "epoch": 28.65, "learning_rate": 0.001, "loss": 2.5335, "step": 149184 }, { "epoch": 28.65, "learning_rate": 0.001, "loss": 2.5478, "step": 149196 }, { "epoch": 28.65, "learning_rate": 0.001, "loss": 2.534, "step": 149208 }, { "epoch": 28.65, "learning_rate": 0.001, "loss": 2.5436, "step": 149220 }, { "epoch": 28.65, "learning_rate": 0.001, "loss": 2.5416, "step": 149232 }, { "epoch": 28.66, "learning_rate": 0.001, "loss": 2.5459, "step": 149244 }, { "epoch": 28.66, "learning_rate": 0.001, "loss": 2.5324, "step": 149256 }, { "epoch": 28.66, "learning_rate": 0.001, "loss": 2.535, "step": 149268 }, { "epoch": 28.66, "learning_rate": 0.001, "loss": 2.5418, "step": 149280 }, { "epoch": 28.67, "learning_rate": 0.001, "loss": 2.5517, "step": 149292 }, { "epoch": 28.67, "learning_rate": 0.001, "loss": 2.5507, "step": 149304 }, { "epoch": 28.67, "learning_rate": 0.001, "loss": 2.5345, "step": 149316 }, { "epoch": 28.67, "learning_rate": 0.001, "loss": 2.541, "step": 149328 }, { "epoch": 28.68, "learning_rate": 0.001, "loss": 2.5482, "step": 149340 }, { "epoch": 28.68, "learning_rate": 0.001, "loss": 2.5434, "step": 149352 }, { "epoch": 28.68, "learning_rate": 0.001, "loss": 2.5459, "step": 149364 }, { "epoch": 28.68, "eval_ag_news_accuracy": 0.3219375, "eval_ag_news_bleu_score": 4.772041790436677, "eval_ag_news_bleu_score_sem": 0.1537271497890346, "eval_ag_news_emb_cos_sim": 0.8114557266235352, "eval_ag_news_emb_cos_sim_sem": 0.006865690670832762, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5282657146453857, "eval_ag_news_n_ngrams_match_1": 13.99, "eval_ag_news_n_ngrams_match_2": 3.124, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.602, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.06483820593316, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3483905258532579, "eval_ag_news_runtime": 10.4876, "eval_ag_news_samples_per_second": 47.675, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.34885650810860663, "eval_ag_news_token_set_f1_sem": 0.004584215635663528, "eval_ag_news_token_set_precision": 0.3342390967439693, "eval_ag_news_token_set_recall": 0.37885604416096513, "eval_ag_news_true_num_tokens": 56.09375, "step": 149375 }, { "epoch": 28.68, "eval_anthropic_toxic_prompts_accuracy": 0.11434375, "eval_anthropic_toxic_prompts_bleu_score": 3.1904436102936296, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12088460606534683, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6796077489852905, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008200283141275874, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.243507146835327, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.268, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 47.372, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.623429475909035, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21502481066848333, "eval_anthropic_toxic_prompts_runtime": 10.5231, "eval_anthropic_toxic_prompts_samples_per_second": 47.515, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.36275511208515815, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006592997696022939, "eval_anthropic_toxic_prompts_token_set_precision": 0.44340855886704533, "eval_anthropic_toxic_prompts_token_set_recall": 0.3353735231735486, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 149375 }, { "epoch": 28.68, "eval_arxiv_accuracy": 0.3488125, "eval_arxiv_bleu_score": 4.269698737808313, "eval_arxiv_bleu_score_sem": 0.12778169579667845, "eval_arxiv_emb_cos_sim": 0.7690363526344299, "eval_arxiv_emb_cos_sim_sem": 0.007080500645347138, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.370055675506592, "eval_arxiv_n_ngrams_match_1": 15.046, "eval_arxiv_n_ngrams_match_2": 2.928, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 40.24, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.08014606459105, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3631524136507738, "eval_arxiv_runtime": 10.2498, "eval_arxiv_samples_per_second": 48.782, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3552734211734464, "eval_arxiv_token_set_f1_sem": 0.004199874670979993, "eval_arxiv_token_set_precision": 0.30494772927429825, "eval_arxiv_token_set_recall": 0.4447780181514975, "eval_arxiv_true_num_tokens": 64.0, "step": 149375 }, { "epoch": 28.68, "eval_python_code_alpaca_accuracy": 0.161875, "eval_python_code_alpaca_bleu_score": 4.576457482748722, "eval_python_code_alpaca_bleu_score_sem": 0.14659049201287674, "eval_python_code_alpaca_emb_cos_sim": 0.7466869354248047, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009250980367050174, "eval_python_code_alpaca_emb_top1_equal": 0.0859375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02487009666300537, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8794357776641846, "eval_python_code_alpaca_n_ngrams_match_1": 9.732, "eval_python_code_alpaca_n_ngrams_match_2": 2.852, "eval_python_code_alpaca_n_ngrams_match_3": 0.964, "eval_python_code_alpaca_num_pred_words": 43.494, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.804224803813547, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3290592140381472, "eval_python_code_alpaca_runtime": 10.0296, "eval_python_code_alpaca_samples_per_second": 49.853, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4722881605359816, "eval_python_code_alpaca_token_set_f1_sem": 0.005548355310737996, "eval_python_code_alpaca_token_set_precision": 0.5336534711735468, "eval_python_code_alpaca_token_set_recall": 0.4473906501637632, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 149375 }, { "epoch": 28.68, "eval_wikibio_accuracy": 0.32565625, "eval_wikibio_bleu_score": 5.77750789569979, "eval_wikibio_bleu_score_sem": 0.20954370616634016, "eval_wikibio_emb_cos_sim": 0.7381787896156311, "eval_wikibio_emb_cos_sim_sem": 0.009751466949127452, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7005228996276855, "eval_wikibio_n_ngrams_match_1": 9.866, "eval_wikibio_n_ngrams_match_2": 3.284, "eval_wikibio_n_ngrams_match_3": 1.164, "eval_wikibio_num_pred_words": 35.906, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.468459771054384, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34995863214615996, "eval_wikibio_runtime": 10.0672, "eval_wikibio_samples_per_second": 49.666, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31661482798084417, "eval_wikibio_token_set_f1_sem": 0.005622156353982404, "eval_wikibio_token_set_precision": 0.3233563077941803, "eval_wikibio_token_set_recall": 0.3284257556419153, "eval_wikibio_true_num_tokens": 61.1328125, "step": 149375 }, { "epoch": 28.68, "eval_nq_accuracy": 0.531, "eval_nq_bleu_score": 11.43955904137557, "eval_nq_bleu_score_sem": 0.4714544899456285, "eval_nq_emb_cos_sim": 0.8228099346160889, "eval_nq_emb_cos_sim_sem": 0.007503157803292987, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.166867733001709, "eval_nq_n_ngrams_match_1": 22.882, "eval_nq_n_ngrams_match_2": 8.356, "eval_nq_n_ngrams_match_3": 3.762, "eval_nq_num_pred_words": 49.19, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.730893676040196, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4448080950548505, "eval_nq_runtime": 10.5262, "eval_nq_samples_per_second": 47.501, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.45650527234887706, "eval_nq_token_set_f1_sem": 0.004953077512897897, "eval_nq_token_set_precision": 0.4146863164485548, "eval_nq_token_set_recall": 0.5178328184864912, "eval_nq_true_num_tokens": 64.0, "step": 149375 }, { "epoch": 28.68, "learning_rate": 0.001, "loss": 2.5446, "step": 149376 }, { "epoch": 28.68, "learning_rate": 0.001, "loss": 2.5464, "step": 149388 }, { "epoch": 28.69, "learning_rate": 0.001, "loss": 2.5383, "step": 149400 }, { "epoch": 28.69, "learning_rate": 0.001, "loss": 2.5397, "step": 149412 }, { "epoch": 28.69, "learning_rate": 0.001, "loss": 2.5404, "step": 149424 }, { "epoch": 28.69, "learning_rate": 0.001, "loss": 2.5507, "step": 149436 }, { "epoch": 28.7, "learning_rate": 0.001, "loss": 2.5444, "step": 149448 }, { "epoch": 28.7, "learning_rate": 0.001, "loss": 2.5377, "step": 149460 }, { "epoch": 28.7, "learning_rate": 0.001, "loss": 2.5518, "step": 149472 }, { "epoch": 28.7, "learning_rate": 0.001, "loss": 2.5384, "step": 149484 }, { "epoch": 28.71, "learning_rate": 0.001, "loss": 2.541, "step": 149496 }, { "epoch": 28.71, "learning_rate": 0.001, "loss": 2.5485, "step": 149508 }, { "epoch": 28.71, "learning_rate": 0.001, "loss": 2.5407, "step": 149520 }, { "epoch": 28.71, "learning_rate": 0.001, "loss": 2.5374, "step": 149532 }, { "epoch": 28.71, "learning_rate": 0.001, "loss": 2.5352, "step": 149544 }, { "epoch": 28.72, "learning_rate": 0.001, "loss": 2.5467, "step": 149556 }, { "epoch": 28.72, "learning_rate": 0.001, "loss": 2.542, "step": 149568 }, { "epoch": 28.72, "learning_rate": 0.001, "loss": 2.5446, "step": 149580 }, { "epoch": 28.72, "learning_rate": 0.001, "loss": 2.5481, "step": 149592 }, { "epoch": 28.73, "learning_rate": 0.001, "loss": 2.5442, "step": 149604 }, { "epoch": 28.73, "learning_rate": 0.001, "loss": 2.5387, "step": 149616 }, { "epoch": 28.73, "learning_rate": 0.001, "loss": 2.5386, "step": 149628 }, { "epoch": 28.73, "learning_rate": 0.001, "loss": 2.547, "step": 149640 }, { "epoch": 28.74, "learning_rate": 0.001, "loss": 2.5442, "step": 149652 }, { "epoch": 28.74, "learning_rate": 0.001, "loss": 2.5485, "step": 149664 }, { "epoch": 28.74, "learning_rate": 0.001, "loss": 2.5503, "step": 149676 }, { "epoch": 28.74, "learning_rate": 0.001, "loss": 2.5428, "step": 149688 }, { "epoch": 28.74, "learning_rate": 0.001, "loss": 2.542, "step": 149700 }, { "epoch": 28.75, "learning_rate": 0.001, "loss": 2.5415, "step": 149712 }, { "epoch": 28.75, "learning_rate": 0.001, "loss": 2.5432, "step": 149724 }, { "epoch": 28.75, "learning_rate": 0.001, "loss": 2.5424, "step": 149736 }, { "epoch": 28.75, "learning_rate": 0.001, "loss": 2.5472, "step": 149748 }, { "epoch": 28.76, "learning_rate": 0.001, "loss": 2.5367, "step": 149760 }, { "epoch": 28.76, "learning_rate": 0.001, "loss": 2.543, "step": 149772 }, { "epoch": 28.76, "learning_rate": 0.001, "loss": 2.5492, "step": 149784 }, { "epoch": 28.76, "learning_rate": 0.001, "loss": 2.5427, "step": 149796 }, { "epoch": 28.76, "learning_rate": 0.001, "loss": 2.543, "step": 149808 }, { "epoch": 28.77, "learning_rate": 0.001, "loss": 2.5424, "step": 149820 }, { "epoch": 28.77, "learning_rate": 0.001, "loss": 2.5536, "step": 149832 }, { "epoch": 28.77, "learning_rate": 0.001, "loss": 2.5424, "step": 149844 }, { "epoch": 28.77, "learning_rate": 0.001, "loss": 2.5382, "step": 149856 }, { "epoch": 28.78, "learning_rate": 0.001, "loss": 2.5386, "step": 149868 }, { "epoch": 28.78, "learning_rate": 0.001, "loss": 2.5411, "step": 149880 }, { "epoch": 28.78, "learning_rate": 0.001, "loss": 2.5523, "step": 149892 }, { "epoch": 28.78, "learning_rate": 0.001, "loss": 2.5488, "step": 149904 }, { "epoch": 28.79, "learning_rate": 0.001, "loss": 2.5406, "step": 149916 }, { "epoch": 28.79, "learning_rate": 0.001, "loss": 2.5398, "step": 149928 }, { "epoch": 28.79, "learning_rate": 0.001, "loss": 2.5542, "step": 149940 }, { "epoch": 28.79, "learning_rate": 0.001, "loss": 2.5376, "step": 149952 }, { "epoch": 28.79, "learning_rate": 0.001, "loss": 2.5408, "step": 149964 }, { "epoch": 28.8, "learning_rate": 0.001, "loss": 2.5466, "step": 149976 }, { "epoch": 28.8, "learning_rate": 0.001, "loss": 2.5436, "step": 149988 }, { "epoch": 28.8, "learning_rate": 0.001, "loss": 2.543, "step": 150000 }, { "epoch": 28.8, "eval_ag_news_accuracy": 0.324375, "eval_ag_news_bleu_score": 4.7056634866851565, "eval_ag_news_bleu_score_sem": 0.14283871622014702, "eval_ag_news_emb_cos_sim": 0.8181965351104736, "eval_ag_news_emb_cos_sim_sem": 0.006325943640823077, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5157887935638428, "eval_ag_news_n_ngrams_match_1": 14.222, "eval_ag_news_n_ngrams_match_2": 3.108, "eval_ag_news_n_ngrams_match_3": 0.84, "eval_ag_news_num_pred_words": 46.81, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.64245441209795, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3520721827994989, "eval_ag_news_runtime": 10.4502, "eval_ag_news_samples_per_second": 47.846, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3556641217079547, "eval_ag_news_token_set_f1_sem": 0.004486645585032673, "eval_ag_news_token_set_precision": 0.34048718632930713, "eval_ag_news_token_set_recall": 0.38683471936444186, "eval_ag_news_true_num_tokens": 56.09375, "step": 150000 }, { "epoch": 28.8, "eval_anthropic_toxic_prompts_accuracy": 0.11628125, "eval_anthropic_toxic_prompts_bleu_score": 3.2923909119651347, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13009757907748862, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6801949739456177, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009209527624450693, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.215897798538208, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.38, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.03, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.818, "eval_anthropic_toxic_prompts_num_pred_words": 47.63, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.925660089796313, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2180218869785031, "eval_anthropic_toxic_prompts_runtime": 10.4481, "eval_anthropic_toxic_prompts_samples_per_second": 47.856, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.35917874076071804, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006374282602473499, "eval_anthropic_toxic_prompts_token_set_precision": 0.44973359141432817, "eval_anthropic_toxic_prompts_token_set_recall": 0.3244919054013823, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 150000 }, { "epoch": 28.8, "eval_arxiv_accuracy": 0.34865625, "eval_arxiv_bleu_score": 4.365896166913111, "eval_arxiv_bleu_score_sem": 0.13988424476600514, "eval_arxiv_emb_cos_sim": 0.7636216282844543, "eval_arxiv_emb_cos_sim_sem": 0.0078008080671000315, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3648488521575928, "eval_arxiv_n_ngrams_match_1": 14.968, "eval_arxiv_n_ngrams_match_2": 2.932, "eval_arxiv_n_ngrams_match_3": 0.678, "eval_arxiv_num_pred_words": 39.714, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.92912439384603, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36020671358803785, "eval_arxiv_runtime": 10.3931, "eval_arxiv_samples_per_second": 48.109, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35432192957842007, "eval_arxiv_token_set_f1_sem": 0.004564245948690358, "eval_arxiv_token_set_precision": 0.3035574178537733, "eval_arxiv_token_set_recall": 0.44815358109370773, "eval_arxiv_true_num_tokens": 64.0, "step": 150000 }, { "epoch": 28.8, "eval_python_code_alpaca_accuracy": 0.161125, "eval_python_code_alpaca_bleu_score": 4.755795493449977, "eval_python_code_alpaca_bleu_score_sem": 0.14549366900201757, "eval_python_code_alpaca_emb_cos_sim": 0.7626192569732666, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008396583405304378, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8663179874420166, "eval_python_code_alpaca_n_ngrams_match_1": 10.08, "eval_python_code_alpaca_n_ngrams_match_2": 3.046, "eval_python_code_alpaca_n_ngrams_match_3": 1.056, "eval_python_code_alpaca_num_pred_words": 44.274, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.572197885178333, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33843861801528086, "eval_python_code_alpaca_runtime": 10.0438, "eval_python_code_alpaca_samples_per_second": 49.782, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.48493233617981535, "eval_python_code_alpaca_token_set_f1_sem": 0.005579080368697369, "eval_python_code_alpaca_token_set_precision": 0.5509247502750639, "eval_python_code_alpaca_token_set_recall": 0.45493196296414046, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 150000 }, { "epoch": 28.8, "eval_wikibio_accuracy": 0.32121875, "eval_wikibio_bleu_score": 6.007782878271135, "eval_wikibio_bleu_score_sem": 0.21203780962119811, "eval_wikibio_emb_cos_sim": 0.7495360970497131, "eval_wikibio_emb_cos_sim_sem": 0.009200777543573128, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.74615740776062, "eval_wikibio_n_ngrams_match_1": 10.188, "eval_wikibio_n_ngrams_match_2": 3.494, "eval_wikibio_n_ngrams_match_3": 1.278, "eval_wikibio_num_pred_words": 36.532, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.35800434149235, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3581564612049972, "eval_wikibio_runtime": 9.8952, "eval_wikibio_samples_per_second": 50.529, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32024840846497205, "eval_wikibio_token_set_f1_sem": 0.00550340127863008, "eval_wikibio_token_set_precision": 0.32988643236768794, "eval_wikibio_token_set_recall": 0.32797115482364864, "eval_wikibio_true_num_tokens": 61.1328125, "step": 150000 }, { "epoch": 28.8, "eval_nq_accuracy": 0.52965625, "eval_nq_bleu_score": 11.836088103151667, "eval_nq_bleu_score_sem": 0.481278945993792, "eval_nq_emb_cos_sim": 0.8382047414779663, "eval_nq_emb_cos_sim_sem": 0.007092566497822397, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1648871898651123, "eval_nq_n_ngrams_match_1": 23.172, "eval_nq_n_ngrams_match_2": 8.476, "eval_nq_n_ngrams_match_3": 3.916, "eval_nq_num_pred_words": 49.04, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.713618876882986, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4507962657434307, "eval_nq_runtime": 10.7526, "eval_nq_samples_per_second": 46.501, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.46396604445030754, "eval_nq_token_set_f1_sem": 0.004935143530120637, "eval_nq_token_set_precision": 0.42081194801954014, "eval_nq_token_set_recall": 0.5241043656630668, "eval_nq_true_num_tokens": 64.0, "step": 150000 }, { "epoch": 28.8, "learning_rate": 0.001, "loss": 2.5461, "step": 150012 }, { "epoch": 28.81, "learning_rate": 0.001, "loss": 2.5439, "step": 150024 }, { "epoch": 28.81, "learning_rate": 0.001, "loss": 2.5472, "step": 150036 }, { "epoch": 28.81, "learning_rate": 0.001, "loss": 2.5376, "step": 150048 }, { "epoch": 28.81, "learning_rate": 0.001, "loss": 2.5433, "step": 150060 }, { "epoch": 28.82, "learning_rate": 0.001, "loss": 2.5485, "step": 150072 }, { "epoch": 28.82, "learning_rate": 0.001, "loss": 2.5437, "step": 150084 }, { "epoch": 28.82, "learning_rate": 0.001, "loss": 2.5572, "step": 150096 }, { "epoch": 28.82, "learning_rate": 0.001, "loss": 2.5442, "step": 150108 }, { "epoch": 28.82, "learning_rate": 0.001, "loss": 2.5472, "step": 150120 }, { "epoch": 28.83, "learning_rate": 0.001, "loss": 2.5411, "step": 150132 }, { "epoch": 28.83, "learning_rate": 0.001, "loss": 2.5568, "step": 150144 }, { "epoch": 28.83, "learning_rate": 0.001, "loss": 2.5467, "step": 150156 }, { "epoch": 28.83, "learning_rate": 0.001, "loss": 2.5437, "step": 150168 }, { "epoch": 28.84, "learning_rate": 0.001, "loss": 2.5416, "step": 150180 }, { "epoch": 28.84, "learning_rate": 0.001, "loss": 2.5459, "step": 150192 }, { "epoch": 28.84, "learning_rate": 0.001, "loss": 2.5442, "step": 150204 }, { "epoch": 28.84, "learning_rate": 0.001, "loss": 2.545, "step": 150216 }, { "epoch": 28.85, "learning_rate": 0.001, "loss": 2.555, "step": 150228 }, { "epoch": 28.85, "learning_rate": 0.001, "loss": 2.5443, "step": 150240 }, { "epoch": 28.85, "learning_rate": 0.001, "loss": 2.55, "step": 150252 }, { "epoch": 28.85, "learning_rate": 0.001, "loss": 2.5463, "step": 150264 }, { "epoch": 28.85, "learning_rate": 0.001, "loss": 2.5438, "step": 150276 }, { "epoch": 28.86, "learning_rate": 0.001, "loss": 2.5421, "step": 150288 }, { "epoch": 28.86, "learning_rate": 0.001, "loss": 2.5512, "step": 150300 }, { "epoch": 28.86, "learning_rate": 0.001, "loss": 2.5415, "step": 150312 }, { "epoch": 28.86, "learning_rate": 0.001, "loss": 2.544, "step": 150324 }, { "epoch": 28.87, "learning_rate": 0.001, "loss": 2.5449, "step": 150336 }, { "epoch": 28.87, "learning_rate": 0.001, "loss": 2.5379, "step": 150348 }, { "epoch": 28.87, "learning_rate": 0.001, "loss": 2.5422, "step": 150360 }, { "epoch": 28.87, "learning_rate": 0.001, "loss": 2.5499, "step": 150372 }, { "epoch": 28.88, "learning_rate": 0.001, "loss": 2.5383, "step": 150384 }, { "epoch": 28.88, "learning_rate": 0.001, "loss": 2.5522, "step": 150396 }, { "epoch": 28.88, "learning_rate": 0.001, "loss": 2.5513, "step": 150408 }, { "epoch": 28.88, "learning_rate": 0.001, "loss": 2.5473, "step": 150420 }, { "epoch": 28.88, "learning_rate": 0.001, "loss": 2.5456, "step": 150432 }, { "epoch": 28.89, "learning_rate": 0.001, "loss": 2.5417, "step": 150444 }, { "epoch": 28.89, "learning_rate": 0.001, "loss": 2.5496, "step": 150456 }, { "epoch": 28.89, "learning_rate": 0.001, "loss": 2.5448, "step": 150468 }, { "epoch": 28.89, "learning_rate": 0.001, "loss": 2.5503, "step": 150480 }, { "epoch": 28.9, "learning_rate": 0.001, "loss": 2.5497, "step": 150492 }, { "epoch": 28.9, "learning_rate": 0.001, "loss": 2.549, "step": 150504 }, { "epoch": 28.9, "learning_rate": 0.001, "loss": 2.5487, "step": 150516 }, { "epoch": 28.9, "learning_rate": 0.001, "loss": 2.5435, "step": 150528 }, { "epoch": 28.91, "learning_rate": 0.001, "loss": 2.5413, "step": 150540 }, { "epoch": 28.91, "learning_rate": 0.001, "loss": 2.5405, "step": 150552 }, { "epoch": 28.91, "learning_rate": 0.001, "loss": 2.5461, "step": 150564 }, { "epoch": 28.91, "learning_rate": 0.001, "loss": 2.5458, "step": 150576 }, { "epoch": 28.91, "learning_rate": 0.001, "loss": 2.5521, "step": 150588 }, { "epoch": 28.92, "learning_rate": 0.001, "loss": 2.5476, "step": 150600 }, { "epoch": 28.92, "learning_rate": 0.001, "loss": 2.5472, "step": 150612 }, { "epoch": 28.92, "learning_rate": 0.001, "loss": 2.5565, "step": 150624 }, { "epoch": 28.92, "eval_ag_news_accuracy": 0.32484375, "eval_ag_news_bleu_score": 5.032894214158689, "eval_ag_news_bleu_score_sem": 0.15396842624079832, "eval_ag_news_emb_cos_sim": 0.8207932710647583, "eval_ag_news_emb_cos_sim_sem": 0.007046064035935705, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5192196369171143, "eval_ag_news_n_ngrams_match_1": 14.448, "eval_ag_news_n_ngrams_match_2": 3.286, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.55, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.75807462721796, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3584486115297454, "eval_ag_news_runtime": 10.6909, "eval_ag_news_samples_per_second": 46.769, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.36137190070270975, "eval_ag_news_token_set_f1_sem": 0.004324235428228524, "eval_ag_news_token_set_precision": 0.34654550122910605, "eval_ag_news_token_set_recall": 0.3927851697798266, "eval_ag_news_true_num_tokens": 56.09375, "step": 150625 }, { "epoch": 28.92, "eval_anthropic_toxic_prompts_accuracy": 0.1140625, "eval_anthropic_toxic_prompts_bleu_score": 3.2254266591380185, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12147323885363766, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.67835533618927, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008829004802670086, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.219359874725342, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.382, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.058, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76, "eval_anthropic_toxic_prompts_num_pred_words": 47.796, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.012104175704447, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21628296184474827, "eval_anthropic_toxic_prompts_runtime": 9.9529, "eval_anthropic_toxic_prompts_samples_per_second": 50.237, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3604679689408342, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006571871501617973, "eval_anthropic_toxic_prompts_token_set_precision": 0.4525535627850996, "eval_anthropic_toxic_prompts_token_set_recall": 0.3241484427790535, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 150625 }, { "epoch": 28.92, "eval_arxiv_accuracy": 0.35090625, "eval_arxiv_bleu_score": 4.613563488489597, "eval_arxiv_bleu_score_sem": 0.13203620394768947, "eval_arxiv_emb_cos_sim": 0.7815990447998047, "eval_arxiv_emb_cos_sim_sem": 0.006599197464787395, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3649280071258545, "eval_arxiv_n_ngrams_match_1": 15.636, "eval_arxiv_n_ngrams_match_2": 3.14, "eval_arxiv_n_ngrams_match_3": 0.748, "eval_arxiv_num_pred_words": 41.43, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.9314143683995, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36972113661200434, "eval_arxiv_runtime": 10.4743, "eval_arxiv_samples_per_second": 47.736, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.36380014420776124, "eval_arxiv_token_set_f1_sem": 0.004212665916970724, "eval_arxiv_token_set_precision": 0.31665380363810286, "eval_arxiv_token_set_recall": 0.44348842155375345, "eval_arxiv_true_num_tokens": 64.0, "step": 150625 }, { "epoch": 28.92, "eval_python_code_alpaca_accuracy": 0.16103125, "eval_python_code_alpaca_bleu_score": 4.778076875730435, "eval_python_code_alpaca_bleu_score_sem": 0.1524799074416788, "eval_python_code_alpaca_emb_cos_sim": 0.7549819946289062, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00910292902494353, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.882640838623047, "eval_python_code_alpaca_n_ngrams_match_1": 9.906, "eval_python_code_alpaca_n_ngrams_match_2": 3.056, "eval_python_code_alpaca_n_ngrams_match_3": 1.056, "eval_python_code_alpaca_num_pred_words": 43.728, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.861379973609917, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3354974984194967, "eval_python_code_alpaca_runtime": 10.0086, "eval_python_code_alpaca_samples_per_second": 49.957, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.47508127806752826, "eval_python_code_alpaca_token_set_f1_sem": 0.0054941455858405425, "eval_python_code_alpaca_token_set_precision": 0.5439744361355182, "eval_python_code_alpaca_token_set_recall": 0.441518002428255, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 150625 }, { "epoch": 28.92, "eval_wikibio_accuracy": 0.32446875, "eval_wikibio_bleu_score": 5.6012696989931285, "eval_wikibio_bleu_score_sem": 0.19973431937002056, "eval_wikibio_emb_cos_sim": 0.7597984075546265, "eval_wikibio_emb_cos_sim_sem": 0.008000335199217652, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7035703659057617, "eval_wikibio_n_ngrams_match_1": 9.932, "eval_wikibio_n_ngrams_match_2": 3.232, "eval_wikibio_n_ngrams_match_3": 1.158, "eval_wikibio_num_pred_words": 36.442, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.59197414488657, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3504889782888023, "eval_wikibio_runtime": 10.0745, "eval_wikibio_samples_per_second": 49.63, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31785949312849954, "eval_wikibio_token_set_f1_sem": 0.0053684517034468734, "eval_wikibio_token_set_precision": 0.324917014408283, "eval_wikibio_token_set_recall": 0.32865650845956945, "eval_wikibio_true_num_tokens": 61.1328125, "step": 150625 }, { "epoch": 28.92, "eval_nq_accuracy": 0.52971875, "eval_nq_bleu_score": 11.767434355091405, "eval_nq_bleu_score_sem": 0.4767528394721157, "eval_nq_emb_cos_sim": 0.8367584943771362, "eval_nq_emb_cos_sim_sem": 0.00808737436829322, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1646652221679688, "eval_nq_n_ngrams_match_1": 23.14, "eval_nq_n_ngrams_match_2": 8.458, "eval_nq_n_ngrams_match_3": 3.94, "eval_nq_num_pred_words": 49.108, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.71168494960973, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4493522713913332, "eval_nq_runtime": 10.8307, "eval_nq_samples_per_second": 46.165, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.46213798915874227, "eval_nq_token_set_f1_sem": 0.0048903057913209326, "eval_nq_token_set_precision": 0.41949699954193087, "eval_nq_token_set_recall": 0.5234541896359106, "eval_nq_true_num_tokens": 64.0, "step": 150625 }, { "epoch": 28.92, "learning_rate": 0.001, "loss": 2.5482, "step": 150636 }, { "epoch": 28.93, "learning_rate": 0.001, "loss": 2.5355, "step": 150648 }, { "epoch": 28.93, "learning_rate": 0.001, "loss": 2.5374, "step": 150660 }, { "epoch": 28.93, "learning_rate": 0.001, "loss": 2.5314, "step": 150672 }, { "epoch": 28.93, "learning_rate": 0.001, "loss": 2.5472, "step": 150684 }, { "epoch": 28.94, "learning_rate": 0.001, "loss": 2.5384, "step": 150696 }, { "epoch": 28.94, "learning_rate": 0.001, "loss": 2.5426, "step": 150708 }, { "epoch": 28.94, "learning_rate": 0.001, "loss": 2.5484, "step": 150720 }, { "epoch": 28.94, "learning_rate": 0.001, "loss": 2.5528, "step": 150732 }, { "epoch": 28.94, "learning_rate": 0.001, "loss": 2.5344, "step": 150744 }, { "epoch": 28.95, "learning_rate": 0.001, "loss": 2.5384, "step": 150756 }, { "epoch": 28.95, "learning_rate": 0.001, "loss": 2.5318, "step": 150768 }, { "epoch": 28.95, "learning_rate": 0.001, "loss": 2.5415, "step": 150780 }, { "epoch": 28.95, "learning_rate": 0.001, "loss": 2.5446, "step": 150792 }, { "epoch": 28.96, "learning_rate": 0.001, "loss": 2.5438, "step": 150804 }, { "epoch": 28.96, "learning_rate": 0.001, "loss": 2.5379, "step": 150816 }, { "epoch": 28.96, "learning_rate": 0.001, "loss": 2.544, "step": 150828 }, { "epoch": 28.96, "learning_rate": 0.001, "loss": 2.5372, "step": 150840 }, { "epoch": 28.97, "learning_rate": 0.001, "loss": 2.5396, "step": 150852 }, { "epoch": 28.97, "learning_rate": 0.001, "loss": 2.5463, "step": 150864 }, { "epoch": 28.97, "learning_rate": 0.001, "loss": 2.5496, "step": 150876 }, { "epoch": 28.97, "learning_rate": 0.001, "loss": 2.5415, "step": 150888 }, { "epoch": 28.97, "learning_rate": 0.001, "loss": 2.544, "step": 150900 }, { "epoch": 28.98, "learning_rate": 0.001, "loss": 2.5448, "step": 150912 }, { "epoch": 28.98, "learning_rate": 0.001, "loss": 2.5476, "step": 150924 }, { "epoch": 28.98, "learning_rate": 0.001, "loss": 2.5536, "step": 150936 }, { "epoch": 28.98, "learning_rate": 0.001, "loss": 2.55, "step": 150948 }, { "epoch": 28.99, "learning_rate": 0.001, "loss": 2.5567, "step": 150960 }, { "epoch": 28.99, "learning_rate": 0.001, "loss": 2.5452, "step": 150972 }, { "epoch": 28.99, "learning_rate": 0.001, "loss": 2.5576, "step": 150984 }, { "epoch": 28.99, "learning_rate": 0.001, "loss": 2.5472, "step": 150996 }, { "epoch": 29.0, "learning_rate": 0.001, "loss": 2.5341, "step": 151008 }, { "epoch": 29.0, "learning_rate": 0.001, "loss": 2.5476, "step": 151020 }, { "epoch": 29.0, "learning_rate": 0.001, "loss": 2.5569, "step": 151032 }, { "epoch": 29.0, "learning_rate": 0.001, "loss": 2.5311, "step": 151044 }, { "epoch": 29.0, "learning_rate": 0.001, "loss": 2.5384, "step": 151056 }, { "epoch": 29.01, "learning_rate": 0.001, "loss": 2.5299, "step": 151068 }, { "epoch": 29.01, "learning_rate": 0.001, "loss": 2.5156, "step": 151080 }, { "epoch": 29.01, "learning_rate": 0.001, "loss": 2.5281, "step": 151092 }, { "epoch": 29.01, "learning_rate": 0.001, "loss": 2.5237, "step": 151104 }, { "epoch": 29.02, "learning_rate": 0.001, "loss": 2.5177, "step": 151116 }, { "epoch": 29.02, "learning_rate": 0.001, "loss": 2.5295, "step": 151128 }, { "epoch": 29.02, "learning_rate": 0.001, "loss": 2.5181, "step": 151140 }, { "epoch": 29.02, "learning_rate": 0.001, "loss": 2.5274, "step": 151152 }, { "epoch": 29.03, "learning_rate": 0.001, "loss": 2.529, "step": 151164 }, { "epoch": 29.03, "learning_rate": 0.001, "loss": 2.5299, "step": 151176 }, { "epoch": 29.03, "learning_rate": 0.001, "loss": 2.5282, "step": 151188 }, { "epoch": 29.03, "learning_rate": 0.001, "loss": 2.5195, "step": 151200 }, { "epoch": 29.03, "learning_rate": 0.001, "loss": 2.534, "step": 151212 }, { "epoch": 29.04, "learning_rate": 0.001, "loss": 2.5374, "step": 151224 }, { "epoch": 29.04, "learning_rate": 0.001, "loss": 2.5287, "step": 151236 }, { "epoch": 29.04, "learning_rate": 0.001, "loss": 2.5347, "step": 151248 }, { "epoch": 29.04, "eval_ag_news_accuracy": 0.3245, "eval_ag_news_bleu_score": 4.844542290952034, "eval_ag_news_bleu_score_sem": 0.1438908421436427, "eval_ag_news_emb_cos_sim": 0.8165936470031738, "eval_ag_news_emb_cos_sim_sem": 0.007086061395172543, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.522404670715332, "eval_ag_news_n_ngrams_match_1": 14.45, "eval_ag_news_n_ngrams_match_2": 3.27, "eval_ag_news_n_ngrams_match_3": 0.912, "eval_ag_news_num_pred_words": 46.952, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.86576664618892, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3578247568636002, "eval_ag_news_runtime": 10.6168, "eval_ag_news_samples_per_second": 47.095, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3582750992665711, "eval_ag_news_token_set_f1_sem": 0.004282706102249708, "eval_ag_news_token_set_precision": 0.34616911542423856, "eval_ag_news_token_set_recall": 0.3862545126528324, "eval_ag_news_true_num_tokens": 56.09375, "step": 151250 }, { "epoch": 29.04, "eval_anthropic_toxic_prompts_accuracy": 0.11459375, "eval_anthropic_toxic_prompts_bleu_score": 3.2409256335003986, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12571358938033889, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6790622472763062, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009037667264202444, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2250733375549316, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, "eval_anthropic_toxic_prompts_num_pred_words": 48.104, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.15541892508935, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21603219162426923, "eval_anthropic_toxic_prompts_runtime": 10.2764, "eval_anthropic_toxic_prompts_samples_per_second": 48.655, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3552492838968309, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006414730604567983, "eval_anthropic_toxic_prompts_token_set_precision": 0.44905100122164215, "eval_anthropic_toxic_prompts_token_set_recall": 0.3194184182004095, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 151250 }, { "epoch": 29.04, "eval_arxiv_accuracy": 0.35028125, "eval_arxiv_bleu_score": 4.420792034312053, "eval_arxiv_bleu_score_sem": 0.12908881180779902, "eval_arxiv_emb_cos_sim": 0.7626398205757141, "eval_arxiv_emb_cos_sim_sem": 0.009779892364270019, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3740620613098145, "eval_arxiv_n_ngrams_match_1": 15.22, "eval_arxiv_n_ngrams_match_2": 3.034, "eval_arxiv_n_ngrams_match_3": 0.696, "eval_arxiv_num_pred_words": 40.434, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.196886045490295, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36513158991505124, "eval_arxiv_runtime": 10.3092, "eval_arxiv_samples_per_second": 48.501, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35699568139393784, "eval_arxiv_token_set_f1_sem": 0.004389136280561084, "eval_arxiv_token_set_precision": 0.3102832813633865, "eval_arxiv_token_set_recall": 0.43479062949989744, "eval_arxiv_true_num_tokens": 64.0, "step": 151250 }, { "epoch": 29.04, "eval_python_code_alpaca_accuracy": 0.161, "eval_python_code_alpaca_bleu_score": 4.604428890700413, "eval_python_code_alpaca_bleu_score_sem": 0.1443318173906819, "eval_python_code_alpaca_emb_cos_sim": 0.7585429549217224, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008154448443520276, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8876655101776123, "eval_python_code_alpaca_n_ngrams_match_1": 9.912, "eval_python_code_alpaca_n_ngrams_match_2": 2.892, "eval_python_code_alpaca_n_ngrams_match_3": 0.982, "eval_python_code_alpaca_num_pred_words": 43.624, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.95135339563683, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3371788273305638, "eval_python_code_alpaca_runtime": 10.0568, "eval_python_code_alpaca_samples_per_second": 49.718, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4795367750842602, "eval_python_code_alpaca_token_set_f1_sem": 0.005504445672513359, "eval_python_code_alpaca_token_set_precision": 0.5424712281321773, "eval_python_code_alpaca_token_set_recall": 0.44800150958865204, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 151250 }, { "epoch": 29.04, "eval_wikibio_accuracy": 0.3276875, "eval_wikibio_bleu_score": 6.104190922202895, "eval_wikibio_bleu_score_sem": 0.2098002540460547, "eval_wikibio_emb_cos_sim": 0.7561072111129761, "eval_wikibio_emb_cos_sim_sem": 0.007763181468091931, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7275009155273438, "eval_wikibio_n_ngrams_match_1": 10.422, "eval_wikibio_n_ngrams_match_2": 3.576, "eval_wikibio_n_ngrams_match_3": 1.276, "eval_wikibio_num_pred_words": 36.608, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.57507859546454, "eval_wikibio_pred_num_tokens": 62.9609375, "eval_wikibio_rouge_score": 0.36571155693257246, "eval_wikibio_runtime": 10.1138, "eval_wikibio_samples_per_second": 49.437, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32794561751383144, "eval_wikibio_token_set_f1_sem": 0.005204679436361614, "eval_wikibio_token_set_precision": 0.3378503939148683, "eval_wikibio_token_set_recall": 0.33316220151351056, "eval_wikibio_true_num_tokens": 61.1328125, "step": 151250 }, { "epoch": 29.04, "eval_nq_accuracy": 0.5314375, "eval_nq_bleu_score": 11.71077779044643, "eval_nq_bleu_score_sem": 0.47619231100809106, "eval_nq_emb_cos_sim": 0.8266245722770691, "eval_nq_emb_cos_sim_sem": 0.007954045447207039, "eval_nq_emb_top1_equal": 0.21875, "eval_nq_emb_top1_equal_sem": 0.03668319712192295, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.164546489715576, "eval_nq_n_ngrams_match_1": 23.03, "eval_nq_n_ngrams_match_2": 8.458, "eval_nq_n_ngrams_match_3": 3.884, "eval_nq_num_pred_words": 49.078, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.710650651294795, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44635495070588516, "eval_nq_runtime": 10.4427, "eval_nq_samples_per_second": 47.88, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4605788623881391, "eval_nq_token_set_f1_sem": 0.004967753861172408, "eval_nq_token_set_precision": 0.4196247129328556, "eval_nq_token_set_recall": 0.518356159964923, "eval_nq_true_num_tokens": 64.0, "step": 151250 }, { "epoch": 29.04, "learning_rate": 0.001, "loss": 2.5244, "step": 151260 }, { "epoch": 29.05, "learning_rate": 0.001, "loss": 2.5245, "step": 151272 }, { "epoch": 29.05, "learning_rate": 0.001, "loss": 2.5217, "step": 151284 }, { "epoch": 29.05, "learning_rate": 0.001, "loss": 2.5337, "step": 151296 }, { "epoch": 29.05, "learning_rate": 0.001, "loss": 2.5301, "step": 151308 }, { "epoch": 29.06, "learning_rate": 0.001, "loss": 2.5179, "step": 151320 }, { "epoch": 29.06, "learning_rate": 0.001, "loss": 2.5264, "step": 151332 }, { "epoch": 29.06, "learning_rate": 0.001, "loss": 2.5273, "step": 151344 }, { "epoch": 29.06, "learning_rate": 0.001, "loss": 2.5169, "step": 151356 }, { "epoch": 29.06, "learning_rate": 0.001, "loss": 2.5283, "step": 151368 }, { "epoch": 29.07, "learning_rate": 0.001, "loss": 2.5361, "step": 151380 }, { "epoch": 29.07, "learning_rate": 0.001, "loss": 2.5184, "step": 151392 }, { "epoch": 29.07, "learning_rate": 0.001, "loss": 2.5266, "step": 151404 }, { "epoch": 29.07, "learning_rate": 0.001, "loss": 2.5361, "step": 151416 }, { "epoch": 29.08, "learning_rate": 0.001, "loss": 2.5266, "step": 151428 }, { "epoch": 29.08, "learning_rate": 0.001, "loss": 2.5206, "step": 151440 }, { "epoch": 29.08, "learning_rate": 0.001, "loss": 2.5217, "step": 151452 }, { "epoch": 29.08, "learning_rate": 0.001, "loss": 2.53, "step": 151464 }, { "epoch": 29.09, "learning_rate": 0.001, "loss": 2.535, "step": 151476 }, { "epoch": 29.09, "learning_rate": 0.001, "loss": 2.5392, "step": 151488 }, { "epoch": 29.09, "learning_rate": 0.001, "loss": 2.5296, "step": 151500 }, { "epoch": 29.09, "learning_rate": 0.001, "loss": 2.5331, "step": 151512 }, { "epoch": 29.09, "learning_rate": 0.001, "loss": 2.5319, "step": 151524 }, { "epoch": 29.1, "learning_rate": 0.001, "loss": 2.5273, "step": 151536 }, { "epoch": 29.1, "learning_rate": 0.001, "loss": 2.5283, "step": 151548 }, { "epoch": 29.1, "learning_rate": 0.001, "loss": 2.5343, "step": 151560 }, { "epoch": 29.1, "learning_rate": 0.001, "loss": 2.5331, "step": 151572 }, { "epoch": 29.11, "learning_rate": 0.001, "loss": 2.5268, "step": 151584 }, { "epoch": 29.11, "learning_rate": 0.001, "loss": 2.5263, "step": 151596 }, { "epoch": 29.11, "learning_rate": 0.001, "loss": 2.5348, "step": 151608 }, { "epoch": 29.11, "learning_rate": 0.001, "loss": 2.5278, "step": 151620 }, { "epoch": 29.12, "learning_rate": 0.001, "loss": 2.529, "step": 151632 }, { "epoch": 29.12, "learning_rate": 0.001, "loss": 2.5368, "step": 151644 }, { "epoch": 29.12, "learning_rate": 0.001, "loss": 2.5346, "step": 151656 }, { "epoch": 29.12, "learning_rate": 0.001, "loss": 2.5388, "step": 151668 }, { "epoch": 29.12, "learning_rate": 0.001, "loss": 2.5271, "step": 151680 }, { "epoch": 29.13, "learning_rate": 0.001, "loss": 2.528, "step": 151692 }, { "epoch": 29.13, "learning_rate": 0.001, "loss": 2.5302, "step": 151704 }, { "epoch": 29.13, "learning_rate": 0.001, "loss": 2.5436, "step": 151716 }, { "epoch": 29.13, "learning_rate": 0.001, "loss": 2.5347, "step": 151728 }, { "epoch": 29.14, "learning_rate": 0.001, "loss": 2.5237, "step": 151740 }, { "epoch": 29.14, "learning_rate": 0.001, "loss": 2.5271, "step": 151752 }, { "epoch": 29.14, "learning_rate": 0.001, "loss": 2.5312, "step": 151764 }, { "epoch": 29.14, "learning_rate": 0.001, "loss": 2.5425, "step": 151776 }, { "epoch": 29.15, "learning_rate": 0.001, "loss": 2.5345, "step": 151788 }, { "epoch": 29.15, "learning_rate": 0.001, "loss": 2.5285, "step": 151800 }, { "epoch": 29.15, "learning_rate": 0.001, "loss": 2.5365, "step": 151812 }, { "epoch": 29.15, "learning_rate": 0.001, "loss": 2.5308, "step": 151824 }, { "epoch": 29.15, "learning_rate": 0.001, "loss": 2.5356, "step": 151836 }, { "epoch": 29.16, "learning_rate": 0.001, "loss": 2.524, "step": 151848 }, { "epoch": 29.16, "learning_rate": 0.001, "loss": 2.5281, "step": 151860 }, { "epoch": 29.16, "learning_rate": 0.001, "loss": 2.5337, "step": 151872 }, { "epoch": 29.16, "eval_ag_news_accuracy": 0.3241875, "eval_ag_news_bleu_score": 5.000084158728007, "eval_ag_news_bleu_score_sem": 0.1589416555240998, "eval_ag_news_emb_cos_sim": 0.817887008190155, "eval_ag_news_emb_cos_sim_sem": 0.006153123122682188, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.51975154876709, "eval_ag_news_n_ngrams_match_1": 14.144, "eval_ag_news_n_ngrams_match_2": 3.234, "eval_ag_news_n_ngrams_match_3": 0.95, "eval_ag_news_num_pred_words": 46.55, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.77603572358105, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3541474022600284, "eval_ag_news_runtime": 10.589, "eval_ag_news_samples_per_second": 47.219, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35428814364454886, "eval_ag_news_token_set_f1_sem": 0.004478389152801508, "eval_ag_news_token_set_precision": 0.3374124046608325, "eval_ag_news_token_set_recall": 0.3881005435868758, "eval_ag_news_true_num_tokens": 56.09375, "step": 151875 }, { "epoch": 29.16, "eval_anthropic_toxic_prompts_accuracy": 0.1148125, "eval_anthropic_toxic_prompts_bleu_score": 3.1510851672400513, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11870211895287368, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789723634719849, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009058141412955627, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2354660034179688, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.226, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758, "eval_anthropic_toxic_prompts_num_pred_words": 48.186, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.418213993977535, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21109292456923273, "eval_anthropic_toxic_prompts_runtime": 10.1236, "eval_anthropic_toxic_prompts_samples_per_second": 49.39, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.3611426957589993, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006721470213182206, "eval_anthropic_toxic_prompts_token_set_precision": 0.44139786066338826, "eval_anthropic_toxic_prompts_token_set_recall": 0.3322797000601225, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 151875 }, { "epoch": 29.16, "eval_arxiv_accuracy": 0.35125, "eval_arxiv_bleu_score": 4.538700267590577, "eval_arxiv_bleu_score_sem": 0.13352372088967007, "eval_arxiv_emb_cos_sim": 0.7694689035415649, "eval_arxiv_emb_cos_sim_sem": 0.009277152242886423, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3711049556732178, "eval_arxiv_n_ngrams_match_1": 15.302, "eval_arxiv_n_ngrams_match_2": 3.124, "eval_arxiv_n_ngrams_match_3": 0.75, "eval_arxiv_num_pred_words": 40.328, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.110675299158352, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36464202466603113, "eval_arxiv_runtime": 10.899, "eval_arxiv_samples_per_second": 45.876, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.357238884925682, "eval_arxiv_token_set_f1_sem": 0.004458090399046446, "eval_arxiv_token_set_precision": 0.3092882860631226, "eval_arxiv_token_set_recall": 0.4381859232065277, "eval_arxiv_true_num_tokens": 64.0, "step": 151875 }, { "epoch": 29.16, "eval_python_code_alpaca_accuracy": 0.160625, "eval_python_code_alpaca_bleu_score": 4.545048138710379, "eval_python_code_alpaca_bleu_score_sem": 0.1453256164865113, "eval_python_code_alpaca_emb_cos_sim": 0.7475377917289734, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009837034271227679, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8917899131774902, "eval_python_code_alpaca_n_ngrams_match_1": 9.738, "eval_python_code_alpaca_n_ngrams_match_2": 2.862, "eval_python_code_alpaca_n_ngrams_match_3": 0.986, "eval_python_code_alpaca_num_pred_words": 43.544, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.025544904102944, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33105729840874065, "eval_python_code_alpaca_runtime": 9.9601, "eval_python_code_alpaca_samples_per_second": 50.2, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4723053265610159, "eval_python_code_alpaca_token_set_f1_sem": 0.006014967568017886, "eval_python_code_alpaca_token_set_precision": 0.5321521219672057, "eval_python_code_alpaca_token_set_recall": 0.4538138564617063, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 151875 }, { "epoch": 29.16, "eval_wikibio_accuracy": 0.3276875, "eval_wikibio_bleu_score": 6.086745486686603, "eval_wikibio_bleu_score_sem": 0.2122527282361847, "eval_wikibio_emb_cos_sim": 0.7429611682891846, "eval_wikibio_emb_cos_sim_sem": 0.008319091814505544, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7398264408111572, "eval_wikibio_n_ngrams_match_1": 10.28, "eval_wikibio_n_ngrams_match_2": 3.468, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 36.462, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 42.090684305989775, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36243806599945533, "eval_wikibio_runtime": 10.2282, "eval_wikibio_samples_per_second": 48.885, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3258695637764661, "eval_wikibio_token_set_f1_sem": 0.005100899580146746, "eval_wikibio_token_set_precision": 0.335925905970325, "eval_wikibio_token_set_recall": 0.3308497906325442, "eval_wikibio_true_num_tokens": 61.1328125, "step": 151875 }, { "epoch": 29.16, "eval_nq_accuracy": 0.53078125, "eval_nq_bleu_score": 11.680687711048577, "eval_nq_bleu_score_sem": 0.47933077030978827, "eval_nq_emb_cos_sim": 0.8354138731956482, "eval_nq_emb_cos_sim_sem": 0.0065807114619038124, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.163125991821289, "eval_nq_n_ngrams_match_1": 23.024, "eval_nq_n_ngrams_match_2": 8.48, "eval_nq_n_ngrams_match_3": 3.9, "eval_nq_num_pred_words": 48.906, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.698285974464595, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4476638171724451, "eval_nq_runtime": 10.3472, "eval_nq_samples_per_second": 48.322, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.46071239013233994, "eval_nq_token_set_f1_sem": 0.005077971257057672, "eval_nq_token_set_precision": 0.4185660630895155, "eval_nq_token_set_recall": 0.5199926249950194, "eval_nq_true_num_tokens": 64.0, "step": 151875 }, { "epoch": 29.16, "learning_rate": 0.001, "loss": 2.5317, "step": 151884 }, { "epoch": 29.17, "learning_rate": 0.001, "loss": 2.52, "step": 151896 }, { "epoch": 29.17, "learning_rate": 0.001, "loss": 2.5266, "step": 151908 }, { "epoch": 29.17, "learning_rate": 0.001, "loss": 2.5393, "step": 151920 }, { "epoch": 29.17, "learning_rate": 0.001, "loss": 2.5375, "step": 151932 }, { "epoch": 29.18, "learning_rate": 0.001, "loss": 2.5261, "step": 151944 }, { "epoch": 29.18, "learning_rate": 0.001, "loss": 2.5321, "step": 151956 }, { "epoch": 29.18, "learning_rate": 0.001, "loss": 2.5309, "step": 151968 }, { "epoch": 29.18, "learning_rate": 0.001, "loss": 2.5254, "step": 151980 }, { "epoch": 29.18, "learning_rate": 0.001, "loss": 2.5432, "step": 151992 }, { "epoch": 29.19, "learning_rate": 0.001, "loss": 2.5323, "step": 152004 }, { "epoch": 29.19, "learning_rate": 0.001, "loss": 2.5386, "step": 152016 }, { "epoch": 29.19, "learning_rate": 0.001, "loss": 2.5255, "step": 152028 }, { "epoch": 29.19, "learning_rate": 0.001, "loss": 2.5317, "step": 152040 }, { "epoch": 29.2, "learning_rate": 0.001, "loss": 2.5431, "step": 152052 }, { "epoch": 29.2, "learning_rate": 0.001, "loss": 2.5364, "step": 152064 }, { "epoch": 29.2, "learning_rate": 0.001, "loss": 2.5277, "step": 152076 }, { "epoch": 29.2, "learning_rate": 0.001, "loss": 2.5306, "step": 152088 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 2.5384, "step": 152100 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 2.5227, "step": 152112 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 2.532, "step": 152124 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 2.5341, "step": 152136 }, { "epoch": 29.21, "learning_rate": 0.001, "loss": 2.5299, "step": 152148 }, { "epoch": 29.22, "learning_rate": 0.001, "loss": 2.5327, "step": 152160 }, { "epoch": 29.22, "learning_rate": 0.001, "loss": 2.5329, "step": 152172 }, { "epoch": 29.22, "learning_rate": 0.001, "loss": 2.5378, "step": 152184 }, { "epoch": 29.22, "learning_rate": 0.001, "loss": 2.5376, "step": 152196 }, { "epoch": 29.23, "learning_rate": 0.001, "loss": 2.5288, "step": 152208 }, { "epoch": 29.23, "learning_rate": 0.001, "loss": 2.5301, "step": 152220 }, { "epoch": 29.23, "learning_rate": 0.001, "loss": 2.5331, "step": 152232 }, { "epoch": 29.23, "learning_rate": 0.001, "loss": 2.5395, "step": 152244 }, { "epoch": 29.24, "learning_rate": 0.001, "loss": 2.5368, "step": 152256 }, { "epoch": 29.24, "learning_rate": 0.001, "loss": 2.5318, "step": 152268 }, { "epoch": 29.24, "learning_rate": 0.001, "loss": 2.5376, "step": 152280 }, { "epoch": 29.24, "learning_rate": 0.001, "loss": 2.5348, "step": 152292 }, { "epoch": 29.24, "learning_rate": 0.001, "loss": 2.5332, "step": 152304 }, { "epoch": 29.25, "learning_rate": 0.001, "loss": 2.5256, "step": 152316 }, { "epoch": 29.25, "learning_rate": 0.001, "loss": 2.5351, "step": 152328 }, { "epoch": 29.25, "learning_rate": 0.001, "loss": 2.544, "step": 152340 }, { "epoch": 29.25, "learning_rate": 0.001, "loss": 2.5265, "step": 152352 }, { "epoch": 29.26, "learning_rate": 0.001, "loss": 2.5325, "step": 152364 }, { "epoch": 29.26, "learning_rate": 0.001, "loss": 2.5382, "step": 152376 }, { "epoch": 29.26, "learning_rate": 0.001, "loss": 2.5398, "step": 152388 }, { "epoch": 29.26, "learning_rate": 0.001, "loss": 2.524, "step": 152400 }, { "epoch": 29.26, "learning_rate": 0.001, "loss": 2.5398, "step": 152412 }, { "epoch": 29.27, "learning_rate": 0.001, "loss": 2.5366, "step": 152424 }, { "epoch": 29.27, "learning_rate": 0.001, "loss": 2.5282, "step": 152436 }, { "epoch": 29.27, "learning_rate": 0.001, "loss": 2.5317, "step": 152448 }, { "epoch": 29.27, "learning_rate": 0.001, "loss": 2.5351, "step": 152460 }, { "epoch": 29.28, "learning_rate": 0.001, "loss": 2.5366, "step": 152472 }, { "epoch": 29.28, "learning_rate": 0.001, "loss": 2.5367, "step": 152484 }, { "epoch": 29.28, "learning_rate": 0.001, "loss": 2.5276, "step": 152496 }, { "epoch": 29.28, "eval_ag_news_accuracy": 0.32571875, "eval_ag_news_bleu_score": 4.8357952399709845, "eval_ag_news_bleu_score_sem": 0.15108849088612306, "eval_ag_news_emb_cos_sim": 0.8196853995323181, "eval_ag_news_emb_cos_sim_sem": 0.006709633403142574, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.519531488418579, "eval_ag_news_n_ngrams_match_1": 14.172, "eval_ag_news_n_ngrams_match_2": 3.216, "eval_ag_news_n_ngrams_match_3": 0.914, "eval_ag_news_num_pred_words": 46.554, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.76860377515698, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3521433167175206, "eval_ag_news_runtime": 10.3514, "eval_ag_news_samples_per_second": 48.302, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3526650207991813, "eval_ag_news_token_set_f1_sem": 0.004478019878389956, "eval_ag_news_token_set_precision": 0.33673514573567986, "eval_ag_news_token_set_recall": 0.38482114364734127, "eval_ag_news_true_num_tokens": 56.09375, "step": 152500 }, { "epoch": 29.28, "eval_anthropic_toxic_prompts_accuracy": 0.11440625, "eval_anthropic_toxic_prompts_bleu_score": 3.2041181389940774, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12345400482851772, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6702789068222046, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009755586145024885, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2476284503936768, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.302, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 47.616, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.729249315044623, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2155460628838602, "eval_anthropic_toxic_prompts_runtime": 9.836, "eval_anthropic_toxic_prompts_samples_per_second": 50.833, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.36339760380715835, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006638544596447055, "eval_anthropic_toxic_prompts_token_set_precision": 0.4448925712023457, "eval_anthropic_toxic_prompts_token_set_recall": 0.33426795248847874, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 152500 }, { "epoch": 29.28, "eval_arxiv_accuracy": 0.34853125, "eval_arxiv_bleu_score": 4.392240454264218, "eval_arxiv_bleu_score_sem": 0.12801832934646346, "eval_arxiv_emb_cos_sim": 0.7635323405265808, "eval_arxiv_emb_cos_sim_sem": 0.007354411038185914, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3708224296569824, "eval_arxiv_n_ngrams_match_1": 15.086, "eval_arxiv_n_ngrams_match_2": 2.958, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.34, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.10245193774763, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.359936562999149, "eval_arxiv_runtime": 10.218, "eval_arxiv_samples_per_second": 48.933, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3514872651538017, "eval_arxiv_token_set_f1_sem": 0.00444384618200801, "eval_arxiv_token_set_precision": 0.305201980315453, "eval_arxiv_token_set_recall": 0.429694570079846, "eval_arxiv_true_num_tokens": 64.0, "step": 152500 }, { "epoch": 29.28, "eval_python_code_alpaca_accuracy": 0.16125, "eval_python_code_alpaca_bleu_score": 4.708651767662293, "eval_python_code_alpaca_bleu_score_sem": 0.1450505840872041, "eval_python_code_alpaca_emb_cos_sim": 0.7512420415878296, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009261219417060288, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.871154308319092, "eval_python_code_alpaca_n_ngrams_match_1": 9.822, "eval_python_code_alpaca_n_ngrams_match_2": 2.946, "eval_python_code_alpaca_n_ngrams_match_3": 1.01, "eval_python_code_alpaca_num_pred_words": 43.238, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.65738851121748, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3345757801778281, "eval_python_code_alpaca_runtime": 9.7852, "eval_python_code_alpaca_samples_per_second": 51.098, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4749499215819101, "eval_python_code_alpaca_token_set_f1_sem": 0.005438912566046437, "eval_python_code_alpaca_token_set_precision": 0.5363635997315532, "eval_python_code_alpaca_token_set_recall": 0.4467668670965634, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 152500 }, { "epoch": 29.28, "eval_wikibio_accuracy": 0.3243125, "eval_wikibio_bleu_score": 6.170355489342777, "eval_wikibio_bleu_score_sem": 0.21626342709264484, "eval_wikibio_emb_cos_sim": 0.7447891235351562, "eval_wikibio_emb_cos_sim_sem": 0.00958461481986241, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7073071002960205, "eval_wikibio_n_ngrams_match_1": 10.262, "eval_wikibio_n_ngrams_match_2": 3.528, "eval_wikibio_n_ngrams_match_3": 1.276, "eval_wikibio_num_pred_words": 36.562, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.74393932056487, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3619102206975344, "eval_wikibio_runtime": 9.8393, "eval_wikibio_samples_per_second": 50.816, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.32288781745287093, "eval_wikibio_token_set_f1_sem": 0.005292243994634853, "eval_wikibio_token_set_precision": 0.3318492022677943, "eval_wikibio_token_set_recall": 0.33011026734187837, "eval_wikibio_true_num_tokens": 61.1328125, "step": 152500 }, { "epoch": 29.28, "eval_nq_accuracy": 0.5313125, "eval_nq_bleu_score": 11.604526963423785, "eval_nq_bleu_score_sem": 0.47233919225509874, "eval_nq_emb_cos_sim": 0.8328354358673096, "eval_nq_emb_cos_sim_sem": 0.006959335041323397, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1629464626312256, "eval_nq_n_ngrams_match_1": 23.1, "eval_nq_n_ngrams_match_2": 8.412, "eval_nq_n_ngrams_match_3": 3.856, "eval_nq_num_pred_words": 48.816, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.696724518396325, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4484148132987383, "eval_nq_runtime": 10.8999, "eval_nq_samples_per_second": 45.872, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4622161706914341, "eval_nq_token_set_f1_sem": 0.004978477992915881, "eval_nq_token_set_precision": 0.4203228620083173, "eval_nq_token_set_recall": 0.5211142140551582, "eval_nq_true_num_tokens": 64.0, "step": 152500 }, { "epoch": 29.28, "learning_rate": 0.001, "loss": 2.5281, "step": 152508 }, { "epoch": 29.29, "learning_rate": 0.001, "loss": 2.531, "step": 152520 }, { "epoch": 29.29, "learning_rate": 0.001, "loss": 2.5323, "step": 152532 }, { "epoch": 29.29, "learning_rate": 0.001, "loss": 2.5331, "step": 152544 }, { "epoch": 29.29, "learning_rate": 0.001, "loss": 2.5316, "step": 152556 }, { "epoch": 29.29, "learning_rate": 0.001, "loss": 2.5349, "step": 152568 }, { "epoch": 29.3, "learning_rate": 0.001, "loss": 2.535, "step": 152580 }, { "epoch": 29.3, "learning_rate": 0.001, "loss": 2.5324, "step": 152592 }, { "epoch": 29.3, "learning_rate": 0.001, "loss": 2.5388, "step": 152604 }, { "epoch": 29.3, "learning_rate": 0.001, "loss": 2.5255, "step": 152616 }, { "epoch": 29.31, "learning_rate": 0.001, "loss": 2.5367, "step": 152628 }, { "epoch": 29.31, "learning_rate": 0.001, "loss": 2.5344, "step": 152640 }, { "epoch": 29.31, "learning_rate": 0.001, "loss": 2.5522, "step": 152652 }, { "epoch": 29.31, "learning_rate": 0.001, "loss": 2.5355, "step": 152664 }, { "epoch": 29.32, "learning_rate": 0.001, "loss": 2.5306, "step": 152676 }, { "epoch": 29.32, "learning_rate": 0.001, "loss": 2.5464, "step": 152688 }, { "epoch": 29.32, "learning_rate": 0.001, "loss": 2.5358, "step": 152700 }, { "epoch": 29.32, "learning_rate": 0.001, "loss": 2.5378, "step": 152712 }, { "epoch": 29.32, "learning_rate": 0.001, "loss": 2.5367, "step": 152724 }, { "epoch": 29.33, "learning_rate": 0.001, "loss": 2.5264, "step": 152736 }, { "epoch": 29.33, "learning_rate": 0.001, "loss": 2.5321, "step": 152748 }, { "epoch": 29.33, "learning_rate": 0.001, "loss": 2.5294, "step": 152760 }, { "epoch": 29.33, "learning_rate": 0.001, "loss": 2.5343, "step": 152772 }, { "epoch": 29.34, "learning_rate": 0.001, "loss": 2.5322, "step": 152784 }, { "epoch": 29.34, "learning_rate": 0.001, "loss": 2.5321, "step": 152796 }, { "epoch": 29.34, "learning_rate": 0.001, "loss": 2.5308, "step": 152808 }, { "epoch": 29.34, "learning_rate": 0.001, "loss": 2.5414, "step": 152820 }, { "epoch": 29.35, "learning_rate": 0.001, "loss": 2.53, "step": 152832 }, { "epoch": 29.35, "learning_rate": 0.001, "loss": 2.533, "step": 152844 }, { "epoch": 29.35, "learning_rate": 0.001, "loss": 2.5316, "step": 152856 }, { "epoch": 29.35, "learning_rate": 0.001, "loss": 2.5324, "step": 152868 }, { "epoch": 29.35, "learning_rate": 0.001, "loss": 2.5387, "step": 152880 }, { "epoch": 29.36, "learning_rate": 0.001, "loss": 2.5392, "step": 152892 }, { "epoch": 29.36, "learning_rate": 0.001, "loss": 2.545, "step": 152904 }, { "epoch": 29.36, "learning_rate": 0.001, "loss": 2.538, "step": 152916 }, { "epoch": 29.36, "learning_rate": 0.001, "loss": 2.539, "step": 152928 }, { "epoch": 29.37, "learning_rate": 0.001, "loss": 2.5393, "step": 152940 }, { "epoch": 29.37, "learning_rate": 0.001, "loss": 2.5368, "step": 152952 }, { "epoch": 29.37, "learning_rate": 0.001, "loss": 2.5489, "step": 152964 }, { "epoch": 29.37, "learning_rate": 0.001, "loss": 2.545, "step": 152976 }, { "epoch": 29.38, "learning_rate": 0.001, "loss": 2.5348, "step": 152988 }, { "epoch": 29.38, "learning_rate": 0.001, "loss": 2.5412, "step": 153000 }, { "epoch": 29.38, "learning_rate": 0.001, "loss": 2.5434, "step": 153012 }, { "epoch": 29.38, "learning_rate": 0.001, "loss": 2.5352, "step": 153024 }, { "epoch": 29.38, "learning_rate": 0.001, "loss": 2.5353, "step": 153036 }, { "epoch": 29.39, "learning_rate": 0.001, "loss": 2.5333, "step": 153048 }, { "epoch": 29.39, "learning_rate": 0.001, "loss": 2.5254, "step": 153060 }, { "epoch": 29.39, "learning_rate": 0.001, "loss": 2.5296, "step": 153072 }, { "epoch": 29.39, "learning_rate": 0.001, "loss": 2.5423, "step": 153084 }, { "epoch": 29.4, "learning_rate": 0.001, "loss": 2.5332, "step": 153096 }, { "epoch": 29.4, "learning_rate": 0.001, "loss": 2.5306, "step": 153108 }, { "epoch": 29.4, "learning_rate": 0.001, "loss": 2.5286, "step": 153120 }, { "epoch": 29.4, "eval_ag_news_accuracy": 0.32275, "eval_ag_news_bleu_score": 4.727131132424163, "eval_ag_news_bleu_score_sem": 0.14846541570604924, "eval_ag_news_emb_cos_sim": 0.8130539059638977, "eval_ag_news_emb_cos_sim_sem": 0.0067402537471302456, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.531450033187866, "eval_ag_news_n_ngrams_match_1": 14.238, "eval_ag_news_n_ngrams_match_2": 3.11, "eval_ag_news_n_ngrams_match_3": 0.834, "eval_ag_news_num_pred_words": 46.772, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.17348439210654, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35234964592223067, "eval_ag_news_runtime": 10.4878, "eval_ag_news_samples_per_second": 47.675, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35323183667683966, "eval_ag_news_token_set_f1_sem": 0.0045276964664471234, "eval_ag_news_token_set_precision": 0.3391260723421015, "eval_ag_news_token_set_recall": 0.3812286779843616, "eval_ag_news_true_num_tokens": 56.09375, "step": 153125 }, { "epoch": 29.4, "eval_anthropic_toxic_prompts_accuracy": 0.1146875, "eval_anthropic_toxic_prompts_bleu_score": 3.094749298177269, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11965242742021427, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6682591438293457, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009238541119838888, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2619822025299072, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732, "eval_anthropic_toxic_prompts_num_pred_words": 48.024, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 26.10122381074379, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21201649055170305, "eval_anthropic_toxic_prompts_runtime": 10.3113, "eval_anthropic_toxic_prompts_samples_per_second": 48.49, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.35659793556420943, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064802144593629225, "eval_anthropic_toxic_prompts_token_set_precision": 0.4370085658737159, "eval_anthropic_toxic_prompts_token_set_recall": 0.32842114557386587, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 153125 }, { "epoch": 29.4, "eval_arxiv_accuracy": 0.3475625, "eval_arxiv_bleu_score": 4.436679851921937, "eval_arxiv_bleu_score_sem": 0.12367766371593797, "eval_arxiv_emb_cos_sim": 0.7703551650047302, "eval_arxiv_emb_cos_sim_sem": 0.00799319986947366, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.4008877277374268, "eval_arxiv_n_ngrams_match_1": 15.274, "eval_arxiv_n_ngrams_match_2": 3.024, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 40.638, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.990711820392992, "eval_arxiv_pred_num_tokens": 62.9609375, "eval_arxiv_rouge_score": 0.36658715684338206, "eval_arxiv_runtime": 10.4344, "eval_arxiv_samples_per_second": 47.918, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.35909541712914766, "eval_arxiv_token_set_f1_sem": 0.00411386885948219, "eval_arxiv_token_set_precision": 0.3112874522779814, "eval_arxiv_token_set_recall": 0.44162401115407596, "eval_arxiv_true_num_tokens": 64.0, "step": 153125 }, { "epoch": 29.4, "eval_python_code_alpaca_accuracy": 0.161, "eval_python_code_alpaca_bleu_score": 4.639786625479209, "eval_python_code_alpaca_bleu_score_sem": 0.149060509814988, "eval_python_code_alpaca_emb_cos_sim": 0.7475690841674805, "eval_python_code_alpaca_emb_cos_sim_sem": 0.01049772300012839, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.892303228378296, "eval_python_code_alpaca_n_ngrams_match_1": 9.796, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 1.006, "eval_python_code_alpaca_num_pred_words": 43.538, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.03480006550932, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33247830362276876, "eval_python_code_alpaca_runtime": 10.2052, "eval_python_code_alpaca_samples_per_second": 48.995, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.46703583762286777, "eval_python_code_alpaca_token_set_f1_sem": 0.005649303604768681, "eval_python_code_alpaca_token_set_precision": 0.5346446187826586, "eval_python_code_alpaca_token_set_recall": 0.43804529421209365, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 153125 }, { "epoch": 29.4, "eval_wikibio_accuracy": 0.3265625, "eval_wikibio_bleu_score": 6.161298010183409, "eval_wikibio_bleu_score_sem": 0.2305446938264135, "eval_wikibio_emb_cos_sim": 0.7415398955345154, "eval_wikibio_emb_cos_sim_sem": 0.007896822539274897, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.717679262161255, "eval_wikibio_n_ngrams_match_1": 10.18, "eval_wikibio_n_ngrams_match_2": 3.45, "eval_wikibio_n_ngrams_match_3": 1.276, "eval_wikibio_num_pred_words": 36.048, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.16874130328316, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.363731288311951, "eval_wikibio_runtime": 10.5334, "eval_wikibio_samples_per_second": 47.468, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.32439116384983174, "eval_wikibio_token_set_f1_sem": 0.005311041508510277, "eval_wikibio_token_set_precision": 0.3323911169049795, "eval_wikibio_token_set_recall": 0.33159097463536835, "eval_wikibio_true_num_tokens": 61.1328125, "step": 153125 }, { "epoch": 29.4, "eval_nq_accuracy": 0.53084375, "eval_nq_bleu_score": 11.9056362080981, "eval_nq_bleu_score_sem": 0.48633410526354454, "eval_nq_emb_cos_sim": 0.8390674591064453, "eval_nq_emb_cos_sim_sem": 0.006772789138547132, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1620209217071533, "eval_nq_n_ngrams_match_1": 23.348, "eval_nq_n_ngrams_match_2": 8.512, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 48.924, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.688679067720422, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4525557854168135, "eval_nq_runtime": 11.1957, "eval_nq_samples_per_second": 44.66, "eval_nq_steps_per_second": 0.089, "eval_nq_token_set_f1": 0.46648311505069967, "eval_nq_token_set_f1_sem": 0.004912789983806017, "eval_nq_token_set_precision": 0.425312631514177, "eval_nq_token_set_recall": 0.5239662201959386, "eval_nq_true_num_tokens": 64.0, "step": 153125 }, { "epoch": 29.4, "learning_rate": 0.001, "loss": 2.5397, "step": 153132 }, { "epoch": 29.41, "learning_rate": 0.001, "loss": 2.5299, "step": 153144 }, { "epoch": 29.41, "learning_rate": 0.001, "loss": 2.531, "step": 153156 }, { "epoch": 29.41, "learning_rate": 0.001, "loss": 2.5342, "step": 153168 }, { "epoch": 29.41, "learning_rate": 0.001, "loss": 2.5286, "step": 153180 }, { "epoch": 29.41, "learning_rate": 0.001, "loss": 2.5324, "step": 153192 }, { "epoch": 29.42, "learning_rate": 0.001, "loss": 2.5322, "step": 153204 }, { "epoch": 29.42, "learning_rate": 0.001, "loss": 2.5272, "step": 153216 }, { "epoch": 29.42, "learning_rate": 0.001, "loss": 2.528, "step": 153228 }, { "epoch": 29.42, "learning_rate": 0.001, "loss": 2.5373, "step": 153240 }, { "epoch": 29.43, "learning_rate": 0.001, "loss": 2.5352, "step": 153252 }, { "epoch": 29.43, "learning_rate": 0.001, "loss": 2.5306, "step": 153264 }, { "epoch": 29.43, "learning_rate": 0.001, "loss": 2.5351, "step": 153276 }, { "epoch": 29.43, "learning_rate": 0.001, "loss": 2.533, "step": 153288 }, { "epoch": 29.44, "learning_rate": 0.001, "loss": 2.5429, "step": 153300 }, { "epoch": 29.44, "learning_rate": 0.001, "loss": 2.5458, "step": 153312 }, { "epoch": 29.44, "learning_rate": 0.001, "loss": 2.5435, "step": 153324 }, { "epoch": 29.44, "learning_rate": 0.001, "loss": 2.5391, "step": 153336 }, { "epoch": 29.44, "learning_rate": 0.001, "loss": 2.5436, "step": 153348 }, { "epoch": 29.45, "learning_rate": 0.001, "loss": 2.5316, "step": 153360 }, { "epoch": 29.45, "learning_rate": 0.001, "loss": 2.5336, "step": 153372 }, { "epoch": 29.45, "learning_rate": 0.001, "loss": 2.5353, "step": 153384 }, { "epoch": 29.45, "learning_rate": 0.001, "loss": 2.5409, "step": 153396 }, { "epoch": 29.46, "learning_rate": 0.001, "loss": 2.5419, "step": 153408 }, { "epoch": 29.46, "learning_rate": 0.001, "loss": 2.5335, "step": 153420 }, { "epoch": 29.46, "learning_rate": 0.001, "loss": 2.5325, "step": 153432 }, { "epoch": 29.46, "learning_rate": 0.001, "loss": 2.5327, "step": 153444 }, { "epoch": 29.47, "learning_rate": 0.001, "loss": 2.5374, "step": 153456 }, { "epoch": 29.47, "learning_rate": 0.001, "loss": 2.534, "step": 153468 }, { "epoch": 29.47, "learning_rate": 0.001, "loss": 2.5386, "step": 153480 }, { "epoch": 29.47, "learning_rate": 0.001, "loss": 2.5335, "step": 153492 }, { "epoch": 29.47, "learning_rate": 0.001, "loss": 2.5389, "step": 153504 }, { "epoch": 29.48, "learning_rate": 0.001, "loss": 2.5401, "step": 153516 }, { "epoch": 29.48, "learning_rate": 0.001, "loss": 2.5292, "step": 153528 }, { "epoch": 29.48, "learning_rate": 0.001, "loss": 2.5329, "step": 153540 }, { "epoch": 29.48, "learning_rate": 0.001, "loss": 2.5361, "step": 153552 }, { "epoch": 29.49, "learning_rate": 0.001, "loss": 2.5382, "step": 153564 }, { "epoch": 29.49, "learning_rate": 0.001, "loss": 2.536, "step": 153576 }, { "epoch": 29.49, "learning_rate": 0.001, "loss": 2.5355, "step": 153588 }, { "epoch": 29.49, "learning_rate": 0.001, "loss": 2.5324, "step": 153600 }, { "epoch": 29.5, "learning_rate": 0.001, "loss": 2.5272, "step": 153612 }, { "epoch": 29.5, "learning_rate": 0.001, "loss": 2.531, "step": 153624 }, { "epoch": 29.5, "learning_rate": 0.001, "loss": 2.5316, "step": 153636 }, { "epoch": 29.5, "learning_rate": 0.001, "loss": 2.5429, "step": 153648 }, { "epoch": 29.5, "learning_rate": 0.001, "loss": 2.5388, "step": 153660 }, { "epoch": 29.51, "learning_rate": 0.001, "loss": 2.5315, "step": 153672 }, { "epoch": 29.51, "learning_rate": 0.001, "loss": 2.532, "step": 153684 }, { "epoch": 29.51, "learning_rate": 0.001, "loss": 2.5375, "step": 153696 }, { "epoch": 29.51, "learning_rate": 0.001, "loss": 2.5311, "step": 153708 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 2.5375, "step": 153720 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 2.547, "step": 153732 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 2.5382, "step": 153744 }, { "epoch": 29.52, "eval_ag_news_accuracy": 0.32546875, "eval_ag_news_bleu_score": 5.0884880683787665, "eval_ag_news_bleu_score_sem": 0.16128621691349004, "eval_ag_news_emb_cos_sim": 0.8166429400444031, "eval_ag_news_emb_cos_sim_sem": 0.006777681465696936, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5160975456237793, "eval_ag_news_n_ngrams_match_1": 14.314, "eval_ag_news_n_ngrams_match_2": 3.178, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 47.0, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.65284319289521, "eval_ag_news_pred_num_tokens": 62.9609375, "eval_ag_news_rouge_score": 0.35385473180337534, "eval_ag_news_runtime": 10.4679, "eval_ag_news_samples_per_second": 47.765, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35552203181937475, "eval_ag_news_token_set_f1_sem": 0.004324311263915896, "eval_ag_news_token_set_precision": 0.3411261544436801, "eval_ag_news_token_set_recall": 0.3864101046198372, "eval_ag_news_true_num_tokens": 56.09375, "step": 153750 }, { "epoch": 29.52, "eval_anthropic_toxic_prompts_accuracy": 0.11478125, "eval_anthropic_toxic_prompts_bleu_score": 3.166570391996507, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11826813855791353, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6783945560455322, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009271410393321322, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2225420475006104, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.97, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75, "eval_anthropic_toxic_prompts_num_pred_words": 47.762, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.091823786180072, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.215857727280069, "eval_anthropic_toxic_prompts_runtime": 10.0795, "eval_anthropic_toxic_prompts_samples_per_second": 49.605, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.36667837988495133, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065007062463194, "eval_anthropic_toxic_prompts_token_set_precision": 0.4559403292944089, "eval_anthropic_toxic_prompts_token_set_recall": 0.3345886790171598, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 153750 }, { "epoch": 29.52, "eval_arxiv_accuracy": 0.3509375, "eval_arxiv_bleu_score": 4.526329104388431, "eval_arxiv_bleu_score_sem": 0.13274732339726175, "eval_arxiv_emb_cos_sim": 0.7760580778121948, "eval_arxiv_emb_cos_sim_sem": 0.006906002155799908, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3723182678222656, "eval_arxiv_n_ngrams_match_1": 15.508, "eval_arxiv_n_ngrams_match_2": 3.076, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 41.262, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.14601707113083, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3684909686343052, "eval_arxiv_runtime": 10.2667, "eval_arxiv_samples_per_second": 48.701, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3627758262701893, "eval_arxiv_token_set_f1_sem": 0.0039809920895349255, "eval_arxiv_token_set_precision": 0.31505173557214594, "eval_arxiv_token_set_recall": 0.4446297743682353, "eval_arxiv_true_num_tokens": 64.0, "step": 153750 }, { "epoch": 29.52, "eval_python_code_alpaca_accuracy": 0.16209375, "eval_python_code_alpaca_bleu_score": 4.722444137748518, "eval_python_code_alpaca_bleu_score_sem": 0.14652267465499919, "eval_python_code_alpaca_emb_cos_sim": 0.7655680775642395, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007991520580978304, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8765552043914795, "eval_python_code_alpaca_n_ngrams_match_1": 9.954, "eval_python_code_alpaca_n_ngrams_match_2": 3.026, "eval_python_code_alpaca_n_ngrams_match_3": 1.046, "eval_python_code_alpaca_num_pred_words": 44.66, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.753012225906247, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3331539217992684, "eval_python_code_alpaca_runtime": 11.0122, "eval_python_code_alpaca_samples_per_second": 45.404, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.48638780011528077, "eval_python_code_alpaca_token_set_f1_sem": 0.005668090953059059, "eval_python_code_alpaca_token_set_precision": 0.5490494054444741, "eval_python_code_alpaca_token_set_recall": 0.4584435720869273, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 153750 }, { "epoch": 29.52, "eval_wikibio_accuracy": 0.32459375, "eval_wikibio_bleu_score": 6.0356667548973855, "eval_wikibio_bleu_score_sem": 0.20725233836470902, "eval_wikibio_emb_cos_sim": 0.7549325227737427, "eval_wikibio_emb_cos_sim_sem": 0.008114191617747292, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6997523307800293, "eval_wikibio_n_ngrams_match_1": 10.192, "eval_wikibio_n_ngrams_match_2": 3.478, "eval_wikibio_n_ngrams_match_3": 1.25, "eval_wikibio_num_pred_words": 35.878, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.43728804816389, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3636025086830905, "eval_wikibio_runtime": 10.5374, "eval_wikibio_samples_per_second": 47.45, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.32287164328214785, "eval_wikibio_token_set_f1_sem": 0.005350142309823111, "eval_wikibio_token_set_precision": 0.33072022461373424, "eval_wikibio_token_set_recall": 0.33270149809465766, "eval_wikibio_true_num_tokens": 61.1328125, "step": 153750 }, { "epoch": 29.52, "eval_nq_accuracy": 0.5319375, "eval_nq_bleu_score": 11.81763805158241, "eval_nq_bleu_score_sem": 0.4656285352602, "eval_nq_emb_cos_sim": 0.8430206775665283, "eval_nq_emb_cos_sim_sem": 0.006386959106576692, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.162168502807617, "eval_nq_n_ngrams_match_1": 23.154, "eval_nq_n_ngrams_match_2": 8.568, "eval_nq_n_ngrams_match_3": 3.944, "eval_nq_num_pred_words": 49.124, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.68996144716397, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44998185161445425, "eval_nq_runtime": 10.5108, "eval_nq_samples_per_second": 47.57, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4656747856974989, "eval_nq_token_set_f1_sem": 0.0049369660674267474, "eval_nq_token_set_precision": 0.4225186523974833, "eval_nq_token_set_recall": 0.527291210402914, "eval_nq_true_num_tokens": 64.0, "step": 153750 }, { "epoch": 29.52, "learning_rate": 0.001, "loss": 2.5372, "step": 153756 }, { "epoch": 29.53, "learning_rate": 0.001, "loss": 2.5395, "step": 153768 }, { "epoch": 29.53, "learning_rate": 0.001, "loss": 2.5414, "step": 153780 }, { "epoch": 29.53, "learning_rate": 0.001, "loss": 2.5342, "step": 153792 }, { "epoch": 29.53, "learning_rate": 0.001, "loss": 2.5437, "step": 153804 }, { "epoch": 29.53, "learning_rate": 0.001, "loss": 2.5409, "step": 153816 }, { "epoch": 29.54, "learning_rate": 0.001, "loss": 2.5365, "step": 153828 }, { "epoch": 29.54, "learning_rate": 0.001, "loss": 2.5424, "step": 153840 }, { "epoch": 29.54, "learning_rate": 0.001, "loss": 2.5415, "step": 153852 }, { "epoch": 29.54, "learning_rate": 0.001, "loss": 2.5482, "step": 153864 }, { "epoch": 29.55, "learning_rate": 0.001, "loss": 2.5371, "step": 153876 }, { "epoch": 29.55, "learning_rate": 0.001, "loss": 2.5317, "step": 153888 }, { "epoch": 29.55, "learning_rate": 0.001, "loss": 2.5439, "step": 153900 }, { "epoch": 29.55, "learning_rate": 0.001, "loss": 2.5437, "step": 153912 }, { "epoch": 29.56, "learning_rate": 0.001, "loss": 2.5383, "step": 153924 }, { "epoch": 29.56, "learning_rate": 0.001, "loss": 2.5421, "step": 153936 }, { "epoch": 29.56, "learning_rate": 0.001, "loss": 2.5366, "step": 153948 }, { "epoch": 29.56, "learning_rate": 0.001, "loss": 2.5327, "step": 153960 }, { "epoch": 29.56, "learning_rate": 0.001, "loss": 2.5432, "step": 153972 }, { "epoch": 29.57, "learning_rate": 0.001, "loss": 2.5473, "step": 153984 }, { "epoch": 29.57, "learning_rate": 0.001, "loss": 2.5444, "step": 153996 }, { "epoch": 29.57, "learning_rate": 0.001, "loss": 2.5287, "step": 154008 }, { "epoch": 29.57, "learning_rate": 0.001, "loss": 2.5401, "step": 154020 }, { "epoch": 29.58, "learning_rate": 0.001, "loss": 2.5391, "step": 154032 }, { "epoch": 29.58, "learning_rate": 0.001, "loss": 2.5428, "step": 154044 }, { "epoch": 29.58, "learning_rate": 0.001, "loss": 2.5381, "step": 154056 }, { "epoch": 29.58, "learning_rate": 0.001, "loss": 2.5426, "step": 154068 }, { "epoch": 29.59, "learning_rate": 0.001, "loss": 2.5325, "step": 154080 }, { "epoch": 29.59, "learning_rate": 0.001, "loss": 2.5352, "step": 154092 }, { "epoch": 29.59, "learning_rate": 0.001, "loss": 2.5409, "step": 154104 }, { "epoch": 29.59, "learning_rate": 0.001, "loss": 2.5392, "step": 154116 }, { "epoch": 29.59, "learning_rate": 0.001, "loss": 2.5373, "step": 154128 }, { "epoch": 29.6, "learning_rate": 0.001, "loss": 2.5287, "step": 154140 }, { "epoch": 29.6, "learning_rate": 0.001, "loss": 2.5492, "step": 154152 }, { "epoch": 29.6, "learning_rate": 0.001, "loss": 2.5339, "step": 154164 }, { "epoch": 29.6, "learning_rate": 0.001, "loss": 2.5329, "step": 154176 }, { "epoch": 29.61, "learning_rate": 0.001, "loss": 2.5308, "step": 154188 }, { "epoch": 29.61, "learning_rate": 0.001, "loss": 2.5303, "step": 154200 }, { "epoch": 29.61, "learning_rate": 0.001, "loss": 2.541, "step": 154212 }, { "epoch": 29.61, "learning_rate": 0.001, "loss": 2.5209, "step": 154224 }, { "epoch": 29.62, "learning_rate": 0.001, "loss": 2.5381, "step": 154236 }, { "epoch": 29.62, "learning_rate": 0.001, "loss": 2.5296, "step": 154248 }, { "epoch": 29.62, "learning_rate": 0.001, "loss": 2.5415, "step": 154260 }, { "epoch": 29.62, "learning_rate": 0.001, "loss": 2.524, "step": 154272 }, { "epoch": 29.62, "learning_rate": 0.001, "loss": 2.5279, "step": 154284 }, { "epoch": 29.63, "learning_rate": 0.001, "loss": 2.5356, "step": 154296 }, { "epoch": 29.63, "learning_rate": 0.001, "loss": 2.537, "step": 154308 }, { "epoch": 29.63, "learning_rate": 0.001, "loss": 2.5246, "step": 154320 }, { "epoch": 29.63, "learning_rate": 0.001, "loss": 2.534, "step": 154332 }, { "epoch": 29.64, "learning_rate": 0.001, "loss": 2.532, "step": 154344 }, { "epoch": 29.64, "learning_rate": 0.001, "loss": 2.5316, "step": 154356 }, { "epoch": 29.64, "learning_rate": 0.001, "loss": 2.5409, "step": 154368 }, { "epoch": 29.64, "eval_ag_news_accuracy": 0.32521875, "eval_ag_news_bleu_score": 5.012902685449532, "eval_ag_news_bleu_score_sem": 0.15705874915875578, "eval_ag_news_emb_cos_sim": 0.8178128004074097, "eval_ag_news_emb_cos_sim_sem": 0.006340916308756573, "eval_ag_news_emb_top1_equal": 0.3125, "eval_ag_news_emb_top1_equal_sem": 0.041130074229814934, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5275087356567383, "eval_ag_news_n_ngrams_match_1": 14.258, "eval_ag_news_n_ngrams_match_2": 3.152, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.564, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.039061596566306, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35532693660651615, "eval_ag_news_runtime": 10.4252, "eval_ag_news_samples_per_second": 47.961, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35746113313108385, "eval_ag_news_token_set_f1_sem": 0.004430115283958541, "eval_ag_news_token_set_precision": 0.34257999315790344, "eval_ag_news_token_set_recall": 0.3886728227187792, "eval_ag_news_true_num_tokens": 56.09375, "step": 154375 }, { "epoch": 29.64, "eval_anthropic_toxic_prompts_accuracy": 0.114125, "eval_anthropic_toxic_prompts_bleu_score": 3.1582854501084268, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12222832786168704, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6750933527946472, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009194914521818557, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.237285614013672, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 47.39, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.464507350652944, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21633668119458238, "eval_anthropic_toxic_prompts_runtime": 9.8646, "eval_anthropic_toxic_prompts_samples_per_second": 50.686, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.360695358838236, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064656241270708344, "eval_anthropic_toxic_prompts_token_set_precision": 0.4470211767403358, "eval_anthropic_toxic_prompts_token_set_recall": 0.32739231824678594, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 154375 }, { "epoch": 29.64, "eval_arxiv_accuracy": 0.35221875, "eval_arxiv_bleu_score": 4.326964203076714, "eval_arxiv_bleu_score_sem": 0.125337949054851, "eval_arxiv_emb_cos_sim": 0.7610146999359131, "eval_arxiv_emb_cos_sim_sem": 0.008225729959608833, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.363086462020874, "eval_arxiv_n_ngrams_match_1": 15.21, "eval_arxiv_n_ngrams_match_2": 2.942, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 40.348, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.87818489117387, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3622590678362871, "eval_arxiv_runtime": 10.699, "eval_arxiv_samples_per_second": 46.733, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.3565557168678842, "eval_arxiv_token_set_f1_sem": 0.0043836824778191496, "eval_arxiv_token_set_precision": 0.3072107301135902, "eval_arxiv_token_set_recall": 0.4450525329518359, "eval_arxiv_true_num_tokens": 64.0, "step": 154375 }, { "epoch": 29.64, "eval_python_code_alpaca_accuracy": 0.161375, "eval_python_code_alpaca_bleu_score": 4.4499177339602936, "eval_python_code_alpaca_bleu_score_sem": 0.1412257028668921, "eval_python_code_alpaca_emb_cos_sim": 0.7586034536361694, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008925268863213476, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9138526916503906, "eval_python_code_alpaca_n_ngrams_match_1": 9.738, "eval_python_code_alpaca_n_ngrams_match_2": 2.808, "eval_python_code_alpaca_n_ngrams_match_3": 0.93, "eval_python_code_alpaca_num_pred_words": 43.544, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.427658063832194, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32914943647812855, "eval_python_code_alpaca_runtime": 10.0116, "eval_python_code_alpaca_samples_per_second": 49.942, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.471186095054125, "eval_python_code_alpaca_token_set_f1_sem": 0.005914170156224076, "eval_python_code_alpaca_token_set_precision": 0.5312704058706971, "eval_python_code_alpaca_token_set_recall": 0.45339734146655575, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 154375 }, { "epoch": 29.64, "eval_wikibio_accuracy": 0.324625, "eval_wikibio_bleu_score": 6.207945334710554, "eval_wikibio_bleu_score_sem": 0.2215587848145654, "eval_wikibio_emb_cos_sim": 0.752561092376709, "eval_wikibio_emb_cos_sim_sem": 0.007941769519033732, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7271616458892822, "eval_wikibio_n_ngrams_match_1": 10.392, "eval_wikibio_n_ngrams_match_2": 3.554, "eval_wikibio_n_ngrams_match_3": 1.326, "eval_wikibio_num_pred_words": 37.038, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.56097582605309, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.365882819838572, "eval_wikibio_runtime": 9.9368, "eval_wikibio_samples_per_second": 50.318, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.3266248816004787, "eval_wikibio_token_set_f1_sem": 0.005258264579000425, "eval_wikibio_token_set_precision": 0.3389986657301919, "eval_wikibio_token_set_recall": 0.3298875186680289, "eval_wikibio_true_num_tokens": 61.1328125, "step": 154375 }, { "epoch": 29.64, "eval_nq_accuracy": 0.53115625, "eval_nq_bleu_score": 11.722697724335703, "eval_nq_bleu_score_sem": 0.4757861681300113, "eval_nq_emb_cos_sim": 0.838560163974762, "eval_nq_emb_cos_sim_sem": 0.006934290593684814, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.157773971557617, "eval_nq_n_ngrams_match_1": 23.288, "eval_nq_n_ngrams_match_2": 8.554, "eval_nq_n_ngrams_match_3": 3.888, "eval_nq_num_pred_words": 49.286, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.651856927097764, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45020831639411363, "eval_nq_runtime": 10.5843, "eval_nq_samples_per_second": 47.24, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.465704194545563, "eval_nq_token_set_f1_sem": 0.005065087151021448, "eval_nq_token_set_precision": 0.42392459616411754, "eval_nq_token_set_recall": 0.5243949527129446, "eval_nq_true_num_tokens": 64.0, "step": 154375 }, { "epoch": 29.64, "learning_rate": 0.001, "loss": 2.5393, "step": 154380 }, { "epoch": 29.65, "learning_rate": 0.001, "loss": 2.5322, "step": 154392 }, { "epoch": 29.65, "learning_rate": 0.001, "loss": 2.5369, "step": 154404 }, { "epoch": 29.65, "learning_rate": 0.001, "loss": 2.5263, "step": 154416 }, { "epoch": 29.65, "learning_rate": 0.001, "loss": 2.5388, "step": 154428 }, { "epoch": 29.65, "learning_rate": 0.001, "loss": 2.5386, "step": 154440 }, { "epoch": 29.66, "learning_rate": 0.001, "loss": 2.5373, "step": 154452 }, { "epoch": 29.66, "learning_rate": 0.001, "loss": 2.5367, "step": 154464 }, { "epoch": 29.66, "learning_rate": 0.001, "loss": 2.5397, "step": 154476 }, { "epoch": 29.66, "learning_rate": 0.001, "loss": 2.5388, "step": 154488 }, { "epoch": 29.67, "learning_rate": 0.001, "loss": 2.5446, "step": 154500 }, { "epoch": 29.67, "learning_rate": 0.001, "loss": 2.5427, "step": 154512 }, { "epoch": 29.67, "learning_rate": 0.001, "loss": 2.5391, "step": 154524 }, { "epoch": 29.67, "learning_rate": 0.001, "loss": 2.5307, "step": 154536 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 2.5419, "step": 154548 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 2.5365, "step": 154560 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 2.5408, "step": 154572 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 2.5391, "step": 154584 }, { "epoch": 29.68, "learning_rate": 0.001, "loss": 2.5328, "step": 154596 }, { "epoch": 29.69, "learning_rate": 0.001, "loss": 2.5328, "step": 154608 }, { "epoch": 29.69, "learning_rate": 0.001, "loss": 2.5427, "step": 154620 }, { "epoch": 29.69, "learning_rate": 0.001, "loss": 2.5443, "step": 154632 }, { "epoch": 29.69, "learning_rate": 0.001, "loss": 2.5353, "step": 154644 }, { "epoch": 29.7, "learning_rate": 0.001, "loss": 2.5362, "step": 154656 }, { "epoch": 29.7, "learning_rate": 0.001, "loss": 2.5344, "step": 154668 }, { "epoch": 29.7, "learning_rate": 0.001, "loss": 2.535, "step": 154680 }, { "epoch": 29.7, "learning_rate": 0.001, "loss": 2.5438, "step": 154692 }, { "epoch": 29.71, "learning_rate": 0.001, "loss": 2.5333, "step": 154704 }, { "epoch": 29.71, "learning_rate": 0.001, "loss": 2.5362, "step": 154716 }, { "epoch": 29.71, "learning_rate": 0.001, "loss": 2.5382, "step": 154728 }, { "epoch": 29.71, "learning_rate": 0.001, "loss": 2.5372, "step": 154740 }, { "epoch": 29.71, "learning_rate": 0.001, "loss": 2.5384, "step": 154752 }, { "epoch": 29.72, "learning_rate": 0.001, "loss": 2.5362, "step": 154764 }, { "epoch": 29.72, "learning_rate": 0.001, "loss": 2.5407, "step": 154776 }, { "epoch": 29.72, "learning_rate": 0.001, "loss": 2.5448, "step": 154788 }, { "epoch": 29.72, "learning_rate": 0.001, "loss": 2.528, "step": 154800 }, { "epoch": 29.73, "learning_rate": 0.001, "loss": 2.5406, "step": 154812 }, { "epoch": 29.73, "learning_rate": 0.001, "loss": 2.5488, "step": 154824 }, { "epoch": 29.73, "learning_rate": 0.001, "loss": 2.5457, "step": 154836 }, { "epoch": 29.73, "learning_rate": 0.001, "loss": 2.5464, "step": 154848 }, { "epoch": 29.74, "learning_rate": 0.001, "loss": 2.5497, "step": 154860 }, { "epoch": 29.74, "learning_rate": 0.001, "loss": 2.5425, "step": 154872 }, { "epoch": 29.74, "learning_rate": 0.001, "loss": 2.532, "step": 154884 }, { "epoch": 29.74, "learning_rate": 0.001, "loss": 2.5423, "step": 154896 }, { "epoch": 29.74, "learning_rate": 0.001, "loss": 2.5519, "step": 154908 }, { "epoch": 29.75, "learning_rate": 0.001, "loss": 2.5458, "step": 154920 }, { "epoch": 29.75, "learning_rate": 0.001, "loss": 2.5374, "step": 154932 }, { "epoch": 29.75, "learning_rate": 0.001, "loss": 2.5383, "step": 154944 }, { "epoch": 29.75, "learning_rate": 0.001, "loss": 2.545, "step": 154956 }, { "epoch": 29.76, "learning_rate": 0.001, "loss": 2.539, "step": 154968 }, { "epoch": 29.76, "learning_rate": 0.001, "loss": 2.5463, "step": 154980 }, { "epoch": 29.76, "learning_rate": 0.001, "loss": 2.5435, "step": 154992 }, { "epoch": 29.76, "eval_ag_news_accuracy": 0.32440625, "eval_ag_news_bleu_score": 4.918723393516643, "eval_ag_news_bleu_score_sem": 0.15008339969346499, "eval_ag_news_emb_cos_sim": 0.8142803311347961, "eval_ag_news_emb_cos_sim_sem": 0.006617780273713652, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5221900939941406, "eval_ag_news_n_ngrams_match_1": 14.318, "eval_ag_news_n_ngrams_match_2": 3.164, "eval_ag_news_n_ngrams_match_3": 0.906, "eval_ag_news_num_pred_words": 46.874, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.8585006206092, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35295604619885224, "eval_ag_news_runtime": 10.456, "eval_ag_news_samples_per_second": 47.819, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35509576487982475, "eval_ag_news_token_set_f1_sem": 0.004293523174751427, "eval_ag_news_token_set_precision": 0.3408242759432546, "eval_ag_news_token_set_recall": 0.38406877769672304, "eval_ag_news_true_num_tokens": 56.09375, "step": 155000 }, { "epoch": 29.76, "eval_anthropic_toxic_prompts_accuracy": 0.1141875, "eval_anthropic_toxic_prompts_bleu_score": 3.222160102080987, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1268910601411968, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6764065027236938, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008927243179648409, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2388124465942383, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.33, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758, "eval_anthropic_toxic_prompts_num_pred_words": 47.78, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.503417086891567, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2162627095855561, "eval_anthropic_toxic_prompts_runtime": 10.334, "eval_anthropic_toxic_prompts_samples_per_second": 48.384, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3575747876822104, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00633265733193282, "eval_anthropic_toxic_prompts_token_set_precision": 0.4486097670377029, "eval_anthropic_toxic_prompts_token_set_recall": 0.3221478276575159, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 155000 }, { "epoch": 29.76, "eval_arxiv_accuracy": 0.3510625, "eval_arxiv_bleu_score": 4.371270783679327, "eval_arxiv_bleu_score_sem": 0.12889401862848066, "eval_arxiv_emb_cos_sim": 0.7739661931991577, "eval_arxiv_emb_cos_sim_sem": 0.007297900016866901, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3622114658355713, "eval_arxiv_n_ngrams_match_1": 15.268, "eval_arxiv_n_ngrams_match_2": 2.94, "eval_arxiv_n_ngrams_match_3": 0.654, "eval_arxiv_num_pred_words": 40.586, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.85292764116578, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3637609111878547, "eval_arxiv_runtime": 10.2844, "eval_arxiv_samples_per_second": 48.617, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.355780048560854, "eval_arxiv_token_set_f1_sem": 0.004240442774635202, "eval_arxiv_token_set_precision": 0.30913912412583744, "eval_arxiv_token_set_recall": 0.4339336068734512, "eval_arxiv_true_num_tokens": 64.0, "step": 155000 }, { "epoch": 29.76, "eval_python_code_alpaca_accuracy": 0.16228125, "eval_python_code_alpaca_bleu_score": 4.892382853237619, "eval_python_code_alpaca_bleu_score_sem": 0.1515685779314612, "eval_python_code_alpaca_emb_cos_sim": 0.7611924409866333, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008666500384246748, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.9016482830047607, "eval_python_code_alpaca_n_ngrams_match_1": 10.016, "eval_python_code_alpaca_n_ngrams_match_2": 3.03, "eval_python_code_alpaca_n_ngrams_match_3": 1.094, "eval_python_code_alpaca_num_pred_words": 43.64, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.204126206045533, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33784637920451477, "eval_python_code_alpaca_runtime": 10.0011, "eval_python_code_alpaca_samples_per_second": 49.994, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4827913308952058, "eval_python_code_alpaca_token_set_f1_sem": 0.005714515060068195, "eval_python_code_alpaca_token_set_precision": 0.5482806994834478, "eval_python_code_alpaca_token_set_recall": 0.45291813216331395, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 155000 }, { "epoch": 29.76, "eval_wikibio_accuracy": 0.3261875, "eval_wikibio_bleu_score": 6.225144113051411, "eval_wikibio_bleu_score_sem": 0.21846633972148227, "eval_wikibio_emb_cos_sim": 0.7597200870513916, "eval_wikibio_emb_cos_sim_sem": 0.007626358968390901, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.69722318649292, "eval_wikibio_n_ngrams_match_1": 10.38, "eval_wikibio_n_ngrams_match_2": 3.54, "eval_wikibio_n_ngrams_match_3": 1.28, "eval_wikibio_num_pred_words": 36.36, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.335145533136675, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36752194188001464, "eval_wikibio_runtime": 9.9408, "eval_wikibio_samples_per_second": 50.298, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.3305379955531322, "eval_wikibio_token_set_f1_sem": 0.005038885511079791, "eval_wikibio_token_set_precision": 0.33957527807503374, "eval_wikibio_token_set_recall": 0.33651025077218183, "eval_wikibio_true_num_tokens": 61.1328125, "step": 155000 }, { "epoch": 29.76, "eval_nq_accuracy": 0.5313125, "eval_nq_bleu_score": 11.930756372274262, "eval_nq_bleu_score_sem": 0.4834372163814211, "eval_nq_emb_cos_sim": 0.8348702192306519, "eval_nq_emb_cos_sim_sem": 0.007248769962393358, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1603705883026123, "eval_nq_n_ngrams_match_1": 23.322, "eval_nq_n_ngrams_match_2": 8.588, "eval_nq_n_ngrams_match_3": 3.938, "eval_nq_num_pred_words": 49.088, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.67435167615192, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45370456930437236, "eval_nq_runtime": 10.5323, "eval_nq_samples_per_second": 47.473, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4657585442920766, "eval_nq_token_set_f1_sem": 0.005039962585795291, "eval_nq_token_set_precision": 0.4261823126065449, "eval_nq_token_set_recall": 0.5208232575212093, "eval_nq_true_num_tokens": 64.0, "step": 155000 }, { "epoch": 29.76, "learning_rate": 0.001, "loss": 2.5419, "step": 155004 }, { "epoch": 29.76, "learning_rate": 0.001, "loss": 2.538, "step": 155016 }, { "epoch": 29.77, "learning_rate": 0.001, "loss": 2.5475, "step": 155028 }, { "epoch": 29.77, "learning_rate": 0.001, "loss": 2.5364, "step": 155040 }, { "epoch": 29.77, "learning_rate": 0.001, "loss": 2.5418, "step": 155052 }, { "epoch": 29.77, "learning_rate": 0.001, "loss": 2.5418, "step": 155064 }, { "epoch": 29.78, "learning_rate": 0.001, "loss": 2.5337, "step": 155076 }, { "epoch": 29.78, "learning_rate": 0.001, "loss": 2.543, "step": 155088 }, { "epoch": 29.78, "learning_rate": 0.001, "loss": 2.5295, "step": 155100 }, { "epoch": 29.78, "learning_rate": 0.001, "loss": 2.5439, "step": 155112 }, { "epoch": 29.79, "learning_rate": 0.001, "loss": 2.528, "step": 155124 }, { "epoch": 29.79, "learning_rate": 0.001, "loss": 2.5332, "step": 155136 }, { "epoch": 29.79, "learning_rate": 0.001, "loss": 2.5332, "step": 155148 }, { "epoch": 29.79, "learning_rate": 0.001, "loss": 2.5398, "step": 155160 }, { "epoch": 29.79, "learning_rate": 0.001, "loss": 2.537, "step": 155172 }, { "epoch": 29.8, "learning_rate": 0.001, "loss": 2.5364, "step": 155184 }, { "epoch": 29.8, "learning_rate": 0.001, "loss": 2.539, "step": 155196 }, { "epoch": 29.8, "learning_rate": 0.001, "loss": 2.5342, "step": 155208 }, { "epoch": 29.8, "learning_rate": 0.001, "loss": 2.5372, "step": 155220 }, { "epoch": 29.81, "learning_rate": 0.001, "loss": 2.5408, "step": 155232 }, { "epoch": 29.81, "learning_rate": 0.001, "loss": 2.5488, "step": 155244 }, { "epoch": 29.81, "learning_rate": 0.001, "loss": 2.5413, "step": 155256 }, { "epoch": 29.81, "learning_rate": 0.001, "loss": 2.5417, "step": 155268 }, { "epoch": 29.82, "learning_rate": 0.001, "loss": 2.5418, "step": 155280 }, { "epoch": 29.82, "learning_rate": 0.001, "loss": 2.5465, "step": 155292 }, { "epoch": 29.82, "learning_rate": 0.001, "loss": 2.5508, "step": 155304 }, { "epoch": 29.82, "learning_rate": 0.001, "loss": 2.539, "step": 155316 }, { "epoch": 29.82, "learning_rate": 0.001, "loss": 2.5309, "step": 155328 }, { "epoch": 29.83, "learning_rate": 0.001, "loss": 2.5392, "step": 155340 }, { "epoch": 29.83, "learning_rate": 0.001, "loss": 2.5357, "step": 155352 }, { "epoch": 29.83, "learning_rate": 0.001, "loss": 2.5311, "step": 155364 }, { "epoch": 29.83, "learning_rate": 0.001, "loss": 2.5412, "step": 155376 }, { "epoch": 29.84, "learning_rate": 0.001, "loss": 2.546, "step": 155388 }, { "epoch": 29.84, "learning_rate": 0.001, "loss": 2.5371, "step": 155400 }, { "epoch": 29.84, "learning_rate": 0.001, "loss": 2.5371, "step": 155412 }, { "epoch": 29.84, "learning_rate": 0.001, "loss": 2.5418, "step": 155424 }, { "epoch": 29.85, "learning_rate": 0.001, "loss": 2.5512, "step": 155436 }, { "epoch": 29.85, "learning_rate": 0.001, "loss": 2.543, "step": 155448 }, { "epoch": 29.85, "learning_rate": 0.001, "loss": 2.5429, "step": 155460 }, { "epoch": 29.85, "learning_rate": 0.001, "loss": 2.5361, "step": 155472 }, { "epoch": 29.85, "learning_rate": 0.001, "loss": 2.5384, "step": 155484 }, { "epoch": 29.86, "learning_rate": 0.001, "loss": 2.5438, "step": 155496 }, { "epoch": 29.86, "learning_rate": 0.001, "loss": 2.5353, "step": 155508 }, { "epoch": 29.86, "learning_rate": 0.001, "loss": 2.5465, "step": 155520 }, { "epoch": 29.86, "learning_rate": 0.001, "loss": 2.5413, "step": 155532 }, { "epoch": 29.87, "learning_rate": 0.001, "loss": 2.5428, "step": 155544 }, { "epoch": 29.87, "learning_rate": 0.001, "loss": 2.5356, "step": 155556 }, { "epoch": 29.87, "learning_rate": 0.001, "loss": 2.54, "step": 155568 }, { "epoch": 29.87, "learning_rate": 0.001, "loss": 2.5604, "step": 155580 }, { "epoch": 29.88, "learning_rate": 0.001, "loss": 2.5471, "step": 155592 }, { "epoch": 29.88, "learning_rate": 0.001, "loss": 2.5349, "step": 155604 }, { "epoch": 29.88, "learning_rate": 0.001, "loss": 2.5354, "step": 155616 }, { "epoch": 29.88, "eval_ag_news_accuracy": 0.32484375, "eval_ag_news_bleu_score": 4.952086652546281, "eval_ag_news_bleu_score_sem": 0.15125139442082425, "eval_ag_news_emb_cos_sim": 0.8183904886245728, "eval_ag_news_emb_cos_sim_sem": 0.0065492211806210945, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5249595642089844, "eval_ag_news_n_ngrams_match_1": 14.398, "eval_ag_news_n_ngrams_match_2": 3.19, "eval_ag_news_n_ngrams_match_3": 0.936, "eval_ag_news_num_pred_words": 46.686, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.952400696311145, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35758244962937286, "eval_ag_news_runtime": 10.6914, "eval_ag_news_samples_per_second": 46.766, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3600904440123456, "eval_ag_news_token_set_f1_sem": 0.00424496723783764, "eval_ag_news_token_set_precision": 0.3453861360499154, "eval_ag_news_token_set_recall": 0.3900398253031491, "eval_ag_news_true_num_tokens": 56.09375, "step": 155625 }, { "epoch": 29.88, "eval_anthropic_toxic_prompts_accuracy": 0.11490625, "eval_anthropic_toxic_prompts_bleu_score": 3.2417495072455913, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12554304611460063, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6748640537261963, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009528572023688574, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2387568950653076, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.256, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 46.98, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.502000372430125, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2148126582692897, "eval_anthropic_toxic_prompts_runtime": 9.9263, "eval_anthropic_toxic_prompts_samples_per_second": 50.371, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.36260856382633255, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006479654440130748, "eval_anthropic_toxic_prompts_token_set_precision": 0.44291925250433334, "eval_anthropic_toxic_prompts_token_set_recall": 0.334202466712735, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 155625 }, { "epoch": 29.88, "eval_arxiv_accuracy": 0.35090625, "eval_arxiv_bleu_score": 4.505581752252082, "eval_arxiv_bleu_score_sem": 0.13209890477513822, "eval_arxiv_emb_cos_sim": 0.770883321762085, "eval_arxiv_emb_cos_sim_sem": 0.008618350244001084, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.37870454788208, "eval_arxiv_n_ngrams_match_1": 15.318, "eval_arxiv_n_ngrams_match_2": 3.096, "eval_arxiv_n_ngrams_match_3": 0.694, "eval_arxiv_num_pred_words": 40.826, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.332747319965197, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36730055458220245, "eval_arxiv_runtime": 10.374, "eval_arxiv_samples_per_second": 48.198, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.358447328534449, "eval_arxiv_token_set_f1_sem": 0.004186700433961449, "eval_arxiv_token_set_precision": 0.31052965207162525, "eval_arxiv_token_set_recall": 0.4398475041172051, "eval_arxiv_true_num_tokens": 64.0, "step": 155625 }, { "epoch": 29.88, "eval_python_code_alpaca_accuracy": 0.1620625, "eval_python_code_alpaca_bleu_score": 4.764005260698104, "eval_python_code_alpaca_bleu_score_sem": 0.1498926327202342, "eval_python_code_alpaca_emb_cos_sim": 0.770979106426239, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007862454418492294, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.884103298187256, "eval_python_code_alpaca_n_ngrams_match_1": 10.004, "eval_python_code_alpaca_n_ngrams_match_2": 2.988, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 43.494, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.887520629749478, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3397302916141258, "eval_python_code_alpaca_runtime": 10.5692, "eval_python_code_alpaca_samples_per_second": 47.307, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.47950619501897257, "eval_python_code_alpaca_token_set_f1_sem": 0.005151002599964493, "eval_python_code_alpaca_token_set_precision": 0.5472949900950337, "eval_python_code_alpaca_token_set_recall": 0.4491924777801191, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 155625 }, { "epoch": 29.88, "eval_wikibio_accuracy": 0.32653125, "eval_wikibio_bleu_score": 6.049818715521647, "eval_wikibio_bleu_score_sem": 0.22226389530913723, "eval_wikibio_emb_cos_sim": 0.7485655546188354, "eval_wikibio_emb_cos_sim_sem": 0.007909253225407578, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.696716547012329, "eval_wikibio_n_ngrams_match_1": 9.998, "eval_wikibio_n_ngrams_match_2": 3.408, "eval_wikibio_n_ngrams_match_3": 1.274, "eval_wikibio_num_pred_words": 36.304, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.314715331764525, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.351339624505013, "eval_wikibio_runtime": 10.1536, "eval_wikibio_samples_per_second": 49.243, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.31757232621088477, "eval_wikibio_token_set_f1_sem": 0.005278620008801585, "eval_wikibio_token_set_precision": 0.3247958929063056, "eval_wikibio_token_set_recall": 0.32918626183086436, "eval_wikibio_true_num_tokens": 61.1328125, "step": 155625 }, { "epoch": 29.88, "eval_nq_accuracy": 0.53109375, "eval_nq_bleu_score": 11.635680358876966, "eval_nq_bleu_score_sem": 0.4753850858972373, "eval_nq_emb_cos_sim": 0.8347564339637756, "eval_nq_emb_cos_sim_sem": 0.006829161865519469, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.160743236541748, "eval_nq_n_ngrams_match_1": 23.238, "eval_nq_n_ngrams_match_2": 8.568, "eval_nq_n_ngrams_match_3": 3.874, "eval_nq_num_pred_words": 49.114, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.677584760393842, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4497797331113026, "eval_nq_runtime": 10.5859, "eval_nq_samples_per_second": 47.233, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4631730229111701, "eval_nq_token_set_f1_sem": 0.004904152060659144, "eval_nq_token_set_precision": 0.4224903602667082, "eval_nq_token_set_recall": 0.5206331316733986, "eval_nq_true_num_tokens": 64.0, "step": 155625 }, { "epoch": 29.88, "learning_rate": 0.001, "loss": 2.551, "step": 155628 }, { "epoch": 29.88, "learning_rate": 0.001, "loss": 2.5512, "step": 155640 }, { "epoch": 29.89, "learning_rate": 0.001, "loss": 2.5366, "step": 155652 }, { "epoch": 29.89, "learning_rate": 0.001, "loss": 2.5369, "step": 155664 }, { "epoch": 29.89, "learning_rate": 0.001, "loss": 2.543, "step": 155676 }, { "epoch": 29.89, "learning_rate": 0.001, "loss": 2.5422, "step": 155688 }, { "epoch": 29.9, "learning_rate": 0.001, "loss": 2.5426, "step": 155700 }, { "epoch": 29.9, "learning_rate": 0.001, "loss": 2.5412, "step": 155712 }, { "epoch": 29.9, "learning_rate": 0.001, "loss": 2.5404, "step": 155724 }, { "epoch": 29.9, "learning_rate": 0.001, "loss": 2.5391, "step": 155736 }, { "epoch": 29.91, "learning_rate": 0.001, "loss": 2.5374, "step": 155748 }, { "epoch": 29.91, "learning_rate": 0.001, "loss": 2.5431, "step": 155760 }, { "epoch": 29.91, "learning_rate": 0.001, "loss": 2.552, "step": 155772 }, { "epoch": 29.91, "learning_rate": 0.001, "loss": 2.5305, "step": 155784 }, { "epoch": 29.91, "learning_rate": 0.001, "loss": 2.5437, "step": 155796 }, { "epoch": 29.92, "learning_rate": 0.001, "loss": 2.5344, "step": 155808 }, { "epoch": 29.92, "learning_rate": 0.001, "loss": 2.5412, "step": 155820 }, { "epoch": 29.92, "learning_rate": 0.001, "loss": 2.5406, "step": 155832 }, { "epoch": 29.92, "learning_rate": 0.001, "loss": 2.5325, "step": 155844 }, { "epoch": 29.93, "learning_rate": 0.001, "loss": 2.5474, "step": 155856 }, { "epoch": 29.93, "learning_rate": 0.001, "loss": 2.5323, "step": 155868 }, { "epoch": 29.93, "learning_rate": 0.001, "loss": 2.5424, "step": 155880 }, { "epoch": 29.93, "learning_rate": 0.001, "loss": 2.5465, "step": 155892 }, { "epoch": 29.94, "learning_rate": 0.001, "loss": 2.5396, "step": 155904 }, { "epoch": 29.94, "learning_rate": 0.001, "loss": 2.5471, "step": 155916 }, { "epoch": 29.94, "learning_rate": 0.001, "loss": 2.5441, "step": 155928 }, { "epoch": 29.94, "learning_rate": 0.001, "loss": 2.5372, "step": 155940 }, { "epoch": 29.94, "learning_rate": 0.001, "loss": 2.5483, "step": 155952 }, { "epoch": 29.95, "learning_rate": 0.001, "loss": 2.5362, "step": 155964 }, { "epoch": 29.95, "learning_rate": 0.001, "loss": 2.5353, "step": 155976 }, { "epoch": 29.95, "learning_rate": 0.001, "loss": 2.5401, "step": 155988 }, { "epoch": 29.95, "learning_rate": 0.001, "loss": 2.5376, "step": 156000 }, { "epoch": 29.96, "learning_rate": 0.001, "loss": 2.5443, "step": 156012 }, { "epoch": 29.96, "learning_rate": 0.001, "loss": 2.5489, "step": 156024 }, { "epoch": 29.96, "learning_rate": 0.001, "loss": 2.5513, "step": 156036 }, { "epoch": 29.96, "learning_rate": 0.001, "loss": 2.5415, "step": 156048 }, { "epoch": 29.97, "learning_rate": 0.001, "loss": 2.5285, "step": 156060 }, { "epoch": 29.97, "learning_rate": 0.001, "loss": 2.5405, "step": 156072 }, { "epoch": 29.97, "learning_rate": 0.001, "loss": 2.5374, "step": 156084 }, { "epoch": 29.97, "learning_rate": 0.001, "loss": 2.5471, "step": 156096 }, { "epoch": 29.97, "learning_rate": 0.001, "loss": 2.5417, "step": 156108 }, { "epoch": 29.98, "learning_rate": 0.001, "loss": 2.5391, "step": 156120 }, { "epoch": 29.98, "learning_rate": 0.001, "loss": 2.5384, "step": 156132 }, { "epoch": 29.98, "learning_rate": 0.001, "loss": 2.5306, "step": 156144 }, { "epoch": 29.98, "learning_rate": 0.001, "loss": 2.5364, "step": 156156 }, { "epoch": 29.99, "learning_rate": 0.001, "loss": 2.5407, "step": 156168 }, { "epoch": 29.99, "learning_rate": 0.001, "loss": 2.5446, "step": 156180 }, { "epoch": 29.99, "learning_rate": 0.001, "loss": 2.5333, "step": 156192 }, { "epoch": 29.99, "learning_rate": 0.001, "loss": 2.5466, "step": 156204 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 2.5479, "step": 156216 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 2.5438, "step": 156228 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 2.5266, "step": 156240 }, { "epoch": 30.0, "eval_ag_news_accuracy": 0.3256875, "eval_ag_news_bleu_score": 4.952198712747313, "eval_ag_news_bleu_score_sem": 0.16501101027865148, "eval_ag_news_emb_cos_sim": 0.8201794028282166, "eval_ag_news_emb_cos_sim_sem": 0.006350625230437762, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.50901460647583, "eval_ag_news_n_ngrams_match_1": 14.114, "eval_ag_news_n_ngrams_match_2": 3.146, "eval_ag_news_n_ngrams_match_3": 0.874, "eval_ag_news_num_pred_words": 46.482, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.415324311283904, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35395600557735385, "eval_ag_news_runtime": 10.4507, "eval_ag_news_samples_per_second": 47.844, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3526154363298412, "eval_ag_news_token_set_f1_sem": 0.00427935192668522, "eval_ag_news_token_set_precision": 0.3382957857110494, "eval_ag_news_token_set_recall": 0.3839005172887486, "eval_ag_news_true_num_tokens": 56.09375, "step": 156250 }, { "epoch": 30.0, "eval_anthropic_toxic_prompts_accuracy": 0.11584375, "eval_anthropic_toxic_prompts_bleu_score": 3.191813219317776, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12393265672174822, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729031801223755, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008686339037549727, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2171788215637207, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 47.722, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.95761089478656, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2141080923860717, "eval_anthropic_toxic_prompts_runtime": 9.9675, "eval_anthropic_toxic_prompts_samples_per_second": 50.163, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3591335679912616, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006652981611880587, "eval_anthropic_toxic_prompts_token_set_precision": 0.4425810020779211, "eval_anthropic_toxic_prompts_token_set_recall": 0.3308347329763512, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 156250 }, { "epoch": 30.0, "eval_arxiv_accuracy": 0.351125, "eval_arxiv_bleu_score": 4.449350568299391, "eval_arxiv_bleu_score_sem": 0.13938156090728518, "eval_arxiv_emb_cos_sim": 0.7611921429634094, "eval_arxiv_emb_cos_sim_sem": 0.009035034403259529, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.371891975402832, "eval_arxiv_n_ngrams_match_1": 15.102, "eval_arxiv_n_ngrams_match_2": 3.018, "eval_arxiv_n_ngrams_match_3": 0.708, "eval_arxiv_num_pred_words": 40.05, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.133594992903735, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.362784949262172, "eval_arxiv_runtime": 10.2624, "eval_arxiv_samples_per_second": 48.722, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3536816353242398, "eval_arxiv_token_set_f1_sem": 0.0044563905631548, "eval_arxiv_token_set_precision": 0.3053094810539887, "eval_arxiv_token_set_recall": 0.4369377355367627, "eval_arxiv_true_num_tokens": 64.0, "step": 156250 }, { "epoch": 30.0, "eval_python_code_alpaca_accuracy": 0.161875, "eval_python_code_alpaca_bleu_score": 4.567276534819873, "eval_python_code_alpaca_bleu_score_sem": 0.14176757614863209, "eval_python_code_alpaca_emb_cos_sim": 0.7530335187911987, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009529549962282929, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8673653602600098, "eval_python_code_alpaca_n_ngrams_match_1": 9.944, "eval_python_code_alpaca_n_ngrams_match_2": 2.946, "eval_python_code_alpaca_n_ngrams_match_3": 0.994, "eval_python_code_alpaca_num_pred_words": 44.478, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.590612169222595, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33423296055930685, "eval_python_code_alpaca_runtime": 10.1686, "eval_python_code_alpaca_samples_per_second": 49.171, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4795782777959037, "eval_python_code_alpaca_token_set_f1_sem": 0.0055880244121022426, "eval_python_code_alpaca_token_set_precision": 0.5420998372696966, "eval_python_code_alpaca_token_set_recall": 0.452644518300551, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 156250 }, { "epoch": 30.0, "eval_wikibio_accuracy": 0.325375, "eval_wikibio_bleu_score": 6.2310717422902435, "eval_wikibio_bleu_score_sem": 0.2217136427339964, "eval_wikibio_emb_cos_sim": 0.7579187154769897, "eval_wikibio_emb_cos_sim_sem": 0.008380238330616248, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.71551775932312, "eval_wikibio_n_ngrams_match_1": 10.214, "eval_wikibio_n_ngrams_match_2": 3.502, "eval_wikibio_n_ngrams_match_3": 1.302, "eval_wikibio_num_pred_words": 35.902, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.07985105498431, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3641476729697386, "eval_wikibio_runtime": 10.281, "eval_wikibio_samples_per_second": 48.633, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.32486307207343096, "eval_wikibio_token_set_f1_sem": 0.0054074189432803885, "eval_wikibio_token_set_precision": 0.33213279154953373, "eval_wikibio_token_set_recall": 0.33403978678338864, "eval_wikibio_true_num_tokens": 61.1328125, "step": 156250 }, { "epoch": 30.0, "eval_nq_accuracy": 0.53334375, "eval_nq_bleu_score": 12.008924948458075, "eval_nq_bleu_score_sem": 0.4770210262206687, "eval_nq_emb_cos_sim": 0.8401103019714355, "eval_nq_emb_cos_sim_sem": 0.006983090058815349, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1568565368652344, "eval_nq_n_ngrams_match_1": 23.262, "eval_nq_n_ngrams_match_2": 8.612, "eval_nq_n_ngrams_match_3": 4.056, "eval_nq_num_pred_words": 48.97, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.643923053361304, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4529741489123078, "eval_nq_runtime": 10.7694, "eval_nq_samples_per_second": 46.428, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.46731023699601365, "eval_nq_token_set_f1_sem": 0.004965429608990865, "eval_nq_token_set_precision": 0.4245477684377968, "eval_nq_token_set_recall": 0.5285678036747155, "eval_nq_true_num_tokens": 64.0, "step": 156250 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 2.5296, "step": 156252 }, { "epoch": 30.0, "learning_rate": 0.001, "loss": 2.5249, "step": 156264 }, { "epoch": 30.01, "learning_rate": 0.001, "loss": 2.515, "step": 156276 }, { "epoch": 30.01, "learning_rate": 0.001, "loss": 2.5287, "step": 156288 }, { "epoch": 30.01, "learning_rate": 0.001, "loss": 2.5166, "step": 156300 }, { "epoch": 30.01, "learning_rate": 0.001, "loss": 2.5197, "step": 156312 }, { "epoch": 30.02, "learning_rate": 0.001, "loss": 2.5303, "step": 156324 }, { "epoch": 30.02, "learning_rate": 0.001, "loss": 2.515, "step": 156336 }, { "epoch": 30.02, "learning_rate": 0.001, "loss": 2.5269, "step": 156348 }, { "epoch": 30.02, "learning_rate": 0.001, "loss": 2.5233, "step": 156360 }, { "epoch": 30.03, "learning_rate": 0.001, "loss": 2.5289, "step": 156372 }, { "epoch": 30.03, "learning_rate": 0.001, "loss": 2.5309, "step": 156384 }, { "epoch": 30.03, "learning_rate": 0.001, "loss": 2.5323, "step": 156396 }, { "epoch": 30.03, "learning_rate": 0.001, "loss": 2.5091, "step": 156408 }, { "epoch": 30.03, "learning_rate": 0.001, "loss": 2.5168, "step": 156420 }, { "epoch": 30.04, "learning_rate": 0.001, "loss": 2.5318, "step": 156432 }, { "epoch": 30.04, "learning_rate": 0.001, "loss": 2.5195, "step": 156444 }, { "epoch": 30.04, "learning_rate": 0.001, "loss": 2.5195, "step": 156456 }, { "epoch": 30.04, "learning_rate": 0.001, "loss": 2.5206, "step": 156468 }, { "epoch": 30.05, "learning_rate": 0.001, "loss": 2.5158, "step": 156480 }, { "epoch": 30.05, "learning_rate": 0.001, "loss": 2.5295, "step": 156492 }, { "epoch": 30.05, "learning_rate": 0.001, "loss": 2.5239, "step": 156504 }, { "epoch": 30.05, "learning_rate": 0.001, "loss": 2.5243, "step": 156516 }, { "epoch": 30.06, "learning_rate": 0.001, "loss": 2.5108, "step": 156528 }, { "epoch": 30.06, "learning_rate": 0.001, "loss": 2.5195, "step": 156540 }, { "epoch": 30.06, "learning_rate": 0.001, "loss": 2.5171, "step": 156552 }, { "epoch": 30.06, "learning_rate": 0.001, "loss": 2.5256, "step": 156564 }, { "epoch": 30.06, "learning_rate": 0.001, "loss": 2.5271, "step": 156576 }, { "epoch": 30.07, "learning_rate": 0.001, "loss": 2.5107, "step": 156588 }, { "epoch": 30.07, "learning_rate": 0.001, "loss": 2.5246, "step": 156600 }, { "epoch": 30.07, "learning_rate": 0.001, "loss": 2.5215, "step": 156612 }, { "epoch": 30.07, "learning_rate": 0.001, "loss": 2.5279, "step": 156624 }, { "epoch": 30.08, "learning_rate": 0.001, "loss": 2.527, "step": 156636 }, { "epoch": 30.08, "learning_rate": 0.001, "loss": 2.5346, "step": 156648 }, { "epoch": 30.08, "learning_rate": 0.001, "loss": 2.5227, "step": 156660 }, { "epoch": 30.08, "learning_rate": 0.001, "loss": 2.5092, "step": 156672 }, { "epoch": 30.09, "learning_rate": 0.001, "loss": 2.5215, "step": 156684 }, { "epoch": 30.09, "learning_rate": 0.001, "loss": 2.5132, "step": 156696 }, { "epoch": 30.09, "learning_rate": 0.001, "loss": 2.5276, "step": 156708 }, { "epoch": 30.09, "learning_rate": 0.001, "loss": 2.5176, "step": 156720 }, { "epoch": 30.09, "learning_rate": 0.001, "loss": 2.5237, "step": 156732 }, { "epoch": 30.1, "learning_rate": 0.001, "loss": 2.528, "step": 156744 }, { "epoch": 30.1, "learning_rate": 0.001, "loss": 2.5385, "step": 156756 }, { "epoch": 30.1, "learning_rate": 0.001, "loss": 2.5252, "step": 156768 }, { "epoch": 30.1, "learning_rate": 0.001, "loss": 2.5266, "step": 156780 }, { "epoch": 30.11, "learning_rate": 0.001, "loss": 2.5211, "step": 156792 }, { "epoch": 30.11, "learning_rate": 0.001, "loss": 2.5223, "step": 156804 }, { "epoch": 30.11, "learning_rate": 0.001, "loss": 2.5153, "step": 156816 }, { "epoch": 30.11, "learning_rate": 0.001, "loss": 2.5284, "step": 156828 }, { "epoch": 30.12, "learning_rate": 0.001, "loss": 2.5222, "step": 156840 }, { "epoch": 30.12, "learning_rate": 0.001, "loss": 2.5307, "step": 156852 }, { "epoch": 30.12, "learning_rate": 0.001, "loss": 2.5375, "step": 156864 }, { "epoch": 30.12, "eval_ag_news_accuracy": 0.32309375, "eval_ag_news_bleu_score": 4.8949900242191315, "eval_ag_news_bleu_score_sem": 0.1529257653334249, "eval_ag_news_emb_cos_sim": 0.814081072807312, "eval_ag_news_emb_cos_sim_sem": 0.007254983329826175, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.520991325378418, "eval_ag_news_n_ngrams_match_1": 14.25, "eval_ag_news_n_ngrams_match_2": 3.252, "eval_ag_news_n_ngrams_match_3": 0.946, "eval_ag_news_num_pred_words": 47.008, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.81793643108617, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35446625271231846, "eval_ag_news_runtime": 10.9418, "eval_ag_news_samples_per_second": 45.696, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.355520500036169, "eval_ag_news_token_set_f1_sem": 0.004403190894155138, "eval_ag_news_token_set_precision": 0.33965718467372263, "eval_ag_news_token_set_recall": 0.3893419219446139, "eval_ag_news_true_num_tokens": 56.09375, "step": 156875 }, { "epoch": 30.12, "eval_anthropic_toxic_prompts_accuracy": 0.115, "eval_anthropic_toxic_prompts_bleu_score": 3.1505349882027933, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12474808992202031, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6709076166152954, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009117380125626703, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.248901605606079, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.206, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 48.024, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.76202750435498, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21068908037830206, "eval_anthropic_toxic_prompts_runtime": 9.9427, "eval_anthropic_toxic_prompts_samples_per_second": 50.288, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35837125724281116, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006537739562108545, "eval_anthropic_toxic_prompts_token_set_precision": 0.43762910921461895, "eval_anthropic_toxic_prompts_token_set_recall": 0.32986004706646394, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 156875 }, { "epoch": 30.12, "eval_arxiv_accuracy": 0.35159375, "eval_arxiv_bleu_score": 4.407643662944918, "eval_arxiv_bleu_score_sem": 0.13207412593659715, "eval_arxiv_emb_cos_sim": 0.7672524452209473, "eval_arxiv_emb_cos_sim_sem": 0.00845611121901446, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3698806762695312, "eval_arxiv_n_ngrams_match_1": 15.022, "eval_arxiv_n_ngrams_match_2": 2.942, "eval_arxiv_n_ngrams_match_3": 0.706, "eval_arxiv_num_pred_words": 40.666, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.07505750647602, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3595043837157954, "eval_arxiv_runtime": 10.3444, "eval_arxiv_samples_per_second": 48.336, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3499389028075848, "eval_arxiv_token_set_f1_sem": 0.004091876456838303, "eval_arxiv_token_set_precision": 0.30276978660801523, "eval_arxiv_token_set_recall": 0.43161993113824787, "eval_arxiv_true_num_tokens": 64.0, "step": 156875 }, { "epoch": 30.12, "eval_python_code_alpaca_accuracy": 0.16190625, "eval_python_code_alpaca_bleu_score": 4.546297420719376, "eval_python_code_alpaca_bleu_score_sem": 0.1379698677714443, "eval_python_code_alpaca_emb_cos_sim": 0.7658563256263733, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007070094983576502, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.872417449951172, "eval_python_code_alpaca_n_ngrams_match_1": 10.078, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 44.704, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.679706386110873, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33324354563833325, "eval_python_code_alpaca_runtime": 10.0571, "eval_python_code_alpaca_samples_per_second": 49.716, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.48325844493624837, "eval_python_code_alpaca_token_set_f1_sem": 0.0054426218348746285, "eval_python_code_alpaca_token_set_precision": 0.5516745554128879, "eval_python_code_alpaca_token_set_recall": 0.4511151282955253, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 156875 }, { "epoch": 30.12, "eval_wikibio_accuracy": 0.3256875, "eval_wikibio_bleu_score": 6.337041882672664, "eval_wikibio_bleu_score_sem": 0.2202144506244899, "eval_wikibio_emb_cos_sim": 0.754089891910553, "eval_wikibio_emb_cos_sim_sem": 0.00780568590627066, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7035880088806152, "eval_wikibio_n_ngrams_match_1": 10.374, "eval_wikibio_n_ngrams_match_2": 3.604, "eval_wikibio_n_ngrams_match_3": 1.38, "eval_wikibio_num_pred_words": 36.792, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.592690314383326, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36584263134246897, "eval_wikibio_runtime": 10.0408, "eval_wikibio_samples_per_second": 49.797, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32506640393263425, "eval_wikibio_token_set_f1_sem": 0.005165435102722425, "eval_wikibio_token_set_precision": 0.3362564179071182, "eval_wikibio_token_set_recall": 0.3290020871295805, "eval_wikibio_true_num_tokens": 61.1328125, "step": 156875 }, { "epoch": 30.12, "eval_nq_accuracy": 0.5323125, "eval_nq_bleu_score": 12.059231370279635, "eval_nq_bleu_score_sem": 0.47247935205452724, "eval_nq_emb_cos_sim": 0.837883710861206, "eval_nq_emb_cos_sim_sem": 0.007197772275695176, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.159616708755493, "eval_nq_n_ngrams_match_1": 23.338, "eval_nq_n_ngrams_match_2": 8.78, "eval_nq_n_ngrams_match_3": 4.03, "eval_nq_num_pred_words": 49.172, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.667814724185542, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4547128175221566, "eval_nq_runtime": 12.063, "eval_nq_samples_per_second": 41.449, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.46948511725358355, "eval_nq_token_set_f1_sem": 0.004769747528186598, "eval_nq_token_set_precision": 0.42531129812185897, "eval_nq_token_set_recall": 0.5323654080304644, "eval_nq_true_num_tokens": 64.0, "step": 156875 }, { "epoch": 30.12, "learning_rate": 0.001, "loss": 2.5319, "step": 156876 }, { "epoch": 30.12, "learning_rate": 0.001, "loss": 2.5258, "step": 156888 }, { "epoch": 30.13, "learning_rate": 0.001, "loss": 2.527, "step": 156900 }, { "epoch": 30.13, "learning_rate": 0.001, "loss": 2.5292, "step": 156912 }, { "epoch": 30.13, "learning_rate": 0.001, "loss": 2.5327, "step": 156924 }, { "epoch": 30.13, "learning_rate": 0.001, "loss": 2.5243, "step": 156936 }, { "epoch": 30.14, "learning_rate": 0.001, "loss": 2.5141, "step": 156948 }, { "epoch": 30.14, "learning_rate": 0.001, "loss": 2.5227, "step": 156960 }, { "epoch": 30.14, "learning_rate": 0.001, "loss": 2.5326, "step": 156972 }, { "epoch": 30.14, "learning_rate": 0.001, "loss": 2.5305, "step": 156984 }, { "epoch": 30.15, "learning_rate": 0.001, "loss": 2.5322, "step": 156996 }, { "epoch": 30.15, "learning_rate": 0.001, "loss": 2.5208, "step": 157008 }, { "epoch": 30.15, "learning_rate": 0.001, "loss": 2.5216, "step": 157020 }, { "epoch": 30.15, "learning_rate": 0.001, "loss": 2.5296, "step": 157032 }, { "epoch": 30.15, "learning_rate": 0.001, "loss": 2.5235, "step": 157044 }, { "epoch": 30.16, "learning_rate": 0.001, "loss": 2.5245, "step": 157056 }, { "epoch": 30.16, "learning_rate": 0.001, "loss": 2.5267, "step": 157068 }, { "epoch": 30.16, "learning_rate": 0.001, "loss": 2.5234, "step": 157080 }, { "epoch": 30.16, "learning_rate": 0.001, "loss": 2.5154, "step": 157092 }, { "epoch": 30.17, "learning_rate": 0.001, "loss": 2.5328, "step": 157104 }, { "epoch": 30.17, "learning_rate": 0.001, "loss": 2.5388, "step": 157116 }, { "epoch": 30.17, "learning_rate": 0.001, "loss": 2.5276, "step": 157128 }, { "epoch": 30.17, "learning_rate": 0.001, "loss": 2.5315, "step": 157140 }, { "epoch": 30.18, "learning_rate": 0.001, "loss": 2.5323, "step": 157152 }, { "epoch": 30.18, "learning_rate": 0.001, "loss": 2.526, "step": 157164 }, { "epoch": 30.18, "learning_rate": 0.001, "loss": 2.5325, "step": 157176 }, { "epoch": 30.18, "learning_rate": 0.001, "loss": 2.5257, "step": 157188 }, { "epoch": 30.18, "learning_rate": 0.001, "loss": 2.5298, "step": 157200 }, { "epoch": 30.19, "learning_rate": 0.001, "loss": 2.5292, "step": 157212 }, { "epoch": 30.19, "learning_rate": 0.001, "loss": 2.5246, "step": 157224 }, { "epoch": 30.19, "learning_rate": 0.001, "loss": 2.5165, "step": 157236 }, { "epoch": 30.19, "learning_rate": 0.001, "loss": 2.5235, "step": 157248 }, { "epoch": 30.2, "learning_rate": 0.001, "loss": 2.5144, "step": 157260 }, { "epoch": 30.2, "learning_rate": 0.001, "loss": 2.5318, "step": 157272 }, { "epoch": 30.2, "learning_rate": 0.001, "loss": 2.5275, "step": 157284 }, { "epoch": 30.2, "learning_rate": 0.001, "loss": 2.5313, "step": 157296 }, { "epoch": 30.21, "learning_rate": 0.001, "loss": 2.5148, "step": 157308 }, { "epoch": 30.21, "learning_rate": 0.001, "loss": 2.5216, "step": 157320 }, { "epoch": 30.21, "learning_rate": 0.001, "loss": 2.5226, "step": 157332 }, { "epoch": 30.21, "learning_rate": 0.001, "loss": 2.5247, "step": 157344 }, { "epoch": 30.21, "learning_rate": 0.001, "loss": 2.5236, "step": 157356 }, { "epoch": 30.22, "learning_rate": 0.001, "loss": 2.5286, "step": 157368 }, { "epoch": 30.22, "learning_rate": 0.001, "loss": 2.5303, "step": 157380 }, { "epoch": 30.22, "learning_rate": 0.001, "loss": 2.5244, "step": 157392 }, { "epoch": 30.22, "learning_rate": 0.001, "loss": 2.5354, "step": 157404 }, { "epoch": 30.23, "learning_rate": 0.001, "loss": 2.5234, "step": 157416 }, { "epoch": 30.23, "learning_rate": 0.001, "loss": 2.5166, "step": 157428 }, { "epoch": 30.23, "learning_rate": 0.001, "loss": 2.5336, "step": 157440 }, { "epoch": 30.23, "learning_rate": 0.001, "loss": 2.5336, "step": 157452 }, { "epoch": 30.24, "learning_rate": 0.001, "loss": 2.5264, "step": 157464 }, { "epoch": 30.24, "learning_rate": 0.001, "loss": 2.5224, "step": 157476 }, { "epoch": 30.24, "learning_rate": 0.001, "loss": 2.524, "step": 157488 }, { "epoch": 30.24, "learning_rate": 0.001, "loss": 2.5307, "step": 157500 }, { "epoch": 30.24, "eval_ag_news_accuracy": 0.32434375, "eval_ag_news_bleu_score": 4.758053657351796, "eval_ag_news_bleu_score_sem": 0.143581842791104, "eval_ag_news_emb_cos_sim": 0.8139164447784424, "eval_ag_news_emb_cos_sim_sem": 0.006493669000105704, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5309855937957764, "eval_ag_news_n_ngrams_match_1": 14.186, "eval_ag_news_n_ngrams_match_2": 3.038, "eval_ag_news_n_ngrams_match_3": 0.856, "eval_ag_news_num_pred_words": 46.49, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 34.15761656489712, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35223402904055356, "eval_ag_news_runtime": 10.431, "eval_ag_news_samples_per_second": 47.934, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3543089371065836, "eval_ag_news_token_set_f1_sem": 0.004347235781361584, "eval_ag_news_token_set_precision": 0.3391530198503155, "eval_ag_news_token_set_recall": 0.38411816975795515, "eval_ag_news_true_num_tokens": 56.09375, "step": 157500 }, { "epoch": 30.24, "eval_anthropic_toxic_prompts_accuracy": 0.115125, "eval_anthropic_toxic_prompts_bleu_score": 3.1001371986036967, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11950594486831811, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6717466115951538, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009021263579053102, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2424442768096924, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.11, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.878, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 46.852, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.596209568939248, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21005113731368338, "eval_anthropic_toxic_prompts_runtime": 11.0359, "eval_anthropic_toxic_prompts_samples_per_second": 45.307, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.35156103128418076, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657098766161286, "eval_anthropic_toxic_prompts_token_set_precision": 0.43484267135667165, "eval_anthropic_toxic_prompts_token_set_recall": 0.3257795283642292, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 157500 }, { "epoch": 30.24, "eval_arxiv_accuracy": 0.34884375, "eval_arxiv_bleu_score": 4.363456484179617, "eval_arxiv_bleu_score_sem": 0.12581095029683897, "eval_arxiv_emb_cos_sim": 0.7542411088943481, "eval_arxiv_emb_cos_sim_sem": 0.010374416467909198, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.376727819442749, "eval_arxiv_n_ngrams_match_1": 15.202, "eval_arxiv_n_ngrams_match_2": 2.982, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.176, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.27482171459167, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3649618898857102, "eval_arxiv_runtime": 10.3282, "eval_arxiv_samples_per_second": 48.411, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3557000234224597, "eval_arxiv_token_set_f1_sem": 0.004178592382223671, "eval_arxiv_token_set_precision": 0.3097388394222354, "eval_arxiv_token_set_recall": 0.4354133024821057, "eval_arxiv_true_num_tokens": 64.0, "step": 157500 }, { "epoch": 30.24, "eval_python_code_alpaca_accuracy": 0.16028125, "eval_python_code_alpaca_bleu_score": 4.6251060544518, "eval_python_code_alpaca_bleu_score_sem": 0.15235364170221244, "eval_python_code_alpaca_emb_cos_sim": 0.7514203786849976, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010706493462960057, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8958547115325928, "eval_python_code_alpaca_n_ngrams_match_1": 9.718, "eval_python_code_alpaca_n_ngrams_match_2": 2.858, "eval_python_code_alpaca_n_ngrams_match_3": 0.97, "eval_python_code_alpaca_num_pred_words": 41.97, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.098964225657987, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33508325114583526, "eval_python_code_alpaca_runtime": 10.1957, "eval_python_code_alpaca_samples_per_second": 49.04, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4731767436339594, "eval_python_code_alpaca_token_set_f1_sem": 0.005633077997519158, "eval_python_code_alpaca_token_set_precision": 0.5328896362917185, "eval_python_code_alpaca_token_set_recall": 0.45465780675229217, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 157500 }, { "epoch": 30.24, "eval_wikibio_accuracy": 0.32725, "eval_wikibio_bleu_score": 6.289617508885937, "eval_wikibio_bleu_score_sem": 0.21580508245872906, "eval_wikibio_emb_cos_sim": 0.7436895370483398, "eval_wikibio_emb_cos_sim_sem": 0.009231857551627372, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6938133239746094, "eval_wikibio_n_ngrams_match_1": 10.272, "eval_wikibio_n_ngrams_match_2": 3.532, "eval_wikibio_n_ngrams_match_3": 1.31, "eval_wikibio_num_pred_words": 35.952, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.19784245755524, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3617194131029448, "eval_wikibio_runtime": 10.2425, "eval_wikibio_samples_per_second": 48.816, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3246204881331519, "eval_wikibio_token_set_f1_sem": 0.005348375476585237, "eval_wikibio_token_set_precision": 0.331537223025125, "eval_wikibio_token_set_recall": 0.33399686531592665, "eval_wikibio_true_num_tokens": 61.1328125, "step": 157500 }, { "epoch": 30.24, "eval_nq_accuracy": 0.5305625, "eval_nq_bleu_score": 11.512087301446106, "eval_nq_bleu_score_sem": 0.46493590767167253, "eval_nq_emb_cos_sim": 0.8377481698989868, "eval_nq_emb_cos_sim_sem": 0.007061162485870224, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1584322452545166, "eval_nq_n_ngrams_match_1": 23.08, "eval_nq_n_ngrams_match_2": 8.334, "eval_nq_n_ngrams_match_3": 3.806, "eval_nq_num_pred_words": 48.712, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.657554091883345, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4506501281163242, "eval_nq_runtime": 10.6207, "eval_nq_samples_per_second": 47.078, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.46324773590136453, "eval_nq_token_set_f1_sem": 0.004793307939318403, "eval_nq_token_set_precision": 0.42221918452776047, "eval_nq_token_set_recall": 0.5208339723322124, "eval_nq_true_num_tokens": 64.0, "step": 157500 }, { "epoch": 30.24, "learning_rate": 0.001, "loss": 2.5299, "step": 157512 }, { "epoch": 30.25, "learning_rate": 0.001, "loss": 2.5235, "step": 157524 }, { "epoch": 30.25, "learning_rate": 0.001, "loss": 2.5259, "step": 157536 }, { "epoch": 30.25, "learning_rate": 0.001, "loss": 2.5268, "step": 157548 }, { "epoch": 30.25, "learning_rate": 0.001, "loss": 2.5268, "step": 157560 }, { "epoch": 30.26, "learning_rate": 0.001, "loss": 2.53, "step": 157572 }, { "epoch": 30.26, "learning_rate": 0.001, "loss": 2.5244, "step": 157584 }, { "epoch": 30.26, "learning_rate": 0.001, "loss": 2.53, "step": 157596 }, { "epoch": 30.26, "learning_rate": 0.001, "loss": 2.5423, "step": 157608 }, { "epoch": 30.26, "learning_rate": 0.001, "loss": 2.5288, "step": 157620 }, { "epoch": 30.27, "learning_rate": 0.001, "loss": 2.5308, "step": 157632 }, { "epoch": 30.27, "learning_rate": 0.001, "loss": 2.5328, "step": 157644 }, { "epoch": 30.27, "learning_rate": 0.001, "loss": 2.5228, "step": 157656 }, { "epoch": 30.27, "learning_rate": 0.001, "loss": 2.5266, "step": 157668 }, { "epoch": 30.28, "learning_rate": 0.001, "loss": 2.5331, "step": 157680 }, { "epoch": 30.28, "learning_rate": 0.001, "loss": 2.5203, "step": 157692 }, { "epoch": 30.28, "learning_rate": 0.001, "loss": 2.5307, "step": 157704 }, { "epoch": 30.28, "learning_rate": 0.001, "loss": 2.5306, "step": 157716 }, { "epoch": 30.29, "learning_rate": 0.001, "loss": 2.5303, "step": 157728 }, { "epoch": 30.29, "learning_rate": 0.001, "loss": 2.5335, "step": 157740 }, { "epoch": 30.29, "learning_rate": 0.001, "loss": 2.5257, "step": 157752 }, { "epoch": 30.29, "learning_rate": 0.001, "loss": 2.5376, "step": 157764 }, { "epoch": 30.29, "learning_rate": 0.001, "loss": 2.5266, "step": 157776 }, { "epoch": 30.3, "learning_rate": 0.001, "loss": 2.5254, "step": 157788 }, { "epoch": 30.3, "learning_rate": 0.001, "loss": 2.5296, "step": 157800 }, { "epoch": 30.3, "learning_rate": 0.001, "loss": 2.5354, "step": 157812 }, { "epoch": 30.3, "learning_rate": 0.001, "loss": 2.5357, "step": 157824 }, { "epoch": 30.31, "learning_rate": 0.001, "loss": 2.5187, "step": 157836 }, { "epoch": 30.31, "learning_rate": 0.001, "loss": 2.5201, "step": 157848 }, { "epoch": 30.31, "learning_rate": 0.001, "loss": 2.5356, "step": 157860 }, { "epoch": 30.31, "learning_rate": 0.001, "loss": 2.5254, "step": 157872 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 2.5221, "step": 157884 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 2.5381, "step": 157896 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 2.5302, "step": 157908 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 2.5219, "step": 157920 }, { "epoch": 30.32, "learning_rate": 0.001, "loss": 2.5252, "step": 157932 }, { "epoch": 30.33, "learning_rate": 0.001, "loss": 2.5285, "step": 157944 }, { "epoch": 30.33, "learning_rate": 0.001, "loss": 2.5351, "step": 157956 }, { "epoch": 30.33, "learning_rate": 0.001, "loss": 2.5305, "step": 157968 }, { "epoch": 30.33, "learning_rate": 0.001, "loss": 2.5249, "step": 157980 }, { "epoch": 30.34, "learning_rate": 0.001, "loss": 2.5335, "step": 157992 }, { "epoch": 30.34, "learning_rate": 0.001, "loss": 2.5221, "step": 158004 }, { "epoch": 30.34, "learning_rate": 0.001, "loss": 2.5209, "step": 158016 }, { "epoch": 30.34, "learning_rate": 0.001, "loss": 2.5308, "step": 158028 }, { "epoch": 30.35, "learning_rate": 0.001, "loss": 2.5257, "step": 158040 }, { "epoch": 30.35, "learning_rate": 0.001, "loss": 2.5315, "step": 158052 }, { "epoch": 30.35, "learning_rate": 0.001, "loss": 2.5284, "step": 158064 }, { "epoch": 30.35, "learning_rate": 0.001, "loss": 2.5353, "step": 158076 }, { "epoch": 30.35, "learning_rate": 0.001, "loss": 2.5343, "step": 158088 }, { "epoch": 30.36, "learning_rate": 0.001, "loss": 2.5242, "step": 158100 }, { "epoch": 30.36, "learning_rate": 0.001, "loss": 2.5277, "step": 158112 }, { "epoch": 30.36, "learning_rate": 0.001, "loss": 2.5292, "step": 158124 }, { "epoch": 30.36, "eval_ag_news_accuracy": 0.32471875, "eval_ag_news_bleu_score": 4.8427856844103605, "eval_ag_news_bleu_score_sem": 0.15012938702407316, "eval_ag_news_emb_cos_sim": 0.8136986494064331, "eval_ag_news_emb_cos_sim_sem": 0.00657294590506568, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5194272994995117, "eval_ag_news_n_ngrams_match_1": 14.18, "eval_ag_news_n_ngrams_match_2": 3.168, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.76, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.765085644109554, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3533370736970962, "eval_ag_news_runtime": 10.4041, "eval_ag_news_samples_per_second": 48.058, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35501028044739813, "eval_ag_news_token_set_f1_sem": 0.004213580761439838, "eval_ag_news_token_set_precision": 0.34015320444333863, "eval_ag_news_token_set_recall": 0.3854947543851968, "eval_ag_news_true_num_tokens": 56.09375, "step": 158125 }, { "epoch": 30.36, "eval_anthropic_toxic_prompts_accuracy": 0.11484375, "eval_anthropic_toxic_prompts_bleu_score": 3.071500101829459, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11258027794072333, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674710750579834, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008671535615554784, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2308311462402344, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.28, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.91, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708, "eval_anthropic_toxic_prompts_num_pred_words": 47.426, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.30067679687331, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21361380952808293, "eval_anthropic_toxic_prompts_runtime": 9.8524, "eval_anthropic_toxic_prompts_samples_per_second": 50.749, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35759965417590095, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503850569137356, "eval_anthropic_toxic_prompts_token_set_precision": 0.44059801740878096, "eval_anthropic_toxic_prompts_token_set_recall": 0.3262328269793172, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 158125 }, { "epoch": 30.36, "eval_arxiv_accuracy": 0.34834375, "eval_arxiv_bleu_score": 4.2978621528238, "eval_arxiv_bleu_score_sem": 0.12290908491861556, "eval_arxiv_emb_cos_sim": 0.7749967575073242, "eval_arxiv_emb_cos_sim_sem": 0.006363304846394461, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.382624626159668, "eval_arxiv_n_ngrams_match_1": 15.282, "eval_arxiv_n_ngrams_match_2": 2.946, "eval_arxiv_n_ngrams_match_3": 0.638, "eval_arxiv_num_pred_words": 40.834, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.447959658710285, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3652486584506256, "eval_arxiv_runtime": 10.2121, "eval_arxiv_samples_per_second": 48.962, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3617443614581526, "eval_arxiv_token_set_f1_sem": 0.004176899718743156, "eval_arxiv_token_set_precision": 0.31270599370369495, "eval_arxiv_token_set_recall": 0.4465882658497927, "eval_arxiv_true_num_tokens": 64.0, "step": 158125 }, { "epoch": 30.36, "eval_python_code_alpaca_accuracy": 0.162, "eval_python_code_alpaca_bleu_score": 4.275658775053929, "eval_python_code_alpaca_bleu_score_sem": 0.13693856885349182, "eval_python_code_alpaca_emb_cos_sim": 0.7551605701446533, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008493371667276123, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8826234340667725, "eval_python_code_alpaca_n_ngrams_match_1": 9.8, "eval_python_code_alpaca_n_ngrams_match_2": 2.724, "eval_python_code_alpaca_n_ngrams_match_3": 0.866, "eval_python_code_alpaca_num_pred_words": 44.016, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.861069106922283, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32812303963758704, "eval_python_code_alpaca_runtime": 9.9485, "eval_python_code_alpaca_samples_per_second": 50.259, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4727704776241421, "eval_python_code_alpaca_token_set_f1_sem": 0.005521458940805648, "eval_python_code_alpaca_token_set_precision": 0.538321190845166, "eval_python_code_alpaca_token_set_recall": 0.44409053704885754, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 158125 }, { "epoch": 30.36, "eval_wikibio_accuracy": 0.32465625, "eval_wikibio_bleu_score": 5.942625832732892, "eval_wikibio_bleu_score_sem": 0.22032237041499633, "eval_wikibio_emb_cos_sim": 0.731481671333313, "eval_wikibio_emb_cos_sim_sem": 0.009106736070744179, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.69866681098938, "eval_wikibio_n_ngrams_match_1": 9.924, "eval_wikibio_n_ngrams_match_2": 3.316, "eval_wikibio_n_ngrams_match_3": 1.222, "eval_wikibio_num_pred_words": 35.518, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.39341638779324, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3507729288554753, "eval_wikibio_runtime": 10.0362, "eval_wikibio_samples_per_second": 49.82, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3181687918634861, "eval_wikibio_token_set_f1_sem": 0.005317006306493984, "eval_wikibio_token_set_precision": 0.32243025625466, "eval_wikibio_token_set_recall": 0.333261812433212, "eval_wikibio_true_num_tokens": 61.1328125, "step": 158125 }, { "epoch": 30.36, "eval_nq_accuracy": 0.53153125, "eval_nq_bleu_score": 11.486748725474277, "eval_nq_bleu_score_sem": 0.46453246893931965, "eval_nq_emb_cos_sim": 0.8362016677856445, "eval_nq_emb_cos_sim_sem": 0.007298759417449818, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1573336124420166, "eval_nq_n_ngrams_match_1": 23.212, "eval_nq_n_ngrams_match_2": 8.456, "eval_nq_n_ngrams_match_3": 3.814, "eval_nq_num_pred_words": 49.484, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.64804784177732, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4490733508704824, "eval_nq_runtime": 11.553, "eval_nq_samples_per_second": 43.279, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.4616884557122342, "eval_nq_token_set_f1_sem": 0.00483469966089439, "eval_nq_token_set_precision": 0.42239282339596934, "eval_nq_token_set_recall": 0.517920002163779, "eval_nq_true_num_tokens": 64.0, "step": 158125 }, { "epoch": 30.36, "learning_rate": 0.001, "loss": 2.5309, "step": 158136 }, { "epoch": 30.37, "learning_rate": 0.001, "loss": 2.5305, "step": 158148 }, { "epoch": 30.37, "learning_rate": 0.001, "loss": 2.5319, "step": 158160 }, { "epoch": 30.37, "learning_rate": 0.001, "loss": 2.5394, "step": 158172 }, { "epoch": 30.37, "learning_rate": 0.001, "loss": 2.5254, "step": 158184 }, { "epoch": 30.38, "learning_rate": 0.001, "loss": 2.5252, "step": 158196 }, { "epoch": 30.38, "learning_rate": 0.001, "loss": 2.5313, "step": 158208 }, { "epoch": 30.38, "learning_rate": 0.001, "loss": 2.5309, "step": 158220 }, { "epoch": 30.38, "learning_rate": 0.001, "loss": 2.5234, "step": 158232 }, { "epoch": 30.38, "learning_rate": 0.001, "loss": 2.5271, "step": 158244 }, { "epoch": 30.39, "learning_rate": 0.001, "loss": 2.5292, "step": 158256 }, { "epoch": 30.39, "learning_rate": 0.001, "loss": 2.5394, "step": 158268 }, { "epoch": 30.39, "learning_rate": 0.001, "loss": 2.5318, "step": 158280 }, { "epoch": 30.39, "learning_rate": 0.001, "loss": 2.5355, "step": 158292 }, { "epoch": 30.4, "learning_rate": 0.001, "loss": 2.5195, "step": 158304 }, { "epoch": 30.4, "learning_rate": 0.001, "loss": 2.5261, "step": 158316 }, { "epoch": 30.4, "learning_rate": 0.001, "loss": 2.5247, "step": 158328 }, { "epoch": 30.4, "learning_rate": 0.001, "loss": 2.5317, "step": 158340 }, { "epoch": 30.41, "learning_rate": 0.001, "loss": 2.5347, "step": 158352 }, { "epoch": 30.41, "learning_rate": 0.001, "loss": 2.5242, "step": 158364 }, { "epoch": 30.41, "learning_rate": 0.001, "loss": 2.5267, "step": 158376 }, { "epoch": 30.41, "learning_rate": 0.001, "loss": 2.5356, "step": 158388 }, { "epoch": 30.41, "learning_rate": 0.001, "loss": 2.53, "step": 158400 }, { "epoch": 30.42, "learning_rate": 0.001, "loss": 2.5351, "step": 158412 }, { "epoch": 30.42, "learning_rate": 0.001, "loss": 2.5308, "step": 158424 }, { "epoch": 30.42, "learning_rate": 0.001, "loss": 2.5322, "step": 158436 }, { "epoch": 30.42, "learning_rate": 0.001, "loss": 2.529, "step": 158448 }, { "epoch": 30.43, "learning_rate": 0.001, "loss": 2.5313, "step": 158460 }, { "epoch": 30.43, "learning_rate": 0.001, "loss": 2.534, "step": 158472 }, { "epoch": 30.43, "learning_rate": 0.001, "loss": 2.5247, "step": 158484 }, { "epoch": 30.43, "learning_rate": 0.001, "loss": 2.5232, "step": 158496 }, { "epoch": 30.44, "learning_rate": 0.001, "loss": 2.5299, "step": 158508 }, { "epoch": 30.44, "learning_rate": 0.001, "loss": 2.5286, "step": 158520 }, { "epoch": 30.44, "learning_rate": 0.001, "loss": 2.538, "step": 158532 }, { "epoch": 30.44, "learning_rate": 0.001, "loss": 2.5387, "step": 158544 }, { "epoch": 30.44, "learning_rate": 0.001, "loss": 2.5377, "step": 158556 }, { "epoch": 30.45, "learning_rate": 0.001, "loss": 2.5267, "step": 158568 }, { "epoch": 30.45, "learning_rate": 0.001, "loss": 2.5317, "step": 158580 }, { "epoch": 30.45, "learning_rate": 0.001, "loss": 2.5207, "step": 158592 }, { "epoch": 30.45, "learning_rate": 0.001, "loss": 2.5404, "step": 158604 }, { "epoch": 30.46, "learning_rate": 0.001, "loss": 2.5313, "step": 158616 }, { "epoch": 30.46, "learning_rate": 0.001, "loss": 2.5285, "step": 158628 }, { "epoch": 30.46, "learning_rate": 0.001, "loss": 2.5242, "step": 158640 }, { "epoch": 30.46, "learning_rate": 0.001, "loss": 2.5294, "step": 158652 }, { "epoch": 30.47, "learning_rate": 0.001, "loss": 2.5421, "step": 158664 }, { "epoch": 30.47, "learning_rate": 0.001, "loss": 2.5348, "step": 158676 }, { "epoch": 30.47, "learning_rate": 0.001, "loss": 2.5253, "step": 158688 }, { "epoch": 30.47, "learning_rate": 0.001, "loss": 2.5235, "step": 158700 }, { "epoch": 30.47, "learning_rate": 0.001, "loss": 2.5277, "step": 158712 }, { "epoch": 30.48, "learning_rate": 0.001, "loss": 2.5279, "step": 158724 }, { "epoch": 30.48, "learning_rate": 0.001, "loss": 2.5286, "step": 158736 }, { "epoch": 30.48, "learning_rate": 0.001, "loss": 2.5371, "step": 158748 }, { "epoch": 30.48, "eval_ag_news_accuracy": 0.325375, "eval_ag_news_bleu_score": 4.878165485857089, "eval_ag_news_bleu_score_sem": 0.15439134540973087, "eval_ag_news_emb_cos_sim": 0.8145061135292053, "eval_ag_news_emb_cos_sim_sem": 0.007508954970596758, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.520663022994995, "eval_ag_news_n_ngrams_match_1": 14.314, "eval_ag_news_n_ngrams_match_2": 3.214, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 46.69, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.80683574424408, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3561943229560461, "eval_ag_news_runtime": 10.3976, "eval_ag_news_samples_per_second": 48.088, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3558517160863795, "eval_ag_news_token_set_f1_sem": 0.004551006538093029, "eval_ag_news_token_set_precision": 0.34212551662708557, "eval_ag_news_token_set_recall": 0.3844274878298257, "eval_ag_news_true_num_tokens": 56.09375, "step": 158750 }, { "epoch": 30.48, "eval_anthropic_toxic_prompts_accuracy": 0.115625, "eval_anthropic_toxic_prompts_bleu_score": 3.0588368189701205, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11557554497963778, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6634416580200195, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009347339258002713, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.219215154647827, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.198, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706, "eval_anthropic_toxic_prompts_num_pred_words": 47.324, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.008484683962216, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21079149734891484, "eval_anthropic_toxic_prompts_runtime": 10.2632, "eval_anthropic_toxic_prompts_samples_per_second": 48.718, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3625880362589257, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006706776391805066, "eval_anthropic_toxic_prompts_token_set_precision": 0.4366398585090635, "eval_anthropic_toxic_prompts_token_set_recall": 0.33531326236850506, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 158750 }, { "epoch": 30.48, "eval_arxiv_accuracy": 0.35040625, "eval_arxiv_bleu_score": 4.368822880723829, "eval_arxiv_bleu_score_sem": 0.1277155185574493, "eval_arxiv_emb_cos_sim": 0.7667855024337769, "eval_arxiv_emb_cos_sim_sem": 0.008965874059031183, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3752377033233643, "eval_arxiv_n_ngrams_match_1": 15.296, "eval_arxiv_n_ngrams_match_2": 2.95, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 40.438, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.231231316306054, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36635425161089796, "eval_arxiv_runtime": 10.4737, "eval_arxiv_samples_per_second": 47.739, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.36084453968004065, "eval_arxiv_token_set_f1_sem": 0.004463439757308821, "eval_arxiv_token_set_precision": 0.313203924279303, "eval_arxiv_token_set_recall": 0.4453061731231374, "eval_arxiv_true_num_tokens": 64.0, "step": 158750 }, { "epoch": 30.48, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.478360967894014, "eval_python_code_alpaca_bleu_score_sem": 0.14594466721567825, "eval_python_code_alpaca_emb_cos_sim": 0.7596727609634399, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008804581362502532, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8606534004211426, "eval_python_code_alpaca_n_ngrams_match_1": 9.77, "eval_python_code_alpaca_n_ngrams_match_2": 2.856, "eval_python_code_alpaca_n_ngrams_match_3": 0.956, "eval_python_code_alpaca_num_pred_words": 44.588, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.472940033889387, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32659435648274376, "eval_python_code_alpaca_runtime": 9.8786, "eval_python_code_alpaca_samples_per_second": 50.615, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4756708808523636, "eval_python_code_alpaca_token_set_f1_sem": 0.005626300794357611, "eval_python_code_alpaca_token_set_precision": 0.5339058879782336, "eval_python_code_alpaca_token_set_recall": 0.4491858371029622, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 158750 }, { "epoch": 30.48, "eval_wikibio_accuracy": 0.32759375, "eval_wikibio_bleu_score": 5.879216905551975, "eval_wikibio_bleu_score_sem": 0.20414104430683466, "eval_wikibio_emb_cos_sim": 0.7370492219924927, "eval_wikibio_emb_cos_sim_sem": 0.00960887757517002, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6781163215637207, "eval_wikibio_n_ngrams_match_1": 10.012, "eval_wikibio_n_ngrams_match_2": 3.352, "eval_wikibio_n_ngrams_match_3": 1.194, "eval_wikibio_num_pred_words": 35.404, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.5717833083871, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35632239314531655, "eval_wikibio_runtime": 10.1985, "eval_wikibio_samples_per_second": 49.027, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.32056417579054197, "eval_wikibio_token_set_f1_sem": 0.005457207044850231, "eval_wikibio_token_set_precision": 0.32565516678818285, "eval_wikibio_token_set_recall": 0.3328260382360125, "eval_wikibio_true_num_tokens": 61.1328125, "step": 158750 }, { "epoch": 30.48, "eval_nq_accuracy": 0.530875, "eval_nq_bleu_score": 11.861891386695568, "eval_nq_bleu_score_sem": 0.4922490714167595, "eval_nq_emb_cos_sim": 0.8299490809440613, "eval_nq_emb_cos_sim_sem": 0.007035008390735691, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1594924926757812, "eval_nq_n_ngrams_match_1": 23.234, "eval_nq_n_ngrams_match_2": 8.47, "eval_nq_n_ngrams_match_3": 3.978, "eval_nq_num_pred_words": 48.904, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.666738109088671, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45310891180677093, "eval_nq_runtime": 10.4601, "eval_nq_samples_per_second": 47.801, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4648715447948946, "eval_nq_token_set_f1_sem": 0.004991581463411779, "eval_nq_token_set_precision": 0.42293491387170323, "eval_nq_token_set_recall": 0.5248400957691678, "eval_nq_true_num_tokens": 64.0, "step": 158750 }, { "epoch": 30.48, "learning_rate": 0.001, "loss": 2.5323, "step": 158760 }, { "epoch": 30.49, "learning_rate": 0.001, "loss": 2.5323, "step": 158772 }, { "epoch": 30.49, "learning_rate": 0.001, "loss": 2.5245, "step": 158784 }, { "epoch": 30.49, "learning_rate": 0.001, "loss": 2.536, "step": 158796 }, { "epoch": 30.49, "learning_rate": 0.001, "loss": 2.5393, "step": 158808 }, { "epoch": 30.5, "learning_rate": 0.001, "loss": 2.5365, "step": 158820 }, { "epoch": 30.5, "learning_rate": 0.001, "loss": 2.5413, "step": 158832 }, { "epoch": 30.5, "learning_rate": 0.001, "loss": 2.5366, "step": 158844 }, { "epoch": 30.5, "learning_rate": 0.001, "loss": 2.5336, "step": 158856 }, { "epoch": 30.5, "learning_rate": 0.001, "loss": 2.5409, "step": 158868 }, { "epoch": 30.51, "learning_rate": 0.001, "loss": 2.5269, "step": 158880 }, { "epoch": 30.51, "learning_rate": 0.001, "loss": 2.5301, "step": 158892 }, { "epoch": 30.51, "learning_rate": 0.001, "loss": 2.5398, "step": 158904 }, { "epoch": 30.51, "learning_rate": 0.001, "loss": 2.5368, "step": 158916 }, { "epoch": 30.52, "learning_rate": 0.001, "loss": 2.5298, "step": 158928 }, { "epoch": 30.52, "learning_rate": 0.001, "loss": 2.5283, "step": 158940 }, { "epoch": 30.52, "learning_rate": 0.001, "loss": 2.5209, "step": 158952 }, { "epoch": 30.52, "learning_rate": 0.001, "loss": 2.5249, "step": 158964 }, { "epoch": 30.53, "learning_rate": 0.001, "loss": 2.5335, "step": 158976 }, { "epoch": 30.53, "learning_rate": 0.001, "loss": 2.523, "step": 158988 }, { "epoch": 30.53, "learning_rate": 0.001, "loss": 2.5297, "step": 159000 }, { "epoch": 30.53, "learning_rate": 0.001, "loss": 2.5377, "step": 159012 }, { "epoch": 30.53, "learning_rate": 0.001, "loss": 2.5326, "step": 159024 }, { "epoch": 30.54, "learning_rate": 0.001, "loss": 2.5265, "step": 159036 }, { "epoch": 30.54, "learning_rate": 0.001, "loss": 2.5296, "step": 159048 }, { "epoch": 30.54, "learning_rate": 0.001, "loss": 2.52, "step": 159060 }, { "epoch": 30.54, "learning_rate": 0.001, "loss": 2.5308, "step": 159072 }, { "epoch": 30.55, "learning_rate": 0.001, "loss": 2.5336, "step": 159084 }, { "epoch": 30.55, "learning_rate": 0.001, "loss": 2.5314, "step": 159096 }, { "epoch": 30.55, "learning_rate": 0.001, "loss": 2.5285, "step": 159108 }, { "epoch": 30.55, "learning_rate": 0.001, "loss": 2.5423, "step": 159120 }, { "epoch": 30.56, "learning_rate": 0.001, "loss": 2.5265, "step": 159132 }, { "epoch": 30.56, "learning_rate": 0.001, "loss": 2.5285, "step": 159144 }, { "epoch": 30.56, "learning_rate": 0.001, "loss": 2.5312, "step": 159156 }, { "epoch": 30.56, "learning_rate": 0.001, "loss": 2.5271, "step": 159168 }, { "epoch": 30.56, "learning_rate": 0.001, "loss": 2.5367, "step": 159180 }, { "epoch": 30.57, "learning_rate": 0.001, "loss": 2.5304, "step": 159192 }, { "epoch": 30.57, "learning_rate": 0.001, "loss": 2.5344, "step": 159204 }, { "epoch": 30.57, "learning_rate": 0.001, "loss": 2.541, "step": 159216 }, { "epoch": 30.57, "learning_rate": 0.001, "loss": 2.5415, "step": 159228 }, { "epoch": 30.58, "learning_rate": 0.001, "loss": 2.5343, "step": 159240 }, { "epoch": 30.58, "learning_rate": 0.001, "loss": 2.5261, "step": 159252 }, { "epoch": 30.58, "learning_rate": 0.001, "loss": 2.5321, "step": 159264 }, { "epoch": 30.58, "learning_rate": 0.001, "loss": 2.5238, "step": 159276 }, { "epoch": 30.59, "learning_rate": 0.001, "loss": 2.5293, "step": 159288 }, { "epoch": 30.59, "learning_rate": 0.001, "loss": 2.5273, "step": 159300 }, { "epoch": 30.59, "learning_rate": 0.001, "loss": 2.5338, "step": 159312 }, { "epoch": 30.59, "learning_rate": 0.001, "loss": 2.536, "step": 159324 }, { "epoch": 30.59, "learning_rate": 0.001, "loss": 2.5294, "step": 159336 }, { "epoch": 30.6, "learning_rate": 0.001, "loss": 2.5177, "step": 159348 }, { "epoch": 30.6, "learning_rate": 0.001, "loss": 2.5302, "step": 159360 }, { "epoch": 30.6, "learning_rate": 0.001, "loss": 2.5305, "step": 159372 }, { "epoch": 30.6, "eval_ag_news_accuracy": 0.32678125, "eval_ag_news_bleu_score": 4.923118249282192, "eval_ag_news_bleu_score_sem": 0.14889419402993623, "eval_ag_news_emb_cos_sim": 0.8170584440231323, "eval_ag_news_emb_cos_sim_sem": 0.007157052493133081, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5106892585754395, "eval_ag_news_n_ngrams_match_1": 14.342, "eval_ag_news_n_ngrams_match_2": 3.282, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 46.724, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.47133023643213, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3555692901811652, "eval_ag_news_runtime": 10.4647, "eval_ag_news_samples_per_second": 47.78, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3569700947398814, "eval_ag_news_token_set_f1_sem": 0.00432541952931231, "eval_ag_news_token_set_precision": 0.3426346521508827, "eval_ag_news_token_set_recall": 0.3863496905375196, "eval_ag_news_true_num_tokens": 56.09375, "step": 159375 }, { "epoch": 30.6, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.2010010375991067, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12109595737703649, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.674258828163147, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009089438082152996, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2164714336395264, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, "eval_anthropic_toxic_prompts_num_pred_words": 47.17, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.939962425115947, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21506010858386138, "eval_anthropic_toxic_prompts_runtime": 10.2099, "eval_anthropic_toxic_prompts_samples_per_second": 48.972, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3638356274635589, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006679943671621058, "eval_anthropic_toxic_prompts_token_set_precision": 0.44706926628230775, "eval_anthropic_toxic_prompts_token_set_recall": 0.3326622467851707, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 159375 }, { "epoch": 30.6, "eval_arxiv_accuracy": 0.35059375, "eval_arxiv_bleu_score": 4.341593214572007, "eval_arxiv_bleu_score_sem": 0.12876889998663138, "eval_arxiv_emb_cos_sim": 0.7648574709892273, "eval_arxiv_emb_cos_sim_sem": 0.010043040822759341, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3589510917663574, "eval_arxiv_n_ngrams_match_1": 15.128, "eval_arxiv_n_ngrams_match_2": 2.924, "eval_arxiv_n_ngrams_match_3": 0.664, "eval_arxiv_num_pred_words": 40.054, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.759009491409785, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36219769119335693, "eval_arxiv_runtime": 10.8673, "eval_arxiv_samples_per_second": 46.01, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.3561839101546386, "eval_arxiv_token_set_f1_sem": 0.004314308628426794, "eval_arxiv_token_set_precision": 0.3081562061394426, "eval_arxiv_token_set_recall": 0.44107164677241584, "eval_arxiv_true_num_tokens": 64.0, "step": 159375 }, { "epoch": 30.6, "eval_python_code_alpaca_accuracy": 0.164375, "eval_python_code_alpaca_bleu_score": 4.716017573864346, "eval_python_code_alpaca_bleu_score_sem": 0.14934025458985162, "eval_python_code_alpaca_emb_cos_sim": 0.7462612390518188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011473278321908338, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8493075370788574, "eval_python_code_alpaca_n_ngrams_match_1": 9.708, "eval_python_code_alpaca_n_ngrams_match_2": 2.948, "eval_python_code_alpaca_n_ngrams_match_3": 1.05, "eval_python_code_alpaca_num_pred_words": 43.084, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.275814836485733, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3352271370090748, "eval_python_code_alpaca_runtime": 9.9108, "eval_python_code_alpaca_samples_per_second": 50.45, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.47509973640326264, "eval_python_code_alpaca_token_set_f1_sem": 0.0059746369381191615, "eval_python_code_alpaca_token_set_precision": 0.5295235861561871, "eval_python_code_alpaca_token_set_recall": 0.4579034460887238, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 159375 }, { "epoch": 30.6, "eval_wikibio_accuracy": 0.32615625, "eval_wikibio_bleu_score": 5.964014459015691, "eval_wikibio_bleu_score_sem": 0.20725724989888034, "eval_wikibio_emb_cos_sim": 0.7523312568664551, "eval_wikibio_emb_cos_sim_sem": 0.009288788724342478, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6819639205932617, "eval_wikibio_n_ngrams_match_1": 10.14, "eval_wikibio_n_ngrams_match_2": 3.39, "eval_wikibio_n_ngrams_match_3": 1.232, "eval_wikibio_num_pred_words": 36.278, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.72433295017291, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35975966952098376, "eval_wikibio_runtime": 10.0333, "eval_wikibio_samples_per_second": 49.834, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.31819860957428275, "eval_wikibio_token_set_f1_sem": 0.005583364782927081, "eval_wikibio_token_set_precision": 0.32758241040150776, "eval_wikibio_token_set_recall": 0.32408614188775137, "eval_wikibio_true_num_tokens": 61.1328125, "step": 159375 }, { "epoch": 30.6, "eval_nq_accuracy": 0.53215625, "eval_nq_bleu_score": 11.981191883954724, "eval_nq_bleu_score_sem": 0.47526563796349297, "eval_nq_emb_cos_sim": 0.8336740732192993, "eval_nq_emb_cos_sim_sem": 0.00720989212882009, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1526739597320557, "eval_nq_n_ngrams_match_1": 23.234, "eval_nq_n_ngrams_match_2": 8.634, "eval_nq_n_ngrams_match_3": 4.028, "eval_nq_num_pred_words": 49.158, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.607844681340358, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45084998088904293, "eval_nq_runtime": 10.3957, "eval_nq_samples_per_second": 48.097, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4626375591175635, "eval_nq_token_set_f1_sem": 0.005043510554332337, "eval_nq_token_set_precision": 0.4216860162092115, "eval_nq_token_set_recall": 0.5206586046509615, "eval_nq_true_num_tokens": 64.0, "step": 159375 }, { "epoch": 30.6, "learning_rate": 0.001, "loss": 2.5335, "step": 159384 }, { "epoch": 30.61, "learning_rate": 0.001, "loss": 2.5269, "step": 159396 }, { "epoch": 30.61, "learning_rate": 0.001, "loss": 2.5206, "step": 159408 }, { "epoch": 30.61, "learning_rate": 0.001, "loss": 2.5357, "step": 159420 }, { "epoch": 30.61, "learning_rate": 0.001, "loss": 2.533, "step": 159432 }, { "epoch": 30.62, "learning_rate": 0.001, "loss": 2.5267, "step": 159444 }, { "epoch": 30.62, "learning_rate": 0.001, "loss": 2.5365, "step": 159456 }, { "epoch": 30.62, "learning_rate": 0.001, "loss": 2.5304, "step": 159468 }, { "epoch": 30.62, "learning_rate": 0.001, "loss": 2.5228, "step": 159480 }, { "epoch": 30.62, "learning_rate": 0.001, "loss": 2.5286, "step": 159492 }, { "epoch": 30.63, "learning_rate": 0.001, "loss": 2.5335, "step": 159504 }, { "epoch": 30.63, "learning_rate": 0.001, "loss": 2.5352, "step": 159516 }, { "epoch": 30.63, "learning_rate": 0.001, "loss": 2.5326, "step": 159528 }, { "epoch": 30.63, "learning_rate": 0.001, "loss": 2.5399, "step": 159540 }, { "epoch": 30.64, "learning_rate": 0.001, "loss": 2.5396, "step": 159552 }, { "epoch": 30.64, "learning_rate": 0.001, "loss": 2.5249, "step": 159564 }, { "epoch": 30.64, "learning_rate": 0.001, "loss": 2.5182, "step": 159576 }, { "epoch": 30.64, "learning_rate": 0.001, "loss": 2.5272, "step": 159588 }, { "epoch": 30.65, "learning_rate": 0.001, "loss": 2.5356, "step": 159600 }, { "epoch": 30.65, "learning_rate": 0.001, "loss": 2.5327, "step": 159612 }, { "epoch": 30.65, "learning_rate": 0.001, "loss": 2.53, "step": 159624 }, { "epoch": 30.65, "learning_rate": 0.001, "loss": 2.5218, "step": 159636 }, { "epoch": 30.65, "learning_rate": 0.001, "loss": 2.539, "step": 159648 }, { "epoch": 30.66, "learning_rate": 0.001, "loss": 2.5286, "step": 159660 }, { "epoch": 30.66, "learning_rate": 0.001, "loss": 2.5425, "step": 159672 }, { "epoch": 30.66, "learning_rate": 0.001, "loss": 2.5297, "step": 159684 }, { "epoch": 30.66, "learning_rate": 0.001, "loss": 2.528, "step": 159696 }, { "epoch": 30.67, "learning_rate": 0.001, "loss": 2.5348, "step": 159708 }, { "epoch": 30.67, "learning_rate": 0.001, "loss": 2.532, "step": 159720 }, { "epoch": 30.67, "learning_rate": 0.001, "loss": 2.5311, "step": 159732 }, { "epoch": 30.67, "learning_rate": 0.001, "loss": 2.5311, "step": 159744 }, { "epoch": 30.68, "learning_rate": 0.001, "loss": 2.5276, "step": 159756 }, { "epoch": 30.68, "learning_rate": 0.001, "loss": 2.5261, "step": 159768 }, { "epoch": 30.68, "learning_rate": 0.001, "loss": 2.5381, "step": 159780 }, { "epoch": 30.68, "learning_rate": 0.001, "loss": 2.5288, "step": 159792 }, { "epoch": 30.68, "learning_rate": 0.001, "loss": 2.5303, "step": 159804 }, { "epoch": 30.69, "learning_rate": 0.001, "loss": 2.5292, "step": 159816 }, { "epoch": 30.69, "learning_rate": 0.001, "loss": 2.5309, "step": 159828 }, { "epoch": 30.69, "learning_rate": 0.001, "loss": 2.5296, "step": 159840 }, { "epoch": 30.69, "learning_rate": 0.001, "loss": 2.54, "step": 159852 }, { "epoch": 30.7, "learning_rate": 0.001, "loss": 2.5389, "step": 159864 }, { "epoch": 30.7, "learning_rate": 0.001, "loss": 2.5377, "step": 159876 }, { "epoch": 30.7, "learning_rate": 0.001, "loss": 2.5356, "step": 159888 }, { "epoch": 30.7, "learning_rate": 0.001, "loss": 2.5354, "step": 159900 }, { "epoch": 30.71, "learning_rate": 0.001, "loss": 2.5205, "step": 159912 }, { "epoch": 30.71, "learning_rate": 0.001, "loss": 2.5292, "step": 159924 }, { "epoch": 30.71, "learning_rate": 0.001, "loss": 2.5365, "step": 159936 }, { "epoch": 30.71, "learning_rate": 0.001, "loss": 2.5261, "step": 159948 }, { "epoch": 30.71, "learning_rate": 0.001, "loss": 2.5368, "step": 159960 }, { "epoch": 30.72, "learning_rate": 0.001, "loss": 2.5317, "step": 159972 }, { "epoch": 30.72, "learning_rate": 0.001, "loss": 2.5381, "step": 159984 }, { "epoch": 30.72, "learning_rate": 0.001, "loss": 2.5198, "step": 159996 }, { "epoch": 30.72, "eval_ag_news_accuracy": 0.32665625, "eval_ag_news_bleu_score": 4.8211022640886485, "eval_ag_news_bleu_score_sem": 0.14933529579962598, "eval_ag_news_emb_cos_sim": 0.8135841488838196, "eval_ag_news_emb_cos_sim_sem": 0.007529377752878318, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5108516216278076, "eval_ag_news_n_ngrams_match_1": 14.182, "eval_ag_news_n_ngrams_match_2": 3.176, "eval_ag_news_n_ngrams_match_3": 0.888, "eval_ag_news_num_pred_words": 46.734, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.476765184981616, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3520814450767692, "eval_ag_news_runtime": 13.8999, "eval_ag_news_samples_per_second": 35.971, "eval_ag_news_steps_per_second": 0.072, "eval_ag_news_token_set_f1": 0.3531369150389347, "eval_ag_news_token_set_f1_sem": 0.004415020256536574, "eval_ag_news_token_set_precision": 0.3387969933087183, "eval_ag_news_token_set_recall": 0.3838311845614786, "eval_ag_news_true_num_tokens": 56.09375, "step": 160000 }, { "epoch": 30.72, "eval_anthropic_toxic_prompts_accuracy": 0.11559375, "eval_anthropic_toxic_prompts_bleu_score": 2.9588931101647624, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10841244418804719, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6672136187553406, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009386944783257302, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2257540225982666, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.648, "eval_anthropic_toxic_prompts_num_pred_words": 47.534, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.172547671489866, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21143710105673152, "eval_anthropic_toxic_prompts_runtime": 11.9029, "eval_anthropic_toxic_prompts_samples_per_second": 42.007, "eval_anthropic_toxic_prompts_steps_per_second": 0.084, "eval_anthropic_toxic_prompts_token_set_f1": 0.3541665891425984, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006686513148902574, "eval_anthropic_toxic_prompts_token_set_precision": 0.4389848120495779, "eval_anthropic_toxic_prompts_token_set_recall": 0.32438534561698307, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 160000 }, { "epoch": 30.72, "eval_arxiv_accuracy": 0.34984375, "eval_arxiv_bleu_score": 4.4218953829023135, "eval_arxiv_bleu_score_sem": 0.12849593479384475, "eval_arxiv_emb_cos_sim": 0.767446756362915, "eval_arxiv_emb_cos_sim_sem": 0.009005053505989107, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3747940063476562, "eval_arxiv_n_ngrams_match_1": 15.382, "eval_arxiv_n_ngrams_match_2": 2.984, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 41.178, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.21826438428178, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3657894847798854, "eval_arxiv_runtime": 12.8344, "eval_arxiv_samples_per_second": 38.958, "eval_arxiv_steps_per_second": 0.078, "eval_arxiv_token_set_f1": 0.35791723278536786, "eval_arxiv_token_set_f1_sem": 0.004216046687543057, "eval_arxiv_token_set_precision": 0.31001517612582224, "eval_arxiv_token_set_recall": 0.44020087523703066, "eval_arxiv_true_num_tokens": 64.0, "step": 160000 }, { "epoch": 30.72, "eval_python_code_alpaca_accuracy": 0.1621875, "eval_python_code_alpaca_bleu_score": 4.7142978533552, "eval_python_code_alpaca_bleu_score_sem": 0.14919315946492306, "eval_python_code_alpaca_emb_cos_sim": 0.7564646005630493, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009458119615135567, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8869330883026123, "eval_python_code_alpaca_n_ngrams_match_1": 9.908, "eval_python_code_alpaca_n_ngrams_match_2": 2.974, "eval_python_code_alpaca_n_ngrams_match_3": 1.028, "eval_python_code_alpaca_num_pred_words": 44.166, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.938210245476895, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3318526748730509, "eval_python_code_alpaca_runtime": 12.037, "eval_python_code_alpaca_samples_per_second": 41.539, "eval_python_code_alpaca_steps_per_second": 0.083, "eval_python_code_alpaca_token_set_f1": 0.4827637467161597, "eval_python_code_alpaca_token_set_f1_sem": 0.005482446242744389, "eval_python_code_alpaca_token_set_precision": 0.5429172537645555, "eval_python_code_alpaca_token_set_recall": 0.4560993072903012, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 160000 }, { "epoch": 30.72, "eval_wikibio_accuracy": 0.32628125, "eval_wikibio_bleu_score": 6.102570582861055, "eval_wikibio_bleu_score_sem": 0.23431773435141062, "eval_wikibio_emb_cos_sim": 0.7317752838134766, "eval_wikibio_emb_cos_sim_sem": 0.010413418107849966, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6575965881347656, "eval_wikibio_n_ngrams_match_1": 10.05, "eval_wikibio_n_ngrams_match_2": 3.438, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 35.938, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.768055208020485, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35477244583029943, "eval_wikibio_runtime": 12.6278, "eval_wikibio_samples_per_second": 39.595, "eval_wikibio_steps_per_second": 0.079, "eval_wikibio_token_set_f1": 0.31950528501300085, "eval_wikibio_token_set_f1_sem": 0.005725821948357305, "eval_wikibio_token_set_precision": 0.3274798394959956, "eval_wikibio_token_set_recall": 0.3281640255184498, "eval_wikibio_true_num_tokens": 61.1328125, "step": 160000 }, { "epoch": 30.72, "eval_nq_accuracy": 0.53140625, "eval_nq_bleu_score": 11.833525440067035, "eval_nq_bleu_score_sem": 0.47609663274117364, "eval_nq_emb_cos_sim": 0.8337178230285645, "eval_nq_emb_cos_sim_sem": 0.0071316583583607585, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1544744968414307, "eval_nq_n_ngrams_match_1": 23.196, "eval_nq_n_ngrams_match_2": 8.506, "eval_nq_n_ngrams_match_3": 3.932, "eval_nq_num_pred_words": 49.366, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.623357386530598, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44948892290774267, "eval_nq_runtime": 12.2507, "eval_nq_samples_per_second": 40.814, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.46120027141724096, "eval_nq_token_set_f1_sem": 0.0051430027443023895, "eval_nq_token_set_precision": 0.4208765705526024, "eval_nq_token_set_recall": 0.5170084313661555, "eval_nq_true_num_tokens": 64.0, "step": 160000 }, { "epoch": 30.72, "learning_rate": 0.001, "loss": 2.531, "step": 160008 }, { "epoch": 30.73, "learning_rate": 0.001, "loss": 2.5314, "step": 160020 }, { "epoch": 30.73, "learning_rate": 0.001, "loss": 2.5277, "step": 160032 }, { "epoch": 30.73, "learning_rate": 0.001, "loss": 2.5403, "step": 160044 }, { "epoch": 30.73, "learning_rate": 0.001, "loss": 2.5316, "step": 160056 }, { "epoch": 30.74, "learning_rate": 0.001, "loss": 2.5274, "step": 160068 }, { "epoch": 30.74, "learning_rate": 0.001, "loss": 2.5375, "step": 160080 }, { "epoch": 30.74, "learning_rate": 0.001, "loss": 2.5334, "step": 160092 }, { "epoch": 30.74, "learning_rate": 0.001, "loss": 2.5233, "step": 160104 }, { "epoch": 30.74, "learning_rate": 0.001, "loss": 2.5368, "step": 160116 }, { "epoch": 30.75, "learning_rate": 0.001, "loss": 2.5277, "step": 160128 }, { "epoch": 30.75, "learning_rate": 0.001, "loss": 2.5352, "step": 160140 }, { "epoch": 30.75, "learning_rate": 0.001, "loss": 2.5349, "step": 160152 }, { "epoch": 30.75, "learning_rate": 0.001, "loss": 2.5371, "step": 160164 }, { "epoch": 30.76, "learning_rate": 0.001, "loss": 2.5271, "step": 160176 }, { "epoch": 30.76, "learning_rate": 0.001, "loss": 2.5215, "step": 160188 }, { "epoch": 30.76, "learning_rate": 0.001, "loss": 2.5297, "step": 160200 }, { "epoch": 30.76, "learning_rate": 0.001, "loss": 2.5283, "step": 160212 }, { "epoch": 30.76, "learning_rate": 0.001, "loss": 2.5286, "step": 160224 }, { "epoch": 30.77, "learning_rate": 0.001, "loss": 2.5286, "step": 160236 }, { "epoch": 30.77, "learning_rate": 0.001, "loss": 2.5245, "step": 160248 }, { "epoch": 30.77, "learning_rate": 0.001, "loss": 2.5334, "step": 160260 }, { "epoch": 30.77, "learning_rate": 0.001, "loss": 2.5223, "step": 160272 }, { "epoch": 30.78, "learning_rate": 0.001, "loss": 2.5372, "step": 160284 }, { "epoch": 30.78, "learning_rate": 0.001, "loss": 2.5355, "step": 160296 }, { "epoch": 30.78, "learning_rate": 0.001, "loss": 2.5441, "step": 160308 }, { "epoch": 30.78, "learning_rate": 0.001, "loss": 2.5356, "step": 160320 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 2.533, "step": 160332 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 2.5192, "step": 160344 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 2.5252, "step": 160356 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 2.537, "step": 160368 }, { "epoch": 30.79, "learning_rate": 0.001, "loss": 2.5323, "step": 160380 }, { "epoch": 30.8, "learning_rate": 0.001, "loss": 2.5358, "step": 160392 }, { "epoch": 30.8, "learning_rate": 0.001, "loss": 2.5237, "step": 160404 }, { "epoch": 30.8, "learning_rate": 0.001, "loss": 2.5288, "step": 160416 }, { "epoch": 30.8, "learning_rate": 0.001, "loss": 2.5425, "step": 160428 }, { "epoch": 30.81, "learning_rate": 0.001, "loss": 2.5222, "step": 160440 }, { "epoch": 30.81, "learning_rate": 0.001, "loss": 2.5352, "step": 160452 }, { "epoch": 30.81, "learning_rate": 0.001, "loss": 2.5297, "step": 160464 }, { "epoch": 30.81, "learning_rate": 0.001, "loss": 2.5283, "step": 160476 }, { "epoch": 30.82, "learning_rate": 0.001, "loss": 2.5258, "step": 160488 }, { "epoch": 30.82, "learning_rate": 0.001, "loss": 2.5374, "step": 160500 }, { "epoch": 30.82, "learning_rate": 0.001, "loss": 2.5343, "step": 160512 }, { "epoch": 30.82, "learning_rate": 0.001, "loss": 2.5285, "step": 160524 }, { "epoch": 30.82, "learning_rate": 0.001, "loss": 2.5319, "step": 160536 }, { "epoch": 30.83, "learning_rate": 0.001, "loss": 2.5324, "step": 160548 }, { "epoch": 30.83, "learning_rate": 0.001, "loss": 2.5275, "step": 160560 }, { "epoch": 30.83, "learning_rate": 0.001, "loss": 2.5333, "step": 160572 }, { "epoch": 30.83, "learning_rate": 0.001, "loss": 2.5293, "step": 160584 }, { "epoch": 30.84, "learning_rate": 0.001, "loss": 2.5435, "step": 160596 }, { "epoch": 30.84, "learning_rate": 0.001, "loss": 2.5348, "step": 160608 }, { "epoch": 30.84, "learning_rate": 0.001, "loss": 2.5326, "step": 160620 }, { "epoch": 30.84, "eval_ag_news_accuracy": 0.326625, "eval_ag_news_bleu_score": 4.994859670613498, "eval_ag_news_bleu_score_sem": 0.1503446370989245, "eval_ag_news_emb_cos_sim": 0.821833074092865, "eval_ag_news_emb_cos_sim_sem": 0.007148403169565235, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5184292793273926, "eval_ag_news_n_ngrams_match_1": 14.304, "eval_ag_news_n_ngrams_match_2": 3.256, "eval_ag_news_n_ngrams_match_3": 0.906, "eval_ag_news_num_pred_words": 46.838, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.731404217690574, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35421617091994784, "eval_ag_news_runtime": 11.064, "eval_ag_news_samples_per_second": 45.192, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.35537135166790546, "eval_ag_news_token_set_f1_sem": 0.004342803236543565, "eval_ag_news_token_set_precision": 0.3419064089044689, "eval_ag_news_token_set_recall": 0.3851753583651109, "eval_ag_news_true_num_tokens": 56.09375, "step": 160625 }, { "epoch": 30.84, "eval_anthropic_toxic_prompts_accuracy": 0.1166875, "eval_anthropic_toxic_prompts_bleu_score": 3.1784472985690444, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11829486181476633, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6784992218017578, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008208755974915579, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.21441388130188, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.462, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.982, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 47.406, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.888699902873654, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22150702482181478, "eval_anthropic_toxic_prompts_runtime": 9.8862, "eval_anthropic_toxic_prompts_samples_per_second": 50.576, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.36046428621724147, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006224414292495103, "eval_anthropic_toxic_prompts_token_set_precision": 0.4519661442243867, "eval_anthropic_toxic_prompts_token_set_recall": 0.32524242103810214, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 160625 }, { "epoch": 30.84, "eval_arxiv_accuracy": 0.34890625, "eval_arxiv_bleu_score": 4.371614132708319, "eval_arxiv_bleu_score_sem": 0.1284399091651756, "eval_arxiv_emb_cos_sim": 0.7687532901763916, "eval_arxiv_emb_cos_sim_sem": 0.008708890894225595, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.37642765045166, "eval_arxiv_n_ngrams_match_1": 15.296, "eval_arxiv_n_ngrams_match_2": 2.924, "eval_arxiv_n_ngrams_match_3": 0.648, "eval_arxiv_num_pred_words": 40.784, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.266035639612895, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3664985707100959, "eval_arxiv_runtime": 10.2785, "eval_arxiv_samples_per_second": 48.645, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3565195042228485, "eval_arxiv_token_set_f1_sem": 0.004203263291642058, "eval_arxiv_token_set_precision": 0.31159660576734755, "eval_arxiv_token_set_recall": 0.4302166566082645, "eval_arxiv_true_num_tokens": 64.0, "step": 160625 }, { "epoch": 30.84, "eval_python_code_alpaca_accuracy": 0.1629375, "eval_python_code_alpaca_bleu_score": 4.695726901908382, "eval_python_code_alpaca_bleu_score_sem": 0.14703906815409884, "eval_python_code_alpaca_emb_cos_sim": 0.7778176069259644, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007298174629926729, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8541946411132812, "eval_python_code_alpaca_n_ngrams_match_1": 10.148, "eval_python_code_alpaca_n_ngrams_match_2": 3.022, "eval_python_code_alpaca_n_ngrams_match_3": 1.012, "eval_python_code_alpaca_num_pred_words": 44.308, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.360450183292524, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3417267456158005, "eval_python_code_alpaca_runtime": 9.7931, "eval_python_code_alpaca_samples_per_second": 51.056, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4799079705609145, "eval_python_code_alpaca_token_set_f1_sem": 0.005291364236949242, "eval_python_code_alpaca_token_set_precision": 0.5576091271515401, "eval_python_code_alpaca_token_set_recall": 0.44144399430587894, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 160625 }, { "epoch": 30.84, "eval_wikibio_accuracy": 0.32359375, "eval_wikibio_bleu_score": 5.923409481802042, "eval_wikibio_bleu_score_sem": 0.2141103659814, "eval_wikibio_emb_cos_sim": 0.7442214488983154, "eval_wikibio_emb_cos_sim_sem": 0.007870452441541744, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.732048273086548, "eval_wikibio_n_ngrams_match_1": 10.018, "eval_wikibio_n_ngrams_match_2": 3.322, "eval_wikibio_n_ngrams_match_3": 1.214, "eval_wikibio_num_pred_words": 36.23, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.76456585001452, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3555785154706334, "eval_wikibio_runtime": 10.3761, "eval_wikibio_samples_per_second": 48.188, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.3193002262862752, "eval_wikibio_token_set_f1_sem": 0.005199064140476549, "eval_wikibio_token_set_precision": 0.3258305436947781, "eval_wikibio_token_set_recall": 0.3291215165996358, "eval_wikibio_true_num_tokens": 61.1328125, "step": 160625 }, { "epoch": 30.84, "eval_nq_accuracy": 0.5326875, "eval_nq_bleu_score": 11.979396930833406, "eval_nq_bleu_score_sem": 0.4834926063441438, "eval_nq_emb_cos_sim": 0.8307627439498901, "eval_nq_emb_cos_sim_sem": 0.007245120308730118, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1502037048339844, "eval_nq_n_ngrams_match_1": 23.28, "eval_nq_n_ngrams_match_2": 8.63, "eval_nq_n_ngrams_match_3": 4.002, "eval_nq_num_pred_words": 49.262, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.586607352461746, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45030207887744267, "eval_nq_runtime": 10.3617, "eval_nq_samples_per_second": 48.255, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4640430004506094, "eval_nq_token_set_f1_sem": 0.005046485678447061, "eval_nq_token_set_precision": 0.42385014986078706, "eval_nq_token_set_recall": 0.5204246436028306, "eval_nq_true_num_tokens": 64.0, "step": 160625 }, { "epoch": 30.84, "learning_rate": 0.001, "loss": 2.5294, "step": 160632 }, { "epoch": 30.85, "learning_rate": 0.001, "loss": 2.5236, "step": 160644 }, { "epoch": 30.85, "learning_rate": 0.001, "loss": 2.5346, "step": 160656 }, { "epoch": 30.85, "learning_rate": 0.001, "loss": 2.5288, "step": 160668 }, { "epoch": 30.85, "learning_rate": 0.001, "loss": 2.5428, "step": 160680 }, { "epoch": 30.85, "learning_rate": 0.001, "loss": 2.5286, "step": 160692 }, { "epoch": 30.86, "learning_rate": 0.001, "loss": 2.5278, "step": 160704 }, { "epoch": 30.86, "learning_rate": 0.001, "loss": 2.529, "step": 160716 }, { "epoch": 30.86, "learning_rate": 0.001, "loss": 2.5335, "step": 160728 }, { "epoch": 30.86, "learning_rate": 0.001, "loss": 2.5294, "step": 160740 }, { "epoch": 30.87, "learning_rate": 0.001, "loss": 2.5352, "step": 160752 }, { "epoch": 30.87, "learning_rate": 0.001, "loss": 2.5236, "step": 160764 }, { "epoch": 30.87, "learning_rate": 0.001, "loss": 2.5288, "step": 160776 }, { "epoch": 30.87, "learning_rate": 0.001, "loss": 2.5282, "step": 160788 }, { "epoch": 30.88, "learning_rate": 0.001, "loss": 2.533, "step": 160800 }, { "epoch": 30.88, "learning_rate": 0.001, "loss": 2.53, "step": 160812 }, { "epoch": 30.88, "learning_rate": 0.001, "loss": 2.5358, "step": 160824 }, { "epoch": 30.88, "learning_rate": 0.001, "loss": 2.5222, "step": 160836 }, { "epoch": 30.88, "learning_rate": 0.001, "loss": 2.5381, "step": 160848 }, { "epoch": 30.89, "learning_rate": 0.001, "loss": 2.5428, "step": 160860 }, { "epoch": 30.89, "learning_rate": 0.001, "loss": 2.5353, "step": 160872 }, { "epoch": 30.89, "learning_rate": 0.001, "loss": 2.5201, "step": 160884 }, { "epoch": 30.89, "learning_rate": 0.001, "loss": 2.53, "step": 160896 }, { "epoch": 30.9, "learning_rate": 0.001, "loss": 2.5352, "step": 160908 }, { "epoch": 30.9, "learning_rate": 0.001, "loss": 2.5337, "step": 160920 }, { "epoch": 30.9, "learning_rate": 0.001, "loss": 2.535, "step": 160932 }, { "epoch": 30.9, "learning_rate": 0.001, "loss": 2.5404, "step": 160944 }, { "epoch": 30.91, "learning_rate": 0.001, "loss": 2.5426, "step": 160956 }, { "epoch": 30.91, "learning_rate": 0.001, "loss": 2.5409, "step": 160968 }, { "epoch": 30.91, "learning_rate": 0.001, "loss": 2.5303, "step": 160980 }, { "epoch": 30.91, "learning_rate": 0.001, "loss": 2.541, "step": 160992 }, { "epoch": 30.91, "learning_rate": 0.001, "loss": 2.5327, "step": 161004 }, { "epoch": 30.92, "learning_rate": 0.001, "loss": 2.5425, "step": 161016 }, { "epoch": 30.92, "learning_rate": 0.001, "loss": 2.5426, "step": 161028 }, { "epoch": 30.92, "learning_rate": 0.001, "loss": 2.532, "step": 161040 }, { "epoch": 30.92, "learning_rate": 0.001, "loss": 2.535, "step": 161052 }, { "epoch": 30.93, "learning_rate": 0.001, "loss": 2.5264, "step": 161064 }, { "epoch": 30.93, "learning_rate": 0.001, "loss": 2.5269, "step": 161076 }, { "epoch": 30.93, "learning_rate": 0.001, "loss": 2.5338, "step": 161088 }, { "epoch": 30.93, "learning_rate": 0.001, "loss": 2.5262, "step": 161100 }, { "epoch": 30.94, "learning_rate": 0.001, "loss": 2.5347, "step": 161112 }, { "epoch": 30.94, "learning_rate": 0.001, "loss": 2.5313, "step": 161124 }, { "epoch": 30.94, "learning_rate": 0.001, "loss": 2.5347, "step": 161136 }, { "epoch": 30.94, "learning_rate": 0.001, "loss": 2.5332, "step": 161148 }, { "epoch": 30.94, "learning_rate": 0.001, "loss": 2.5421, "step": 161160 }, { "epoch": 30.95, "learning_rate": 0.001, "loss": 2.5289, "step": 161172 }, { "epoch": 30.95, "learning_rate": 0.001, "loss": 2.5313, "step": 161184 }, { "epoch": 30.95, "learning_rate": 0.001, "loss": 2.5315, "step": 161196 }, { "epoch": 30.95, "learning_rate": 0.001, "loss": 2.5334, "step": 161208 }, { "epoch": 30.96, "learning_rate": 0.001, "loss": 2.5357, "step": 161220 }, { "epoch": 30.96, "learning_rate": 0.001, "loss": 2.5189, "step": 161232 }, { "epoch": 30.96, "learning_rate": 0.001, "loss": 2.5337, "step": 161244 }, { "epoch": 30.96, "eval_ag_news_accuracy": 0.32625, "eval_ag_news_bleu_score": 4.763700920805595, "eval_ag_news_bleu_score_sem": 0.1444648189998102, "eval_ag_news_emb_cos_sim": 0.8166588544845581, "eval_ag_news_emb_cos_sim_sem": 0.006733893524272149, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.517921209335327, "eval_ag_news_n_ngrams_match_1": 14.052, "eval_ag_news_n_ngrams_match_2": 3.112, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.58, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.7142706563101, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3512767912932626, "eval_ag_news_runtime": 13.1821, "eval_ag_news_samples_per_second": 37.93, "eval_ag_news_steps_per_second": 0.076, "eval_ag_news_token_set_f1": 0.3497340067166666, "eval_ag_news_token_set_f1_sem": 0.004483809887190126, "eval_ag_news_token_set_precision": 0.33446862515263764, "eval_ag_news_token_set_recall": 0.3812839266087502, "eval_ag_news_true_num_tokens": 56.09375, "step": 161250 }, { "epoch": 30.96, "eval_anthropic_toxic_prompts_accuracy": 0.115625, "eval_anthropic_toxic_prompts_bleu_score": 3.205134210821329, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1178727653003391, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6712790727615356, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008887372919501611, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2249948978424072, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.236, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.954, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 46.706, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.153445818646397, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21398959768031206, "eval_anthropic_toxic_prompts_runtime": 9.8125, "eval_anthropic_toxic_prompts_samples_per_second": 50.955, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3587687765280112, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00637237883673165, "eval_anthropic_toxic_prompts_token_set_precision": 0.4394723904158812, "eval_anthropic_toxic_prompts_token_set_recall": 0.32832215636207457, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 161250 }, { "epoch": 30.96, "eval_arxiv_accuracy": 0.3493125, "eval_arxiv_bleu_score": 4.424346617931371, "eval_arxiv_bleu_score_sem": 0.12605220824833557, "eval_arxiv_emb_cos_sim": 0.7720806002616882, "eval_arxiv_emb_cos_sim_sem": 0.006980470368762594, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3651304244995117, "eval_arxiv_n_ngrams_match_1": 15.412, "eval_arxiv_n_ngrams_match_2": 3.036, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 40.152, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.937271182052562, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3676821466955644, "eval_arxiv_runtime": 10.8396, "eval_arxiv_samples_per_second": 46.127, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.36101756796020285, "eval_arxiv_token_set_f1_sem": 0.004296467845172957, "eval_arxiv_token_set_precision": 0.3131804612985979, "eval_arxiv_token_set_recall": 0.44547692937519234, "eval_arxiv_true_num_tokens": 64.0, "step": 161250 }, { "epoch": 30.96, "eval_python_code_alpaca_accuracy": 0.159, "eval_python_code_alpaca_bleu_score": 4.682202597730714, "eval_python_code_alpaca_bleu_score_sem": 0.14078400840801508, "eval_python_code_alpaca_emb_cos_sim": 0.7602322101593018, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008470578124765824, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8762505054473877, "eval_python_code_alpaca_n_ngrams_match_1": 9.816, "eval_python_code_alpaca_n_ngrams_match_2": 2.928, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 43.264, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.747603725850535, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33638938309376853, "eval_python_code_alpaca_runtime": 10.0076, "eval_python_code_alpaca_samples_per_second": 49.962, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4754179225257856, "eval_python_code_alpaca_token_set_f1_sem": 0.005564868277901597, "eval_python_code_alpaca_token_set_precision": 0.5349891190389903, "eval_python_code_alpaca_token_set_recall": 0.44713364818560003, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 161250 }, { "epoch": 30.96, "eval_wikibio_accuracy": 0.32415625, "eval_wikibio_bleu_score": 6.177236924693363, "eval_wikibio_bleu_score_sem": 0.22484216461256287, "eval_wikibio_emb_cos_sim": 0.7451162338256836, "eval_wikibio_emb_cos_sim_sem": 0.00865770157107559, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7214102745056152, "eval_wikibio_n_ngrams_match_1": 10.086, "eval_wikibio_n_ngrams_match_2": 3.442, "eval_wikibio_n_ngrams_match_3": 1.252, "eval_wikibio_num_pred_words": 35.816, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.32262928575278, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3626412895880609, "eval_wikibio_runtime": 10.3095, "eval_wikibio_samples_per_second": 48.499, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3218342306058661, "eval_wikibio_token_set_f1_sem": 0.005254815071430934, "eval_wikibio_token_set_precision": 0.32921647572891294, "eval_wikibio_token_set_recall": 0.3344275048608989, "eval_wikibio_true_num_tokens": 61.1328125, "step": 161250 }, { "epoch": 30.96, "eval_nq_accuracy": 0.53203125, "eval_nq_bleu_score": 11.93263695435168, "eval_nq_bleu_score_sem": 0.47192356019846055, "eval_nq_emb_cos_sim": 0.8366599082946777, "eval_nq_emb_cos_sim_sem": 0.00711704262155059, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.149911642074585, "eval_nq_n_ngrams_match_1": 23.294, "eval_nq_n_ngrams_match_2": 8.532, "eval_nq_n_ngrams_match_3": 3.992, "eval_nq_num_pred_words": 48.908, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.584099890410473, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45400187651642276, "eval_nq_runtime": 10.5624, "eval_nq_samples_per_second": 47.338, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.46716666079282915, "eval_nq_token_set_f1_sem": 0.004895317024823251, "eval_nq_token_set_precision": 0.42524513982289924, "eval_nq_token_set_recall": 0.5265835422703615, "eval_nq_true_num_tokens": 64.0, "step": 161250 }, { "epoch": 30.96, "learning_rate": 0.001, "loss": 2.5301, "step": 161256 }, { "epoch": 30.97, "learning_rate": 0.001, "loss": 2.5344, "step": 161268 }, { "epoch": 30.97, "learning_rate": 0.001, "loss": 2.5362, "step": 161280 }, { "epoch": 30.97, "learning_rate": 0.001, "loss": 2.5231, "step": 161292 }, { "epoch": 30.97, "learning_rate": 0.001, "loss": 2.5241, "step": 161304 }, { "epoch": 30.97, "learning_rate": 0.001, "loss": 2.5356, "step": 161316 }, { "epoch": 30.98, "learning_rate": 0.001, "loss": 2.537, "step": 161328 }, { "epoch": 30.98, "learning_rate": 0.001, "loss": 2.5359, "step": 161340 }, { "epoch": 30.98, "learning_rate": 0.001, "loss": 2.5328, "step": 161352 }, { "epoch": 30.98, "learning_rate": 0.001, "loss": 2.5397, "step": 161364 }, { "epoch": 30.99, "learning_rate": 0.001, "loss": 2.5255, "step": 161376 }, { "epoch": 30.99, "learning_rate": 0.001, "loss": 2.534, "step": 161388 }, { "epoch": 30.99, "learning_rate": 0.001, "loss": 2.5314, "step": 161400 }, { "epoch": 30.99, "learning_rate": 0.001, "loss": 2.5233, "step": 161412 }, { "epoch": 31.0, "learning_rate": 0.001, "loss": 2.5324, "step": 161424 }, { "epoch": 31.0, "learning_rate": 0.001, "loss": 2.5382, "step": 161436 }, { "epoch": 31.0, "learning_rate": 0.001, "loss": 2.5309, "step": 161448 }, { "epoch": 31.0, "learning_rate": 0.001, "loss": 2.5166, "step": 161460 }, { "epoch": 31.0, "learning_rate": 0.001, "loss": 2.5185, "step": 161472 }, { "epoch": 31.01, "learning_rate": 0.001, "loss": 2.5255, "step": 161484 }, { "epoch": 31.01, "learning_rate": 0.001, "loss": 2.5173, "step": 161496 }, { "epoch": 31.01, "learning_rate": 0.001, "loss": 2.519, "step": 161508 }, { "epoch": 31.01, "learning_rate": 0.001, "loss": 2.521, "step": 161520 }, { "epoch": 31.02, "learning_rate": 0.001, "loss": 2.5212, "step": 161532 }, { "epoch": 31.02, "learning_rate": 0.001, "loss": 2.5211, "step": 161544 }, { "epoch": 31.02, "learning_rate": 0.001, "loss": 2.5136, "step": 161556 }, { "epoch": 31.02, "learning_rate": 0.001, "loss": 2.5129, "step": 161568 }, { "epoch": 31.03, "learning_rate": 0.001, "loss": 2.522, "step": 161580 }, { "epoch": 31.03, "learning_rate": 0.001, "loss": 2.5238, "step": 161592 }, { "epoch": 31.03, "learning_rate": 0.001, "loss": 2.5154, "step": 161604 }, { "epoch": 31.03, "learning_rate": 0.001, "loss": 2.5213, "step": 161616 }, { "epoch": 31.03, "learning_rate": 0.001, "loss": 2.5257, "step": 161628 }, { "epoch": 31.04, "learning_rate": 0.001, "loss": 2.5234, "step": 161640 }, { "epoch": 31.04, "learning_rate": 0.001, "loss": 2.5198, "step": 161652 }, { "epoch": 31.04, "learning_rate": 0.001, "loss": 2.5217, "step": 161664 }, { "epoch": 31.04, "learning_rate": 0.001, "loss": 2.5149, "step": 161676 }, { "epoch": 31.05, "learning_rate": 0.001, "loss": 2.5226, "step": 161688 }, { "epoch": 31.05, "learning_rate": 0.001, "loss": 2.5211, "step": 161700 }, { "epoch": 31.05, "learning_rate": 0.001, "loss": 2.5273, "step": 161712 }, { "epoch": 31.05, "learning_rate": 0.001, "loss": 2.5147, "step": 161724 }, { "epoch": 31.06, "learning_rate": 0.001, "loss": 2.5171, "step": 161736 }, { "epoch": 31.06, "learning_rate": 0.001, "loss": 2.5252, "step": 161748 }, { "epoch": 31.06, "learning_rate": 0.001, "loss": 2.5171, "step": 161760 }, { "epoch": 31.06, "learning_rate": 0.001, "loss": 2.5284, "step": 161772 }, { "epoch": 31.06, "learning_rate": 0.001, "loss": 2.5188, "step": 161784 }, { "epoch": 31.07, "learning_rate": 0.001, "loss": 2.5221, "step": 161796 }, { "epoch": 31.07, "learning_rate": 0.001, "loss": 2.5229, "step": 161808 }, { "epoch": 31.07, "learning_rate": 0.001, "loss": 2.5161, "step": 161820 }, { "epoch": 31.07, "learning_rate": 0.001, "loss": 2.529, "step": 161832 }, { "epoch": 31.08, "learning_rate": 0.001, "loss": 2.5257, "step": 161844 }, { "epoch": 31.08, "learning_rate": 0.001, "loss": 2.5222, "step": 161856 }, { "epoch": 31.08, "learning_rate": 0.001, "loss": 2.5133, "step": 161868 }, { "epoch": 31.08, "eval_ag_news_accuracy": 0.32559375, "eval_ag_news_bleu_score": 4.901664204289623, "eval_ag_news_bleu_score_sem": 0.15507902193259124, "eval_ag_news_emb_cos_sim": 0.8236854672431946, "eval_ag_news_emb_cos_sim_sem": 0.006079630872680333, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.518623113632202, "eval_ag_news_n_ngrams_match_1": 14.224, "eval_ag_news_n_ngrams_match_2": 3.182, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.668, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.73794315469205, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3547077283256893, "eval_ag_news_runtime": 12.2697, "eval_ag_news_samples_per_second": 40.751, "eval_ag_news_steps_per_second": 0.082, "eval_ag_news_token_set_f1": 0.3571035433728409, "eval_ag_news_token_set_f1_sem": 0.004461182716103829, "eval_ag_news_token_set_precision": 0.3402723154953633, "eval_ag_news_token_set_recall": 0.3905027789000208, "eval_ag_news_true_num_tokens": 56.09375, "step": 161875 }, { "epoch": 31.08, "eval_anthropic_toxic_prompts_accuracy": 0.1168125, "eval_anthropic_toxic_prompts_bleu_score": 3.06864962821325, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10933382191572798, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762846112251282, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009072190142944421, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1640625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.03286167651298939, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2147488594055176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.248, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 47.848, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.89703846890967, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21388412251431768, "eval_anthropic_toxic_prompts_runtime": 11.1696, "eval_anthropic_toxic_prompts_samples_per_second": 44.764, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.3599860664755848, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006671608185089837, "eval_anthropic_toxic_prompts_token_set_precision": 0.4396704560039951, "eval_anthropic_toxic_prompts_token_set_recall": 0.3342703776125466, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 161875 }, { "epoch": 31.08, "eval_arxiv_accuracy": 0.34921875, "eval_arxiv_bleu_score": 4.5403501089476785, "eval_arxiv_bleu_score_sem": 0.13835839669324407, "eval_arxiv_emb_cos_sim": 0.7701067924499512, "eval_arxiv_emb_cos_sim_sem": 0.006764259011535232, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3710904121398926, "eval_arxiv_n_ngrams_match_1": 15.314, "eval_arxiv_n_ngrams_match_2": 3.05, "eval_arxiv_n_ngrams_match_3": 0.716, "eval_arxiv_num_pred_words": 40.288, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.11025193016067, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36707043037104325, "eval_arxiv_runtime": 11.3767, "eval_arxiv_samples_per_second": 43.949, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.3592909166139714, "eval_arxiv_token_set_f1_sem": 0.004200398816105726, "eval_arxiv_token_set_precision": 0.31136650773273344, "eval_arxiv_token_set_recall": 0.438719750780837, "eval_arxiv_true_num_tokens": 64.0, "step": 161875 }, { "epoch": 31.08, "eval_python_code_alpaca_accuracy": 0.163, "eval_python_code_alpaca_bleu_score": 4.510016234122968, "eval_python_code_alpaca_bleu_score_sem": 0.1419761170517517, "eval_python_code_alpaca_emb_cos_sim": 0.7613218426704407, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00808675072664416, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.881549596786499, "eval_python_code_alpaca_n_ngrams_match_1": 9.94, "eval_python_code_alpaca_n_ngrams_match_2": 2.926, "eval_python_code_alpaca_n_ngrams_match_3": 0.948, "eval_python_code_alpaca_num_pred_words": 44.51, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.841899519400673, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3336546760579685, "eval_python_code_alpaca_runtime": 10.9589, "eval_python_code_alpaca_samples_per_second": 45.625, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.47567054116838636, "eval_python_code_alpaca_token_set_f1_sem": 0.005632694293134458, "eval_python_code_alpaca_token_set_precision": 0.5430999931934424, "eval_python_code_alpaca_token_set_recall": 0.4436771494240172, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 161875 }, { "epoch": 31.08, "eval_wikibio_accuracy": 0.323875, "eval_wikibio_bleu_score": 6.00179036755062, "eval_wikibio_bleu_score_sem": 0.22314716690448563, "eval_wikibio_emb_cos_sim": 0.7465529441833496, "eval_wikibio_emb_cos_sim_sem": 0.00961898755045657, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.700416326522827, "eval_wikibio_n_ngrams_match_1": 9.996, "eval_wikibio_n_ngrams_match_2": 3.324, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 35.502, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.46414715145646, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35757832825184976, "eval_wikibio_runtime": 10.8455, "eval_wikibio_samples_per_second": 46.102, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.3167721591700093, "eval_wikibio_token_set_f1_sem": 0.0056380098609037635, "eval_wikibio_token_set_precision": 0.32651134222640493, "eval_wikibio_token_set_recall": 0.3227582660970667, "eval_wikibio_true_num_tokens": 61.1328125, "step": 161875 }, { "epoch": 31.08, "eval_nq_accuracy": 0.5325, "eval_nq_bleu_score": 11.992439285153472, "eval_nq_bleu_score_sem": 0.4845579856759089, "eval_nq_emb_cos_sim": 0.8342034816741943, "eval_nq_emb_cos_sim_sem": 0.008027959824238512, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.151998281478882, "eval_nq_n_ngrams_match_1": 23.178, "eval_nq_n_ngrams_match_2": 8.638, "eval_nq_n_ngrams_match_3": 3.988, "eval_nq_num_pred_words": 49.072, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.60203051235745, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4506395063249068, "eval_nq_runtime": 11.6149, "eval_nq_samples_per_second": 43.048, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.46553303170104965, "eval_nq_token_set_f1_sem": 0.005063049778589747, "eval_nq_token_set_precision": 0.4228063164120291, "eval_nq_token_set_recall": 0.5267799291803646, "eval_nq_true_num_tokens": 64.0, "step": 161875 }, { "epoch": 31.08, "learning_rate": 0.001, "loss": 2.5242, "step": 161880 }, { "epoch": 31.09, "learning_rate": 0.001, "loss": 2.5212, "step": 161892 }, { "epoch": 31.09, "learning_rate": 0.001, "loss": 2.5254, "step": 161904 }, { "epoch": 31.09, "learning_rate": 0.001, "loss": 2.5191, "step": 161916 }, { "epoch": 31.09, "learning_rate": 0.001, "loss": 2.5203, "step": 161928 }, { "epoch": 31.09, "learning_rate": 0.001, "loss": 2.5217, "step": 161940 }, { "epoch": 31.1, "learning_rate": 0.001, "loss": 2.5309, "step": 161952 }, { "epoch": 31.1, "learning_rate": 0.001, "loss": 2.5232, "step": 161964 }, { "epoch": 31.1, "learning_rate": 0.001, "loss": 2.5266, "step": 161976 }, { "epoch": 31.1, "learning_rate": 0.001, "loss": 2.5268, "step": 161988 }, { "epoch": 31.11, "learning_rate": 0.001, "loss": 2.5241, "step": 162000 }, { "epoch": 31.11, "learning_rate": 0.001, "loss": 2.5299, "step": 162012 }, { "epoch": 31.11, "learning_rate": 0.001, "loss": 2.5193, "step": 162024 }, { "epoch": 31.11, "learning_rate": 0.001, "loss": 2.5174, "step": 162036 }, { "epoch": 31.12, "learning_rate": 0.001, "loss": 2.532, "step": 162048 }, { "epoch": 31.12, "learning_rate": 0.001, "loss": 2.5276, "step": 162060 }, { "epoch": 31.12, "learning_rate": 0.001, "loss": 2.5216, "step": 162072 }, { "epoch": 31.12, "learning_rate": 0.001, "loss": 2.523, "step": 162084 }, { "epoch": 31.12, "learning_rate": 0.001, "loss": 2.5286, "step": 162096 }, { "epoch": 31.13, "learning_rate": 0.001, "loss": 2.524, "step": 162108 }, { "epoch": 31.13, "learning_rate": 0.001, "loss": 2.5296, "step": 162120 }, { "epoch": 31.13, "learning_rate": 0.001, "loss": 2.5239, "step": 162132 }, { "epoch": 31.13, "learning_rate": 0.001, "loss": 2.5285, "step": 162144 }, { "epoch": 31.14, "learning_rate": 0.001, "loss": 2.5209, "step": 162156 }, { "epoch": 31.14, "learning_rate": 0.001, "loss": 2.531, "step": 162168 }, { "epoch": 31.14, "learning_rate": 0.001, "loss": 2.5255, "step": 162180 }, { "epoch": 31.14, "learning_rate": 0.001, "loss": 2.521, "step": 162192 }, { "epoch": 31.15, "learning_rate": 0.001, "loss": 2.5254, "step": 162204 }, { "epoch": 31.15, "learning_rate": 0.001, "loss": 2.5099, "step": 162216 }, { "epoch": 31.15, "learning_rate": 0.001, "loss": 2.5311, "step": 162228 }, { "epoch": 31.15, "learning_rate": 0.001, "loss": 2.5184, "step": 162240 }, { "epoch": 31.15, "learning_rate": 0.001, "loss": 2.5277, "step": 162252 }, { "epoch": 31.16, "learning_rate": 0.001, "loss": 2.5207, "step": 162264 }, { "epoch": 31.16, "learning_rate": 0.001, "loss": 2.5192, "step": 162276 }, { "epoch": 31.16, "learning_rate": 0.001, "loss": 2.5313, "step": 162288 }, { "epoch": 31.16, "learning_rate": 0.001, "loss": 2.5253, "step": 162300 }, { "epoch": 31.17, "learning_rate": 0.001, "loss": 2.5274, "step": 162312 }, { "epoch": 31.17, "learning_rate": 0.001, "loss": 2.5331, "step": 162324 }, { "epoch": 31.17, "learning_rate": 0.001, "loss": 2.5301, "step": 162336 }, { "epoch": 31.17, "learning_rate": 0.001, "loss": 2.53, "step": 162348 }, { "epoch": 31.18, "learning_rate": 0.001, "loss": 2.5283, "step": 162360 }, { "epoch": 31.18, "learning_rate": 0.001, "loss": 2.5254, "step": 162372 }, { "epoch": 31.18, "learning_rate": 0.001, "loss": 2.5275, "step": 162384 }, { "epoch": 31.18, "learning_rate": 0.001, "loss": 2.5173, "step": 162396 }, { "epoch": 31.18, "learning_rate": 0.001, "loss": 2.5254, "step": 162408 }, { "epoch": 31.19, "learning_rate": 0.001, "loss": 2.5358, "step": 162420 }, { "epoch": 31.19, "learning_rate": 0.001, "loss": 2.5277, "step": 162432 }, { "epoch": 31.19, "learning_rate": 0.001, "loss": 2.5295, "step": 162444 }, { "epoch": 31.19, "learning_rate": 0.001, "loss": 2.5266, "step": 162456 }, { "epoch": 31.2, "learning_rate": 0.001, "loss": 2.5284, "step": 162468 }, { "epoch": 31.2, "learning_rate": 0.001, "loss": 2.5327, "step": 162480 }, { "epoch": 31.2, "learning_rate": 0.001, "loss": 2.5365, "step": 162492 }, { "epoch": 31.2, "eval_ag_news_accuracy": 0.3255, "eval_ag_news_bleu_score": 4.8157969104955765, "eval_ag_news_bleu_score_sem": 0.14343317213654067, "eval_ag_news_emb_cos_sim": 0.814470112323761, "eval_ag_news_emb_cos_sim_sem": 0.00717236628712171, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5124094486236572, "eval_ag_news_n_ngrams_match_1": 14.252, "eval_ag_news_n_ngrams_match_2": 3.166, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.918, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.52895683574676, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3526979657070748, "eval_ag_news_runtime": 10.7704, "eval_ag_news_samples_per_second": 46.423, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.35472414562009524, "eval_ag_news_token_set_f1_sem": 0.0043366112160831746, "eval_ag_news_token_set_precision": 0.3392239084581563, "eval_ag_news_token_set_recall": 0.3867220334368949, "eval_ag_news_true_num_tokens": 56.09375, "step": 162500 }, { "epoch": 31.2, "eval_anthropic_toxic_prompts_accuracy": 0.115375, "eval_anthropic_toxic_prompts_bleu_score": 3.0765887867127684, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11501881902507922, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759611368179321, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008862206641129052, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.221386194229126, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.222, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.908, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728, "eval_anthropic_toxic_prompts_num_pred_words": 48.252, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.06283807441126, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21150325841157802, "eval_anthropic_toxic_prompts_runtime": 10.3298, "eval_anthropic_toxic_prompts_samples_per_second": 48.404, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.36029945475367525, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006256222211117072, "eval_anthropic_toxic_prompts_token_set_precision": 0.44336828737449474, "eval_anthropic_toxic_prompts_token_set_recall": 0.33048520789764047, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 162500 }, { "epoch": 31.2, "eval_arxiv_accuracy": 0.350375, "eval_arxiv_bleu_score": 4.516365933146245, "eval_arxiv_bleu_score_sem": 0.13362652451020654, "eval_arxiv_emb_cos_sim": 0.7727753520011902, "eval_arxiv_emb_cos_sim_sem": 0.008605923509135449, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3669466972351074, "eval_arxiv_n_ngrams_match_1": 15.422, "eval_arxiv_n_ngrams_match_2": 3.054, "eval_arxiv_n_ngrams_match_3": 0.714, "eval_arxiv_num_pred_words": 40.418, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.989876917463025, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3680837233936842, "eval_arxiv_runtime": 10.0863, "eval_arxiv_samples_per_second": 49.572, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3598643137804065, "eval_arxiv_token_set_f1_sem": 0.0043905950071664685, "eval_arxiv_token_set_precision": 0.3122073605404567, "eval_arxiv_token_set_recall": 0.44161512642063805, "eval_arxiv_true_num_tokens": 64.0, "step": 162500 }, { "epoch": 31.2, "eval_python_code_alpaca_accuracy": 0.16128125, "eval_python_code_alpaca_bleu_score": 4.661977437590216, "eval_python_code_alpaca_bleu_score_sem": 0.15195722144163165, "eval_python_code_alpaca_emb_cos_sim": 0.7648204565048218, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008633995944421735, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8921637535095215, "eval_python_code_alpaca_n_ngrams_match_1": 10.008, "eval_python_code_alpaca_n_ngrams_match_2": 2.968, "eval_python_code_alpaca_n_ngrams_match_3": 1.028, "eval_python_code_alpaca_num_pred_words": 44.28, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.032284839546293, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33735298511441925, "eval_python_code_alpaca_runtime": 9.6837, "eval_python_code_alpaca_samples_per_second": 51.633, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4841179463208355, "eval_python_code_alpaca_token_set_f1_sem": 0.005605954368543609, "eval_python_code_alpaca_token_set_precision": 0.545098084412822, "eval_python_code_alpaca_token_set_recall": 0.4569658162848549, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 162500 }, { "epoch": 31.2, "eval_wikibio_accuracy": 0.32315625, "eval_wikibio_bleu_score": 5.998422941832177, "eval_wikibio_bleu_score_sem": 0.21960553848771863, "eval_wikibio_emb_cos_sim": 0.7335643172264099, "eval_wikibio_emb_cos_sim_sem": 0.01070594687101843, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.67614483833313, "eval_wikibio_n_ngrams_match_1": 10.05, "eval_wikibio_n_ngrams_match_2": 3.354, "eval_wikibio_n_ngrams_match_3": 1.222, "eval_wikibio_num_pred_words": 36.178, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.493845053415235, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3554553576993378, "eval_wikibio_runtime": 10.2708, "eval_wikibio_samples_per_second": 48.682, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3211354887587338, "eval_wikibio_token_set_f1_sem": 0.005245332210316318, "eval_wikibio_token_set_precision": 0.3287808604204915, "eval_wikibio_token_set_recall": 0.33056852341874576, "eval_wikibio_true_num_tokens": 61.1328125, "step": 162500 }, { "epoch": 31.2, "eval_nq_accuracy": 0.53240625, "eval_nq_bleu_score": 11.85249815758199, "eval_nq_bleu_score_sem": 0.4966317828949102, "eval_nq_emb_cos_sim": 0.835763692855835, "eval_nq_emb_cos_sim_sem": 0.007175556935262887, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1523280143737793, "eval_nq_n_ngrams_match_1": 23.25, "eval_nq_n_ngrams_match_2": 8.566, "eval_nq_n_ngrams_match_3": 3.952, "eval_nq_num_pred_words": 49.024, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.604867352454333, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45042104075010403, "eval_nq_runtime": 10.2861, "eval_nq_samples_per_second": 48.609, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.46330395481002234, "eval_nq_token_set_f1_sem": 0.005044755859379872, "eval_nq_token_set_precision": 0.4216141183572738, "eval_nq_token_set_recall": 0.5219597882821291, "eval_nq_true_num_tokens": 64.0, "step": 162500 }, { "epoch": 31.2, "learning_rate": 0.001, "loss": 2.5205, "step": 162504 }, { "epoch": 31.21, "learning_rate": 0.001, "loss": 2.5243, "step": 162516 }, { "epoch": 31.21, "learning_rate": 0.001, "loss": 2.5197, "step": 162528 }, { "epoch": 31.21, "learning_rate": 0.001, "loss": 2.5271, "step": 162540 }, { "epoch": 31.21, "learning_rate": 0.001, "loss": 2.5285, "step": 162552 }, { "epoch": 31.21, "learning_rate": 0.001, "loss": 2.5327, "step": 162564 }, { "epoch": 31.22, "learning_rate": 0.001, "loss": 2.5314, "step": 162576 }, { "epoch": 31.22, "learning_rate": 0.001, "loss": 2.5285, "step": 162588 }, { "epoch": 31.22, "learning_rate": 0.001, "loss": 2.5292, "step": 162600 }, { "epoch": 31.22, "learning_rate": 0.001, "loss": 2.5275, "step": 162612 }, { "epoch": 31.23, "learning_rate": 0.001, "loss": 2.5185, "step": 162624 }, { "epoch": 31.23, "learning_rate": 0.001, "loss": 2.5236, "step": 162636 }, { "epoch": 31.23, "learning_rate": 0.001, "loss": 2.5236, "step": 162648 }, { "epoch": 31.23, "learning_rate": 0.001, "loss": 2.5212, "step": 162660 }, { "epoch": 31.24, "learning_rate": 0.001, "loss": 2.5347, "step": 162672 }, { "epoch": 31.24, "learning_rate": 0.001, "loss": 2.5393, "step": 162684 }, { "epoch": 31.24, "learning_rate": 0.001, "loss": 2.5223, "step": 162696 }, { "epoch": 31.24, "learning_rate": 0.001, "loss": 2.5237, "step": 162708 }, { "epoch": 31.24, "learning_rate": 0.001, "loss": 2.5358, "step": 162720 }, { "epoch": 31.25, "learning_rate": 0.001, "loss": 2.5265, "step": 162732 }, { "epoch": 31.25, "learning_rate": 0.001, "loss": 2.5309, "step": 162744 }, { "epoch": 31.25, "learning_rate": 0.001, "loss": 2.5265, "step": 162756 }, { "epoch": 31.25, "learning_rate": 0.001, "loss": 2.533, "step": 162768 }, { "epoch": 31.26, "learning_rate": 0.001, "loss": 2.5208, "step": 162780 }, { "epoch": 31.26, "learning_rate": 0.001, "loss": 2.5336, "step": 162792 }, { "epoch": 31.26, "learning_rate": 0.001, "loss": 2.5222, "step": 162804 }, { "epoch": 31.26, "learning_rate": 0.001, "loss": 2.5216, "step": 162816 }, { "epoch": 31.26, "learning_rate": 0.001, "loss": 2.5204, "step": 162828 }, { "epoch": 31.27, "learning_rate": 0.001, "loss": 2.5335, "step": 162840 }, { "epoch": 31.27, "learning_rate": 0.001, "loss": 2.5156, "step": 162852 }, { "epoch": 31.27, "learning_rate": 0.001, "loss": 2.5261, "step": 162864 }, { "epoch": 31.27, "learning_rate": 0.001, "loss": 2.5286, "step": 162876 }, { "epoch": 31.28, "learning_rate": 0.001, "loss": 2.5282, "step": 162888 }, { "epoch": 31.28, "learning_rate": 0.001, "loss": 2.5226, "step": 162900 }, { "epoch": 31.28, "learning_rate": 0.001, "loss": 2.5294, "step": 162912 }, { "epoch": 31.28, "learning_rate": 0.001, "loss": 2.5379, "step": 162924 }, { "epoch": 31.29, "learning_rate": 0.001, "loss": 2.5151, "step": 162936 }, { "epoch": 31.29, "learning_rate": 0.001, "loss": 2.5227, "step": 162948 }, { "epoch": 31.29, "learning_rate": 0.001, "loss": 2.5231, "step": 162960 }, { "epoch": 31.29, "learning_rate": 0.001, "loss": 2.5288, "step": 162972 }, { "epoch": 31.29, "learning_rate": 0.001, "loss": 2.5289, "step": 162984 }, { "epoch": 31.3, "learning_rate": 0.001, "loss": 2.526, "step": 162996 }, { "epoch": 31.3, "learning_rate": 0.001, "loss": 2.5173, "step": 163008 }, { "epoch": 31.3, "learning_rate": 0.001, "loss": 2.5185, "step": 163020 }, { "epoch": 31.3, "learning_rate": 0.001, "loss": 2.5226, "step": 163032 }, { "epoch": 31.31, "learning_rate": 0.001, "loss": 2.5294, "step": 163044 }, { "epoch": 31.31, "learning_rate": 0.001, "loss": 2.5414, "step": 163056 }, { "epoch": 31.31, "learning_rate": 0.001, "loss": 2.5303, "step": 163068 }, { "epoch": 31.31, "learning_rate": 0.001, "loss": 2.526, "step": 163080 }, { "epoch": 31.32, "learning_rate": 0.001, "loss": 2.538, "step": 163092 }, { "epoch": 31.32, "learning_rate": 0.001, "loss": 2.5233, "step": 163104 }, { "epoch": 31.32, "learning_rate": 0.001, "loss": 2.5269, "step": 163116 }, { "epoch": 31.32, "eval_ag_news_accuracy": 0.3258125, "eval_ag_news_bleu_score": 5.05141179206533, "eval_ag_news_bleu_score_sem": 0.15785918778929836, "eval_ag_news_emb_cos_sim": 0.816232442855835, "eval_ag_news_emb_cos_sim_sem": 0.007050745628842954, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.523041009902954, "eval_ag_news_n_ngrams_match_1": 14.392, "eval_ag_news_n_ngrams_match_2": 3.31, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.554, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.88732361867052, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35730433387532157, "eval_ag_news_runtime": 10.4459, "eval_ag_news_samples_per_second": 47.866, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35979926231773707, "eval_ag_news_token_set_f1_sem": 0.004275888215531512, "eval_ag_news_token_set_precision": 0.3446038034398017, "eval_ag_news_token_set_recall": 0.39051428680686573, "eval_ag_news_true_num_tokens": 56.09375, "step": 163125 }, { "epoch": 31.32, "eval_anthropic_toxic_prompts_accuracy": 0.11490625, "eval_anthropic_toxic_prompts_bleu_score": 3.1953841379288743, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12380286848932094, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6841152906417847, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00884034493909755, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.207458257675171, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.222, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 47.0, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.716184146661625, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.21617079038016188, "eval_anthropic_toxic_prompts_runtime": 9.8386, "eval_anthropic_toxic_prompts_samples_per_second": 50.82, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35916719908479533, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006865310890588754, "eval_anthropic_toxic_prompts_token_set_precision": 0.4417478563921143, "eval_anthropic_toxic_prompts_token_set_recall": 0.3264666692226753, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 163125 }, { "epoch": 31.32, "eval_arxiv_accuracy": 0.34775, "eval_arxiv_bleu_score": 4.274956260823223, "eval_arxiv_bleu_score_sem": 0.12371440771032687, "eval_arxiv_emb_cos_sim": 0.7722713947296143, "eval_arxiv_emb_cos_sim_sem": 0.00696955828626532, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3779945373535156, "eval_arxiv_n_ngrams_match_1": 15.03, "eval_arxiv_n_ngrams_match_2": 2.858, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 39.608, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.311928152325013, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36297525739503217, "eval_arxiv_runtime": 10.7286, "eval_arxiv_samples_per_second": 46.604, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.35319981847151216, "eval_arxiv_token_set_f1_sem": 0.004238243965915341, "eval_arxiv_token_set_precision": 0.3036387711509786, "eval_arxiv_token_set_recall": 0.4421727786980744, "eval_arxiv_true_num_tokens": 64.0, "step": 163125 }, { "epoch": 31.32, "eval_python_code_alpaca_accuracy": 0.16121875, "eval_python_code_alpaca_bleu_score": 4.704841465728496, "eval_python_code_alpaca_bleu_score_sem": 0.14970498746283054, "eval_python_code_alpaca_emb_cos_sim": 0.7609281539916992, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00770459669011728, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8745200634002686, "eval_python_code_alpaca_n_ngrams_match_1": 9.922, "eval_python_code_alpaca_n_ngrams_match_2": 2.982, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 43.984, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.716919082782915, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33434303139208166, "eval_python_code_alpaca_runtime": 9.8394, "eval_python_code_alpaca_samples_per_second": 50.816, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4839838619599384, "eval_python_code_alpaca_token_set_f1_sem": 0.00538776787646499, "eval_python_code_alpaca_token_set_precision": 0.5419609860173293, "eval_python_code_alpaca_token_set_recall": 0.4579719061606382, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 163125 }, { "epoch": 31.32, "eval_wikibio_accuracy": 0.3233125, "eval_wikibio_bleu_score": 6.032168041877137, "eval_wikibio_bleu_score_sem": 0.20542151548156423, "eval_wikibio_emb_cos_sim": 0.7514613270759583, "eval_wikibio_emb_cos_sim_sem": 0.008478430609479011, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.713515043258667, "eval_wikibio_n_ngrams_match_1": 10.368, "eval_wikibio_n_ngrams_match_2": 3.456, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 36.624, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.997662105386766, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35995825411647586, "eval_wikibio_runtime": 10.3184, "eval_wikibio_samples_per_second": 48.457, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3271373011203448, "eval_wikibio_token_set_f1_sem": 0.005245375736310838, "eval_wikibio_token_set_precision": 0.3362228613100062, "eval_wikibio_token_set_recall": 0.334700881726363, "eval_wikibio_true_num_tokens": 61.1328125, "step": 163125 }, { "epoch": 31.32, "eval_nq_accuracy": 0.531625, "eval_nq_bleu_score": 11.931165235614845, "eval_nq_bleu_score_sem": 0.4792714629073597, "eval_nq_emb_cos_sim": 0.8411628007888794, "eval_nq_emb_cos_sim_sem": 0.0072527772057244325, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1540606021881104, "eval_nq_n_ngrams_match_1": 23.33, "eval_nq_n_ngrams_match_2": 8.606, "eval_nq_n_ngrams_match_3": 3.948, "eval_nq_num_pred_words": 49.058, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.619788963541184, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4534051113926958, "eval_nq_runtime": 10.404, "eval_nq_samples_per_second": 48.058, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46844718309509725, "eval_nq_token_set_f1_sem": 0.004880959919905421, "eval_nq_token_set_precision": 0.4256123992319499, "eval_nq_token_set_recall": 0.5281752103927022, "eval_nq_true_num_tokens": 64.0, "step": 163125 }, { "epoch": 31.32, "learning_rate": 0.001, "loss": 2.528, "step": 163128 }, { "epoch": 31.32, "learning_rate": 0.001, "loss": 2.527, "step": 163140 }, { "epoch": 31.33, "learning_rate": 0.001, "loss": 2.5294, "step": 163152 }, { "epoch": 31.33, "learning_rate": 0.001, "loss": 2.5267, "step": 163164 }, { "epoch": 31.33, "learning_rate": 0.001, "loss": 2.5278, "step": 163176 }, { "epoch": 31.33, "learning_rate": 0.001, "loss": 2.5244, "step": 163188 }, { "epoch": 31.34, "learning_rate": 0.001, "loss": 2.5261, "step": 163200 }, { "epoch": 31.34, "learning_rate": 0.001, "loss": 2.5257, "step": 163212 }, { "epoch": 31.34, "learning_rate": 0.001, "loss": 2.5194, "step": 163224 }, { "epoch": 31.34, "learning_rate": 0.001, "loss": 2.5295, "step": 163236 }, { "epoch": 31.35, "learning_rate": 0.001, "loss": 2.5185, "step": 163248 }, { "epoch": 31.35, "learning_rate": 0.001, "loss": 2.5253, "step": 163260 }, { "epoch": 31.35, "learning_rate": 0.001, "loss": 2.5279, "step": 163272 }, { "epoch": 31.35, "learning_rate": 0.001, "loss": 2.5325, "step": 163284 }, { "epoch": 31.35, "learning_rate": 0.001, "loss": 2.5275, "step": 163296 }, { "epoch": 31.36, "learning_rate": 0.001, "loss": 2.5324, "step": 163308 }, { "epoch": 31.36, "learning_rate": 0.001, "loss": 2.5221, "step": 163320 }, { "epoch": 31.36, "learning_rate": 0.001, "loss": 2.5308, "step": 163332 }, { "epoch": 31.36, "learning_rate": 0.001, "loss": 2.5248, "step": 163344 }, { "epoch": 31.37, "learning_rate": 0.001, "loss": 2.516, "step": 163356 }, { "epoch": 31.37, "learning_rate": 0.001, "loss": 2.5224, "step": 163368 }, { "epoch": 31.37, "learning_rate": 0.001, "loss": 2.5288, "step": 163380 }, { "epoch": 31.37, "learning_rate": 0.001, "loss": 2.534, "step": 163392 }, { "epoch": 31.38, "learning_rate": 0.001, "loss": 2.5281, "step": 163404 }, { "epoch": 31.38, "learning_rate": 0.001, "loss": 2.5428, "step": 163416 }, { "epoch": 31.38, "learning_rate": 0.001, "loss": 2.5212, "step": 163428 }, { "epoch": 31.38, "learning_rate": 0.001, "loss": 2.5246, "step": 163440 }, { "epoch": 31.38, "learning_rate": 0.001, "loss": 2.5291, "step": 163452 }, { "epoch": 31.39, "learning_rate": 0.001, "loss": 2.534, "step": 163464 }, { "epoch": 31.39, "learning_rate": 0.001, "loss": 2.5315, "step": 163476 }, { "epoch": 31.39, "learning_rate": 0.001, "loss": 2.5305, "step": 163488 }, { "epoch": 31.39, "learning_rate": 0.001, "loss": 2.5449, "step": 163500 }, { "epoch": 31.4, "learning_rate": 0.001, "loss": 2.5256, "step": 163512 }, { "epoch": 31.4, "learning_rate": 0.001, "loss": 2.5313, "step": 163524 }, { "epoch": 31.4, "learning_rate": 0.001, "loss": 2.5265, "step": 163536 }, { "epoch": 31.4, "learning_rate": 0.001, "loss": 2.5311, "step": 163548 }, { "epoch": 31.41, "learning_rate": 0.001, "loss": 2.5263, "step": 163560 }, { "epoch": 31.41, "learning_rate": 0.001, "loss": 2.5267, "step": 163572 }, { "epoch": 31.41, "learning_rate": 0.001, "loss": 2.5295, "step": 163584 }, { "epoch": 31.41, "learning_rate": 0.001, "loss": 2.5198, "step": 163596 }, { "epoch": 31.41, "learning_rate": 0.001, "loss": 2.54, "step": 163608 }, { "epoch": 31.42, "learning_rate": 0.001, "loss": 2.5344, "step": 163620 }, { "epoch": 31.42, "learning_rate": 0.001, "loss": 2.5408, "step": 163632 }, { "epoch": 31.42, "learning_rate": 0.001, "loss": 2.5327, "step": 163644 }, { "epoch": 31.42, "learning_rate": 0.001, "loss": 2.5192, "step": 163656 }, { "epoch": 31.43, "learning_rate": 0.001, "loss": 2.5284, "step": 163668 }, { "epoch": 31.43, "learning_rate": 0.001, "loss": 2.5333, "step": 163680 }, { "epoch": 31.43, "learning_rate": 0.001, "loss": 2.5204, "step": 163692 }, { "epoch": 31.43, "learning_rate": 0.001, "loss": 2.5377, "step": 163704 }, { "epoch": 31.44, "learning_rate": 0.001, "loss": 2.5228, "step": 163716 }, { "epoch": 31.44, "learning_rate": 0.001, "loss": 2.5299, "step": 163728 }, { "epoch": 31.44, "learning_rate": 0.001, "loss": 2.5217, "step": 163740 }, { "epoch": 31.44, "eval_ag_news_accuracy": 0.32846875, "eval_ag_news_bleu_score": 5.039003197574389, "eval_ag_news_bleu_score_sem": 0.1541715898406364, "eval_ag_news_emb_cos_sim": 0.8208545446395874, "eval_ag_news_emb_cos_sim_sem": 0.007011624792346241, "eval_ag_news_emb_top1_equal": 0.328125, "eval_ag_news_emb_top1_equal_sem": 0.041664103776406315, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.505810022354126, "eval_ag_news_n_ngrams_match_1": 14.422, "eval_ag_news_n_ngrams_match_2": 3.276, "eval_ag_news_n_ngrams_match_3": 0.944, "eval_ag_news_num_pred_words": 46.488, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.30841348743077, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.361005150251066, "eval_ag_news_runtime": 11.0556, "eval_ag_news_samples_per_second": 45.226, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.36118238111025575, "eval_ag_news_token_set_f1_sem": 0.0042183406955035715, "eval_ag_news_token_set_precision": 0.34607792607459703, "eval_ag_news_token_set_recall": 0.39142795024551025, "eval_ag_news_true_num_tokens": 56.09375, "step": 163750 }, { "epoch": 31.44, "eval_anthropic_toxic_prompts_accuracy": 0.11471875, "eval_anthropic_toxic_prompts_bleu_score": 3.210588473979575, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12834171134688066, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6734957098960876, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00938085061238043, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2179293632507324, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.24, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 46.984, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.976349653401417, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2148668053002212, "eval_anthropic_toxic_prompts_runtime": 9.9728, "eval_anthropic_toxic_prompts_samples_per_second": 50.136, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3547417572838096, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006582008520789323, "eval_anthropic_toxic_prompts_token_set_precision": 0.4365952650506784, "eval_anthropic_toxic_prompts_token_set_recall": 0.3270299461151323, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 163750 }, { "epoch": 31.44, "eval_arxiv_accuracy": 0.3505625, "eval_arxiv_bleu_score": 4.530408259249223, "eval_arxiv_bleu_score_sem": 0.12983283075612403, "eval_arxiv_emb_cos_sim": 0.7802456617355347, "eval_arxiv_emb_cos_sim_sem": 0.006992751565291964, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.371882438659668, "eval_arxiv_n_ngrams_match_1": 15.624, "eval_arxiv_n_ngrams_match_2": 3.06, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 40.68, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.13331715461568, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37320598796674354, "eval_arxiv_runtime": 10.458, "eval_arxiv_samples_per_second": 47.81, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.36633409059665983, "eval_arxiv_token_set_f1_sem": 0.004224340322652853, "eval_arxiv_token_set_precision": 0.31885869175441045, "eval_arxiv_token_set_recall": 0.4474813895945389, "eval_arxiv_true_num_tokens": 64.0, "step": 163750 }, { "epoch": 31.44, "eval_python_code_alpaca_accuracy": 0.1603125, "eval_python_code_alpaca_bleu_score": 4.784778660045248, "eval_python_code_alpaca_bleu_score_sem": 0.1469057093692457, "eval_python_code_alpaca_emb_cos_sim": 0.7650465965270996, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008007274941242775, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.880115509033203, "eval_python_code_alpaca_n_ngrams_match_1": 10.232, "eval_python_code_alpaca_n_ngrams_match_2": 3.122, "eval_python_code_alpaca_n_ngrams_match_3": 1.086, "eval_python_code_alpaca_num_pred_words": 44.836, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.816331007930984, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33974047333292134, "eval_python_code_alpaca_runtime": 9.9608, "eval_python_code_alpaca_samples_per_second": 50.197, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4899935861346017, "eval_python_code_alpaca_token_set_f1_sem": 0.005416135402955436, "eval_python_code_alpaca_token_set_precision": 0.5599297540270534, "eval_python_code_alpaca_token_set_recall": 0.4565526748742225, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 163750 }, { "epoch": 31.44, "eval_wikibio_accuracy": 0.32615625, "eval_wikibio_bleu_score": 6.147025233425069, "eval_wikibio_bleu_score_sem": 0.22781627144441916, "eval_wikibio_emb_cos_sim": 0.7602639198303223, "eval_wikibio_emb_cos_sim_sem": 0.007502393891663727, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6860883235931396, "eval_wikibio_n_ngrams_match_1": 10.382, "eval_wikibio_n_ngrams_match_2": 3.492, "eval_wikibio_n_ngrams_match_3": 1.286, "eval_wikibio_num_pred_words": 36.454, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.888510442500575, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36492933202131284, "eval_wikibio_runtime": 10.3641, "eval_wikibio_samples_per_second": 48.243, "eval_wikibio_steps_per_second": 0.096, "eval_wikibio_token_set_f1": 0.32842318131728926, "eval_wikibio_token_set_f1_sem": 0.005267693123155587, "eval_wikibio_token_set_precision": 0.3375679619713562, "eval_wikibio_token_set_recall": 0.33484089095201286, "eval_wikibio_true_num_tokens": 61.1328125, "step": 163750 }, { "epoch": 31.44, "eval_nq_accuracy": 0.531625, "eval_nq_bleu_score": 11.709163434302502, "eval_nq_bleu_score_sem": 0.47968053842186154, "eval_nq_emb_cos_sim": 0.8332261443138123, "eval_nq_emb_cos_sim_sem": 0.007499645126887407, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.153640031814575, "eval_nq_n_ngrams_match_1": 23.192, "eval_nq_n_ngrams_match_2": 8.5, "eval_nq_n_ngrams_match_3": 3.932, "eval_nq_num_pred_words": 49.31, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.61616449790185, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4485341197756589, "eval_nq_runtime": 10.386, "eval_nq_samples_per_second": 48.142, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4646035692329125, "eval_nq_token_set_f1_sem": 0.004942877606261212, "eval_nq_token_set_precision": 0.4225956721210808, "eval_nq_token_set_recall": 0.5241405612354216, "eval_nq_true_num_tokens": 64.0, "step": 163750 }, { "epoch": 31.44, "learning_rate": 0.001, "loss": 2.53, "step": 163752 }, { "epoch": 31.44, "learning_rate": 0.001, "loss": 2.5325, "step": 163764 }, { "epoch": 31.45, "learning_rate": 0.001, "loss": 2.5326, "step": 163776 }, { "epoch": 31.45, "learning_rate": 0.001, "loss": 2.5315, "step": 163788 }, { "epoch": 31.45, "learning_rate": 0.001, "loss": 2.5288, "step": 163800 }, { "epoch": 31.45, "learning_rate": 0.001, "loss": 2.5273, "step": 163812 }, { "epoch": 31.46, "learning_rate": 0.001, "loss": 2.54, "step": 163824 }, { "epoch": 31.46, "learning_rate": 0.001, "loss": 2.5217, "step": 163836 }, { "epoch": 31.46, "learning_rate": 0.001, "loss": 2.5331, "step": 163848 }, { "epoch": 31.46, "learning_rate": 0.001, "loss": 2.5299, "step": 163860 }, { "epoch": 31.47, "learning_rate": 0.001, "loss": 2.5236, "step": 163872 }, { "epoch": 31.47, "learning_rate": 0.001, "loss": 2.5293, "step": 163884 }, { "epoch": 31.47, "learning_rate": 0.001, "loss": 2.5288, "step": 163896 }, { "epoch": 31.47, "learning_rate": 0.001, "loss": 2.5207, "step": 163908 }, { "epoch": 31.47, "learning_rate": 0.001, "loss": 2.5317, "step": 163920 }, { "epoch": 31.48, "learning_rate": 0.001, "loss": 2.5289, "step": 163932 }, { "epoch": 31.48, "learning_rate": 0.001, "loss": 2.5186, "step": 163944 }, { "epoch": 31.48, "learning_rate": 0.001, "loss": 2.5292, "step": 163956 }, { "epoch": 31.48, "learning_rate": 0.001, "loss": 2.5336, "step": 163968 }, { "epoch": 31.49, "learning_rate": 0.001, "loss": 2.5269, "step": 163980 }, { "epoch": 31.49, "learning_rate": 0.001, "loss": 2.5207, "step": 163992 }, { "epoch": 31.49, "learning_rate": 0.001, "loss": 2.5254, "step": 164004 }, { "epoch": 31.49, "learning_rate": 0.001, "loss": 2.5272, "step": 164016 }, { "epoch": 31.5, "learning_rate": 0.001, "loss": 2.5356, "step": 164028 }, { "epoch": 31.5, "learning_rate": 0.001, "loss": 2.519, "step": 164040 }, { "epoch": 31.5, "learning_rate": 0.001, "loss": 2.5175, "step": 164052 }, { "epoch": 31.5, "learning_rate": 0.001, "loss": 2.5293, "step": 164064 }, { "epoch": 31.5, "learning_rate": 0.001, "loss": 2.5371, "step": 164076 }, { "epoch": 31.51, "learning_rate": 0.001, "loss": 2.5306, "step": 164088 }, { "epoch": 31.51, "learning_rate": 0.001, "loss": 2.5396, "step": 164100 }, { "epoch": 31.51, "learning_rate": 0.001, "loss": 2.5272, "step": 164112 }, { "epoch": 31.51, "learning_rate": 0.001, "loss": 2.5226, "step": 164124 }, { "epoch": 31.52, "learning_rate": 0.001, "loss": 2.5238, "step": 164136 }, { "epoch": 31.52, "learning_rate": 0.001, "loss": 2.5428, "step": 164148 }, { "epoch": 31.52, "learning_rate": 0.001, "loss": 2.5368, "step": 164160 }, { "epoch": 31.52, "learning_rate": 0.001, "loss": 2.5262, "step": 164172 }, { "epoch": 31.53, "learning_rate": 0.001, "loss": 2.53, "step": 164184 }, { "epoch": 31.53, "learning_rate": 0.001, "loss": 2.5291, "step": 164196 }, { "epoch": 31.53, "learning_rate": 0.001, "loss": 2.5316, "step": 164208 }, { "epoch": 31.53, "learning_rate": 0.001, "loss": 2.5271, "step": 164220 }, { "epoch": 31.53, "learning_rate": 0.001, "loss": 2.5319, "step": 164232 }, { "epoch": 31.54, "learning_rate": 0.001, "loss": 2.5275, "step": 164244 }, { "epoch": 31.54, "learning_rate": 0.001, "loss": 2.5249, "step": 164256 }, { "epoch": 31.54, "learning_rate": 0.001, "loss": 2.5195, "step": 164268 }, { "epoch": 31.54, "learning_rate": 0.001, "loss": 2.5304, "step": 164280 }, { "epoch": 31.55, "learning_rate": 0.001, "loss": 2.5355, "step": 164292 }, { "epoch": 31.55, "learning_rate": 0.001, "loss": 2.5333, "step": 164304 }, { "epoch": 31.55, "learning_rate": 0.001, "loss": 2.5224, "step": 164316 }, { "epoch": 31.55, "learning_rate": 0.001, "loss": 2.5286, "step": 164328 }, { "epoch": 31.56, "learning_rate": 0.001, "loss": 2.5302, "step": 164340 }, { "epoch": 31.56, "learning_rate": 0.001, "loss": 2.5333, "step": 164352 }, { "epoch": 31.56, "learning_rate": 0.001, "loss": 2.5166, "step": 164364 }, { "epoch": 31.56, "eval_ag_news_accuracy": 0.326375, "eval_ag_news_bleu_score": 5.003465702907255, "eval_ag_news_bleu_score_sem": 0.1654964090580242, "eval_ag_news_emb_cos_sim": 0.814096212387085, "eval_ag_news_emb_cos_sim_sem": 0.007507160436812505, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.517871618270874, "eval_ag_news_n_ngrams_match_1": 14.266, "eval_ag_news_n_ngrams_match_2": 3.21, "eval_ag_news_n_ngrams_match_3": 0.97, "eval_ag_news_num_pred_words": 46.702, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.71259877119662, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3546250416272494, "eval_ag_news_runtime": 10.2094, "eval_ag_news_samples_per_second": 48.974, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.353953756214178, "eval_ag_news_token_set_f1_sem": 0.004544093782482766, "eval_ag_news_token_set_precision": 0.3390273724286383, "eval_ag_news_token_set_recall": 0.3876031235622757, "eval_ag_news_true_num_tokens": 56.09375, "step": 164375 }, { "epoch": 31.56, "eval_anthropic_toxic_prompts_accuracy": 0.115125, "eval_anthropic_toxic_prompts_bleu_score": 3.175341015605718, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11980818916127134, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6803178787231445, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008559745709691192, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.22593092918396, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 47.402, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.177001254874135, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21776062843113347, "eval_anthropic_toxic_prompts_runtime": 9.7449, "eval_anthropic_toxic_prompts_samples_per_second": 51.309, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3570546015576493, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526763686160576, "eval_anthropic_toxic_prompts_token_set_precision": 0.4420488178664897, "eval_anthropic_toxic_prompts_token_set_recall": 0.3233557179888408, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 164375 }, { "epoch": 31.56, "eval_arxiv_accuracy": 0.34890625, "eval_arxiv_bleu_score": 4.300640942706155, "eval_arxiv_bleu_score_sem": 0.12462781001762695, "eval_arxiv_emb_cos_sim": 0.7701647281646729, "eval_arxiv_emb_cos_sim_sem": 0.009626926304500301, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.373840093612671, "eval_arxiv_n_ngrams_match_1": 15.194, "eval_arxiv_n_ngrams_match_2": 2.918, "eval_arxiv_n_ngrams_match_3": 0.62, "eval_arxiv_num_pred_words": 40.104, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.190405999138104, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3629685994650904, "eval_arxiv_runtime": 10.1877, "eval_arxiv_samples_per_second": 49.079, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.35722902882629975, "eval_arxiv_token_set_f1_sem": 0.0042983235346720486, "eval_arxiv_token_set_precision": 0.310239895502916, "eval_arxiv_token_set_recall": 0.4374415791640846, "eval_arxiv_true_num_tokens": 64.0, "step": 164375 }, { "epoch": 31.56, "eval_python_code_alpaca_accuracy": 0.1600625, "eval_python_code_alpaca_bleu_score": 4.671859038326225, "eval_python_code_alpaca_bleu_score_sem": 0.14711054893136344, "eval_python_code_alpaca_emb_cos_sim": 0.7626504302024841, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008872178322068927, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.893451690673828, "eval_python_code_alpaca_n_ngrams_match_1": 9.994, "eval_python_code_alpaca_n_ngrams_match_2": 2.952, "eval_python_code_alpaca_n_ngrams_match_3": 0.982, "eval_python_code_alpaca_num_pred_words": 43.608, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.055524251587304, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33943716617776704, "eval_python_code_alpaca_runtime": 9.9029, "eval_python_code_alpaca_samples_per_second": 50.49, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4806877998489439, "eval_python_code_alpaca_token_set_f1_sem": 0.005594440906963912, "eval_python_code_alpaca_token_set_precision": 0.54350236287606, "eval_python_code_alpaca_token_set_recall": 0.45065989950433116, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 164375 }, { "epoch": 31.56, "eval_wikibio_accuracy": 0.3254375, "eval_wikibio_bleu_score": 6.085191009966251, "eval_wikibio_bleu_score_sem": 0.20736608793035471, "eval_wikibio_emb_cos_sim": 0.7358441352844238, "eval_wikibio_emb_cos_sim_sem": 0.00892497120109587, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6791563034057617, "eval_wikibio_n_ngrams_match_1": 10.086, "eval_wikibio_n_ngrams_match_2": 3.414, "eval_wikibio_n_ngrams_match_3": 1.248, "eval_wikibio_num_pred_words": 36.012, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.61295865157849, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36009769363907795, "eval_wikibio_runtime": 9.7555, "eval_wikibio_samples_per_second": 51.253, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.32237325424182417, "eval_wikibio_token_set_f1_sem": 0.005284262574665414, "eval_wikibio_token_set_precision": 0.3268104017877293, "eval_wikibio_token_set_recall": 0.33361421743377817, "eval_wikibio_true_num_tokens": 61.1328125, "step": 164375 }, { "epoch": 31.56, "eval_nq_accuracy": 0.53259375, "eval_nq_bleu_score": 11.931243476346538, "eval_nq_bleu_score_sem": 0.47923175101304166, "eval_nq_emb_cos_sim": 0.8351569771766663, "eval_nq_emb_cos_sim_sem": 0.006897443711374165, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.153125524520874, "eval_nq_n_ngrams_match_1": 23.21, "eval_nq_n_ngrams_match_2": 8.654, "eval_nq_n_ngrams_match_3": 3.974, "eval_nq_num_pred_words": 48.782, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.611732558654253, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45251041126762526, "eval_nq_runtime": 11.1426, "eval_nq_samples_per_second": 44.873, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.4645486155757907, "eval_nq_token_set_f1_sem": 0.004969776111751923, "eval_nq_token_set_precision": 0.42241175680633714, "eval_nq_token_set_recall": 0.5251101939435706, "eval_nq_true_num_tokens": 64.0, "step": 164375 }, { "epoch": 31.56, "learning_rate": 0.001, "loss": 2.5303, "step": 164376 }, { "epoch": 31.56, "learning_rate": 0.001, "loss": 2.5281, "step": 164388 }, { "epoch": 31.57, "learning_rate": 0.001, "loss": 2.526, "step": 164400 }, { "epoch": 31.57, "learning_rate": 0.001, "loss": 2.5295, "step": 164412 }, { "epoch": 31.57, "learning_rate": 0.001, "loss": 2.5278, "step": 164424 }, { "epoch": 31.57, "learning_rate": 0.001, "loss": 2.5295, "step": 164436 }, { "epoch": 31.58, "learning_rate": 0.001, "loss": 2.5262, "step": 164448 }, { "epoch": 31.58, "learning_rate": 0.001, "loss": 2.5292, "step": 164460 }, { "epoch": 31.58, "learning_rate": 0.001, "loss": 2.5244, "step": 164472 }, { "epoch": 31.58, "learning_rate": 0.001, "loss": 2.5299, "step": 164484 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 2.5291, "step": 164496 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 2.5276, "step": 164508 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 2.5301, "step": 164520 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 2.5314, "step": 164532 }, { "epoch": 31.59, "learning_rate": 0.001, "loss": 2.5396, "step": 164544 }, { "epoch": 31.6, "learning_rate": 0.001, "loss": 2.5274, "step": 164556 }, { "epoch": 31.6, "learning_rate": 0.001, "loss": 2.5185, "step": 164568 }, { "epoch": 31.6, "learning_rate": 0.001, "loss": 2.5255, "step": 164580 }, { "epoch": 31.6, "learning_rate": 0.001, "loss": 2.5358, "step": 164592 }, { "epoch": 31.61, "learning_rate": 0.001, "loss": 2.5233, "step": 164604 }, { "epoch": 31.61, "learning_rate": 0.001, "loss": 2.5202, "step": 164616 }, { "epoch": 31.61, "learning_rate": 0.001, "loss": 2.5245, "step": 164628 }, { "epoch": 31.61, "learning_rate": 0.001, "loss": 2.5285, "step": 164640 }, { "epoch": 31.62, "learning_rate": 0.001, "loss": 2.5229, "step": 164652 }, { "epoch": 31.62, "learning_rate": 0.001, "loss": 2.5298, "step": 164664 }, { "epoch": 31.62, "learning_rate": 0.001, "loss": 2.5255, "step": 164676 }, { "epoch": 31.62, "learning_rate": 0.001, "loss": 2.5187, "step": 164688 }, { "epoch": 31.62, "learning_rate": 0.001, "loss": 2.532, "step": 164700 }, { "epoch": 31.63, "learning_rate": 0.001, "loss": 2.5282, "step": 164712 }, { "epoch": 31.63, "learning_rate": 0.001, "loss": 2.5307, "step": 164724 }, { "epoch": 31.63, "learning_rate": 0.001, "loss": 2.5295, "step": 164736 }, { "epoch": 31.63, "learning_rate": 0.001, "loss": 2.5291, "step": 164748 }, { "epoch": 31.64, "learning_rate": 0.001, "loss": 2.528, "step": 164760 }, { "epoch": 31.64, "learning_rate": 0.001, "loss": 2.5221, "step": 164772 }, { "epoch": 31.64, "learning_rate": 0.001, "loss": 2.5207, "step": 164784 }, { "epoch": 31.64, "learning_rate": 0.001, "loss": 2.5345, "step": 164796 }, { "epoch": 31.65, "learning_rate": 0.001, "loss": 2.5211, "step": 164808 }, { "epoch": 31.65, "learning_rate": 0.001, "loss": 2.5275, "step": 164820 }, { "epoch": 31.65, "learning_rate": 0.001, "loss": 2.5336, "step": 164832 }, { "epoch": 31.65, "learning_rate": 0.001, "loss": 2.5355, "step": 164844 }, { "epoch": 31.65, "learning_rate": 0.001, "loss": 2.5255, "step": 164856 }, { "epoch": 31.66, "learning_rate": 0.001, "loss": 2.536, "step": 164868 }, { "epoch": 31.66, "learning_rate": 0.001, "loss": 2.5328, "step": 164880 }, { "epoch": 31.66, "learning_rate": 0.001, "loss": 2.5364, "step": 164892 }, { "epoch": 31.66, "learning_rate": 0.001, "loss": 2.5346, "step": 164904 }, { "epoch": 31.67, "learning_rate": 0.001, "loss": 2.5284, "step": 164916 }, { "epoch": 31.67, "learning_rate": 0.001, "loss": 2.529, "step": 164928 }, { "epoch": 31.67, "learning_rate": 0.001, "loss": 2.5346, "step": 164940 }, { "epoch": 31.67, "learning_rate": 0.001, "loss": 2.5314, "step": 164952 }, { "epoch": 31.68, "learning_rate": 0.001, "loss": 2.5366, "step": 164964 }, { "epoch": 31.68, "learning_rate": 0.001, "loss": 2.5302, "step": 164976 }, { "epoch": 31.68, "learning_rate": 0.001, "loss": 2.5277, "step": 164988 }, { "epoch": 31.68, "learning_rate": 0.001, "loss": 2.5155, "step": 165000 }, { "epoch": 31.68, "eval_ag_news_accuracy": 0.325625, "eval_ag_news_bleu_score": 4.949571385475815, "eval_ag_news_bleu_score_sem": 0.1514546212422152, "eval_ag_news_emb_cos_sim": 0.820349395275116, "eval_ag_news_emb_cos_sim_sem": 0.006364998622868993, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5219197273254395, "eval_ag_news_n_ngrams_match_1": 14.3, "eval_ag_news_n_ngrams_match_2": 3.28, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 46.618, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.849347647974305, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3556960223375405, "eval_ag_news_runtime": 10.5222, "eval_ag_news_samples_per_second": 47.519, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3556806451883441, "eval_ag_news_token_set_f1_sem": 0.004507897490328704, "eval_ag_news_token_set_precision": 0.3419088772426846, "eval_ag_news_token_set_recall": 0.38412966770720486, "eval_ag_news_true_num_tokens": 56.09375, "step": 165000 }, { "epoch": 31.68, "eval_anthropic_toxic_prompts_accuracy": 0.1146875, "eval_anthropic_toxic_prompts_bleu_score": 3.165954720330608, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1191423804704478, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6786759495735168, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00955979164317851, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2220876216888428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.198, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.94, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 46.818, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.08042400416108, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21307968750888823, "eval_anthropic_toxic_prompts_runtime": 9.8868, "eval_anthropic_toxic_prompts_samples_per_second": 50.572, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3608222408362462, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006772623360191061, "eval_anthropic_toxic_prompts_token_set_precision": 0.4384232433800395, "eval_anthropic_toxic_prompts_token_set_recall": 0.3327021523574849, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 165000 }, { "epoch": 31.68, "eval_arxiv_accuracy": 0.34846875, "eval_arxiv_bleu_score": 4.390771225550452, "eval_arxiv_bleu_score_sem": 0.12442418811137847, "eval_arxiv_emb_cos_sim": 0.7720425128936768, "eval_arxiv_emb_cos_sim_sem": 0.008063797816362931, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3646864891052246, "eval_arxiv_n_ngrams_match_1": 15.256, "eval_arxiv_n_ngrams_match_2": 2.968, "eval_arxiv_n_ngrams_match_3": 0.662, "eval_arxiv_num_pred_words": 40.028, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.9244277541978, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3680764397478407, "eval_arxiv_runtime": 10.2606, "eval_arxiv_samples_per_second": 48.73, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35985968147687586, "eval_arxiv_token_set_f1_sem": 0.004275902651255107, "eval_arxiv_token_set_precision": 0.31164442399355274, "eval_arxiv_token_set_recall": 0.4433459528223471, "eval_arxiv_true_num_tokens": 64.0, "step": 165000 }, { "epoch": 31.68, "eval_python_code_alpaca_accuracy": 0.16290625, "eval_python_code_alpaca_bleu_score": 4.832747243268115, "eval_python_code_alpaca_bleu_score_sem": 0.16041248560223093, "eval_python_code_alpaca_emb_cos_sim": 0.746342122554779, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009925256844784092, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8708529472351074, "eval_python_code_alpaca_n_ngrams_match_1": 9.95, "eval_python_code_alpaca_n_ngrams_match_2": 2.998, "eval_python_code_alpaca_n_ngrams_match_3": 1.036, "eval_python_code_alpaca_num_pred_words": 42.788, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.652068063203664, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3406843589151201, "eval_python_code_alpaca_runtime": 9.8107, "eval_python_code_alpaca_samples_per_second": 50.965, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4863497168353393, "eval_python_code_alpaca_token_set_f1_sem": 0.005579415155745632, "eval_python_code_alpaca_token_set_precision": 0.5435204942939534, "eval_python_code_alpaca_token_set_recall": 0.46376368221896563, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 165000 }, { "epoch": 31.68, "eval_wikibio_accuracy": 0.32428125, "eval_wikibio_bleu_score": 6.2112413855973605, "eval_wikibio_bleu_score_sem": 0.22597355532603242, "eval_wikibio_emb_cos_sim": 0.7518303394317627, "eval_wikibio_emb_cos_sim_sem": 0.007407385675497059, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.723221778869629, "eval_wikibio_n_ngrams_match_1": 10.15, "eval_wikibio_n_ngrams_match_2": 3.398, "eval_wikibio_n_ngrams_match_3": 1.294, "eval_wikibio_num_pred_words": 36.474, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.39755325109265, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3604627382537412, "eval_wikibio_runtime": 10.3022, "eval_wikibio_samples_per_second": 48.533, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.32199169460023463, "eval_wikibio_token_set_f1_sem": 0.00509250618171414, "eval_wikibio_token_set_precision": 0.3314764950137064, "eval_wikibio_token_set_recall": 0.32681247147373677, "eval_wikibio_true_num_tokens": 61.1328125, "step": 165000 }, { "epoch": 31.68, "eval_nq_accuracy": 0.5315625, "eval_nq_bleu_score": 11.77311915757099, "eval_nq_bleu_score_sem": 0.4818562872841172, "eval_nq_emb_cos_sim": 0.8427734375, "eval_nq_emb_cos_sim_sem": 0.0066100390830223146, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.154539108276367, "eval_nq_n_ngrams_match_1": 23.3, "eval_nq_n_ngrams_match_2": 8.52, "eval_nq_n_ngrams_match_3": 3.85, "eval_nq_num_pred_words": 49.382, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.623914572025397, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4518855102801904, "eval_nq_runtime": 14.3007, "eval_nq_samples_per_second": 34.963, "eval_nq_steps_per_second": 0.07, "eval_nq_token_set_f1": 0.46532767579981826, "eval_nq_token_set_f1_sem": 0.004879542835258077, "eval_nq_token_set_precision": 0.4228913868077098, "eval_nq_token_set_recall": 0.5248575728469105, "eval_nq_true_num_tokens": 64.0, "step": 165000 }, { "epoch": 31.68, "learning_rate": 0.001, "loss": 2.5235, "step": 165012 }, { "epoch": 31.69, "learning_rate": 0.001, "loss": 2.5295, "step": 165024 }, { "epoch": 31.69, "learning_rate": 0.001, "loss": 2.5263, "step": 165036 }, { "epoch": 31.69, "learning_rate": 0.001, "loss": 2.5298, "step": 165048 }, { "epoch": 31.69, "learning_rate": 0.001, "loss": 2.5418, "step": 165060 }, { "epoch": 31.7, "learning_rate": 0.001, "loss": 2.5312, "step": 165072 }, { "epoch": 31.7, "learning_rate": 0.001, "loss": 2.5213, "step": 165084 }, { "epoch": 31.7, "learning_rate": 0.001, "loss": 2.5429, "step": 165096 }, { "epoch": 31.7, "learning_rate": 0.001, "loss": 2.5396, "step": 165108 }, { "epoch": 31.71, "learning_rate": 0.001, "loss": 2.531, "step": 165120 }, { "epoch": 31.71, "learning_rate": 0.001, "loss": 2.5369, "step": 165132 }, { "epoch": 31.71, "learning_rate": 0.001, "loss": 2.5369, "step": 165144 }, { "epoch": 31.71, "learning_rate": 0.001, "loss": 2.5381, "step": 165156 }, { "epoch": 31.71, "learning_rate": 0.001, "loss": 2.5296, "step": 165168 }, { "epoch": 31.72, "learning_rate": 0.001, "loss": 2.5404, "step": 165180 }, { "epoch": 31.72, "learning_rate": 0.001, "loss": 2.5355, "step": 165192 }, { "epoch": 31.72, "learning_rate": 0.001, "loss": 2.5375, "step": 165204 }, { "epoch": 31.72, "learning_rate": 0.001, "loss": 2.5318, "step": 165216 }, { "epoch": 31.73, "learning_rate": 0.001, "loss": 2.5398, "step": 165228 }, { "epoch": 31.73, "learning_rate": 0.001, "loss": 2.5292, "step": 165240 }, { "epoch": 31.73, "learning_rate": 0.001, "loss": 2.5294, "step": 165252 }, { "epoch": 31.73, "learning_rate": 0.001, "loss": 2.5239, "step": 165264 }, { "epoch": 31.74, "learning_rate": 0.001, "loss": 2.5304, "step": 165276 }, { "epoch": 31.74, "learning_rate": 0.001, "loss": 2.5252, "step": 165288 }, { "epoch": 31.74, "learning_rate": 0.001, "loss": 2.5407, "step": 165300 }, { "epoch": 31.74, "learning_rate": 0.001, "loss": 2.5258, "step": 165312 }, { "epoch": 31.74, "learning_rate": 0.001, "loss": 2.5316, "step": 165324 }, { "epoch": 31.75, "learning_rate": 0.001, "loss": 2.5345, "step": 165336 }, { "epoch": 31.75, "learning_rate": 0.001, "loss": 2.5392, "step": 165348 }, { "epoch": 31.75, "learning_rate": 0.001, "loss": 2.5219, "step": 165360 }, { "epoch": 31.75, "learning_rate": 0.001, "loss": 2.5228, "step": 165372 }, { "epoch": 31.76, "learning_rate": 0.001, "loss": 2.5176, "step": 165384 }, { "epoch": 31.76, "learning_rate": 0.001, "loss": 2.5164, "step": 165396 }, { "epoch": 31.76, "learning_rate": 0.001, "loss": 2.5266, "step": 165408 }, { "epoch": 31.76, "learning_rate": 0.001, "loss": 2.5276, "step": 165420 }, { "epoch": 31.76, "learning_rate": 0.001, "loss": 2.5369, "step": 165432 }, { "epoch": 31.77, "learning_rate": 0.001, "loss": 2.5293, "step": 165444 }, { "epoch": 31.77, "learning_rate": 0.001, "loss": 2.5255, "step": 165456 }, { "epoch": 31.77, "learning_rate": 0.001, "loss": 2.5402, "step": 165468 }, { "epoch": 31.77, "learning_rate": 0.001, "loss": 2.5337, "step": 165480 }, { "epoch": 31.78, "learning_rate": 0.001, "loss": 2.5338, "step": 165492 }, { "epoch": 31.78, "learning_rate": 0.001, "loss": 2.5303, "step": 165504 }, { "epoch": 31.78, "learning_rate": 0.001, "loss": 2.5263, "step": 165516 }, { "epoch": 31.78, "learning_rate": 0.001, "loss": 2.5291, "step": 165528 }, { "epoch": 31.79, "learning_rate": 0.001, "loss": 2.5378, "step": 165540 }, { "epoch": 31.79, "learning_rate": 0.001, "loss": 2.5254, "step": 165552 }, { "epoch": 31.79, "learning_rate": 0.001, "loss": 2.5322, "step": 165564 }, { "epoch": 31.79, "learning_rate": 0.001, "loss": 2.5253, "step": 165576 }, { "epoch": 31.79, "learning_rate": 0.001, "loss": 2.5293, "step": 165588 }, { "epoch": 31.8, "learning_rate": 0.001, "loss": 2.5322, "step": 165600 }, { "epoch": 31.8, "learning_rate": 0.001, "loss": 2.5285, "step": 165612 }, { "epoch": 31.8, "learning_rate": 0.001, "loss": 2.5393, "step": 165624 }, { "epoch": 31.8, "eval_ag_news_accuracy": 0.3273125, "eval_ag_news_bleu_score": 4.855065404281462, "eval_ag_news_bleu_score_sem": 0.15106250472974067, "eval_ag_news_emb_cos_sim": 0.8158227205276489, "eval_ag_news_emb_cos_sim_sem": 0.007410889132279528, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5137622356414795, "eval_ag_news_n_ngrams_match_1": 14.24, "eval_ag_news_n_ngrams_match_2": 3.204, "eval_ag_news_n_ngrams_match_3": 0.934, "eval_ag_news_num_pred_words": 46.844, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.57434506665822, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3553260578487887, "eval_ag_news_runtime": 12.0382, "eval_ag_news_samples_per_second": 41.534, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.35394369376665724, "eval_ag_news_token_set_f1_sem": 0.004517835193987404, "eval_ag_news_token_set_precision": 0.3399524740158876, "eval_ag_news_token_set_recall": 0.3824921760346473, "eval_ag_news_true_num_tokens": 56.09375, "step": 165625 }, { "epoch": 31.8, "eval_anthropic_toxic_prompts_accuracy": 0.114, "eval_anthropic_toxic_prompts_bleu_score": 3.0203975735697655, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11458695695530144, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762200593948364, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009375118640982949, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2238645553588867, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.128, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.838, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.674, "eval_anthropic_toxic_prompts_num_pred_words": 47.64, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.12502987313139, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21170482733460078, "eval_anthropic_toxic_prompts_runtime": 11.6261, "eval_anthropic_toxic_prompts_samples_per_second": 43.006, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.35560367711589413, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006625378686358801, "eval_anthropic_toxic_prompts_token_set_precision": 0.4345931557504943, "eval_anthropic_toxic_prompts_token_set_recall": 0.3297450542969775, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 165625 }, { "epoch": 31.8, "eval_arxiv_accuracy": 0.350375, "eval_arxiv_bleu_score": 4.424113702290277, "eval_arxiv_bleu_score_sem": 0.12201878097626956, "eval_arxiv_emb_cos_sim": 0.777930736541748, "eval_arxiv_emb_cos_sim_sem": 0.0067551256577540195, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3710415363311768, "eval_arxiv_n_ngrams_match_1": 15.54, "eval_arxiv_n_ngrams_match_2": 3.024, "eval_arxiv_n_ngrams_match_3": 0.662, "eval_arxiv_num_pred_words": 40.888, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.10882917782503, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.370172741487134, "eval_arxiv_runtime": 11.6252, "eval_arxiv_samples_per_second": 43.01, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.3644618244226642, "eval_arxiv_token_set_f1_sem": 0.00395826682458891, "eval_arxiv_token_set_precision": 0.31548204693126, "eval_arxiv_token_set_recall": 0.4482192322084978, "eval_arxiv_true_num_tokens": 64.0, "step": 165625 }, { "epoch": 31.8, "eval_python_code_alpaca_accuracy": 0.16290625, "eval_python_code_alpaca_bleu_score": 4.654106103477634, "eval_python_code_alpaca_bleu_score_sem": 0.15000589666999495, "eval_python_code_alpaca_emb_cos_sim": 0.7658255696296692, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008851702856226728, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.837296962738037, "eval_python_code_alpaca_n_ngrams_match_1": 9.856, "eval_python_code_alpaca_n_ngrams_match_2": 2.976, "eval_python_code_alpaca_n_ngrams_match_3": 1.034, "eval_python_code_alpaca_num_pred_words": 44.628, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.069563456236036, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32679406629130714, "eval_python_code_alpaca_runtime": 12.2432, "eval_python_code_alpaca_samples_per_second": 40.839, "eval_python_code_alpaca_steps_per_second": 0.082, "eval_python_code_alpaca_token_set_f1": 0.4878536234581019, "eval_python_code_alpaca_token_set_f1_sem": 0.0055525206255058164, "eval_python_code_alpaca_token_set_precision": 0.5359377455962341, "eval_python_code_alpaca_token_set_recall": 0.4689726526761777, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 165625 }, { "epoch": 31.8, "eval_wikibio_accuracy": 0.32546875, "eval_wikibio_bleu_score": 5.94630150273813, "eval_wikibio_bleu_score_sem": 0.20746401233977205, "eval_wikibio_emb_cos_sim": 0.7495725154876709, "eval_wikibio_emb_cos_sim_sem": 0.008162560394769607, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6943857669830322, "eval_wikibio_n_ngrams_match_1": 10.038, "eval_wikibio_n_ngrams_match_2": 3.39, "eval_wikibio_n_ngrams_match_3": 1.216, "eval_wikibio_num_pred_words": 35.714, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.22086001891623, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3580946561096108, "eval_wikibio_runtime": 12.0447, "eval_wikibio_samples_per_second": 41.512, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.32064573582917255, "eval_wikibio_token_set_f1_sem": 0.005352860187990176, "eval_wikibio_token_set_precision": 0.32857444213278747, "eval_wikibio_token_set_recall": 0.33130887166340145, "eval_wikibio_true_num_tokens": 61.1328125, "step": 165625 }, { "epoch": 31.8, "eval_nq_accuracy": 0.531375, "eval_nq_bleu_score": 11.929233576691335, "eval_nq_bleu_score_sem": 0.49077956369423203, "eval_nq_emb_cos_sim": 0.8397247791290283, "eval_nq_emb_cos_sim_sem": 0.007308006699430911, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1550819873809814, "eval_nq_n_ngrams_match_1": 23.258, "eval_nq_n_ngrams_match_2": 8.548, "eval_nq_n_ngrams_match_3": 3.98, "eval_nq_num_pred_words": 49.092, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.628597586086753, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4528662764298266, "eval_nq_runtime": 28.7433, "eval_nq_samples_per_second": 17.395, "eval_nq_steps_per_second": 0.035, "eval_nq_token_set_f1": 0.46646194474816904, "eval_nq_token_set_f1_sem": 0.004986225753192338, "eval_nq_token_set_precision": 0.4247259510045065, "eval_nq_token_set_recall": 0.5254762336280152, "eval_nq_true_num_tokens": 64.0, "step": 165625 }, { "epoch": 31.8, "learning_rate": 0.001, "loss": 2.5363, "step": 165636 }, { "epoch": 31.81, "learning_rate": 0.001, "loss": 2.5237, "step": 165648 }, { "epoch": 31.81, "learning_rate": 0.001, "loss": 2.523, "step": 165660 }, { "epoch": 31.81, "learning_rate": 0.001, "loss": 2.5218, "step": 165672 }, { "epoch": 31.81, "learning_rate": 0.001, "loss": 2.5285, "step": 165684 }, { "epoch": 31.82, "learning_rate": 0.001, "loss": 2.5306, "step": 165696 }, { "epoch": 31.82, "learning_rate": 0.001, "loss": 2.5225, "step": 165708 }, { "epoch": 31.82, "learning_rate": 0.001, "loss": 2.5288, "step": 165720 }, { "epoch": 31.82, "learning_rate": 0.001, "loss": 2.5295, "step": 165732 }, { "epoch": 31.82, "learning_rate": 0.001, "loss": 2.5204, "step": 165744 }, { "epoch": 31.83, "learning_rate": 0.001, "loss": 2.5279, "step": 165756 }, { "epoch": 31.83, "learning_rate": 0.001, "loss": 2.5243, "step": 165768 }, { "epoch": 31.83, "learning_rate": 0.001, "loss": 2.5298, "step": 165780 }, { "epoch": 31.83, "learning_rate": 0.001, "loss": 2.5247, "step": 165792 }, { "epoch": 31.84, "learning_rate": 0.001, "loss": 2.5305, "step": 165804 }, { "epoch": 31.84, "learning_rate": 0.001, "loss": 2.5353, "step": 165816 }, { "epoch": 31.84, "learning_rate": 0.001, "loss": 2.5345, "step": 165828 }, { "epoch": 31.84, "learning_rate": 0.001, "loss": 2.5353, "step": 165840 }, { "epoch": 31.85, "learning_rate": 0.001, "loss": 2.531, "step": 165852 }, { "epoch": 31.85, "learning_rate": 0.001, "loss": 2.5262, "step": 165864 }, { "epoch": 31.85, "learning_rate": 0.001, "loss": 2.5312, "step": 165876 }, { "epoch": 31.85, "learning_rate": 0.001, "loss": 2.5247, "step": 165888 }, { "epoch": 31.85, "learning_rate": 0.001, "loss": 2.5369, "step": 165900 }, { "epoch": 31.86, "learning_rate": 0.001, "loss": 2.528, "step": 165912 }, { "epoch": 31.86, "learning_rate": 0.001, "loss": 2.5309, "step": 165924 }, { "epoch": 31.86, "learning_rate": 0.001, "loss": 2.5344, "step": 165936 }, { "epoch": 31.86, "learning_rate": 0.001, "loss": 2.5311, "step": 165948 }, { "epoch": 31.87, "learning_rate": 0.001, "loss": 2.53, "step": 165960 }, { "epoch": 31.87, "learning_rate": 0.001, "loss": 2.5309, "step": 165972 }, { "epoch": 31.87, "learning_rate": 0.001, "loss": 2.5289, "step": 165984 }, { "epoch": 31.87, "learning_rate": 0.001, "loss": 2.5431, "step": 165996 }, { "epoch": 31.88, "learning_rate": 0.001, "loss": 2.5352, "step": 166008 }, { "epoch": 31.88, "learning_rate": 0.001, "loss": 2.547, "step": 166020 }, { "epoch": 31.88, "learning_rate": 0.001, "loss": 2.5345, "step": 166032 }, { "epoch": 31.88, "learning_rate": 0.001, "loss": 2.5402, "step": 166044 }, { "epoch": 31.88, "learning_rate": 0.001, "loss": 2.5359, "step": 166056 }, { "epoch": 31.89, "learning_rate": 0.001, "loss": 2.54, "step": 166068 }, { "epoch": 31.89, "learning_rate": 0.001, "loss": 2.5332, "step": 166080 }, { "epoch": 31.89, "learning_rate": 0.001, "loss": 2.5321, "step": 166092 }, { "epoch": 31.89, "learning_rate": 0.001, "loss": 2.5277, "step": 166104 }, { "epoch": 31.9, "learning_rate": 0.001, "loss": 2.5292, "step": 166116 }, { "epoch": 31.9, "learning_rate": 0.001, "loss": 2.5216, "step": 166128 }, { "epoch": 31.9, "learning_rate": 0.001, "loss": 2.5349, "step": 166140 }, { "epoch": 31.9, "learning_rate": 0.001, "loss": 2.5331, "step": 166152 }, { "epoch": 31.91, "learning_rate": 0.001, "loss": 2.5259, "step": 166164 }, { "epoch": 31.91, "learning_rate": 0.001, "loss": 2.5309, "step": 166176 }, { "epoch": 31.91, "learning_rate": 0.001, "loss": 2.5331, "step": 166188 }, { "epoch": 31.91, "learning_rate": 0.001, "loss": 2.5321, "step": 166200 }, { "epoch": 31.91, "learning_rate": 0.001, "loss": 2.5422, "step": 166212 }, { "epoch": 31.92, "learning_rate": 0.001, "loss": 2.5342, "step": 166224 }, { "epoch": 31.92, "learning_rate": 0.001, "loss": 2.5329, "step": 166236 }, { "epoch": 31.92, "learning_rate": 0.001, "loss": 2.5376, "step": 166248 }, { "epoch": 31.92, "eval_ag_news_accuracy": 0.3256875, "eval_ag_news_bleu_score": 4.85846646317945, "eval_ag_news_bleu_score_sem": 0.15553541581902489, "eval_ag_news_emb_cos_sim": 0.8113851547241211, "eval_ag_news_emb_cos_sim_sem": 0.00794740534293305, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.525256872177124, "eval_ag_news_n_ngrams_match_1": 14.284, "eval_ag_news_n_ngrams_match_2": 3.108, "eval_ag_news_n_ngrams_match_3": 0.93, "eval_ag_news_num_pred_words": 46.848, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.96249651628512, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35519615332434323, "eval_ag_news_runtime": 12.6734, "eval_ag_news_samples_per_second": 39.453, "eval_ag_news_steps_per_second": 0.079, "eval_ag_news_token_set_f1": 0.3536932412810227, "eval_ag_news_token_set_f1_sem": 0.004645666303538005, "eval_ag_news_token_set_precision": 0.3404722191832777, "eval_ag_news_token_set_recall": 0.38095851057201485, "eval_ag_news_true_num_tokens": 56.09375, "step": 166250 }, { "epoch": 31.92, "eval_anthropic_toxic_prompts_accuracy": 0.115625, "eval_anthropic_toxic_prompts_bleu_score": 3.144514809554743, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11103306250104761, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6765865087509155, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009198360026684322, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2287395000457764, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.254, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.968, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 46.928, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.24781203899228, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21664607644543715, "eval_anthropic_toxic_prompts_runtime": 11.9946, "eval_anthropic_toxic_prompts_samples_per_second": 41.686, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3545282092160286, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066832747366794955, "eval_anthropic_toxic_prompts_token_set_precision": 0.4400092299813725, "eval_anthropic_toxic_prompts_token_set_recall": 0.3213217359727643, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 166250 }, { "epoch": 31.92, "eval_arxiv_accuracy": 0.3515625, "eval_arxiv_bleu_score": 4.306540054361915, "eval_arxiv_bleu_score_sem": 0.11640016404470645, "eval_arxiv_emb_cos_sim": 0.7607088088989258, "eval_arxiv_emb_cos_sim_sem": 0.008919630405136129, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3544704914093018, "eval_arxiv_n_ngrams_match_1": 15.138, "eval_arxiv_n_ngrams_match_2": 2.948, "eval_arxiv_n_ngrams_match_3": 0.618, "eval_arxiv_num_pred_words": 39.854, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.630440112311092, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36514489218200186, "eval_arxiv_runtime": 13.6292, "eval_arxiv_samples_per_second": 36.686, "eval_arxiv_steps_per_second": 0.073, "eval_arxiv_token_set_f1": 0.35790601478294864, "eval_arxiv_token_set_f1_sem": 0.004124732052069127, "eval_arxiv_token_set_precision": 0.3078270279633821, "eval_arxiv_token_set_recall": 0.44536002406991937, "eval_arxiv_true_num_tokens": 64.0, "step": 166250 }, { "epoch": 31.92, "eval_python_code_alpaca_accuracy": 0.16278125, "eval_python_code_alpaca_bleu_score": 4.766742375650273, "eval_python_code_alpaca_bleu_score_sem": 0.15655899201392967, "eval_python_code_alpaca_emb_cos_sim": 0.7535933256149292, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011082196397486737, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.870575189590454, "eval_python_code_alpaca_n_ngrams_match_1": 9.866, "eval_python_code_alpaca_n_ngrams_match_2": 2.9, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 42.66, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.647165747214554, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.338864600954744, "eval_python_code_alpaca_runtime": 11.8655, "eval_python_code_alpaca_samples_per_second": 42.139, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.480203564277494, "eval_python_code_alpaca_token_set_f1_sem": 0.005615297179799518, "eval_python_code_alpaca_token_set_precision": 0.538813981123486, "eval_python_code_alpaca_token_set_recall": 0.45205983122696125, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 166250 }, { "epoch": 31.92, "eval_wikibio_accuracy": 0.32484375, "eval_wikibio_bleu_score": 6.0773489546388095, "eval_wikibio_bleu_score_sem": 0.2138823830121203, "eval_wikibio_emb_cos_sim": 0.7354307174682617, "eval_wikibio_emb_cos_sim_sem": 0.01016705068051306, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7122159004211426, "eval_wikibio_n_ngrams_match_1": 10.266, "eval_wikibio_n_ngrams_match_2": 3.498, "eval_wikibio_n_ngrams_match_3": 1.278, "eval_wikibio_num_pred_words": 36.418, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.94443486868525, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36146900323814657, "eval_wikibio_runtime": 28.6893, "eval_wikibio_samples_per_second": 17.428, "eval_wikibio_steps_per_second": 0.035, "eval_wikibio_token_set_f1": 0.322545845530306, "eval_wikibio_token_set_f1_sem": 0.005367924037990663, "eval_wikibio_token_set_precision": 0.333561766673221, "eval_wikibio_token_set_recall": 0.32955301613349075, "eval_wikibio_true_num_tokens": 61.1328125, "step": 166250 }, { "epoch": 31.92, "eval_nq_accuracy": 0.53090625, "eval_nq_bleu_score": 12.106554087395727, "eval_nq_bleu_score_sem": 0.4882764762626856, "eval_nq_emb_cos_sim": 0.8306553363800049, "eval_nq_emb_cos_sim_sem": 0.00783127628730636, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1556053161621094, "eval_nq_n_ngrams_match_1": 23.284, "eval_nq_n_ngrams_match_2": 8.658, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 48.89, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.633114361320478, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45088475509557785, "eval_nq_runtime": 27.9785, "eval_nq_samples_per_second": 17.871, "eval_nq_steps_per_second": 0.036, "eval_nq_token_set_f1": 0.4677196245079925, "eval_nq_token_set_f1_sem": 0.005095961423859508, "eval_nq_token_set_precision": 0.4271423837654825, "eval_nq_token_set_recall": 0.525988069901891, "eval_nq_true_num_tokens": 64.0, "step": 166250 }, { "epoch": 31.92, "learning_rate": 0.001, "loss": 2.5329, "step": 166260 }, { "epoch": 31.93, "learning_rate": 0.001, "loss": 2.53, "step": 166272 }, { "epoch": 31.93, "learning_rate": 0.001, "loss": 2.5256, "step": 166284 }, { "epoch": 31.93, "learning_rate": 0.001, "loss": 2.524, "step": 166296 }, { "epoch": 31.93, "learning_rate": 0.001, "loss": 2.5199, "step": 166308 }, { "epoch": 31.94, "learning_rate": 0.001, "loss": 2.5285, "step": 166320 }, { "epoch": 31.94, "learning_rate": 0.001, "loss": 2.5284, "step": 166332 }, { "epoch": 31.94, "learning_rate": 0.001, "loss": 2.523, "step": 166344 }, { "epoch": 31.94, "learning_rate": 0.001, "loss": 2.5277, "step": 166356 }, { "epoch": 31.94, "learning_rate": 0.001, "loss": 2.5319, "step": 166368 }, { "epoch": 31.95, "learning_rate": 0.001, "loss": 2.5381, "step": 166380 }, { "epoch": 31.95, "learning_rate": 0.001, "loss": 2.5333, "step": 166392 }, { "epoch": 31.95, "learning_rate": 0.001, "loss": 2.532, "step": 166404 }, { "epoch": 31.95, "learning_rate": 0.001, "loss": 2.5363, "step": 166416 }, { "epoch": 31.96, "learning_rate": 0.001, "loss": 2.5377, "step": 166428 }, { "epoch": 31.96, "learning_rate": 0.001, "loss": 2.5264, "step": 166440 }, { "epoch": 31.96, "learning_rate": 0.001, "loss": 2.5236, "step": 166452 }, { "epoch": 31.96, "learning_rate": 0.001, "loss": 2.5308, "step": 166464 }, { "epoch": 31.97, "learning_rate": 0.001, "loss": 2.5351, "step": 166476 }, { "epoch": 31.97, "learning_rate": 0.001, "loss": 2.5301, "step": 166488 }, { "epoch": 31.97, "learning_rate": 0.001, "loss": 2.5237, "step": 166500 }, { "epoch": 31.97, "learning_rate": 0.001, "loss": 2.5386, "step": 166512 }, { "epoch": 31.97, "learning_rate": 0.001, "loss": 2.5247, "step": 166524 }, { "epoch": 31.98, "learning_rate": 0.001, "loss": 2.5293, "step": 166536 }, { "epoch": 31.98, "learning_rate": 0.001, "loss": 2.5418, "step": 166548 }, { "epoch": 31.98, "learning_rate": 0.001, "loss": 2.5317, "step": 166560 }, { "epoch": 31.98, "learning_rate": 0.001, "loss": 2.5324, "step": 166572 }, { "epoch": 31.99, "learning_rate": 0.001, "loss": 2.5303, "step": 166584 }, { "epoch": 31.99, "learning_rate": 0.001, "loss": 2.5261, "step": 166596 }, { "epoch": 31.99, "learning_rate": 0.001, "loss": 2.5225, "step": 166608 }, { "epoch": 31.99, "learning_rate": 0.001, "loss": 2.5344, "step": 166620 }, { "epoch": 32.0, "learning_rate": 0.001, "loss": 2.5286, "step": 166632 }, { "epoch": 32.0, "learning_rate": 0.001, "loss": 2.5334, "step": 166644 }, { "epoch": 32.0, "learning_rate": 0.001, "loss": 2.5178, "step": 166656 }, { "epoch": 32.0, "learning_rate": 0.001, "loss": 2.5137, "step": 166668 }, { "epoch": 32.0, "learning_rate": 0.001, "loss": 2.5254, "step": 166680 }, { "epoch": 32.01, "learning_rate": 0.001, "loss": 2.5223, "step": 166692 }, { "epoch": 32.01, "learning_rate": 0.001, "loss": 2.5156, "step": 166704 }, { "epoch": 32.01, "learning_rate": 0.001, "loss": 2.5152, "step": 166716 }, { "epoch": 32.01, "learning_rate": 0.001, "loss": 2.5122, "step": 166728 }, { "epoch": 32.02, "learning_rate": 0.001, "loss": 2.5138, "step": 166740 }, { "epoch": 32.02, "learning_rate": 0.001, "loss": 2.5182, "step": 166752 }, { "epoch": 32.02, "learning_rate": 0.001, "loss": 2.5109, "step": 166764 }, { "epoch": 32.02, "learning_rate": 0.001, "loss": 2.5256, "step": 166776 }, { "epoch": 32.03, "learning_rate": 0.001, "loss": 2.5046, "step": 166788 }, { "epoch": 32.03, "learning_rate": 0.001, "loss": 2.516, "step": 166800 }, { "epoch": 32.03, "learning_rate": 0.001, "loss": 2.5106, "step": 166812 }, { "epoch": 32.03, "learning_rate": 0.001, "loss": 2.5163, "step": 166824 }, { "epoch": 32.03, "learning_rate": 0.001, "loss": 2.519, "step": 166836 }, { "epoch": 32.04, "learning_rate": 0.001, "loss": 2.5129, "step": 166848 }, { "epoch": 32.04, "learning_rate": 0.001, "loss": 2.5179, "step": 166860 }, { "epoch": 32.04, "learning_rate": 0.001, "loss": 2.5165, "step": 166872 }, { "epoch": 32.04, "eval_ag_news_accuracy": 0.32675, "eval_ag_news_bleu_score": 4.815627293562557, "eval_ag_news_bleu_score_sem": 0.16402280387455798, "eval_ag_news_emb_cos_sim": 0.8088845014572144, "eval_ag_news_emb_cos_sim_sem": 0.0069388740634603735, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.50780987739563, "eval_ag_news_n_ngrams_match_1": 13.994, "eval_ag_news_n_ngrams_match_2": 3.084, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.092, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.37509213766253, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3473461043947416, "eval_ag_news_runtime": 12.512, "eval_ag_news_samples_per_second": 39.962, "eval_ag_news_steps_per_second": 0.08, "eval_ag_news_token_set_f1": 0.34820317672812245, "eval_ag_news_token_set_f1_sem": 0.004580834913622929, "eval_ag_news_token_set_precision": 0.33548923470673075, "eval_ag_news_token_set_recall": 0.3783151195881536, "eval_ag_news_true_num_tokens": 56.09375, "step": 166875 }, { "epoch": 32.04, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.155059918492553, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618107355206754, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6594028472900391, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009416124231861509, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2299771308898926, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.148, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.922, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.712, "eval_anthropic_toxic_prompts_num_pred_words": 46.76, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.27907885431468, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21434134525971632, "eval_anthropic_toxic_prompts_runtime": 11.5179, "eval_anthropic_toxic_prompts_samples_per_second": 43.411, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.35164535975769423, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066668385835222015, "eval_anthropic_toxic_prompts_token_set_precision": 0.4329390426527947, "eval_anthropic_toxic_prompts_token_set_recall": 0.3215337314701285, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 166875 }, { "epoch": 32.04, "eval_arxiv_accuracy": 0.35075, "eval_arxiv_bleu_score": 4.3624678262222405, "eval_arxiv_bleu_score_sem": 0.12537337937640036, "eval_arxiv_emb_cos_sim": 0.7738143801689148, "eval_arxiv_emb_cos_sim_sem": 0.008658309407656458, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.365551233291626, "eval_arxiv_n_ngrams_match_1": 15.322, "eval_arxiv_n_ngrams_match_2": 2.996, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 40.72, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.949450802651754, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36366786647511495, "eval_arxiv_runtime": 28.815, "eval_arxiv_samples_per_second": 17.352, "eval_arxiv_steps_per_second": 0.035, "eval_arxiv_token_set_f1": 0.3565628150129613, "eval_arxiv_token_set_f1_sem": 0.004362686020897914, "eval_arxiv_token_set_precision": 0.30976948128980236, "eval_arxiv_token_set_recall": 0.43864209713035035, "eval_arxiv_true_num_tokens": 64.0, "step": 166875 }, { "epoch": 32.04, "eval_python_code_alpaca_accuracy": 0.1636875, "eval_python_code_alpaca_bleu_score": 4.95157793735963, "eval_python_code_alpaca_bleu_score_sem": 0.15942569667023843, "eval_python_code_alpaca_emb_cos_sim": 0.7661731243133545, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010553217887271586, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.812887668609619, "eval_python_code_alpaca_n_ngrams_match_1": 10.052, "eval_python_code_alpaca_n_ngrams_match_2": 3.18, "eval_python_code_alpaca_n_ngrams_match_3": 1.166, "eval_python_code_alpaca_num_pred_words": 44.59, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.657951476923675, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.336732598944943, "eval_python_code_alpaca_runtime": 26.5719, "eval_python_code_alpaca_samples_per_second": 18.817, "eval_python_code_alpaca_steps_per_second": 0.038, "eval_python_code_alpaca_token_set_f1": 0.4829207570270869, "eval_python_code_alpaca_token_set_f1_sem": 0.005589941155793038, "eval_python_code_alpaca_token_set_precision": 0.5525553475475216, "eval_python_code_alpaca_token_set_recall": 0.4492561548794006, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 166875 }, { "epoch": 32.04, "eval_wikibio_accuracy": 0.3248125, "eval_wikibio_bleu_score": 5.910099657655869, "eval_wikibio_bleu_score_sem": 0.208800568954724, "eval_wikibio_emb_cos_sim": 0.7427431344985962, "eval_wikibio_emb_cos_sim_sem": 0.009208849323607472, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7076754570007324, "eval_wikibio_n_ngrams_match_1": 10.084, "eval_wikibio_n_ngrams_match_2": 3.352, "eval_wikibio_n_ngrams_match_3": 1.194, "eval_wikibio_num_pred_words": 36.234, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.75895038833397, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3573101165393312, "eval_wikibio_runtime": 12.3768, "eval_wikibio_samples_per_second": 40.398, "eval_wikibio_steps_per_second": 0.081, "eval_wikibio_token_set_f1": 0.3206657889556288, "eval_wikibio_token_set_f1_sem": 0.0052431844915149205, "eval_wikibio_token_set_precision": 0.32849448153304733, "eval_wikibio_token_set_recall": 0.3282602416077145, "eval_wikibio_true_num_tokens": 61.1328125, "step": 166875 }, { "epoch": 32.04, "eval_nq_accuracy": 0.53203125, "eval_nq_bleu_score": 11.92770511794608, "eval_nq_bleu_score_sem": 0.49513546551515863, "eval_nq_emb_cos_sim": 0.8254275321960449, "eval_nq_emb_cos_sim_sem": 0.007631238124650545, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.150421142578125, "eval_nq_n_ngrams_match_1": 23.146, "eval_nq_n_ngrams_match_2": 8.612, "eval_nq_n_ngrams_match_3": 3.982, "eval_nq_num_pred_words": 49.224, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.588474607992845, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.44980493575256, "eval_nq_runtime": 12.3261, "eval_nq_samples_per_second": 40.564, "eval_nq_steps_per_second": 0.081, "eval_nq_token_set_f1": 0.46537305773421, "eval_nq_token_set_f1_sem": 0.005133129392622436, "eval_nq_token_set_precision": 0.4241983766233702, "eval_nq_token_set_recall": 0.5235785693292351, "eval_nq_true_num_tokens": 64.0, "step": 166875 }, { "epoch": 32.04, "learning_rate": 0.001, "loss": 2.5168, "step": 166884 }, { "epoch": 32.05, "learning_rate": 0.001, "loss": 2.5203, "step": 166896 }, { "epoch": 32.05, "learning_rate": 0.001, "loss": 2.5286, "step": 166908 }, { "epoch": 32.05, "learning_rate": 0.001, "loss": 2.5206, "step": 166920 }, { "epoch": 32.05, "learning_rate": 0.001, "loss": 2.5143, "step": 166932 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 2.5213, "step": 166944 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 2.5112, "step": 166956 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 2.534, "step": 166968 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 2.5189, "step": 166980 }, { "epoch": 32.06, "learning_rate": 0.001, "loss": 2.5177, "step": 166992 }, { "epoch": 32.07, "learning_rate": 0.001, "loss": 2.5243, "step": 167004 }, { "epoch": 32.07, "learning_rate": 0.001, "loss": 2.518, "step": 167016 }, { "epoch": 32.07, "learning_rate": 0.001, "loss": 2.5188, "step": 167028 }, { "epoch": 32.07, "learning_rate": 0.001, "loss": 2.5123, "step": 167040 }, { "epoch": 32.08, "learning_rate": 0.001, "loss": 2.5197, "step": 167052 }, { "epoch": 32.08, "learning_rate": 0.001, "loss": 2.5099, "step": 167064 }, { "epoch": 32.08, "learning_rate": 0.001, "loss": 2.5141, "step": 167076 }, { "epoch": 32.08, "learning_rate": 0.001, "loss": 2.5148, "step": 167088 }, { "epoch": 32.09, "learning_rate": 0.001, "loss": 2.5122, "step": 167100 }, { "epoch": 32.09, "learning_rate": 0.001, "loss": 2.517, "step": 167112 }, { "epoch": 32.09, "learning_rate": 0.001, "loss": 2.5168, "step": 167124 }, { "epoch": 32.09, "learning_rate": 0.001, "loss": 2.5107, "step": 167136 }, { "epoch": 32.09, "learning_rate": 0.001, "loss": 2.511, "step": 167148 }, { "epoch": 32.1, "learning_rate": 0.001, "loss": 2.5152, "step": 167160 }, { "epoch": 32.1, "learning_rate": 0.001, "loss": 2.5199, "step": 167172 }, { "epoch": 32.1, "learning_rate": 0.001, "loss": 2.5108, "step": 167184 }, { "epoch": 32.1, "learning_rate": 0.001, "loss": 2.5042, "step": 167196 }, { "epoch": 32.11, "learning_rate": 0.001, "loss": 2.514, "step": 167208 }, { "epoch": 32.11, "learning_rate": 0.001, "loss": 2.5075, "step": 167220 }, { "epoch": 32.11, "learning_rate": 0.001, "loss": 2.5249, "step": 167232 }, { "epoch": 32.11, "learning_rate": 0.001, "loss": 2.5129, "step": 167244 }, { "epoch": 32.12, "learning_rate": 0.001, "loss": 2.5235, "step": 167256 }, { "epoch": 32.12, "learning_rate": 0.001, "loss": 2.5207, "step": 167268 }, { "epoch": 32.12, "learning_rate": 0.001, "loss": 2.5197, "step": 167280 }, { "epoch": 32.12, "learning_rate": 0.001, "loss": 2.5183, "step": 167292 }, { "epoch": 32.12, "learning_rate": 0.001, "loss": 2.5174, "step": 167304 }, { "epoch": 32.13, "learning_rate": 0.001, "loss": 2.5191, "step": 167316 }, { "epoch": 32.13, "learning_rate": 0.001, "loss": 2.5203, "step": 167328 }, { "epoch": 32.13, "learning_rate": 0.001, "loss": 2.5141, "step": 167340 }, { "epoch": 32.13, "learning_rate": 0.001, "loss": 2.5194, "step": 167352 }, { "epoch": 32.14, "learning_rate": 0.001, "loss": 2.5129, "step": 167364 }, { "epoch": 32.14, "learning_rate": 0.001, "loss": 2.5103, "step": 167376 }, { "epoch": 32.14, "learning_rate": 0.001, "loss": 2.525, "step": 167388 }, { "epoch": 32.14, "learning_rate": 0.001, "loss": 2.5187, "step": 167400 }, { "epoch": 32.15, "learning_rate": 0.001, "loss": 2.5197, "step": 167412 }, { "epoch": 32.15, "learning_rate": 0.001, "loss": 2.5216, "step": 167424 }, { "epoch": 32.15, "learning_rate": 0.001, "loss": 2.5222, "step": 167436 }, { "epoch": 32.15, "learning_rate": 0.001, "loss": 2.5174, "step": 167448 }, { "epoch": 32.15, "learning_rate": 0.001, "loss": 2.5191, "step": 167460 }, { "epoch": 32.16, "learning_rate": 0.001, "loss": 2.5175, "step": 167472 }, { "epoch": 32.16, "learning_rate": 0.001, "loss": 2.5105, "step": 167484 }, { "epoch": 32.16, "learning_rate": 0.001, "loss": 2.5155, "step": 167496 }, { "epoch": 32.16, "eval_ag_news_accuracy": 0.326625, "eval_ag_news_bleu_score": 4.92835322917289, "eval_ag_news_bleu_score_sem": 0.1621984539919837, "eval_ag_news_emb_cos_sim": 0.80971759557724, "eval_ag_news_emb_cos_sim_sem": 0.006937972516028945, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5029375553131104, "eval_ag_news_n_ngrams_match_1": 14.248, "eval_ag_news_n_ngrams_match_2": 3.236, "eval_ag_news_n_ngrams_match_3": 0.942, "eval_ag_news_num_pred_words": 46.3, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.21287345099474, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35588709158423726, "eval_ag_news_runtime": 10.418, "eval_ag_news_samples_per_second": 47.994, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3550767604880276, "eval_ag_news_token_set_f1_sem": 0.004443144657969766, "eval_ag_news_token_set_precision": 0.3416093170153735, "eval_ag_news_token_set_recall": 0.3849158723004794, "eval_ag_news_true_num_tokens": 56.09375, "step": 167500 }, { "epoch": 32.16, "eval_anthropic_toxic_prompts_accuracy": 0.11590625, "eval_anthropic_toxic_prompts_bleu_score": 3.1738114586671893, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12247148216827992, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785756349563599, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008992992263325403, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.220651388168335, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.14, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.904, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 46.118, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.04442851365031, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21202512794835326, "eval_anthropic_toxic_prompts_runtime": 20.5751, "eval_anthropic_toxic_prompts_samples_per_second": 24.301, "eval_anthropic_toxic_prompts_steps_per_second": 0.049, "eval_anthropic_toxic_prompts_token_set_f1": 0.3519881583116291, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006428653429464225, "eval_anthropic_toxic_prompts_token_set_precision": 0.43315367549246775, "eval_anthropic_toxic_prompts_token_set_recall": 0.3211777126298439, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 167500 }, { "epoch": 32.16, "eval_arxiv_accuracy": 0.35021875, "eval_arxiv_bleu_score": 4.461599688737477, "eval_arxiv_bleu_score_sem": 0.11946554382757948, "eval_arxiv_emb_cos_sim": 0.7828381061553955, "eval_arxiv_emb_cos_sim_sem": 0.005931621677422201, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3561220169067383, "eval_arxiv_n_ngrams_match_1": 15.302, "eval_arxiv_n_ngrams_match_2": 3.006, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 40.338, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.677763080947802, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3712645859172659, "eval_arxiv_runtime": 16.8477, "eval_arxiv_samples_per_second": 29.678, "eval_arxiv_steps_per_second": 0.059, "eval_arxiv_token_set_f1": 0.3614134705511333, "eval_arxiv_token_set_f1_sem": 0.0038228164116966726, "eval_arxiv_token_set_precision": 0.31328299181723923, "eval_arxiv_token_set_recall": 0.4412616122048446, "eval_arxiv_true_num_tokens": 64.0, "step": 167500 }, { "epoch": 32.16, "eval_python_code_alpaca_accuracy": 0.16278125, "eval_python_code_alpaca_bleu_score": 4.8720891485655375, "eval_python_code_alpaca_bleu_score_sem": 0.15028522067517527, "eval_python_code_alpaca_emb_cos_sim": 0.7576694488525391, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007696727741791395, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8684589862823486, "eval_python_code_alpaca_n_ngrams_match_1": 9.914, "eval_python_code_alpaca_n_ngrams_match_2": 2.99, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 41.864, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.60986024361883, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34480892010778996, "eval_python_code_alpaca_runtime": 15.1031, "eval_python_code_alpaca_samples_per_second": 33.106, "eval_python_code_alpaca_steps_per_second": 0.066, "eval_python_code_alpaca_token_set_f1": 0.4873847453916951, "eval_python_code_alpaca_token_set_f1_sem": 0.005521386656550456, "eval_python_code_alpaca_token_set_precision": 0.5426401735832815, "eval_python_code_alpaca_token_set_recall": 0.4629082375473959, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 167500 }, { "epoch": 32.16, "eval_wikibio_accuracy": 0.32675, "eval_wikibio_bleu_score": 6.256609904913149, "eval_wikibio_bleu_score_sem": 0.21839463405041792, "eval_wikibio_emb_cos_sim": 0.7512813806533813, "eval_wikibio_emb_cos_sim_sem": 0.008049940722825959, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.665400505065918, "eval_wikibio_n_ngrams_match_1": 10.47, "eval_wikibio_n_ngrams_match_2": 3.518, "eval_wikibio_n_ngrams_match_3": 1.284, "eval_wikibio_num_pred_words": 36.594, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.07178147629232, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3721713376157594, "eval_wikibio_runtime": 12.7941, "eval_wikibio_samples_per_second": 39.08, "eval_wikibio_steps_per_second": 0.078, "eval_wikibio_token_set_f1": 0.33015638171528733, "eval_wikibio_token_set_f1_sem": 0.005078328166357356, "eval_wikibio_token_set_precision": 0.3405814091357953, "eval_wikibio_token_set_recall": 0.33360621096323245, "eval_wikibio_true_num_tokens": 61.1328125, "step": 167500 }, { "epoch": 32.16, "eval_nq_accuracy": 0.53203125, "eval_nq_bleu_score": 12.12346825477318, "eval_nq_bleu_score_sem": 0.4960565632802446, "eval_nq_emb_cos_sim": 0.8391648530960083, "eval_nq_emb_cos_sim_sem": 0.006984325488312325, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.147446393966675, "eval_nq_n_ngrams_match_1": 23.406, "eval_nq_n_ngrams_match_2": 8.64, "eval_nq_n_ngrams_match_3": 4.046, "eval_nq_num_pred_words": 48.85, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.562964017686994, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45469851661095806, "eval_nq_runtime": 10.427, "eval_nq_samples_per_second": 47.952, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4684891119014707, "eval_nq_token_set_f1_sem": 0.004811217973134483, "eval_nq_token_set_precision": 0.425919329426332, "eval_nq_token_set_recall": 0.5278613786993426, "eval_nq_true_num_tokens": 64.0, "step": 167500 }, { "epoch": 32.16, "learning_rate": 0.001, "loss": 2.5214, "step": 167508 }, { "epoch": 32.17, "learning_rate": 0.001, "loss": 2.5172, "step": 167520 }, { "epoch": 32.17, "learning_rate": 0.001, "loss": 2.5193, "step": 167532 }, { "epoch": 32.17, "learning_rate": 0.001, "loss": 2.5117, "step": 167544 }, { "epoch": 32.17, "learning_rate": 0.001, "loss": 2.5151, "step": 167556 }, { "epoch": 32.18, "learning_rate": 0.001, "loss": 2.5224, "step": 167568 }, { "epoch": 32.18, "learning_rate": 0.001, "loss": 2.5098, "step": 167580 }, { "epoch": 32.18, "learning_rate": 0.001, "loss": 2.5305, "step": 167592 }, { "epoch": 32.18, "learning_rate": 0.001, "loss": 2.5197, "step": 167604 }, { "epoch": 32.18, "learning_rate": 0.001, "loss": 2.5131, "step": 167616 }, { "epoch": 32.19, "learning_rate": 0.001, "loss": 2.5236, "step": 167628 }, { "epoch": 32.19, "learning_rate": 0.001, "loss": 2.5216, "step": 167640 }, { "epoch": 32.19, "learning_rate": 0.001, "loss": 2.5279, "step": 167652 }, { "epoch": 32.19, "learning_rate": 0.001, "loss": 2.5244, "step": 167664 }, { "epoch": 32.2, "learning_rate": 0.001, "loss": 2.5135, "step": 167676 }, { "epoch": 32.2, "learning_rate": 0.001, "loss": 2.5197, "step": 167688 }, { "epoch": 32.2, "learning_rate": 0.001, "loss": 2.5202, "step": 167700 }, { "epoch": 32.2, "learning_rate": 0.001, "loss": 2.5236, "step": 167712 }, { "epoch": 32.21, "learning_rate": 0.001, "loss": 2.523, "step": 167724 }, { "epoch": 32.21, "learning_rate": 0.001, "loss": 2.5274, "step": 167736 }, { "epoch": 32.21, "learning_rate": 0.001, "loss": 2.5098, "step": 167748 }, { "epoch": 32.21, "learning_rate": 0.001, "loss": 2.5222, "step": 167760 }, { "epoch": 32.21, "learning_rate": 0.001, "loss": 2.5107, "step": 167772 }, { "epoch": 32.22, "learning_rate": 0.001, "loss": 2.5234, "step": 167784 }, { "epoch": 32.22, "learning_rate": 0.001, "loss": 2.5282, "step": 167796 }, { "epoch": 32.22, "learning_rate": 0.001, "loss": 2.5236, "step": 167808 }, { "epoch": 32.22, "learning_rate": 0.001, "loss": 2.5202, "step": 167820 }, { "epoch": 32.23, "learning_rate": 0.001, "loss": 2.5191, "step": 167832 }, { "epoch": 32.23, "learning_rate": 0.001, "loss": 2.5197, "step": 167844 }, { "epoch": 32.23, "learning_rate": 0.001, "loss": 2.5201, "step": 167856 }, { "epoch": 32.23, "learning_rate": 0.001, "loss": 2.5294, "step": 167868 }, { "epoch": 32.24, "learning_rate": 0.001, "loss": 2.5246, "step": 167880 }, { "epoch": 32.24, "learning_rate": 0.001, "loss": 2.5235, "step": 167892 }, { "epoch": 32.24, "learning_rate": 0.001, "loss": 2.517, "step": 167904 }, { "epoch": 32.24, "learning_rate": 0.001, "loss": 2.515, "step": 167916 }, { "epoch": 32.24, "learning_rate": 0.001, "loss": 2.5298, "step": 167928 }, { "epoch": 32.25, "learning_rate": 0.001, "loss": 2.523, "step": 167940 }, { "epoch": 32.25, "learning_rate": 0.001, "loss": 2.5173, "step": 167952 }, { "epoch": 32.25, "learning_rate": 0.001, "loss": 2.5134, "step": 167964 }, { "epoch": 32.25, "learning_rate": 0.001, "loss": 2.5227, "step": 167976 }, { "epoch": 32.26, "learning_rate": 0.001, "loss": 2.5215, "step": 167988 }, { "epoch": 32.26, "learning_rate": 0.001, "loss": 2.5165, "step": 168000 }, { "epoch": 32.26, "learning_rate": 0.001, "loss": 2.5153, "step": 168012 }, { "epoch": 32.26, "learning_rate": 0.001, "loss": 2.5238, "step": 168024 }, { "epoch": 32.26, "learning_rate": 0.001, "loss": 2.5244, "step": 168036 }, { "epoch": 32.27, "learning_rate": 0.001, "loss": 2.5209, "step": 168048 }, { "epoch": 32.27, "learning_rate": 0.001, "loss": 2.524, "step": 168060 }, { "epoch": 32.27, "learning_rate": 0.001, "loss": 2.5261, "step": 168072 }, { "epoch": 32.27, "learning_rate": 0.001, "loss": 2.5205, "step": 168084 }, { "epoch": 32.28, "learning_rate": 0.001, "loss": 2.5183, "step": 168096 }, { "epoch": 32.28, "learning_rate": 0.001, "loss": 2.5234, "step": 168108 }, { "epoch": 32.28, "learning_rate": 0.001, "loss": 2.5194, "step": 168120 }, { "epoch": 32.28, "eval_ag_news_accuracy": 0.32609375, "eval_ag_news_bleu_score": 5.0874406981036895, "eval_ag_news_bleu_score_sem": 0.15433562718436986, "eval_ag_news_emb_cos_sim": 0.8175662755966187, "eval_ag_news_emb_cos_sim_sem": 0.007330140380344211, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5043346881866455, "eval_ag_news_n_ngrams_match_1": 14.428, "eval_ag_news_n_ngrams_match_2": 3.296, "eval_ag_news_n_ngrams_match_3": 0.978, "eval_ag_news_num_pred_words": 46.612, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.25930867885597, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3561177109035263, "eval_ag_news_runtime": 11.7183, "eval_ag_news_samples_per_second": 42.668, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.3555390501855418, "eval_ag_news_token_set_f1_sem": 0.004503538327020847, "eval_ag_news_token_set_precision": 0.3405395321643624, "eval_ag_news_token_set_recall": 0.385143822277539, "eval_ag_news_true_num_tokens": 56.09375, "step": 168125 }, { "epoch": 32.28, "eval_anthropic_toxic_prompts_accuracy": 0.1149375, "eval_anthropic_toxic_prompts_bleu_score": 3.2468605569546534, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11993232135348983, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6756578683853149, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008404350279231526, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2471721172332764, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.342, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.736, "eval_anthropic_toxic_prompts_num_pred_words": 46.66, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.71751088391135, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21966116414130563, "eval_anthropic_toxic_prompts_runtime": 12.076, "eval_anthropic_toxic_prompts_samples_per_second": 41.404, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3659020303469726, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006501680400489734, "eval_anthropic_toxic_prompts_token_set_precision": 0.44587466210498783, "eval_anthropic_toxic_prompts_token_set_recall": 0.333862705076242, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 168125 }, { "epoch": 32.28, "eval_arxiv_accuracy": 0.35103125, "eval_arxiv_bleu_score": 4.466254601991632, "eval_arxiv_bleu_score_sem": 0.11707830527157961, "eval_arxiv_emb_cos_sim": 0.7804619073867798, "eval_arxiv_emb_cos_sim_sem": 0.006360185321060147, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3633198738098145, "eval_arxiv_n_ngrams_match_1": 15.728, "eval_arxiv_n_ngrams_match_2": 3.062, "eval_arxiv_n_ngrams_match_3": 0.682, "eval_arxiv_num_pred_words": 41.0, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.884926186688986, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3758620737668217, "eval_arxiv_runtime": 14.2801, "eval_arxiv_samples_per_second": 35.014, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.36698154951932493, "eval_arxiv_token_set_f1_sem": 0.0039975974750848025, "eval_arxiv_token_set_precision": 0.31998378835337576, "eval_arxiv_token_set_recall": 0.4445848631182827, "eval_arxiv_true_num_tokens": 64.0, "step": 168125 }, { "epoch": 32.28, "eval_python_code_alpaca_accuracy": 0.16246875, "eval_python_code_alpaca_bleu_score": 4.890429466066363, "eval_python_code_alpaca_bleu_score_sem": 0.15518511776298086, "eval_python_code_alpaca_emb_cos_sim": 0.7621194124221802, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009740857797103852, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.865048885345459, "eval_python_code_alpaca_n_ngrams_match_1": 10.096, "eval_python_code_alpaca_n_ngrams_match_2": 3.076, "eval_python_code_alpaca_n_ngrams_match_3": 1.072, "eval_python_code_alpaca_num_pred_words": 43.386, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.54991111708448, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34246083271542194, "eval_python_code_alpaca_runtime": 12.7255, "eval_python_code_alpaca_samples_per_second": 39.291, "eval_python_code_alpaca_steps_per_second": 0.079, "eval_python_code_alpaca_token_set_f1": 0.49057156671525015, "eval_python_code_alpaca_token_set_f1_sem": 0.005661150965871911, "eval_python_code_alpaca_token_set_precision": 0.5514028807233765, "eval_python_code_alpaca_token_set_recall": 0.4654982012385866, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 168125 }, { "epoch": 32.28, "eval_wikibio_accuracy": 0.3263125, "eval_wikibio_bleu_score": 6.191199690493633, "eval_wikibio_bleu_score_sem": 0.2117647776192703, "eval_wikibio_emb_cos_sim": 0.7437809109687805, "eval_wikibio_emb_cos_sim_sem": 0.009498421221935094, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.688481330871582, "eval_wikibio_n_ngrams_match_1": 10.192, "eval_wikibio_n_ngrams_match_2": 3.484, "eval_wikibio_n_ngrams_match_3": 1.33, "eval_wikibio_num_pred_words": 36.148, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.984078239927506, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3578335979464245, "eval_wikibio_runtime": 14.4519, "eval_wikibio_samples_per_second": 34.597, "eval_wikibio_steps_per_second": 0.069, "eval_wikibio_token_set_f1": 0.3218182986244908, "eval_wikibio_token_set_f1_sem": 0.005390369220073895, "eval_wikibio_token_set_precision": 0.33068107982948086, "eval_wikibio_token_set_recall": 0.32825675510883745, "eval_wikibio_true_num_tokens": 61.1328125, "step": 168125 }, { "epoch": 32.28, "eval_nq_accuracy": 0.53190625, "eval_nq_bleu_score": 12.000788674688687, "eval_nq_bleu_score_sem": 0.4910340221504707, "eval_nq_emb_cos_sim": 0.8362958431243896, "eval_nq_emb_cos_sim_sem": 0.007697480458164019, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.150165319442749, "eval_nq_n_ngrams_match_1": 23.484, "eval_nq_n_ngrams_match_2": 8.692, "eval_nq_n_ngrams_match_3": 3.99, "eval_nq_num_pred_words": 49.068, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.586277758504973, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45508165097849884, "eval_nq_runtime": 15.1578, "eval_nq_samples_per_second": 32.986, "eval_nq_steps_per_second": 0.066, "eval_nq_token_set_f1": 0.4681998475056242, "eval_nq_token_set_f1_sem": 0.004910346806228855, "eval_nq_token_set_precision": 0.426228832282288, "eval_nq_token_set_recall": 0.527553492732729, "eval_nq_true_num_tokens": 64.0, "step": 168125 }, { "epoch": 32.28, "learning_rate": 0.001, "loss": 2.5346, "step": 168132 }, { "epoch": 32.29, "learning_rate": 0.001, "loss": 2.5193, "step": 168144 }, { "epoch": 32.29, "learning_rate": 0.001, "loss": 2.5216, "step": 168156 }, { "epoch": 32.29, "learning_rate": 0.001, "loss": 2.5185, "step": 168168 }, { "epoch": 32.29, "learning_rate": 0.001, "loss": 2.5293, "step": 168180 }, { "epoch": 32.29, "learning_rate": 0.001, "loss": 2.5265, "step": 168192 }, { "epoch": 32.3, "learning_rate": 0.001, "loss": 2.5152, "step": 168204 }, { "epoch": 32.3, "learning_rate": 0.001, "loss": 2.517, "step": 168216 }, { "epoch": 32.3, "learning_rate": 0.001, "loss": 2.5182, "step": 168228 }, { "epoch": 32.3, "learning_rate": 0.001, "loss": 2.515, "step": 168240 }, { "epoch": 32.31, "learning_rate": 0.001, "loss": 2.5267, "step": 168252 }, { "epoch": 32.31, "learning_rate": 0.001, "loss": 2.5207, "step": 168264 }, { "epoch": 32.31, "learning_rate": 0.001, "loss": 2.5227, "step": 168276 }, { "epoch": 32.31, "learning_rate": 0.001, "loss": 2.5194, "step": 168288 }, { "epoch": 32.32, "learning_rate": 0.001, "loss": 2.5204, "step": 168300 }, { "epoch": 32.32, "learning_rate": 0.001, "loss": 2.5195, "step": 168312 }, { "epoch": 32.32, "learning_rate": 0.001, "loss": 2.5239, "step": 168324 }, { "epoch": 32.32, "learning_rate": 0.001, "loss": 2.5248, "step": 168336 }, { "epoch": 32.32, "learning_rate": 0.001, "loss": 2.5152, "step": 168348 }, { "epoch": 32.33, "learning_rate": 0.001, "loss": 2.5148, "step": 168360 }, { "epoch": 32.33, "learning_rate": 0.001, "loss": 2.5131, "step": 168372 }, { "epoch": 32.33, "learning_rate": 0.001, "loss": 2.5271, "step": 168384 }, { "epoch": 32.33, "learning_rate": 0.001, "loss": 2.5222, "step": 168396 }, { "epoch": 32.34, "learning_rate": 0.001, "loss": 2.5216, "step": 168408 }, { "epoch": 32.34, "learning_rate": 0.001, "loss": 2.5137, "step": 168420 }, { "epoch": 32.34, "learning_rate": 0.001, "loss": 2.5175, "step": 168432 }, { "epoch": 32.34, "learning_rate": 0.001, "loss": 2.5189, "step": 168444 }, { "epoch": 32.35, "learning_rate": 0.001, "loss": 2.5249, "step": 168456 }, { "epoch": 32.35, "learning_rate": 0.001, "loss": 2.5233, "step": 168468 }, { "epoch": 32.35, "learning_rate": 0.001, "loss": 2.5191, "step": 168480 }, { "epoch": 32.35, "learning_rate": 0.001, "loss": 2.5176, "step": 168492 }, { "epoch": 32.35, "learning_rate": 0.001, "loss": 2.5233, "step": 168504 }, { "epoch": 32.36, "learning_rate": 0.001, "loss": 2.5231, "step": 168516 }, { "epoch": 32.36, "learning_rate": 0.001, "loss": 2.5231, "step": 168528 }, { "epoch": 32.36, "learning_rate": 0.001, "loss": 2.5129, "step": 168540 }, { "epoch": 32.36, "learning_rate": 0.001, "loss": 2.5095, "step": 168552 }, { "epoch": 32.37, "learning_rate": 0.001, "loss": 2.5191, "step": 168564 }, { "epoch": 32.37, "learning_rate": 0.001, "loss": 2.5234, "step": 168576 }, { "epoch": 32.37, "learning_rate": 0.001, "loss": 2.528, "step": 168588 }, { "epoch": 32.37, "learning_rate": 0.001, "loss": 2.5212, "step": 168600 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 2.5272, "step": 168612 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 2.5173, "step": 168624 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 2.5171, "step": 168636 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 2.525, "step": 168648 }, { "epoch": 32.38, "learning_rate": 0.001, "loss": 2.5211, "step": 168660 }, { "epoch": 32.39, "learning_rate": 0.001, "loss": 2.5224, "step": 168672 }, { "epoch": 32.39, "learning_rate": 0.001, "loss": 2.5113, "step": 168684 }, { "epoch": 32.39, "learning_rate": 0.001, "loss": 2.5169, "step": 168696 }, { "epoch": 32.39, "learning_rate": 0.001, "loss": 2.5177, "step": 168708 }, { "epoch": 32.4, "learning_rate": 0.001, "loss": 2.521, "step": 168720 }, { "epoch": 32.4, "learning_rate": 0.001, "loss": 2.5229, "step": 168732 }, { "epoch": 32.4, "learning_rate": 0.001, "loss": 2.5198, "step": 168744 }, { "epoch": 32.4, "eval_ag_news_accuracy": 0.32546875, "eval_ag_news_bleu_score": 4.869080691834786, "eval_ag_news_bleu_score_sem": 0.15179364946824286, "eval_ag_news_emb_cos_sim": 0.8161188364028931, "eval_ag_news_emb_cos_sim_sem": 0.007580130461019531, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5068631172180176, "eval_ag_news_n_ngrams_match_1": 14.282, "eval_ag_news_n_ngrams_match_2": 3.158, "eval_ag_news_n_ngrams_match_3": 0.868, "eval_ag_news_num_pred_words": 46.71, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.34350888274561, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35231991460636314, "eval_ag_news_runtime": 12.5948, "eval_ag_news_samples_per_second": 39.699, "eval_ag_news_steps_per_second": 0.079, "eval_ag_news_token_set_f1": 0.3534651767529745, "eval_ag_news_token_set_f1_sem": 0.004549650043853658, "eval_ag_news_token_set_precision": 0.3410032798945537, "eval_ag_news_token_set_recall": 0.3826163708016629, "eval_ag_news_true_num_tokens": 56.09375, "step": 168750 }, { "epoch": 32.4, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.1825666688922984, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1179177905676714, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6738834381103516, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00948207351306893, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2254481315612793, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.284, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762, "eval_anthropic_toxic_prompts_num_pred_words": 47.804, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.164848792348117, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21607119492143562, "eval_anthropic_toxic_prompts_runtime": 13.604, "eval_anthropic_toxic_prompts_samples_per_second": 36.754, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.356748217188059, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006625345638714375, "eval_anthropic_toxic_prompts_token_set_precision": 0.4422180595472049, "eval_anthropic_toxic_prompts_token_set_recall": 0.3252196084496033, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 168750 }, { "epoch": 32.4, "eval_arxiv_accuracy": 0.349625, "eval_arxiv_bleu_score": 4.421865682590429, "eval_arxiv_bleu_score_sem": 0.12826819702096595, "eval_arxiv_emb_cos_sim": 0.7698716521263123, "eval_arxiv_emb_cos_sim_sem": 0.007192445309170104, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.365234613418579, "eval_arxiv_n_ngrams_match_1": 15.262, "eval_arxiv_n_ngrams_match_2": 2.952, "eval_arxiv_n_ngrams_match_3": 0.678, "eval_arxiv_num_pred_words": 40.434, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.94028628212506, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3666530889013224, "eval_arxiv_runtime": 12.2859, "eval_arxiv_samples_per_second": 40.697, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.358644789730894, "eval_arxiv_token_set_f1_sem": 0.004076908121318583, "eval_arxiv_token_set_precision": 0.3105454819226174, "eval_arxiv_token_set_recall": 0.4424866302835179, "eval_arxiv_true_num_tokens": 64.0, "step": 168750 }, { "epoch": 32.4, "eval_python_code_alpaca_accuracy": 0.16390625, "eval_python_code_alpaca_bleu_score": 4.9199026786020195, "eval_python_code_alpaca_bleu_score_sem": 0.15989313604744668, "eval_python_code_alpaca_emb_cos_sim": 0.7597689628601074, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007768276626950193, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.850010395050049, "eval_python_code_alpaca_n_ngrams_match_1": 9.972, "eval_python_code_alpaca_n_ngrams_match_2": 3.01, "eval_python_code_alpaca_n_ngrams_match_3": 1.096, "eval_python_code_alpaca_num_pred_words": 43.196, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.28796154885914, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3386660346644988, "eval_python_code_alpaca_runtime": 12.7231, "eval_python_code_alpaca_samples_per_second": 39.298, "eval_python_code_alpaca_steps_per_second": 0.079, "eval_python_code_alpaca_token_set_f1": 0.4828110185435105, "eval_python_code_alpaca_token_set_f1_sem": 0.00540864122200843, "eval_python_code_alpaca_token_set_precision": 0.5437709506675122, "eval_python_code_alpaca_token_set_recall": 0.4564491308932614, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 168750 }, { "epoch": 32.4, "eval_wikibio_accuracy": 0.3256875, "eval_wikibio_bleu_score": 6.000436389695106, "eval_wikibio_bleu_score_sem": 0.2145718380730183, "eval_wikibio_emb_cos_sim": 0.7437461614608765, "eval_wikibio_emb_cos_sim_sem": 0.008876078881189707, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6984331607818604, "eval_wikibio_n_ngrams_match_1": 10.09, "eval_wikibio_n_ngrams_match_2": 3.39, "eval_wikibio_n_ngrams_match_3": 1.228, "eval_wikibio_num_pred_words": 36.44, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.3839795601731, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3592824132988366, "eval_wikibio_runtime": 12.7274, "eval_wikibio_samples_per_second": 39.285, "eval_wikibio_steps_per_second": 0.079, "eval_wikibio_token_set_f1": 0.3206266138727259, "eval_wikibio_token_set_f1_sem": 0.005494052811641807, "eval_wikibio_token_set_precision": 0.328254799149372, "eval_wikibio_token_set_recall": 0.3300923242071554, "eval_wikibio_true_num_tokens": 61.1328125, "step": 168750 }, { "epoch": 32.4, "eval_nq_accuracy": 0.532125, "eval_nq_bleu_score": 11.94649739394824, "eval_nq_bleu_score_sem": 0.499829132336281, "eval_nq_emb_cos_sim": 0.833651065826416, "eval_nq_emb_cos_sim_sem": 0.007120498004583976, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.151710271835327, "eval_nq_n_ngrams_match_1": 23.33, "eval_nq_n_ngrams_match_2": 8.544, "eval_nq_n_ngrams_match_3": 3.994, "eval_nq_num_pred_words": 49.332, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.599553401348793, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4496376778436102, "eval_nq_runtime": 12.6782, "eval_nq_samples_per_second": 39.438, "eval_nq_steps_per_second": 0.079, "eval_nq_token_set_f1": 0.4658140135404253, "eval_nq_token_set_f1_sem": 0.004974203972118167, "eval_nq_token_set_precision": 0.4252537495430019, "eval_nq_token_set_recall": 0.5216959949925327, "eval_nq_true_num_tokens": 64.0, "step": 168750 }, { "epoch": 32.4, "learning_rate": 0.001, "loss": 2.5195, "step": 168756 }, { "epoch": 32.41, "learning_rate": 0.001, "loss": 2.5243, "step": 168768 }, { "epoch": 32.41, "learning_rate": 0.001, "loss": 2.5158, "step": 168780 }, { "epoch": 32.41, "learning_rate": 0.001, "loss": 2.5129, "step": 168792 }, { "epoch": 32.41, "learning_rate": 0.001, "loss": 2.5319, "step": 168804 }, { "epoch": 32.41, "learning_rate": 0.001, "loss": 2.5276, "step": 168816 }, { "epoch": 32.42, "learning_rate": 0.001, "loss": 2.5146, "step": 168828 }, { "epoch": 32.42, "learning_rate": 0.001, "loss": 2.5214, "step": 168840 }, { "epoch": 32.42, "learning_rate": 0.001, "loss": 2.5075, "step": 168852 }, { "epoch": 32.42, "learning_rate": 0.001, "loss": 2.518, "step": 168864 }, { "epoch": 32.43, "learning_rate": 0.001, "loss": 2.5154, "step": 168876 }, { "epoch": 32.43, "learning_rate": 0.001, "loss": 2.5196, "step": 168888 }, { "epoch": 32.43, "learning_rate": 0.001, "loss": 2.5263, "step": 168900 }, { "epoch": 32.43, "learning_rate": 0.001, "loss": 2.5215, "step": 168912 }, { "epoch": 32.44, "learning_rate": 0.001, "loss": 2.5166, "step": 168924 }, { "epoch": 32.44, "learning_rate": 0.001, "loss": 2.5183, "step": 168936 }, { "epoch": 32.44, "learning_rate": 0.001, "loss": 2.517, "step": 168948 }, { "epoch": 32.44, "learning_rate": 0.001, "loss": 2.5277, "step": 168960 }, { "epoch": 32.44, "learning_rate": 0.001, "loss": 2.5178, "step": 168972 }, { "epoch": 32.45, "learning_rate": 0.001, "loss": 2.5279, "step": 168984 }, { "epoch": 32.45, "learning_rate": 0.001, "loss": 2.5131, "step": 168996 }, { "epoch": 32.45, "learning_rate": 0.001, "loss": 2.5145, "step": 169008 }, { "epoch": 32.45, "learning_rate": 0.001, "loss": 2.5224, "step": 169020 }, { "epoch": 32.46, "learning_rate": 0.001, "loss": 2.5276, "step": 169032 }, { "epoch": 32.46, "learning_rate": 0.001, "loss": 2.5289, "step": 169044 }, { "epoch": 32.46, "learning_rate": 0.001, "loss": 2.5244, "step": 169056 }, { "epoch": 32.46, "learning_rate": 0.001, "loss": 2.5202, "step": 169068 }, { "epoch": 32.47, "learning_rate": 0.001, "loss": 2.528, "step": 169080 }, { "epoch": 32.47, "learning_rate": 0.001, "loss": 2.5134, "step": 169092 }, { "epoch": 32.47, "learning_rate": 0.001, "loss": 2.5165, "step": 169104 }, { "epoch": 32.47, "learning_rate": 0.001, "loss": 2.5238, "step": 169116 }, { "epoch": 32.47, "learning_rate": 0.001, "loss": 2.5236, "step": 169128 }, { "epoch": 32.48, "learning_rate": 0.001, "loss": 2.5188, "step": 169140 }, { "epoch": 32.48, "learning_rate": 0.001, "loss": 2.5267, "step": 169152 }, { "epoch": 32.48, "learning_rate": 0.001, "loss": 2.5092, "step": 169164 }, { "epoch": 32.48, "learning_rate": 0.001, "loss": 2.5147, "step": 169176 }, { "epoch": 32.49, "learning_rate": 0.001, "loss": 2.5181, "step": 169188 }, { "epoch": 32.49, "learning_rate": 0.001, "loss": 2.5118, "step": 169200 }, { "epoch": 32.49, "learning_rate": 0.001, "loss": 2.5128, "step": 169212 }, { "epoch": 32.49, "learning_rate": 0.001, "loss": 2.5249, "step": 169224 }, { "epoch": 32.5, "learning_rate": 0.001, "loss": 2.5217, "step": 169236 }, { "epoch": 32.5, "learning_rate": 0.001, "loss": 2.5092, "step": 169248 }, { "epoch": 32.5, "learning_rate": 0.001, "loss": 2.5297, "step": 169260 }, { "epoch": 32.5, "learning_rate": 0.001, "loss": 2.5271, "step": 169272 }, { "epoch": 32.5, "learning_rate": 0.001, "loss": 2.5152, "step": 169284 }, { "epoch": 32.51, "learning_rate": 0.001, "loss": 2.5191, "step": 169296 }, { "epoch": 32.51, "learning_rate": 0.001, "loss": 2.526, "step": 169308 }, { "epoch": 32.51, "learning_rate": 0.001, "loss": 2.5169, "step": 169320 }, { "epoch": 32.51, "learning_rate": 0.001, "loss": 2.5268, "step": 169332 }, { "epoch": 32.52, "learning_rate": 0.001, "loss": 2.5345, "step": 169344 }, { "epoch": 32.52, "learning_rate": 0.001, "loss": 2.5245, "step": 169356 }, { "epoch": 32.52, "learning_rate": 0.001, "loss": 2.528, "step": 169368 }, { "epoch": 32.52, "eval_ag_news_accuracy": 0.3273125, "eval_ag_news_bleu_score": 4.726959521017791, "eval_ag_news_bleu_score_sem": 0.1453776583978361, "eval_ag_news_emb_cos_sim": 0.8186322450637817, "eval_ag_news_emb_cos_sim_sem": 0.007100898402937394, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.503650426864624, "eval_ag_news_n_ngrams_match_1": 14.148, "eval_ag_news_n_ngrams_match_2": 3.118, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.46, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.23655840478383, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3536052680982871, "eval_ag_news_runtime": 12.6821, "eval_ag_news_samples_per_second": 39.426, "eval_ag_news_steps_per_second": 0.079, "eval_ag_news_token_set_f1": 0.3508410630728675, "eval_ag_news_token_set_f1_sem": 0.004530332420736515, "eval_ag_news_token_set_precision": 0.3384757243196865, "eval_ag_news_token_set_recall": 0.37662257935250915, "eval_ag_news_true_num_tokens": 56.09375, "step": 169375 }, { "epoch": 32.52, "eval_anthropic_toxic_prompts_accuracy": 0.11609375, "eval_anthropic_toxic_prompts_bleu_score": 3.227361249340591, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11751250371524187, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6760736703872681, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009089850331014995, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2216336727142334, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 46.988, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.06904135516801, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22063724410074512, "eval_anthropic_toxic_prompts_runtime": 11.9857, "eval_anthropic_toxic_prompts_samples_per_second": 41.716, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3605956493953031, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006794638074546885, "eval_anthropic_toxic_prompts_token_set_precision": 0.44545998227792455, "eval_anthropic_toxic_prompts_token_set_recall": 0.3250501627613176, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 169375 }, { "epoch": 32.52, "eval_arxiv_accuracy": 0.3489375, "eval_arxiv_bleu_score": 4.4262191725177, "eval_arxiv_bleu_score_sem": 0.11920417950576172, "eval_arxiv_emb_cos_sim": 0.7817623615264893, "eval_arxiv_emb_cos_sim_sem": 0.007898044139337205, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.36584734916687, "eval_arxiv_n_ngrams_match_1": 15.45, "eval_arxiv_n_ngrams_match_2": 2.974, "eval_arxiv_n_ngrams_match_3": 0.68, "eval_arxiv_num_pred_words": 40.758, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.95802446394998, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3712272869795471, "eval_arxiv_runtime": 13.8651, "eval_arxiv_samples_per_second": 36.062, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.3618575120562036, "eval_arxiv_token_set_f1_sem": 0.004199419017338341, "eval_arxiv_token_set_precision": 0.31694719177648856, "eval_arxiv_token_set_recall": 0.4362523814849827, "eval_arxiv_true_num_tokens": 64.0, "step": 169375 }, { "epoch": 32.52, "eval_python_code_alpaca_accuracy": 0.16265625, "eval_python_code_alpaca_bleu_score": 4.795288258352828, "eval_python_code_alpaca_bleu_score_sem": 0.14700565146763137, "eval_python_code_alpaca_emb_cos_sim": 0.773827075958252, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007188469017430904, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8409552574157715, "eval_python_code_alpaca_n_ngrams_match_1": 10.008, "eval_python_code_alpaca_n_ngrams_match_2": 3.072, "eval_python_code_alpaca_n_ngrams_match_3": 1.018, "eval_python_code_alpaca_num_pred_words": 43.362, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.132123310800242, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34068054770610645, "eval_python_code_alpaca_runtime": 11.7446, "eval_python_code_alpaca_samples_per_second": 42.573, "eval_python_code_alpaca_steps_per_second": 0.085, "eval_python_code_alpaca_token_set_f1": 0.4858328405216677, "eval_python_code_alpaca_token_set_f1_sem": 0.0055085417523748365, "eval_python_code_alpaca_token_set_precision": 0.5473751159039163, "eval_python_code_alpaca_token_set_recall": 0.4575941964242227, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 169375 }, { "epoch": 32.52, "eval_wikibio_accuracy": 0.325, "eval_wikibio_bleu_score": 6.378139245886941, "eval_wikibio_bleu_score_sem": 0.2235415734227433, "eval_wikibio_emb_cos_sim": 0.7461855411529541, "eval_wikibio_emb_cos_sim_sem": 0.009127345879666007, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6928439140319824, "eval_wikibio_n_ngrams_match_1": 10.584, "eval_wikibio_n_ngrams_match_2": 3.638, "eval_wikibio_n_ngrams_match_3": 1.388, "eval_wikibio_num_pred_words": 37.274, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.1588931513773, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3694077329563741, "eval_wikibio_runtime": 14.0317, "eval_wikibio_samples_per_second": 35.634, "eval_wikibio_steps_per_second": 0.071, "eval_wikibio_token_set_f1": 0.33088383938587673, "eval_wikibio_token_set_f1_sem": 0.0052162764037644105, "eval_wikibio_token_set_precision": 0.3439744254658963, "eval_wikibio_token_set_recall": 0.33292363039390155, "eval_wikibio_true_num_tokens": 61.1328125, "step": 169375 }, { "epoch": 32.52, "eval_nq_accuracy": 0.5314375, "eval_nq_bleu_score": 12.025232572179775, "eval_nq_bleu_score_sem": 0.4855058227386262, "eval_nq_emb_cos_sim": 0.8393033742904663, "eval_nq_emb_cos_sim_sem": 0.006955296846400447, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.149290084838867, "eval_nq_n_ngrams_match_1": 23.456, "eval_nq_n_ngrams_match_2": 8.678, "eval_nq_n_ngrams_match_3": 4.002, "eval_nq_num_pred_words": 49.062, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.578766038830206, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4544787891648732, "eval_nq_runtime": 12.8276, "eval_nq_samples_per_second": 38.978, "eval_nq_steps_per_second": 0.078, "eval_nq_token_set_f1": 0.4673685126084771, "eval_nq_token_set_f1_sem": 0.004954060259206487, "eval_nq_token_set_precision": 0.4276777373946588, "eval_nq_token_set_recall": 0.5223366567606176, "eval_nq_true_num_tokens": 64.0, "step": 169375 }, { "epoch": 32.52, "learning_rate": 0.001, "loss": 2.5197, "step": 169380 }, { "epoch": 32.53, "learning_rate": 0.001, "loss": 2.5255, "step": 169392 }, { "epoch": 32.53, "learning_rate": 0.001, "loss": 2.5258, "step": 169404 }, { "epoch": 32.53, "learning_rate": 0.001, "loss": 2.5236, "step": 169416 }, { "epoch": 32.53, "learning_rate": 0.001, "loss": 2.5235, "step": 169428 }, { "epoch": 32.53, "learning_rate": 0.001, "loss": 2.5148, "step": 169440 }, { "epoch": 32.54, "learning_rate": 0.001, "loss": 2.5237, "step": 169452 }, { "epoch": 32.54, "learning_rate": 0.001, "loss": 2.5234, "step": 169464 }, { "epoch": 32.54, "learning_rate": 0.001, "loss": 2.526, "step": 169476 }, { "epoch": 32.54, "learning_rate": 0.001, "loss": 2.5185, "step": 169488 }, { "epoch": 32.55, "learning_rate": 0.001, "loss": 2.5219, "step": 169500 }, { "epoch": 32.55, "learning_rate": 0.001, "loss": 2.5209, "step": 169512 }, { "epoch": 32.55, "learning_rate": 0.001, "loss": 2.5262, "step": 169524 }, { "epoch": 32.55, "learning_rate": 0.001, "loss": 2.5301, "step": 169536 }, { "epoch": 32.56, "learning_rate": 0.001, "loss": 2.5184, "step": 169548 }, { "epoch": 32.56, "learning_rate": 0.001, "loss": 2.5332, "step": 169560 }, { "epoch": 32.56, "learning_rate": 0.001, "loss": 2.522, "step": 169572 }, { "epoch": 32.56, "learning_rate": 0.001, "loss": 2.5368, "step": 169584 }, { "epoch": 32.56, "learning_rate": 0.001, "loss": 2.5242, "step": 169596 }, { "epoch": 32.57, "learning_rate": 0.001, "loss": 2.5252, "step": 169608 }, { "epoch": 32.57, "learning_rate": 0.001, "loss": 2.5172, "step": 169620 }, { "epoch": 32.57, "learning_rate": 0.001, "loss": 2.5183, "step": 169632 }, { "epoch": 32.57, "learning_rate": 0.001, "loss": 2.5359, "step": 169644 }, { "epoch": 32.58, "learning_rate": 0.001, "loss": 2.5206, "step": 169656 }, { "epoch": 32.58, "learning_rate": 0.001, "loss": 2.5243, "step": 169668 }, { "epoch": 32.58, "learning_rate": 0.001, "loss": 2.5221, "step": 169680 }, { "epoch": 32.58, "learning_rate": 0.001, "loss": 2.5157, "step": 169692 }, { "epoch": 32.59, "learning_rate": 0.001, "loss": 2.5145, "step": 169704 }, { "epoch": 32.59, "learning_rate": 0.001, "loss": 2.5219, "step": 169716 }, { "epoch": 32.59, "learning_rate": 0.001, "loss": 2.5237, "step": 169728 }, { "epoch": 32.59, "learning_rate": 0.001, "loss": 2.5337, "step": 169740 }, { "epoch": 32.59, "learning_rate": 0.001, "loss": 2.5212, "step": 169752 }, { "epoch": 32.6, "learning_rate": 0.001, "loss": 2.5105, "step": 169764 }, { "epoch": 32.6, "learning_rate": 0.001, "loss": 2.5203, "step": 169776 }, { "epoch": 32.6, "learning_rate": 0.001, "loss": 2.5218, "step": 169788 }, { "epoch": 32.6, "learning_rate": 0.001, "loss": 2.5217, "step": 169800 }, { "epoch": 32.61, "learning_rate": 0.001, "loss": 2.53, "step": 169812 }, { "epoch": 32.61, "learning_rate": 0.001, "loss": 2.5266, "step": 169824 }, { "epoch": 32.61, "learning_rate": 0.001, "loss": 2.5256, "step": 169836 }, { "epoch": 32.61, "learning_rate": 0.001, "loss": 2.5178, "step": 169848 }, { "epoch": 32.62, "learning_rate": 0.001, "loss": 2.5241, "step": 169860 }, { "epoch": 32.62, "learning_rate": 0.001, "loss": 2.5371, "step": 169872 }, { "epoch": 32.62, "learning_rate": 0.001, "loss": 2.5314, "step": 169884 }, { "epoch": 32.62, "learning_rate": 0.001, "loss": 2.5203, "step": 169896 }, { "epoch": 32.62, "learning_rate": 0.001, "loss": 2.5162, "step": 169908 }, { "epoch": 32.63, "learning_rate": 0.001, "loss": 2.5199, "step": 169920 }, { "epoch": 32.63, "learning_rate": 0.001, "loss": 2.5122, "step": 169932 }, { "epoch": 32.63, "learning_rate": 0.001, "loss": 2.5223, "step": 169944 }, { "epoch": 32.63, "learning_rate": 0.001, "loss": 2.5236, "step": 169956 }, { "epoch": 32.64, "learning_rate": 0.001, "loss": 2.5259, "step": 169968 }, { "epoch": 32.64, "learning_rate": 0.001, "loss": 2.5272, "step": 169980 }, { "epoch": 32.64, "learning_rate": 0.001, "loss": 2.5252, "step": 169992 }, { "epoch": 32.64, "eval_ag_news_accuracy": 0.32609375, "eval_ag_news_bleu_score": 5.103588714402894, "eval_ag_news_bleu_score_sem": 0.15790990091704368, "eval_ag_news_emb_cos_sim": 0.8158586025238037, "eval_ag_news_emb_cos_sim_sem": 0.006535474064016955, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4988529682159424, "eval_ag_news_n_ngrams_match_1": 14.592, "eval_ag_news_n_ngrams_match_2": 3.32, "eval_ag_news_n_ngrams_match_3": 0.984, "eval_ag_news_num_pred_words": 46.78, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.077489259126004, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35906025776254014, "eval_ag_news_runtime": 11.8885, "eval_ag_news_samples_per_second": 42.057, "eval_ag_news_steps_per_second": 0.084, "eval_ag_news_token_set_f1": 0.36058742545039246, "eval_ag_news_token_set_f1_sem": 0.004244958120679543, "eval_ag_news_token_set_precision": 0.3475712214395737, "eval_ag_news_token_set_recall": 0.38892050994854466, "eval_ag_news_true_num_tokens": 56.09375, "step": 170000 }, { "epoch": 32.64, "eval_anthropic_toxic_prompts_accuracy": 0.116125, "eval_anthropic_toxic_prompts_bleu_score": 3.182037965618906, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1169221128708404, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6759343147277832, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008286285102751369, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2131259441375732, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 47.13, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.856665454903688, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2181414961458868, "eval_anthropic_toxic_prompts_runtime": 11.6025, "eval_anthropic_toxic_prompts_samples_per_second": 43.094, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.36194357982938186, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065557976352004, "eval_anthropic_toxic_prompts_token_set_precision": 0.443049473231829, "eval_anthropic_toxic_prompts_token_set_recall": 0.3315463960453458, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 170000 }, { "epoch": 32.64, "eval_arxiv_accuracy": 0.34803125, "eval_arxiv_bleu_score": 4.417925651773008, "eval_arxiv_bleu_score_sem": 0.1187334071204115, "eval_arxiv_emb_cos_sim": 0.7819305658340454, "eval_arxiv_emb_cos_sim_sem": 0.006849457548710988, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.366028308868408, "eval_arxiv_n_ngrams_match_1": 15.478, "eval_arxiv_n_ngrams_match_2": 3.022, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 40.71, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.96326517357843, "eval_arxiv_pred_num_tokens": 62.96875, "eval_arxiv_rouge_score": 0.3704510976994211, "eval_arxiv_runtime": 11.7231, "eval_arxiv_samples_per_second": 42.651, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.3632259435023624, "eval_arxiv_token_set_f1_sem": 0.0040308592469305285, "eval_arxiv_token_set_precision": 0.3144717841751199, "eval_arxiv_token_set_recall": 0.448385466760016, "eval_arxiv_true_num_tokens": 64.0, "step": 170000 }, { "epoch": 32.64, "eval_python_code_alpaca_accuracy": 0.16253125, "eval_python_code_alpaca_bleu_score": 4.970254213898489, "eval_python_code_alpaca_bleu_score_sem": 0.1582547434828246, "eval_python_code_alpaca_emb_cos_sim": 0.7549129724502563, "eval_python_code_alpaca_emb_cos_sim_sem": 0.010142988781280386, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8428986072540283, "eval_python_code_alpaca_n_ngrams_match_1": 9.914, "eval_python_code_alpaca_n_ngrams_match_2": 3.06, "eval_python_code_alpaca_n_ngrams_match_3": 1.13, "eval_python_code_alpaca_num_pred_words": 43.058, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.16544939149375, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.337584587472073, "eval_python_code_alpaca_runtime": 11.4225, "eval_python_code_alpaca_samples_per_second": 43.773, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.4818881681938009, "eval_python_code_alpaca_token_set_f1_sem": 0.00590458283299077, "eval_python_code_alpaca_token_set_precision": 0.543161543634205, "eval_python_code_alpaca_token_set_recall": 0.4554755920180267, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 170000 }, { "epoch": 32.64, "eval_wikibio_accuracy": 0.32665625, "eval_wikibio_bleu_score": 5.952713524474662, "eval_wikibio_bleu_score_sem": 0.21708734155055998, "eval_wikibio_emb_cos_sim": 0.7362420558929443, "eval_wikibio_emb_cos_sim_sem": 0.009733167314339628, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6685829162597656, "eval_wikibio_n_ngrams_match_1": 9.872, "eval_wikibio_n_ngrams_match_2": 3.352, "eval_wikibio_n_ngrams_match_3": 1.208, "eval_wikibio_num_pred_words": 35.764, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.1963220155196, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3513284428263801, "eval_wikibio_runtime": 11.4986, "eval_wikibio_samples_per_second": 43.483, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.31845039340589476, "eval_wikibio_token_set_f1_sem": 0.0054494845963791476, "eval_wikibio_token_set_precision": 0.32214312835742664, "eval_wikibio_token_set_recall": 0.3346979182046409, "eval_wikibio_true_num_tokens": 61.1328125, "step": 170000 }, { "epoch": 32.64, "eval_nq_accuracy": 0.5320625, "eval_nq_bleu_score": 11.87375527848572, "eval_nq_bleu_score_sem": 0.4889766435354102, "eval_nq_emb_cos_sim": 0.8338566422462463, "eval_nq_emb_cos_sim_sem": 0.007484752801383885, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.145758867263794, "eval_nq_n_ngrams_match_1": 23.3, "eval_nq_n_ngrams_match_2": 8.64, "eval_nq_n_ngrams_match_3": 4.014, "eval_nq_num_pred_words": 49.014, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.54852597297066, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4514813362329917, "eval_nq_runtime": 11.8327, "eval_nq_samples_per_second": 42.256, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.4651492914938751, "eval_nq_token_set_f1_sem": 0.004983244804647352, "eval_nq_token_set_precision": 0.42289744589278583, "eval_nq_token_set_recall": 0.5253804325590283, "eval_nq_true_num_tokens": 64.0, "step": 170000 }, { "epoch": 32.64, "learning_rate": 0.001, "loss": 2.509, "step": 170004 }, { "epoch": 32.65, "learning_rate": 0.001, "loss": 2.5204, "step": 170016 }, { "epoch": 32.65, "learning_rate": 0.001, "loss": 2.5225, "step": 170028 }, { "epoch": 32.65, "learning_rate": 0.001, "loss": 2.5158, "step": 170040 }, { "epoch": 32.65, "learning_rate": 0.001, "loss": 2.5183, "step": 170052 }, { "epoch": 32.65, "learning_rate": 0.001, "loss": 2.5245, "step": 170064 }, { "epoch": 32.66, "learning_rate": 0.001, "loss": 2.5234, "step": 170076 }, { "epoch": 32.66, "learning_rate": 0.001, "loss": 2.5111, "step": 170088 }, { "epoch": 32.66, "learning_rate": 0.001, "loss": 2.5087, "step": 170100 }, { "epoch": 32.66, "learning_rate": 0.001, "loss": 2.5344, "step": 170112 }, { "epoch": 32.67, "learning_rate": 0.001, "loss": 2.5176, "step": 170124 }, { "epoch": 32.67, "learning_rate": 0.001, "loss": 2.5188, "step": 170136 }, { "epoch": 32.67, "learning_rate": 0.001, "loss": 2.5118, "step": 170148 }, { "epoch": 32.67, "learning_rate": 0.001, "loss": 2.5233, "step": 170160 }, { "epoch": 32.68, "learning_rate": 0.001, "loss": 2.5315, "step": 170172 }, { "epoch": 32.68, "learning_rate": 0.001, "loss": 2.5251, "step": 170184 }, { "epoch": 32.68, "learning_rate": 0.001, "loss": 2.5267, "step": 170196 }, { "epoch": 32.68, "learning_rate": 0.001, "loss": 2.5187, "step": 170208 }, { "epoch": 32.68, "learning_rate": 0.001, "loss": 2.5342, "step": 170220 }, { "epoch": 32.69, "learning_rate": 0.001, "loss": 2.5195, "step": 170232 }, { "epoch": 32.69, "learning_rate": 0.001, "loss": 2.5193, "step": 170244 }, { "epoch": 32.69, "learning_rate": 0.001, "loss": 2.5339, "step": 170256 }, { "epoch": 32.69, "learning_rate": 0.001, "loss": 2.525, "step": 170268 }, { "epoch": 32.7, "learning_rate": 0.001, "loss": 2.5176, "step": 170280 }, { "epoch": 32.7, "learning_rate": 0.001, "loss": 2.5145, "step": 170292 }, { "epoch": 32.7, "learning_rate": 0.001, "loss": 2.5207, "step": 170304 }, { "epoch": 32.7, "learning_rate": 0.001, "loss": 2.5225, "step": 170316 }, { "epoch": 32.71, "learning_rate": 0.001, "loss": 2.5218, "step": 170328 }, { "epoch": 32.71, "learning_rate": 0.001, "loss": 2.5326, "step": 170340 }, { "epoch": 32.71, "learning_rate": 0.001, "loss": 2.5212, "step": 170352 }, { "epoch": 32.71, "learning_rate": 0.001, "loss": 2.5237, "step": 170364 }, { "epoch": 32.71, "learning_rate": 0.001, "loss": 2.5219, "step": 170376 }, { "epoch": 32.72, "learning_rate": 0.001, "loss": 2.5308, "step": 170388 }, { "epoch": 32.72, "learning_rate": 0.001, "loss": 2.524, "step": 170400 }, { "epoch": 32.72, "learning_rate": 0.001, "loss": 2.5278, "step": 170412 }, { "epoch": 32.72, "learning_rate": 0.001, "loss": 2.5279, "step": 170424 }, { "epoch": 32.73, "learning_rate": 0.001, "loss": 2.5283, "step": 170436 }, { "epoch": 32.73, "learning_rate": 0.001, "loss": 2.5173, "step": 170448 }, { "epoch": 32.73, "learning_rate": 0.001, "loss": 2.5168, "step": 170460 }, { "epoch": 32.73, "learning_rate": 0.001, "loss": 2.5235, "step": 170472 }, { "epoch": 32.74, "learning_rate": 0.001, "loss": 2.5215, "step": 170484 }, { "epoch": 32.74, "learning_rate": 0.001, "loss": 2.5299, "step": 170496 }, { "epoch": 32.74, "learning_rate": 0.001, "loss": 2.5226, "step": 170508 }, { "epoch": 32.74, "learning_rate": 0.001, "loss": 2.5212, "step": 170520 }, { "epoch": 32.74, "learning_rate": 0.001, "loss": 2.5235, "step": 170532 }, { "epoch": 32.75, "learning_rate": 0.001, "loss": 2.5239, "step": 170544 }, { "epoch": 32.75, "learning_rate": 0.001, "loss": 2.5122, "step": 170556 }, { "epoch": 32.75, "learning_rate": 0.001, "loss": 2.5211, "step": 170568 }, { "epoch": 32.75, "learning_rate": 0.001, "loss": 2.5188, "step": 170580 }, { "epoch": 32.76, "learning_rate": 0.001, "loss": 2.5243, "step": 170592 }, { "epoch": 32.76, "learning_rate": 0.001, "loss": 2.5286, "step": 170604 }, { "epoch": 32.76, "learning_rate": 0.001, "loss": 2.5276, "step": 170616 }, { "epoch": 32.76, "eval_ag_news_accuracy": 0.326, "eval_ag_news_bleu_score": 4.856974187090668, "eval_ag_news_bleu_score_sem": 0.15180008377899748, "eval_ag_news_emb_cos_sim": 0.8178597688674927, "eval_ag_news_emb_cos_sim_sem": 0.006283727645138845, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5094616413116455, "eval_ag_news_n_ngrams_match_1": 14.18, "eval_ag_news_n_ngrams_match_2": 3.172, "eval_ag_news_n_ngrams_match_3": 0.916, "eval_ag_news_num_pred_words": 46.816, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.430265464660316, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3505884215736518, "eval_ag_news_runtime": 12.1688, "eval_ag_news_samples_per_second": 41.089, "eval_ag_news_steps_per_second": 0.082, "eval_ag_news_token_set_f1": 0.3522260030815371, "eval_ag_news_token_set_f1_sem": 0.004373883248593534, "eval_ag_news_token_set_precision": 0.3382308289845004, "eval_ag_news_token_set_recall": 0.3801282350636806, "eval_ag_news_true_num_tokens": 56.09375, "step": 170625 }, { "epoch": 32.76, "eval_anthropic_toxic_prompts_accuracy": 0.1154375, "eval_anthropic_toxic_prompts_bleu_score": 3.102271431804127, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11575546645798614, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6726579666137695, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008999542805546308, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2119908332824707, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.91, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 47.914, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.828466391683374, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21454391369210693, "eval_anthropic_toxic_prompts_runtime": 11.6733, "eval_anthropic_toxic_prompts_samples_per_second": 42.833, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.36399885158677264, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006690731797494028, "eval_anthropic_toxic_prompts_token_set_precision": 0.4445680156283615, "eval_anthropic_toxic_prompts_token_set_recall": 0.33535265358218763, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 170625 }, { "epoch": 32.76, "eval_arxiv_accuracy": 0.349375, "eval_arxiv_bleu_score": 4.463035169627046, "eval_arxiv_bleu_score_sem": 0.12904825556114927, "eval_arxiv_emb_cos_sim": 0.7773054242134094, "eval_arxiv_emb_cos_sim_sem": 0.006682008119577206, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.364659309387207, "eval_arxiv_n_ngrams_match_1": 15.38, "eval_arxiv_n_ngrams_match_2": 3.114, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.826, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.923641607091298, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3679257477380071, "eval_arxiv_runtime": 16.3425, "eval_arxiv_samples_per_second": 30.595, "eval_arxiv_steps_per_second": 0.061, "eval_arxiv_token_set_f1": 0.3583587119904286, "eval_arxiv_token_set_f1_sem": 0.004044247773056292, "eval_arxiv_token_set_precision": 0.31065965819744223, "eval_arxiv_token_set_recall": 0.4390801533458176, "eval_arxiv_true_num_tokens": 64.0, "step": 170625 }, { "epoch": 32.76, "eval_python_code_alpaca_accuracy": 0.1629375, "eval_python_code_alpaca_bleu_score": 4.83823739466449, "eval_python_code_alpaca_bleu_score_sem": 0.15965890443932776, "eval_python_code_alpaca_emb_cos_sim": 0.7576169967651367, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011544135076789247, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8417391777038574, "eval_python_code_alpaca_n_ngrams_match_1": 9.948, "eval_python_code_alpaca_n_ngrams_match_2": 2.96, "eval_python_code_alpaca_n_ngrams_match_3": 1.076, "eval_python_code_alpaca_num_pred_words": 42.862, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.145558795327975, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3353055955473792, "eval_python_code_alpaca_runtime": 11.906, "eval_python_code_alpaca_samples_per_second": 41.996, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.47783723083060203, "eval_python_code_alpaca_token_set_f1_sem": 0.0058067061539587425, "eval_python_code_alpaca_token_set_precision": 0.5402525688840687, "eval_python_code_alpaca_token_set_recall": 0.45255878676672123, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 170625 }, { "epoch": 32.76, "eval_wikibio_accuracy": 0.3281875, "eval_wikibio_bleu_score": 5.973474103548643, "eval_wikibio_bleu_score_sem": 0.21791608459900272, "eval_wikibio_emb_cos_sim": 0.7314437031745911, "eval_wikibio_emb_cos_sim_sem": 0.009586113667162579, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6576809883117676, "eval_wikibio_n_ngrams_match_1": 9.644, "eval_wikibio_n_ngrams_match_2": 3.216, "eval_wikibio_n_ngrams_match_3": 1.21, "eval_wikibio_num_pred_words": 34.786, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.77132737682593, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34238598380280894, "eval_wikibio_runtime": 11.8981, "eval_wikibio_samples_per_second": 42.023, "eval_wikibio_steps_per_second": 0.084, "eval_wikibio_token_set_f1": 0.31324390654108736, "eval_wikibio_token_set_f1_sem": 0.005682618559756961, "eval_wikibio_token_set_precision": 0.313996683749092, "eval_wikibio_token_set_recall": 0.3310759916162236, "eval_wikibio_true_num_tokens": 61.1328125, "step": 170625 }, { "epoch": 32.76, "eval_nq_accuracy": 0.53359375, "eval_nq_bleu_score": 12.06423858892232, "eval_nq_bleu_score_sem": 0.47737088317606996, "eval_nq_emb_cos_sim": 0.8339177966117859, "eval_nq_emb_cos_sim_sem": 0.007387219725573613, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1459178924560547, "eval_nq_n_ngrams_match_1": 23.386, "eval_nq_n_ngrams_match_2": 8.788, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 49.14, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.549885512054676, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45452192133628033, "eval_nq_runtime": 12.1468, "eval_nq_samples_per_second": 41.163, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.46843519800215117, "eval_nq_token_set_f1_sem": 0.004888333826128115, "eval_nq_token_set_precision": 0.42489588778155346, "eval_nq_token_set_recall": 0.5311409146401321, "eval_nq_true_num_tokens": 64.0, "step": 170625 }, { "epoch": 32.76, "learning_rate": 0.001, "loss": 2.5232, "step": 170628 }, { "epoch": 32.76, "learning_rate": 0.001, "loss": 2.5188, "step": 170640 }, { "epoch": 32.77, "learning_rate": 0.001, "loss": 2.5163, "step": 170652 }, { "epoch": 32.77, "learning_rate": 0.001, "loss": 2.5183, "step": 170664 }, { "epoch": 32.77, "learning_rate": 0.001, "loss": 2.5128, "step": 170676 }, { "epoch": 32.77, "learning_rate": 0.001, "loss": 2.5248, "step": 170688 }, { "epoch": 32.78, "learning_rate": 0.001, "loss": 2.5187, "step": 170700 }, { "epoch": 32.78, "learning_rate": 0.001, "loss": 2.5298, "step": 170712 }, { "epoch": 32.78, "learning_rate": 0.001, "loss": 2.5141, "step": 170724 }, { "epoch": 32.78, "learning_rate": 0.001, "loss": 2.5333, "step": 170736 }, { "epoch": 32.79, "learning_rate": 0.001, "loss": 2.5237, "step": 170748 }, { "epoch": 32.79, "learning_rate": 0.001, "loss": 2.5243, "step": 170760 }, { "epoch": 32.79, "learning_rate": 0.001, "loss": 2.5148, "step": 170772 }, { "epoch": 32.79, "learning_rate": 0.001, "loss": 2.5206, "step": 170784 }, { "epoch": 32.79, "learning_rate": 0.001, "loss": 2.5154, "step": 170796 }, { "epoch": 32.8, "learning_rate": 0.001, "loss": 2.5171, "step": 170808 }, { "epoch": 32.8, "learning_rate": 0.001, "loss": 2.5238, "step": 170820 }, { "epoch": 32.8, "learning_rate": 0.001, "loss": 2.5154, "step": 170832 }, { "epoch": 32.8, "learning_rate": 0.001, "loss": 2.5215, "step": 170844 }, { "epoch": 32.81, "learning_rate": 0.001, "loss": 2.5193, "step": 170856 }, { "epoch": 32.81, "learning_rate": 0.001, "loss": 2.526, "step": 170868 }, { "epoch": 32.81, "learning_rate": 0.001, "loss": 2.5226, "step": 170880 }, { "epoch": 32.81, "learning_rate": 0.001, "loss": 2.5313, "step": 170892 }, { "epoch": 32.82, "learning_rate": 0.001, "loss": 2.5307, "step": 170904 }, { "epoch": 32.82, "learning_rate": 0.001, "loss": 2.5209, "step": 170916 }, { "epoch": 32.82, "learning_rate": 0.001, "loss": 2.5327, "step": 170928 }, { "epoch": 32.82, "learning_rate": 0.001, "loss": 2.5299, "step": 170940 }, { "epoch": 32.82, "learning_rate": 0.001, "loss": 2.5159, "step": 170952 }, { "epoch": 32.83, "learning_rate": 0.001, "loss": 2.5255, "step": 170964 }, { "epoch": 32.83, "learning_rate": 0.001, "loss": 2.5187, "step": 170976 }, { "epoch": 32.83, "learning_rate": 0.001, "loss": 2.5287, "step": 170988 }, { "epoch": 32.83, "learning_rate": 0.001, "loss": 2.5191, "step": 171000 }, { "epoch": 32.84, "learning_rate": 0.001, "loss": 2.5409, "step": 171012 }, { "epoch": 32.84, "learning_rate": 0.001, "loss": 2.5265, "step": 171024 }, { "epoch": 32.84, "learning_rate": 0.001, "loss": 2.5276, "step": 171036 }, { "epoch": 32.84, "learning_rate": 0.001, "loss": 2.52, "step": 171048 }, { "epoch": 32.85, "learning_rate": 0.001, "loss": 2.5268, "step": 171060 }, { "epoch": 32.85, "learning_rate": 0.001, "loss": 2.5235, "step": 171072 }, { "epoch": 32.85, "learning_rate": 0.001, "loss": 2.5263, "step": 171084 }, { "epoch": 32.85, "learning_rate": 0.001, "loss": 2.5266, "step": 171096 }, { "epoch": 32.85, "learning_rate": 0.001, "loss": 2.5342, "step": 171108 }, { "epoch": 32.86, "learning_rate": 0.001, "loss": 2.5181, "step": 171120 }, { "epoch": 32.86, "learning_rate": 0.001, "loss": 2.5299, "step": 171132 }, { "epoch": 32.86, "learning_rate": 0.001, "loss": 2.5236, "step": 171144 }, { "epoch": 32.86, "learning_rate": 0.001, "loss": 2.5156, "step": 171156 }, { "epoch": 32.87, "learning_rate": 0.001, "loss": 2.5259, "step": 171168 }, { "epoch": 32.87, "learning_rate": 0.001, "loss": 2.5281, "step": 171180 }, { "epoch": 32.87, "learning_rate": 0.001, "loss": 2.5264, "step": 171192 }, { "epoch": 32.87, "learning_rate": 0.001, "loss": 2.528, "step": 171204 }, { "epoch": 32.88, "learning_rate": 0.001, "loss": 2.5247, "step": 171216 }, { "epoch": 32.88, "learning_rate": 0.001, "loss": 2.5284, "step": 171228 }, { "epoch": 32.88, "learning_rate": 0.001, "loss": 2.5258, "step": 171240 }, { "epoch": 32.88, "eval_ag_news_accuracy": 0.32809375, "eval_ag_news_bleu_score": 5.020681871710474, "eval_ag_news_bleu_score_sem": 0.15906025845681282, "eval_ag_news_emb_cos_sim": 0.816615104675293, "eval_ag_news_emb_cos_sim_sem": 0.007979907807124535, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4913876056671143, "eval_ag_news_n_ngrams_match_1": 14.416, "eval_ag_news_n_ngrams_match_2": 3.374, "eval_ag_news_n_ngrams_match_3": 0.984, "eval_ag_news_num_pred_words": 47.11, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.831473251518325, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3536024718794171, "eval_ag_news_runtime": 10.8362, "eval_ag_news_samples_per_second": 46.142, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.3571776617999564, "eval_ag_news_token_set_f1_sem": 0.0043720717154677935, "eval_ag_news_token_set_precision": 0.34509287064370947, "eval_ag_news_token_set_recall": 0.38349111843030853, "eval_ag_news_true_num_tokens": 56.09375, "step": 171250 }, { "epoch": 32.88, "eval_anthropic_toxic_prompts_accuracy": 0.11575, "eval_anthropic_toxic_prompts_bleu_score": 3.2108571731562097, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12656372884784825, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6791132688522339, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010473322609018615, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2023022174835205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.706, "eval_anthropic_toxic_prompts_num_pred_words": 46.506, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.589074480688563, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21903306242132764, "eval_anthropic_toxic_prompts_runtime": 10.2545, "eval_anthropic_toxic_prompts_samples_per_second": 48.759, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.36427545760431707, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006703274535485368, "eval_anthropic_toxic_prompts_token_set_precision": 0.444038474671246, "eval_anthropic_toxic_prompts_token_set_recall": 0.3347992232530233, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 171250 }, { "epoch": 32.88, "eval_arxiv_accuracy": 0.35146875, "eval_arxiv_bleu_score": 4.529828456965386, "eval_arxiv_bleu_score_sem": 0.13127557612397123, "eval_arxiv_emb_cos_sim": 0.7781928181648254, "eval_arxiv_emb_cos_sim_sem": 0.006602027230538197, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3569257259368896, "eval_arxiv_n_ngrams_match_1": 15.466, "eval_arxiv_n_ngrams_match_2": 3.05, "eval_arxiv_n_ngrams_match_3": 0.716, "eval_arxiv_num_pred_words": 40.812, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.70082092275716, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36981225705842213, "eval_arxiv_runtime": 10.3627, "eval_arxiv_samples_per_second": 48.25, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.36204740011173986, "eval_arxiv_token_set_f1_sem": 0.004320431712320082, "eval_arxiv_token_set_precision": 0.3152460824655595, "eval_arxiv_token_set_recall": 0.43975130406007196, "eval_arxiv_true_num_tokens": 64.0, "step": 171250 }, { "epoch": 32.88, "eval_python_code_alpaca_accuracy": 0.1644375, "eval_python_code_alpaca_bleu_score": 4.743622967198449, "eval_python_code_alpaca_bleu_score_sem": 0.1552756917165026, "eval_python_code_alpaca_emb_cos_sim": 0.7592819929122925, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008080794191569457, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8483197689056396, "eval_python_code_alpaca_n_ngrams_match_1": 9.836, "eval_python_code_alpaca_n_ngrams_match_2": 2.992, "eval_python_code_alpaca_n_ngrams_match_3": 1.058, "eval_python_code_alpaca_num_pred_words": 43.094, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.25875876153463, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.332390215647945, "eval_python_code_alpaca_runtime": 10.4448, "eval_python_code_alpaca_samples_per_second": 47.871, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4826877469573501, "eval_python_code_alpaca_token_set_f1_sem": 0.0058548081650853295, "eval_python_code_alpaca_token_set_precision": 0.5389869087933613, "eval_python_code_alpaca_token_set_recall": 0.46343555470418046, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 171250 }, { "epoch": 32.88, "eval_wikibio_accuracy": 0.3256875, "eval_wikibio_bleu_score": 6.215087865876436, "eval_wikibio_bleu_score_sem": 0.22410510272224404, "eval_wikibio_emb_cos_sim": 0.7564117908477783, "eval_wikibio_emb_cos_sim_sem": 0.008450591957493057, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6940345764160156, "eval_wikibio_n_ngrams_match_1": 10.506, "eval_wikibio_n_ngrams_match_2": 3.588, "eval_wikibio_n_ngrams_match_3": 1.37, "eval_wikibio_num_pred_words": 37.344, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.20673731230611, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36541968914994144, "eval_wikibio_runtime": 25.6666, "eval_wikibio_samples_per_second": 19.481, "eval_wikibio_steps_per_second": 0.039, "eval_wikibio_token_set_f1": 0.32750053489029707, "eval_wikibio_token_set_f1_sem": 0.005215283465512357, "eval_wikibio_token_set_precision": 0.33889135443982826, "eval_wikibio_token_set_recall": 0.33166598755298143, "eval_wikibio_true_num_tokens": 61.1328125, "step": 171250 }, { "epoch": 32.88, "eval_nq_accuracy": 0.53184375, "eval_nq_bleu_score": 12.131813804947837, "eval_nq_bleu_score_sem": 0.48372117846921714, "eval_nq_emb_cos_sim": 0.8388272523880005, "eval_nq_emb_cos_sim_sem": 0.006887052537582705, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.14902925491333, "eval_nq_n_ngrams_match_1": 23.478, "eval_nq_n_ngrams_match_2": 8.732, "eval_nq_n_ngrams_match_3": 4.068, "eval_nq_num_pred_words": 49.476, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.576528731714106, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45394393092860086, "eval_nq_runtime": 21.5612, "eval_nq_samples_per_second": 23.19, "eval_nq_steps_per_second": 0.046, "eval_nq_token_set_f1": 0.4685044679630783, "eval_nq_token_set_f1_sem": 0.004989990913179821, "eval_nq_token_set_precision": 0.42823506640878917, "eval_nq_token_set_recall": 0.5255791990724895, "eval_nq_true_num_tokens": 64.0, "step": 171250 }, { "epoch": 32.88, "learning_rate": 0.001, "loss": 2.5255, "step": 171252 }, { "epoch": 32.88, "learning_rate": 0.001, "loss": 2.5224, "step": 171264 }, { "epoch": 32.89, "learning_rate": 0.001, "loss": 2.5308, "step": 171276 }, { "epoch": 32.89, "learning_rate": 0.001, "loss": 2.5166, "step": 171288 }, { "epoch": 32.89, "learning_rate": 0.001, "loss": 2.5283, "step": 171300 }, { "epoch": 32.89, "learning_rate": 0.001, "loss": 2.5339, "step": 171312 }, { "epoch": 32.9, "learning_rate": 0.001, "loss": 2.5314, "step": 171324 }, { "epoch": 32.9, "learning_rate": 0.001, "loss": 2.5313, "step": 171336 }, { "epoch": 32.9, "learning_rate": 0.001, "loss": 2.5184, "step": 171348 }, { "epoch": 32.9, "learning_rate": 0.001, "loss": 2.5271, "step": 171360 }, { "epoch": 32.91, "learning_rate": 0.001, "loss": 2.524, "step": 171372 }, { "epoch": 32.91, "learning_rate": 0.001, "loss": 2.5192, "step": 171384 }, { "epoch": 32.91, "learning_rate": 0.001, "loss": 2.5269, "step": 171396 }, { "epoch": 32.91, "learning_rate": 0.001, "loss": 2.5327, "step": 171408 }, { "epoch": 32.91, "learning_rate": 0.001, "loss": 2.525, "step": 171420 }, { "epoch": 32.92, "learning_rate": 0.001, "loss": 2.5333, "step": 171432 }, { "epoch": 32.92, "learning_rate": 0.001, "loss": 2.5284, "step": 171444 }, { "epoch": 32.92, "learning_rate": 0.001, "loss": 2.5261, "step": 171456 }, { "epoch": 32.92, "learning_rate": 0.001, "loss": 2.5347, "step": 171468 }, { "epoch": 32.93, "learning_rate": 0.001, "loss": 2.5296, "step": 171480 }, { "epoch": 32.93, "learning_rate": 0.001, "loss": 2.5278, "step": 171492 }, { "epoch": 32.93, "learning_rate": 0.001, "loss": 2.5194, "step": 171504 }, { "epoch": 32.93, "learning_rate": 0.001, "loss": 2.5217, "step": 171516 }, { "epoch": 32.94, "learning_rate": 0.001, "loss": 2.5209, "step": 171528 }, { "epoch": 32.94, "learning_rate": 0.001, "loss": 2.5344, "step": 171540 }, { "epoch": 32.94, "learning_rate": 0.001, "loss": 2.5178, "step": 171552 }, { "epoch": 32.94, "learning_rate": 0.001, "loss": 2.5361, "step": 171564 }, { "epoch": 32.94, "learning_rate": 0.001, "loss": 2.5317, "step": 171576 }, { "epoch": 32.95, "learning_rate": 0.001, "loss": 2.5233, "step": 171588 }, { "epoch": 32.95, "learning_rate": 0.001, "loss": 2.5313, "step": 171600 }, { "epoch": 32.95, "learning_rate": 0.001, "loss": 2.5286, "step": 171612 }, { "epoch": 32.95, "learning_rate": 0.001, "loss": 2.5274, "step": 171624 }, { "epoch": 32.96, "learning_rate": 0.001, "loss": 2.5298, "step": 171636 }, { "epoch": 32.96, "learning_rate": 0.001, "loss": 2.5351, "step": 171648 }, { "epoch": 32.96, "learning_rate": 0.001, "loss": 2.5316, "step": 171660 }, { "epoch": 32.96, "learning_rate": 0.001, "loss": 2.5289, "step": 171672 }, { "epoch": 32.97, "learning_rate": 0.001, "loss": 2.5376, "step": 171684 }, { "epoch": 32.97, "learning_rate": 0.001, "loss": 2.5312, "step": 171696 }, { "epoch": 32.97, "learning_rate": 0.001, "loss": 2.5319, "step": 171708 }, { "epoch": 32.97, "learning_rate": 0.001, "loss": 2.526, "step": 171720 }, { "epoch": 32.97, "learning_rate": 0.001, "loss": 2.5241, "step": 171732 }, { "epoch": 32.98, "learning_rate": 0.001, "loss": 2.5342, "step": 171744 }, { "epoch": 32.98, "learning_rate": 0.001, "loss": 2.5354, "step": 171756 }, { "epoch": 32.98, "learning_rate": 0.001, "loss": 2.5343, "step": 171768 }, { "epoch": 32.98, "learning_rate": 0.001, "loss": 2.5286, "step": 171780 }, { "epoch": 32.99, "learning_rate": 0.001, "loss": 2.5237, "step": 171792 }, { "epoch": 32.99, "learning_rate": 0.001, "loss": 2.5252, "step": 171804 }, { "epoch": 32.99, "learning_rate": 0.001, "loss": 2.5351, "step": 171816 }, { "epoch": 32.99, "learning_rate": 0.001, "loss": 2.52, "step": 171828 }, { "epoch": 33.0, "learning_rate": 0.001, "loss": 2.5233, "step": 171840 }, { "epoch": 33.0, "learning_rate": 0.001, "loss": 2.5219, "step": 171852 }, { "epoch": 33.0, "learning_rate": 0.001, "loss": 2.5295, "step": 171864 }, { "epoch": 33.0, "eval_ag_news_accuracy": 0.32559375, "eval_ag_news_bleu_score": 4.9235026984387655, "eval_ag_news_bleu_score_sem": 0.16109972817183485, "eval_ag_news_emb_cos_sim": 0.8212471604347229, "eval_ag_news_emb_cos_sim_sem": 0.006564521145175057, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.508206605911255, "eval_ag_news_n_ngrams_match_1": 14.346, "eval_ag_news_n_ngrams_match_2": 3.2, "eval_ag_news_n_ngrams_match_3": 0.93, "eval_ag_news_num_pred_words": 46.906, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.388335615284056, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35506901925484513, "eval_ag_news_runtime": 10.3829, "eval_ag_news_samples_per_second": 48.156, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35708858837041757, "eval_ag_news_token_set_f1_sem": 0.004292817297193934, "eval_ag_news_token_set_precision": 0.3429343159509728, "eval_ag_news_token_set_recall": 0.38618130192334754, "eval_ag_news_true_num_tokens": 56.09375, "step": 171875 }, { "epoch": 33.0, "eval_anthropic_toxic_prompts_accuracy": 0.1151875, "eval_anthropic_toxic_prompts_bleu_score": 3.157861456491082, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12697962927166806, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729145050048828, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009848224259507321, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2260899543762207, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.236, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.9, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.704, "eval_anthropic_toxic_prompts_num_pred_words": 46.644, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.181005350706855, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21613554276009106, "eval_anthropic_toxic_prompts_runtime": 10.1711, "eval_anthropic_toxic_prompts_samples_per_second": 49.159, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3608151069441283, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00668865426346986, "eval_anthropic_toxic_prompts_token_set_precision": 0.43906316172736687, "eval_anthropic_toxic_prompts_token_set_recall": 0.3348320089793444, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 171875 }, { "epoch": 33.0, "eval_arxiv_accuracy": 0.34865625, "eval_arxiv_bleu_score": 4.489403465644735, "eval_arxiv_bleu_score_sem": 0.12713303055628591, "eval_arxiv_emb_cos_sim": 0.7804597616195679, "eval_arxiv_emb_cos_sim_sem": 0.006157709885180288, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.37156081199646, "eval_arxiv_n_ngrams_match_1": 15.386, "eval_arxiv_n_ngrams_match_2": 3.04, "eval_arxiv_n_ngrams_match_3": 0.71, "eval_arxiv_num_pred_words": 41.134, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.12394860969874, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36819498306324844, "eval_arxiv_runtime": 10.3302, "eval_arxiv_samples_per_second": 48.402, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3609062302540924, "eval_arxiv_token_set_f1_sem": 0.004053174006897004, "eval_arxiv_token_set_precision": 0.3138377129826513, "eval_arxiv_token_set_recall": 0.4429384331972991, "eval_arxiv_true_num_tokens": 64.0, "step": 171875 }, { "epoch": 33.0, "eval_python_code_alpaca_accuracy": 0.161875, "eval_python_code_alpaca_bleu_score": 4.62960173006862, "eval_python_code_alpaca_bleu_score_sem": 0.14550535826787744, "eval_python_code_alpaca_emb_cos_sim": 0.7594995498657227, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00745378738007403, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8776822090148926, "eval_python_code_alpaca_n_ngrams_match_1": 9.834, "eval_python_code_alpaca_n_ngrams_match_2": 2.83, "eval_python_code_alpaca_n_ngrams_match_3": 0.964, "eval_python_code_alpaca_num_pred_words": 42.994, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.773031231401315, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33606171295275333, "eval_python_code_alpaca_runtime": 25.4205, "eval_python_code_alpaca_samples_per_second": 19.669, "eval_python_code_alpaca_steps_per_second": 0.039, "eval_python_code_alpaca_token_set_f1": 0.4748090821307063, "eval_python_code_alpaca_token_set_f1_sem": 0.005542073959793082, "eval_python_code_alpaca_token_set_precision": 0.5367738513795776, "eval_python_code_alpaca_token_set_recall": 0.4488405713810996, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 171875 }, { "epoch": 33.0, "eval_wikibio_accuracy": 0.32353125, "eval_wikibio_bleu_score": 6.051798141522513, "eval_wikibio_bleu_score_sem": 0.2129569468297189, "eval_wikibio_emb_cos_sim": 0.7415202856063843, "eval_wikibio_emb_cos_sim_sem": 0.007917238736111377, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.70955753326416, "eval_wikibio_n_ngrams_match_1": 10.354, "eval_wikibio_n_ngrams_match_2": 3.46, "eval_wikibio_n_ngrams_match_3": 1.258, "eval_wikibio_num_pred_words": 36.76, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.83573407509388, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36204102161015883, "eval_wikibio_runtime": 25.1312, "eval_wikibio_samples_per_second": 19.896, "eval_wikibio_steps_per_second": 0.04, "eval_wikibio_token_set_f1": 0.3242490389331767, "eval_wikibio_token_set_f1_sem": 0.0053147265560793844, "eval_wikibio_token_set_precision": 0.33364966270567153, "eval_wikibio_token_set_recall": 0.3312595464034135, "eval_wikibio_true_num_tokens": 61.1328125, "step": 171875 }, { "epoch": 33.0, "eval_nq_accuracy": 0.53425, "eval_nq_bleu_score": 12.233642418000283, "eval_nq_bleu_score_sem": 0.4961011223190279, "eval_nq_emb_cos_sim": 0.8402355909347534, "eval_nq_emb_cos_sim_sem": 0.006836898446399249, "eval_nq_emb_top1_equal": 0.3984375, "eval_nq_emb_top1_equal_sem": 0.04344287990767221, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.146106243133545, "eval_nq_n_ngrams_match_1": 23.392, "eval_nq_n_ngrams_match_2": 8.764, "eval_nq_n_ngrams_match_3": 4.098, "eval_nq_num_pred_words": 49.17, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.551496040450631, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4530372491260065, "eval_nq_runtime": 10.76, "eval_nq_samples_per_second": 46.468, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.46608364421916054, "eval_nq_token_set_f1_sem": 0.004913049429546021, "eval_nq_token_set_precision": 0.42432459845450177, "eval_nq_token_set_recall": 0.5255722036592672, "eval_nq_true_num_tokens": 64.0, "step": 171875 }, { "epoch": 33.0, "learning_rate": 0.001, "loss": 2.5144, "step": 171876 }, { "epoch": 33.0, "learning_rate": 0.001, "loss": 2.5142, "step": 171888 }, { "epoch": 33.01, "learning_rate": 0.001, "loss": 2.5057, "step": 171900 }, { "epoch": 33.01, "learning_rate": 0.001, "loss": 2.5064, "step": 171912 }, { "epoch": 33.01, "learning_rate": 0.001, "loss": 2.5135, "step": 171924 }, { "epoch": 33.01, "learning_rate": 0.001, "loss": 2.5101, "step": 171936 }, { "epoch": 33.02, "learning_rate": 0.001, "loss": 2.5145, "step": 171948 }, { "epoch": 33.02, "learning_rate": 0.001, "loss": 2.5084, "step": 171960 }, { "epoch": 33.02, "learning_rate": 0.001, "loss": 2.5045, "step": 171972 }, { "epoch": 33.02, "learning_rate": 0.001, "loss": 2.4986, "step": 171984 }, { "epoch": 33.03, "learning_rate": 0.001, "loss": 2.5126, "step": 171996 }, { "epoch": 33.03, "learning_rate": 0.001, "loss": 2.5159, "step": 172008 }, { "epoch": 33.03, "learning_rate": 0.001, "loss": 2.514, "step": 172020 }, { "epoch": 33.03, "learning_rate": 0.001, "loss": 2.5153, "step": 172032 }, { "epoch": 33.03, "learning_rate": 0.001, "loss": 2.518, "step": 172044 }, { "epoch": 33.04, "learning_rate": 0.001, "loss": 2.5139, "step": 172056 }, { "epoch": 33.04, "learning_rate": 0.001, "loss": 2.5009, "step": 172068 }, { "epoch": 33.04, "learning_rate": 0.001, "loss": 2.5137, "step": 172080 }, { "epoch": 33.04, "learning_rate": 0.001, "loss": 2.5115, "step": 172092 }, { "epoch": 33.05, "learning_rate": 0.001, "loss": 2.5168, "step": 172104 }, { "epoch": 33.05, "learning_rate": 0.001, "loss": 2.5144, "step": 172116 }, { "epoch": 33.05, "learning_rate": 0.001, "loss": 2.5074, "step": 172128 }, { "epoch": 33.05, "learning_rate": 0.001, "loss": 2.5071, "step": 172140 }, { "epoch": 33.06, "learning_rate": 0.001, "loss": 2.5126, "step": 172152 }, { "epoch": 33.06, "learning_rate": 0.001, "loss": 2.5059, "step": 172164 }, { "epoch": 33.06, "learning_rate": 0.001, "loss": 2.5108, "step": 172176 }, { "epoch": 33.06, "learning_rate": 0.001, "loss": 2.5093, "step": 172188 }, { "epoch": 33.06, "learning_rate": 0.001, "loss": 2.5123, "step": 172200 }, { "epoch": 33.07, "learning_rate": 0.001, "loss": 2.5147, "step": 172212 }, { "epoch": 33.07, "learning_rate": 0.001, "loss": 2.5155, "step": 172224 }, { "epoch": 33.07, "learning_rate": 0.001, "loss": 2.5131, "step": 172236 }, { "epoch": 33.07, "learning_rate": 0.001, "loss": 2.5156, "step": 172248 }, { "epoch": 33.08, "learning_rate": 0.001, "loss": 2.5135, "step": 172260 }, { "epoch": 33.08, "learning_rate": 0.001, "loss": 2.5142, "step": 172272 }, { "epoch": 33.08, "learning_rate": 0.001, "loss": 2.515, "step": 172284 }, { "epoch": 33.08, "learning_rate": 0.001, "loss": 2.512, "step": 172296 }, { "epoch": 33.09, "learning_rate": 0.001, "loss": 2.5076, "step": 172308 }, { "epoch": 33.09, "learning_rate": 0.001, "loss": 2.5157, "step": 172320 }, { "epoch": 33.09, "learning_rate": 0.001, "loss": 2.511, "step": 172332 }, { "epoch": 33.09, "learning_rate": 0.001, "loss": 2.512, "step": 172344 }, { "epoch": 33.09, "learning_rate": 0.001, "loss": 2.5159, "step": 172356 }, { "epoch": 33.1, "learning_rate": 0.001, "loss": 2.5094, "step": 172368 }, { "epoch": 33.1, "learning_rate": 0.001, "loss": 2.5175, "step": 172380 }, { "epoch": 33.1, "learning_rate": 0.001, "loss": 2.5173, "step": 172392 }, { "epoch": 33.1, "learning_rate": 0.001, "loss": 2.5102, "step": 172404 }, { "epoch": 33.11, "learning_rate": 0.001, "loss": 2.5161, "step": 172416 }, { "epoch": 33.11, "learning_rate": 0.001, "loss": 2.5125, "step": 172428 }, { "epoch": 33.11, "learning_rate": 0.001, "loss": 2.515, "step": 172440 }, { "epoch": 33.11, "learning_rate": 0.001, "loss": 2.512, "step": 172452 }, { "epoch": 33.12, "learning_rate": 0.001, "loss": 2.5046, "step": 172464 }, { "epoch": 33.12, "learning_rate": 0.001, "loss": 2.5215, "step": 172476 }, { "epoch": 33.12, "learning_rate": 0.001, "loss": 2.506, "step": 172488 }, { "epoch": 33.12, "learning_rate": 0.001, "loss": 2.5147, "step": 172500 }, { "epoch": 33.12, "eval_ag_news_accuracy": 0.327, "eval_ag_news_bleu_score": 4.840854296628451, "eval_ag_news_bleu_score_sem": 0.14392723937593332, "eval_ag_news_emb_cos_sim": 0.8134829998016357, "eval_ag_news_emb_cos_sim_sem": 0.007196347185379901, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.519099235534668, "eval_ag_news_n_ngrams_match_1": 14.202, "eval_ag_news_n_ngrams_match_2": 3.218, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.586, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.75401035304114, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3528690318938258, "eval_ag_news_runtime": 10.7349, "eval_ag_news_samples_per_second": 46.577, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.35344379295386363, "eval_ag_news_token_set_f1_sem": 0.004393905665406953, "eval_ag_news_token_set_precision": 0.33958679540526815, "eval_ag_news_token_set_recall": 0.38186312938018047, "eval_ag_news_true_num_tokens": 56.09375, "step": 172500 }, { "epoch": 33.12, "eval_anthropic_toxic_prompts_accuracy": 0.1149375, "eval_anthropic_toxic_prompts_bleu_score": 3.191643144304772, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1184516723759307, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673818826675415, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008044502462279036, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2498018741607666, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.218, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766, "eval_anthropic_toxic_prompts_num_pred_words": 47.804, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.785230690604468, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2134038238842198, "eval_anthropic_toxic_prompts_runtime": 10.2652, "eval_anthropic_toxic_prompts_samples_per_second": 48.708, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3589227578981022, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006480609859653643, "eval_anthropic_toxic_prompts_token_set_precision": 0.442542947179868, "eval_anthropic_toxic_prompts_token_set_recall": 0.3279915647536028, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 172500 }, { "epoch": 33.12, "eval_arxiv_accuracy": 0.3498125, "eval_arxiv_bleu_score": 4.455771918575809, "eval_arxiv_bleu_score_sem": 0.13182251487229993, "eval_arxiv_emb_cos_sim": 0.7759593725204468, "eval_arxiv_emb_cos_sim_sem": 0.006880422970020844, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3673794269561768, "eval_arxiv_n_ngrams_match_1": 15.38, "eval_arxiv_n_ngrams_match_2": 3.056, "eval_arxiv_n_ngrams_match_3": 0.718, "eval_arxiv_num_pred_words": 40.688, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.002424413456776, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3675317301340385, "eval_arxiv_runtime": 10.7617, "eval_arxiv_samples_per_second": 46.461, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.36071689594917433, "eval_arxiv_token_set_f1_sem": 0.004312948057243856, "eval_arxiv_token_set_precision": 0.3123242844667363, "eval_arxiv_token_set_recall": 0.44326443833838713, "eval_arxiv_true_num_tokens": 64.0, "step": 172500 }, { "epoch": 33.12, "eval_python_code_alpaca_accuracy": 0.1641875, "eval_python_code_alpaca_bleu_score": 4.6513737917458675, "eval_python_code_alpaca_bleu_score_sem": 0.14811492889988476, "eval_python_code_alpaca_emb_cos_sim": 0.7585639953613281, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008046173848240298, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8754916191101074, "eval_python_code_alpaca_n_ngrams_match_1": 9.97, "eval_python_code_alpaca_n_ngrams_match_2": 3.036, "eval_python_code_alpaca_n_ngrams_match_3": 1.026, "eval_python_code_alpaca_num_pred_words": 43.902, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.734140421068687, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3336501299400956, "eval_python_code_alpaca_runtime": 10.5101, "eval_python_code_alpaca_samples_per_second": 47.573, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.4796706126926143, "eval_python_code_alpaca_token_set_f1_sem": 0.005703290595550568, "eval_python_code_alpaca_token_set_precision": 0.5477300841077309, "eval_python_code_alpaca_token_set_recall": 0.4519090648944119, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 172500 }, { "epoch": 33.12, "eval_wikibio_accuracy": 0.32640625, "eval_wikibio_bleu_score": 6.154386102374677, "eval_wikibio_bleu_score_sem": 0.21504046650238473, "eval_wikibio_emb_cos_sim": 0.7461612224578857, "eval_wikibio_emb_cos_sim_sem": 0.007946979264636384, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.726470947265625, "eval_wikibio_n_ngrams_match_1": 10.404, "eval_wikibio_n_ngrams_match_2": 3.528, "eval_wikibio_n_ngrams_match_3": 1.28, "eval_wikibio_num_pred_words": 37.06, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.53227962860505, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3653571718231684, "eval_wikibio_runtime": 10.6177, "eval_wikibio_samples_per_second": 47.091, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.3282029153977967, "eval_wikibio_token_set_f1_sem": 0.005138240614149754, "eval_wikibio_token_set_precision": 0.33698672929848705, "eval_wikibio_token_set_recall": 0.33781476723709897, "eval_wikibio_true_num_tokens": 61.1328125, "step": 172500 }, { "epoch": 33.12, "eval_nq_accuracy": 0.53278125, "eval_nq_bleu_score": 11.941494776015876, "eval_nq_bleu_score_sem": 0.4903975075066142, "eval_nq_emb_cos_sim": 0.8348308801651001, "eval_nq_emb_cos_sim_sem": 0.007084477595630845, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.149851083755493, "eval_nq_n_ngrams_match_1": 23.2, "eval_nq_n_ngrams_match_2": 8.58, "eval_nq_n_ngrams_match_3": 4.014, "eval_nq_num_pred_words": 49.454, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.583580067490153, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4492532235681832, "eval_nq_runtime": 10.8861, "eval_nq_samples_per_second": 45.93, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4636222701951168, "eval_nq_token_set_f1_sem": 0.004804832178364878, "eval_nq_token_set_precision": 0.4221118825741288, "eval_nq_token_set_recall": 0.522550337010352, "eval_nq_true_num_tokens": 64.0, "step": 172500 }, { "epoch": 33.12, "learning_rate": 0.001, "loss": 2.5112, "step": 172512 }, { "epoch": 33.13, "learning_rate": 0.001, "loss": 2.5123, "step": 172524 }, { "epoch": 33.13, "learning_rate": 0.001, "loss": 2.5158, "step": 172536 }, { "epoch": 33.13, "learning_rate": 0.001, "loss": 2.5197, "step": 172548 }, { "epoch": 33.13, "learning_rate": 0.001, "loss": 2.5128, "step": 172560 }, { "epoch": 33.14, "learning_rate": 0.001, "loss": 2.5151, "step": 172572 }, { "epoch": 33.14, "learning_rate": 0.001, "loss": 2.5117, "step": 172584 }, { "epoch": 33.14, "learning_rate": 0.001, "loss": 2.5111, "step": 172596 }, { "epoch": 33.14, "learning_rate": 0.001, "loss": 2.5152, "step": 172608 }, { "epoch": 33.15, "learning_rate": 0.001, "loss": 2.5134, "step": 172620 }, { "epoch": 33.15, "learning_rate": 0.001, "loss": 2.5141, "step": 172632 }, { "epoch": 33.15, "learning_rate": 0.001, "loss": 2.5086, "step": 172644 }, { "epoch": 33.15, "learning_rate": 0.001, "loss": 2.5204, "step": 172656 }, { "epoch": 33.15, "learning_rate": 0.001, "loss": 2.5109, "step": 172668 }, { "epoch": 33.16, "learning_rate": 0.001, "loss": 2.5128, "step": 172680 }, { "epoch": 33.16, "learning_rate": 0.001, "loss": 2.5167, "step": 172692 }, { "epoch": 33.16, "learning_rate": 0.001, "loss": 2.5133, "step": 172704 }, { "epoch": 33.16, "learning_rate": 0.001, "loss": 2.5143, "step": 172716 }, { "epoch": 33.17, "learning_rate": 0.001, "loss": 2.5111, "step": 172728 }, { "epoch": 33.17, "learning_rate": 0.001, "loss": 2.5167, "step": 172740 }, { "epoch": 33.17, "learning_rate": 0.001, "loss": 2.5156, "step": 172752 }, { "epoch": 33.17, "learning_rate": 0.001, "loss": 2.5035, "step": 172764 }, { "epoch": 33.18, "learning_rate": 0.001, "loss": 2.5288, "step": 172776 }, { "epoch": 33.18, "learning_rate": 0.001, "loss": 2.5025, "step": 172788 }, { "epoch": 33.18, "learning_rate": 0.001, "loss": 2.5339, "step": 172800 }, { "epoch": 33.18, "learning_rate": 0.001, "loss": 2.5219, "step": 172812 }, { "epoch": 33.18, "learning_rate": 0.001, "loss": 2.5235, "step": 172824 }, { "epoch": 33.19, "learning_rate": 0.001, "loss": 2.5185, "step": 172836 }, { "epoch": 33.19, "learning_rate": 0.001, "loss": 2.5173, "step": 172848 }, { "epoch": 33.19, "learning_rate": 0.001, "loss": 2.504, "step": 172860 }, { "epoch": 33.19, "learning_rate": 0.001, "loss": 2.5206, "step": 172872 }, { "epoch": 33.2, "learning_rate": 0.001, "loss": 2.5154, "step": 172884 }, { "epoch": 33.2, "learning_rate": 0.001, "loss": 2.5117, "step": 172896 }, { "epoch": 33.2, "learning_rate": 0.001, "loss": 2.5095, "step": 172908 }, { "epoch": 33.2, "learning_rate": 0.001, "loss": 2.5172, "step": 172920 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 2.5236, "step": 172932 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 2.516, "step": 172944 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 2.5201, "step": 172956 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 2.5225, "step": 172968 }, { "epoch": 33.21, "learning_rate": 0.001, "loss": 2.5055, "step": 172980 }, { "epoch": 33.22, "learning_rate": 0.001, "loss": 2.5101, "step": 172992 }, { "epoch": 33.22, "learning_rate": 0.001, "loss": 2.5084, "step": 173004 }, { "epoch": 33.22, "learning_rate": 0.001, "loss": 2.5229, "step": 173016 }, { "epoch": 33.22, "learning_rate": 0.001, "loss": 2.5254, "step": 173028 }, { "epoch": 33.23, "learning_rate": 0.001, "loss": 2.5126, "step": 173040 }, { "epoch": 33.23, "learning_rate": 0.001, "loss": 2.528, "step": 173052 }, { "epoch": 33.23, "learning_rate": 0.001, "loss": 2.5121, "step": 173064 }, { "epoch": 33.23, "learning_rate": 0.001, "loss": 2.5196, "step": 173076 }, { "epoch": 33.24, "learning_rate": 0.001, "loss": 2.5082, "step": 173088 }, { "epoch": 33.24, "learning_rate": 0.001, "loss": 2.52, "step": 173100 }, { "epoch": 33.24, "learning_rate": 0.001, "loss": 2.5241, "step": 173112 }, { "epoch": 33.24, "learning_rate": 0.001, "loss": 2.5194, "step": 173124 }, { "epoch": 33.24, "eval_ag_news_accuracy": 0.32746875, "eval_ag_news_bleu_score": 4.899918911197241, "eval_ag_news_bleu_score_sem": 0.1538101148685072, "eval_ag_news_emb_cos_sim": 0.814886212348938, "eval_ag_news_emb_cos_sim_sem": 0.007141201985370445, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.494994640350342, "eval_ag_news_n_ngrams_match_1": 14.312, "eval_ag_news_n_ngrams_match_2": 3.136, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.688, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.950111351478306, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35478775454072764, "eval_ag_news_runtime": 10.5804, "eval_ag_news_samples_per_second": 47.257, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35651615031167716, "eval_ag_news_token_set_f1_sem": 0.004362219936826826, "eval_ag_news_token_set_precision": 0.34187248033445666, "eval_ag_news_token_set_recall": 0.3866185172661851, "eval_ag_news_true_num_tokens": 56.09375, "step": 173125 }, { "epoch": 33.24, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.22189957256252, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11831276614066606, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785842180252075, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008799396641591184, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2247121334075928, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.42, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 47.432, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.146334324242083, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2187582190487244, "eval_anthropic_toxic_prompts_runtime": 10.3065, "eval_anthropic_toxic_prompts_samples_per_second": 48.513, "eval_anthropic_toxic_prompts_steps_per_second": 0.097, "eval_anthropic_toxic_prompts_token_set_f1": 0.3664681323025575, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006596368235438082, "eval_anthropic_toxic_prompts_token_set_precision": 0.4523588585250943, "eval_anthropic_toxic_prompts_token_set_recall": 0.33454868325187875, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 173125 }, { "epoch": 33.24, "eval_arxiv_accuracy": 0.35078125, "eval_arxiv_bleu_score": 4.580869799666467, "eval_arxiv_bleu_score_sem": 0.134405210250314, "eval_arxiv_emb_cos_sim": 0.7647984623908997, "eval_arxiv_emb_cos_sim_sem": 0.008891873412669166, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.350621223449707, "eval_arxiv_n_ngrams_match_1": 15.376, "eval_arxiv_n_ngrams_match_2": 3.124, "eval_arxiv_n_ngrams_match_3": 0.728, "eval_arxiv_num_pred_words": 40.592, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.520445711293767, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36878700185813307, "eval_arxiv_runtime": 11.3099, "eval_arxiv_samples_per_second": 44.209, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.35959364145006717, "eval_arxiv_token_set_f1_sem": 0.004325707467276858, "eval_arxiv_token_set_precision": 0.31183180094137114, "eval_arxiv_token_set_recall": 0.4392956192196279, "eval_arxiv_true_num_tokens": 64.0, "step": 173125 }, { "epoch": 33.24, "eval_python_code_alpaca_accuracy": 0.16321875, "eval_python_code_alpaca_bleu_score": 4.789714930698208, "eval_python_code_alpaca_bleu_score_sem": 0.15492731405422203, "eval_python_code_alpaca_emb_cos_sim": 0.7496691942214966, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008979071949515666, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.852346897125244, "eval_python_code_alpaca_n_ngrams_match_1": 9.786, "eval_python_code_alpaca_n_ngrams_match_2": 2.95, "eval_python_code_alpaca_n_ngrams_match_3": 1.056, "eval_python_code_alpaca_num_pred_words": 42.616, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.328402133250716, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3365390822665639, "eval_python_code_alpaca_runtime": 10.1107, "eval_python_code_alpaca_samples_per_second": 49.453, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.47571060384262936, "eval_python_code_alpaca_token_set_f1_sem": 0.00581499096546508, "eval_python_code_alpaca_token_set_precision": 0.5364937773593781, "eval_python_code_alpaca_token_set_recall": 0.45230257993175715, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 173125 }, { "epoch": 33.24, "eval_wikibio_accuracy": 0.3283125, "eval_wikibio_bleu_score": 6.253845379841241, "eval_wikibio_bleu_score_sem": 0.22551707085226777, "eval_wikibio_emb_cos_sim": 0.7445352673530579, "eval_wikibio_emb_cos_sim_sem": 0.009703821253974903, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6684389114379883, "eval_wikibio_n_ngrams_match_1": 10.174, "eval_wikibio_n_ngrams_match_2": 3.44, "eval_wikibio_n_ngrams_match_3": 1.29, "eval_wikibio_num_pred_words": 35.898, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.1906779625486, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35983269969875353, "eval_wikibio_runtime": 10.2797, "eval_wikibio_samples_per_second": 48.64, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3271597122141529, "eval_wikibio_token_set_f1_sem": 0.005313285045508303, "eval_wikibio_token_set_precision": 0.33182161640259683, "eval_wikibio_token_set_recall": 0.33768863930955123, "eval_wikibio_true_num_tokens": 61.1328125, "step": 173125 }, { "epoch": 33.24, "eval_nq_accuracy": 0.5326875, "eval_nq_bleu_score": 11.935532159876267, "eval_nq_bleu_score_sem": 0.47433546106004976, "eval_nq_emb_cos_sim": 0.8320028781890869, "eval_nq_emb_cos_sim_sem": 0.007365523976758122, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.14473295211792, "eval_nq_n_ngrams_match_1": 23.232, "eval_nq_n_ngrams_match_2": 8.596, "eval_nq_n_ngrams_match_3": 3.97, "eval_nq_num_pred_words": 49.198, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.539760407831926, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4508228084144806, "eval_nq_runtime": 10.5952, "eval_nq_samples_per_second": 47.191, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4640798159498465, "eval_nq_token_set_f1_sem": 0.004961801434360699, "eval_nq_token_set_precision": 0.4216964755775736, "eval_nq_token_set_recall": 0.524348383098691, "eval_nq_true_num_tokens": 64.0, "step": 173125 }, { "epoch": 33.24, "learning_rate": 0.001, "loss": 2.5149, "step": 173136 }, { "epoch": 33.25, "learning_rate": 0.001, "loss": 2.5126, "step": 173148 }, { "epoch": 33.25, "learning_rate": 0.001, "loss": 2.5194, "step": 173160 }, { "epoch": 33.25, "learning_rate": 0.001, "loss": 2.5171, "step": 173172 }, { "epoch": 33.25, "learning_rate": 0.001, "loss": 2.5234, "step": 173184 }, { "epoch": 33.26, "learning_rate": 0.001, "loss": 2.5134, "step": 173196 }, { "epoch": 33.26, "learning_rate": 0.001, "loss": 2.531, "step": 173208 }, { "epoch": 33.26, "learning_rate": 0.001, "loss": 2.5174, "step": 173220 }, { "epoch": 33.26, "learning_rate": 0.001, "loss": 2.5135, "step": 173232 }, { "epoch": 33.26, "learning_rate": 0.001, "loss": 2.5233, "step": 173244 }, { "epoch": 33.27, "learning_rate": 0.001, "loss": 2.5205, "step": 173256 }, { "epoch": 33.27, "learning_rate": 0.001, "loss": 2.5203, "step": 173268 }, { "epoch": 33.27, "learning_rate": 0.001, "loss": 2.5111, "step": 173280 }, { "epoch": 33.27, "learning_rate": 0.001, "loss": 2.507, "step": 173292 }, { "epoch": 33.28, "learning_rate": 0.001, "loss": 2.5174, "step": 173304 }, { "epoch": 33.28, "learning_rate": 0.001, "loss": 2.5068, "step": 173316 }, { "epoch": 33.28, "learning_rate": 0.001, "loss": 2.5192, "step": 173328 }, { "epoch": 33.28, "learning_rate": 0.001, "loss": 2.5169, "step": 173340 }, { "epoch": 33.29, "learning_rate": 0.001, "loss": 2.5146, "step": 173352 }, { "epoch": 33.29, "learning_rate": 0.001, "loss": 2.5099, "step": 173364 }, { "epoch": 33.29, "learning_rate": 0.001, "loss": 2.5135, "step": 173376 }, { "epoch": 33.29, "learning_rate": 0.001, "loss": 2.5132, "step": 173388 }, { "epoch": 33.29, "learning_rate": 0.001, "loss": 2.5178, "step": 173400 }, { "epoch": 33.3, "learning_rate": 0.001, "loss": 2.5113, "step": 173412 }, { "epoch": 33.3, "learning_rate": 0.001, "loss": 2.5163, "step": 173424 }, { "epoch": 33.3, "learning_rate": 0.001, "loss": 2.5238, "step": 173436 }, { "epoch": 33.3, "learning_rate": 0.001, "loss": 2.5191, "step": 173448 }, { "epoch": 33.31, "learning_rate": 0.001, "loss": 2.521, "step": 173460 }, { "epoch": 33.31, "learning_rate": 0.001, "loss": 2.523, "step": 173472 }, { "epoch": 33.31, "learning_rate": 0.001, "loss": 2.5183, "step": 173484 }, { "epoch": 33.31, "learning_rate": 0.001, "loss": 2.5207, "step": 173496 }, { "epoch": 33.32, "learning_rate": 0.001, "loss": 2.5136, "step": 173508 }, { "epoch": 33.32, "learning_rate": 0.001, "loss": 2.5165, "step": 173520 }, { "epoch": 33.32, "learning_rate": 0.001, "loss": 2.5105, "step": 173532 }, { "epoch": 33.32, "learning_rate": 0.001, "loss": 2.5165, "step": 173544 }, { "epoch": 33.32, "learning_rate": 0.001, "loss": 2.5268, "step": 173556 }, { "epoch": 33.33, "learning_rate": 0.001, "loss": 2.5096, "step": 173568 }, { "epoch": 33.33, "learning_rate": 0.001, "loss": 2.527, "step": 173580 }, { "epoch": 33.33, "learning_rate": 0.001, "loss": 2.525, "step": 173592 }, { "epoch": 33.33, "learning_rate": 0.001, "loss": 2.5186, "step": 173604 }, { "epoch": 33.34, "learning_rate": 0.001, "loss": 2.5194, "step": 173616 }, { "epoch": 33.34, "learning_rate": 0.001, "loss": 2.5137, "step": 173628 }, { "epoch": 33.34, "learning_rate": 0.001, "loss": 2.5254, "step": 173640 }, { "epoch": 33.34, "learning_rate": 0.001, "loss": 2.5147, "step": 173652 }, { "epoch": 33.35, "learning_rate": 0.001, "loss": 2.5149, "step": 173664 }, { "epoch": 33.35, "learning_rate": 0.001, "loss": 2.5134, "step": 173676 }, { "epoch": 33.35, "learning_rate": 0.001, "loss": 2.5199, "step": 173688 }, { "epoch": 33.35, "learning_rate": 0.001, "loss": 2.5087, "step": 173700 }, { "epoch": 33.35, "learning_rate": 0.001, "loss": 2.5222, "step": 173712 }, { "epoch": 33.36, "learning_rate": 0.001, "loss": 2.5128, "step": 173724 }, { "epoch": 33.36, "learning_rate": 0.001, "loss": 2.5279, "step": 173736 }, { "epoch": 33.36, "learning_rate": 0.001, "loss": 2.5175, "step": 173748 }, { "epoch": 33.36, "eval_ag_news_accuracy": 0.32675, "eval_ag_news_bleu_score": 4.965034493484933, "eval_ag_news_bleu_score_sem": 0.15584535953177964, "eval_ag_news_emb_cos_sim": 0.8194319009780884, "eval_ag_news_emb_cos_sim_sem": 0.006557308107179123, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.509377956390381, "eval_ag_news_n_ngrams_match_1": 14.434, "eval_ag_news_n_ngrams_match_2": 3.226, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 47.338, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.42746797258263, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35893226864744426, "eval_ag_news_runtime": 10.6405, "eval_ag_news_samples_per_second": 46.99, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.356872847590079, "eval_ag_news_token_set_f1_sem": 0.0041785754917403415, "eval_ag_news_token_set_precision": 0.34607354411306496, "eval_ag_news_token_set_recall": 0.3816919589951471, "eval_ag_news_true_num_tokens": 56.09375, "step": 173750 }, { "epoch": 33.36, "eval_anthropic_toxic_prompts_accuracy": 0.11678125, "eval_anthropic_toxic_prompts_bleu_score": 3.134337231638774, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11876808220698369, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6827963590621948, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008412511621318956, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2089335918426514, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.942, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.738, "eval_anthropic_toxic_prompts_num_pred_words": 47.884, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.752675689614016, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2180556985886582, "eval_anthropic_toxic_prompts_runtime": 10.4927, "eval_anthropic_toxic_prompts_samples_per_second": 47.652, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.36476377372728713, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006297880303976672, "eval_anthropic_toxic_prompts_token_set_precision": 0.44839266035232345, "eval_anthropic_toxic_prompts_token_set_recall": 0.3328900841915803, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 173750 }, { "epoch": 33.36, "eval_arxiv_accuracy": 0.3509375, "eval_arxiv_bleu_score": 4.434420430612437, "eval_arxiv_bleu_score_sem": 0.12258376722549405, "eval_arxiv_emb_cos_sim": 0.7793766856193542, "eval_arxiv_emb_cos_sim_sem": 0.006540039094546292, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.35927677154541, "eval_arxiv_n_ngrams_match_1": 15.562, "eval_arxiv_n_ngrams_match_2": 3.024, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 41.224, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.76837724462782, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37095354797061697, "eval_arxiv_runtime": 10.351, "eval_arxiv_samples_per_second": 48.305, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3624521232998928, "eval_arxiv_token_set_f1_sem": 0.0041198916159214645, "eval_arxiv_token_set_precision": 0.3152315986668038, "eval_arxiv_token_set_recall": 0.4412362093246703, "eval_arxiv_true_num_tokens": 64.0, "step": 173750 }, { "epoch": 33.36, "eval_python_code_alpaca_accuracy": 0.16228125, "eval_python_code_alpaca_bleu_score": 4.8026209937932345, "eval_python_code_alpaca_bleu_score_sem": 0.15575429695317172, "eval_python_code_alpaca_emb_cos_sim": 0.7655423283576965, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007853884778809914, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8448219299316406, "eval_python_code_alpaca_n_ngrams_match_1": 10.094, "eval_python_code_alpaca_n_ngrams_match_2": 3.068, "eval_python_code_alpaca_n_ngrams_match_3": 1.104, "eval_python_code_alpaca_num_pred_words": 44.546, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.198495858902863, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3385839956346942, "eval_python_code_alpaca_runtime": 10.2532, "eval_python_code_alpaca_samples_per_second": 48.765, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.48839037843729227, "eval_python_code_alpaca_token_set_f1_sem": 0.005526841539587616, "eval_python_code_alpaca_token_set_precision": 0.5533478780694027, "eval_python_code_alpaca_token_set_recall": 0.45824260790696586, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 173750 }, { "epoch": 33.36, "eval_wikibio_accuracy": 0.326625, "eval_wikibio_bleu_score": 6.135505009485993, "eval_wikibio_bleu_score_sem": 0.20818576480427142, "eval_wikibio_emb_cos_sim": 0.7598357200622559, "eval_wikibio_emb_cos_sim_sem": 0.007382571718436271, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.7037172317504883, "eval_wikibio_n_ngrams_match_1": 10.618, "eval_wikibio_n_ngrams_match_2": 3.62, "eval_wikibio_n_ngrams_match_3": 1.302, "eval_wikibio_num_pred_words": 37.622, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.59793615725575, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3699478244719296, "eval_wikibio_runtime": 10.1056, "eval_wikibio_samples_per_second": 49.477, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3317120549497993, "eval_wikibio_token_set_f1_sem": 0.004876042724365511, "eval_wikibio_token_set_precision": 0.3431560172842182, "eval_wikibio_token_set_recall": 0.33365987201723285, "eval_wikibio_true_num_tokens": 61.1328125, "step": 173750 }, { "epoch": 33.36, "eval_nq_accuracy": 0.53240625, "eval_nq_bleu_score": 11.973528795799655, "eval_nq_bleu_score_sem": 0.4804154515319206, "eval_nq_emb_cos_sim": 0.8428808450698853, "eval_nq_emb_cos_sim_sem": 0.006753537906945273, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1437206268310547, "eval_nq_n_ngrams_match_1": 23.466, "eval_nq_n_ngrams_match_2": 8.726, "eval_nq_n_ngrams_match_3": 3.968, "eval_nq_num_pred_words": 49.554, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.531119766734959, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45280346244420744, "eval_nq_runtime": 11.604, "eval_nq_samples_per_second": 43.088, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4666227583664684, "eval_nq_token_set_f1_sem": 0.004900215885111777, "eval_nq_token_set_precision": 0.4270406572741068, "eval_nq_token_set_recall": 0.5212560592669406, "eval_nq_true_num_tokens": 64.0, "step": 173750 }, { "epoch": 33.36, "learning_rate": 0.001, "loss": 2.5178, "step": 173760 }, { "epoch": 33.37, "learning_rate": 0.001, "loss": 2.527, "step": 173772 }, { "epoch": 33.37, "learning_rate": 0.001, "loss": 2.5099, "step": 173784 }, { "epoch": 33.37, "learning_rate": 0.001, "loss": 2.5145, "step": 173796 }, { "epoch": 33.37, "learning_rate": 0.001, "loss": 2.5155, "step": 173808 }, { "epoch": 33.38, "learning_rate": 0.001, "loss": 2.5312, "step": 173820 }, { "epoch": 33.38, "learning_rate": 0.001, "loss": 2.5204, "step": 173832 }, { "epoch": 33.38, "learning_rate": 0.001, "loss": 2.5096, "step": 173844 }, { "epoch": 33.38, "learning_rate": 0.001, "loss": 2.5188, "step": 173856 }, { "epoch": 33.38, "learning_rate": 0.001, "loss": 2.5158, "step": 173868 }, { "epoch": 33.39, "learning_rate": 0.001, "loss": 2.5215, "step": 173880 }, { "epoch": 33.39, "learning_rate": 0.001, "loss": 2.5116, "step": 173892 }, { "epoch": 33.39, "learning_rate": 0.001, "loss": 2.5131, "step": 173904 }, { "epoch": 33.39, "learning_rate": 0.001, "loss": 2.5228, "step": 173916 }, { "epoch": 33.4, "learning_rate": 0.001, "loss": 2.5165, "step": 173928 }, { "epoch": 33.4, "learning_rate": 0.001, "loss": 2.5179, "step": 173940 }, { "epoch": 33.4, "learning_rate": 0.001, "loss": 2.5245, "step": 173952 }, { "epoch": 33.4, "learning_rate": 0.001, "loss": 2.5197, "step": 173964 }, { "epoch": 33.41, "learning_rate": 0.001, "loss": 2.5269, "step": 173976 }, { "epoch": 33.41, "learning_rate": 0.001, "loss": 2.512, "step": 173988 }, { "epoch": 33.41, "learning_rate": 0.001, "loss": 2.5339, "step": 174000 }, { "epoch": 33.41, "learning_rate": 0.001, "loss": 2.5234, "step": 174012 }, { "epoch": 33.41, "learning_rate": 0.001, "loss": 2.5243, "step": 174024 }, { "epoch": 33.42, "learning_rate": 0.001, "loss": 2.5229, "step": 174036 }, { "epoch": 33.42, "learning_rate": 0.001, "loss": 2.5149, "step": 174048 }, { "epoch": 33.42, "learning_rate": 0.001, "loss": 2.5113, "step": 174060 }, { "epoch": 33.42, "learning_rate": 0.001, "loss": 2.5149, "step": 174072 }, { "epoch": 33.43, "learning_rate": 0.001, "loss": 2.5085, "step": 174084 }, { "epoch": 33.43, "learning_rate": 0.001, "loss": 2.5135, "step": 174096 }, { "epoch": 33.43, "learning_rate": 0.001, "loss": 2.5136, "step": 174108 }, { "epoch": 33.43, "learning_rate": 0.001, "loss": 2.5229, "step": 174120 }, { "epoch": 33.44, "learning_rate": 0.001, "loss": 2.5267, "step": 174132 }, { "epoch": 33.44, "learning_rate": 0.001, "loss": 2.5174, "step": 174144 }, { "epoch": 33.44, "learning_rate": 0.001, "loss": 2.5138, "step": 174156 }, { "epoch": 33.44, "learning_rate": 0.001, "loss": 2.511, "step": 174168 }, { "epoch": 33.44, "learning_rate": 0.001, "loss": 2.524, "step": 174180 }, { "epoch": 33.45, "learning_rate": 0.001, "loss": 2.514, "step": 174192 }, { "epoch": 33.45, "learning_rate": 0.001, "loss": 2.5159, "step": 174204 }, { "epoch": 33.45, "learning_rate": 0.001, "loss": 2.5198, "step": 174216 }, { "epoch": 33.45, "learning_rate": 0.001, "loss": 2.5243, "step": 174228 }, { "epoch": 33.46, "learning_rate": 0.001, "loss": 2.5196, "step": 174240 }, { "epoch": 33.46, "learning_rate": 0.001, "loss": 2.5193, "step": 174252 }, { "epoch": 33.46, "learning_rate": 0.001, "loss": 2.5128, "step": 174264 }, { "epoch": 33.46, "learning_rate": 0.001, "loss": 2.5174, "step": 174276 }, { "epoch": 33.47, "learning_rate": 0.001, "loss": 2.518, "step": 174288 }, { "epoch": 33.47, "learning_rate": 0.001, "loss": 2.5129, "step": 174300 }, { "epoch": 33.47, "learning_rate": 0.001, "loss": 2.52, "step": 174312 }, { "epoch": 33.47, "learning_rate": 0.001, "loss": 2.5093, "step": 174324 }, { "epoch": 33.47, "learning_rate": 0.001, "loss": 2.5085, "step": 174336 }, { "epoch": 33.48, "learning_rate": 0.001, "loss": 2.5194, "step": 174348 }, { "epoch": 33.48, "learning_rate": 0.001, "loss": 2.5222, "step": 174360 }, { "epoch": 33.48, "learning_rate": 0.001, "loss": 2.5363, "step": 174372 }, { "epoch": 33.48, "eval_ag_news_accuracy": 0.32703125, "eval_ag_news_bleu_score": 4.906854796195355, "eval_ag_news_bleu_score_sem": 0.1534652775521715, "eval_ag_news_emb_cos_sim": 0.821617066860199, "eval_ag_news_emb_cos_sim_sem": 0.006243848823913444, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5055830478668213, "eval_ag_news_n_ngrams_match_1": 14.448, "eval_ag_news_n_ngrams_match_2": 3.238, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.97, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.300854185273344, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3578224676505867, "eval_ag_news_runtime": 10.4357, "eval_ag_news_samples_per_second": 47.912, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3567334792661159, "eval_ag_news_token_set_f1_sem": 0.004295701854635979, "eval_ag_news_token_set_precision": 0.3452890637362383, "eval_ag_news_token_set_recall": 0.38267987839055256, "eval_ag_news_true_num_tokens": 56.09375, "step": 174375 }, { "epoch": 33.48, "eval_anthropic_toxic_prompts_accuracy": 0.11584375, "eval_anthropic_toxic_prompts_bleu_score": 3.165334139202723, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11743674992686823, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6743996143341064, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008732528690795126, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2037386894226074, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.244, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.906, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 46.942, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.624421377522857, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21463547049141593, "eval_anthropic_toxic_prompts_runtime": 10.1653, "eval_anthropic_toxic_prompts_samples_per_second": 49.187, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3632020696418693, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562672979982537, "eval_anthropic_toxic_prompts_token_set_precision": 0.4453997781712462, "eval_anthropic_toxic_prompts_token_set_recall": 0.33430944384037714, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 174375 }, { "epoch": 33.48, "eval_arxiv_accuracy": 0.34984375, "eval_arxiv_bleu_score": 4.493597327860133, "eval_arxiv_bleu_score_sem": 0.12371786867659083, "eval_arxiv_emb_cos_sim": 0.7805147171020508, "eval_arxiv_emb_cos_sim_sem": 0.006847012372952848, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3685925006866455, "eval_arxiv_n_ngrams_match_1": 15.398, "eval_arxiv_n_ngrams_match_2": 3.024, "eval_arxiv_n_ngrams_match_3": 0.694, "eval_arxiv_num_pred_words": 41.036, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.037627840491965, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3680955459281964, "eval_arxiv_runtime": 10.1808, "eval_arxiv_samples_per_second": 49.112, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.36035618965034755, "eval_arxiv_token_set_f1_sem": 0.004020412666983252, "eval_arxiv_token_set_precision": 0.31368359341177604, "eval_arxiv_token_set_recall": 0.4378062231189222, "eval_arxiv_true_num_tokens": 64.0, "step": 174375 }, { "epoch": 33.48, "eval_python_code_alpaca_accuracy": 0.16334375, "eval_python_code_alpaca_bleu_score": 4.86654463528897, "eval_python_code_alpaca_bleu_score_sem": 0.15655118955654437, "eval_python_code_alpaca_emb_cos_sim": 0.7589184045791626, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007545616801900665, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8595287799835205, "eval_python_code_alpaca_n_ngrams_match_1": 9.852, "eval_python_code_alpaca_n_ngrams_match_2": 3.054, "eval_python_code_alpaca_n_ngrams_match_3": 1.082, "eval_python_code_alpaca_num_pred_words": 43.25, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.453300653915925, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3357447579434674, "eval_python_code_alpaca_runtime": 9.8106, "eval_python_code_alpaca_samples_per_second": 50.965, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4799576337029294, "eval_python_code_alpaca_token_set_f1_sem": 0.005466198786460032, "eval_python_code_alpaca_token_set_precision": 0.5423816177033097, "eval_python_code_alpaca_token_set_recall": 0.4552521696149737, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 174375 }, { "epoch": 33.48, "eval_wikibio_accuracy": 0.325375, "eval_wikibio_bleu_score": 6.0644674525720985, "eval_wikibio_bleu_score_sem": 0.21806740005371358, "eval_wikibio_emb_cos_sim": 0.7400978803634644, "eval_wikibio_emb_cos_sim_sem": 0.010156600237715882, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6715779304504395, "eval_wikibio_n_ngrams_match_1": 9.984, "eval_wikibio_n_ngrams_match_2": 3.378, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 35.596, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.31389152947537, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3534394322253904, "eval_wikibio_runtime": 10.7865, "eval_wikibio_samples_per_second": 46.354, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.31820052923439396, "eval_wikibio_token_set_f1_sem": 0.005603772208673508, "eval_wikibio_token_set_precision": 0.32455164773925194, "eval_wikibio_token_set_recall": 0.32870129898104994, "eval_wikibio_true_num_tokens": 61.1328125, "step": 174375 }, { "epoch": 33.48, "eval_nq_accuracy": 0.53415625, "eval_nq_bleu_score": 12.095765188391509, "eval_nq_bleu_score_sem": 0.49374744347138455, "eval_nq_emb_cos_sim": 0.8349359631538391, "eval_nq_emb_cos_sim_sem": 0.007024838487898914, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1431634426116943, "eval_nq_n_ngrams_match_1": 23.498, "eval_nq_n_ngrams_match_2": 8.78, "eval_nq_n_ngrams_match_3": 4.044, "eval_nq_num_pred_words": 49.158, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.526367685442755, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45607291509430736, "eval_nq_runtime": 10.3651, "eval_nq_samples_per_second": 48.239, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4686542732313417, "eval_nq_token_set_f1_sem": 0.005069800159174794, "eval_nq_token_set_precision": 0.42843504562473606, "eval_nq_token_set_recall": 0.5235385607916649, "eval_nq_true_num_tokens": 64.0, "step": 174375 }, { "epoch": 33.48, "learning_rate": 0.001, "loss": 2.517, "step": 174384 }, { "epoch": 33.49, "learning_rate": 0.001, "loss": 2.5174, "step": 174396 }, { "epoch": 33.49, "learning_rate": 0.001, "loss": 2.5205, "step": 174408 }, { "epoch": 33.49, "learning_rate": 0.001, "loss": 2.5222, "step": 174420 }, { "epoch": 33.49, "learning_rate": 0.001, "loss": 2.518, "step": 174432 }, { "epoch": 33.5, "learning_rate": 0.001, "loss": 2.5231, "step": 174444 }, { "epoch": 33.5, "learning_rate": 0.001, "loss": 2.5167, "step": 174456 }, { "epoch": 33.5, "learning_rate": 0.001, "loss": 2.5268, "step": 174468 }, { "epoch": 33.5, "learning_rate": 0.001, "loss": 2.518, "step": 174480 }, { "epoch": 33.5, "learning_rate": 0.001, "loss": 2.5192, "step": 174492 }, { "epoch": 33.51, "learning_rate": 0.001, "loss": 2.5165, "step": 174504 }, { "epoch": 33.51, "learning_rate": 0.001, "loss": 2.5162, "step": 174516 }, { "epoch": 33.51, "learning_rate": 0.001, "loss": 2.515, "step": 174528 }, { "epoch": 33.51, "learning_rate": 0.001, "loss": 2.5316, "step": 174540 }, { "epoch": 33.52, "learning_rate": 0.001, "loss": 2.5279, "step": 174552 }, { "epoch": 33.52, "learning_rate": 0.001, "loss": 2.5113, "step": 174564 }, { "epoch": 33.52, "learning_rate": 0.001, "loss": 2.5202, "step": 174576 }, { "epoch": 33.52, "learning_rate": 0.001, "loss": 2.5189, "step": 174588 }, { "epoch": 33.53, "learning_rate": 0.001, "loss": 2.5181, "step": 174600 }, { "epoch": 33.53, "learning_rate": 0.001, "loss": 2.5233, "step": 174612 }, { "epoch": 33.53, "learning_rate": 0.001, "loss": 2.5246, "step": 174624 }, { "epoch": 33.53, "learning_rate": 0.001, "loss": 2.5143, "step": 174636 }, { "epoch": 33.53, "learning_rate": 0.001, "loss": 2.5034, "step": 174648 }, { "epoch": 33.54, "learning_rate": 0.001, "loss": 2.5189, "step": 174660 }, { "epoch": 33.54, "learning_rate": 0.001, "loss": 2.5203, "step": 174672 }, { "epoch": 33.54, "learning_rate": 0.001, "loss": 2.5258, "step": 174684 }, { "epoch": 33.54, "learning_rate": 0.001, "loss": 2.5231, "step": 174696 }, { "epoch": 33.55, "learning_rate": 0.001, "loss": 2.5215, "step": 174708 }, { "epoch": 33.55, "learning_rate": 0.001, "loss": 2.5235, "step": 174720 }, { "epoch": 33.55, "learning_rate": 0.001, "loss": 2.5218, "step": 174732 }, { "epoch": 33.55, "learning_rate": 0.001, "loss": 2.5186, "step": 174744 }, { "epoch": 33.56, "learning_rate": 0.001, "loss": 2.5246, "step": 174756 }, { "epoch": 33.56, "learning_rate": 0.001, "loss": 2.5262, "step": 174768 }, { "epoch": 33.56, "learning_rate": 0.001, "loss": 2.527, "step": 174780 }, { "epoch": 33.56, "learning_rate": 0.001, "loss": 2.5262, "step": 174792 }, { "epoch": 33.56, "learning_rate": 0.001, "loss": 2.5246, "step": 174804 }, { "epoch": 33.57, "learning_rate": 0.001, "loss": 2.515, "step": 174816 }, { "epoch": 33.57, "learning_rate": 0.001, "loss": 2.5221, "step": 174828 }, { "epoch": 33.57, "learning_rate": 0.001, "loss": 2.5201, "step": 174840 }, { "epoch": 33.57, "learning_rate": 0.001, "loss": 2.5164, "step": 174852 }, { "epoch": 33.58, "learning_rate": 0.001, "loss": 2.5302, "step": 174864 }, { "epoch": 33.58, "learning_rate": 0.001, "loss": 2.522, "step": 174876 }, { "epoch": 33.58, "learning_rate": 0.001, "loss": 2.5232, "step": 174888 }, { "epoch": 33.58, "learning_rate": 0.001, "loss": 2.5201, "step": 174900 }, { "epoch": 33.59, "learning_rate": 0.001, "loss": 2.5237, "step": 174912 }, { "epoch": 33.59, "learning_rate": 0.001, "loss": 2.5219, "step": 174924 }, { "epoch": 33.59, "learning_rate": 0.001, "loss": 2.5263, "step": 174936 }, { "epoch": 33.59, "learning_rate": 0.001, "loss": 2.5191, "step": 174948 }, { "epoch": 33.59, "learning_rate": 0.001, "loss": 2.5167, "step": 174960 }, { "epoch": 33.6, "learning_rate": 0.001, "loss": 2.5175, "step": 174972 }, { "epoch": 33.6, "learning_rate": 0.001, "loss": 2.5094, "step": 174984 }, { "epoch": 33.6, "learning_rate": 0.001, "loss": 2.5127, "step": 174996 }, { "epoch": 33.6, "eval_ag_news_accuracy": 0.327, "eval_ag_news_bleu_score": 5.029946715449138, "eval_ag_news_bleu_score_sem": 0.156727994509488, "eval_ag_news_emb_cos_sim": 0.8179185390472412, "eval_ag_news_emb_cos_sim_sem": 0.006700141801149018, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4984447956085205, "eval_ag_news_n_ngrams_match_1": 14.454, "eval_ag_news_n_ngrams_match_2": 3.238, "eval_ag_news_n_ngrams_match_3": 0.922, "eval_ag_news_num_pred_words": 46.614, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.063990689148795, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3587236885662435, "eval_ag_news_runtime": 10.2337, "eval_ag_news_samples_per_second": 48.858, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3615319419488924, "eval_ag_news_token_set_f1_sem": 0.00426277962036405, "eval_ag_news_token_set_precision": 0.3477667685348069, "eval_ag_news_token_set_recall": 0.39019556812546885, "eval_ag_news_true_num_tokens": 56.09375, "step": 175000 }, { "epoch": 33.6, "eval_anthropic_toxic_prompts_accuracy": 0.11571875, "eval_anthropic_toxic_prompts_bleu_score": 3.1958276110554644, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12273843427219648, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6793828010559082, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009504407391512655, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2424569129943848, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.964, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.728, "eval_anthropic_toxic_prompts_num_pred_words": 46.772, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.59653300941431, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2163873068690737, "eval_anthropic_toxic_prompts_runtime": 9.8575, "eval_anthropic_toxic_prompts_samples_per_second": 50.723, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3563779747433364, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006569863064058106, "eval_anthropic_toxic_prompts_token_set_precision": 0.44309226529630547, "eval_anthropic_toxic_prompts_token_set_recall": 0.3245975095000013, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 175000 }, { "epoch": 33.6, "eval_arxiv_accuracy": 0.3494375, "eval_arxiv_bleu_score": 4.624642327957384, "eval_arxiv_bleu_score_sem": 0.13825152891220188, "eval_arxiv_emb_cos_sim": 0.779121458530426, "eval_arxiv_emb_cos_sim_sem": 0.0069886106374257686, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3809525966644287, "eval_arxiv_n_ngrams_match_1": 15.512, "eval_arxiv_n_ngrams_match_2": 3.106, "eval_arxiv_n_ngrams_match_3": 0.766, "eval_arxiv_num_pred_words": 40.718, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 29.398762942228352, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3727240665286775, "eval_arxiv_runtime": 10.3388, "eval_arxiv_samples_per_second": 48.362, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.36257993640903136, "eval_arxiv_token_set_f1_sem": 0.004277633830519311, "eval_arxiv_token_set_precision": 0.31370067286713244, "eval_arxiv_token_set_recall": 0.44458743740003237, "eval_arxiv_true_num_tokens": 64.0, "step": 175000 }, { "epoch": 33.6, "eval_python_code_alpaca_accuracy": 0.16403125, "eval_python_code_alpaca_bleu_score": 4.810836075629939, "eval_python_code_alpaca_bleu_score_sem": 0.1453644665987554, "eval_python_code_alpaca_emb_cos_sim": 0.7554128170013428, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009746200568697115, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.856598138809204, "eval_python_code_alpaca_n_ngrams_match_1": 9.954, "eval_python_code_alpaca_n_ngrams_match_2": 3.094, "eval_python_code_alpaca_n_ngrams_match_3": 1.08, "eval_python_code_alpaca_num_pred_words": 43.588, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.402226169440514, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33799230051064105, "eval_python_code_alpaca_runtime": 9.9203, "eval_python_code_alpaca_samples_per_second": 50.401, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4858620096599266, "eval_python_code_alpaca_token_set_f1_sem": 0.005469811336237878, "eval_python_code_alpaca_token_set_precision": 0.5479346926760923, "eval_python_code_alpaca_token_set_recall": 0.46111429326864967, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 175000 }, { "epoch": 33.6, "eval_wikibio_accuracy": 0.3253125, "eval_wikibio_bleu_score": 6.021814688120567, "eval_wikibio_bleu_score_sem": 0.20294861340148074, "eval_wikibio_emb_cos_sim": 0.7521679401397705, "eval_wikibio_emb_cos_sim_sem": 0.008066229162686404, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6546378135681152, "eval_wikibio_n_ngrams_match_1": 10.244, "eval_wikibio_n_ngrams_match_2": 3.438, "eval_wikibio_n_ngrams_match_3": 1.236, "eval_wikibio_num_pred_words": 35.88, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.65351879953629, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36374613330735894, "eval_wikibio_runtime": 9.9831, "eval_wikibio_samples_per_second": 50.085, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32760757770848686, "eval_wikibio_token_set_f1_sem": 0.005100098169329544, "eval_wikibio_token_set_precision": 0.33468626927912914, "eval_wikibio_token_set_recall": 0.3363332916495507, "eval_wikibio_true_num_tokens": 61.1328125, "step": 175000 }, { "epoch": 33.6, "eval_nq_accuracy": 0.5323125, "eval_nq_bleu_score": 12.059236473751525, "eval_nq_bleu_score_sem": 0.5007571574064967, "eval_nq_emb_cos_sim": 0.838355541229248, "eval_nq_emb_cos_sim_sem": 0.006863833575320251, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1420786380767822, "eval_nq_n_ngrams_match_1": 23.284, "eval_nq_n_ngrams_match_2": 8.682, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 49.132, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.517123258216118, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45210621431924725, "eval_nq_runtime": 11.0339, "eval_nq_samples_per_second": 45.315, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4652564768416831, "eval_nq_token_set_f1_sem": 0.005004650064946198, "eval_nq_token_set_precision": 0.42371902368203906, "eval_nq_token_set_recall": 0.5252669319679266, "eval_nq_true_num_tokens": 64.0, "step": 175000 }, { "epoch": 33.6, "learning_rate": 0.001, "loss": 2.5234, "step": 175008 }, { "epoch": 33.61, "learning_rate": 0.001, "loss": 2.5273, "step": 175020 }, { "epoch": 33.61, "learning_rate": 0.001, "loss": 2.5185, "step": 175032 }, { "epoch": 33.61, "learning_rate": 0.001, "loss": 2.5203, "step": 175044 }, { "epoch": 33.61, "learning_rate": 0.001, "loss": 2.5205, "step": 175056 }, { "epoch": 33.62, "learning_rate": 0.001, "loss": 2.5138, "step": 175068 }, { "epoch": 33.62, "learning_rate": 0.001, "loss": 2.5188, "step": 175080 }, { "epoch": 33.62, "learning_rate": 0.001, "loss": 2.5048, "step": 175092 }, { "epoch": 33.62, "learning_rate": 0.001, "loss": 2.5126, "step": 175104 }, { "epoch": 33.62, "learning_rate": 0.001, "loss": 2.5155, "step": 175116 }, { "epoch": 33.63, "learning_rate": 0.001, "loss": 2.5185, "step": 175128 }, { "epoch": 33.63, "learning_rate": 0.001, "loss": 2.5144, "step": 175140 }, { "epoch": 33.63, "learning_rate": 0.001, "loss": 2.5278, "step": 175152 }, { "epoch": 33.63, "learning_rate": 0.001, "loss": 2.5108, "step": 175164 }, { "epoch": 33.64, "learning_rate": 0.001, "loss": 2.5152, "step": 175176 }, { "epoch": 33.64, "learning_rate": 0.001, "loss": 2.5177, "step": 175188 }, { "epoch": 33.64, "learning_rate": 0.001, "loss": 2.5062, "step": 175200 }, { "epoch": 33.64, "learning_rate": 0.001, "loss": 2.5205, "step": 175212 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 2.5198, "step": 175224 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 2.5177, "step": 175236 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 2.5198, "step": 175248 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 2.5199, "step": 175260 }, { "epoch": 33.65, "learning_rate": 0.001, "loss": 2.5203, "step": 175272 }, { "epoch": 33.66, "learning_rate": 0.001, "loss": 2.5143, "step": 175284 }, { "epoch": 33.66, "learning_rate": 0.001, "loss": 2.5171, "step": 175296 }, { "epoch": 33.66, "learning_rate": 0.001, "loss": 2.5205, "step": 175308 }, { "epoch": 33.66, "learning_rate": 0.001, "loss": 2.5192, "step": 175320 }, { "epoch": 33.67, "learning_rate": 0.001, "loss": 2.5217, "step": 175332 }, { "epoch": 33.67, "learning_rate": 0.001, "loss": 2.5147, "step": 175344 }, { "epoch": 33.67, "learning_rate": 0.001, "loss": 2.5221, "step": 175356 }, { "epoch": 33.67, "learning_rate": 0.001, "loss": 2.5154, "step": 175368 }, { "epoch": 33.68, "learning_rate": 0.001, "loss": 2.5251, "step": 175380 }, { "epoch": 33.68, "learning_rate": 0.001, "loss": 2.5294, "step": 175392 }, { "epoch": 33.68, "learning_rate": 0.001, "loss": 2.5215, "step": 175404 }, { "epoch": 33.68, "learning_rate": 0.001, "loss": 2.525, "step": 175416 }, { "epoch": 33.68, "learning_rate": 0.001, "loss": 2.5191, "step": 175428 }, { "epoch": 33.69, "learning_rate": 0.001, "loss": 2.5176, "step": 175440 }, { "epoch": 33.69, "learning_rate": 0.001, "loss": 2.519, "step": 175452 }, { "epoch": 33.69, "learning_rate": 0.001, "loss": 2.5254, "step": 175464 }, { "epoch": 33.69, "learning_rate": 0.001, "loss": 2.5235, "step": 175476 }, { "epoch": 33.7, "learning_rate": 0.001, "loss": 2.5262, "step": 175488 }, { "epoch": 33.7, "learning_rate": 0.001, "loss": 2.5173, "step": 175500 }, { "epoch": 33.7, "learning_rate": 0.001, "loss": 2.5245, "step": 175512 }, { "epoch": 33.7, "learning_rate": 0.001, "loss": 2.5244, "step": 175524 }, { "epoch": 33.71, "learning_rate": 0.001, "loss": 2.5276, "step": 175536 }, { "epoch": 33.71, "learning_rate": 0.001, "loss": 2.5195, "step": 175548 }, { "epoch": 33.71, "learning_rate": 0.001, "loss": 2.5128, "step": 175560 }, { "epoch": 33.71, "learning_rate": 0.001, "loss": 2.5237, "step": 175572 }, { "epoch": 33.71, "learning_rate": 0.001, "loss": 2.5129, "step": 175584 }, { "epoch": 33.72, "learning_rate": 0.001, "loss": 2.5206, "step": 175596 }, { "epoch": 33.72, "learning_rate": 0.001, "loss": 2.5264, "step": 175608 }, { "epoch": 33.72, "learning_rate": 0.001, "loss": 2.5301, "step": 175620 }, { "epoch": 33.72, "eval_ag_news_accuracy": 0.32684375, "eval_ag_news_bleu_score": 4.855373379192692, "eval_ag_news_bleu_score_sem": 0.14698318106273012, "eval_ag_news_emb_cos_sim": 0.8197398781776428, "eval_ag_news_emb_cos_sim_sem": 0.006081692116990319, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4985544681549072, "eval_ag_news_n_ngrams_match_1": 14.406, "eval_ag_news_n_ngrams_match_2": 3.152, "eval_ag_news_n_ngrams_match_3": 0.878, "eval_ag_news_num_pred_words": 46.648, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.06761710005661, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.359196732256724, "eval_ag_news_runtime": 10.7933, "eval_ag_news_samples_per_second": 46.325, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.3588689648036829, "eval_ag_news_token_set_f1_sem": 0.004311211936397491, "eval_ag_news_token_set_precision": 0.34603148979227266, "eval_ag_news_token_set_recall": 0.3859730557537426, "eval_ag_news_true_num_tokens": 56.09375, "step": 175625 }, { "epoch": 33.72, "eval_anthropic_toxic_prompts_accuracy": 0.11371875, "eval_anthropic_toxic_prompts_bleu_score": 3.301903666975883, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12393022775966903, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.683284342288971, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008835675858535952, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.238818883895874, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.312, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78, "eval_anthropic_toxic_prompts_num_pred_words": 46.702, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.503581260608513, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21895779495330242, "eval_anthropic_toxic_prompts_runtime": 9.8158, "eval_anthropic_toxic_prompts_samples_per_second": 50.938, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3550808478742661, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006314207547264958, "eval_anthropic_toxic_prompts_token_set_precision": 0.4456959496262504, "eval_anthropic_toxic_prompts_token_set_recall": 0.3198729423060755, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 175625 }, { "epoch": 33.72, "eval_arxiv_accuracy": 0.34953125, "eval_arxiv_bleu_score": 4.331818024119469, "eval_arxiv_bleu_score_sem": 0.12788145618654687, "eval_arxiv_emb_cos_sim": 0.7702836394309998, "eval_arxiv_emb_cos_sim_sem": 0.007456664561029406, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.35262393951416, "eval_arxiv_n_ngrams_match_1": 15.19, "eval_arxiv_n_ngrams_match_2": 2.916, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 40.098, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.57762130021002, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36624769462691664, "eval_arxiv_runtime": 9.9939, "eval_arxiv_samples_per_second": 50.031, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.357499176963368, "eval_arxiv_token_set_f1_sem": 0.004310979759387343, "eval_arxiv_token_set_precision": 0.3097404851695088, "eval_arxiv_token_set_recall": 0.4439020351908504, "eval_arxiv_true_num_tokens": 64.0, "step": 175625 }, { "epoch": 33.72, "eval_python_code_alpaca_accuracy": 0.163375, "eval_python_code_alpaca_bleu_score": 4.969876834086027, "eval_python_code_alpaca_bleu_score_sem": 0.15598222238486606, "eval_python_code_alpaca_emb_cos_sim": 0.7668463587760925, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008543491514145162, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.864105463027954, "eval_python_code_alpaca_n_ngrams_match_1": 10.132, "eval_python_code_alpaca_n_ngrams_match_2": 3.104, "eval_python_code_alpaca_n_ngrams_match_3": 1.106, "eval_python_code_alpaca_num_pred_words": 43.472, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.53336194692209, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3447611111215496, "eval_python_code_alpaca_runtime": 10.0089, "eval_python_code_alpaca_samples_per_second": 49.956, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.48897683964688116, "eval_python_code_alpaca_token_set_f1_sem": 0.005347935129504301, "eval_python_code_alpaca_token_set_precision": 0.5561481516871388, "eval_python_code_alpaca_token_set_recall": 0.4571091325355401, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 175625 }, { "epoch": 33.72, "eval_wikibio_accuracy": 0.325375, "eval_wikibio_bleu_score": 6.221671890476333, "eval_wikibio_bleu_score_sem": 0.22448468653591203, "eval_wikibio_emb_cos_sim": 0.7493537068367004, "eval_wikibio_emb_cos_sim_sem": 0.008246055277303212, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6775736808776855, "eval_wikibio_n_ngrams_match_1": 10.252, "eval_wikibio_n_ngrams_match_2": 3.5, "eval_wikibio_n_ngrams_match_3": 1.326, "eval_wikibio_num_pred_words": 35.868, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.550315873823486, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36321856530567737, "eval_wikibio_runtime": 9.9853, "eval_wikibio_samples_per_second": 50.073, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3252007598847834, "eval_wikibio_token_set_f1_sem": 0.005536074977434512, "eval_wikibio_token_set_precision": 0.3323976521740242, "eval_wikibio_token_set_recall": 0.33309783714640956, "eval_wikibio_true_num_tokens": 61.1328125, "step": 175625 }, { "epoch": 33.72, "eval_nq_accuracy": 0.533, "eval_nq_bleu_score": 12.243123045485252, "eval_nq_bleu_score_sem": 0.4840684804050873, "eval_nq_emb_cos_sim": 0.8390569686889648, "eval_nq_emb_cos_sim_sem": 0.007764245017472323, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1423661708831787, "eval_nq_n_ngrams_match_1": 23.632, "eval_nq_n_ngrams_match_2": 8.802, "eval_nq_n_ngrams_match_3": 4.058, "eval_nq_num_pred_words": 49.206, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.519572562679796, "eval_nq_pred_num_tokens": 62.9921875, "eval_nq_rouge_score": 0.4584658117502919, "eval_nq_runtime": 10.4933, "eval_nq_samples_per_second": 47.649, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.4710025770467909, "eval_nq_token_set_f1_sem": 0.004918792561906478, "eval_nq_token_set_precision": 0.42980353589918835, "eval_nq_token_set_recall": 0.5285762419652889, "eval_nq_true_num_tokens": 64.0, "step": 175625 }, { "epoch": 33.72, "learning_rate": 0.001, "loss": 2.5136, "step": 175632 }, { "epoch": 33.73, "learning_rate": 0.001, "loss": 2.522, "step": 175644 }, { "epoch": 33.73, "learning_rate": 0.001, "loss": 2.5258, "step": 175656 }, { "epoch": 33.73, "learning_rate": 0.001, "loss": 2.5232, "step": 175668 }, { "epoch": 33.73, "learning_rate": 0.001, "loss": 2.5174, "step": 175680 }, { "epoch": 33.74, "learning_rate": 0.001, "loss": 2.5215, "step": 175692 }, { "epoch": 33.74, "learning_rate": 0.001, "loss": 2.5222, "step": 175704 }, { "epoch": 33.74, "learning_rate": 0.001, "loss": 2.5195, "step": 175716 }, { "epoch": 33.74, "learning_rate": 0.001, "loss": 2.5267, "step": 175728 }, { "epoch": 33.74, "learning_rate": 0.001, "loss": 2.5165, "step": 175740 }, { "epoch": 33.75, "learning_rate": 0.001, "loss": 2.5208, "step": 175752 }, { "epoch": 33.75, "learning_rate": 0.001, "loss": 2.5211, "step": 175764 }, { "epoch": 33.75, "learning_rate": 0.001, "loss": 2.5285, "step": 175776 }, { "epoch": 33.75, "learning_rate": 0.001, "loss": 2.5247, "step": 175788 }, { "epoch": 33.76, "learning_rate": 0.001, "loss": 2.5144, "step": 175800 }, { "epoch": 33.76, "learning_rate": 0.001, "loss": 2.512, "step": 175812 }, { "epoch": 33.76, "learning_rate": 0.001, "loss": 2.5152, "step": 175824 }, { "epoch": 33.76, "learning_rate": 0.001, "loss": 2.5216, "step": 175836 }, { "epoch": 33.76, "learning_rate": 0.001, "loss": 2.5169, "step": 175848 }, { "epoch": 33.77, "learning_rate": 0.001, "loss": 2.5137, "step": 175860 }, { "epoch": 33.77, "learning_rate": 0.001, "loss": 2.5185, "step": 175872 }, { "epoch": 33.77, "learning_rate": 0.001, "loss": 2.5158, "step": 175884 }, { "epoch": 33.77, "learning_rate": 0.001, "loss": 2.5165, "step": 175896 }, { "epoch": 33.78, "learning_rate": 0.001, "loss": 2.5142, "step": 175908 }, { "epoch": 33.78, "learning_rate": 0.001, "loss": 2.5094, "step": 175920 }, { "epoch": 33.78, "learning_rate": 0.001, "loss": 2.5178, "step": 175932 }, { "epoch": 33.78, "learning_rate": 0.001, "loss": 2.5248, "step": 175944 }, { "epoch": 33.79, "learning_rate": 0.001, "loss": 2.5165, "step": 175956 }, { "epoch": 33.79, "learning_rate": 0.001, "loss": 2.5083, "step": 175968 }, { "epoch": 33.79, "learning_rate": 0.001, "loss": 2.5243, "step": 175980 }, { "epoch": 33.79, "learning_rate": 0.001, "loss": 2.5194, "step": 175992 }, { "epoch": 33.79, "learning_rate": 0.001, "loss": 2.5314, "step": 176004 }, { "epoch": 33.8, "learning_rate": 0.001, "loss": 2.5198, "step": 176016 }, { "epoch": 33.8, "learning_rate": 0.001, "loss": 2.5204, "step": 176028 }, { "epoch": 33.8, "learning_rate": 0.001, "loss": 2.5191, "step": 176040 }, { "epoch": 33.8, "learning_rate": 0.001, "loss": 2.5157, "step": 176052 }, { "epoch": 33.81, "learning_rate": 0.001, "loss": 2.5281, "step": 176064 }, { "epoch": 33.81, "learning_rate": 0.001, "loss": 2.5202, "step": 176076 }, { "epoch": 33.81, "learning_rate": 0.001, "loss": 2.5195, "step": 176088 }, { "epoch": 33.81, "learning_rate": 0.001, "loss": 2.5224, "step": 176100 }, { "epoch": 33.82, "learning_rate": 0.001, "loss": 2.5124, "step": 176112 }, { "epoch": 33.82, "learning_rate": 0.001, "loss": 2.5166, "step": 176124 }, { "epoch": 33.82, "learning_rate": 0.001, "loss": 2.5163, "step": 176136 }, { "epoch": 33.82, "learning_rate": 0.001, "loss": 2.5309, "step": 176148 }, { "epoch": 33.82, "learning_rate": 0.001, "loss": 2.5094, "step": 176160 }, { "epoch": 33.83, "learning_rate": 0.001, "loss": 2.5286, "step": 176172 }, { "epoch": 33.83, "learning_rate": 0.001, "loss": 2.5173, "step": 176184 }, { "epoch": 33.83, "learning_rate": 0.001, "loss": 2.5141, "step": 176196 }, { "epoch": 33.83, "learning_rate": 0.001, "loss": 2.524, "step": 176208 }, { "epoch": 33.84, "learning_rate": 0.001, "loss": 2.5194, "step": 176220 }, { "epoch": 33.84, "learning_rate": 0.001, "loss": 2.5331, "step": 176232 }, { "epoch": 33.84, "learning_rate": 0.001, "loss": 2.5205, "step": 176244 }, { "epoch": 33.84, "eval_ag_news_accuracy": 0.32653125, "eval_ag_news_bleu_score": 5.022316794581068, "eval_ag_news_bleu_score_sem": 0.1610563486577137, "eval_ag_news_emb_cos_sim": 0.8117334246635437, "eval_ag_news_emb_cos_sim_sem": 0.007804921336096892, "eval_ag_news_emb_top1_equal": 0.15625, "eval_ag_news_emb_top1_equal_sem": 0.03221922156442571, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4926795959472656, "eval_ag_news_n_ngrams_match_1": 14.332, "eval_ag_news_n_ngrams_match_2": 3.17, "eval_ag_news_n_ngrams_match_3": 0.962, "eval_ag_news_num_pred_words": 46.34, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.87391860943298, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35827660389767435, "eval_ag_news_runtime": 10.3068, "eval_ag_news_samples_per_second": 48.512, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3569641546797145, "eval_ag_news_token_set_f1_sem": 0.004364326287541326, "eval_ag_news_token_set_precision": 0.34392343268458236, "eval_ag_news_token_set_recall": 0.38552168292441336, "eval_ag_news_true_num_tokens": 56.09375, "step": 176250 }, { "epoch": 33.84, "eval_anthropic_toxic_prompts_accuracy": 0.11565625, "eval_anthropic_toxic_prompts_bleu_score": 3.101782117487336, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11736410045728099, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6723465919494629, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008566576133327989, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.19783353805542, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.206, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.898, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732, "eval_anthropic_toxic_prompts_num_pred_words": 47.23, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.479438934898205, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21430135132486505, "eval_anthropic_toxic_prompts_runtime": 10.1328, "eval_anthropic_toxic_prompts_samples_per_second": 49.345, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.35964724783009494, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006470879887319756, "eval_anthropic_toxic_prompts_token_set_precision": 0.4402183209964053, "eval_anthropic_toxic_prompts_token_set_recall": 0.32932754974213424, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 176250 }, { "epoch": 33.84, "eval_arxiv_accuracy": 0.349875, "eval_arxiv_bleu_score": 4.333335033387184, "eval_arxiv_bleu_score_sem": 0.13070468884873127, "eval_arxiv_emb_cos_sim": 0.7708592414855957, "eval_arxiv_emb_cos_sim_sem": 0.008004529469188995, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.363433599472046, "eval_arxiv_n_ngrams_match_1": 15.024, "eval_arxiv_n_ngrams_match_2": 2.974, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 39.702, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.88821133084713, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3638282134047197, "eval_arxiv_runtime": 10.4254, "eval_arxiv_samples_per_second": 47.96, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3550458552555655, "eval_arxiv_token_set_f1_sem": 0.004273149787825975, "eval_arxiv_token_set_precision": 0.3055640869524656, "eval_arxiv_token_set_recall": 0.4463338959390171, "eval_arxiv_true_num_tokens": 64.0, "step": 176250 }, { "epoch": 33.84, "eval_python_code_alpaca_accuracy": 0.1631875, "eval_python_code_alpaca_bleu_score": 4.882627397241164, "eval_python_code_alpaca_bleu_score_sem": 0.1474335988021144, "eval_python_code_alpaca_emb_cos_sim": 0.7622191905975342, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008958193454434582, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.862119197845459, "eval_python_code_alpaca_n_ngrams_match_1": 10.102, "eval_python_code_alpaca_n_ngrams_match_2": 3.11, "eval_python_code_alpaca_n_ngrams_match_3": 1.114, "eval_python_code_alpaca_num_pred_words": 43.886, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.498570604409352, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34009863909271487, "eval_python_code_alpaca_runtime": 10.0054, "eval_python_code_alpaca_samples_per_second": 49.973, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4912797406295639, "eval_python_code_alpaca_token_set_f1_sem": 0.005359606041822242, "eval_python_code_alpaca_token_set_precision": 0.5515130652188471, "eval_python_code_alpaca_token_set_recall": 0.46187727605691214, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 176250 }, { "epoch": 33.84, "eval_wikibio_accuracy": 0.32375, "eval_wikibio_bleu_score": 5.862255681674758, "eval_wikibio_bleu_score_sem": 0.22913003328137857, "eval_wikibio_emb_cos_sim": 0.7266998291015625, "eval_wikibio_emb_cos_sim_sem": 0.010587300858282231, "eval_wikibio_emb_top1_equal": 0.140625, "eval_wikibio_emb_top1_equal_sem": 0.030847557647994725, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6650583744049072, "eval_wikibio_n_ngrams_match_1": 9.756, "eval_wikibio_n_ngrams_match_2": 3.254, "eval_wikibio_n_ngrams_match_3": 1.182, "eval_wikibio_num_pred_words": 34.968, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.05841610835043, "eval_wikibio_pred_num_tokens": 62.984375, "eval_wikibio_rouge_score": 0.346696465532531, "eval_wikibio_runtime": 10.0086, "eval_wikibio_samples_per_second": 49.957, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3127898146298991, "eval_wikibio_token_set_f1_sem": 0.005959211422264519, "eval_wikibio_token_set_precision": 0.31750633023827374, "eval_wikibio_token_set_recall": 0.32692533528118073, "eval_wikibio_true_num_tokens": 61.1328125, "step": 176250 }, { "epoch": 33.84, "eval_nq_accuracy": 0.53265625, "eval_nq_bleu_score": 12.032671736687595, "eval_nq_bleu_score_sem": 0.4864826348076925, "eval_nq_emb_cos_sim": 0.8336796760559082, "eval_nq_emb_cos_sim_sem": 0.007405400163805492, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.142033100128174, "eval_nq_n_ngrams_match_1": 23.406, "eval_nq_n_ngrams_match_2": 8.724, "eval_nq_n_ngrams_match_3": 4.014, "eval_nq_num_pred_words": 49.216, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.51673541472576, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4559461107007099, "eval_nq_runtime": 10.4438, "eval_nq_samples_per_second": 47.875, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46876594569524405, "eval_nq_token_set_f1_sem": 0.004906758692664311, "eval_nq_token_set_precision": 0.42689880034459693, "eval_nq_token_set_recall": 0.5284977305367066, "eval_nq_true_num_tokens": 64.0, "step": 176250 }, { "epoch": 33.84, "learning_rate": 0.001, "loss": 2.5226, "step": 176256 }, { "epoch": 33.85, "learning_rate": 0.001, "loss": 2.5169, "step": 176268 }, { "epoch": 33.85, "learning_rate": 0.001, "loss": 2.5184, "step": 176280 }, { "epoch": 33.85, "learning_rate": 0.001, "loss": 2.523, "step": 176292 }, { "epoch": 33.85, "learning_rate": 0.001, "loss": 2.5268, "step": 176304 }, { "epoch": 33.85, "learning_rate": 0.001, "loss": 2.5155, "step": 176316 }, { "epoch": 33.86, "learning_rate": 0.001, "loss": 2.5157, "step": 176328 }, { "epoch": 33.86, "learning_rate": 0.001, "loss": 2.5185, "step": 176340 }, { "epoch": 33.86, "learning_rate": 0.001, "loss": 2.5177, "step": 176352 }, { "epoch": 33.86, "learning_rate": 0.001, "loss": 2.5274, "step": 176364 }, { "epoch": 33.87, "learning_rate": 0.001, "loss": 2.5086, "step": 176376 }, { "epoch": 33.87, "learning_rate": 0.001, "loss": 2.5209, "step": 176388 }, { "epoch": 33.87, "learning_rate": 0.001, "loss": 2.5249, "step": 176400 }, { "epoch": 33.87, "learning_rate": 0.001, "loss": 2.5138, "step": 176412 }, { "epoch": 33.88, "learning_rate": 0.001, "loss": 2.5178, "step": 176424 }, { "epoch": 33.88, "learning_rate": 0.001, "loss": 2.5167, "step": 176436 }, { "epoch": 33.88, "learning_rate": 0.001, "loss": 2.5321, "step": 176448 }, { "epoch": 33.88, "learning_rate": 0.001, "loss": 2.5125, "step": 176460 }, { "epoch": 33.88, "learning_rate": 0.001, "loss": 2.5189, "step": 176472 }, { "epoch": 33.89, "learning_rate": 0.001, "loss": 2.5238, "step": 176484 }, { "epoch": 33.89, "learning_rate": 0.001, "loss": 2.5233, "step": 176496 }, { "epoch": 33.89, "learning_rate": 0.001, "loss": 2.5068, "step": 176508 }, { "epoch": 33.89, "learning_rate": 0.001, "loss": 2.5136, "step": 176520 }, { "epoch": 33.9, "learning_rate": 0.001, "loss": 2.5203, "step": 176532 }, { "epoch": 33.9, "learning_rate": 0.001, "loss": 2.5197, "step": 176544 }, { "epoch": 33.9, "learning_rate": 0.001, "loss": 2.5165, "step": 176556 }, { "epoch": 33.9, "learning_rate": 0.001, "loss": 2.5261, "step": 176568 }, { "epoch": 33.91, "learning_rate": 0.001, "loss": 2.5204, "step": 176580 }, { "epoch": 33.91, "learning_rate": 0.001, "loss": 2.5228, "step": 176592 }, { "epoch": 33.91, "learning_rate": 0.001, "loss": 2.5232, "step": 176604 }, { "epoch": 33.91, "learning_rate": 0.001, "loss": 2.5244, "step": 176616 }, { "epoch": 33.91, "learning_rate": 0.001, "loss": 2.5166, "step": 176628 }, { "epoch": 33.92, "learning_rate": 0.001, "loss": 2.5224, "step": 176640 }, { "epoch": 33.92, "learning_rate": 0.001, "loss": 2.5254, "step": 176652 }, { "epoch": 33.92, "learning_rate": 0.001, "loss": 2.5185, "step": 176664 }, { "epoch": 33.92, "learning_rate": 0.001, "loss": 2.5281, "step": 176676 }, { "epoch": 33.93, "learning_rate": 0.001, "loss": 2.5231, "step": 176688 }, { "epoch": 33.93, "learning_rate": 0.001, "loss": 2.5285, "step": 176700 }, { "epoch": 33.93, "learning_rate": 0.001, "loss": 2.5111, "step": 176712 }, { "epoch": 33.93, "learning_rate": 0.001, "loss": 2.5205, "step": 176724 }, { "epoch": 33.94, "learning_rate": 0.001, "loss": 2.5184, "step": 176736 }, { "epoch": 33.94, "learning_rate": 0.001, "loss": 2.5198, "step": 176748 }, { "epoch": 33.94, "learning_rate": 0.001, "loss": 2.5285, "step": 176760 }, { "epoch": 33.94, "learning_rate": 0.001, "loss": 2.5292, "step": 176772 }, { "epoch": 33.94, "learning_rate": 0.001, "loss": 2.5165, "step": 176784 }, { "epoch": 33.95, "learning_rate": 0.001, "loss": 2.5315, "step": 176796 }, { "epoch": 33.95, "learning_rate": 0.001, "loss": 2.5208, "step": 176808 }, { "epoch": 33.95, "learning_rate": 0.001, "loss": 2.5221, "step": 176820 }, { "epoch": 33.95, "learning_rate": 0.001, "loss": 2.516, "step": 176832 }, { "epoch": 33.96, "learning_rate": 0.001, "loss": 2.5194, "step": 176844 }, { "epoch": 33.96, "learning_rate": 0.001, "loss": 2.5233, "step": 176856 }, { "epoch": 33.96, "learning_rate": 0.001, "loss": 2.5263, "step": 176868 }, { "epoch": 33.96, "eval_ag_news_accuracy": 0.3281875, "eval_ag_news_bleu_score": 4.925127791586091, "eval_ag_news_bleu_score_sem": 0.15694905014873736, "eval_ag_news_emb_cos_sim": 0.8147540092468262, "eval_ag_news_emb_cos_sim_sem": 0.006866652145814417, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.496150016784668, "eval_ag_news_n_ngrams_match_1": 14.384, "eval_ag_news_n_ngrams_match_2": 3.19, "eval_ag_news_n_ngrams_match_3": 0.906, "eval_ag_news_num_pred_words": 46.64, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.988203134579116, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3561577784761314, "eval_ag_news_runtime": 10.2941, "eval_ag_news_samples_per_second": 48.572, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3600030201437264, "eval_ag_news_token_set_f1_sem": 0.00448956066480172, "eval_ag_news_token_set_precision": 0.3439573073316057, "eval_ag_news_token_set_recall": 0.39386164250470707, "eval_ag_news_true_num_tokens": 56.09375, "step": 176875 }, { "epoch": 33.96, "eval_anthropic_toxic_prompts_accuracy": 0.1148125, "eval_anthropic_toxic_prompts_bleu_score": 3.1461440631615756, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1162112442430475, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6758161783218384, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008089936106429272, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2554593086242676, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.216, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.702, "eval_anthropic_toxic_prompts_num_pred_words": 46.86, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.931522370968956, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21552557670242806, "eval_anthropic_toxic_prompts_runtime": 10.0285, "eval_anthropic_toxic_prompts_samples_per_second": 49.858, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.36147746707622597, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006401626003123946, "eval_anthropic_toxic_prompts_token_set_precision": 0.4391229271077412, "eval_anthropic_toxic_prompts_token_set_recall": 0.33594247230708824, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 176875 }, { "epoch": 33.96, "eval_arxiv_accuracy": 0.3485, "eval_arxiv_bleu_score": 4.475956653190206, "eval_arxiv_bleu_score_sem": 0.1307632635874717, "eval_arxiv_emb_cos_sim": 0.7735968232154846, "eval_arxiv_emb_cos_sim_sem": 0.007195954692853144, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3651084899902344, "eval_arxiv_n_ngrams_match_1": 15.386, "eval_arxiv_n_ngrams_match_2": 3.042, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 40.688, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.936636464170498, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.36923529217214174, "eval_arxiv_runtime": 10.7478, "eval_arxiv_samples_per_second": 46.521, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.36256383516191165, "eval_arxiv_token_set_f1_sem": 0.004078160752144186, "eval_arxiv_token_set_precision": 0.3135278337275999, "eval_arxiv_token_set_recall": 0.4472464725676267, "eval_arxiv_true_num_tokens": 64.0, "step": 176875 }, { "epoch": 33.96, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.7387667116625645, "eval_python_code_alpaca_bleu_score_sem": 0.14172375487910976, "eval_python_code_alpaca_emb_cos_sim": 0.7565352916717529, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009347629676130734, "eval_python_code_alpaca_emb_top1_equal": 0.078125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.023813825516515504, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.86161208152771, "eval_python_code_alpaca_n_ngrams_match_1": 9.958, "eval_python_code_alpaca_n_ngrams_match_2": 2.994, "eval_python_code_alpaca_n_ngrams_match_3": 1.052, "eval_python_code_alpaca_num_pred_words": 44.364, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.489699043365377, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3354474476398809, "eval_python_code_alpaca_runtime": 9.7477, "eval_python_code_alpaca_samples_per_second": 51.294, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.4876155805606222, "eval_python_code_alpaca_token_set_f1_sem": 0.0053307150896694236, "eval_python_code_alpaca_token_set_precision": 0.5457988485903871, "eval_python_code_alpaca_token_set_recall": 0.4602532240033791, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 176875 }, { "epoch": 33.96, "eval_wikibio_accuracy": 0.3266875, "eval_wikibio_bleu_score": 5.686736670012528, "eval_wikibio_bleu_score_sem": 0.20163567985024022, "eval_wikibio_emb_cos_sim": 0.7425272464752197, "eval_wikibio_emb_cos_sim_sem": 0.010202726670688516, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.669208288192749, "eval_wikibio_n_ngrams_match_1": 9.97, "eval_wikibio_n_ngrams_match_2": 3.244, "eval_wikibio_n_ngrams_match_3": 1.132, "eval_wikibio_num_pred_words": 35.54, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.22084196142814, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35275426820844197, "eval_wikibio_runtime": 10.0048, "eval_wikibio_samples_per_second": 49.976, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.316749368723803, "eval_wikibio_token_set_f1_sem": 0.0054844982113824185, "eval_wikibio_token_set_precision": 0.32155038904725625, "eval_wikibio_token_set_recall": 0.3311863377336578, "eval_wikibio_true_num_tokens": 61.1328125, "step": 176875 }, { "epoch": 33.96, "eval_nq_accuracy": 0.5338125, "eval_nq_bleu_score": 11.958889977560315, "eval_nq_bleu_score_sem": 0.49241591516146427, "eval_nq_emb_cos_sim": 0.826998233795166, "eval_nq_emb_cos_sim_sem": 0.007967327631388542, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1436386108398438, "eval_nq_n_ngrams_match_1": 23.222, "eval_nq_n_ngrams_match_2": 8.592, "eval_nq_n_ngrams_match_3": 4.026, "eval_nq_num_pred_words": 48.864, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.53042010718318, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45182487939989135, "eval_nq_runtime": 10.4291, "eval_nq_samples_per_second": 47.943, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46485089366770055, "eval_nq_token_set_f1_sem": 0.005049191163962152, "eval_nq_token_set_precision": 0.42159101839303115, "eval_nq_token_set_recall": 0.5278695553695189, "eval_nq_true_num_tokens": 64.0, "step": 176875 }, { "epoch": 33.96, "learning_rate": 0.001, "loss": 2.5192, "step": 176880 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 2.5177, "step": 176892 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 2.5186, "step": 176904 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 2.5316, "step": 176916 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 2.5152, "step": 176928 }, { "epoch": 33.97, "learning_rate": 0.001, "loss": 2.528, "step": 176940 }, { "epoch": 33.98, "learning_rate": 0.001, "loss": 2.5187, "step": 176952 }, { "epoch": 33.98, "learning_rate": 0.001, "loss": 2.5273, "step": 176964 }, { "epoch": 33.98, "learning_rate": 0.001, "loss": 2.5193, "step": 176976 }, { "epoch": 33.98, "learning_rate": 0.001, "loss": 2.5289, "step": 176988 }, { "epoch": 33.99, "learning_rate": 0.001, "loss": 2.5216, "step": 177000 }, { "epoch": 33.99, "learning_rate": 0.001, "loss": 2.5218, "step": 177012 }, { "epoch": 33.99, "learning_rate": 0.001, "loss": 2.5223, "step": 177024 }, { "epoch": 33.99, "learning_rate": 0.001, "loss": 2.5195, "step": 177036 }, { "epoch": 34.0, "learning_rate": 0.001, "loss": 2.5112, "step": 177048 }, { "epoch": 34.0, "learning_rate": 0.001, "loss": 2.5228, "step": 177060 }, { "epoch": 34.0, "learning_rate": 0.001, "loss": 2.5093, "step": 177072 }, { "epoch": 34.0, "learning_rate": 0.001, "loss": 2.5099, "step": 177084 }, { "epoch": 34.0, "learning_rate": 0.001, "loss": 2.5055, "step": 177096 }, { "epoch": 34.01, "learning_rate": 0.001, "loss": 2.5073, "step": 177108 }, { "epoch": 34.01, "learning_rate": 0.001, "loss": 2.5044, "step": 177120 }, { "epoch": 34.01, "learning_rate": 0.001, "loss": 2.501, "step": 177132 }, { "epoch": 34.01, "learning_rate": 0.001, "loss": 2.5081, "step": 177144 }, { "epoch": 34.02, "learning_rate": 0.001, "loss": 2.5133, "step": 177156 }, { "epoch": 34.02, "learning_rate": 0.001, "loss": 2.5015, "step": 177168 }, { "epoch": 34.02, "learning_rate": 0.001, "loss": 2.5029, "step": 177180 }, { "epoch": 34.02, "learning_rate": 0.001, "loss": 2.507, "step": 177192 }, { "epoch": 34.03, "learning_rate": 0.001, "loss": 2.5022, "step": 177204 }, { "epoch": 34.03, "learning_rate": 0.001, "loss": 2.5075, "step": 177216 }, { "epoch": 34.03, "learning_rate": 0.001, "loss": 2.505, "step": 177228 }, { "epoch": 34.03, "learning_rate": 0.001, "loss": 2.504, "step": 177240 }, { "epoch": 34.03, "learning_rate": 0.001, "loss": 2.5059, "step": 177252 }, { "epoch": 34.04, "learning_rate": 0.001, "loss": 2.5041, "step": 177264 }, { "epoch": 34.04, "learning_rate": 0.001, "loss": 2.4997, "step": 177276 }, { "epoch": 34.04, "learning_rate": 0.001, "loss": 2.5064, "step": 177288 }, { "epoch": 34.04, "learning_rate": 0.001, "loss": 2.5048, "step": 177300 }, { "epoch": 34.05, "learning_rate": 0.001, "loss": 2.507, "step": 177312 }, { "epoch": 34.05, "learning_rate": 0.001, "loss": 2.5025, "step": 177324 }, { "epoch": 34.05, "learning_rate": 0.001, "loss": 2.5081, "step": 177336 }, { "epoch": 34.05, "learning_rate": 0.001, "loss": 2.505, "step": 177348 }, { "epoch": 34.06, "learning_rate": 0.001, "loss": 2.5084, "step": 177360 }, { "epoch": 34.06, "learning_rate": 0.001, "loss": 2.5178, "step": 177372 }, { "epoch": 34.06, "learning_rate": 0.001, "loss": 2.5039, "step": 177384 }, { "epoch": 34.06, "learning_rate": 0.001, "loss": 2.5012, "step": 177396 }, { "epoch": 34.06, "learning_rate": 0.001, "loss": 2.5051, "step": 177408 }, { "epoch": 34.07, "learning_rate": 0.001, "loss": 2.5089, "step": 177420 }, { "epoch": 34.07, "learning_rate": 0.001, "loss": 2.5111, "step": 177432 }, { "epoch": 34.07, "learning_rate": 0.001, "loss": 2.5061, "step": 177444 }, { "epoch": 34.07, "learning_rate": 0.001, "loss": 2.5029, "step": 177456 }, { "epoch": 34.08, "learning_rate": 0.001, "loss": 2.5039, "step": 177468 }, { "epoch": 34.08, "learning_rate": 0.001, "loss": 2.5067, "step": 177480 }, { "epoch": 34.08, "learning_rate": 0.001, "loss": 2.5231, "step": 177492 }, { "epoch": 34.08, "eval_ag_news_accuracy": 0.3284375, "eval_ag_news_bleu_score": 4.9737798732915905, "eval_ag_news_bleu_score_sem": 0.16287397778811172, "eval_ag_news_emb_cos_sim": 0.8183648586273193, "eval_ag_news_emb_cos_sim_sem": 0.006624846456284306, "eval_ag_news_emb_top1_equal": 0.1796875, "eval_ag_news_emb_top1_equal_sem": 0.034068008879424266, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.50203800201416, "eval_ag_news_n_ngrams_match_1": 14.354, "eval_ag_news_n_ngrams_match_2": 3.19, "eval_ag_news_n_ngrams_match_3": 0.952, "eval_ag_news_num_pred_words": 46.748, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.183010134950294, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35565082562172046, "eval_ag_news_runtime": 10.3408, "eval_ag_news_samples_per_second": 48.352, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3544618815318247, "eval_ag_news_token_set_f1_sem": 0.0045137701411784685, "eval_ag_news_token_set_precision": 0.34262575645219906, "eval_ag_news_token_set_recall": 0.3806784263670829, "eval_ag_news_true_num_tokens": 56.09375, "step": 177500 }, { "epoch": 34.08, "eval_anthropic_toxic_prompts_accuracy": 0.11478125, "eval_anthropic_toxic_prompts_bleu_score": 3.071852098732515, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11282711917908927, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.673026978969574, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00923623621406095, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2352213859558105, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.16, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.668, "eval_anthropic_toxic_prompts_num_pred_words": 47.088, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.411997015399496, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21148223741805833, "eval_anthropic_toxic_prompts_runtime": 9.8661, "eval_anthropic_toxic_prompts_samples_per_second": 50.679, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3574934733935281, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006395347285549812, "eval_anthropic_toxic_prompts_token_set_precision": 0.43499871099049386, "eval_anthropic_toxic_prompts_token_set_recall": 0.3298577965119157, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 177500 }, { "epoch": 34.08, "eval_arxiv_accuracy": 0.3490625, "eval_arxiv_bleu_score": 4.496504092248576, "eval_arxiv_bleu_score_sem": 0.13611944155457134, "eval_arxiv_emb_cos_sim": 0.7688542604446411, "eval_arxiv_emb_cos_sim_sem": 0.007601529864806429, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3651444911956787, "eval_arxiv_n_ngrams_match_1": 15.204, "eval_arxiv_n_ngrams_match_2": 3.032, "eval_arxiv_n_ngrams_match_3": 0.71, "eval_arxiv_num_pred_words": 40.39, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.937678236717133, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3635921477805558, "eval_arxiv_runtime": 10.1523, "eval_arxiv_samples_per_second": 49.25, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.3564108125492474, "eval_arxiv_token_set_f1_sem": 0.004195536513763176, "eval_arxiv_token_set_precision": 0.3090003919823345, "eval_arxiv_token_set_recall": 0.4416617371632523, "eval_arxiv_true_num_tokens": 64.0, "step": 177500 }, { "epoch": 34.08, "eval_python_code_alpaca_accuracy": 0.16446875, "eval_python_code_alpaca_bleu_score": 4.789573884652132, "eval_python_code_alpaca_bleu_score_sem": 0.14805177833650254, "eval_python_code_alpaca_emb_cos_sim": 0.7696361541748047, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007880690174462844, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.837475061416626, "eval_python_code_alpaca_n_ngrams_match_1": 10.036, "eval_python_code_alpaca_n_ngrams_match_2": 3.09, "eval_python_code_alpaca_n_ngrams_match_3": 1.108, "eval_python_code_alpaca_num_pred_words": 44.442, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.07260379366368, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33634832860525293, "eval_python_code_alpaca_runtime": 9.8934, "eval_python_code_alpaca_samples_per_second": 50.539, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.49063320634887125, "eval_python_code_alpaca_token_set_f1_sem": 0.0054646803767432, "eval_python_code_alpaca_token_set_precision": 0.5487526530466039, "eval_python_code_alpaca_token_set_recall": 0.4669653512892527, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 177500 }, { "epoch": 34.08, "eval_wikibio_accuracy": 0.325, "eval_wikibio_bleu_score": 5.767346121615908, "eval_wikibio_bleu_score_sem": 0.21379359768355632, "eval_wikibio_emb_cos_sim": 0.7263277769088745, "eval_wikibio_emb_cos_sim_sem": 0.011270721885053998, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6769955158233643, "eval_wikibio_n_ngrams_match_1": 9.558, "eval_wikibio_n_ngrams_match_2": 3.196, "eval_wikibio_n_ngrams_match_3": 1.15, "eval_wikibio_num_pred_words": 35.01, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.52745587236166, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3406878190148007, "eval_wikibio_runtime": 9.9711, "eval_wikibio_samples_per_second": 50.145, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.30822373431140476, "eval_wikibio_token_set_f1_sem": 0.0058045013490851845, "eval_wikibio_token_set_precision": 0.3104517335616784, "eval_wikibio_token_set_recall": 0.3290589404976437, "eval_wikibio_true_num_tokens": 61.1328125, "step": 177500 }, { "epoch": 34.08, "eval_nq_accuracy": 0.53259375, "eval_nq_bleu_score": 11.95770555791102, "eval_nq_bleu_score_sem": 0.4855122276489385, "eval_nq_emb_cos_sim": 0.8312137126922607, "eval_nq_emb_cos_sim_sem": 0.0075484274698607344, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1430771350860596, "eval_nq_n_ngrams_match_1": 23.364, "eval_nq_n_ngrams_match_2": 8.626, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 49.032, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.525631827500668, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45439529064672496, "eval_nq_runtime": 10.8732, "eval_nq_samples_per_second": 45.985, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.46639776023598045, "eval_nq_token_set_f1_sem": 0.00483672933589931, "eval_nq_token_set_precision": 0.4247555007065695, "eval_nq_token_set_recall": 0.5265175502371234, "eval_nq_true_num_tokens": 64.0, "step": 177500 }, { "epoch": 34.08, "learning_rate": 0.001, "loss": 2.5086, "step": 177504 }, { "epoch": 34.09, "learning_rate": 0.001, "loss": 2.5041, "step": 177516 }, { "epoch": 34.09, "learning_rate": 0.001, "loss": 2.5062, "step": 177528 }, { "epoch": 34.09, "learning_rate": 0.001, "loss": 2.499, "step": 177540 }, { "epoch": 34.09, "learning_rate": 0.001, "loss": 2.5103, "step": 177552 }, { "epoch": 34.09, "learning_rate": 0.001, "loss": 2.5129, "step": 177564 }, { "epoch": 34.1, "learning_rate": 0.001, "loss": 2.5155, "step": 177576 }, { "epoch": 34.1, "learning_rate": 0.001, "loss": 2.5001, "step": 177588 }, { "epoch": 34.1, "learning_rate": 0.001, "loss": 2.5078, "step": 177600 }, { "epoch": 34.1, "learning_rate": 0.001, "loss": 2.5151, "step": 177612 }, { "epoch": 34.11, "learning_rate": 0.001, "loss": 2.5145, "step": 177624 }, { "epoch": 34.11, "learning_rate": 0.001, "loss": 2.5124, "step": 177636 }, { "epoch": 34.11, "learning_rate": 0.001, "loss": 2.5091, "step": 177648 }, { "epoch": 34.11, "learning_rate": 0.001, "loss": 2.5003, "step": 177660 }, { "epoch": 34.12, "learning_rate": 0.001, "loss": 2.5025, "step": 177672 }, { "epoch": 34.12, "learning_rate": 0.001, "loss": 2.5122, "step": 177684 }, { "epoch": 34.12, "learning_rate": 0.001, "loss": 2.5131, "step": 177696 }, { "epoch": 34.12, "learning_rate": 0.001, "loss": 2.5149, "step": 177708 }, { "epoch": 34.12, "learning_rate": 0.001, "loss": 2.505, "step": 177720 }, { "epoch": 34.13, "learning_rate": 0.001, "loss": 2.5087, "step": 177732 }, { "epoch": 34.13, "learning_rate": 0.001, "loss": 2.5151, "step": 177744 }, { "epoch": 34.13, "learning_rate": 0.001, "loss": 2.5049, "step": 177756 }, { "epoch": 34.13, "learning_rate": 0.001, "loss": 2.4998, "step": 177768 }, { "epoch": 34.14, "learning_rate": 0.001, "loss": 2.5213, "step": 177780 }, { "epoch": 34.14, "learning_rate": 0.001, "loss": 2.5087, "step": 177792 }, { "epoch": 34.14, "learning_rate": 0.001, "loss": 2.5168, "step": 177804 }, { "epoch": 34.14, "learning_rate": 0.001, "loss": 2.515, "step": 177816 }, { "epoch": 34.15, "learning_rate": 0.001, "loss": 2.497, "step": 177828 }, { "epoch": 34.15, "learning_rate": 0.001, "loss": 2.5162, "step": 177840 }, { "epoch": 34.15, "learning_rate": 0.001, "loss": 2.5117, "step": 177852 }, { "epoch": 34.15, "learning_rate": 0.001, "loss": 2.5175, "step": 177864 }, { "epoch": 34.15, "learning_rate": 0.001, "loss": 2.5143, "step": 177876 }, { "epoch": 34.16, "learning_rate": 0.001, "loss": 2.5139, "step": 177888 }, { "epoch": 34.16, "learning_rate": 0.001, "loss": 2.5095, "step": 177900 }, { "epoch": 34.16, "learning_rate": 0.001, "loss": 2.5202, "step": 177912 }, { "epoch": 34.16, "learning_rate": 0.001, "loss": 2.5148, "step": 177924 }, { "epoch": 34.17, "learning_rate": 0.001, "loss": 2.5181, "step": 177936 }, { "epoch": 34.17, "learning_rate": 0.001, "loss": 2.5074, "step": 177948 }, { "epoch": 34.17, "learning_rate": 0.001, "loss": 2.5127, "step": 177960 }, { "epoch": 34.17, "learning_rate": 0.001, "loss": 2.5178, "step": 177972 }, { "epoch": 34.18, "learning_rate": 0.001, "loss": 2.5098, "step": 177984 }, { "epoch": 34.18, "learning_rate": 0.001, "loss": 2.5083, "step": 177996 }, { "epoch": 34.18, "learning_rate": 0.001, "loss": 2.5024, "step": 178008 }, { "epoch": 34.18, "learning_rate": 0.001, "loss": 2.5019, "step": 178020 }, { "epoch": 34.18, "learning_rate": 0.001, "loss": 2.5042, "step": 178032 }, { "epoch": 34.19, "learning_rate": 0.001, "loss": 2.5016, "step": 178044 }, { "epoch": 34.19, "learning_rate": 0.001, "loss": 2.5119, "step": 178056 }, { "epoch": 34.19, "learning_rate": 0.001, "loss": 2.5181, "step": 178068 }, { "epoch": 34.19, "learning_rate": 0.001, "loss": 2.5107, "step": 178080 }, { "epoch": 34.2, "learning_rate": 0.001, "loss": 2.5086, "step": 178092 }, { "epoch": 34.2, "learning_rate": 0.001, "loss": 2.5104, "step": 178104 }, { "epoch": 34.2, "learning_rate": 0.001, "loss": 2.5109, "step": 178116 }, { "epoch": 34.2, "eval_ag_news_accuracy": 0.327, "eval_ag_news_bleu_score": 4.914052912154212, "eval_ag_news_bleu_score_sem": 0.15733180499147514, "eval_ag_news_emb_cos_sim": 0.8192166090011597, "eval_ag_news_emb_cos_sim_sem": 0.00686424516563774, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5027143955230713, "eval_ag_news_n_ngrams_match_1": 14.314, "eval_ag_news_n_ngrams_match_2": 3.288, "eval_ag_news_n_ngrams_match_3": 0.962, "eval_ag_news_num_pred_words": 47.322, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.205462500072706, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35335455342206823, "eval_ag_news_runtime": 10.6415, "eval_ag_news_samples_per_second": 46.986, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.3532114353394258, "eval_ag_news_token_set_f1_sem": 0.004458795364485328, "eval_ag_news_token_set_precision": 0.34206116848091156, "eval_ag_news_token_set_recall": 0.3817815058903693, "eval_ag_news_true_num_tokens": 56.09375, "step": 178125 }, { "epoch": 34.2, "eval_anthropic_toxic_prompts_accuracy": 0.11540625, "eval_anthropic_toxic_prompts_bleu_score": 3.194002793452142, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1215497287537557, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6790552139282227, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008896723592970458, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.218350648880005, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 48.418, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.98687404731438, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9453125, "eval_anthropic_toxic_prompts_rouge_score": 0.2158776896291413, "eval_anthropic_toxic_prompts_runtime": 10.4954, "eval_anthropic_toxic_prompts_samples_per_second": 47.64, "eval_anthropic_toxic_prompts_steps_per_second": 0.095, "eval_anthropic_toxic_prompts_token_set_f1": 0.35868913066859653, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006494699480647151, "eval_anthropic_toxic_prompts_token_set_precision": 0.4506319484675694, "eval_anthropic_toxic_prompts_token_set_recall": 0.3226648135822138, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 178125 }, { "epoch": 34.2, "eval_arxiv_accuracy": 0.3518125, "eval_arxiv_bleu_score": 4.451697055242033, "eval_arxiv_bleu_score_sem": 0.12911423376119463, "eval_arxiv_emb_cos_sim": 0.7778573036193848, "eval_arxiv_emb_cos_sim_sem": 0.007019332397267671, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3617093563079834, "eval_arxiv_n_ngrams_match_1": 15.358, "eval_arxiv_n_ngrams_match_2": 3.064, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 41.464, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.83844394780286, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3649077414622171, "eval_arxiv_runtime": 10.3311, "eval_arxiv_samples_per_second": 48.398, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.35651892697480697, "eval_arxiv_token_set_f1_sem": 0.004187807776777443, "eval_arxiv_token_set_precision": 0.312031586661256, "eval_arxiv_token_set_recall": 0.430126470096774, "eval_arxiv_true_num_tokens": 64.0, "step": 178125 }, { "epoch": 34.2, "eval_python_code_alpaca_accuracy": 0.16640625, "eval_python_code_alpaca_bleu_score": 4.853427273184483, "eval_python_code_alpaca_bleu_score_sem": 0.15388962455015562, "eval_python_code_alpaca_emb_cos_sim": 0.7732113599777222, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006844760809280055, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8228678703308105, "eval_python_code_alpaca_n_ngrams_match_1": 10.13, "eval_python_code_alpaca_n_ngrams_match_2": 3.188, "eval_python_code_alpaca_n_ngrams_match_3": 1.154, "eval_python_code_alpaca_num_pred_words": 45.034, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.825033562543794, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3365812517837736, "eval_python_code_alpaca_runtime": 9.9033, "eval_python_code_alpaca_samples_per_second": 50.488, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4955358063801914, "eval_python_code_alpaca_token_set_f1_sem": 0.005506109246406978, "eval_python_code_alpaca_token_set_precision": 0.5595442026847773, "eval_python_code_alpaca_token_set_recall": 0.4605743329054844, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 178125 }, { "epoch": 34.2, "eval_wikibio_accuracy": 0.327875, "eval_wikibio_bleu_score": 5.955574538064505, "eval_wikibio_bleu_score_sem": 0.2046288521089413, "eval_wikibio_emb_cos_sim": 0.7384626865386963, "eval_wikibio_emb_cos_sim_sem": 0.009992279553539526, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.701720952987671, "eval_wikibio_n_ngrams_match_1": 9.982, "eval_wikibio_n_ngrams_match_2": 3.394, "eval_wikibio_n_ngrams_match_3": 1.244, "eval_wikibio_num_pred_words": 36.434, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.51697219969297, "eval_wikibio_pred_num_tokens": 62.8828125, "eval_wikibio_rouge_score": 0.35804891031961483, "eval_wikibio_runtime": 10.0943, "eval_wikibio_samples_per_second": 49.533, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.31604749809136645, "eval_wikibio_token_set_f1_sem": 0.005386330428306996, "eval_wikibio_token_set_precision": 0.3246392858455307, "eval_wikibio_token_set_recall": 0.32454571621705997, "eval_wikibio_true_num_tokens": 61.1328125, "step": 178125 }, { "epoch": 34.2, "eval_nq_accuracy": 0.53396875, "eval_nq_bleu_score": 11.914236254970884, "eval_nq_bleu_score_sem": 0.4796626034235853, "eval_nq_emb_cos_sim": 0.8397883772850037, "eval_nq_emb_cos_sim_sem": 0.0069352573370225325, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.144157648086548, "eval_nq_n_ngrams_match_1": 23.282, "eval_nq_n_ngrams_match_2": 8.626, "eval_nq_n_ngrams_match_3": 3.986, "eval_nq_num_pred_words": 49.268, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.534848862193819, "eval_nq_pred_num_tokens": 62.984375, "eval_nq_rouge_score": 0.4510546538336935, "eval_nq_runtime": 10.4038, "eval_nq_samples_per_second": 48.06, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46602718976386026, "eval_nq_token_set_f1_sem": 0.005039947097998777, "eval_nq_token_set_precision": 0.42424083976081683, "eval_nq_token_set_recall": 0.526921316066187, "eval_nq_true_num_tokens": 64.0, "step": 178125 }, { "epoch": 34.2, "learning_rate": 0.001, "loss": 2.5192, "step": 178128 }, { "epoch": 34.21, "learning_rate": 0.001, "loss": 2.5108, "step": 178140 }, { "epoch": 34.21, "learning_rate": 0.001, "loss": 2.5115, "step": 178152 }, { "epoch": 34.21, "learning_rate": 0.001, "loss": 2.5131, "step": 178164 }, { "epoch": 34.21, "learning_rate": 0.001, "loss": 2.507, "step": 178176 }, { "epoch": 34.21, "learning_rate": 0.001, "loss": 2.5078, "step": 178188 }, { "epoch": 34.22, "learning_rate": 0.001, "loss": 2.499, "step": 178200 }, { "epoch": 34.22, "learning_rate": 0.001, "loss": 2.5191, "step": 178212 }, { "epoch": 34.22, "learning_rate": 0.001, "loss": 2.5108, "step": 178224 }, { "epoch": 34.22, "learning_rate": 0.001, "loss": 2.5117, "step": 178236 }, { "epoch": 34.23, "learning_rate": 0.001, "loss": 2.503, "step": 178248 }, { "epoch": 34.23, "learning_rate": 0.001, "loss": 2.5124, "step": 178260 }, { "epoch": 34.23, "learning_rate": 0.001, "loss": 2.5081, "step": 178272 }, { "epoch": 34.23, "learning_rate": 0.001, "loss": 2.5047, "step": 178284 }, { "epoch": 34.24, "learning_rate": 0.001, "loss": 2.5065, "step": 178296 }, { "epoch": 34.24, "learning_rate": 0.001, "loss": 2.5124, "step": 178308 }, { "epoch": 34.24, "learning_rate": 0.001, "loss": 2.5129, "step": 178320 }, { "epoch": 34.24, "learning_rate": 0.001, "loss": 2.5129, "step": 178332 }, { "epoch": 34.24, "learning_rate": 0.001, "loss": 2.5109, "step": 178344 }, { "epoch": 34.25, "learning_rate": 0.001, "loss": 2.5185, "step": 178356 }, { "epoch": 34.25, "learning_rate": 0.001, "loss": 2.5088, "step": 178368 }, { "epoch": 34.25, "learning_rate": 0.001, "loss": 2.5128, "step": 178380 }, { "epoch": 34.25, "learning_rate": 0.001, "loss": 2.5264, "step": 178392 }, { "epoch": 34.26, "learning_rate": 0.001, "loss": 2.5049, "step": 178404 }, { "epoch": 34.26, "learning_rate": 0.001, "loss": 2.5154, "step": 178416 }, { "epoch": 34.26, "learning_rate": 0.001, "loss": 2.5163, "step": 178428 }, { "epoch": 34.26, "learning_rate": 0.001, "loss": 2.511, "step": 178440 }, { "epoch": 34.26, "learning_rate": 0.001, "loss": 2.5105, "step": 178452 }, { "epoch": 34.27, "learning_rate": 0.001, "loss": 2.5074, "step": 178464 }, { "epoch": 34.27, "learning_rate": 0.001, "loss": 2.5129, "step": 178476 }, { "epoch": 34.27, "learning_rate": 0.001, "loss": 2.5071, "step": 178488 }, { "epoch": 34.27, "learning_rate": 0.001, "loss": 2.5071, "step": 178500 }, { "epoch": 34.28, "learning_rate": 0.001, "loss": 2.5101, "step": 178512 }, { "epoch": 34.28, "learning_rate": 0.001, "loss": 2.5093, "step": 178524 }, { "epoch": 34.28, "learning_rate": 0.001, "loss": 2.51, "step": 178536 }, { "epoch": 34.28, "learning_rate": 0.001, "loss": 2.5236, "step": 178548 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 2.5128, "step": 178560 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 2.5192, "step": 178572 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 2.5071, "step": 178584 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 2.516, "step": 178596 }, { "epoch": 34.29, "learning_rate": 0.001, "loss": 2.5118, "step": 178608 }, { "epoch": 34.3, "learning_rate": 0.001, "loss": 2.5141, "step": 178620 }, { "epoch": 34.3, "learning_rate": 0.001, "loss": 2.5178, "step": 178632 }, { "epoch": 34.3, "learning_rate": 0.001, "loss": 2.5086, "step": 178644 }, { "epoch": 34.3, "learning_rate": 0.001, "loss": 2.5208, "step": 178656 }, { "epoch": 34.31, "learning_rate": 0.001, "loss": 2.5165, "step": 178668 }, { "epoch": 34.31, "learning_rate": 0.001, "loss": 2.5107, "step": 178680 }, { "epoch": 34.31, "learning_rate": 0.001, "loss": 2.5052, "step": 178692 }, { "epoch": 34.31, "learning_rate": 0.001, "loss": 2.5231, "step": 178704 }, { "epoch": 34.32, "learning_rate": 0.001, "loss": 2.5257, "step": 178716 }, { "epoch": 34.32, "learning_rate": 0.001, "loss": 2.4984, "step": 178728 }, { "epoch": 34.32, "learning_rate": 0.001, "loss": 2.5098, "step": 178740 }, { "epoch": 34.32, "eval_ag_news_accuracy": 0.3275625, "eval_ag_news_bleu_score": 4.813433813581762, "eval_ag_news_bleu_score_sem": 0.1592644508946142, "eval_ag_news_emb_cos_sim": 0.820677638053894, "eval_ag_news_emb_cos_sim_sem": 0.006047129067033354, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4964663982391357, "eval_ag_news_n_ngrams_match_1": 14.3, "eval_ag_news_n_ngrams_match_2": 3.108, "eval_ag_news_n_ngrams_match_3": 0.894, "eval_ag_news_num_pred_words": 47.088, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.99864164145504, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3539372001697687, "eval_ag_news_runtime": 10.6254, "eval_ag_news_samples_per_second": 47.057, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35522350129310926, "eval_ag_news_token_set_f1_sem": 0.004463897894500045, "eval_ag_news_token_set_precision": 0.3422590908037412, "eval_ag_news_token_set_recall": 0.3831179052121366, "eval_ag_news_true_num_tokens": 56.09375, "step": 178750 }, { "epoch": 34.32, "eval_anthropic_toxic_prompts_accuracy": 0.11690625, "eval_anthropic_toxic_prompts_bleu_score": 3.048533923277592, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12074231629676362, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6788101196289062, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00922757174396942, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2201855182647705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.2, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.876, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.676, "eval_anthropic_toxic_prompts_num_pred_words": 47.39, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.032763785487692, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21349213177239149, "eval_anthropic_toxic_prompts_runtime": 9.6998, "eval_anthropic_toxic_prompts_samples_per_second": 51.548, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3580416059249323, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006788697257763802, "eval_anthropic_toxic_prompts_token_set_precision": 0.4383428301480111, "eval_anthropic_toxic_prompts_token_set_recall": 0.33051103029251566, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 178750 }, { "epoch": 34.32, "eval_arxiv_accuracy": 0.35159375, "eval_arxiv_bleu_score": 4.4936295243756685, "eval_arxiv_bleu_score_sem": 0.1313267234231422, "eval_arxiv_emb_cos_sim": 0.7800787687301636, "eval_arxiv_emb_cos_sim_sem": 0.007207780176582989, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3529574871063232, "eval_arxiv_n_ngrams_match_1": 15.434, "eval_arxiv_n_ngrams_match_2": 3.048, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 41.006, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.5871548868485, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36792086512289285, "eval_arxiv_runtime": 10.0689, "eval_arxiv_samples_per_second": 49.658, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.35973660019467957, "eval_arxiv_token_set_f1_sem": 0.004231735129604242, "eval_arxiv_token_set_precision": 0.31408898428209076, "eval_arxiv_token_set_recall": 0.4372305467644753, "eval_arxiv_true_num_tokens": 64.0, "step": 178750 }, { "epoch": 34.32, "eval_python_code_alpaca_accuracy": 0.16346875, "eval_python_code_alpaca_bleu_score": 4.642145102517188, "eval_python_code_alpaca_bleu_score_sem": 0.14502167212739653, "eval_python_code_alpaca_emb_cos_sim": 0.7653725743293762, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008972558285787208, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8381497859954834, "eval_python_code_alpaca_n_ngrams_match_1": 9.966, "eval_python_code_alpaca_n_ngrams_match_2": 3.026, "eval_python_code_alpaca_n_ngrams_match_3": 1.026, "eval_python_code_alpaca_num_pred_words": 44.538, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.084126986121788, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33300856539045176, "eval_python_code_alpaca_runtime": 10.5922, "eval_python_code_alpaca_samples_per_second": 47.204, "eval_python_code_alpaca_steps_per_second": 0.094, "eval_python_code_alpaca_token_set_f1": 0.4863844161296202, "eval_python_code_alpaca_token_set_f1_sem": 0.0055966509281519805, "eval_python_code_alpaca_token_set_precision": 0.5473867815551546, "eval_python_code_alpaca_token_set_recall": 0.4620974978021023, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 178750 }, { "epoch": 34.32, "eval_wikibio_accuracy": 0.3239375, "eval_wikibio_bleu_score": 5.974667265588757, "eval_wikibio_bleu_score_sem": 0.21420723370734615, "eval_wikibio_emb_cos_sim": 0.7359843254089355, "eval_wikibio_emb_cos_sim_sem": 0.00951995496880162, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.714965343475342, "eval_wikibio_n_ngrams_match_1": 10.092, "eval_wikibio_n_ngrams_match_2": 3.33, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 36.452, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 41.05716416111395, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3585082625405733, "eval_wikibio_runtime": 9.933, "eval_wikibio_samples_per_second": 50.337, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32100820650657813, "eval_wikibio_token_set_f1_sem": 0.0053061485193654934, "eval_wikibio_token_set_precision": 0.32732396678358794, "eval_wikibio_token_set_recall": 0.3337773079073864, "eval_wikibio_true_num_tokens": 61.1328125, "step": 178750 }, { "epoch": 34.32, "eval_nq_accuracy": 0.53465625, "eval_nq_bleu_score": 12.097115223885114, "eval_nq_bleu_score_sem": 0.46804934255176084, "eval_nq_emb_cos_sim": 0.8387157917022705, "eval_nq_emb_cos_sim_sem": 0.006997872407386038, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.140587091445923, "eval_nq_n_ngrams_match_1": 23.49, "eval_nq_n_ngrams_match_2": 8.72, "eval_nq_n_ngrams_match_3": 4.028, "eval_nq_num_pred_words": 49.282, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.504429041077529, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4554707610765809, "eval_nq_runtime": 10.4593, "eval_nq_samples_per_second": 47.804, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4678595729521041, "eval_nq_token_set_f1_sem": 0.0048572389473561395, "eval_nq_token_set_precision": 0.42781714349962374, "eval_nq_token_set_recall": 0.5237862459531191, "eval_nq_true_num_tokens": 64.0, "step": 178750 }, { "epoch": 34.32, "learning_rate": 0.001, "loss": 2.5141, "step": 178752 }, { "epoch": 34.32, "learning_rate": 0.001, "loss": 2.5143, "step": 178764 }, { "epoch": 34.33, "learning_rate": 0.001, "loss": 2.5182, "step": 178776 }, { "epoch": 34.33, "learning_rate": 0.001, "loss": 2.5145, "step": 178788 }, { "epoch": 34.33, "learning_rate": 0.001, "loss": 2.5244, "step": 178800 }, { "epoch": 34.33, "learning_rate": 0.001, "loss": 2.5234, "step": 178812 }, { "epoch": 34.34, "learning_rate": 0.001, "loss": 2.528, "step": 178824 }, { "epoch": 34.34, "learning_rate": 0.001, "loss": 2.5132, "step": 178836 }, { "epoch": 34.34, "learning_rate": 0.001, "loss": 2.5204, "step": 178848 }, { "epoch": 34.34, "learning_rate": 0.001, "loss": 2.512, "step": 178860 }, { "epoch": 34.35, "learning_rate": 0.001, "loss": 2.5124, "step": 178872 }, { "epoch": 34.35, "learning_rate": 0.001, "loss": 2.5181, "step": 178884 }, { "epoch": 34.35, "learning_rate": 0.001, "loss": 2.5196, "step": 178896 }, { "epoch": 34.35, "learning_rate": 0.001, "loss": 2.5086, "step": 178908 }, { "epoch": 34.35, "learning_rate": 0.001, "loss": 2.5123, "step": 178920 }, { "epoch": 34.36, "learning_rate": 0.001, "loss": 2.5197, "step": 178932 }, { "epoch": 34.36, "learning_rate": 0.001, "loss": 2.503, "step": 178944 }, { "epoch": 34.36, "learning_rate": 0.001, "loss": 2.5163, "step": 178956 }, { "epoch": 34.36, "learning_rate": 0.001, "loss": 2.5215, "step": 178968 }, { "epoch": 34.37, "learning_rate": 0.001, "loss": 2.5138, "step": 178980 }, { "epoch": 34.37, "learning_rate": 0.001, "loss": 2.5164, "step": 178992 }, { "epoch": 34.37, "learning_rate": 0.001, "loss": 2.5093, "step": 179004 }, { "epoch": 34.37, "learning_rate": 0.001, "loss": 2.5188, "step": 179016 }, { "epoch": 34.38, "learning_rate": 0.001, "loss": 2.5167, "step": 179028 }, { "epoch": 34.38, "learning_rate": 0.001, "loss": 2.5147, "step": 179040 }, { "epoch": 34.38, "learning_rate": 0.001, "loss": 2.5178, "step": 179052 }, { "epoch": 34.38, "learning_rate": 0.001, "loss": 2.5128, "step": 179064 }, { "epoch": 34.38, "learning_rate": 0.001, "loss": 2.5145, "step": 179076 }, { "epoch": 34.39, "learning_rate": 0.001, "loss": 2.5105, "step": 179088 }, { "epoch": 34.39, "learning_rate": 0.001, "loss": 2.5213, "step": 179100 }, { "epoch": 34.39, "learning_rate": 0.001, "loss": 2.5132, "step": 179112 }, { "epoch": 34.39, "learning_rate": 0.001, "loss": 2.5167, "step": 179124 }, { "epoch": 34.4, "learning_rate": 0.001, "loss": 2.5203, "step": 179136 }, { "epoch": 34.4, "learning_rate": 0.001, "loss": 2.5184, "step": 179148 }, { "epoch": 34.4, "learning_rate": 0.001, "loss": 2.5088, "step": 179160 }, { "epoch": 34.4, "learning_rate": 0.001, "loss": 2.5188, "step": 179172 }, { "epoch": 34.41, "learning_rate": 0.001, "loss": 2.5073, "step": 179184 }, { "epoch": 34.41, "learning_rate": 0.001, "loss": 2.5213, "step": 179196 }, { "epoch": 34.41, "learning_rate": 0.001, "loss": 2.5138, "step": 179208 }, { "epoch": 34.41, "learning_rate": 0.001, "loss": 2.5036, "step": 179220 }, { "epoch": 34.41, "learning_rate": 0.001, "loss": 2.5082, "step": 179232 }, { "epoch": 34.42, "learning_rate": 0.001, "loss": 2.5158, "step": 179244 }, { "epoch": 34.42, "learning_rate": 0.001, "loss": 2.5123, "step": 179256 }, { "epoch": 34.42, "learning_rate": 0.001, "loss": 2.5088, "step": 179268 }, { "epoch": 34.42, "learning_rate": 0.001, "loss": 2.507, "step": 179280 }, { "epoch": 34.43, "learning_rate": 0.001, "loss": 2.5164, "step": 179292 }, { "epoch": 34.43, "learning_rate": 0.001, "loss": 2.5144, "step": 179304 }, { "epoch": 34.43, "learning_rate": 0.001, "loss": 2.5115, "step": 179316 }, { "epoch": 34.43, "learning_rate": 0.001, "loss": 2.5145, "step": 179328 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 2.5048, "step": 179340 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 2.515, "step": 179352 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 2.5126, "step": 179364 }, { "epoch": 34.44, "eval_ag_news_accuracy": 0.3274375, "eval_ag_news_bleu_score": 4.811342964802341, "eval_ag_news_bleu_score_sem": 0.15607395767645005, "eval_ag_news_emb_cos_sim": 0.813970148563385, "eval_ag_news_emb_cos_sim_sem": 0.007021656400836342, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5022685527801514, "eval_ag_news_n_ngrams_match_1": 14.39, "eval_ag_news_n_ngrams_match_2": 3.154, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 47.056, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.190661385321725, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35371846728922646, "eval_ag_news_runtime": 10.5534, "eval_ag_news_samples_per_second": 47.378, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35611744869577433, "eval_ag_news_token_set_f1_sem": 0.004472964504265946, "eval_ag_news_token_set_precision": 0.34333100902908364, "eval_ag_news_token_set_recall": 0.3830182962470944, "eval_ag_news_true_num_tokens": 56.09375, "step": 179375 }, { "epoch": 34.44, "eval_anthropic_toxic_prompts_accuracy": 0.11471875, "eval_anthropic_toxic_prompts_bleu_score": 3.1958231375226434, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12481154158203375, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6836678385734558, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008170488612101212, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.221111297607422, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.724, "eval_anthropic_toxic_prompts_num_pred_words": 47.68, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.055949331783708, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2187404411794168, "eval_anthropic_toxic_prompts_runtime": 9.8943, "eval_anthropic_toxic_prompts_samples_per_second": 50.534, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3609242226719977, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006679386336990643, "eval_anthropic_toxic_prompts_token_set_precision": 0.44935726362851275, "eval_anthropic_toxic_prompts_token_set_recall": 0.33146817955536045, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 179375 }, { "epoch": 34.44, "eval_arxiv_accuracy": 0.353125, "eval_arxiv_bleu_score": 4.540336935215935, "eval_arxiv_bleu_score_sem": 0.13556716701654822, "eval_arxiv_emb_cos_sim": 0.7734513282775879, "eval_arxiv_emb_cos_sim_sem": 0.006722537582776494, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.344017267227173, "eval_arxiv_n_ngrams_match_1": 15.438, "eval_arxiv_n_ngrams_match_2": 3.038, "eval_arxiv_n_ngrams_match_3": 0.734, "eval_arxiv_num_pred_words": 40.814, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.332718489811967, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3697917113353645, "eval_arxiv_runtime": 10.3213, "eval_arxiv_samples_per_second": 48.444, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3596673818683494, "eval_arxiv_token_set_f1_sem": 0.0043135187632635935, "eval_arxiv_token_set_precision": 0.31372170099630187, "eval_arxiv_token_set_recall": 0.439029757604981, "eval_arxiv_true_num_tokens": 64.0, "step": 179375 }, { "epoch": 34.44, "eval_python_code_alpaca_accuracy": 0.162625, "eval_python_code_alpaca_bleu_score": 4.640023344215875, "eval_python_code_alpaca_bleu_score_sem": 0.13450912984635363, "eval_python_code_alpaca_emb_cos_sim": 0.765434741973877, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008636512901531246, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8486969470977783, "eval_python_code_alpaca_n_ngrams_match_1": 10.112, "eval_python_code_alpaca_n_ngrams_match_2": 3.016, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 44.112, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.265269616761977, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3407851142617668, "eval_python_code_alpaca_runtime": 10.9587, "eval_python_code_alpaca_samples_per_second": 45.626, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4863034955772844, "eval_python_code_alpaca_token_set_f1_sem": 0.005305979070785445, "eval_python_code_alpaca_token_set_precision": 0.5548519354276719, "eval_python_code_alpaca_token_set_recall": 0.45395270458790493, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 179375 }, { "epoch": 34.44, "eval_wikibio_accuracy": 0.32678125, "eval_wikibio_bleu_score": 6.004373304213732, "eval_wikibio_bleu_score_sem": 0.2179632308351529, "eval_wikibio_emb_cos_sim": 0.7409726977348328, "eval_wikibio_emb_cos_sim_sem": 0.009032686033544061, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.68875789642334, "eval_wikibio_n_ngrams_match_1": 10.002, "eval_wikibio_n_ngrams_match_2": 3.382, "eval_wikibio_n_ngrams_match_3": 1.234, "eval_wikibio_num_pred_words": 35.824, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.99513798788961, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35291988254204026, "eval_wikibio_runtime": 10.0767, "eval_wikibio_samples_per_second": 49.62, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.3189733917969741, "eval_wikibio_token_set_f1_sem": 0.0054931123466055, "eval_wikibio_token_set_precision": 0.3260878276320435, "eval_wikibio_token_set_recall": 0.3301078009495688, "eval_wikibio_true_num_tokens": 61.1328125, "step": 179375 }, { "epoch": 34.44, "eval_nq_accuracy": 0.53575, "eval_nq_bleu_score": 11.920051862616583, "eval_nq_bleu_score_sem": 0.4998006212207493, "eval_nq_emb_cos_sim": 0.8287443518638611, "eval_nq_emb_cos_sim_sem": 0.00734434781955943, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.137392044067383, "eval_nq_n_ngrams_match_1": 23.38, "eval_nq_n_ngrams_match_2": 8.586, "eval_nq_n_ngrams_match_3": 3.978, "eval_nq_num_pred_words": 49.148, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.477300349170047, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4534801780466444, "eval_nq_runtime": 10.6294, "eval_nq_samples_per_second": 47.039, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.46627282890371513, "eval_nq_token_set_f1_sem": 0.005003064160241319, "eval_nq_token_set_precision": 0.425000019638213, "eval_nq_token_set_recall": 0.5246687772022757, "eval_nq_true_num_tokens": 64.0, "step": 179375 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 2.5008, "step": 179376 }, { "epoch": 34.44, "learning_rate": 0.001, "loss": 2.5218, "step": 179388 }, { "epoch": 34.45, "learning_rate": 0.001, "loss": 2.5155, "step": 179400 }, { "epoch": 34.45, "learning_rate": 0.001, "loss": 2.509, "step": 179412 }, { "epoch": 34.45, "learning_rate": 0.001, "loss": 2.5205, "step": 179424 }, { "epoch": 34.45, "learning_rate": 0.001, "loss": 2.4998, "step": 179436 }, { "epoch": 34.46, "learning_rate": 0.001, "loss": 2.5138, "step": 179448 }, { "epoch": 34.46, "learning_rate": 0.001, "loss": 2.5071, "step": 179460 }, { "epoch": 34.46, "learning_rate": 0.001, "loss": 2.5175, "step": 179472 }, { "epoch": 34.46, "learning_rate": 0.001, "loss": 2.5156, "step": 179484 }, { "epoch": 34.47, "learning_rate": 0.001, "loss": 2.5221, "step": 179496 }, { "epoch": 34.47, "learning_rate": 0.001, "loss": 2.5198, "step": 179508 }, { "epoch": 34.47, "learning_rate": 0.001, "loss": 2.527, "step": 179520 }, { "epoch": 34.47, "learning_rate": 0.001, "loss": 2.5128, "step": 179532 }, { "epoch": 34.47, "learning_rate": 0.001, "loss": 2.516, "step": 179544 }, { "epoch": 34.48, "learning_rate": 0.001, "loss": 2.5151, "step": 179556 }, { "epoch": 34.48, "learning_rate": 0.001, "loss": 2.5129, "step": 179568 }, { "epoch": 34.48, "learning_rate": 0.001, "loss": 2.513, "step": 179580 }, { "epoch": 34.48, "learning_rate": 0.001, "loss": 2.5008, "step": 179592 }, { "epoch": 34.49, "learning_rate": 0.001, "loss": 2.5182, "step": 179604 }, { "epoch": 34.49, "learning_rate": 0.001, "loss": 2.519, "step": 179616 }, { "epoch": 34.49, "learning_rate": 0.001, "loss": 2.5099, "step": 179628 }, { "epoch": 34.49, "learning_rate": 0.001, "loss": 2.5205, "step": 179640 }, { "epoch": 34.5, "learning_rate": 0.001, "loss": 2.5122, "step": 179652 }, { "epoch": 34.5, "learning_rate": 0.001, "loss": 2.5161, "step": 179664 }, { "epoch": 34.5, "learning_rate": 0.001, "loss": 2.5199, "step": 179676 }, { "epoch": 34.5, "learning_rate": 0.001, "loss": 2.5222, "step": 179688 }, { "epoch": 34.5, "learning_rate": 0.001, "loss": 2.5111, "step": 179700 }, { "epoch": 34.51, "learning_rate": 0.001, "loss": 2.5173, "step": 179712 }, { "epoch": 34.51, "learning_rate": 0.001, "loss": 2.5133, "step": 179724 }, { "epoch": 34.51, "learning_rate": 0.001, "loss": 2.5187, "step": 179736 }, { "epoch": 34.51, "learning_rate": 0.001, "loss": 2.5102, "step": 179748 }, { "epoch": 34.52, "learning_rate": 0.001, "loss": 2.5088, "step": 179760 }, { "epoch": 34.52, "learning_rate": 0.001, "loss": 2.518, "step": 179772 }, { "epoch": 34.52, "learning_rate": 0.001, "loss": 2.5069, "step": 179784 }, { "epoch": 34.52, "learning_rate": 0.001, "loss": 2.5095, "step": 179796 }, { "epoch": 34.53, "learning_rate": 0.001, "loss": 2.5111, "step": 179808 }, { "epoch": 34.53, "learning_rate": 0.001, "loss": 2.5168, "step": 179820 }, { "epoch": 34.53, "learning_rate": 0.001, "loss": 2.5137, "step": 179832 }, { "epoch": 34.53, "learning_rate": 0.001, "loss": 2.5197, "step": 179844 }, { "epoch": 34.53, "learning_rate": 0.001, "loss": 2.5245, "step": 179856 }, { "epoch": 34.54, "learning_rate": 0.001, "loss": 2.5119, "step": 179868 }, { "epoch": 34.54, "learning_rate": 0.001, "loss": 2.5241, "step": 179880 }, { "epoch": 34.54, "learning_rate": 0.001, "loss": 2.5108, "step": 179892 }, { "epoch": 34.54, "learning_rate": 0.001, "loss": 2.5159, "step": 179904 }, { "epoch": 34.55, "learning_rate": 0.001, "loss": 2.5069, "step": 179916 }, { "epoch": 34.55, "learning_rate": 0.001, "loss": 2.5197, "step": 179928 }, { "epoch": 34.55, "learning_rate": 0.001, "loss": 2.5141, "step": 179940 }, { "epoch": 34.55, "learning_rate": 0.001, "loss": 2.5155, "step": 179952 }, { "epoch": 34.56, "learning_rate": 0.001, "loss": 2.5094, "step": 179964 }, { "epoch": 34.56, "learning_rate": 0.001, "loss": 2.5117, "step": 179976 }, { "epoch": 34.56, "learning_rate": 0.001, "loss": 2.5194, "step": 179988 }, { "epoch": 34.56, "learning_rate": 0.001, "loss": 2.5166, "step": 180000 }, { "epoch": 34.56, "eval_ag_news_accuracy": 0.3271875, "eval_ag_news_bleu_score": 4.992180508673132, "eval_ag_news_bleu_score_sem": 0.16178611761678383, "eval_ag_news_emb_cos_sim": 0.8197941184043884, "eval_ag_news_emb_cos_sim_sem": 0.006158953217211359, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4940409660339355, "eval_ag_news_n_ngrams_match_1": 14.482, "eval_ag_news_n_ngrams_match_2": 3.212, "eval_ag_news_n_ngrams_match_3": 0.936, "eval_ag_news_num_pred_words": 46.568, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.91870265577338, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35836368312617006, "eval_ag_news_runtime": 10.7648, "eval_ag_news_samples_per_second": 46.448, "eval_ag_news_steps_per_second": 0.093, "eval_ag_news_token_set_f1": 0.36173706655885796, "eval_ag_news_token_set_f1_sem": 0.004467765586337585, "eval_ag_news_token_set_precision": 0.34649607846024855, "eval_ag_news_token_set_recall": 0.3927891325694287, "eval_ag_news_true_num_tokens": 56.09375, "step": 180000 }, { "epoch": 34.56, "eval_anthropic_toxic_prompts_accuracy": 0.11428125, "eval_anthropic_toxic_prompts_bleu_score": 3.2153284243726326, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12134144941431634, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742618680000305, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009172288908157794, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2468583583831787, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.342, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762, "eval_anthropic_toxic_prompts_num_pred_words": 47.61, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.7094430530119, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21596396352962755, "eval_anthropic_toxic_prompts_runtime": 9.8169, "eval_anthropic_toxic_prompts_samples_per_second": 50.933, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3637917754044938, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006451456352919871, "eval_anthropic_toxic_prompts_token_set_precision": 0.45173319944318713, "eval_anthropic_toxic_prompts_token_set_recall": 0.3291250054970117, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 180000 }, { "epoch": 34.56, "eval_arxiv_accuracy": 0.34884375, "eval_arxiv_bleu_score": 4.394816127528442, "eval_arxiv_bleu_score_sem": 0.1325831490118087, "eval_arxiv_emb_cos_sim": 0.7746796607971191, "eval_arxiv_emb_cos_sim_sem": 0.00746157664451445, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.359733819961548, "eval_arxiv_n_ngrams_match_1": 15.32, "eval_arxiv_n_ngrams_match_2": 3.034, "eval_arxiv_n_ngrams_match_3": 0.666, "eval_arxiv_num_pred_words": 40.032, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.781528791100133, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36849553182008965, "eval_arxiv_runtime": 10.2349, "eval_arxiv_samples_per_second": 48.852, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.36144979084583245, "eval_arxiv_token_set_f1_sem": 0.004300922900143846, "eval_arxiv_token_set_precision": 0.31109290396632283, "eval_arxiv_token_set_recall": 0.4515905152525916, "eval_arxiv_true_num_tokens": 64.0, "step": 180000 }, { "epoch": 34.56, "eval_python_code_alpaca_accuracy": 0.16290625, "eval_python_code_alpaca_bleu_score": 4.97443298424783, "eval_python_code_alpaca_bleu_score_sem": 0.15618642989283169, "eval_python_code_alpaca_emb_cos_sim": 0.7671642899513245, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008481689746249016, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8598783016204834, "eval_python_code_alpaca_n_ngrams_match_1": 10.152, "eval_python_code_alpaca_n_ngrams_match_2": 3.146, "eval_python_code_alpaca_n_ngrams_match_3": 1.112, "eval_python_code_alpaca_num_pred_words": 43.632, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.45940202634961, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34560691396560517, "eval_python_code_alpaca_runtime": 9.7684, "eval_python_code_alpaca_samples_per_second": 51.185, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.49139150141678084, "eval_python_code_alpaca_token_set_f1_sem": 0.005344616504036387, "eval_python_code_alpaca_token_set_precision": 0.5600456416206923, "eval_python_code_alpaca_token_set_recall": 0.4590994112023471, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 180000 }, { "epoch": 34.56, "eval_wikibio_accuracy": 0.3230625, "eval_wikibio_bleu_score": 6.376735555836328, "eval_wikibio_bleu_score_sem": 0.23321904107344474, "eval_wikibio_emb_cos_sim": 0.7401601076126099, "eval_wikibio_emb_cos_sim_sem": 0.009658130118922047, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6513254642486572, "eval_wikibio_n_ngrams_match_1": 10.168, "eval_wikibio_n_ngrams_match_2": 3.53, "eval_wikibio_n_ngrams_match_3": 1.334, "eval_wikibio_num_pred_words": 35.93, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.52569665551027, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3621634892428316, "eval_wikibio_runtime": 11.2877, "eval_wikibio_samples_per_second": 44.296, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.32420980005857425, "eval_wikibio_token_set_f1_sem": 0.005499022169025263, "eval_wikibio_token_set_precision": 0.32933575716206726, "eval_wikibio_token_set_recall": 0.33502409467732436, "eval_wikibio_true_num_tokens": 61.1328125, "step": 180000 }, { "epoch": 34.56, "eval_nq_accuracy": 0.53325, "eval_nq_bleu_score": 12.11989823526993, "eval_nq_bleu_score_sem": 0.4859000116476797, "eval_nq_emb_cos_sim": 0.8346042037010193, "eval_nq_emb_cos_sim_sem": 0.00727023983044261, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1411452293395996, "eval_nq_n_ngrams_match_1": 23.364, "eval_nq_n_ngrams_match_2": 8.772, "eval_nq_n_ngrams_match_3": 4.034, "eval_nq_num_pred_words": 48.902, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.509177010076892, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4529333986672015, "eval_nq_runtime": 10.2379, "eval_nq_samples_per_second": 48.838, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.46787318736595207, "eval_nq_token_set_f1_sem": 0.005036197557146816, "eval_nq_token_set_precision": 0.4226296979372575, "eval_nq_token_set_recall": 0.5339993638198486, "eval_nq_true_num_tokens": 64.0, "step": 180000 }, { "epoch": 34.56, "learning_rate": 0.001, "loss": 2.5169, "step": 180012 }, { "epoch": 34.57, "learning_rate": 0.001, "loss": 2.5085, "step": 180024 }, { "epoch": 34.57, "learning_rate": 0.001, "loss": 2.5166, "step": 180036 }, { "epoch": 34.57, "learning_rate": 0.001, "loss": 2.4932, "step": 180048 }, { "epoch": 34.57, "learning_rate": 0.001, "loss": 2.5128, "step": 180060 }, { "epoch": 34.58, "learning_rate": 0.001, "loss": 2.5274, "step": 180072 }, { "epoch": 34.58, "learning_rate": 0.001, "loss": 2.5147, "step": 180084 }, { "epoch": 34.58, "learning_rate": 0.001, "loss": 2.5157, "step": 180096 }, { "epoch": 34.58, "learning_rate": 0.001, "loss": 2.5154, "step": 180108 }, { "epoch": 34.59, "learning_rate": 0.001, "loss": 2.5112, "step": 180120 }, { "epoch": 34.59, "learning_rate": 0.001, "loss": 2.5094, "step": 180132 }, { "epoch": 34.59, "learning_rate": 0.001, "loss": 2.5127, "step": 180144 }, { "epoch": 34.59, "learning_rate": 0.001, "loss": 2.5154, "step": 180156 }, { "epoch": 34.59, "learning_rate": 0.001, "loss": 2.5207, "step": 180168 }, { "epoch": 34.6, "learning_rate": 0.001, "loss": 2.521, "step": 180180 }, { "epoch": 34.6, "learning_rate": 0.001, "loss": 2.5098, "step": 180192 }, { "epoch": 34.6, "learning_rate": 0.001, "loss": 2.5104, "step": 180204 }, { "epoch": 34.6, "learning_rate": 0.001, "loss": 2.5258, "step": 180216 }, { "epoch": 34.61, "learning_rate": 0.001, "loss": 2.5183, "step": 180228 }, { "epoch": 34.61, "learning_rate": 0.001, "loss": 2.522, "step": 180240 }, { "epoch": 34.61, "learning_rate": 0.001, "loss": 2.5175, "step": 180252 }, { "epoch": 34.61, "learning_rate": 0.001, "loss": 2.5131, "step": 180264 }, { "epoch": 34.62, "learning_rate": 0.001, "loss": 2.5204, "step": 180276 }, { "epoch": 34.62, "learning_rate": 0.001, "loss": 2.5062, "step": 180288 }, { "epoch": 34.62, "learning_rate": 0.001, "loss": 2.5252, "step": 180300 }, { "epoch": 34.62, "learning_rate": 0.001, "loss": 2.5223, "step": 180312 }, { "epoch": 34.62, "learning_rate": 0.001, "loss": 2.5179, "step": 180324 }, { "epoch": 34.63, "learning_rate": 0.001, "loss": 2.5237, "step": 180336 }, { "epoch": 34.63, "learning_rate": 0.001, "loss": 2.5218, "step": 180348 }, { "epoch": 34.63, "learning_rate": 0.001, "loss": 2.5135, "step": 180360 }, { "epoch": 34.63, "learning_rate": 0.001, "loss": 2.5211, "step": 180372 }, { "epoch": 34.64, "learning_rate": 0.001, "loss": 2.5127, "step": 180384 }, { "epoch": 34.64, "learning_rate": 0.001, "loss": 2.522, "step": 180396 }, { "epoch": 34.64, "learning_rate": 0.001, "loss": 2.5122, "step": 180408 }, { "epoch": 34.64, "learning_rate": 0.001, "loss": 2.5021, "step": 180420 }, { "epoch": 34.65, "learning_rate": 0.001, "loss": 2.5175, "step": 180432 }, { "epoch": 34.65, "learning_rate": 0.001, "loss": 2.528, "step": 180444 }, { "epoch": 34.65, "learning_rate": 0.001, "loss": 2.5303, "step": 180456 }, { "epoch": 34.65, "learning_rate": 0.001, "loss": 2.5141, "step": 180468 }, { "epoch": 34.65, "learning_rate": 0.001, "loss": 2.5175, "step": 180480 }, { "epoch": 34.66, "learning_rate": 0.001, "loss": 2.5235, "step": 180492 }, { "epoch": 34.66, "learning_rate": 0.001, "loss": 2.5173, "step": 180504 }, { "epoch": 34.66, "learning_rate": 0.001, "loss": 2.515, "step": 180516 }, { "epoch": 34.66, "learning_rate": 0.001, "loss": 2.5169, "step": 180528 }, { "epoch": 34.67, "learning_rate": 0.001, "loss": 2.5148, "step": 180540 }, { "epoch": 34.67, "learning_rate": 0.001, "loss": 2.513, "step": 180552 }, { "epoch": 34.67, "learning_rate": 0.001, "loss": 2.516, "step": 180564 }, { "epoch": 34.67, "learning_rate": 0.001, "loss": 2.5124, "step": 180576 }, { "epoch": 34.68, "learning_rate": 0.001, "loss": 2.5273, "step": 180588 }, { "epoch": 34.68, "learning_rate": 0.001, "loss": 2.5182, "step": 180600 }, { "epoch": 34.68, "learning_rate": 0.001, "loss": 2.5195, "step": 180612 }, { "epoch": 34.68, "learning_rate": 0.001, "loss": 2.5204, "step": 180624 }, { "epoch": 34.68, "eval_ag_news_accuracy": 0.329, "eval_ag_news_bleu_score": 4.974508433046372, "eval_ag_news_bleu_score_sem": 0.15502555477679023, "eval_ag_news_emb_cos_sim": 0.8153427839279175, "eval_ag_news_emb_cos_sim_sem": 0.007401230918525453, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.494140148162842, "eval_ag_news_n_ngrams_match_1": 14.436, "eval_ag_news_n_ngrams_match_2": 3.22, "eval_ag_news_n_ngrams_match_3": 0.956, "eval_ag_news_num_pred_words": 46.872, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.92196776470116, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3557681546184433, "eval_ag_news_runtime": 10.1567, "eval_ag_news_samples_per_second": 49.229, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.35821993120855017, "eval_ag_news_token_set_f1_sem": 0.004680047693740569, "eval_ag_news_token_set_precision": 0.3448182084746686, "eval_ag_news_token_set_recall": 0.38495225114313913, "eval_ag_news_true_num_tokens": 56.09375, "step": 180625 }, { "epoch": 34.68, "eval_anthropic_toxic_prompts_accuracy": 0.114625, "eval_anthropic_toxic_prompts_bleu_score": 3.163946604846592, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12060720723547808, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6742329001426697, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009851039537278948, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.224412679672241, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.316, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732, "eval_anthropic_toxic_prompts_num_pred_words": 47.326, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.138805287853586, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.96875, "eval_anthropic_toxic_prompts_rouge_score": 0.21603344633175534, "eval_anthropic_toxic_prompts_runtime": 9.7285, "eval_anthropic_toxic_prompts_samples_per_second": 51.395, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.36265401707493905, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066885456780596975, "eval_anthropic_toxic_prompts_token_set_precision": 0.44656755152218747, "eval_anthropic_toxic_prompts_token_set_recall": 0.33113894436021063, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 180625 }, { "epoch": 34.68, "eval_arxiv_accuracy": 0.351625, "eval_arxiv_bleu_score": 4.364824424679262, "eval_arxiv_bleu_score_sem": 0.12660735233163145, "eval_arxiv_emb_cos_sim": 0.7752806544303894, "eval_arxiv_emb_cos_sim_sem": 0.007239973124855725, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.351532220840454, "eval_arxiv_n_ngrams_match_1": 15.272, "eval_arxiv_n_ngrams_match_2": 2.934, "eval_arxiv_n_ngrams_match_3": 0.656, "eval_arxiv_num_pred_words": 40.126, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.546439601304957, "eval_arxiv_pred_num_tokens": 62.8984375, "eval_arxiv_rouge_score": 0.3687655812504964, "eval_arxiv_runtime": 10.4478, "eval_arxiv_samples_per_second": 47.857, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3607652695099407, "eval_arxiv_token_set_f1_sem": 0.004163745905764404, "eval_arxiv_token_set_precision": 0.31325242155882815, "eval_arxiv_token_set_recall": 0.44207455899773457, "eval_arxiv_true_num_tokens": 64.0, "step": 180625 }, { "epoch": 34.68, "eval_python_code_alpaca_accuracy": 0.16125, "eval_python_code_alpaca_bleu_score": 4.794661149080865, "eval_python_code_alpaca_bleu_score_sem": 0.15499046591979487, "eval_python_code_alpaca_emb_cos_sim": 0.7546582221984863, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008590295589416512, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8581385612487793, "eval_python_code_alpaca_n_ngrams_match_1": 9.852, "eval_python_code_alpaca_n_ngrams_match_2": 2.982, "eval_python_code_alpaca_n_ngrams_match_3": 1.046, "eval_python_code_alpaca_num_pred_words": 42.858, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.42905360661869, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3393293773582692, "eval_python_code_alpaca_runtime": 9.8051, "eval_python_code_alpaca_samples_per_second": 50.994, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.47472739527568747, "eval_python_code_alpaca_token_set_f1_sem": 0.005446944661341495, "eval_python_code_alpaca_token_set_precision": 0.5384051703684122, "eval_python_code_alpaca_token_set_recall": 0.4493811862598187, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 180625 }, { "epoch": 34.68, "eval_wikibio_accuracy": 0.3265625, "eval_wikibio_bleu_score": 6.277221085514228, "eval_wikibio_bleu_score_sem": 0.223908910217878, "eval_wikibio_emb_cos_sim": 0.7418359518051147, "eval_wikibio_emb_cos_sim_sem": 0.00923359808476197, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.661391019821167, "eval_wikibio_n_ngrams_match_1": 10.098, "eval_wikibio_n_ngrams_match_2": 3.454, "eval_wikibio_n_ngrams_match_3": 1.328, "eval_wikibio_num_pred_words": 36.168, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.915437384090936, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3576883852165406, "eval_wikibio_runtime": 10.2675, "eval_wikibio_samples_per_second": 48.697, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3213830432880749, "eval_wikibio_token_set_f1_sem": 0.005467455556351841, "eval_wikibio_token_set_precision": 0.32636548310109453, "eval_wikibio_token_set_recall": 0.334278212354833, "eval_wikibio_true_num_tokens": 61.1328125, "step": 180625 }, { "epoch": 34.68, "eval_nq_accuracy": 0.53634375, "eval_nq_bleu_score": 11.923921213060266, "eval_nq_bleu_score_sem": 0.49461021323551146, "eval_nq_emb_cos_sim": 0.8370808959007263, "eval_nq_emb_cos_sim_sem": 0.007509415951752346, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.14229154586792, "eval_nq_n_ngrams_match_1": 23.246, "eval_nq_n_ngrams_match_2": 8.548, "eval_nq_n_ngrams_match_3": 3.97, "eval_nq_num_pred_words": 49.252, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.518936813169011, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45085028307263575, "eval_nq_runtime": 10.204, "eval_nq_samples_per_second": 49.001, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.46316763725279503, "eval_nq_token_set_f1_sem": 0.0048679272921271196, "eval_nq_token_set_precision": 0.4227747784835111, "eval_nq_token_set_recall": 0.5203570583988628, "eval_nq_true_num_tokens": 64.0, "step": 180625 }, { "epoch": 34.68, "learning_rate": 0.001, "loss": 2.515, "step": 180636 }, { "epoch": 34.69, "learning_rate": 0.001, "loss": 2.5223, "step": 180648 }, { "epoch": 34.69, "learning_rate": 0.001, "loss": 2.5151, "step": 180660 }, { "epoch": 34.69, "learning_rate": 0.001, "loss": 2.5236, "step": 180672 }, { "epoch": 34.69, "learning_rate": 0.001, "loss": 2.5125, "step": 180684 }, { "epoch": 34.7, "learning_rate": 0.001, "loss": 2.5169, "step": 180696 }, { "epoch": 34.7, "learning_rate": 0.001, "loss": 2.5111, "step": 180708 }, { "epoch": 34.7, "learning_rate": 0.001, "loss": 2.5226, "step": 180720 }, { "epoch": 34.7, "learning_rate": 0.001, "loss": 2.5164, "step": 180732 }, { "epoch": 34.71, "learning_rate": 0.001, "loss": 2.518, "step": 180744 }, { "epoch": 34.71, "learning_rate": 0.001, "loss": 2.51, "step": 180756 }, { "epoch": 34.71, "learning_rate": 0.001, "loss": 2.522, "step": 180768 }, { "epoch": 34.71, "learning_rate": 0.001, "loss": 2.5232, "step": 180780 }, { "epoch": 34.71, "learning_rate": 0.001, "loss": 2.5133, "step": 180792 }, { "epoch": 34.72, "learning_rate": 0.001, "loss": 2.516, "step": 180804 }, { "epoch": 34.72, "learning_rate": 0.001, "loss": 2.5202, "step": 180816 }, { "epoch": 34.72, "learning_rate": 0.001, "loss": 2.5161, "step": 180828 }, { "epoch": 34.72, "learning_rate": 0.001, "loss": 2.522, "step": 180840 }, { "epoch": 34.73, "learning_rate": 0.001, "loss": 2.5156, "step": 180852 }, { "epoch": 34.73, "learning_rate": 0.001, "loss": 2.5186, "step": 180864 }, { "epoch": 34.73, "learning_rate": 0.001, "loss": 2.5211, "step": 180876 }, { "epoch": 34.73, "learning_rate": 0.001, "loss": 2.5186, "step": 180888 }, { "epoch": 34.74, "learning_rate": 0.001, "loss": 2.5192, "step": 180900 }, { "epoch": 34.74, "learning_rate": 0.001, "loss": 2.5123, "step": 180912 }, { "epoch": 34.74, "learning_rate": 0.001, "loss": 2.5111, "step": 180924 }, { "epoch": 34.74, "learning_rate": 0.001, "loss": 2.5197, "step": 180936 }, { "epoch": 34.74, "learning_rate": 0.001, "loss": 2.5149, "step": 180948 }, { "epoch": 34.75, "learning_rate": 0.001, "loss": 2.5057, "step": 180960 }, { "epoch": 34.75, "learning_rate": 0.001, "loss": 2.5225, "step": 180972 }, { "epoch": 34.75, "learning_rate": 0.001, "loss": 2.5175, "step": 180984 }, { "epoch": 34.75, "learning_rate": 0.001, "loss": 2.5112, "step": 180996 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 2.5115, "step": 181008 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 2.514, "step": 181020 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 2.5215, "step": 181032 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 2.5179, "step": 181044 }, { "epoch": 34.76, "learning_rate": 0.001, "loss": 2.5186, "step": 181056 }, { "epoch": 34.77, "learning_rate": 0.001, "loss": 2.5081, "step": 181068 }, { "epoch": 34.77, "learning_rate": 0.001, "loss": 2.515, "step": 181080 }, { "epoch": 34.77, "learning_rate": 0.001, "loss": 2.5162, "step": 181092 }, { "epoch": 34.77, "learning_rate": 0.001, "loss": 2.5275, "step": 181104 }, { "epoch": 34.78, "learning_rate": 0.001, "loss": 2.514, "step": 181116 }, { "epoch": 34.78, "learning_rate": 0.001, "loss": 2.5203, "step": 181128 }, { "epoch": 34.78, "learning_rate": 0.001, "loss": 2.5096, "step": 181140 }, { "epoch": 34.78, "learning_rate": 0.001, "loss": 2.5142, "step": 181152 }, { "epoch": 34.79, "learning_rate": 0.001, "loss": 2.5157, "step": 181164 }, { "epoch": 34.79, "learning_rate": 0.001, "loss": 2.5154, "step": 181176 }, { "epoch": 34.79, "learning_rate": 0.001, "loss": 2.52, "step": 181188 }, { "epoch": 34.79, "learning_rate": 0.001, "loss": 2.5137, "step": 181200 }, { "epoch": 34.79, "learning_rate": 0.001, "loss": 2.5077, "step": 181212 }, { "epoch": 34.8, "learning_rate": 0.001, "loss": 2.5249, "step": 181224 }, { "epoch": 34.8, "learning_rate": 0.001, "loss": 2.5194, "step": 181236 }, { "epoch": 34.8, "learning_rate": 0.001, "loss": 2.5076, "step": 181248 }, { "epoch": 34.8, "eval_ag_news_accuracy": 0.328875, "eval_ag_news_bleu_score": 4.804993873614055, "eval_ag_news_bleu_score_sem": 0.14381557000213738, "eval_ag_news_emb_cos_sim": 0.8208189010620117, "eval_ag_news_emb_cos_sim_sem": 0.006431708522983135, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4985673427581787, "eval_ag_news_n_ngrams_match_1": 14.176, "eval_ag_news_n_ngrams_match_2": 3.086, "eval_ag_news_n_ngrams_match_3": 0.866, "eval_ag_news_num_pred_words": 46.326, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.068042835248484, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3571379081987875, "eval_ag_news_runtime": 10.3061, "eval_ag_news_samples_per_second": 48.515, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3529043986975882, "eval_ag_news_token_set_f1_sem": 0.004098616217864085, "eval_ag_news_token_set_precision": 0.33827297152610164, "eval_ag_news_token_set_recall": 0.38201161879811407, "eval_ag_news_true_num_tokens": 56.09375, "step": 181250 }, { "epoch": 34.8, "eval_anthropic_toxic_prompts_accuracy": 0.11496875, "eval_anthropic_toxic_prompts_bleu_score": 3.2071442026162926, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12430579377141336, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6753900051116943, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00914498565285683, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.21734356880188, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.218, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.918, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.716, "eval_anthropic_toxic_prompts_num_pred_words": 46.356, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.961722930966527, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.216366667957075, "eval_anthropic_toxic_prompts_runtime": 9.8046, "eval_anthropic_toxic_prompts_samples_per_second": 50.996, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35854985319278126, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006436377657620959, "eval_anthropic_toxic_prompts_token_set_precision": 0.43671869039213135, "eval_anthropic_toxic_prompts_token_set_recall": 0.3316968474751847, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 181250 }, { "epoch": 34.8, "eval_arxiv_accuracy": 0.349, "eval_arxiv_bleu_score": 4.429891031934787, "eval_arxiv_bleu_score_sem": 0.1314076453950146, "eval_arxiv_emb_cos_sim": 0.7752380967140198, "eval_arxiv_emb_cos_sim_sem": 0.007095265871760618, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3631441593170166, "eval_arxiv_n_ngrams_match_1": 15.454, "eval_arxiv_n_ngrams_match_2": 3.038, "eval_arxiv_n_ngrams_match_3": 0.688, "eval_arxiv_num_pred_words": 40.742, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.87985113242794, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3671230992011829, "eval_arxiv_runtime": 10.0578, "eval_arxiv_samples_per_second": 49.713, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3610990832431798, "eval_arxiv_token_set_f1_sem": 0.004241424562178454, "eval_arxiv_token_set_precision": 0.3151974495500728, "eval_arxiv_token_set_recall": 0.4407001617443411, "eval_arxiv_true_num_tokens": 64.0, "step": 181250 }, { "epoch": 34.8, "eval_python_code_alpaca_accuracy": 0.16215625, "eval_python_code_alpaca_bleu_score": 4.676850928545665, "eval_python_code_alpaca_bleu_score_sem": 0.14177454527915884, "eval_python_code_alpaca_emb_cos_sim": 0.7616323232650757, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007801763615181115, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.868537664413452, "eval_python_code_alpaca_n_ngrams_match_1": 9.852, "eval_python_code_alpaca_n_ngrams_match_2": 2.988, "eval_python_code_alpaca_n_ngrams_match_3": 1.004, "eval_python_code_alpaca_num_pred_words": 43.496, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.611245809017923, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3383870860287026, "eval_python_code_alpaca_runtime": 10.3995, "eval_python_code_alpaca_samples_per_second": 48.079, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4870405216309066, "eval_python_code_alpaca_token_set_f1_sem": 0.005466785875407218, "eval_python_code_alpaca_token_set_precision": 0.5389805849457641, "eval_python_code_alpaca_token_set_recall": 0.4661176088440191, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 181250 }, { "epoch": 34.8, "eval_wikibio_accuracy": 0.32778125, "eval_wikibio_bleu_score": 6.141191043301738, "eval_wikibio_bleu_score_sem": 0.21718134250764892, "eval_wikibio_emb_cos_sim": 0.749444842338562, "eval_wikibio_emb_cos_sim_sem": 0.008318791518209903, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.652595281600952, "eval_wikibio_n_ngrams_match_1": 10.37, "eval_wikibio_n_ngrams_match_2": 3.472, "eval_wikibio_n_ngrams_match_3": 1.306, "eval_wikibio_num_pred_words": 36.18, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.57464832689593, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36860213465066394, "eval_wikibio_runtime": 9.9655, "eval_wikibio_samples_per_second": 50.173, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3242861702515772, "eval_wikibio_token_set_f1_sem": 0.005358514615146788, "eval_wikibio_token_set_precision": 0.33549426531171667, "eval_wikibio_token_set_recall": 0.32899107763484203, "eval_wikibio_true_num_tokens": 61.1328125, "step": 181250 }, { "epoch": 34.8, "eval_nq_accuracy": 0.5345, "eval_nq_bleu_score": 11.753791075831005, "eval_nq_bleu_score_sem": 0.4679960419998606, "eval_nq_emb_cos_sim": 0.8319037556648254, "eval_nq_emb_cos_sim_sem": 0.007247675461421091, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1426806449890137, "eval_nq_n_ngrams_match_1": 23.218, "eval_nq_n_ngrams_match_2": 8.532, "eval_nq_n_ngrams_match_3": 3.858, "eval_nq_num_pred_words": 48.954, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.522252168954852, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4518260088785709, "eval_nq_runtime": 10.4722, "eval_nq_samples_per_second": 47.746, "eval_nq_steps_per_second": 0.095, "eval_nq_token_set_f1": 0.46623278222821024, "eval_nq_token_set_f1_sem": 0.004872212076789529, "eval_nq_token_set_precision": 0.42553115163868954, "eval_nq_token_set_recall": 0.5245004882965694, "eval_nq_true_num_tokens": 64.0, "step": 181250 }, { "epoch": 34.8, "learning_rate": 0.001, "loss": 2.5184, "step": 181260 }, { "epoch": 34.81, "learning_rate": 0.001, "loss": 2.5167, "step": 181272 }, { "epoch": 34.81, "learning_rate": 0.001, "loss": 2.5238, "step": 181284 }, { "epoch": 34.81, "learning_rate": 0.001, "loss": 2.5144, "step": 181296 }, { "epoch": 34.81, "learning_rate": 0.001, "loss": 2.5123, "step": 181308 }, { "epoch": 34.82, "learning_rate": 0.001, "loss": 2.5191, "step": 181320 }, { "epoch": 34.82, "learning_rate": 0.001, "loss": 2.5187, "step": 181332 }, { "epoch": 34.82, "learning_rate": 0.001, "loss": 2.5217, "step": 181344 }, { "epoch": 34.82, "learning_rate": 0.001, "loss": 2.5241, "step": 181356 }, { "epoch": 34.82, "learning_rate": 0.001, "loss": 2.5128, "step": 181368 }, { "epoch": 34.83, "learning_rate": 0.001, "loss": 2.5068, "step": 181380 }, { "epoch": 34.83, "learning_rate": 0.001, "loss": 2.5218, "step": 181392 }, { "epoch": 34.83, "learning_rate": 0.001, "loss": 2.5089, "step": 181404 }, { "epoch": 34.83, "learning_rate": 0.001, "loss": 2.5084, "step": 181416 }, { "epoch": 34.84, "learning_rate": 0.001, "loss": 2.5212, "step": 181428 }, { "epoch": 34.84, "learning_rate": 0.001, "loss": 2.5157, "step": 181440 }, { "epoch": 34.84, "learning_rate": 0.001, "loss": 2.5114, "step": 181452 }, { "epoch": 34.84, "learning_rate": 0.001, "loss": 2.5131, "step": 181464 }, { "epoch": 34.85, "learning_rate": 0.001, "loss": 2.5266, "step": 181476 }, { "epoch": 34.85, "learning_rate": 0.001, "loss": 2.5217, "step": 181488 }, { "epoch": 34.85, "learning_rate": 0.001, "loss": 2.5217, "step": 181500 }, { "epoch": 34.85, "learning_rate": 0.001, "loss": 2.5219, "step": 181512 }, { "epoch": 34.85, "learning_rate": 0.001, "loss": 2.5183, "step": 181524 }, { "epoch": 34.86, "learning_rate": 0.001, "loss": 2.5175, "step": 181536 }, { "epoch": 34.86, "learning_rate": 0.001, "loss": 2.5149, "step": 181548 }, { "epoch": 34.86, "learning_rate": 0.001, "loss": 2.5052, "step": 181560 }, { "epoch": 34.86, "learning_rate": 0.001, "loss": 2.5146, "step": 181572 }, { "epoch": 34.87, "learning_rate": 0.001, "loss": 2.5101, "step": 181584 }, { "epoch": 34.87, "learning_rate": 0.001, "loss": 2.5099, "step": 181596 }, { "epoch": 34.87, "learning_rate": 0.001, "loss": 2.4997, "step": 181608 }, { "epoch": 34.87, "learning_rate": 0.001, "loss": 2.51, "step": 181620 }, { "epoch": 34.88, "learning_rate": 0.001, "loss": 2.5196, "step": 181632 }, { "epoch": 34.88, "learning_rate": 0.001, "loss": 2.5184, "step": 181644 }, { "epoch": 34.88, "learning_rate": 0.001, "loss": 2.5178, "step": 181656 }, { "epoch": 34.88, "learning_rate": 0.001, "loss": 2.5141, "step": 181668 }, { "epoch": 34.88, "learning_rate": 0.001, "loss": 2.5185, "step": 181680 }, { "epoch": 34.89, "learning_rate": 0.001, "loss": 2.5183, "step": 181692 }, { "epoch": 34.89, "learning_rate": 0.001, "loss": 2.5102, "step": 181704 }, { "epoch": 34.89, "learning_rate": 0.001, "loss": 2.5137, "step": 181716 }, { "epoch": 34.89, "learning_rate": 0.001, "loss": 2.5132, "step": 181728 }, { "epoch": 34.9, "learning_rate": 0.001, "loss": 2.5251, "step": 181740 }, { "epoch": 34.9, "learning_rate": 0.001, "loss": 2.5138, "step": 181752 }, { "epoch": 34.9, "learning_rate": 0.001, "loss": 2.5121, "step": 181764 }, { "epoch": 34.9, "learning_rate": 0.001, "loss": 2.5196, "step": 181776 }, { "epoch": 34.91, "learning_rate": 0.001, "loss": 2.5111, "step": 181788 }, { "epoch": 34.91, "learning_rate": 0.001, "loss": 2.5176, "step": 181800 }, { "epoch": 34.91, "learning_rate": 0.001, "loss": 2.5046, "step": 181812 }, { "epoch": 34.91, "learning_rate": 0.001, "loss": 2.5191, "step": 181824 }, { "epoch": 34.91, "learning_rate": 0.001, "loss": 2.5207, "step": 181836 }, { "epoch": 34.92, "learning_rate": 0.001, "loss": 2.5126, "step": 181848 }, { "epoch": 34.92, "learning_rate": 0.001, "loss": 2.5222, "step": 181860 }, { "epoch": 34.92, "learning_rate": 0.001, "loss": 2.5127, "step": 181872 }, { "epoch": 34.92, "eval_ag_news_accuracy": 0.32778125, "eval_ag_news_bleu_score": 4.9602767765405025, "eval_ag_news_bleu_score_sem": 0.15656288994688125, "eval_ag_news_emb_cos_sim": 0.8185689449310303, "eval_ag_news_emb_cos_sim_sem": 0.006665678849957266, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.49713397026062, "eval_ag_news_n_ngrams_match_1": 14.414, "eval_ag_news_n_ngrams_match_2": 3.222, "eval_ag_news_n_ngrams_match_3": 0.946, "eval_ag_news_num_pred_words": 47.056, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.02067796596033, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35595681717589284, "eval_ag_news_runtime": 10.3168, "eval_ag_news_samples_per_second": 48.465, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3572590160496617, "eval_ag_news_token_set_f1_sem": 0.004391761844769307, "eval_ag_news_token_set_precision": 0.34403067172163065, "eval_ag_news_token_set_recall": 0.38612976103485114, "eval_ag_news_true_num_tokens": 56.09375, "step": 181875 }, { "epoch": 34.92, "eval_anthropic_toxic_prompts_accuracy": 0.1145, "eval_anthropic_toxic_prompts_bleu_score": 3.207184484553967, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12655300506028005, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682706356048584, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009426976386808556, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2380993366241455, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.296, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.776, "eval_anthropic_toxic_prompts_num_pred_words": 47.36, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.485236828927505, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21522310366020758, "eval_anthropic_toxic_prompts_runtime": 9.8297, "eval_anthropic_toxic_prompts_samples_per_second": 50.866, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3624188922080917, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067680241486070365, "eval_anthropic_toxic_prompts_token_set_precision": 0.44539145807318636, "eval_anthropic_toxic_prompts_token_set_recall": 0.3331691113548435, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 181875 }, { "epoch": 34.92, "eval_arxiv_accuracy": 0.35278125, "eval_arxiv_bleu_score": 4.539418752884601, "eval_arxiv_bleu_score_sem": 0.13076241325439472, "eval_arxiv_emb_cos_sim": 0.7864891290664673, "eval_arxiv_emb_cos_sim_sem": 0.006223616360994691, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3372349739074707, "eval_arxiv_n_ngrams_match_1": 15.746, "eval_arxiv_n_ngrams_match_2": 3.096, "eval_arxiv_n_ngrams_match_3": 0.708, "eval_arxiv_num_pred_words": 41.472, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.14120785722712, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37258023760745, "eval_arxiv_runtime": 10.0854, "eval_arxiv_samples_per_second": 49.576, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3657891911442688, "eval_arxiv_token_set_f1_sem": 0.0041102939618479824, "eval_arxiv_token_set_precision": 0.3195849110747359, "eval_arxiv_token_set_recall": 0.4405098954874597, "eval_arxiv_true_num_tokens": 64.0, "step": 181875 }, { "epoch": 34.92, "eval_python_code_alpaca_accuracy": 0.1626875, "eval_python_code_alpaca_bleu_score": 4.934295158768371, "eval_python_code_alpaca_bleu_score_sem": 0.15448891617669977, "eval_python_code_alpaca_emb_cos_sim": 0.7720087766647339, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007939166951138185, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.867623805999756, "eval_python_code_alpaca_n_ngrams_match_1": 10.168, "eval_python_code_alpaca_n_ngrams_match_2": 3.194, "eval_python_code_alpaca_n_ngrams_match_3": 1.178, "eval_python_code_alpaca_num_pred_words": 45.24, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.595158975523308, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33922738357496696, "eval_python_code_alpaca_runtime": 9.7722, "eval_python_code_alpaca_samples_per_second": 51.166, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4959238966829353, "eval_python_code_alpaca_token_set_f1_sem": 0.005318135492879365, "eval_python_code_alpaca_token_set_precision": 0.5570481958937105, "eval_python_code_alpaca_token_set_recall": 0.47168103921943716, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 181875 }, { "epoch": 34.92, "eval_wikibio_accuracy": 0.3291875, "eval_wikibio_bleu_score": 6.196995746032902, "eval_wikibio_bleu_score_sem": 0.2258332542701892, "eval_wikibio_emb_cos_sim": 0.7374675869941711, "eval_wikibio_emb_cos_sim_sem": 0.00925153025171434, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.61423921585083, "eval_wikibio_n_ngrams_match_1": 10.22, "eval_wikibio_n_ngrams_match_2": 3.418, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 36.276, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.12309251928056, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3555967650424833, "eval_wikibio_runtime": 10.0263, "eval_wikibio_samples_per_second": 49.869, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3236343940470751, "eval_wikibio_token_set_f1_sem": 0.005430149620092611, "eval_wikibio_token_set_precision": 0.3309549762143529, "eval_wikibio_token_set_recall": 0.33478350801841644, "eval_wikibio_true_num_tokens": 61.1328125, "step": 181875 }, { "epoch": 34.92, "eval_nq_accuracy": 0.5350625, "eval_nq_bleu_score": 12.173006305095079, "eval_nq_bleu_score_sem": 0.4833641743203409, "eval_nq_emb_cos_sim": 0.8393230438232422, "eval_nq_emb_cos_sim_sem": 0.0063963762930408456, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1397511959075928, "eval_nq_n_ngrams_match_1": 23.508, "eval_nq_n_ngrams_match_2": 8.796, "eval_nq_n_ngrams_match_3": 4.08, "eval_nq_num_pred_words": 49.38, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.497323197071449, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4554898941582331, "eval_nq_runtime": 10.8463, "eval_nq_samples_per_second": 46.099, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4684268262234774, "eval_nq_token_set_f1_sem": 0.004860853452374327, "eval_nq_token_set_precision": 0.42800084347651546, "eval_nq_token_set_recall": 0.5256222382886034, "eval_nq_true_num_tokens": 64.0, "step": 181875 }, { "epoch": 34.92, "learning_rate": 0.001, "loss": 2.5133, "step": 181884 }, { "epoch": 34.93, "learning_rate": 0.001, "loss": 2.5116, "step": 181896 }, { "epoch": 34.93, "learning_rate": 0.001, "loss": 2.5164, "step": 181908 }, { "epoch": 34.93, "learning_rate": 0.001, "loss": 2.5176, "step": 181920 }, { "epoch": 34.93, "learning_rate": 0.001, "loss": 2.5229, "step": 181932 }, { "epoch": 34.94, "learning_rate": 0.001, "loss": 2.5277, "step": 181944 }, { "epoch": 34.94, "learning_rate": 0.001, "loss": 2.5176, "step": 181956 }, { "epoch": 34.94, "learning_rate": 0.001, "loss": 2.5166, "step": 181968 }, { "epoch": 34.94, "learning_rate": 0.001, "loss": 2.5242, "step": 181980 }, { "epoch": 34.94, "learning_rate": 0.001, "loss": 2.5183, "step": 181992 }, { "epoch": 34.95, "learning_rate": 0.001, "loss": 2.5092, "step": 182004 }, { "epoch": 34.95, "learning_rate": 0.001, "loss": 2.5139, "step": 182016 }, { "epoch": 34.95, "learning_rate": 0.001, "loss": 2.5112, "step": 182028 }, { "epoch": 34.95, "learning_rate": 0.001, "loss": 2.5121, "step": 182040 }, { "epoch": 34.96, "learning_rate": 0.001, "loss": 2.5204, "step": 182052 }, { "epoch": 34.96, "learning_rate": 0.001, "loss": 2.516, "step": 182064 }, { "epoch": 34.96, "learning_rate": 0.001, "loss": 2.5255, "step": 182076 }, { "epoch": 34.96, "learning_rate": 0.001, "loss": 2.5172, "step": 182088 }, { "epoch": 34.97, "learning_rate": 0.001, "loss": 2.5211, "step": 182100 }, { "epoch": 34.97, "learning_rate": 0.001, "loss": 2.5204, "step": 182112 }, { "epoch": 34.97, "learning_rate": 0.001, "loss": 2.52, "step": 182124 }, { "epoch": 34.97, "learning_rate": 0.001, "loss": 2.5217, "step": 182136 }, { "epoch": 34.97, "learning_rate": 0.001, "loss": 2.5166, "step": 182148 }, { "epoch": 34.98, "learning_rate": 0.001, "loss": 2.5132, "step": 182160 }, { "epoch": 34.98, "learning_rate": 0.001, "loss": 2.517, "step": 182172 }, { "epoch": 34.98, "learning_rate": 0.001, "loss": 2.5076, "step": 182184 }, { "epoch": 34.98, "learning_rate": 0.001, "loss": 2.5245, "step": 182196 }, { "epoch": 34.99, "learning_rate": 0.001, "loss": 2.5254, "step": 182208 }, { "epoch": 34.99, "learning_rate": 0.001, "loss": 2.5158, "step": 182220 }, { "epoch": 34.99, "learning_rate": 0.001, "loss": 2.5181, "step": 182232 }, { "epoch": 34.99, "learning_rate": 0.001, "loss": 2.509, "step": 182244 }, { "epoch": 35.0, "learning_rate": 0.001, "loss": 2.5102, "step": 182256 }, { "epoch": 35.0, "learning_rate": 0.001, "loss": 2.5118, "step": 182268 }, { "epoch": 35.0, "learning_rate": 0.001, "loss": 2.5058, "step": 182280 }, { "epoch": 35.0, "learning_rate": 0.001, "loss": 2.5103, "step": 182292 }, { "epoch": 35.0, "learning_rate": 0.001, "loss": 2.5078, "step": 182304 }, { "epoch": 35.01, "learning_rate": 0.001, "loss": 2.5089, "step": 182316 }, { "epoch": 35.01, "learning_rate": 0.001, "loss": 2.507, "step": 182328 }, { "epoch": 35.01, "learning_rate": 0.001, "loss": 2.5069, "step": 182340 }, { "epoch": 35.01, "learning_rate": 0.001, "loss": 2.5024, "step": 182352 }, { "epoch": 35.02, "learning_rate": 0.001, "loss": 2.4935, "step": 182364 }, { "epoch": 35.02, "learning_rate": 0.001, "loss": 2.5007, "step": 182376 }, { "epoch": 35.02, "learning_rate": 0.001, "loss": 2.5044, "step": 182388 }, { "epoch": 35.02, "learning_rate": 0.001, "loss": 2.5037, "step": 182400 }, { "epoch": 35.03, "learning_rate": 0.001, "loss": 2.4962, "step": 182412 }, { "epoch": 35.03, "learning_rate": 0.001, "loss": 2.4991, "step": 182424 }, { "epoch": 35.03, "learning_rate": 0.001, "loss": 2.5073, "step": 182436 }, { "epoch": 35.03, "learning_rate": 0.001, "loss": 2.5096, "step": 182448 }, { "epoch": 35.03, "learning_rate": 0.001, "loss": 2.4871, "step": 182460 }, { "epoch": 35.04, "learning_rate": 0.001, "loss": 2.5005, "step": 182472 }, { "epoch": 35.04, "learning_rate": 0.001, "loss": 2.4967, "step": 182484 }, { "epoch": 35.04, "learning_rate": 0.001, "loss": 2.5009, "step": 182496 }, { "epoch": 35.04, "eval_ag_news_accuracy": 0.329375, "eval_ag_news_bleu_score": 5.013241895185381, "eval_ag_news_bleu_score_sem": 0.16971217796875318, "eval_ag_news_emb_cos_sim": 0.8215794563293457, "eval_ag_news_emb_cos_sim_sem": 0.006490493498488212, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.495105743408203, "eval_ag_news_n_ngrams_match_1": 14.534, "eval_ag_news_n_ngrams_match_2": 3.246, "eval_ag_news_n_ngrams_match_3": 0.944, "eval_ag_news_num_pred_words": 46.54, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.95377241298012, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3634637101165499, "eval_ag_news_runtime": 10.4592, "eval_ag_news_samples_per_second": 47.805, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35981091616312516, "eval_ag_news_token_set_f1_sem": 0.004489789468071795, "eval_ag_news_token_set_precision": 0.34730553931234, "eval_ag_news_token_set_recall": 0.38580613549033604, "eval_ag_news_true_num_tokens": 56.09375, "step": 182500 }, { "epoch": 35.04, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.1064264684772644, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11985823692510499, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6748046278953552, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010383487259962153, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2069003582000732, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.888, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.67, "eval_anthropic_toxic_prompts_num_pred_words": 46.848, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.70239884626327, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21973808663468183, "eval_anthropic_toxic_prompts_runtime": 9.6768, "eval_anthropic_toxic_prompts_samples_per_second": 51.67, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.35507274023612245, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657076088930049, "eval_anthropic_toxic_prompts_token_set_precision": 0.4436292879843375, "eval_anthropic_toxic_prompts_token_set_recall": 0.321794609671025, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 182500 }, { "epoch": 35.04, "eval_arxiv_accuracy": 0.3505625, "eval_arxiv_bleu_score": 4.392728758966296, "eval_arxiv_bleu_score_sem": 0.12192842762713288, "eval_arxiv_emb_cos_sim": 0.779847264289856, "eval_arxiv_emb_cos_sim_sem": 0.007890435316091972, "eval_arxiv_emb_top1_equal": 0.375, "eval_arxiv_emb_top1_equal_sem": 0.04295896296396028, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.362316370010376, "eval_arxiv_n_ngrams_match_1": 15.514, "eval_arxiv_n_ngrams_match_2": 3.05, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 41.092, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.855954592497817, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.370596800073038, "eval_arxiv_runtime": 10.6079, "eval_arxiv_samples_per_second": 47.134, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3603772299266208, "eval_arxiv_token_set_f1_sem": 0.004226949685001713, "eval_arxiv_token_set_precision": 0.3158418822046445, "eval_arxiv_token_set_recall": 0.4332084098977886, "eval_arxiv_true_num_tokens": 64.0, "step": 182500 }, { "epoch": 35.04, "eval_python_code_alpaca_accuracy": 0.16078125, "eval_python_code_alpaca_bleu_score": 4.838796129483069, "eval_python_code_alpaca_bleu_score_sem": 0.1462466692264986, "eval_python_code_alpaca_emb_cos_sim": 0.7695719003677368, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619172272175286, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8930504322052, "eval_python_code_alpaca_n_ngrams_match_1": 10.094, "eval_python_code_alpaca_n_ngrams_match_2": 3.0, "eval_python_code_alpaca_n_ngrams_match_3": 1.066, "eval_python_code_alpaca_num_pred_words": 43.838, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.048280772926603, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34592551572692587, "eval_python_code_alpaca_runtime": 9.7173, "eval_python_code_alpaca_samples_per_second": 51.455, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.49022607803130974, "eval_python_code_alpaca_token_set_f1_sem": 0.0054169132953773656, "eval_python_code_alpaca_token_set_precision": 0.5523526157981145, "eval_python_code_alpaca_token_set_recall": 0.46187599070232777, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 182500 }, { "epoch": 35.04, "eval_wikibio_accuracy": 0.326875, "eval_wikibio_bleu_score": 6.176190741503038, "eval_wikibio_bleu_score_sem": 0.22174135625377686, "eval_wikibio_emb_cos_sim": 0.7572555541992188, "eval_wikibio_emb_cos_sim_sem": 0.008205068125670876, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.68223237991333, "eval_wikibio_n_ngrams_match_1": 10.12, "eval_wikibio_n_ngrams_match_2": 3.424, "eval_wikibio_n_ngrams_match_3": 1.294, "eval_wikibio_num_pred_words": 35.302, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.7349987491894, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35591152715847985, "eval_wikibio_runtime": 9.7287, "eval_wikibio_samples_per_second": 51.394, "eval_wikibio_steps_per_second": 0.103, "eval_wikibio_token_set_f1": 0.32301632706906014, "eval_wikibio_token_set_f1_sem": 0.005584873057938655, "eval_wikibio_token_set_precision": 0.3286334257346614, "eval_wikibio_token_set_recall": 0.3364926135384171, "eval_wikibio_true_num_tokens": 61.1328125, "step": 182500 }, { "epoch": 35.04, "eval_nq_accuracy": 0.53509375, "eval_nq_bleu_score": 12.156841420824348, "eval_nq_bleu_score_sem": 0.4956232439593376, "eval_nq_emb_cos_sim": 0.8391368389129639, "eval_nq_emb_cos_sim_sem": 0.0072206975271070705, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.135446786880493, "eval_nq_n_ngrams_match_1": 23.586, "eval_nq_n_ngrams_match_2": 8.792, "eval_nq_n_ngrams_match_3": 4.07, "eval_nq_num_pred_words": 49.326, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.460825848505804, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45521639355119436, "eval_nq_runtime": 10.1561, "eval_nq_samples_per_second": 49.232, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.46914761397712873, "eval_nq_token_set_f1_sem": 0.005000811715255126, "eval_nq_token_set_precision": 0.4286275655618775, "eval_nq_token_set_recall": 0.5261076186952025, "eval_nq_true_num_tokens": 64.0, "step": 182500 }, { "epoch": 35.04, "learning_rate": 0.001, "loss": 2.5014, "step": 182508 }, { "epoch": 35.05, "learning_rate": 0.001, "loss": 2.5049, "step": 182520 }, { "epoch": 35.05, "learning_rate": 0.001, "loss": 2.5012, "step": 182532 }, { "epoch": 35.05, "learning_rate": 0.001, "loss": 2.5047, "step": 182544 }, { "epoch": 35.05, "learning_rate": 0.001, "loss": 2.5032, "step": 182556 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 2.5044, "step": 182568 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 2.5096, "step": 182580 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 2.5007, "step": 182592 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 2.502, "step": 182604 }, { "epoch": 35.06, "learning_rate": 0.001, "loss": 2.4997, "step": 182616 }, { "epoch": 35.07, "learning_rate": 0.001, "loss": 2.5056, "step": 182628 }, { "epoch": 35.07, "learning_rate": 0.001, "loss": 2.5058, "step": 182640 }, { "epoch": 35.07, "learning_rate": 0.001, "loss": 2.5113, "step": 182652 }, { "epoch": 35.07, "learning_rate": 0.001, "loss": 2.4952, "step": 182664 }, { "epoch": 35.08, "learning_rate": 0.001, "loss": 2.5039, "step": 182676 }, { "epoch": 35.08, "learning_rate": 0.001, "loss": 2.5095, "step": 182688 }, { "epoch": 35.08, "learning_rate": 0.001, "loss": 2.5158, "step": 182700 }, { "epoch": 35.08, "learning_rate": 0.001, "loss": 2.5089, "step": 182712 }, { "epoch": 35.09, "learning_rate": 0.001, "loss": 2.5077, "step": 182724 }, { "epoch": 35.09, "learning_rate": 0.001, "loss": 2.5095, "step": 182736 }, { "epoch": 35.09, "learning_rate": 0.001, "loss": 2.5098, "step": 182748 }, { "epoch": 35.09, "learning_rate": 0.001, "loss": 2.5083, "step": 182760 }, { "epoch": 35.09, "learning_rate": 0.001, "loss": 2.4921, "step": 182772 }, { "epoch": 35.1, "learning_rate": 0.001, "loss": 2.504, "step": 182784 }, { "epoch": 35.1, "learning_rate": 0.001, "loss": 2.5044, "step": 182796 }, { "epoch": 35.1, "learning_rate": 0.001, "loss": 2.5107, "step": 182808 }, { "epoch": 35.1, "learning_rate": 0.001, "loss": 2.5012, "step": 182820 }, { "epoch": 35.11, "learning_rate": 0.001, "loss": 2.5006, "step": 182832 }, { "epoch": 35.11, "learning_rate": 0.001, "loss": 2.5147, "step": 182844 }, { "epoch": 35.11, "learning_rate": 0.001, "loss": 2.5035, "step": 182856 }, { "epoch": 35.11, "learning_rate": 0.001, "loss": 2.5038, "step": 182868 }, { "epoch": 35.12, "learning_rate": 0.001, "loss": 2.502, "step": 182880 }, { "epoch": 35.12, "learning_rate": 0.001, "loss": 2.5058, "step": 182892 }, { "epoch": 35.12, "learning_rate": 0.001, "loss": 2.4899, "step": 182904 }, { "epoch": 35.12, "learning_rate": 0.001, "loss": 2.5128, "step": 182916 }, { "epoch": 35.12, "learning_rate": 0.001, "loss": 2.5001, "step": 182928 }, { "epoch": 35.13, "learning_rate": 0.001, "loss": 2.4961, "step": 182940 }, { "epoch": 35.13, "learning_rate": 0.001, "loss": 2.5109, "step": 182952 }, { "epoch": 35.13, "learning_rate": 0.001, "loss": 2.5069, "step": 182964 }, { "epoch": 35.13, "learning_rate": 0.001, "loss": 2.5125, "step": 182976 }, { "epoch": 35.14, "learning_rate": 0.001, "loss": 2.5077, "step": 182988 }, { "epoch": 35.14, "learning_rate": 0.001, "loss": 2.5055, "step": 183000 }, { "epoch": 35.14, "learning_rate": 0.001, "loss": 2.5068, "step": 183012 }, { "epoch": 35.14, "learning_rate": 0.001, "loss": 2.514, "step": 183024 }, { "epoch": 35.15, "learning_rate": 0.001, "loss": 2.5102, "step": 183036 }, { "epoch": 35.15, "learning_rate": 0.001, "loss": 2.5125, "step": 183048 }, { "epoch": 35.15, "learning_rate": 0.001, "loss": 2.5059, "step": 183060 }, { "epoch": 35.15, "learning_rate": 0.001, "loss": 2.5064, "step": 183072 }, { "epoch": 35.15, "learning_rate": 0.001, "loss": 2.5094, "step": 183084 }, { "epoch": 35.16, "learning_rate": 0.001, "loss": 2.5066, "step": 183096 }, { "epoch": 35.16, "learning_rate": 0.001, "loss": 2.5166, "step": 183108 }, { "epoch": 35.16, "learning_rate": 0.001, "loss": 2.5088, "step": 183120 }, { "epoch": 35.16, "eval_ag_news_accuracy": 0.32628125, "eval_ag_news_bleu_score": 5.1006828275326095, "eval_ag_news_bleu_score_sem": 0.1590254434094689, "eval_ag_news_emb_cos_sim": 0.8179818391799927, "eval_ag_news_emb_cos_sim_sem": 0.008325306499027376, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5029258728027344, "eval_ag_news_n_ngrams_match_1": 14.602, "eval_ag_news_n_ngrams_match_2": 3.306, "eval_ag_news_n_ngrams_match_3": 1.024, "eval_ag_news_num_pred_words": 47.074, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.21248544352249, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3591954083076998, "eval_ag_news_runtime": 10.2184, "eval_ag_news_samples_per_second": 48.931, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.36251884270403273, "eval_ag_news_token_set_f1_sem": 0.00444502850364034, "eval_ag_news_token_set_precision": 0.34961905853337977, "eval_ag_news_token_set_recall": 0.38992204685461157, "eval_ag_news_true_num_tokens": 56.09375, "step": 183125 }, { "epoch": 35.16, "eval_anthropic_toxic_prompts_accuracy": 0.1145625, "eval_anthropic_toxic_prompts_bleu_score": 3.074671202479418, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12037268882944767, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.669966459274292, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009768134710624496, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2112460136413574, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.118, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.692, "eval_anthropic_toxic_prompts_num_pred_words": 47.11, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.80998054742067, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21178055775520543, "eval_anthropic_toxic_prompts_runtime": 9.791, "eval_anthropic_toxic_prompts_samples_per_second": 51.067, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3474581877934118, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006651313609107666, "eval_anthropic_toxic_prompts_token_set_precision": 0.4330612943305178, "eval_anthropic_toxic_prompts_token_set_recall": 0.31454256515335116, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 183125 }, { "epoch": 35.16, "eval_arxiv_accuracy": 0.35140625, "eval_arxiv_bleu_score": 4.41712638921239, "eval_arxiv_bleu_score_sem": 0.11733751366108557, "eval_arxiv_emb_cos_sim": 0.7801276445388794, "eval_arxiv_emb_cos_sim_sem": 0.006798413105352739, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.349822998046875, "eval_arxiv_n_ngrams_match_1": 15.456, "eval_arxiv_n_ngrams_match_2": 3.054, "eval_arxiv_n_ngrams_match_3": 0.652, "eval_arxiv_num_pred_words": 40.494, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.497689050707514, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.370027573022506, "eval_arxiv_runtime": 11.0212, "eval_arxiv_samples_per_second": 45.367, "eval_arxiv_steps_per_second": 0.091, "eval_arxiv_token_set_f1": 0.36386754280687483, "eval_arxiv_token_set_f1_sem": 0.004277710225902754, "eval_arxiv_token_set_precision": 0.3144962474657806, "eval_arxiv_token_set_recall": 0.44736028810800754, "eval_arxiv_true_num_tokens": 64.0, "step": 183125 }, { "epoch": 35.16, "eval_python_code_alpaca_accuracy": 0.16290625, "eval_python_code_alpaca_bleu_score": 4.710107004037594, "eval_python_code_alpaca_bleu_score_sem": 0.13745478952729376, "eval_python_code_alpaca_emb_cos_sim": 0.7673460841178894, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007453236178320849, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8564159870147705, "eval_python_code_alpaca_n_ngrams_match_1": 10.212, "eval_python_code_alpaca_n_ngrams_match_2": 3.016, "eval_python_code_alpaca_n_ngrams_match_3": 0.984, "eval_python_code_alpaca_num_pred_words": 44.2, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.399056611395718, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34731626653215175, "eval_python_code_alpaca_runtime": 9.9789, "eval_python_code_alpaca_samples_per_second": 50.106, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.490336283181496, "eval_python_code_alpaca_token_set_f1_sem": 0.005118987669945117, "eval_python_code_alpaca_token_set_precision": 0.5582848326943095, "eval_python_code_alpaca_token_set_recall": 0.4597384872252604, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 183125 }, { "epoch": 35.16, "eval_wikibio_accuracy": 0.3280625, "eval_wikibio_bleu_score": 5.945074091605398, "eval_wikibio_bleu_score_sem": 0.2225386624734864, "eval_wikibio_emb_cos_sim": 0.7341788411140442, "eval_wikibio_emb_cos_sim_sem": 0.010411692721239051, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6910667419433594, "eval_wikibio_n_ngrams_match_1": 9.96, "eval_wikibio_n_ngrams_match_2": 3.326, "eval_wikibio_n_ngrams_match_3": 1.23, "eval_wikibio_num_pred_words": 35.734, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 40.08758726753929, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3544598126452069, "eval_wikibio_runtime": 9.8404, "eval_wikibio_samples_per_second": 50.811, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3165713037120058, "eval_wikibio_token_set_f1_sem": 0.005705833550776621, "eval_wikibio_token_set_precision": 0.32312242072404657, "eval_wikibio_token_set_recall": 0.3292619377690935, "eval_wikibio_true_num_tokens": 61.1328125, "step": 183125 }, { "epoch": 35.16, "eval_nq_accuracy": 0.53375, "eval_nq_bleu_score": 11.87979100615898, "eval_nq_bleu_score_sem": 0.47699644080441067, "eval_nq_emb_cos_sim": 0.8350943922996521, "eval_nq_emb_cos_sim_sem": 0.007061478587234057, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.142853021621704, "eval_nq_n_ngrams_match_1": 23.356, "eval_nq_n_ngrams_match_2": 8.68, "eval_nq_n_ngrams_match_3": 3.97, "eval_nq_num_pred_words": 49.004, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.523721332707789, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.452184831196799, "eval_nq_runtime": 10.3145, "eval_nq_samples_per_second": 48.476, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4675832325458132, "eval_nq_token_set_f1_sem": 0.004837720111850746, "eval_nq_token_set_precision": 0.4255649798242282, "eval_nq_token_set_recall": 0.5265182351335549, "eval_nq_true_num_tokens": 64.0, "step": 183125 }, { "epoch": 35.16, "learning_rate": 0.001, "loss": 2.4987, "step": 183132 }, { "epoch": 35.17, "learning_rate": 0.001, "loss": 2.5073, "step": 183144 }, { "epoch": 35.17, "learning_rate": 0.001, "loss": 2.5013, "step": 183156 }, { "epoch": 35.17, "learning_rate": 0.001, "loss": 2.5088, "step": 183168 }, { "epoch": 35.17, "learning_rate": 0.001, "loss": 2.5091, "step": 183180 }, { "epoch": 35.18, "learning_rate": 0.001, "loss": 2.5127, "step": 183192 }, { "epoch": 35.18, "learning_rate": 0.001, "loss": 2.5173, "step": 183204 }, { "epoch": 35.18, "learning_rate": 0.001, "loss": 2.5148, "step": 183216 }, { "epoch": 35.18, "learning_rate": 0.001, "loss": 2.5057, "step": 183228 }, { "epoch": 35.18, "learning_rate": 0.001, "loss": 2.5058, "step": 183240 }, { "epoch": 35.19, "learning_rate": 0.001, "loss": 2.5034, "step": 183252 }, { "epoch": 35.19, "learning_rate": 0.001, "loss": 2.5071, "step": 183264 }, { "epoch": 35.19, "learning_rate": 0.001, "loss": 2.4994, "step": 183276 }, { "epoch": 35.19, "learning_rate": 0.001, "loss": 2.516, "step": 183288 }, { "epoch": 35.2, "learning_rate": 0.001, "loss": 2.5139, "step": 183300 }, { "epoch": 35.2, "learning_rate": 0.001, "loss": 2.5091, "step": 183312 }, { "epoch": 35.2, "learning_rate": 0.001, "loss": 2.5091, "step": 183324 }, { "epoch": 35.2, "learning_rate": 0.001, "loss": 2.5116, "step": 183336 }, { "epoch": 35.21, "learning_rate": 0.001, "loss": 2.5125, "step": 183348 }, { "epoch": 35.21, "learning_rate": 0.001, "loss": 2.5065, "step": 183360 }, { "epoch": 35.21, "learning_rate": 0.001, "loss": 2.5118, "step": 183372 }, { "epoch": 35.21, "learning_rate": 0.001, "loss": 2.5144, "step": 183384 }, { "epoch": 35.21, "learning_rate": 0.001, "loss": 2.5127, "step": 183396 }, { "epoch": 35.22, "learning_rate": 0.001, "loss": 2.5042, "step": 183408 }, { "epoch": 35.22, "learning_rate": 0.001, "loss": 2.5142, "step": 183420 }, { "epoch": 35.22, "learning_rate": 0.001, "loss": 2.5164, "step": 183432 }, { "epoch": 35.22, "learning_rate": 0.001, "loss": 2.5086, "step": 183444 }, { "epoch": 35.23, "learning_rate": 0.001, "loss": 2.5106, "step": 183456 }, { "epoch": 35.23, "learning_rate": 0.001, "loss": 2.5171, "step": 183468 }, { "epoch": 35.23, "learning_rate": 0.001, "loss": 2.505, "step": 183480 }, { "epoch": 35.23, "learning_rate": 0.001, "loss": 2.5016, "step": 183492 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 2.5034, "step": 183504 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 2.5169, "step": 183516 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 2.5131, "step": 183528 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 2.5069, "step": 183540 }, { "epoch": 35.24, "learning_rate": 0.001, "loss": 2.5073, "step": 183552 }, { "epoch": 35.25, "learning_rate": 0.001, "loss": 2.5138, "step": 183564 }, { "epoch": 35.25, "learning_rate": 0.001, "loss": 2.5062, "step": 183576 }, { "epoch": 35.25, "learning_rate": 0.001, "loss": 2.5, "step": 183588 }, { "epoch": 35.25, "learning_rate": 0.001, "loss": 2.5107, "step": 183600 }, { "epoch": 35.26, "learning_rate": 0.001, "loss": 2.5081, "step": 183612 }, { "epoch": 35.26, "learning_rate": 0.001, "loss": 2.5059, "step": 183624 }, { "epoch": 35.26, "learning_rate": 0.001, "loss": 2.5097, "step": 183636 }, { "epoch": 35.26, "learning_rate": 0.001, "loss": 2.5032, "step": 183648 }, { "epoch": 35.26, "learning_rate": 0.001, "loss": 2.497, "step": 183660 }, { "epoch": 35.27, "learning_rate": 0.001, "loss": 2.5126, "step": 183672 }, { "epoch": 35.27, "learning_rate": 0.001, "loss": 2.513, "step": 183684 }, { "epoch": 35.27, "learning_rate": 0.001, "loss": 2.5149, "step": 183696 }, { "epoch": 35.27, "learning_rate": 0.001, "loss": 2.5054, "step": 183708 }, { "epoch": 35.28, "learning_rate": 0.001, "loss": 2.5105, "step": 183720 }, { "epoch": 35.28, "learning_rate": 0.001, "loss": 2.507, "step": 183732 }, { "epoch": 35.28, "learning_rate": 0.001, "loss": 2.5093, "step": 183744 }, { "epoch": 35.28, "eval_ag_news_accuracy": 0.32928125, "eval_ag_news_bleu_score": 5.0350591745732345, "eval_ag_news_bleu_score_sem": 0.16332784797736272, "eval_ag_news_emb_cos_sim": 0.8193514943122864, "eval_ag_news_emb_cos_sim_sem": 0.00701078251392053, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4954283237457275, "eval_ag_news_n_ngrams_match_1": 14.414, "eval_ag_news_n_ngrams_match_2": 3.292, "eval_ag_news_n_ngrams_match_3": 0.93, "eval_ag_news_num_pred_words": 46.472, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.96440436674523, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3595132850810327, "eval_ag_news_runtime": 10.8547, "eval_ag_news_samples_per_second": 46.063, "eval_ag_news_steps_per_second": 0.092, "eval_ag_news_token_set_f1": 0.3593461006832739, "eval_ag_news_token_set_f1_sem": 0.004323817170071455, "eval_ag_news_token_set_precision": 0.3478113187945974, "eval_ag_news_token_set_recall": 0.385787166846377, "eval_ag_news_true_num_tokens": 56.09375, "step": 183750 }, { "epoch": 35.28, "eval_anthropic_toxic_prompts_accuracy": 0.1151875, "eval_anthropic_toxic_prompts_bleu_score": 3.0494604625327564, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11380085826770461, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6774520874023438, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008644036772534948, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2130579948425293, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.158, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.68, "eval_anthropic_toxic_prompts_num_pred_words": 47.292, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.854976519390522, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21342173094804487, "eval_anthropic_toxic_prompts_runtime": 9.8464, "eval_anthropic_toxic_prompts_samples_per_second": 50.78, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35934005578858935, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006353934031636077, "eval_anthropic_toxic_prompts_token_set_precision": 0.43421591494584305, "eval_anthropic_toxic_prompts_token_set_recall": 0.33496714228531227, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 183750 }, { "epoch": 35.28, "eval_arxiv_accuracy": 0.352125, "eval_arxiv_bleu_score": 4.59033915940503, "eval_arxiv_bleu_score_sem": 0.1384014495808851, "eval_arxiv_emb_cos_sim": 0.7802977561950684, "eval_arxiv_emb_cos_sim_sem": 0.0076035937432944465, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3495640754699707, "eval_arxiv_n_ngrams_match_1": 15.722, "eval_arxiv_n_ngrams_match_2": 3.176, "eval_arxiv_n_ngrams_match_3": 0.718, "eval_arxiv_num_pred_words": 41.162, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.49031131079562, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37267638148118504, "eval_arxiv_runtime": 10.0572, "eval_arxiv_samples_per_second": 49.716, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3699975210100182, "eval_arxiv_token_set_f1_sem": 0.004402498100178867, "eval_arxiv_token_set_precision": 0.32354111806907515, "eval_arxiv_token_set_recall": 0.4518563857565413, "eval_arxiv_true_num_tokens": 64.0, "step": 183750 }, { "epoch": 35.28, "eval_python_code_alpaca_accuracy": 0.1634375, "eval_python_code_alpaca_bleu_score": 4.707011325951003, "eval_python_code_alpaca_bleu_score_sem": 0.15056426823941502, "eval_python_code_alpaca_emb_cos_sim": 0.7669054865837097, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00794338953852337, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.854538679122925, "eval_python_code_alpaca_n_ngrams_match_1": 9.994, "eval_python_code_alpaca_n_ngrams_match_2": 3.014, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 44.276, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.366423865548054, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3374696312915223, "eval_python_code_alpaca_runtime": 9.9467, "eval_python_code_alpaca_samples_per_second": 50.268, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48702681374330326, "eval_python_code_alpaca_token_set_f1_sem": 0.005447475876108956, "eval_python_code_alpaca_token_set_precision": 0.5451984645091623, "eval_python_code_alpaca_token_set_recall": 0.46242735125039724, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 183750 }, { "epoch": 35.28, "eval_wikibio_accuracy": 0.32721875, "eval_wikibio_bleu_score": 6.370350377797937, "eval_wikibio_bleu_score_sem": 0.24008709359603292, "eval_wikibio_emb_cos_sim": 0.7414562702178955, "eval_wikibio_emb_cos_sim_sem": 0.009254724192578057, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.651345729827881, "eval_wikibio_n_ngrams_match_1": 10.288, "eval_wikibio_n_ngrams_match_2": 3.514, "eval_wikibio_n_ngrams_match_3": 1.342, "eval_wikibio_num_pred_words": 35.738, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.52647740897916, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3627555634804943, "eval_wikibio_runtime": 9.8323, "eval_wikibio_samples_per_second": 50.853, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3269860424070768, "eval_wikibio_token_set_f1_sem": 0.005464955681678214, "eval_wikibio_token_set_precision": 0.33299037134432585, "eval_wikibio_token_set_recall": 0.33610429493305144, "eval_wikibio_true_num_tokens": 61.1328125, "step": 183750 }, { "epoch": 35.28, "eval_nq_accuracy": 0.534875, "eval_nq_bleu_score": 12.090322151045951, "eval_nq_bleu_score_sem": 0.48937771440493366, "eval_nq_emb_cos_sim": 0.838043212890625, "eval_nq_emb_cos_sim_sem": 0.007222411718461349, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1370575428009033, "eval_nq_n_ngrams_match_1": 23.492, "eval_nq_n_ngrams_match_2": 8.674, "eval_nq_n_ngrams_match_3": 4.038, "eval_nq_num_pred_words": 49.254, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.474465155681264, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4544349350026854, "eval_nq_runtime": 10.787, "eval_nq_samples_per_second": 46.352, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4681845048577129, "eval_nq_token_set_f1_sem": 0.005061039467446109, "eval_nq_token_set_precision": 0.42737175885008416, "eval_nq_token_set_recall": 0.5270632761557374, "eval_nq_true_num_tokens": 64.0, "step": 183750 }, { "epoch": 35.28, "learning_rate": 0.001, "loss": 2.5075, "step": 183756 }, { "epoch": 35.29, "learning_rate": 0.001, "loss": 2.5042, "step": 183768 }, { "epoch": 35.29, "learning_rate": 0.001, "loss": 2.5067, "step": 183780 }, { "epoch": 35.29, "learning_rate": 0.001, "loss": 2.4981, "step": 183792 }, { "epoch": 35.29, "learning_rate": 0.001, "loss": 2.5107, "step": 183804 }, { "epoch": 35.29, "learning_rate": 0.001, "loss": 2.5061, "step": 183816 }, { "epoch": 35.3, "learning_rate": 0.001, "loss": 2.5034, "step": 183828 }, { "epoch": 35.3, "learning_rate": 0.001, "loss": 2.5054, "step": 183840 }, { "epoch": 35.3, "learning_rate": 0.001, "loss": 2.4997, "step": 183852 }, { "epoch": 35.3, "learning_rate": 0.001, "loss": 2.5014, "step": 183864 }, { "epoch": 35.31, "learning_rate": 0.001, "loss": 2.5104, "step": 183876 }, { "epoch": 35.31, "learning_rate": 0.001, "loss": 2.499, "step": 183888 }, { "epoch": 35.31, "learning_rate": 0.001, "loss": 2.5132, "step": 183900 }, { "epoch": 35.31, "learning_rate": 0.001, "loss": 2.5055, "step": 183912 }, { "epoch": 35.32, "learning_rate": 0.001, "loss": 2.5074, "step": 183924 }, { "epoch": 35.32, "learning_rate": 0.001, "loss": 2.5087, "step": 183936 }, { "epoch": 35.32, "learning_rate": 0.001, "loss": 2.5033, "step": 183948 }, { "epoch": 35.32, "learning_rate": 0.001, "loss": 2.5033, "step": 183960 }, { "epoch": 35.32, "learning_rate": 0.001, "loss": 2.5086, "step": 183972 }, { "epoch": 35.33, "learning_rate": 0.001, "loss": 2.502, "step": 183984 }, { "epoch": 35.33, "learning_rate": 0.001, "loss": 2.5009, "step": 183996 }, { "epoch": 35.33, "learning_rate": 0.001, "loss": 2.5084, "step": 184008 }, { "epoch": 35.33, "learning_rate": 0.001, "loss": 2.496, "step": 184020 }, { "epoch": 35.34, "learning_rate": 0.001, "loss": 2.5064, "step": 184032 }, { "epoch": 35.34, "learning_rate": 0.001, "loss": 2.5076, "step": 184044 }, { "epoch": 35.34, "learning_rate": 0.001, "loss": 2.5025, "step": 184056 }, { "epoch": 35.34, "learning_rate": 0.001, "loss": 2.5021, "step": 184068 }, { "epoch": 35.35, "learning_rate": 0.001, "loss": 2.5157, "step": 184080 }, { "epoch": 35.35, "learning_rate": 0.001, "loss": 2.5101, "step": 184092 }, { "epoch": 35.35, "learning_rate": 0.001, "loss": 2.5027, "step": 184104 }, { "epoch": 35.35, "learning_rate": 0.001, "loss": 2.5012, "step": 184116 }, { "epoch": 35.35, "learning_rate": 0.001, "loss": 2.5191, "step": 184128 }, { "epoch": 35.36, "learning_rate": 0.001, "loss": 2.5076, "step": 184140 }, { "epoch": 35.36, "learning_rate": 0.001, "loss": 2.4954, "step": 184152 }, { "epoch": 35.36, "learning_rate": 0.001, "loss": 2.5009, "step": 184164 }, { "epoch": 35.36, "learning_rate": 0.001, "loss": 2.5116, "step": 184176 }, { "epoch": 35.37, "learning_rate": 0.001, "loss": 2.5036, "step": 184188 }, { "epoch": 35.37, "learning_rate": 0.001, "loss": 2.5058, "step": 184200 }, { "epoch": 35.37, "learning_rate": 0.001, "loss": 2.5067, "step": 184212 }, { "epoch": 35.37, "learning_rate": 0.001, "loss": 2.5132, "step": 184224 }, { "epoch": 35.38, "learning_rate": 0.001, "loss": 2.5076, "step": 184236 }, { "epoch": 35.38, "learning_rate": 0.001, "loss": 2.4969, "step": 184248 }, { "epoch": 35.38, "learning_rate": 0.001, "loss": 2.5083, "step": 184260 }, { "epoch": 35.38, "learning_rate": 0.001, "loss": 2.5178, "step": 184272 }, { "epoch": 35.38, "learning_rate": 0.001, "loss": 2.5092, "step": 184284 }, { "epoch": 35.39, "learning_rate": 0.001, "loss": 2.5021, "step": 184296 }, { "epoch": 35.39, "learning_rate": 0.001, "loss": 2.5071, "step": 184308 }, { "epoch": 35.39, "learning_rate": 0.001, "loss": 2.5105, "step": 184320 }, { "epoch": 35.39, "learning_rate": 0.001, "loss": 2.508, "step": 184332 }, { "epoch": 35.4, "learning_rate": 0.001, "loss": 2.5087, "step": 184344 }, { "epoch": 35.4, "learning_rate": 0.001, "loss": 2.4968, "step": 184356 }, { "epoch": 35.4, "learning_rate": 0.001, "loss": 2.5004, "step": 184368 }, { "epoch": 35.4, "eval_ag_news_accuracy": 0.3275, "eval_ag_news_bleu_score": 5.02233286554183, "eval_ag_news_bleu_score_sem": 0.15801342207363395, "eval_ag_news_emb_cos_sim": 0.815298318862915, "eval_ag_news_emb_cos_sim_sem": 0.007963512683053798, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4890224933624268, "eval_ag_news_n_ngrams_match_1": 14.428, "eval_ag_news_n_ngrams_match_2": 3.282, "eval_ag_news_n_ngrams_match_3": 0.974, "eval_ag_news_num_pred_words": 46.588, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.75391488342903, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.357313787349157, "eval_ag_news_runtime": 10.1978, "eval_ag_news_samples_per_second": 49.03, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3594765000825628, "eval_ag_news_token_set_f1_sem": 0.004534021721079182, "eval_ag_news_token_set_precision": 0.3451295688016079, "eval_ag_news_token_set_recall": 0.39205604054841836, "eval_ag_news_true_num_tokens": 56.09375, "step": 184375 }, { "epoch": 35.4, "eval_anthropic_toxic_prompts_accuracy": 0.11596875, "eval_anthropic_toxic_prompts_bleu_score": 3.240663514343173, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12656691333969075, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.67909836769104, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0089736218351676, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.232971429824829, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.18, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.938, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 46.806, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.354885410310242, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21533841683439475, "eval_anthropic_toxic_prompts_runtime": 9.8612, "eval_anthropic_toxic_prompts_samples_per_second": 50.704, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.359768815340191, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006654633228710937, "eval_anthropic_toxic_prompts_token_set_precision": 0.4406971070022834, "eval_anthropic_toxic_prompts_token_set_recall": 0.32991240145405437, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 184375 }, { "epoch": 35.4, "eval_arxiv_accuracy": 0.35146875, "eval_arxiv_bleu_score": 4.525680555908677, "eval_arxiv_bleu_score_sem": 0.13355438782488488, "eval_arxiv_emb_cos_sim": 0.7820273041725159, "eval_arxiv_emb_cos_sim_sem": 0.006359780316187738, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.33813214302063, "eval_arxiv_n_ngrams_match_1": 15.652, "eval_arxiv_n_ngrams_match_2": 3.116, "eval_arxiv_n_ngrams_match_3": 0.708, "eval_arxiv_num_pred_words": 40.882, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.166466608715197, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37272214258905817, "eval_arxiv_runtime": 9.9784, "eval_arxiv_samples_per_second": 50.108, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.36720845480659464, "eval_arxiv_token_set_f1_sem": 0.004259083778969454, "eval_arxiv_token_set_precision": 0.3189695068701229, "eval_arxiv_token_set_recall": 0.4496757211632873, "eval_arxiv_true_num_tokens": 64.0, "step": 184375 }, { "epoch": 35.4, "eval_python_code_alpaca_accuracy": 0.1609375, "eval_python_code_alpaca_bleu_score": 4.6095404941324825, "eval_python_code_alpaca_bleu_score_sem": 0.13289128628504768, "eval_python_code_alpaca_emb_cos_sim": 0.7650666832923889, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235849977691245, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.85518217086792, "eval_python_code_alpaca_n_ngrams_match_1": 9.892, "eval_python_code_alpaca_n_ngrams_match_2": 3.0, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 44.232, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.3776026122755, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33486988706002796, "eval_python_code_alpaca_runtime": 10.4916, "eval_python_code_alpaca_samples_per_second": 47.657, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.48908110645062036, "eval_python_code_alpaca_token_set_f1_sem": 0.005396519787064682, "eval_python_code_alpaca_token_set_precision": 0.5410059954155667, "eval_python_code_alpaca_token_set_recall": 0.4670958976323572, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 184375 }, { "epoch": 35.4, "eval_wikibio_accuracy": 0.332375, "eval_wikibio_bleu_score": 6.125300218373875, "eval_wikibio_bleu_score_sem": 0.2167732904141481, "eval_wikibio_emb_cos_sim": 0.7407878041267395, "eval_wikibio_emb_cos_sim_sem": 0.009500050461047842, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6234383583068848, "eval_wikibio_n_ngrams_match_1": 10.216, "eval_wikibio_n_ngrams_match_2": 3.444, "eval_wikibio_n_ngrams_match_3": 1.282, "eval_wikibio_num_pred_words": 36.448, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.46616871981944, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35692812588875417, "eval_wikibio_runtime": 10.0085, "eval_wikibio_samples_per_second": 49.957, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3241375753432324, "eval_wikibio_token_set_f1_sem": 0.0054715503618298755, "eval_wikibio_token_set_precision": 0.33067655336441815, "eval_wikibio_token_set_recall": 0.3367545729514021, "eval_wikibio_true_num_tokens": 61.1328125, "step": 184375 }, { "epoch": 35.4, "eval_nq_accuracy": 0.53440625, "eval_nq_bleu_score": 12.143635553439868, "eval_nq_bleu_score_sem": 0.4866815622986901, "eval_nq_emb_cos_sim": 0.8312341570854187, "eval_nq_emb_cos_sim_sem": 0.007448146946363158, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1362648010253906, "eval_nq_n_ngrams_match_1": 23.462, "eval_nq_n_ngrams_match_2": 8.722, "eval_nq_n_ngrams_match_3": 4.032, "eval_nq_num_pred_words": 49.122, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.467749755268137, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4550932441780584, "eval_nq_runtime": 10.1267, "eval_nq_samples_per_second": 49.375, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.46999534890370204, "eval_nq_token_set_f1_sem": 0.004968356697842554, "eval_nq_token_set_precision": 0.4265905046927454, "eval_nq_token_set_recall": 0.5309044132825025, "eval_nq_true_num_tokens": 64.0, "step": 184375 }, { "epoch": 35.4, "learning_rate": 0.001, "loss": 2.5042, "step": 184380 }, { "epoch": 35.41, "learning_rate": 0.001, "loss": 2.5063, "step": 184392 }, { "epoch": 35.41, "learning_rate": 0.001, "loss": 2.5048, "step": 184404 }, { "epoch": 35.41, "learning_rate": 0.001, "loss": 2.5215, "step": 184416 }, { "epoch": 35.41, "learning_rate": 0.001, "loss": 2.5133, "step": 184428 }, { "epoch": 35.41, "learning_rate": 0.001, "loss": 2.5071, "step": 184440 }, { "epoch": 35.42, "learning_rate": 0.001, "loss": 2.5024, "step": 184452 }, { "epoch": 35.42, "learning_rate": 0.001, "loss": 2.5143, "step": 184464 }, { "epoch": 35.42, "learning_rate": 0.001, "loss": 2.5082, "step": 184476 }, { "epoch": 35.42, "learning_rate": 0.001, "loss": 2.5059, "step": 184488 }, { "epoch": 35.43, "learning_rate": 0.001, "loss": 2.5098, "step": 184500 }, { "epoch": 35.43, "learning_rate": 0.001, "loss": 2.5083, "step": 184512 }, { "epoch": 35.43, "learning_rate": 0.001, "loss": 2.5104, "step": 184524 }, { "epoch": 35.43, "learning_rate": 0.001, "loss": 2.5124, "step": 184536 }, { "epoch": 35.44, "learning_rate": 0.001, "loss": 2.5095, "step": 184548 }, { "epoch": 35.44, "learning_rate": 0.001, "loss": 2.4981, "step": 184560 }, { "epoch": 35.44, "learning_rate": 0.001, "loss": 2.5137, "step": 184572 }, { "epoch": 35.44, "learning_rate": 0.001, "loss": 2.5085, "step": 184584 }, { "epoch": 35.44, "learning_rate": 0.001, "loss": 2.5138, "step": 184596 }, { "epoch": 35.45, "learning_rate": 0.001, "loss": 2.5107, "step": 184608 }, { "epoch": 35.45, "learning_rate": 0.001, "loss": 2.5088, "step": 184620 }, { "epoch": 35.45, "learning_rate": 0.001, "loss": 2.5136, "step": 184632 }, { "epoch": 35.45, "learning_rate": 0.001, "loss": 2.5105, "step": 184644 }, { "epoch": 35.46, "learning_rate": 0.001, "loss": 2.5042, "step": 184656 }, { "epoch": 35.46, "learning_rate": 0.001, "loss": 2.5034, "step": 184668 }, { "epoch": 35.46, "learning_rate": 0.001, "loss": 2.5175, "step": 184680 }, { "epoch": 35.46, "learning_rate": 0.001, "loss": 2.5097, "step": 184692 }, { "epoch": 35.47, "learning_rate": 0.001, "loss": 2.507, "step": 184704 }, { "epoch": 35.47, "learning_rate": 0.001, "loss": 2.508, "step": 184716 }, { "epoch": 35.47, "learning_rate": 0.001, "loss": 2.5081, "step": 184728 }, { "epoch": 35.47, "learning_rate": 0.001, "loss": 2.494, "step": 184740 }, { "epoch": 35.47, "learning_rate": 0.001, "loss": 2.5043, "step": 184752 }, { "epoch": 35.48, "learning_rate": 0.001, "loss": 2.5199, "step": 184764 }, { "epoch": 35.48, "learning_rate": 0.001, "loss": 2.5124, "step": 184776 }, { "epoch": 35.48, "learning_rate": 0.001, "loss": 2.5142, "step": 184788 }, { "epoch": 35.48, "learning_rate": 0.001, "loss": 2.5085, "step": 184800 }, { "epoch": 35.49, "learning_rate": 0.001, "loss": 2.5007, "step": 184812 }, { "epoch": 35.49, "learning_rate": 0.001, "loss": 2.5139, "step": 184824 }, { "epoch": 35.49, "learning_rate": 0.001, "loss": 2.5112, "step": 184836 }, { "epoch": 35.49, "learning_rate": 0.001, "loss": 2.5148, "step": 184848 }, { "epoch": 35.5, "learning_rate": 0.001, "loss": 2.5113, "step": 184860 }, { "epoch": 35.5, "learning_rate": 0.001, "loss": 2.514, "step": 184872 }, { "epoch": 35.5, "learning_rate": 0.001, "loss": 2.5068, "step": 184884 }, { "epoch": 35.5, "learning_rate": 0.001, "loss": 2.5002, "step": 184896 }, { "epoch": 35.5, "learning_rate": 0.001, "loss": 2.5087, "step": 184908 }, { "epoch": 35.51, "learning_rate": 0.001, "loss": 2.51, "step": 184920 }, { "epoch": 35.51, "learning_rate": 0.001, "loss": 2.507, "step": 184932 }, { "epoch": 35.51, "learning_rate": 0.001, "loss": 2.5098, "step": 184944 }, { "epoch": 35.51, "learning_rate": 0.001, "loss": 2.5083, "step": 184956 }, { "epoch": 35.52, "learning_rate": 0.001, "loss": 2.5049, "step": 184968 }, { "epoch": 35.52, "learning_rate": 0.001, "loss": 2.5107, "step": 184980 }, { "epoch": 35.52, "learning_rate": 0.001, "loss": 2.5111, "step": 184992 }, { "epoch": 35.52, "eval_ag_news_accuracy": 0.32815625, "eval_ag_news_bleu_score": 5.118036734372337, "eval_ag_news_bleu_score_sem": 0.16270480864003506, "eval_ag_news_emb_cos_sim": 0.8269373178482056, "eval_ag_news_emb_cos_sim_sem": 0.006093308183566626, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4824256896972656, "eval_ag_news_n_ngrams_match_1": 14.522, "eval_ag_news_n_ngrams_match_2": 3.398, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 46.446, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.53855486256443, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36297070002481496, "eval_ag_news_runtime": 10.445, "eval_ag_news_samples_per_second": 47.87, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.36068298264861176, "eval_ag_news_token_set_f1_sem": 0.004496430395808484, "eval_ag_news_token_set_precision": 0.3475763128841663, "eval_ag_news_token_set_recall": 0.3881483723768435, "eval_ag_news_true_num_tokens": 56.09375, "step": 185000 }, { "epoch": 35.52, "eval_anthropic_toxic_prompts_accuracy": 0.115875, "eval_anthropic_toxic_prompts_bleu_score": 3.315489923441029, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13205011448401568, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6768086552619934, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00871899625970059, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2032787799835205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.794, "eval_anthropic_toxic_prompts_num_pred_words": 46.874, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.61309897753817, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21726773142373662, "eval_anthropic_toxic_prompts_runtime": 9.8208, "eval_anthropic_toxic_prompts_samples_per_second": 50.912, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.36199767072267736, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006773983183416234, "eval_anthropic_toxic_prompts_token_set_precision": 0.4412133415317718, "eval_anthropic_toxic_prompts_token_set_recall": 0.33268008669339616, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 185000 }, { "epoch": 35.52, "eval_arxiv_accuracy": 0.35125, "eval_arxiv_bleu_score": 4.4631570942365615, "eval_arxiv_bleu_score_sem": 0.13282769479159373, "eval_arxiv_emb_cos_sim": 0.7777361869812012, "eval_arxiv_emb_cos_sim_sem": 0.006621751297096788, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3531644344329834, "eval_arxiv_n_ngrams_match_1": 15.354, "eval_arxiv_n_ngrams_match_2": 3.01, "eval_arxiv_n_ngrams_match_3": 0.678, "eval_arxiv_num_pred_words": 40.414, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.593071534325226, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36850896703069475, "eval_arxiv_runtime": 10.28, "eval_arxiv_samples_per_second": 48.638, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3603864486751362, "eval_arxiv_token_set_f1_sem": 0.004118420268863841, "eval_arxiv_token_set_precision": 0.31294214276877663, "eval_arxiv_token_set_recall": 0.44204019759506297, "eval_arxiv_true_num_tokens": 64.0, "step": 185000 }, { "epoch": 35.52, "eval_python_code_alpaca_accuracy": 0.160875, "eval_python_code_alpaca_bleu_score": 4.581966012971516, "eval_python_code_alpaca_bleu_score_sem": 0.14082114601207388, "eval_python_code_alpaca_emb_cos_sim": 0.7654900550842285, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007408754130984648, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8690898418426514, "eval_python_code_alpaca_n_ngrams_match_1": 9.854, "eval_python_code_alpaca_n_ngrams_match_2": 2.892, "eval_python_code_alpaca_n_ngrams_match_3": 0.95, "eval_python_code_alpaca_num_pred_words": 43.172, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.620973026781638, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33969685596733734, "eval_python_code_alpaca_runtime": 10.9417, "eval_python_code_alpaca_samples_per_second": 45.697, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4823213908180107, "eval_python_code_alpaca_token_set_f1_sem": 0.005541826362876887, "eval_python_code_alpaca_token_set_precision": 0.5359328765540199, "eval_python_code_alpaca_token_set_recall": 0.45833553577148034, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 185000 }, { "epoch": 35.52, "eval_wikibio_accuracy": 0.32946875, "eval_wikibio_bleu_score": 6.163874855344158, "eval_wikibio_bleu_score_sem": 0.2188857488939065, "eval_wikibio_emb_cos_sim": 0.7442362904548645, "eval_wikibio_emb_cos_sim_sem": 0.009862612139917728, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5925068855285645, "eval_wikibio_n_ngrams_match_1": 10.242, "eval_wikibio_n_ngrams_match_2": 3.472, "eval_wikibio_n_ngrams_match_3": 1.23, "eval_wikibio_num_pred_words": 35.656, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.32502455833161, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3661457900991682, "eval_wikibio_runtime": 9.8336, "eval_wikibio_samples_per_second": 50.846, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.32827031754490676, "eval_wikibio_token_set_f1_sem": 0.005304280767558676, "eval_wikibio_token_set_precision": 0.3328358761016736, "eval_wikibio_token_set_recall": 0.34035115610550265, "eval_wikibio_true_num_tokens": 61.1328125, "step": 185000 }, { "epoch": 35.52, "eval_nq_accuracy": 0.53575, "eval_nq_bleu_score": 11.732349230505335, "eval_nq_bleu_score_sem": 0.4648652582328716, "eval_nq_emb_cos_sim": 0.8390904664993286, "eval_nq_emb_cos_sim_sem": 0.006988886567574614, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.132225275039673, "eval_nq_n_ngrams_match_1": 23.304, "eval_nq_n_ngrams_match_2": 8.6, "eval_nq_n_ngrams_match_3": 3.892, "eval_nq_num_pred_words": 48.854, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.433613054555531, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4529422530628724, "eval_nq_runtime": 10.4006, "eval_nq_samples_per_second": 48.074, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4695926394503976, "eval_nq_token_set_f1_sem": 0.005010782784351347, "eval_nq_token_set_precision": 0.4266045585364064, "eval_nq_token_set_recall": 0.5310916596797804, "eval_nq_true_num_tokens": 64.0, "step": 185000 }, { "epoch": 35.52, "learning_rate": 0.001, "loss": 2.5151, "step": 185004 }, { "epoch": 35.53, "learning_rate": 0.001, "loss": 2.52, "step": 185016 }, { "epoch": 35.53, "learning_rate": 0.001, "loss": 2.4974, "step": 185028 }, { "epoch": 35.53, "learning_rate": 0.001, "loss": 2.5116, "step": 185040 }, { "epoch": 35.53, "learning_rate": 0.001, "loss": 2.5083, "step": 185052 }, { "epoch": 35.53, "learning_rate": 0.001, "loss": 2.5142, "step": 185064 }, { "epoch": 35.54, "learning_rate": 0.001, "loss": 2.5086, "step": 185076 }, { "epoch": 35.54, "learning_rate": 0.001, "loss": 2.5127, "step": 185088 }, { "epoch": 35.54, "learning_rate": 0.001, "loss": 2.5179, "step": 185100 }, { "epoch": 35.54, "learning_rate": 0.001, "loss": 2.5185, "step": 185112 }, { "epoch": 35.55, "learning_rate": 0.001, "loss": 2.5165, "step": 185124 }, { "epoch": 35.55, "learning_rate": 0.001, "loss": 2.5162, "step": 185136 }, { "epoch": 35.55, "learning_rate": 0.001, "loss": 2.5125, "step": 185148 }, { "epoch": 35.55, "learning_rate": 0.001, "loss": 2.5092, "step": 185160 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 2.513, "step": 185172 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 2.5016, "step": 185184 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 2.5107, "step": 185196 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 2.5073, "step": 185208 }, { "epoch": 35.56, "learning_rate": 0.001, "loss": 2.511, "step": 185220 }, { "epoch": 35.57, "learning_rate": 0.001, "loss": 2.5204, "step": 185232 }, { "epoch": 35.57, "learning_rate": 0.001, "loss": 2.5122, "step": 185244 }, { "epoch": 35.57, "learning_rate": 0.001, "loss": 2.5163, "step": 185256 }, { "epoch": 35.57, "learning_rate": 0.001, "loss": 2.5114, "step": 185268 }, { "epoch": 35.58, "learning_rate": 0.001, "loss": 2.5109, "step": 185280 }, { "epoch": 35.58, "learning_rate": 0.001, "loss": 2.5131, "step": 185292 }, { "epoch": 35.58, "learning_rate": 0.001, "loss": 2.4937, "step": 185304 }, { "epoch": 35.58, "learning_rate": 0.001, "loss": 2.5162, "step": 185316 }, { "epoch": 35.59, "learning_rate": 0.001, "loss": 2.5137, "step": 185328 }, { "epoch": 35.59, "learning_rate": 0.001, "loss": 2.5083, "step": 185340 }, { "epoch": 35.59, "learning_rate": 0.001, "loss": 2.5097, "step": 185352 }, { "epoch": 35.59, "learning_rate": 0.001, "loss": 2.4987, "step": 185364 }, { "epoch": 35.59, "learning_rate": 0.001, "loss": 2.514, "step": 185376 }, { "epoch": 35.6, "learning_rate": 0.001, "loss": 2.5093, "step": 185388 }, { "epoch": 35.6, "learning_rate": 0.001, "loss": 2.5179, "step": 185400 }, { "epoch": 35.6, "learning_rate": 0.001, "loss": 2.5088, "step": 185412 }, { "epoch": 35.6, "learning_rate": 0.001, "loss": 2.5215, "step": 185424 }, { "epoch": 35.61, "learning_rate": 0.001, "loss": 2.5098, "step": 185436 }, { "epoch": 35.61, "learning_rate": 0.001, "loss": 2.5054, "step": 185448 }, { "epoch": 35.61, "learning_rate": 0.001, "loss": 2.5115, "step": 185460 }, { "epoch": 35.61, "learning_rate": 0.001, "loss": 2.5121, "step": 185472 }, { "epoch": 35.62, "learning_rate": 0.001, "loss": 2.5103, "step": 185484 }, { "epoch": 35.62, "learning_rate": 0.001, "loss": 2.5063, "step": 185496 }, { "epoch": 35.62, "learning_rate": 0.001, "loss": 2.5157, "step": 185508 }, { "epoch": 35.62, "learning_rate": 0.001, "loss": 2.5114, "step": 185520 }, { "epoch": 35.62, "learning_rate": 0.001, "loss": 2.5048, "step": 185532 }, { "epoch": 35.63, "learning_rate": 0.001, "loss": 2.5122, "step": 185544 }, { "epoch": 35.63, "learning_rate": 0.001, "loss": 2.52, "step": 185556 }, { "epoch": 35.63, "learning_rate": 0.001, "loss": 2.5124, "step": 185568 }, { "epoch": 35.63, "learning_rate": 0.001, "loss": 2.5102, "step": 185580 }, { "epoch": 35.64, "learning_rate": 0.001, "loss": 2.5158, "step": 185592 }, { "epoch": 35.64, "learning_rate": 0.001, "loss": 2.5191, "step": 185604 }, { "epoch": 35.64, "learning_rate": 0.001, "loss": 2.5181, "step": 185616 }, { "epoch": 35.64, "eval_ag_news_accuracy": 0.32784375, "eval_ag_news_bleu_score": 4.969652937819136, "eval_ag_news_bleu_score_sem": 0.15497264702360217, "eval_ag_news_emb_cos_sim": 0.8214932680130005, "eval_ag_news_emb_cos_sim_sem": 0.0066276590998779, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.499420404434204, "eval_ag_news_n_ngrams_match_1": 14.482, "eval_ag_news_n_ngrams_match_2": 3.21, "eval_ag_news_n_ngrams_match_3": 0.892, "eval_ag_news_num_pred_words": 46.66, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.09626395075704, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36044240578901077, "eval_ag_news_runtime": 10.3922, "eval_ag_news_samples_per_second": 48.113, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3616891505286832, "eval_ag_news_token_set_f1_sem": 0.004351432475538731, "eval_ag_news_token_set_precision": 0.3477839170834988, "eval_ag_news_token_set_recall": 0.3910548282202589, "eval_ag_news_true_num_tokens": 56.09375, "step": 185625 }, { "epoch": 35.64, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.330412032871209, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13440409642053766, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6757363080978394, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009961537378817876, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2004940509796143, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.018, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 47.116, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.544653512206903, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2154228106433061, "eval_anthropic_toxic_prompts_runtime": 9.7583, "eval_anthropic_toxic_prompts_samples_per_second": 51.238, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3609275836280132, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006777635325448888, "eval_anthropic_toxic_prompts_token_set_precision": 0.44523539864379863, "eval_anthropic_toxic_prompts_token_set_recall": 0.32516753286760575, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 185625 }, { "epoch": 35.64, "eval_arxiv_accuracy": 0.3505625, "eval_arxiv_bleu_score": 4.55711877997823, "eval_arxiv_bleu_score_sem": 0.13947960612331903, "eval_arxiv_emb_cos_sim": 0.777479887008667, "eval_arxiv_emb_cos_sim_sem": 0.0074299316052723555, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3542287349700928, "eval_arxiv_n_ngrams_match_1": 15.428, "eval_arxiv_n_ngrams_match_2": 3.048, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 40.398, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.623519355659003, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37035073746534186, "eval_arxiv_runtime": 9.9105, "eval_arxiv_samples_per_second": 50.451, "eval_arxiv_steps_per_second": 0.101, "eval_arxiv_token_set_f1": 0.36150438516886774, "eval_arxiv_token_set_f1_sem": 0.004284446126411858, "eval_arxiv_token_set_precision": 0.31371889064225117, "eval_arxiv_token_set_recall": 0.4444403661489109, "eval_arxiv_true_num_tokens": 64.0, "step": 185625 }, { "epoch": 35.64, "eval_python_code_alpaca_accuracy": 0.16165625, "eval_python_code_alpaca_bleu_score": 5.122092272355227, "eval_python_code_alpaca_bleu_score_sem": 0.16315070949104568, "eval_python_code_alpaca_emb_cos_sim": 0.7703523635864258, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007096247103077512, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.857260227203369, "eval_python_code_alpaca_n_ngrams_match_1": 10.246, "eval_python_code_alpaca_n_ngrams_match_2": 3.25, "eval_python_code_alpaca_n_ngrams_match_3": 1.208, "eval_python_code_alpaca_num_pred_words": 44.29, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.413751796490857, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34227950149125086, "eval_python_code_alpaca_runtime": 9.7595, "eval_python_code_alpaca_samples_per_second": 51.232, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.4877912364045303, "eval_python_code_alpaca_token_set_f1_sem": 0.005282165823117606, "eval_python_code_alpaca_token_set_precision": 0.5613433088662251, "eval_python_code_alpaca_token_set_recall": 0.4497674515220422, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 185625 }, { "epoch": 35.64, "eval_wikibio_accuracy": 0.325875, "eval_wikibio_bleu_score": 6.1927008019799725, "eval_wikibio_bleu_score_sem": 0.21627049446623878, "eval_wikibio_emb_cos_sim": 0.7465753555297852, "eval_wikibio_emb_cos_sim_sem": 0.009358737346346879, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6515841484069824, "eval_wikibio_n_ngrams_match_1": 10.244, "eval_wikibio_n_ngrams_match_2": 3.436, "eval_wikibio_n_ngrams_match_3": 1.302, "eval_wikibio_num_pred_words": 36.276, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.535663932056174, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36233505883510564, "eval_wikibio_runtime": 9.9574, "eval_wikibio_samples_per_second": 50.214, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.32679210392980357, "eval_wikibio_token_set_f1_sem": 0.0052421915807275565, "eval_wikibio_token_set_precision": 0.33289120139715345, "eval_wikibio_token_set_recall": 0.3387257390609709, "eval_wikibio_true_num_tokens": 61.1328125, "step": 185625 }, { "epoch": 35.64, "eval_nq_accuracy": 0.5338125, "eval_nq_bleu_score": 11.953313877716289, "eval_nq_bleu_score_sem": 0.47472028892851664, "eval_nq_emb_cos_sim": 0.8371002674102783, "eval_nq_emb_cos_sim_sem": 0.0070204506058422255, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1374948024749756, "eval_nq_n_ngrams_match_1": 23.538, "eval_nq_n_ngrams_match_2": 8.624, "eval_nq_n_ngrams_match_3": 3.966, "eval_nq_num_pred_words": 49.504, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.478171507813284, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4566189836674177, "eval_nq_runtime": 10.8201, "eval_nq_samples_per_second": 46.21, "eval_nq_steps_per_second": 0.092, "eval_nq_token_set_f1": 0.4696661307912864, "eval_nq_token_set_f1_sem": 0.00488059178251212, "eval_nq_token_set_precision": 0.4286679982147181, "eval_nq_token_set_recall": 0.5257964283648926, "eval_nq_true_num_tokens": 64.0, "step": 185625 }, { "epoch": 35.64, "learning_rate": 0.001, "loss": 2.5138, "step": 185628 }, { "epoch": 35.65, "learning_rate": 0.001, "loss": 2.5078, "step": 185640 }, { "epoch": 35.65, "learning_rate": 0.001, "loss": 2.5142, "step": 185652 }, { "epoch": 35.65, "learning_rate": 0.001, "loss": 2.5154, "step": 185664 }, { "epoch": 35.65, "learning_rate": 0.001, "loss": 2.5086, "step": 185676 }, { "epoch": 35.65, "learning_rate": 0.001, "loss": 2.5159, "step": 185688 }, { "epoch": 35.66, "learning_rate": 0.001, "loss": 2.5166, "step": 185700 }, { "epoch": 35.66, "learning_rate": 0.001, "loss": 2.5097, "step": 185712 }, { "epoch": 35.66, "learning_rate": 0.001, "loss": 2.5176, "step": 185724 }, { "epoch": 35.66, "learning_rate": 0.001, "loss": 2.504, "step": 185736 }, { "epoch": 35.67, "learning_rate": 0.001, "loss": 2.5159, "step": 185748 }, { "epoch": 35.67, "learning_rate": 0.001, "loss": 2.5124, "step": 185760 }, { "epoch": 35.67, "learning_rate": 0.001, "loss": 2.5165, "step": 185772 }, { "epoch": 35.67, "learning_rate": 0.001, "loss": 2.5126, "step": 185784 }, { "epoch": 35.68, "learning_rate": 0.001, "loss": 2.5222, "step": 185796 }, { "epoch": 35.68, "learning_rate": 0.001, "loss": 2.5071, "step": 185808 }, { "epoch": 35.68, "learning_rate": 0.001, "loss": 2.5119, "step": 185820 }, { "epoch": 35.68, "learning_rate": 0.001, "loss": 2.5164, "step": 185832 }, { "epoch": 35.68, "learning_rate": 0.001, "loss": 2.5096, "step": 185844 }, { "epoch": 35.69, "learning_rate": 0.001, "loss": 2.5057, "step": 185856 }, { "epoch": 35.69, "learning_rate": 0.001, "loss": 2.5155, "step": 185868 }, { "epoch": 35.69, "learning_rate": 0.001, "loss": 2.5149, "step": 185880 }, { "epoch": 35.69, "learning_rate": 0.001, "loss": 2.5158, "step": 185892 }, { "epoch": 35.7, "learning_rate": 0.001, "loss": 2.5159, "step": 185904 }, { "epoch": 35.7, "learning_rate": 0.001, "loss": 2.5155, "step": 185916 }, { "epoch": 35.7, "learning_rate": 0.001, "loss": 2.5094, "step": 185928 }, { "epoch": 35.7, "learning_rate": 0.001, "loss": 2.5172, "step": 185940 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 2.5108, "step": 185952 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 2.5227, "step": 185964 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 2.5106, "step": 185976 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 2.5066, "step": 185988 }, { "epoch": 35.71, "learning_rate": 0.001, "loss": 2.5062, "step": 186000 }, { "epoch": 35.72, "learning_rate": 0.001, "loss": 2.5119, "step": 186012 }, { "epoch": 35.72, "learning_rate": 0.001, "loss": 2.5125, "step": 186024 }, { "epoch": 35.72, "learning_rate": 0.001, "loss": 2.5089, "step": 186036 }, { "epoch": 35.72, "learning_rate": 0.001, "loss": 2.5072, "step": 186048 }, { "epoch": 35.73, "learning_rate": 0.001, "loss": 2.513, "step": 186060 }, { "epoch": 35.73, "learning_rate": 0.001, "loss": 2.5207, "step": 186072 }, { "epoch": 35.73, "learning_rate": 0.001, "loss": 2.5199, "step": 186084 }, { "epoch": 35.73, "learning_rate": 0.001, "loss": 2.5227, "step": 186096 }, { "epoch": 35.74, "learning_rate": 0.001, "loss": 2.5217, "step": 186108 }, { "epoch": 35.74, "learning_rate": 0.001, "loss": 2.5154, "step": 186120 }, { "epoch": 35.74, "learning_rate": 0.001, "loss": 2.5166, "step": 186132 }, { "epoch": 35.74, "learning_rate": 0.001, "loss": 2.5025, "step": 186144 }, { "epoch": 35.74, "learning_rate": 0.001, "loss": 2.5107, "step": 186156 }, { "epoch": 35.75, "learning_rate": 0.001, "loss": 2.5156, "step": 186168 }, { "epoch": 35.75, "learning_rate": 0.001, "loss": 2.5046, "step": 186180 }, { "epoch": 35.75, "learning_rate": 0.001, "loss": 2.5146, "step": 186192 }, { "epoch": 35.75, "learning_rate": 0.001, "loss": 2.5045, "step": 186204 }, { "epoch": 35.76, "learning_rate": 0.001, "loss": 2.5123, "step": 186216 }, { "epoch": 35.76, "learning_rate": 0.001, "loss": 2.5078, "step": 186228 }, { "epoch": 35.76, "learning_rate": 0.001, "loss": 2.508, "step": 186240 }, { "epoch": 35.76, "eval_ag_news_accuracy": 0.3259375, "eval_ag_news_bleu_score": 5.027623565530976, "eval_ag_news_bleu_score_sem": 0.15916542707684467, "eval_ag_news_emb_cos_sim": 0.8101160526275635, "eval_ag_news_emb_cos_sim_sem": 0.007841746486438778, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4996113777160645, "eval_ag_news_n_ngrams_match_1": 14.296, "eval_ag_news_n_ngrams_match_2": 3.234, "eval_ag_news_n_ngrams_match_3": 0.952, "eval_ag_news_num_pred_words": 46.12, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.10258505646297, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3566406920462549, "eval_ag_news_runtime": 10.2616, "eval_ag_news_samples_per_second": 48.725, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3551098809060948, "eval_ag_news_token_set_f1_sem": 0.004530021713566501, "eval_ag_news_token_set_precision": 0.33988054363720377, "eval_ag_news_token_set_recall": 0.38557489037148485, "eval_ag_news_true_num_tokens": 56.09375, "step": 186250 }, { "epoch": 35.76, "eval_anthropic_toxic_prompts_accuracy": 0.11596875, "eval_anthropic_toxic_prompts_bleu_score": 3.2895778757698726, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12464101902062759, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6802198886871338, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009006047249651653, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2073121070861816, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.324, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.038, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 47.104, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.712572125746945, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21619338215415287, "eval_anthropic_toxic_prompts_runtime": 11.82, "eval_anthropic_toxic_prompts_samples_per_second": 42.301, "eval_anthropic_toxic_prompts_steps_per_second": 0.085, "eval_anthropic_toxic_prompts_token_set_f1": 0.3683553251888338, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0069471203334324945, "eval_anthropic_toxic_prompts_token_set_precision": 0.44061712802162034, "eval_anthropic_toxic_prompts_token_set_recall": 0.34575818135423747, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 186250 }, { "epoch": 35.76, "eval_arxiv_accuracy": 0.35140625, "eval_arxiv_bleu_score": 4.339785672492532, "eval_arxiv_bleu_score_sem": 0.125440224720847, "eval_arxiv_emb_cos_sim": 0.7803007364273071, "eval_arxiv_emb_cos_sim_sem": 0.007555921706361578, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.361938953399658, "eval_arxiv_n_ngrams_match_1": 15.396, "eval_arxiv_n_ngrams_match_2": 2.982, "eval_arxiv_n_ngrams_match_3": 0.624, "eval_arxiv_num_pred_words": 40.98, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.84506593082663, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36655014803718444, "eval_arxiv_runtime": 10.1295, "eval_arxiv_samples_per_second": 49.361, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3596421632721308, "eval_arxiv_token_set_f1_sem": 0.004136908934240977, "eval_arxiv_token_set_precision": 0.31221322105244453, "eval_arxiv_token_set_recall": 0.44213838864441857, "eval_arxiv_true_num_tokens": 64.0, "step": 186250 }, { "epoch": 35.76, "eval_python_code_alpaca_accuracy": 0.16075, "eval_python_code_alpaca_bleu_score": 4.822352781871232, "eval_python_code_alpaca_bleu_score_sem": 0.1522812408518182, "eval_python_code_alpaca_emb_cos_sim": 0.7608482837677002, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007524945748339595, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8470802307128906, "eval_python_code_alpaca_n_ngrams_match_1": 10.016, "eval_python_code_alpaca_n_ngrams_match_2": 3.02, "eval_python_code_alpaca_n_ngrams_match_3": 1.048, "eval_python_code_alpaca_num_pred_words": 43.718, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.237379124066262, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3399097134245904, "eval_python_code_alpaca_runtime": 10.9552, "eval_python_code_alpaca_samples_per_second": 45.64, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4868519106804113, "eval_python_code_alpaca_token_set_f1_sem": 0.005324954166205798, "eval_python_code_alpaca_token_set_precision": 0.549480602167133, "eval_python_code_alpaca_token_set_recall": 0.45788679034845914, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 186250 }, { "epoch": 35.76, "eval_wikibio_accuracy": 0.326, "eval_wikibio_bleu_score": 6.177749917935727, "eval_wikibio_bleu_score_sem": 0.22831022191086195, "eval_wikibio_emb_cos_sim": 0.7269377708435059, "eval_wikibio_emb_cos_sim_sem": 0.011205283634384772, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6449012756347656, "eval_wikibio_n_ngrams_match_1": 9.854, "eval_wikibio_n_ngrams_match_2": 3.342, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 34.882, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.27899359566418, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3545169627204853, "eval_wikibio_runtime": 9.8457, "eval_wikibio_samples_per_second": 50.783, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.31934678746931244, "eval_wikibio_token_set_f1_sem": 0.005682141274654576, "eval_wikibio_token_set_precision": 0.3211814485452031, "eval_wikibio_token_set_recall": 0.33600770913283734, "eval_wikibio_true_num_tokens": 61.1328125, "step": 186250 }, { "epoch": 35.76, "eval_nq_accuracy": 0.53353125, "eval_nq_bleu_score": 12.109511152050722, "eval_nq_bleu_score_sem": 0.48191282249374406, "eval_nq_emb_cos_sim": 0.8405352830886841, "eval_nq_emb_cos_sim_sem": 0.0069766053710452434, "eval_nq_emb_top1_equal": 0.359375, "eval_nq_emb_top1_equal_sem": 0.04257689651385297, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1373345851898193, "eval_nq_n_ngrams_match_1": 23.344, "eval_nq_n_ngrams_match_2": 8.656, "eval_nq_n_ngrams_match_3": 4.036, "eval_nq_num_pred_words": 49.212, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.476813267000946, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45299354503655653, "eval_nq_runtime": 10.1289, "eval_nq_samples_per_second": 49.364, "eval_nq_steps_per_second": 0.099, "eval_nq_token_set_f1": 0.46790660942990175, "eval_nq_token_set_f1_sem": 0.004916402100974587, "eval_nq_token_set_precision": 0.42637261787270686, "eval_nq_token_set_recall": 0.5273577632561018, "eval_nq_true_num_tokens": 64.0, "step": 186250 }, { "epoch": 35.76, "learning_rate": 0.001, "loss": 2.5122, "step": 186252 }, { "epoch": 35.76, "learning_rate": 0.001, "loss": 2.5061, "step": 186264 }, { "epoch": 35.77, "learning_rate": 0.001, "loss": 2.513, "step": 186276 }, { "epoch": 35.77, "learning_rate": 0.001, "loss": 2.5095, "step": 186288 }, { "epoch": 35.77, "learning_rate": 0.001, "loss": 2.5177, "step": 186300 }, { "epoch": 35.77, "learning_rate": 0.001, "loss": 2.5079, "step": 186312 }, { "epoch": 35.78, "learning_rate": 0.001, "loss": 2.5101, "step": 186324 }, { "epoch": 35.78, "learning_rate": 0.001, "loss": 2.5161, "step": 186336 }, { "epoch": 35.78, "learning_rate": 0.001, "loss": 2.5263, "step": 186348 }, { "epoch": 35.78, "learning_rate": 0.001, "loss": 2.5139, "step": 186360 }, { "epoch": 35.79, "learning_rate": 0.001, "loss": 2.5149, "step": 186372 }, { "epoch": 35.79, "learning_rate": 0.001, "loss": 2.5076, "step": 186384 }, { "epoch": 35.79, "learning_rate": 0.001, "loss": 2.5146, "step": 186396 }, { "epoch": 35.79, "learning_rate": 0.001, "loss": 2.5072, "step": 186408 }, { "epoch": 35.79, "learning_rate": 0.001, "loss": 2.5199, "step": 186420 }, { "epoch": 35.8, "learning_rate": 0.001, "loss": 2.5125, "step": 186432 }, { "epoch": 35.8, "learning_rate": 0.001, "loss": 2.5127, "step": 186444 }, { "epoch": 35.8, "learning_rate": 0.001, "loss": 2.5161, "step": 186456 }, { "epoch": 35.8, "learning_rate": 0.001, "loss": 2.5144, "step": 186468 }, { "epoch": 35.81, "learning_rate": 0.001, "loss": 2.5252, "step": 186480 }, { "epoch": 35.81, "learning_rate": 0.001, "loss": 2.5149, "step": 186492 }, { "epoch": 35.81, "learning_rate": 0.001, "loss": 2.5253, "step": 186504 }, { "epoch": 35.81, "learning_rate": 0.001, "loss": 2.5241, "step": 186516 }, { "epoch": 35.82, "learning_rate": 0.001, "loss": 2.5208, "step": 186528 }, { "epoch": 35.82, "learning_rate": 0.001, "loss": 2.5182, "step": 186540 }, { "epoch": 35.82, "learning_rate": 0.001, "loss": 2.5084, "step": 186552 }, { "epoch": 35.82, "learning_rate": 0.001, "loss": 2.5115, "step": 186564 }, { "epoch": 35.82, "learning_rate": 0.001, "loss": 2.5139, "step": 186576 }, { "epoch": 35.83, "learning_rate": 0.001, "loss": 2.5007, "step": 186588 }, { "epoch": 35.83, "learning_rate": 0.001, "loss": 2.508, "step": 186600 }, { "epoch": 35.83, "learning_rate": 0.001, "loss": 2.5087, "step": 186612 }, { "epoch": 35.83, "learning_rate": 0.001, "loss": 2.5115, "step": 186624 }, { "epoch": 35.84, "learning_rate": 0.001, "loss": 2.5213, "step": 186636 }, { "epoch": 35.84, "learning_rate": 0.001, "loss": 2.5258, "step": 186648 }, { "epoch": 35.84, "learning_rate": 0.001, "loss": 2.5216, "step": 186660 }, { "epoch": 35.84, "learning_rate": 0.001, "loss": 2.5166, "step": 186672 }, { "epoch": 35.85, "learning_rate": 0.001, "loss": 2.509, "step": 186684 }, { "epoch": 35.85, "learning_rate": 0.001, "loss": 2.5078, "step": 186696 }, { "epoch": 35.85, "learning_rate": 0.001, "loss": 2.5082, "step": 186708 }, { "epoch": 35.85, "learning_rate": 0.001, "loss": 2.5116, "step": 186720 }, { "epoch": 35.85, "learning_rate": 0.001, "loss": 2.5148, "step": 186732 }, { "epoch": 35.86, "learning_rate": 0.001, "loss": 2.5178, "step": 186744 }, { "epoch": 35.86, "learning_rate": 0.001, "loss": 2.5162, "step": 186756 }, { "epoch": 35.86, "learning_rate": 0.001, "loss": 2.517, "step": 186768 }, { "epoch": 35.86, "learning_rate": 0.001, "loss": 2.5159, "step": 186780 }, { "epoch": 35.87, "learning_rate": 0.001, "loss": 2.5106, "step": 186792 }, { "epoch": 35.87, "learning_rate": 0.001, "loss": 2.5136, "step": 186804 }, { "epoch": 35.87, "learning_rate": 0.001, "loss": 2.5191, "step": 186816 }, { "epoch": 35.87, "learning_rate": 0.001, "loss": 2.5063, "step": 186828 }, { "epoch": 35.88, "learning_rate": 0.001, "loss": 2.505, "step": 186840 }, { "epoch": 35.88, "learning_rate": 0.001, "loss": 2.5118, "step": 186852 }, { "epoch": 35.88, "learning_rate": 0.001, "loss": 2.5198, "step": 186864 }, { "epoch": 35.88, "eval_ag_news_accuracy": 0.327375, "eval_ag_news_bleu_score": 4.85270456280253, "eval_ag_news_bleu_score_sem": 0.14786794935780656, "eval_ag_news_emb_cos_sim": 0.8168831467628479, "eval_ag_news_emb_cos_sim_sem": 0.006567543205922189, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.491161584854126, "eval_ag_news_n_ngrams_match_1": 14.208, "eval_ag_news_n_ngrams_match_2": 3.102, "eval_ag_news_n_ngrams_match_3": 0.87, "eval_ag_news_num_pred_words": 46.288, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.824053493783836, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35619190529604994, "eval_ag_news_runtime": 10.4026, "eval_ag_news_samples_per_second": 48.065, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35646040856758254, "eval_ag_news_token_set_f1_sem": 0.004357326085723667, "eval_ag_news_token_set_precision": 0.34191584272141656, "eval_ag_news_token_set_recall": 0.38774763945336205, "eval_ag_news_true_num_tokens": 56.09375, "step": 186875 }, { "epoch": 35.88, "eval_anthropic_toxic_prompts_accuracy": 0.11584375, "eval_anthropic_toxic_prompts_bleu_score": 3.2547740254674387, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12535173891302592, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6843358874320984, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008709347924114135, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.242084264755249, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 46.954, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.58699628349279, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21758626830840716, "eval_anthropic_toxic_prompts_runtime": 9.8205, "eval_anthropic_toxic_prompts_samples_per_second": 50.914, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.35894941563738564, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006725005618732095, "eval_anthropic_toxic_prompts_token_set_precision": 0.4436699086519642, "eval_anthropic_toxic_prompts_token_set_recall": 0.32576846770951484, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 186875 }, { "epoch": 35.88, "eval_arxiv_accuracy": 0.351, "eval_arxiv_bleu_score": 4.507362704308387, "eval_arxiv_bleu_score_sem": 0.131581266589406, "eval_arxiv_emb_cos_sim": 0.7721332311630249, "eval_arxiv_emb_cos_sim_sem": 0.008831844446588508, "eval_arxiv_emb_top1_equal": 0.3515625, "eval_arxiv_emb_top1_equal_sem": 0.04236756101983345, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3566012382507324, "eval_arxiv_n_ngrams_match_1": 15.184, "eval_arxiv_n_ngrams_match_2": 3.02, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 40.072, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.69150937060883, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3638956432410369, "eval_arxiv_runtime": 10.1365, "eval_arxiv_samples_per_second": 49.326, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.3569405259993931, "eval_arxiv_token_set_f1_sem": 0.004359564774131006, "eval_arxiv_token_set_precision": 0.30743827002505114, "eval_arxiv_token_set_recall": 0.4419336365498523, "eval_arxiv_true_num_tokens": 64.0, "step": 186875 }, { "epoch": 35.88, "eval_python_code_alpaca_accuracy": 0.162875, "eval_python_code_alpaca_bleu_score": 4.895389372581267, "eval_python_code_alpaca_bleu_score_sem": 0.15536732980811435, "eval_python_code_alpaca_emb_cos_sim": 0.762061595916748, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008538410843266077, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8486289978027344, "eval_python_code_alpaca_n_ngrams_match_1": 9.888, "eval_python_code_alpaca_n_ngrams_match_2": 3.038, "eval_python_code_alpaca_n_ngrams_match_3": 1.04, "eval_python_code_alpaca_num_pred_words": 42.546, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.264096493719666, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3425739597371863, "eval_python_code_alpaca_runtime": 10.3168, "eval_python_code_alpaca_samples_per_second": 48.465, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.48025247901822893, "eval_python_code_alpaca_token_set_f1_sem": 0.005285327007492674, "eval_python_code_alpaca_token_set_precision": 0.5378274946053501, "eval_python_code_alpaca_token_set_recall": 0.455818739862497, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 186875 }, { "epoch": 35.88, "eval_wikibio_accuracy": 0.3244375, "eval_wikibio_bleu_score": 6.360273716083152, "eval_wikibio_bleu_score_sem": 0.23722324752144258, "eval_wikibio_emb_cos_sim": 0.74052894115448, "eval_wikibio_emb_cos_sim_sem": 0.009436331670088957, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6699306964874268, "eval_wikibio_n_ngrams_match_1": 10.164, "eval_wikibio_n_ngrams_match_2": 3.512, "eval_wikibio_n_ngrams_match_3": 1.348, "eval_wikibio_num_pred_words": 35.742, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.24918565961401, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3612554319589409, "eval_wikibio_runtime": 9.7816, "eval_wikibio_samples_per_second": 51.116, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.3226035239129618, "eval_wikibio_token_set_f1_sem": 0.005657038901180159, "eval_wikibio_token_set_precision": 0.32892531093457045, "eval_wikibio_token_set_recall": 0.33506598987721126, "eval_wikibio_true_num_tokens": 61.1328125, "step": 186875 }, { "epoch": 35.88, "eval_nq_accuracy": 0.534, "eval_nq_bleu_score": 12.111168040222063, "eval_nq_bleu_score_sem": 0.47666294936925724, "eval_nq_emb_cos_sim": 0.8338232636451721, "eval_nq_emb_cos_sim_sem": 0.007299238179307122, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.135673761367798, "eval_nq_n_ngrams_match_1": 23.444, "eval_nq_n_ngrams_match_2": 8.734, "eval_nq_n_ngrams_match_3": 4.062, "eval_nq_num_pred_words": 48.898, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.462746458071383, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4553753785058464, "eval_nq_runtime": 11.0148, "eval_nq_samples_per_second": 45.393, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.4683151672091482, "eval_nq_token_set_f1_sem": 0.004938079858210904, "eval_nq_token_set_precision": 0.4259236173799477, "eval_nq_token_set_recall": 0.5291033863196826, "eval_nq_true_num_tokens": 64.0, "step": 186875 }, { "epoch": 35.88, "learning_rate": 0.001, "loss": 2.5236, "step": 186876 }, { "epoch": 35.88, "learning_rate": 0.001, "loss": 2.512, "step": 186888 }, { "epoch": 35.89, "learning_rate": 0.001, "loss": 2.5178, "step": 186900 }, { "epoch": 35.89, "learning_rate": 0.001, "loss": 2.5125, "step": 186912 }, { "epoch": 35.89, "learning_rate": 0.001, "loss": 2.5088, "step": 186924 }, { "epoch": 35.89, "learning_rate": 0.001, "loss": 2.528, "step": 186936 }, { "epoch": 35.9, "learning_rate": 0.001, "loss": 2.512, "step": 186948 }, { "epoch": 35.9, "learning_rate": 0.001, "loss": 2.5203, "step": 186960 }, { "epoch": 35.9, "learning_rate": 0.001, "loss": 2.5071, "step": 186972 }, { "epoch": 35.9, "learning_rate": 0.001, "loss": 2.5211, "step": 186984 }, { "epoch": 35.91, "learning_rate": 0.001, "loss": 2.5137, "step": 186996 }, { "epoch": 35.91, "learning_rate": 0.001, "loss": 2.5137, "step": 187008 }, { "epoch": 35.91, "learning_rate": 0.001, "loss": 2.517, "step": 187020 }, { "epoch": 35.91, "learning_rate": 0.001, "loss": 2.5169, "step": 187032 }, { "epoch": 35.91, "learning_rate": 0.001, "loss": 2.5234, "step": 187044 }, { "epoch": 35.92, "learning_rate": 0.001, "loss": 2.515, "step": 187056 }, { "epoch": 35.92, "learning_rate": 0.001, "loss": 2.5161, "step": 187068 }, { "epoch": 35.92, "learning_rate": 0.001, "loss": 2.518, "step": 187080 }, { "epoch": 35.92, "learning_rate": 0.001, "loss": 2.5221, "step": 187092 }, { "epoch": 35.93, "learning_rate": 0.001, "loss": 2.529, "step": 187104 }, { "epoch": 35.93, "learning_rate": 0.001, "loss": 2.513, "step": 187116 }, { "epoch": 35.93, "learning_rate": 0.001, "loss": 2.526, "step": 187128 }, { "epoch": 35.93, "learning_rate": 0.001, "loss": 2.5097, "step": 187140 }, { "epoch": 35.94, "learning_rate": 0.001, "loss": 2.5113, "step": 187152 }, { "epoch": 35.94, "learning_rate": 0.001, "loss": 2.5213, "step": 187164 }, { "epoch": 35.94, "learning_rate": 0.001, "loss": 2.5153, "step": 187176 }, { "epoch": 35.94, "learning_rate": 0.001, "loss": 2.5151, "step": 187188 }, { "epoch": 35.94, "learning_rate": 0.001, "loss": 2.5025, "step": 187200 }, { "epoch": 35.95, "learning_rate": 0.001, "loss": 2.5183, "step": 187212 }, { "epoch": 35.95, "learning_rate": 0.001, "loss": 2.5121, "step": 187224 }, { "epoch": 35.95, "learning_rate": 0.001, "loss": 2.5118, "step": 187236 }, { "epoch": 35.95, "learning_rate": 0.001, "loss": 2.5157, "step": 187248 }, { "epoch": 35.96, "learning_rate": 0.001, "loss": 2.5094, "step": 187260 }, { "epoch": 35.96, "learning_rate": 0.001, "loss": 2.5196, "step": 187272 }, { "epoch": 35.96, "learning_rate": 0.001, "loss": 2.5102, "step": 187284 }, { "epoch": 35.96, "learning_rate": 0.001, "loss": 2.5223, "step": 187296 }, { "epoch": 35.97, "learning_rate": 0.001, "loss": 2.5229, "step": 187308 }, { "epoch": 35.97, "learning_rate": 0.001, "loss": 2.5307, "step": 187320 }, { "epoch": 35.97, "learning_rate": 0.001, "loss": 2.5098, "step": 187332 }, { "epoch": 35.97, "learning_rate": 0.001, "loss": 2.5167, "step": 187344 }, { "epoch": 35.97, "learning_rate": 0.001, "loss": 2.5165, "step": 187356 }, { "epoch": 35.98, "learning_rate": 0.001, "loss": 2.5094, "step": 187368 }, { "epoch": 35.98, "learning_rate": 0.001, "loss": 2.5084, "step": 187380 }, { "epoch": 35.98, "learning_rate": 0.001, "loss": 2.5241, "step": 187392 }, { "epoch": 35.98, "learning_rate": 0.001, "loss": 2.5245, "step": 187404 }, { "epoch": 35.99, "learning_rate": 0.001, "loss": 2.5195, "step": 187416 }, { "epoch": 35.99, "learning_rate": 0.001, "loss": 2.5113, "step": 187428 }, { "epoch": 35.99, "learning_rate": 0.001, "loss": 2.5196, "step": 187440 }, { "epoch": 35.99, "learning_rate": 0.001, "loss": 2.5169, "step": 187452 }, { "epoch": 36.0, "learning_rate": 0.001, "loss": 2.5213, "step": 187464 }, { "epoch": 36.0, "learning_rate": 0.001, "loss": 2.5282, "step": 187476 }, { "epoch": 36.0, "learning_rate": 0.001, "loss": 2.5234, "step": 187488 }, { "epoch": 36.0, "learning_rate": 0.001, "loss": 2.518, "step": 187500 }, { "epoch": 36.0, "eval_ag_news_accuracy": 0.328625, "eval_ag_news_bleu_score": 4.942493222193823, "eval_ag_news_bleu_score_sem": 0.14813384873838195, "eval_ag_news_emb_cos_sim": 0.8226406574249268, "eval_ag_news_emb_cos_sim_sem": 0.006169854104451997, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4921488761901855, "eval_ag_news_n_ngrams_match_1": 14.414, "eval_ag_news_n_ngrams_match_2": 3.212, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.91, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.85647640020623, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3612314634303404, "eval_ag_news_runtime": 10.4452, "eval_ag_news_samples_per_second": 47.869, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35905100444952903, "eval_ag_news_token_set_f1_sem": 0.00432735548364872, "eval_ag_news_token_set_precision": 0.34633022241723005, "eval_ag_news_token_set_recall": 0.3861115140756779, "eval_ag_news_true_num_tokens": 56.09375, "step": 187500 }, { "epoch": 36.0, "eval_anthropic_toxic_prompts_accuracy": 0.11640625, "eval_anthropic_toxic_prompts_bleu_score": 3.2528173812140118, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12860246335865586, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6813318133354187, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009151567805214119, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.185739278793335, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.24, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.936, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75, "eval_anthropic_toxic_prompts_num_pred_words": 46.844, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.185161375395612, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2164702131498088, "eval_anthropic_toxic_prompts_runtime": 9.9763, "eval_anthropic_toxic_prompts_samples_per_second": 50.119, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.35754420951045773, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006796119587194955, "eval_anthropic_toxic_prompts_token_set_precision": 0.4385873553342065, "eval_anthropic_toxic_prompts_token_set_recall": 0.32701514642386076, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 187500 }, { "epoch": 36.0, "eval_arxiv_accuracy": 0.3544375, "eval_arxiv_bleu_score": 4.416894392451446, "eval_arxiv_bleu_score_sem": 0.1332919077802899, "eval_arxiv_emb_cos_sim": 0.7817720174789429, "eval_arxiv_emb_cos_sim_sem": 0.0075332592142083685, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.335984468460083, "eval_arxiv_n_ngrams_match_1": 15.264, "eval_arxiv_n_ngrams_match_2": 2.982, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 40.212, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.106039117438897, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36615235226363946, "eval_arxiv_runtime": 10.297, "eval_arxiv_samples_per_second": 48.558, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.36031862169647844, "eval_arxiv_token_set_f1_sem": 0.004229702155609524, "eval_arxiv_token_set_precision": 0.31063931808900175, "eval_arxiv_token_set_recall": 0.4487834177617892, "eval_arxiv_true_num_tokens": 64.0, "step": 187500 }, { "epoch": 36.0, "eval_python_code_alpaca_accuracy": 0.1636875, "eval_python_code_alpaca_bleu_score": 4.636028564665088, "eval_python_code_alpaca_bleu_score_sem": 0.1408475479289397, "eval_python_code_alpaca_emb_cos_sim": 0.7712357044219971, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0077602219690730565, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.817457437515259, "eval_python_code_alpaca_n_ngrams_match_1": 10.062, "eval_python_code_alpaca_n_ngrams_match_2": 3.034, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 44.14, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.734248663094267, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33944866767766774, "eval_python_code_alpaca_runtime": 10.3069, "eval_python_code_alpaca_samples_per_second": 48.511, "eval_python_code_alpaca_steps_per_second": 0.097, "eval_python_code_alpaca_token_set_f1": 0.4891515935263797, "eval_python_code_alpaca_token_set_f1_sem": 0.005444230842267233, "eval_python_code_alpaca_token_set_precision": 0.5536921405394953, "eval_python_code_alpaca_token_set_recall": 0.4588950020998636, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 187500 }, { "epoch": 36.0, "eval_wikibio_accuracy": 0.32615625, "eval_wikibio_bleu_score": 5.9866993447316315, "eval_wikibio_bleu_score_sem": 0.2187414030705155, "eval_wikibio_emb_cos_sim": 0.7428231239318848, "eval_wikibio_emb_cos_sim_sem": 0.008714659744115524, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6888046264648438, "eval_wikibio_n_ngrams_match_1": 10.026, "eval_wikibio_n_ngrams_match_2": 3.382, "eval_wikibio_n_ngrams_match_3": 1.234, "eval_wikibio_num_pred_words": 36.37, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.99700700601704, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35881949450267603, "eval_wikibio_runtime": 9.8381, "eval_wikibio_samples_per_second": 50.823, "eval_wikibio_steps_per_second": 0.102, "eval_wikibio_token_set_f1": 0.32007353674278294, "eval_wikibio_token_set_f1_sem": 0.005424940031134632, "eval_wikibio_token_set_precision": 0.32790295232509387, "eval_wikibio_token_set_recall": 0.3280172324537126, "eval_wikibio_true_num_tokens": 61.1328125, "step": 187500 }, { "epoch": 36.0, "eval_nq_accuracy": 0.5345625, "eval_nq_bleu_score": 11.629154870964845, "eval_nq_bleu_score_sem": 0.47471034362165127, "eval_nq_emb_cos_sim": 0.8308244943618774, "eval_nq_emb_cos_sim_sem": 0.007572436026988261, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1325623989105225, "eval_nq_n_ngrams_match_1": 23.33, "eval_nq_n_ngrams_match_2": 8.426, "eval_nq_n_ngrams_match_3": 3.816, "eval_nq_num_pred_words": 49.362, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.436456706138213, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45329224349146213, "eval_nq_runtime": 10.6286, "eval_nq_samples_per_second": 47.043, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.4640732003403243, "eval_nq_token_set_f1_sem": 0.00508825611317226, "eval_nq_token_set_precision": 0.4242892311907854, "eval_nq_token_set_recall": 0.5206706593580744, "eval_nq_true_num_tokens": 64.0, "step": 187500 }, { "epoch": 36.0, "learning_rate": 0.001, "loss": 2.4937, "step": 187512 }, { "epoch": 36.01, "learning_rate": 0.001, "loss": 2.4931, "step": 187524 }, { "epoch": 36.01, "learning_rate": 0.001, "loss": 2.5052, "step": 187536 }, { "epoch": 36.01, "learning_rate": 0.001, "loss": 2.5067, "step": 187548 }, { "epoch": 36.01, "learning_rate": 0.001, "loss": 2.5043, "step": 187560 }, { "epoch": 36.02, "learning_rate": 0.001, "loss": 2.5012, "step": 187572 }, { "epoch": 36.02, "learning_rate": 0.001, "loss": 2.4939, "step": 187584 }, { "epoch": 36.02, "learning_rate": 0.001, "loss": 2.5131, "step": 187596 }, { "epoch": 36.02, "learning_rate": 0.001, "loss": 2.5016, "step": 187608 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 2.5018, "step": 187620 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 2.5032, "step": 187632 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 2.498, "step": 187644 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 2.4972, "step": 187656 }, { "epoch": 36.03, "learning_rate": 0.001, "loss": 2.5001, "step": 187668 }, { "epoch": 36.04, "learning_rate": 0.001, "loss": 2.5069, "step": 187680 }, { "epoch": 36.04, "learning_rate": 0.001, "loss": 2.4978, "step": 187692 }, { "epoch": 36.04, "learning_rate": 0.001, "loss": 2.5012, "step": 187704 }, { "epoch": 36.04, "learning_rate": 0.001, "loss": 2.4912, "step": 187716 }, { "epoch": 36.05, "learning_rate": 0.001, "loss": 2.5029, "step": 187728 }, { "epoch": 36.05, "learning_rate": 0.001, "loss": 2.4959, "step": 187740 }, { "epoch": 36.05, "learning_rate": 0.001, "loss": 2.4989, "step": 187752 }, { "epoch": 36.05, "learning_rate": 0.001, "loss": 2.4972, "step": 187764 }, { "epoch": 36.06, "learning_rate": 0.001, "loss": 2.5069, "step": 187776 }, { "epoch": 36.06, "learning_rate": 0.001, "loss": 2.5011, "step": 187788 }, { "epoch": 36.06, "learning_rate": 0.001, "loss": 2.5058, "step": 187800 }, { "epoch": 36.06, "learning_rate": 0.001, "loss": 2.5013, "step": 187812 }, { "epoch": 36.06, "learning_rate": 0.001, "loss": 2.5025, "step": 187824 }, { "epoch": 36.07, "learning_rate": 0.001, "loss": 2.4978, "step": 187836 }, { "epoch": 36.07, "learning_rate": 0.001, "loss": 2.5044, "step": 187848 }, { "epoch": 36.07, "learning_rate": 0.001, "loss": 2.4988, "step": 187860 }, { "epoch": 36.07, "learning_rate": 0.001, "loss": 2.5055, "step": 187872 }, { "epoch": 36.08, "learning_rate": 0.001, "loss": 2.4916, "step": 187884 }, { "epoch": 36.08, "learning_rate": 0.001, "loss": 2.5063, "step": 187896 }, { "epoch": 36.08, "learning_rate": 0.001, "loss": 2.5042, "step": 187908 }, { "epoch": 36.08, "learning_rate": 0.001, "loss": 2.4973, "step": 187920 }, { "epoch": 36.09, "learning_rate": 0.001, "loss": 2.5, "step": 187932 }, { "epoch": 36.09, "learning_rate": 0.001, "loss": 2.5052, "step": 187944 }, { "epoch": 36.09, "learning_rate": 0.001, "loss": 2.5033, "step": 187956 }, { "epoch": 36.09, "learning_rate": 0.001, "loss": 2.5041, "step": 187968 }, { "epoch": 36.09, "learning_rate": 0.001, "loss": 2.5087, "step": 187980 }, { "epoch": 36.1, "learning_rate": 0.001, "loss": 2.5016, "step": 187992 }, { "epoch": 36.1, "learning_rate": 0.001, "loss": 2.5043, "step": 188004 }, { "epoch": 36.1, "learning_rate": 0.001, "loss": 2.5067, "step": 188016 }, { "epoch": 36.1, "learning_rate": 0.001, "loss": 2.5089, "step": 188028 }, { "epoch": 36.11, "learning_rate": 0.001, "loss": 2.5135, "step": 188040 }, { "epoch": 36.11, "learning_rate": 0.001, "loss": 2.5056, "step": 188052 }, { "epoch": 36.11, "learning_rate": 0.001, "loss": 2.5115, "step": 188064 }, { "epoch": 36.11, "learning_rate": 0.001, "loss": 2.4977, "step": 188076 }, { "epoch": 36.12, "learning_rate": 0.001, "loss": 2.5, "step": 188088 }, { "epoch": 36.12, "learning_rate": 0.001, "loss": 2.5041, "step": 188100 }, { "epoch": 36.12, "learning_rate": 0.001, "loss": 2.4984, "step": 188112 }, { "epoch": 36.12, "learning_rate": 0.001, "loss": 2.5205, "step": 188124 }, { "epoch": 36.12, "eval_ag_news_accuracy": 0.32796875, "eval_ag_news_bleu_score": 4.995996668175454, "eval_ag_news_bleu_score_sem": 0.14697083637603395, "eval_ag_news_emb_cos_sim": 0.8210964202880859, "eval_ag_news_emb_cos_sim_sem": 0.006754837215259523, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.5001513957977295, "eval_ag_news_n_ngrams_match_1": 14.512, "eval_ag_news_n_ngrams_match_2": 3.316, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 46.662, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 33.12046587849239, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36274309202833455, "eval_ag_news_runtime": 11.8057, "eval_ag_news_samples_per_second": 42.353, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.36278094313381254, "eval_ag_news_token_set_f1_sem": 0.004395849795954817, "eval_ag_news_token_set_precision": 0.348730167831629, "eval_ag_news_token_set_recall": 0.3932009532911382, "eval_ag_news_true_num_tokens": 56.09375, "step": 188125 }, { "epoch": 36.12, "eval_anthropic_toxic_prompts_accuracy": 0.11634375, "eval_anthropic_toxic_prompts_bleu_score": 3.235428472490034, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12399404198003786, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.680637001991272, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008425199139809752, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.192927837371826, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.97, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 46.88, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.359644213352187, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2182232073664308, "eval_anthropic_toxic_prompts_runtime": 9.9854, "eval_anthropic_toxic_prompts_samples_per_second": 50.073, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.36169427325117526, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006905703115647926, "eval_anthropic_toxic_prompts_token_set_precision": 0.4472755540047718, "eval_anthropic_toxic_prompts_token_set_recall": 0.3277039674454963, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 188125 }, { "epoch": 36.12, "eval_arxiv_accuracy": 0.35090625, "eval_arxiv_bleu_score": 4.4174585397988295, "eval_arxiv_bleu_score_sem": 0.1278726088860232, "eval_arxiv_emb_cos_sim": 0.7842085361480713, "eval_arxiv_emb_cos_sim_sem": 0.006283408251052474, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.35910701751709, "eval_arxiv_n_ngrams_match_1": 15.288, "eval_arxiv_n_ngrams_match_2": 2.976, "eval_arxiv_n_ngrams_match_3": 0.65, "eval_arxiv_num_pred_words": 40.326, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.76349411117982, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36982629948667634, "eval_arxiv_runtime": 10.3197, "eval_arxiv_samples_per_second": 48.451, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.358834892982993, "eval_arxiv_token_set_f1_sem": 0.0041332722647101135, "eval_arxiv_token_set_precision": 0.31028017096869653, "eval_arxiv_token_set_recall": 0.4405731224541131, "eval_arxiv_true_num_tokens": 64.0, "step": 188125 }, { "epoch": 36.12, "eval_python_code_alpaca_accuracy": 0.1616875, "eval_python_code_alpaca_bleu_score": 4.6461539174688955, "eval_python_code_alpaca_bleu_score_sem": 0.14160669208012872, "eval_python_code_alpaca_emb_cos_sim": 0.7626280188560486, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008380994339711411, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8524506092071533, "eval_python_code_alpaca_n_ngrams_match_1": 10.03, "eval_python_code_alpaca_n_ngrams_match_2": 3.01, "eval_python_code_alpaca_n_ngrams_match_3": 1.026, "eval_python_code_alpaca_num_pred_words": 44.488, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.33019939110918, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33619768393109695, "eval_python_code_alpaca_runtime": 12.1766, "eval_python_code_alpaca_samples_per_second": 41.062, "eval_python_code_alpaca_steps_per_second": 0.082, "eval_python_code_alpaca_token_set_f1": 0.48191552425964873, "eval_python_code_alpaca_token_set_f1_sem": 0.005149441772851338, "eval_python_code_alpaca_token_set_precision": 0.5498746585934386, "eval_python_code_alpaca_token_set_recall": 0.448166458051428, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 188125 }, { "epoch": 36.12, "eval_wikibio_accuracy": 0.32828125, "eval_wikibio_bleu_score": 5.842899620541409, "eval_wikibio_bleu_score_sem": 0.21367021975295392, "eval_wikibio_emb_cos_sim": 0.7363656759262085, "eval_wikibio_emb_cos_sim_sem": 0.009985318079545641, "eval_wikibio_emb_top1_equal": 0.1328125, "eval_wikibio_emb_top1_equal_sem": 0.030114394778901498, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.671426296234131, "eval_wikibio_n_ngrams_match_1": 9.642, "eval_wikibio_n_ngrams_match_2": 3.226, "eval_wikibio_n_ngrams_match_3": 1.222, "eval_wikibio_num_pred_words": 35.336, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.3079306502913, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34369270005573715, "eval_wikibio_runtime": 11.0798, "eval_wikibio_samples_per_second": 45.127, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.30747333378935354, "eval_wikibio_token_set_f1_sem": 0.005869789107156018, "eval_wikibio_token_set_precision": 0.3125237180433861, "eval_wikibio_token_set_recall": 0.32153228529965044, "eval_wikibio_true_num_tokens": 61.1328125, "step": 188125 }, { "epoch": 36.12, "eval_nq_accuracy": 0.535125, "eval_nq_bleu_score": 11.948907734875544, "eval_nq_bleu_score_sem": 0.49290621709386756, "eval_nq_emb_cos_sim": 0.8342015743255615, "eval_nq_emb_cos_sim_sem": 0.0070560791808131065, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.133300542831421, "eval_nq_n_ngrams_match_1": 23.482, "eval_nq_n_ngrams_match_2": 8.656, "eval_nq_n_ngrams_match_3": 3.974, "eval_nq_num_pred_words": 49.194, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.442686324264292, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4538564307692001, "eval_nq_runtime": 29.1975, "eval_nq_samples_per_second": 17.125, "eval_nq_steps_per_second": 0.034, "eval_nq_token_set_f1": 0.4677455973523867, "eval_nq_token_set_f1_sem": 0.005077195308921663, "eval_nq_token_set_precision": 0.4260586515948146, "eval_nq_token_set_recall": 0.5266026745359195, "eval_nq_true_num_tokens": 64.0, "step": 188125 }, { "epoch": 36.12, "learning_rate": 0.001, "loss": 2.5031, "step": 188136 }, { "epoch": 36.13, "learning_rate": 0.001, "loss": 2.5042, "step": 188148 }, { "epoch": 36.13, "learning_rate": 0.001, "loss": 2.5017, "step": 188160 }, { "epoch": 36.13, "learning_rate": 0.001, "loss": 2.5008, "step": 188172 }, { "epoch": 36.13, "learning_rate": 0.001, "loss": 2.4904, "step": 188184 }, { "epoch": 36.14, "learning_rate": 0.001, "loss": 2.5065, "step": 188196 }, { "epoch": 36.14, "learning_rate": 0.001, "loss": 2.4946, "step": 188208 }, { "epoch": 36.14, "learning_rate": 0.001, "loss": 2.501, "step": 188220 }, { "epoch": 36.14, "learning_rate": 0.001, "loss": 2.4967, "step": 188232 }, { "epoch": 36.15, "learning_rate": 0.001, "loss": 2.4967, "step": 188244 }, { "epoch": 36.15, "learning_rate": 0.001, "loss": 2.4991, "step": 188256 }, { "epoch": 36.15, "learning_rate": 0.001, "loss": 2.4974, "step": 188268 }, { "epoch": 36.15, "learning_rate": 0.001, "loss": 2.5098, "step": 188280 }, { "epoch": 36.15, "learning_rate": 0.001, "loss": 2.5019, "step": 188292 }, { "epoch": 36.16, "learning_rate": 0.001, "loss": 2.5017, "step": 188304 }, { "epoch": 36.16, "learning_rate": 0.001, "loss": 2.4982, "step": 188316 }, { "epoch": 36.16, "learning_rate": 0.001, "loss": 2.5112, "step": 188328 }, { "epoch": 36.16, "learning_rate": 0.001, "loss": 2.5011, "step": 188340 }, { "epoch": 36.17, "learning_rate": 0.001, "loss": 2.5089, "step": 188352 }, { "epoch": 36.17, "learning_rate": 0.001, "loss": 2.5155, "step": 188364 }, { "epoch": 36.17, "learning_rate": 0.001, "loss": 2.5094, "step": 188376 }, { "epoch": 36.17, "learning_rate": 0.001, "loss": 2.5022, "step": 188388 }, { "epoch": 36.18, "learning_rate": 0.001, "loss": 2.5036, "step": 188400 }, { "epoch": 36.18, "learning_rate": 0.001, "loss": 2.4974, "step": 188412 }, { "epoch": 36.18, "learning_rate": 0.001, "loss": 2.5034, "step": 188424 }, { "epoch": 36.18, "learning_rate": 0.001, "loss": 2.5026, "step": 188436 }, { "epoch": 36.18, "learning_rate": 0.001, "loss": 2.5056, "step": 188448 }, { "epoch": 36.19, "learning_rate": 0.001, "loss": 2.5073, "step": 188460 }, { "epoch": 36.19, "learning_rate": 0.001, "loss": 2.5003, "step": 188472 }, { "epoch": 36.19, "learning_rate": 0.001, "loss": 2.51, "step": 188484 }, { "epoch": 36.19, "learning_rate": 0.001, "loss": 2.5124, "step": 188496 }, { "epoch": 36.2, "learning_rate": 0.001, "loss": 2.4965, "step": 188508 }, { "epoch": 36.2, "learning_rate": 0.001, "loss": 2.5096, "step": 188520 }, { "epoch": 36.2, "learning_rate": 0.001, "loss": 2.4936, "step": 188532 }, { "epoch": 36.2, "learning_rate": 0.001, "loss": 2.5, "step": 188544 }, { "epoch": 36.21, "learning_rate": 0.001, "loss": 2.5025, "step": 188556 }, { "epoch": 36.21, "learning_rate": 0.001, "loss": 2.5003, "step": 188568 }, { "epoch": 36.21, "learning_rate": 0.001, "loss": 2.503, "step": 188580 }, { "epoch": 36.21, "learning_rate": 0.001, "loss": 2.4987, "step": 188592 }, { "epoch": 36.21, "learning_rate": 0.001, "loss": 2.5062, "step": 188604 }, { "epoch": 36.22, "learning_rate": 0.001, "loss": 2.5054, "step": 188616 }, { "epoch": 36.22, "learning_rate": 0.001, "loss": 2.5043, "step": 188628 }, { "epoch": 36.22, "learning_rate": 0.001, "loss": 2.4962, "step": 188640 }, { "epoch": 36.22, "learning_rate": 0.001, "loss": 2.5071, "step": 188652 }, { "epoch": 36.23, "learning_rate": 0.001, "loss": 2.5136, "step": 188664 }, { "epoch": 36.23, "learning_rate": 0.001, "loss": 2.5091, "step": 188676 }, { "epoch": 36.23, "learning_rate": 0.001, "loss": 2.5005, "step": 188688 }, { "epoch": 36.23, "learning_rate": 0.001, "loss": 2.4998, "step": 188700 }, { "epoch": 36.24, "learning_rate": 0.001, "loss": 2.5134, "step": 188712 }, { "epoch": 36.24, "learning_rate": 0.001, "loss": 2.5077, "step": 188724 }, { "epoch": 36.24, "learning_rate": 0.001, "loss": 2.5066, "step": 188736 }, { "epoch": 36.24, "learning_rate": 0.001, "loss": 2.5074, "step": 188748 }, { "epoch": 36.24, "eval_ag_news_accuracy": 0.32878125, "eval_ag_news_bleu_score": 4.9238574272885, "eval_ag_news_bleu_score_sem": 0.1504874425649532, "eval_ag_news_emb_cos_sim": 0.8129588961601257, "eval_ag_news_emb_cos_sim_sem": 0.007488922705208432, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4919283390045166, "eval_ag_news_n_ngrams_match_1": 14.396, "eval_ag_news_n_ngrams_match_2": 3.168, "eval_ag_news_n_ngrams_match_3": 0.912, "eval_ag_news_num_pred_words": 46.464, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.84923112432568, "eval_ag_news_pred_num_tokens": 62.9296875, "eval_ag_news_rouge_score": 0.3576292001405687, "eval_ag_news_runtime": 25.9604, "eval_ag_news_samples_per_second": 19.26, "eval_ag_news_steps_per_second": 0.039, "eval_ag_news_token_set_f1": 0.3568604281270285, "eval_ag_news_token_set_f1_sem": 0.004526422508572503, "eval_ag_news_token_set_precision": 0.3437497553910095, "eval_ag_news_token_set_recall": 0.38662786716267344, "eval_ag_news_true_num_tokens": 56.09375, "step": 188750 }, { "epoch": 36.24, "eval_anthropic_toxic_prompts_accuracy": 0.11684375, "eval_anthropic_toxic_prompts_bleu_score": 3.1998125126454084, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12435613514143139, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6844603419303894, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007683176871450635, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1939547061920166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.356, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.966, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 46.782, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.384671219993038, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22081209373945576, "eval_anthropic_toxic_prompts_runtime": 17.5877, "eval_anthropic_toxic_prompts_samples_per_second": 28.429, "eval_anthropic_toxic_prompts_steps_per_second": 0.057, "eval_anthropic_toxic_prompts_token_set_f1": 0.3585070034131307, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006516720530655714, "eval_anthropic_toxic_prompts_token_set_precision": 0.44482047006478886, "eval_anthropic_toxic_prompts_token_set_recall": 0.3269506017809585, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 188750 }, { "epoch": 36.24, "eval_arxiv_accuracy": 0.35125, "eval_arxiv_bleu_score": 4.439760727117616, "eval_arxiv_bleu_score_sem": 0.13083885801093512, "eval_arxiv_emb_cos_sim": 0.7760308980941772, "eval_arxiv_emb_cos_sim_sem": 0.007010127262135088, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3531010150909424, "eval_arxiv_n_ngrams_match_1": 15.508, "eval_arxiv_n_ngrams_match_2": 3.024, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.642, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.591258238041224, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37046020399137486, "eval_arxiv_runtime": 10.3045, "eval_arxiv_samples_per_second": 48.522, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3615212256100413, "eval_arxiv_token_set_f1_sem": 0.004211571302865289, "eval_arxiv_token_set_precision": 0.3155036624832157, "eval_arxiv_token_set_recall": 0.4408505464993048, "eval_arxiv_true_num_tokens": 64.0, "step": 188750 }, { "epoch": 36.24, "eval_python_code_alpaca_accuracy": 0.161875, "eval_python_code_alpaca_bleu_score": 4.955579151969956, "eval_python_code_alpaca_bleu_score_sem": 0.16010913537652974, "eval_python_code_alpaca_emb_cos_sim": 0.771697998046875, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007489784739969381, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.858457088470459, "eval_python_code_alpaca_n_ngrams_match_1": 10.228, "eval_python_code_alpaca_n_ngrams_match_2": 3.092, "eval_python_code_alpaca_n_ngrams_match_3": 1.068, "eval_python_code_alpaca_num_pred_words": 43.306, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.434606118906725, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3494021093514086, "eval_python_code_alpaca_runtime": 10.4348, "eval_python_code_alpaca_samples_per_second": 47.917, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.4891469573994531, "eval_python_code_alpaca_token_set_f1_sem": 0.005171675634925169, "eval_python_code_alpaca_token_set_precision": 0.5624987652833927, "eval_python_code_alpaca_token_set_recall": 0.45214848305592037, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 188750 }, { "epoch": 36.24, "eval_wikibio_accuracy": 0.32853125, "eval_wikibio_bleu_score": 6.027908547181792, "eval_wikibio_bleu_score_sem": 0.22614174946973598, "eval_wikibio_emb_cos_sim": 0.7355372905731201, "eval_wikibio_emb_cos_sim_sem": 0.010186502768189854, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.676727294921875, "eval_wikibio_n_ngrams_match_1": 9.99, "eval_wikibio_n_ngrams_match_2": 3.356, "eval_wikibio_n_ngrams_match_3": 1.242, "eval_wikibio_num_pred_words": 35.632, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.51685520423793, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35418379515377074, "eval_wikibio_runtime": 9.997, "eval_wikibio_samples_per_second": 50.015, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3175005582880516, "eval_wikibio_token_set_f1_sem": 0.005811048717102578, "eval_wikibio_token_set_precision": 0.32546487710866784, "eval_wikibio_token_set_recall": 0.32806607835011986, "eval_wikibio_true_num_tokens": 61.1328125, "step": 188750 }, { "epoch": 36.24, "eval_nq_accuracy": 0.5355625, "eval_nq_bleu_score": 11.89094613002224, "eval_nq_bleu_score_sem": 0.4823002520918112, "eval_nq_emb_cos_sim": 0.8321588039398193, "eval_nq_emb_cos_sim_sem": 0.008446335125793457, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1361355781555176, "eval_nq_n_ngrams_match_1": 23.438, "eval_nq_n_ngrams_match_2": 8.616, "eval_nq_n_ngrams_match_3": 3.974, "eval_nq_num_pred_words": 49.038, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.466655599039921, "eval_nq_pred_num_tokens": 62.9921875, "eval_nq_rouge_score": 0.45389672735941045, "eval_nq_runtime": 10.3738, "eval_nq_samples_per_second": 48.198, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.467122751926627, "eval_nq_token_set_f1_sem": 0.005178249951963895, "eval_nq_token_set_precision": 0.4255444778971428, "eval_nq_token_set_recall": 0.5259872792433951, "eval_nq_true_num_tokens": 64.0, "step": 188750 }, { "epoch": 36.24, "learning_rate": 0.001, "loss": 2.5034, "step": 188760 }, { "epoch": 36.25, "learning_rate": 0.001, "loss": 2.4949, "step": 188772 }, { "epoch": 36.25, "learning_rate": 0.001, "loss": 2.5008, "step": 188784 }, { "epoch": 36.25, "learning_rate": 0.001, "loss": 2.5078, "step": 188796 }, { "epoch": 36.25, "learning_rate": 0.001, "loss": 2.5045, "step": 188808 }, { "epoch": 36.26, "learning_rate": 0.001, "loss": 2.5089, "step": 188820 }, { "epoch": 36.26, "learning_rate": 0.001, "loss": 2.5024, "step": 188832 }, { "epoch": 36.26, "learning_rate": 0.001, "loss": 2.5005, "step": 188844 }, { "epoch": 36.26, "learning_rate": 0.001, "loss": 2.5009, "step": 188856 }, { "epoch": 36.26, "learning_rate": 0.001, "loss": 2.5067, "step": 188868 }, { "epoch": 36.27, "learning_rate": 0.001, "loss": 2.4985, "step": 188880 }, { "epoch": 36.27, "learning_rate": 0.001, "loss": 2.5082, "step": 188892 }, { "epoch": 36.27, "learning_rate": 0.001, "loss": 2.4995, "step": 188904 }, { "epoch": 36.27, "learning_rate": 0.001, "loss": 2.5053, "step": 188916 }, { "epoch": 36.28, "learning_rate": 0.001, "loss": 2.5011, "step": 188928 }, { "epoch": 36.28, "learning_rate": 0.001, "loss": 2.503, "step": 188940 }, { "epoch": 36.28, "learning_rate": 0.001, "loss": 2.5032, "step": 188952 }, { "epoch": 36.28, "learning_rate": 0.001, "loss": 2.5133, "step": 188964 }, { "epoch": 36.29, "learning_rate": 0.001, "loss": 2.4994, "step": 188976 }, { "epoch": 36.29, "learning_rate": 0.001, "loss": 2.502, "step": 188988 }, { "epoch": 36.29, "learning_rate": 0.001, "loss": 2.4983, "step": 189000 }, { "epoch": 36.29, "learning_rate": 0.001, "loss": 2.4933, "step": 189012 }, { "epoch": 36.29, "learning_rate": 0.001, "loss": 2.5051, "step": 189024 }, { "epoch": 36.3, "learning_rate": 0.001, "loss": 2.5017, "step": 189036 }, { "epoch": 36.3, "learning_rate": 0.001, "loss": 2.5152, "step": 189048 }, { "epoch": 36.3, "learning_rate": 0.001, "loss": 2.4962, "step": 189060 }, { "epoch": 36.3, "learning_rate": 0.001, "loss": 2.4993, "step": 189072 }, { "epoch": 36.31, "learning_rate": 0.001, "loss": 2.5013, "step": 189084 }, { "epoch": 36.31, "learning_rate": 0.001, "loss": 2.5059, "step": 189096 }, { "epoch": 36.31, "learning_rate": 0.001, "loss": 2.5096, "step": 189108 }, { "epoch": 36.31, "learning_rate": 0.001, "loss": 2.5049, "step": 189120 }, { "epoch": 36.32, "learning_rate": 0.001, "loss": 2.5147, "step": 189132 }, { "epoch": 36.32, "learning_rate": 0.001, "loss": 2.4969, "step": 189144 }, { "epoch": 36.32, "learning_rate": 0.001, "loss": 2.5062, "step": 189156 }, { "epoch": 36.32, "learning_rate": 0.001, "loss": 2.4976, "step": 189168 }, { "epoch": 36.32, "learning_rate": 0.001, "loss": 2.5088, "step": 189180 }, { "epoch": 36.33, "learning_rate": 0.001, "loss": 2.508, "step": 189192 }, { "epoch": 36.33, "learning_rate": 0.001, "loss": 2.5101, "step": 189204 }, { "epoch": 36.33, "learning_rate": 0.001, "loss": 2.5071, "step": 189216 }, { "epoch": 36.33, "learning_rate": 0.001, "loss": 2.5042, "step": 189228 }, { "epoch": 36.34, "learning_rate": 0.001, "loss": 2.5048, "step": 189240 }, { "epoch": 36.34, "learning_rate": 0.001, "loss": 2.5105, "step": 189252 }, { "epoch": 36.34, "learning_rate": 0.001, "loss": 2.4981, "step": 189264 }, { "epoch": 36.34, "learning_rate": 0.001, "loss": 2.4955, "step": 189276 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 2.5062, "step": 189288 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 2.505, "step": 189300 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 2.5049, "step": 189312 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 2.5113, "step": 189324 }, { "epoch": 36.35, "learning_rate": 0.001, "loss": 2.5043, "step": 189336 }, { "epoch": 36.36, "learning_rate": 0.001, "loss": 2.5062, "step": 189348 }, { "epoch": 36.36, "learning_rate": 0.001, "loss": 2.5066, "step": 189360 }, { "epoch": 36.36, "learning_rate": 0.001, "loss": 2.512, "step": 189372 }, { "epoch": 36.36, "eval_ag_news_accuracy": 0.3275625, "eval_ag_news_bleu_score": 5.012270045746594, "eval_ag_news_bleu_score_sem": 0.1620868070547453, "eval_ag_news_emb_cos_sim": 0.8226636648178101, "eval_ag_news_emb_cos_sim_sem": 0.006504384836339599, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.477799654006958, "eval_ag_news_n_ngrams_match_1": 14.522, "eval_ag_news_n_ngrams_match_2": 3.214, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 47.096, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.38837797609544, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3610714619285975, "eval_ag_news_runtime": 10.3524, "eval_ag_news_samples_per_second": 48.298, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3594347006204614, "eval_ag_news_token_set_f1_sem": 0.004396998571959723, "eval_ag_news_token_set_precision": 0.3482525184071479, "eval_ag_news_token_set_recall": 0.38506998092606903, "eval_ag_news_true_num_tokens": 56.09375, "step": 189375 }, { "epoch": 36.36, "eval_anthropic_toxic_prompts_accuracy": 0.115875, "eval_anthropic_toxic_prompts_bleu_score": 3.1836737369434283, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12205984187178277, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6894845366477966, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00823496341215526, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.193580150604248, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.952, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 47.436, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.37553951540394, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21744424596820744, "eval_anthropic_toxic_prompts_runtime": 10.1426, "eval_anthropic_toxic_prompts_samples_per_second": 49.297, "eval_anthropic_toxic_prompts_steps_per_second": 0.099, "eval_anthropic_toxic_prompts_token_set_f1": 0.35701893695884623, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00641915069211616, "eval_anthropic_toxic_prompts_token_set_precision": 0.44348514081552026, "eval_anthropic_toxic_prompts_token_set_recall": 0.32330663031997403, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 189375 }, { "epoch": 36.36, "eval_arxiv_accuracy": 0.349625, "eval_arxiv_bleu_score": 4.359747482241014, "eval_arxiv_bleu_score_sem": 0.12076438492988241, "eval_arxiv_emb_cos_sim": 0.7828538417816162, "eval_arxiv_emb_cos_sim_sem": 0.00740250849487094, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.336054801940918, "eval_arxiv_n_ngrams_match_1": 15.296, "eval_arxiv_n_ngrams_match_2": 3.092, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 40.256, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.108015982521596, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36834990281488467, "eval_arxiv_runtime": 10.1811, "eval_arxiv_samples_per_second": 49.111, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.36213893966739696, "eval_arxiv_token_set_f1_sem": 0.004160231156175127, "eval_arxiv_token_set_precision": 0.313709864831142, "eval_arxiv_token_set_recall": 0.4500851986397599, "eval_arxiv_true_num_tokens": 64.0, "step": 189375 }, { "epoch": 36.36, "eval_python_code_alpaca_accuracy": 0.16225, "eval_python_code_alpaca_bleu_score": 4.654539109037991, "eval_python_code_alpaca_bleu_score_sem": 0.14086512537791676, "eval_python_code_alpaca_emb_cos_sim": 0.772710919380188, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006816303125456598, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8568575382232666, "eval_python_code_alpaca_n_ngrams_match_1": 10.14, "eval_python_code_alpaca_n_ngrams_match_2": 2.97, "eval_python_code_alpaca_n_ngrams_match_3": 1.01, "eval_python_code_alpaca_num_pred_words": 44.272, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.406740882243863, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34200476896428433, "eval_python_code_alpaca_runtime": 9.7461, "eval_python_code_alpaca_samples_per_second": 51.303, "eval_python_code_alpaca_steps_per_second": 0.103, "eval_python_code_alpaca_token_set_f1": 0.48530332167581197, "eval_python_code_alpaca_token_set_f1_sem": 0.005289122832711609, "eval_python_code_alpaca_token_set_precision": 0.5561826087093645, "eval_python_code_alpaca_token_set_recall": 0.45205733189991626, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 189375 }, { "epoch": 36.36, "eval_wikibio_accuracy": 0.3265625, "eval_wikibio_bleu_score": 6.145622930661852, "eval_wikibio_bleu_score_sem": 0.2320444008990757, "eval_wikibio_emb_cos_sim": 0.7508326768875122, "eval_wikibio_emb_cos_sim_sem": 0.009131730469000156, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.660064697265625, "eval_wikibio_n_ngrams_match_1": 10.158, "eval_wikibio_n_ngrams_match_2": 3.466, "eval_wikibio_n_ngrams_match_3": 1.3, "eval_wikibio_num_pred_words": 36.092, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.863857175288175, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36010979182625363, "eval_wikibio_runtime": 27.5499, "eval_wikibio_samples_per_second": 18.149, "eval_wikibio_steps_per_second": 0.036, "eval_wikibio_token_set_f1": 0.32075633576443446, "eval_wikibio_token_set_f1_sem": 0.005492914719916766, "eval_wikibio_token_set_precision": 0.32943771467184196, "eval_wikibio_token_set_recall": 0.326437197320436, "eval_wikibio_true_num_tokens": 61.1328125, "step": 189375 }, { "epoch": 36.36, "eval_nq_accuracy": 0.53471875, "eval_nq_bleu_score": 12.058839815591211, "eval_nq_bleu_score_sem": 0.4821687388836942, "eval_nq_emb_cos_sim": 0.8358081579208374, "eval_nq_emb_cos_sim_sem": 0.008123797806485237, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.135204315185547, "eval_nq_n_ngrams_match_1": 23.682, "eval_nq_n_ngrams_match_2": 8.72, "eval_nq_n_ngrams_match_3": 4.04, "eval_nq_num_pred_words": 49.268, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.458774586418219, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4580690173697595, "eval_nq_runtime": 27.5464, "eval_nq_samples_per_second": 18.151, "eval_nq_steps_per_second": 0.036, "eval_nq_token_set_f1": 0.4711447883237337, "eval_nq_token_set_f1_sem": 0.004979566751032423, "eval_nq_token_set_precision": 0.4315833516092539, "eval_nq_token_set_recall": 0.5267383484018845, "eval_nq_true_num_tokens": 64.0, "step": 189375 }, { "epoch": 36.36, "learning_rate": 0.001, "loss": 2.5116, "step": 189384 }, { "epoch": 36.37, "learning_rate": 0.001, "loss": 2.502, "step": 189396 }, { "epoch": 36.37, "learning_rate": 0.001, "loss": 2.5149, "step": 189408 }, { "epoch": 36.37, "learning_rate": 0.001, "loss": 2.5086, "step": 189420 }, { "epoch": 36.37, "learning_rate": 0.001, "loss": 2.5042, "step": 189432 }, { "epoch": 36.38, "learning_rate": 0.001, "loss": 2.5099, "step": 189444 }, { "epoch": 36.38, "learning_rate": 0.001, "loss": 2.5007, "step": 189456 }, { "epoch": 36.38, "learning_rate": 0.001, "loss": 2.5036, "step": 189468 }, { "epoch": 36.38, "learning_rate": 0.001, "loss": 2.5078, "step": 189480 }, { "epoch": 36.38, "learning_rate": 0.001, "loss": 2.4987, "step": 189492 }, { "epoch": 36.39, "learning_rate": 0.001, "loss": 2.4949, "step": 189504 }, { "epoch": 36.39, "learning_rate": 0.001, "loss": 2.5013, "step": 189516 }, { "epoch": 36.39, "learning_rate": 0.001, "loss": 2.5056, "step": 189528 }, { "epoch": 36.39, "learning_rate": 0.001, "loss": 2.5063, "step": 189540 }, { "epoch": 36.4, "learning_rate": 0.001, "loss": 2.4983, "step": 189552 }, { "epoch": 36.4, "learning_rate": 0.001, "loss": 2.5043, "step": 189564 }, { "epoch": 36.4, "learning_rate": 0.001, "loss": 2.5006, "step": 189576 }, { "epoch": 36.4, "learning_rate": 0.001, "loss": 2.4997, "step": 189588 }, { "epoch": 36.41, "learning_rate": 0.001, "loss": 2.5068, "step": 189600 }, { "epoch": 36.41, "learning_rate": 0.001, "loss": 2.4994, "step": 189612 }, { "epoch": 36.41, "learning_rate": 0.001, "loss": 2.5047, "step": 189624 }, { "epoch": 36.41, "learning_rate": 0.001, "loss": 2.4994, "step": 189636 }, { "epoch": 36.41, "learning_rate": 0.001, "loss": 2.5098, "step": 189648 }, { "epoch": 36.42, "learning_rate": 0.001, "loss": 2.5064, "step": 189660 }, { "epoch": 36.42, "learning_rate": 0.001, "loss": 2.5046, "step": 189672 }, { "epoch": 36.42, "learning_rate": 0.001, "loss": 2.5021, "step": 189684 }, { "epoch": 36.42, "learning_rate": 0.001, "loss": 2.4965, "step": 189696 }, { "epoch": 36.43, "learning_rate": 0.001, "loss": 2.5092, "step": 189708 }, { "epoch": 36.43, "learning_rate": 0.001, "loss": 2.5095, "step": 189720 }, { "epoch": 36.43, "learning_rate": 0.001, "loss": 2.5084, "step": 189732 }, { "epoch": 36.43, "learning_rate": 0.001, "loss": 2.501, "step": 189744 }, { "epoch": 36.44, "learning_rate": 0.001, "loss": 2.5107, "step": 189756 }, { "epoch": 36.44, "learning_rate": 0.001, "loss": 2.506, "step": 189768 }, { "epoch": 36.44, "learning_rate": 0.001, "loss": 2.5162, "step": 189780 }, { "epoch": 36.44, "learning_rate": 0.001, "loss": 2.5033, "step": 189792 }, { "epoch": 36.44, "learning_rate": 0.001, "loss": 2.5057, "step": 189804 }, { "epoch": 36.45, "learning_rate": 0.001, "loss": 2.502, "step": 189816 }, { "epoch": 36.45, "learning_rate": 0.001, "loss": 2.5014, "step": 189828 }, { "epoch": 36.45, "learning_rate": 0.001, "loss": 2.4973, "step": 189840 }, { "epoch": 36.45, "learning_rate": 0.001, "loss": 2.5021, "step": 189852 }, { "epoch": 36.46, "learning_rate": 0.001, "loss": 2.5073, "step": 189864 }, { "epoch": 36.46, "learning_rate": 0.001, "loss": 2.4972, "step": 189876 }, { "epoch": 36.46, "learning_rate": 0.001, "loss": 2.5064, "step": 189888 }, { "epoch": 36.46, "learning_rate": 0.001, "loss": 2.5015, "step": 189900 }, { "epoch": 36.47, "learning_rate": 0.001, "loss": 2.5137, "step": 189912 }, { "epoch": 36.47, "learning_rate": 0.001, "loss": 2.4966, "step": 189924 }, { "epoch": 36.47, "learning_rate": 0.001, "loss": 2.5043, "step": 189936 }, { "epoch": 36.47, "learning_rate": 0.001, "loss": 2.507, "step": 189948 }, { "epoch": 36.47, "learning_rate": 0.001, "loss": 2.5023, "step": 189960 }, { "epoch": 36.48, "learning_rate": 0.001, "loss": 2.5074, "step": 189972 }, { "epoch": 36.48, "learning_rate": 0.001, "loss": 2.5055, "step": 189984 }, { "epoch": 36.48, "learning_rate": 0.001, "loss": 2.5131, "step": 189996 }, { "epoch": 36.48, "eval_ag_news_accuracy": 0.32859375, "eval_ag_news_bleu_score": 4.889326613839696, "eval_ag_news_bleu_score_sem": 0.150296906940068, "eval_ag_news_emb_cos_sim": 0.8097676038742065, "eval_ag_news_emb_cos_sim_sem": 0.0081402693217189, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.480982780456543, "eval_ag_news_n_ngrams_match_1": 14.2, "eval_ag_news_n_ngrams_match_2": 3.198, "eval_ag_news_n_ngrams_match_3": 0.874, "eval_ag_news_num_pred_words": 46.69, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.491638537213106, "eval_ag_news_pred_num_tokens": 62.9921875, "eval_ag_news_rouge_score": 0.3529091680922551, "eval_ag_news_runtime": 10.4144, "eval_ag_news_samples_per_second": 48.011, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35523733325441376, "eval_ag_news_token_set_f1_sem": 0.004663478066243663, "eval_ag_news_token_set_precision": 0.34007220103033126, "eval_ag_news_token_set_recall": 0.38890395718823123, "eval_ag_news_true_num_tokens": 56.09375, "step": 190000 }, { "epoch": 36.48, "eval_anthropic_toxic_prompts_accuracy": 0.1159375, "eval_anthropic_toxic_prompts_bleu_score": 3.18309835174893, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12002750812927747, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6776454448699951, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008478981811232193, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1892497539520264, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.986, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748, "eval_anthropic_toxic_prompts_num_pred_words": 47.496, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.27021198028778, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21491906455286766, "eval_anthropic_toxic_prompts_runtime": 9.9927, "eval_anthropic_toxic_prompts_samples_per_second": 50.036, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.36750045574153034, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006832478995432572, "eval_anthropic_toxic_prompts_token_set_precision": 0.4443156260470236, "eval_anthropic_toxic_prompts_token_set_recall": 0.3403265840379068, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 190000 }, { "epoch": 36.48, "eval_arxiv_accuracy": 0.3525625, "eval_arxiv_bleu_score": 4.503671129384424, "eval_arxiv_bleu_score_sem": 0.13285956255150483, "eval_arxiv_emb_cos_sim": 0.7726466655731201, "eval_arxiv_emb_cos_sim_sem": 0.00748045843264733, "eval_arxiv_emb_top1_equal": 0.34375, "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3361704349517822, "eval_arxiv_n_ngrams_match_1": 15.408, "eval_arxiv_n_ngrams_match_2": 3.086, "eval_arxiv_n_ngrams_match_3": 0.726, "eval_arxiv_num_pred_words": 40.762, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.111266384962363, "eval_arxiv_pred_num_tokens": 62.8984375, "eval_arxiv_rouge_score": 0.36849865990809466, "eval_arxiv_runtime": 10.474, "eval_arxiv_samples_per_second": 47.737, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3635061651643006, "eval_arxiv_token_set_f1_sem": 0.004016926991203057, "eval_arxiv_token_set_precision": 0.31478739769375236, "eval_arxiv_token_set_recall": 0.4511008585885091, "eval_arxiv_true_num_tokens": 64.0, "step": 190000 }, { "epoch": 36.48, "eval_python_code_alpaca_accuracy": 0.1620625, "eval_python_code_alpaca_bleu_score": 4.821750766070412, "eval_python_code_alpaca_bleu_score_sem": 0.14120862933157688, "eval_python_code_alpaca_emb_cos_sim": 0.7656441926956177, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007524264813318341, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8377509117126465, "eval_python_code_alpaca_n_ngrams_match_1": 10.196, "eval_python_code_alpaca_n_ngrams_match_2": 3.066, "eval_python_code_alpaca_n_ngrams_match_3": 1.084, "eval_python_code_alpaca_num_pred_words": 44.234, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.07731392608985, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34422328335384295, "eval_python_code_alpaca_runtime": 9.8998, "eval_python_code_alpaca_samples_per_second": 50.506, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.49656639966862104, "eval_python_code_alpaca_token_set_f1_sem": 0.0053701727727626936, "eval_python_code_alpaca_token_set_precision": 0.5613374215543115, "eval_python_code_alpaca_token_set_recall": 0.4661012946282913, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 190000 }, { "epoch": 36.48, "eval_wikibio_accuracy": 0.33040625, "eval_wikibio_bleu_score": 5.821449665588072, "eval_wikibio_bleu_score_sem": 0.20508171945609774, "eval_wikibio_emb_cos_sim": 0.7408549189567566, "eval_wikibio_emb_cos_sim_sem": 0.009548500239044638, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.640425443649292, "eval_wikibio_n_ngrams_match_1": 9.918, "eval_wikibio_n_ngrams_match_2": 3.336, "eval_wikibio_n_ngrams_match_3": 1.192, "eval_wikibio_num_pred_words": 35.55, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.10804610326765, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34884211846331337, "eval_wikibio_runtime": 10.2106, "eval_wikibio_samples_per_second": 48.969, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.3148316281668783, "eval_wikibio_token_set_f1_sem": 0.005733429946334979, "eval_wikibio_token_set_precision": 0.32222579695056164, "eval_wikibio_token_set_recall": 0.32237351169818707, "eval_wikibio_true_num_tokens": 61.1328125, "step": 190000 }, { "epoch": 36.48, "eval_nq_accuracy": 0.53625, "eval_nq_bleu_score": 12.089524617474838, "eval_nq_bleu_score_sem": 0.4878974414477028, "eval_nq_emb_cos_sim": 0.8411483764648438, "eval_nq_emb_cos_sim_sem": 0.006586143795550161, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1337051391601562, "eval_nq_n_ngrams_match_1": 23.602, "eval_nq_n_ngrams_match_2": 8.768, "eval_nq_n_ngrams_match_3": 4.068, "eval_nq_num_pred_words": 49.082, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.44610289527519, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45755382537163897, "eval_nq_runtime": 11.0329, "eval_nq_samples_per_second": 45.319, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.471376407837602, "eval_nq_token_set_f1_sem": 0.004911739546606772, "eval_nq_token_set_precision": 0.4310105004624452, "eval_nq_token_set_recall": 0.5276862723635517, "eval_nq_true_num_tokens": 64.0, "step": 190000 }, { "epoch": 36.48, "learning_rate": 0.001, "loss": 2.5058, "step": 190008 }, { "epoch": 36.49, "learning_rate": 0.001, "loss": 2.5008, "step": 190020 }, { "epoch": 36.49, "learning_rate": 0.001, "loss": 2.4997, "step": 190032 }, { "epoch": 36.49, "learning_rate": 0.001, "loss": 2.5071, "step": 190044 }, { "epoch": 36.49, "learning_rate": 0.001, "loss": 2.4956, "step": 190056 }, { "epoch": 36.5, "learning_rate": 0.001, "loss": 2.4949, "step": 190068 }, { "epoch": 36.5, "learning_rate": 0.001, "loss": 2.4993, "step": 190080 }, { "epoch": 36.5, "learning_rate": 0.001, "loss": 2.5038, "step": 190092 }, { "epoch": 36.5, "learning_rate": 0.001, "loss": 2.5111, "step": 190104 }, { "epoch": 36.5, "learning_rate": 0.001, "loss": 2.5033, "step": 190116 }, { "epoch": 36.51, "learning_rate": 0.001, "loss": 2.5079, "step": 190128 }, { "epoch": 36.51, "learning_rate": 0.001, "loss": 2.5094, "step": 190140 }, { "epoch": 36.51, "learning_rate": 0.001, "loss": 2.5176, "step": 190152 }, { "epoch": 36.51, "learning_rate": 0.001, "loss": 2.5002, "step": 190164 }, { "epoch": 36.52, "learning_rate": 0.001, "loss": 2.5027, "step": 190176 }, { "epoch": 36.52, "learning_rate": 0.001, "loss": 2.5072, "step": 190188 }, { "epoch": 36.52, "learning_rate": 0.001, "loss": 2.5118, "step": 190200 }, { "epoch": 36.52, "learning_rate": 0.001, "loss": 2.5076, "step": 190212 }, { "epoch": 36.53, "learning_rate": 0.001, "loss": 2.5137, "step": 190224 }, { "epoch": 36.53, "learning_rate": 0.001, "loss": 2.5113, "step": 190236 }, { "epoch": 36.53, "learning_rate": 0.001, "loss": 2.4984, "step": 190248 }, { "epoch": 36.53, "learning_rate": 0.001, "loss": 2.5028, "step": 190260 }, { "epoch": 36.53, "learning_rate": 0.001, "loss": 2.4967, "step": 190272 }, { "epoch": 36.54, "learning_rate": 0.001, "loss": 2.4955, "step": 190284 }, { "epoch": 36.54, "learning_rate": 0.001, "loss": 2.5166, "step": 190296 }, { "epoch": 36.54, "learning_rate": 0.001, "loss": 2.511, "step": 190308 }, { "epoch": 36.54, "learning_rate": 0.001, "loss": 2.4976, "step": 190320 }, { "epoch": 36.55, "learning_rate": 0.001, "loss": 2.4976, "step": 190332 }, { "epoch": 36.55, "learning_rate": 0.001, "loss": 2.5017, "step": 190344 }, { "epoch": 36.55, "learning_rate": 0.001, "loss": 2.512, "step": 190356 }, { "epoch": 36.55, "learning_rate": 0.001, "loss": 2.499, "step": 190368 }, { "epoch": 36.56, "learning_rate": 0.001, "loss": 2.499, "step": 190380 }, { "epoch": 36.56, "learning_rate": 0.001, "loss": 2.5088, "step": 190392 }, { "epoch": 36.56, "learning_rate": 0.001, "loss": 2.5059, "step": 190404 }, { "epoch": 36.56, "learning_rate": 0.001, "loss": 2.4955, "step": 190416 }, { "epoch": 36.56, "learning_rate": 0.001, "loss": 2.5054, "step": 190428 }, { "epoch": 36.57, "learning_rate": 0.001, "loss": 2.5039, "step": 190440 }, { "epoch": 36.57, "learning_rate": 0.001, "loss": 2.4957, "step": 190452 }, { "epoch": 36.57, "learning_rate": 0.001, "loss": 2.5118, "step": 190464 }, { "epoch": 36.57, "learning_rate": 0.001, "loss": 2.4985, "step": 190476 }, { "epoch": 36.58, "learning_rate": 0.001, "loss": 2.5052, "step": 190488 }, { "epoch": 36.58, "learning_rate": 0.001, "loss": 2.5102, "step": 190500 }, { "epoch": 36.58, "learning_rate": 0.001, "loss": 2.5113, "step": 190512 }, { "epoch": 36.58, "learning_rate": 0.001, "loss": 2.4973, "step": 190524 }, { "epoch": 36.59, "learning_rate": 0.001, "loss": 2.5114, "step": 190536 }, { "epoch": 36.59, "learning_rate": 0.001, "loss": 2.4908, "step": 190548 }, { "epoch": 36.59, "learning_rate": 0.001, "loss": 2.502, "step": 190560 }, { "epoch": 36.59, "learning_rate": 0.001, "loss": 2.5057, "step": 190572 }, { "epoch": 36.59, "learning_rate": 0.001, "loss": 2.5068, "step": 190584 }, { "epoch": 36.6, "learning_rate": 0.001, "loss": 2.5031, "step": 190596 }, { "epoch": 36.6, "learning_rate": 0.001, "loss": 2.4883, "step": 190608 }, { "epoch": 36.6, "learning_rate": 0.001, "loss": 2.5021, "step": 190620 }, { "epoch": 36.6, "eval_ag_news_accuracy": 0.32790625, "eval_ag_news_bleu_score": 4.970183054805225, "eval_ag_news_bleu_score_sem": 0.15658988378414648, "eval_ag_news_emb_cos_sim": 0.8129646182060242, "eval_ag_news_emb_cos_sim_sem": 0.008132920623554321, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.488006830215454, "eval_ag_news_n_ngrams_match_1": 14.302, "eval_ag_news_n_ngrams_match_2": 3.23, "eval_ag_news_n_ngrams_match_3": 0.95, "eval_ag_news_num_pred_words": 46.01, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.720664827449426, "eval_ag_news_pred_num_tokens": 62.9921875, "eval_ag_news_rouge_score": 0.35718559401245165, "eval_ag_news_runtime": 10.6665, "eval_ag_news_samples_per_second": 46.876, "eval_ag_news_steps_per_second": 0.094, "eval_ag_news_token_set_f1": 0.35591973392216913, "eval_ag_news_token_set_f1_sem": 0.004579572732854059, "eval_ag_news_token_set_precision": 0.3409783392758831, "eval_ag_news_token_set_recall": 0.388332797829414, "eval_ag_news_true_num_tokens": 56.09375, "step": 190625 }, { "epoch": 36.6, "eval_anthropic_toxic_prompts_accuracy": 0.11740625, "eval_anthropic_toxic_prompts_bleu_score": 3.2230847961552382, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12441745931148382, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6794794797897339, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007912346408961574, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.187584161758423, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.202, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.932, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 46.66, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.229821351172216, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21566638527432538, "eval_anthropic_toxic_prompts_runtime": 10.3769, "eval_anthropic_toxic_prompts_samples_per_second": 48.184, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.35942395714194436, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00662595840290432, "eval_anthropic_toxic_prompts_token_set_precision": 0.438351593603664, "eval_anthropic_toxic_prompts_token_set_recall": 0.3316988295536793, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 190625 }, { "epoch": 36.6, "eval_arxiv_accuracy": 0.3519375, "eval_arxiv_bleu_score": 4.529135521041015, "eval_arxiv_bleu_score_sem": 0.13090129068801212, "eval_arxiv_emb_cos_sim": 0.7814404368400574, "eval_arxiv_emb_cos_sim_sem": 0.006839354158869517, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3420345783233643, "eval_arxiv_n_ngrams_match_1": 15.472, "eval_arxiv_n_ngrams_match_2": 3.046, "eval_arxiv_n_ngrams_match_3": 0.722, "eval_arxiv_num_pred_words": 40.864, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.276599175202673, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3688866299810071, "eval_arxiv_runtime": 10.2496, "eval_arxiv_samples_per_second": 48.783, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.36288703756801005, "eval_arxiv_token_set_f1_sem": 0.004146852334698222, "eval_arxiv_token_set_precision": 0.3150340745764005, "eval_arxiv_token_set_recall": 0.44637937930033084, "eval_arxiv_true_num_tokens": 64.0, "step": 190625 }, { "epoch": 36.6, "eval_python_code_alpaca_accuracy": 0.1643125, "eval_python_code_alpaca_bleu_score": 4.845545085774176, "eval_python_code_alpaca_bleu_score_sem": 0.15143531997666987, "eval_python_code_alpaca_emb_cos_sim": 0.7690310478210449, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007414402467229616, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8400282859802246, "eval_python_code_alpaca_n_ngrams_match_1": 10.096, "eval_python_code_alpaca_n_ngrams_match_2": 3.102, "eval_python_code_alpaca_n_ngrams_match_3": 1.118, "eval_python_code_alpaca_num_pred_words": 44.636, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.11624968019859, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33805627720845466, "eval_python_code_alpaca_runtime": 10.1755, "eval_python_code_alpaca_samples_per_second": 49.138, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.48657765601849395, "eval_python_code_alpaca_token_set_f1_sem": 0.00544803390017607, "eval_python_code_alpaca_token_set_precision": 0.5525007636804988, "eval_python_code_alpaca_token_set_recall": 0.45726584468655856, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 190625 }, { "epoch": 36.6, "eval_wikibio_accuracy": 0.33078125, "eval_wikibio_bleu_score": 6.05024393516826, "eval_wikibio_bleu_score_sem": 0.22924731291614986, "eval_wikibio_emb_cos_sim": 0.7359403967857361, "eval_wikibio_emb_cos_sim_sem": 0.010322207717860837, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6080996990203857, "eval_wikibio_n_ngrams_match_1": 9.936, "eval_wikibio_n_ngrams_match_2": 3.42, "eval_wikibio_n_ngrams_match_3": 1.242, "eval_wikibio_num_pred_words": 35.516, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.89587289105412, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3531432564053888, "eval_wikibio_runtime": 10.6937, "eval_wikibio_samples_per_second": 46.756, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.31618956456395764, "eval_wikibio_token_set_f1_sem": 0.0057675730544714515, "eval_wikibio_token_set_precision": 0.3214490436287887, "eval_wikibio_token_set_recall": 0.331749318634022, "eval_wikibio_true_num_tokens": 61.1328125, "step": 190625 }, { "epoch": 36.6, "eval_nq_accuracy": 0.53615625, "eval_nq_bleu_score": 12.209789525909452, "eval_nq_bleu_score_sem": 0.49577106116458447, "eval_nq_emb_cos_sim": 0.8389476537704468, "eval_nq_emb_cos_sim_sem": 0.006790953113166311, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127750873565674, "eval_nq_n_ngrams_match_1": 23.538, "eval_nq_n_ngrams_match_2": 8.668, "eval_nq_n_ngrams_match_3": 4.106, "eval_nq_num_pred_words": 49.03, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.39596197970087, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4577853482780898, "eval_nq_runtime": 10.7903, "eval_nq_samples_per_second": 46.338, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4689916771684365, "eval_nq_token_set_f1_sem": 0.0048284255827953145, "eval_nq_token_set_precision": 0.4277415814122987, "eval_nq_token_set_recall": 0.5269846898422346, "eval_nq_true_num_tokens": 64.0, "step": 190625 }, { "epoch": 36.6, "learning_rate": 0.001, "loss": 2.5011, "step": 190632 }, { "epoch": 36.61, "learning_rate": 0.001, "loss": 2.505, "step": 190644 }, { "epoch": 36.61, "learning_rate": 0.001, "loss": 2.5075, "step": 190656 }, { "epoch": 36.61, "learning_rate": 0.001, "loss": 2.5063, "step": 190668 }, { "epoch": 36.61, "learning_rate": 0.001, "loss": 2.5009, "step": 190680 }, { "epoch": 36.62, "learning_rate": 0.001, "loss": 2.5102, "step": 190692 }, { "epoch": 36.62, "learning_rate": 0.001, "loss": 2.5031, "step": 190704 }, { "epoch": 36.62, "learning_rate": 0.001, "loss": 2.5028, "step": 190716 }, { "epoch": 36.62, "learning_rate": 0.001, "loss": 2.5047, "step": 190728 }, { "epoch": 36.62, "learning_rate": 0.001, "loss": 2.515, "step": 190740 }, { "epoch": 36.63, "learning_rate": 0.001, "loss": 2.506, "step": 190752 }, { "epoch": 36.63, "learning_rate": 0.001, "loss": 2.5084, "step": 190764 }, { "epoch": 36.63, "learning_rate": 0.001, "loss": 2.5072, "step": 190776 }, { "epoch": 36.63, "learning_rate": 0.001, "loss": 2.5179, "step": 190788 }, { "epoch": 36.64, "learning_rate": 0.001, "loss": 2.5095, "step": 190800 }, { "epoch": 36.64, "learning_rate": 0.001, "loss": 2.5059, "step": 190812 }, { "epoch": 36.64, "learning_rate": 0.001, "loss": 2.5039, "step": 190824 }, { "epoch": 36.64, "learning_rate": 0.001, "loss": 2.5009, "step": 190836 }, { "epoch": 36.65, "learning_rate": 0.001, "loss": 2.5004, "step": 190848 }, { "epoch": 36.65, "learning_rate": 0.001, "loss": 2.4976, "step": 190860 }, { "epoch": 36.65, "learning_rate": 0.001, "loss": 2.5066, "step": 190872 }, { "epoch": 36.65, "learning_rate": 0.001, "loss": 2.5023, "step": 190884 }, { "epoch": 36.65, "learning_rate": 0.001, "loss": 2.5059, "step": 190896 }, { "epoch": 36.66, "learning_rate": 0.001, "loss": 2.5148, "step": 190908 }, { "epoch": 36.66, "learning_rate": 0.001, "loss": 2.51, "step": 190920 }, { "epoch": 36.66, "learning_rate": 0.001, "loss": 2.5055, "step": 190932 }, { "epoch": 36.66, "learning_rate": 0.001, "loss": 2.4991, "step": 190944 }, { "epoch": 36.67, "learning_rate": 0.001, "loss": 2.5109, "step": 190956 }, { "epoch": 36.67, "learning_rate": 0.001, "loss": 2.5136, "step": 190968 }, { "epoch": 36.67, "learning_rate": 0.001, "loss": 2.5048, "step": 190980 }, { "epoch": 36.67, "learning_rate": 0.001, "loss": 2.5029, "step": 190992 }, { "epoch": 36.68, "learning_rate": 0.001, "loss": 2.5135, "step": 191004 }, { "epoch": 36.68, "learning_rate": 0.001, "loss": 2.5013, "step": 191016 }, { "epoch": 36.68, "learning_rate": 0.001, "loss": 2.498, "step": 191028 }, { "epoch": 36.68, "learning_rate": 0.001, "loss": 2.505, "step": 191040 }, { "epoch": 36.68, "learning_rate": 0.001, "loss": 2.4917, "step": 191052 }, { "epoch": 36.69, "learning_rate": 0.001, "loss": 2.5092, "step": 191064 }, { "epoch": 36.69, "learning_rate": 0.001, "loss": 2.5012, "step": 191076 }, { "epoch": 36.69, "learning_rate": 0.001, "loss": 2.5096, "step": 191088 }, { "epoch": 36.69, "learning_rate": 0.001, "loss": 2.5197, "step": 191100 }, { "epoch": 36.7, "learning_rate": 0.001, "loss": 2.5072, "step": 191112 }, { "epoch": 36.7, "learning_rate": 0.001, "loss": 2.5024, "step": 191124 }, { "epoch": 36.7, "learning_rate": 0.001, "loss": 2.4972, "step": 191136 }, { "epoch": 36.7, "learning_rate": 0.001, "loss": 2.5127, "step": 191148 }, { "epoch": 36.71, "learning_rate": 0.001, "loss": 2.5033, "step": 191160 }, { "epoch": 36.71, "learning_rate": 0.001, "loss": 2.5078, "step": 191172 }, { "epoch": 36.71, "learning_rate": 0.001, "loss": 2.5068, "step": 191184 }, { "epoch": 36.71, "learning_rate": 0.001, "loss": 2.5002, "step": 191196 }, { "epoch": 36.71, "learning_rate": 0.001, "loss": 2.5118, "step": 191208 }, { "epoch": 36.72, "learning_rate": 0.001, "loss": 2.5038, "step": 191220 }, { "epoch": 36.72, "learning_rate": 0.001, "loss": 2.5005, "step": 191232 }, { "epoch": 36.72, "learning_rate": 0.001, "loss": 2.509, "step": 191244 }, { "epoch": 36.72, "eval_ag_news_accuracy": 0.32865625, "eval_ag_news_bleu_score": 4.874845444214836, "eval_ag_news_bleu_score_sem": 0.15551308839001637, "eval_ag_news_emb_cos_sim": 0.821132481098175, "eval_ag_news_emb_cos_sim_sem": 0.006658413128400693, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4878122806549072, "eval_ag_news_n_ngrams_match_1": 14.42, "eval_ag_news_n_ngrams_match_2": 3.15, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.766, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.7142996556772, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3569458635557363, "eval_ag_news_runtime": 10.5645, "eval_ag_news_samples_per_second": 47.328, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.357107765394553, "eval_ag_news_token_set_f1_sem": 0.0044922367423741024, "eval_ag_news_token_set_precision": 0.34201986581189936, "eval_ag_news_token_set_recall": 0.38759391638912366, "eval_ag_news_true_num_tokens": 56.09375, "step": 191250 }, { "epoch": 36.72, "eval_anthropic_toxic_prompts_accuracy": 0.1169375, "eval_anthropic_toxic_prompts_bleu_score": 3.3454322364822757, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618890293920984, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6851115822792053, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008640836246226153, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2210209369659424, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.26, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.028, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.794, "eval_anthropic_toxic_prompts_num_pred_words": 46.476, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.053685362417614, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21500217716272796, "eval_anthropic_toxic_prompts_runtime": 9.7647, "eval_anthropic_toxic_prompts_samples_per_second": 51.205, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3569594211281399, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00653108879760226, "eval_anthropic_toxic_prompts_token_set_precision": 0.4398992791319905, "eval_anthropic_toxic_prompts_token_set_recall": 0.32468329980141386, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 191250 }, { "epoch": 36.72, "eval_arxiv_accuracy": 0.35284375, "eval_arxiv_bleu_score": 4.456663465691136, "eval_arxiv_bleu_score_sem": 0.12454161831872101, "eval_arxiv_emb_cos_sim": 0.7780709266662598, "eval_arxiv_emb_cos_sim_sem": 0.006981997533476606, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3266918659210205, "eval_arxiv_n_ngrams_match_1": 15.616, "eval_arxiv_n_ngrams_match_2": 3.09, "eval_arxiv_n_ngrams_match_3": 0.654, "eval_arxiv_num_pred_words": 40.876, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.8460706296287, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3723113187218906, "eval_arxiv_runtime": 10.5588, "eval_arxiv_samples_per_second": 47.354, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.36602190801024914, "eval_arxiv_token_set_f1_sem": 0.004233314161744006, "eval_arxiv_token_set_precision": 0.31895854846175137, "eval_arxiv_token_set_recall": 0.4479607899479252, "eval_arxiv_true_num_tokens": 64.0, "step": 191250 }, { "epoch": 36.72, "eval_python_code_alpaca_accuracy": 0.162, "eval_python_code_alpaca_bleu_score": 4.869967117063928, "eval_python_code_alpaca_bleu_score_sem": 0.14995947754495137, "eval_python_code_alpaca_emb_cos_sim": 0.7709345817565918, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007327043904067676, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8595130443573, "eval_python_code_alpaca_n_ngrams_match_1": 9.962, "eval_python_code_alpaca_n_ngrams_match_2": 3.022, "eval_python_code_alpaca_n_ngrams_match_3": 1.068, "eval_python_code_alpaca_num_pred_words": 43.068, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.45302601746131, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34106871123170546, "eval_python_code_alpaca_runtime": 9.8012, "eval_python_code_alpaca_samples_per_second": 51.014, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.48344841239500697, "eval_python_code_alpaca_token_set_f1_sem": 0.005146199673156121, "eval_python_code_alpaca_token_set_precision": 0.545758760293622, "eval_python_code_alpaca_token_set_recall": 0.45612336885109833, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 191250 }, { "epoch": 36.72, "eval_wikibio_accuracy": 0.330875, "eval_wikibio_bleu_score": 5.8798543960203595, "eval_wikibio_bleu_score_sem": 0.22478571728584212, "eval_wikibio_emb_cos_sim": 0.7380439043045044, "eval_wikibio_emb_cos_sim_sem": 0.009970870271585008, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6549746990203857, "eval_wikibio_n_ngrams_match_1": 9.676, "eval_wikibio_n_ngrams_match_2": 3.294, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 35.27, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.66654280137412, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3423513361375374, "eval_wikibio_runtime": 10.6715, "eval_wikibio_samples_per_second": 46.854, "eval_wikibio_steps_per_second": 0.094, "eval_wikibio_token_set_f1": 0.3084683876421554, "eval_wikibio_token_set_f1_sem": 0.006050335069347697, "eval_wikibio_token_set_precision": 0.31470856913338124, "eval_wikibio_token_set_recall": 0.3248336981986287, "eval_wikibio_true_num_tokens": 61.1328125, "step": 191250 }, { "epoch": 36.72, "eval_nq_accuracy": 0.5354375, "eval_nq_bleu_score": 12.210826306975214, "eval_nq_bleu_score_sem": 0.48720291355776335, "eval_nq_emb_cos_sim": 0.8385281562805176, "eval_nq_emb_cos_sim_sem": 0.006453378588562813, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.132600784301758, "eval_nq_n_ngrams_match_1": 23.548, "eval_nq_n_ngrams_match_2": 8.74, "eval_nq_n_ngrams_match_3": 4.092, "eval_nq_num_pred_words": 49.204, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.436780549044897, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45753519315122376, "eval_nq_runtime": 10.3536, "eval_nq_samples_per_second": 48.292, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4719651167065373, "eval_nq_token_set_f1_sem": 0.004741203260339542, "eval_nq_token_set_precision": 0.4287682934356082, "eval_nq_token_set_recall": 0.5328991235947066, "eval_nq_true_num_tokens": 64.0, "step": 191250 }, { "epoch": 36.72, "learning_rate": 0.001, "loss": 2.505, "step": 191256 }, { "epoch": 36.73, "learning_rate": 0.001, "loss": 2.5077, "step": 191268 }, { "epoch": 36.73, "learning_rate": 0.001, "loss": 2.5083, "step": 191280 }, { "epoch": 36.73, "learning_rate": 0.001, "loss": 2.5048, "step": 191292 }, { "epoch": 36.73, "learning_rate": 0.001, "loss": 2.5143, "step": 191304 }, { "epoch": 36.74, "learning_rate": 0.001, "loss": 2.5018, "step": 191316 }, { "epoch": 36.74, "learning_rate": 0.001, "loss": 2.5047, "step": 191328 }, { "epoch": 36.74, "learning_rate": 0.001, "loss": 2.5131, "step": 191340 }, { "epoch": 36.74, "learning_rate": 0.001, "loss": 2.5055, "step": 191352 }, { "epoch": 36.74, "learning_rate": 0.001, "loss": 2.5002, "step": 191364 }, { "epoch": 36.75, "learning_rate": 0.001, "loss": 2.5028, "step": 191376 }, { "epoch": 36.75, "learning_rate": 0.001, "loss": 2.5138, "step": 191388 }, { "epoch": 36.75, "learning_rate": 0.001, "loss": 2.5111, "step": 191400 }, { "epoch": 36.75, "learning_rate": 0.001, "loss": 2.5118, "step": 191412 }, { "epoch": 36.76, "learning_rate": 0.001, "loss": 2.5062, "step": 191424 }, { "epoch": 36.76, "learning_rate": 0.001, "loss": 2.5085, "step": 191436 }, { "epoch": 36.76, "learning_rate": 0.001, "loss": 2.5141, "step": 191448 }, { "epoch": 36.76, "learning_rate": 0.001, "loss": 2.5111, "step": 191460 }, { "epoch": 36.76, "learning_rate": 0.001, "loss": 2.5034, "step": 191472 }, { "epoch": 36.77, "learning_rate": 0.001, "loss": 2.5162, "step": 191484 }, { "epoch": 36.77, "learning_rate": 0.001, "loss": 2.5075, "step": 191496 }, { "epoch": 36.77, "learning_rate": 0.001, "loss": 2.5053, "step": 191508 }, { "epoch": 36.77, "learning_rate": 0.001, "loss": 2.5001, "step": 191520 }, { "epoch": 36.78, "learning_rate": 0.001, "loss": 2.5056, "step": 191532 }, { "epoch": 36.78, "learning_rate": 0.001, "loss": 2.5048, "step": 191544 }, { "epoch": 36.78, "learning_rate": 0.001, "loss": 2.5063, "step": 191556 }, { "epoch": 36.78, "learning_rate": 0.001, "loss": 2.5027, "step": 191568 }, { "epoch": 36.79, "learning_rate": 0.001, "loss": 2.5108, "step": 191580 }, { "epoch": 36.79, "learning_rate": 0.001, "loss": 2.5013, "step": 191592 }, { "epoch": 36.79, "learning_rate": 0.001, "loss": 2.5128, "step": 191604 }, { "epoch": 36.79, "learning_rate": 0.001, "loss": 2.5106, "step": 191616 }, { "epoch": 36.79, "learning_rate": 0.001, "loss": 2.504, "step": 191628 }, { "epoch": 36.8, "learning_rate": 0.001, "loss": 2.5169, "step": 191640 }, { "epoch": 36.8, "learning_rate": 0.001, "loss": 2.5079, "step": 191652 }, { "epoch": 36.8, "learning_rate": 0.001, "loss": 2.5151, "step": 191664 }, { "epoch": 36.8, "learning_rate": 0.001, "loss": 2.498, "step": 191676 }, { "epoch": 36.81, "learning_rate": 0.001, "loss": 2.5005, "step": 191688 }, { "epoch": 36.81, "learning_rate": 0.001, "loss": 2.5118, "step": 191700 }, { "epoch": 36.81, "learning_rate": 0.001, "loss": 2.5143, "step": 191712 }, { "epoch": 36.81, "learning_rate": 0.001, "loss": 2.5119, "step": 191724 }, { "epoch": 36.82, "learning_rate": 0.001, "loss": 2.5034, "step": 191736 }, { "epoch": 36.82, "learning_rate": 0.001, "loss": 2.5022, "step": 191748 }, { "epoch": 36.82, "learning_rate": 0.001, "loss": 2.5034, "step": 191760 }, { "epoch": 36.82, "learning_rate": 0.001, "loss": 2.5021, "step": 191772 }, { "epoch": 36.82, "learning_rate": 0.001, "loss": 2.5109, "step": 191784 }, { "epoch": 36.83, "learning_rate": 0.001, "loss": 2.52, "step": 191796 }, { "epoch": 36.83, "learning_rate": 0.001, "loss": 2.5066, "step": 191808 }, { "epoch": 36.83, "learning_rate": 0.001, "loss": 2.5049, "step": 191820 }, { "epoch": 36.83, "learning_rate": 0.001, "loss": 2.4984, "step": 191832 }, { "epoch": 36.84, "learning_rate": 0.001, "loss": 2.5128, "step": 191844 }, { "epoch": 36.84, "learning_rate": 0.001, "loss": 2.5007, "step": 191856 }, { "epoch": 36.84, "learning_rate": 0.001, "loss": 2.5065, "step": 191868 }, { "epoch": 36.84, "eval_ag_news_accuracy": 0.32890625, "eval_ag_news_bleu_score": 4.961828104843212, "eval_ag_news_bleu_score_sem": 0.16032314557285152, "eval_ag_news_emb_cos_sim": 0.8242726922035217, "eval_ag_news_emb_cos_sim_sem": 0.006349109260980386, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4847490787506104, "eval_ag_news_n_ngrams_match_1": 14.4, "eval_ag_news_n_ngrams_match_2": 3.188, "eval_ag_news_n_ngrams_match_3": 0.904, "eval_ag_news_num_pred_words": 46.562, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.614242476583044, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3599661587819324, "eval_ag_news_runtime": 11.0218, "eval_ag_news_samples_per_second": 45.365, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.3626752166450062, "eval_ag_news_token_set_f1_sem": 0.004522312777982414, "eval_ag_news_token_set_precision": 0.34749341850621096, "eval_ag_news_token_set_recall": 0.3957483171577414, "eval_ag_news_true_num_tokens": 56.09375, "step": 191875 }, { "epoch": 36.84, "eval_anthropic_toxic_prompts_accuracy": 0.11709375, "eval_anthropic_toxic_prompts_bleu_score": 3.281347898377915, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12580742555756105, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6784017086029053, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00871333211838743, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.197878360748291, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.006, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778, "eval_anthropic_toxic_prompts_num_pred_words": 46.95, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.48053619386211, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2166390113462608, "eval_anthropic_toxic_prompts_runtime": 9.9694, "eval_anthropic_toxic_prompts_samples_per_second": 50.154, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.35545920993497343, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006452949749060365, "eval_anthropic_toxic_prompts_token_set_precision": 0.4444899045629664, "eval_anthropic_toxic_prompts_token_set_recall": 0.32280364009316653, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 191875 }, { "epoch": 36.84, "eval_arxiv_accuracy": 0.35175, "eval_arxiv_bleu_score": 4.676512130556084, "eval_arxiv_bleu_score_sem": 0.14350609871824133, "eval_arxiv_emb_cos_sim": 0.7798517942428589, "eval_arxiv_emb_cos_sim_sem": 0.007022511191607703, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.338822603225708, "eval_arxiv_n_ngrams_match_1": 15.576, "eval_arxiv_n_ngrams_match_2": 3.118, "eval_arxiv_n_ngrams_match_3": 0.77, "eval_arxiv_num_pred_words": 40.468, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.18592114854607, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.372831543507237, "eval_arxiv_runtime": 10.2924, "eval_arxiv_samples_per_second": 48.579, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3666216839205324, "eval_arxiv_token_set_f1_sem": 0.004222766437259845, "eval_arxiv_token_set_precision": 0.31763263029430083, "eval_arxiv_token_set_recall": 0.4488276679301581, "eval_arxiv_true_num_tokens": 64.0, "step": 191875 }, { "epoch": 36.84, "eval_python_code_alpaca_accuracy": 0.16040625, "eval_python_code_alpaca_bleu_score": 4.555213771220985, "eval_python_code_alpaca_bleu_score_sem": 0.144761155222261, "eval_python_code_alpaca_emb_cos_sim": 0.7632350921630859, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008651484252375725, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8670387268066406, "eval_python_code_alpaca_n_ngrams_match_1": 9.852, "eval_python_code_alpaca_n_ngrams_match_2": 2.806, "eval_python_code_alpaca_n_ngrams_match_3": 0.902, "eval_python_code_alpaca_num_pred_words": 42.264, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.58486742508677, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34191790035151476, "eval_python_code_alpaca_runtime": 9.9994, "eval_python_code_alpaca_samples_per_second": 50.003, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.4770770858748362, "eval_python_code_alpaca_token_set_f1_sem": 0.005352187216895897, "eval_python_code_alpaca_token_set_precision": 0.5363579753651608, "eval_python_code_alpaca_token_set_recall": 0.45543513079432774, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 191875 }, { "epoch": 36.84, "eval_wikibio_accuracy": 0.3295625, "eval_wikibio_bleu_score": 6.005917623987676, "eval_wikibio_bleu_score_sem": 0.2255834478810922, "eval_wikibio_emb_cos_sim": 0.7490946054458618, "eval_wikibio_emb_cos_sim_sem": 0.008131074064753937, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.64663028717041, "eval_wikibio_n_ngrams_match_1": 10.054, "eval_wikibio_n_ngrams_match_2": 3.356, "eval_wikibio_n_ngrams_match_3": 1.23, "eval_wikibio_num_pred_words": 35.918, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.34523566731452, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3558271770968173, "eval_wikibio_runtime": 9.9332, "eval_wikibio_samples_per_second": 50.336, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.321376023951464, "eval_wikibio_token_set_f1_sem": 0.005481143993179589, "eval_wikibio_token_set_precision": 0.327372122150575, "eval_wikibio_token_set_recall": 0.33075503118179234, "eval_wikibio_true_num_tokens": 61.1328125, "step": 191875 }, { "epoch": 36.84, "eval_nq_accuracy": 0.53565625, "eval_nq_bleu_score": 12.075373742611097, "eval_nq_bleu_score_sem": 0.48848954483686985, "eval_nq_emb_cos_sim": 0.8354136347770691, "eval_nq_emb_cos_sim_sem": 0.00677859091566246, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.132789373397827, "eval_nq_n_ngrams_match_1": 23.636, "eval_nq_n_ngrams_match_2": 8.752, "eval_nq_n_ngrams_match_3": 4.016, "eval_nq_num_pred_words": 49.436, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.438371783902433, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4573583033802422, "eval_nq_runtime": 10.7189, "eval_nq_samples_per_second": 46.647, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.47062080889447266, "eval_nq_token_set_f1_sem": 0.004858305314723143, "eval_nq_token_set_precision": 0.4281424166269732, "eval_nq_token_set_recall": 0.5292878656494459, "eval_nq_true_num_tokens": 64.0, "step": 191875 }, { "epoch": 36.84, "learning_rate": 0.001, "loss": 2.508, "step": 191880 }, { "epoch": 36.85, "learning_rate": 0.001, "loss": 2.5093, "step": 191892 }, { "epoch": 36.85, "learning_rate": 0.001, "loss": 2.5079, "step": 191904 }, { "epoch": 36.85, "learning_rate": 0.001, "loss": 2.505, "step": 191916 }, { "epoch": 36.85, "learning_rate": 0.001, "loss": 2.5022, "step": 191928 }, { "epoch": 36.85, "learning_rate": 0.001, "loss": 2.5181, "step": 191940 }, { "epoch": 36.86, "learning_rate": 0.001, "loss": 2.511, "step": 191952 }, { "epoch": 36.86, "learning_rate": 0.001, "loss": 2.5123, "step": 191964 }, { "epoch": 36.86, "learning_rate": 0.001, "loss": 2.5161, "step": 191976 }, { "epoch": 36.86, "learning_rate": 0.001, "loss": 2.5117, "step": 191988 }, { "epoch": 36.87, "learning_rate": 0.001, "loss": 2.5234, "step": 192000 }, { "epoch": 36.87, "learning_rate": 0.001, "loss": 2.5095, "step": 192012 }, { "epoch": 36.87, "learning_rate": 0.001, "loss": 2.5126, "step": 192024 }, { "epoch": 36.87, "learning_rate": 0.001, "loss": 2.5045, "step": 192036 }, { "epoch": 36.88, "learning_rate": 0.001, "loss": 2.5025, "step": 192048 }, { "epoch": 36.88, "learning_rate": 0.001, "loss": 2.4989, "step": 192060 }, { "epoch": 36.88, "learning_rate": 0.001, "loss": 2.5082, "step": 192072 }, { "epoch": 36.88, "learning_rate": 0.001, "loss": 2.511, "step": 192084 }, { "epoch": 36.88, "learning_rate": 0.001, "loss": 2.5068, "step": 192096 }, { "epoch": 36.89, "learning_rate": 0.001, "loss": 2.5091, "step": 192108 }, { "epoch": 36.89, "learning_rate": 0.001, "loss": 2.5031, "step": 192120 }, { "epoch": 36.89, "learning_rate": 0.001, "loss": 2.5153, "step": 192132 }, { "epoch": 36.89, "learning_rate": 0.001, "loss": 2.5099, "step": 192144 }, { "epoch": 36.9, "learning_rate": 0.001, "loss": 2.5003, "step": 192156 }, { "epoch": 36.9, "learning_rate": 0.001, "loss": 2.5003, "step": 192168 }, { "epoch": 36.9, "learning_rate": 0.001, "loss": 2.5132, "step": 192180 }, { "epoch": 36.9, "learning_rate": 0.001, "loss": 2.5047, "step": 192192 }, { "epoch": 36.91, "learning_rate": 0.001, "loss": 2.5117, "step": 192204 }, { "epoch": 36.91, "learning_rate": 0.001, "loss": 2.507, "step": 192216 }, { "epoch": 36.91, "learning_rate": 0.001, "loss": 2.5137, "step": 192228 }, { "epoch": 36.91, "learning_rate": 0.001, "loss": 2.5131, "step": 192240 }, { "epoch": 36.91, "learning_rate": 0.001, "loss": 2.5155, "step": 192252 }, { "epoch": 36.92, "learning_rate": 0.001, "loss": 2.5074, "step": 192264 }, { "epoch": 36.92, "learning_rate": 0.001, "loss": 2.514, "step": 192276 }, { "epoch": 36.92, "learning_rate": 0.001, "loss": 2.5034, "step": 192288 }, { "epoch": 36.92, "learning_rate": 0.001, "loss": 2.5162, "step": 192300 }, { "epoch": 36.93, "learning_rate": 0.001, "loss": 2.5128, "step": 192312 }, { "epoch": 36.93, "learning_rate": 0.001, "loss": 2.5069, "step": 192324 }, { "epoch": 36.93, "learning_rate": 0.001, "loss": 2.51, "step": 192336 }, { "epoch": 36.93, "learning_rate": 0.001, "loss": 2.5122, "step": 192348 }, { "epoch": 36.94, "learning_rate": 0.001, "loss": 2.506, "step": 192360 }, { "epoch": 36.94, "learning_rate": 0.001, "loss": 2.5094, "step": 192372 }, { "epoch": 36.94, "learning_rate": 0.001, "loss": 2.512, "step": 192384 }, { "epoch": 36.94, "learning_rate": 0.001, "loss": 2.5145, "step": 192396 }, { "epoch": 36.94, "learning_rate": 0.001, "loss": 2.5103, "step": 192408 }, { "epoch": 36.95, "learning_rate": 0.001, "loss": 2.5004, "step": 192420 }, { "epoch": 36.95, "learning_rate": 0.001, "loss": 2.5048, "step": 192432 }, { "epoch": 36.95, "learning_rate": 0.001, "loss": 2.5177, "step": 192444 }, { "epoch": 36.95, "learning_rate": 0.001, "loss": 2.5056, "step": 192456 }, { "epoch": 36.96, "learning_rate": 0.001, "loss": 2.5038, "step": 192468 }, { "epoch": 36.96, "learning_rate": 0.001, "loss": 2.5011, "step": 192480 }, { "epoch": 36.96, "learning_rate": 0.001, "loss": 2.4996, "step": 192492 }, { "epoch": 36.96, "eval_ag_news_accuracy": 0.32921875, "eval_ag_news_bleu_score": 4.94957534953809, "eval_ag_news_bleu_score_sem": 0.14853514945693413, "eval_ag_news_emb_cos_sim": 0.820341944694519, "eval_ag_news_emb_cos_sim_sem": 0.006971182257022006, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.48870587348938, "eval_ag_news_n_ngrams_match_1": 14.304, "eval_ag_news_n_ngrams_match_2": 3.224, "eval_ag_news_n_ngrams_match_3": 0.91, "eval_ag_news_num_pred_words": 46.654, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.743545984643184, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.358820998943969, "eval_ag_news_runtime": 14.5588, "eval_ag_news_samples_per_second": 34.343, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.3587045411243008, "eval_ag_news_token_set_f1_sem": 0.004347115915121873, "eval_ag_news_token_set_precision": 0.3433912714335185, "eval_ag_news_token_set_recall": 0.39036720805435543, "eval_ag_news_true_num_tokens": 56.09375, "step": 192500 }, { "epoch": 36.96, "eval_anthropic_toxic_prompts_accuracy": 0.1160625, "eval_anthropic_toxic_prompts_bleu_score": 3.183696577411922, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12454425373907795, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6816329956054688, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008621962360627369, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2554514408111572, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.162, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.92, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 46.512, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.931318347399888, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21238038842071585, "eval_anthropic_toxic_prompts_runtime": 9.9481, "eval_anthropic_toxic_prompts_samples_per_second": 50.261, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.35923065859600295, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006756317873980588, "eval_anthropic_toxic_prompts_token_set_precision": 0.4337587112924653, "eval_anthropic_toxic_prompts_token_set_recall": 0.3327786501825931, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 192500 }, { "epoch": 36.96, "eval_arxiv_accuracy": 0.34921875, "eval_arxiv_bleu_score": 4.583263205881023, "eval_arxiv_bleu_score_sem": 0.13138340016539993, "eval_arxiv_emb_cos_sim": 0.7842831611633301, "eval_arxiv_emb_cos_sim_sem": 0.006627923176225601, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3556504249572754, "eval_arxiv_n_ngrams_match_1": 15.618, "eval_arxiv_n_ngrams_match_2": 3.182, "eval_arxiv_n_ngrams_match_3": 0.728, "eval_arxiv_num_pred_words": 40.422, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.664242067201428, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37410331590847146, "eval_arxiv_runtime": 10.473, "eval_arxiv_samples_per_second": 47.742, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.3652666730821766, "eval_arxiv_token_set_f1_sem": 0.004156405458533003, "eval_arxiv_token_set_precision": 0.31611131200351805, "eval_arxiv_token_set_recall": 0.44949166081900477, "eval_arxiv_true_num_tokens": 64.0, "step": 192500 }, { "epoch": 36.96, "eval_python_code_alpaca_accuracy": 0.1601875, "eval_python_code_alpaca_bleu_score": 4.558052190681162, "eval_python_code_alpaca_bleu_score_sem": 0.14585363179511066, "eval_python_code_alpaca_emb_cos_sim": 0.7560789585113525, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008701819443298876, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8719370365142822, "eval_python_code_alpaca_n_ngrams_match_1": 9.85, "eval_python_code_alpaca_n_ngrams_match_2": 2.92, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 44.196, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.671214857488266, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3326628578833356, "eval_python_code_alpaca_runtime": 10.3863, "eval_python_code_alpaca_samples_per_second": 48.14, "eval_python_code_alpaca_steps_per_second": 0.096, "eval_python_code_alpaca_token_set_f1": 0.48301183568944633, "eval_python_code_alpaca_token_set_f1_sem": 0.005631118753790701, "eval_python_code_alpaca_token_set_precision": 0.540086277860109, "eval_python_code_alpaca_token_set_recall": 0.4636515897902906, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 192500 }, { "epoch": 36.96, "eval_wikibio_accuracy": 0.3309375, "eval_wikibio_bleu_score": 5.91978715623965, "eval_wikibio_bleu_score_sem": 0.2084940686323091, "eval_wikibio_emb_cos_sim": 0.7401781678199768, "eval_wikibio_emb_cos_sim_sem": 0.00995853309877246, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6312167644500732, "eval_wikibio_n_ngrams_match_1": 9.746, "eval_wikibio_n_ngrams_match_2": 3.302, "eval_wikibio_n_ngrams_match_3": 1.168, "eval_wikibio_num_pred_words": 35.224, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.75873216028906, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35089385192024114, "eval_wikibio_runtime": 9.9671, "eval_wikibio_samples_per_second": 50.165, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3145718034888637, "eval_wikibio_token_set_f1_sem": 0.0055858330471781565, "eval_wikibio_token_set_precision": 0.31818262148560844, "eval_wikibio_token_set_recall": 0.32801109531710293, "eval_wikibio_true_num_tokens": 61.1328125, "step": 192500 }, { "epoch": 36.96, "eval_nq_accuracy": 0.5361875, "eval_nq_bleu_score": 12.269416750995518, "eval_nq_bleu_score_sem": 0.4863028889827014, "eval_nq_emb_cos_sim": 0.8320356607437134, "eval_nq_emb_cos_sim_sem": 0.00702092541643248, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1297690868377686, "eval_nq_n_ngrams_match_1": 23.412, "eval_nq_n_ngrams_match_2": 8.748, "eval_nq_n_ngrams_match_3": 4.16, "eval_nq_num_pred_words": 49.11, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.412923932261764, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45498159980778147, "eval_nq_runtime": 10.1997, "eval_nq_samples_per_second": 49.021, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.468365486828502, "eval_nq_token_set_f1_sem": 0.0048862762311763, "eval_nq_token_set_precision": 0.4252104214006085, "eval_nq_token_set_recall": 0.5301342461670362, "eval_nq_true_num_tokens": 64.0, "step": 192500 }, { "epoch": 36.96, "learning_rate": 0.001, "loss": 2.504, "step": 192504 }, { "epoch": 36.97, "learning_rate": 0.001, "loss": 2.5135, "step": 192516 }, { "epoch": 36.97, "learning_rate": 0.001, "loss": 2.4947, "step": 192528 }, { "epoch": 36.97, "learning_rate": 0.001, "loss": 2.5163, "step": 192540 }, { "epoch": 36.97, "learning_rate": 0.001, "loss": 2.5147, "step": 192552 }, { "epoch": 36.97, "learning_rate": 0.001, "loss": 2.5024, "step": 192564 }, { "epoch": 36.98, "learning_rate": 0.001, "loss": 2.5028, "step": 192576 }, { "epoch": 36.98, "learning_rate": 0.001, "loss": 2.4993, "step": 192588 }, { "epoch": 36.98, "learning_rate": 0.001, "loss": 2.5014, "step": 192600 }, { "epoch": 36.98, "learning_rate": 0.001, "loss": 2.5027, "step": 192612 }, { "epoch": 36.99, "learning_rate": 0.001, "loss": 2.5029, "step": 192624 }, { "epoch": 36.99, "learning_rate": 0.001, "loss": 2.5065, "step": 192636 }, { "epoch": 36.99, "learning_rate": 0.001, "loss": 2.4974, "step": 192648 }, { "epoch": 36.99, "learning_rate": 0.001, "loss": 2.5083, "step": 192660 }, { "epoch": 37.0, "learning_rate": 0.001, "loss": 2.5048, "step": 192672 }, { "epoch": 37.0, "learning_rate": 0.001, "loss": 2.5144, "step": 192684 }, { "epoch": 37.0, "learning_rate": 0.001, "loss": 2.5012, "step": 192696 }, { "epoch": 37.0, "learning_rate": 0.001, "loss": 2.4909, "step": 192708 }, { "epoch": 37.0, "learning_rate": 0.001, "loss": 2.5, "step": 192720 }, { "epoch": 37.01, "learning_rate": 0.001, "loss": 2.4901, "step": 192732 }, { "epoch": 37.01, "learning_rate": 0.001, "loss": 2.4916, "step": 192744 }, { "epoch": 37.01, "learning_rate": 0.001, "loss": 2.4899, "step": 192756 }, { "epoch": 37.01, "learning_rate": 0.001, "loss": 2.4933, "step": 192768 }, { "epoch": 37.02, "learning_rate": 0.001, "loss": 2.4994, "step": 192780 }, { "epoch": 37.02, "learning_rate": 0.001, "loss": 2.4842, "step": 192792 }, { "epoch": 37.02, "learning_rate": 0.001, "loss": 2.4923, "step": 192804 }, { "epoch": 37.02, "learning_rate": 0.001, "loss": 2.4962, "step": 192816 }, { "epoch": 37.03, "learning_rate": 0.001, "loss": 2.5029, "step": 192828 }, { "epoch": 37.03, "learning_rate": 0.001, "loss": 2.4979, "step": 192840 }, { "epoch": 37.03, "learning_rate": 0.001, "loss": 2.4978, "step": 192852 }, { "epoch": 37.03, "learning_rate": 0.001, "loss": 2.4921, "step": 192864 }, { "epoch": 37.03, "learning_rate": 0.001, "loss": 2.508, "step": 192876 }, { "epoch": 37.04, "learning_rate": 0.001, "loss": 2.5015, "step": 192888 }, { "epoch": 37.04, "learning_rate": 0.001, "loss": 2.5003, "step": 192900 }, { "epoch": 37.04, "learning_rate": 0.001, "loss": 2.4948, "step": 192912 }, { "epoch": 37.04, "learning_rate": 0.001, "loss": 2.4893, "step": 192924 }, { "epoch": 37.05, "learning_rate": 0.001, "loss": 2.501, "step": 192936 }, { "epoch": 37.05, "learning_rate": 0.001, "loss": 2.4901, "step": 192948 }, { "epoch": 37.05, "learning_rate": 0.001, "loss": 2.4907, "step": 192960 }, { "epoch": 37.05, "learning_rate": 0.001, "loss": 2.5089, "step": 192972 }, { "epoch": 37.06, "learning_rate": 0.001, "loss": 2.4941, "step": 192984 }, { "epoch": 37.06, "learning_rate": 0.001, "loss": 2.5054, "step": 192996 }, { "epoch": 37.06, "learning_rate": 0.001, "loss": 2.4938, "step": 193008 }, { "epoch": 37.06, "learning_rate": 0.001, "loss": 2.4981, "step": 193020 }, { "epoch": 37.06, "learning_rate": 0.001, "loss": 2.4941, "step": 193032 }, { "epoch": 37.07, "learning_rate": 0.001, "loss": 2.5013, "step": 193044 }, { "epoch": 37.07, "learning_rate": 0.001, "loss": 2.4967, "step": 193056 }, { "epoch": 37.07, "learning_rate": 0.001, "loss": 2.4952, "step": 193068 }, { "epoch": 37.07, "learning_rate": 0.001, "loss": 2.4983, "step": 193080 }, { "epoch": 37.08, "learning_rate": 0.001, "loss": 2.5001, "step": 193092 }, { "epoch": 37.08, "learning_rate": 0.001, "loss": 2.4846, "step": 193104 }, { "epoch": 37.08, "learning_rate": 0.001, "loss": 2.4908, "step": 193116 }, { "epoch": 37.08, "eval_ag_news_accuracy": 0.3293125, "eval_ag_news_bleu_score": 5.0472875870568314, "eval_ag_news_bleu_score_sem": 0.16974254125289642, "eval_ag_news_emb_cos_sim": 0.8192209005355835, "eval_ag_news_emb_cos_sim_sem": 0.006446453334516867, "eval_ag_news_emb_top1_equal": 0.3203125, "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4924087524414062, "eval_ag_news_n_ngrams_match_1": 14.496, "eval_ag_news_n_ngrams_match_2": 3.232, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 46.624, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.865016127709566, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36331507324856993, "eval_ag_news_runtime": 10.2781, "eval_ag_news_samples_per_second": 48.647, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.36136002826625707, "eval_ag_news_token_set_f1_sem": 0.004416313646991193, "eval_ag_news_token_set_precision": 0.3492121929628191, "eval_ag_news_token_set_recall": 0.38782790738673356, "eval_ag_news_true_num_tokens": 56.09375, "step": 193125 }, { "epoch": 37.08, "eval_anthropic_toxic_prompts_accuracy": 0.11675, "eval_anthropic_toxic_prompts_bleu_score": 3.183582336003102, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12301005787103086, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6879515647888184, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008261988761673854, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.223449230194092, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.4, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.722, "eval_anthropic_toxic_prompts_num_pred_words": 47.276, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.114596982629802, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21872745927530302, "eval_anthropic_toxic_prompts_runtime": 10.2158, "eval_anthropic_toxic_prompts_samples_per_second": 48.944, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.3579716251366683, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00650853606887439, "eval_anthropic_toxic_prompts_token_set_precision": 0.45274270629613617, "eval_anthropic_toxic_prompts_token_set_recall": 0.3200028890383885, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 193125 }, { "epoch": 37.08, "eval_arxiv_accuracy": 0.34984375, "eval_arxiv_bleu_score": 4.399790539622733, "eval_arxiv_bleu_score_sem": 0.12277834051173142, "eval_arxiv_emb_cos_sim": 0.7755804061889648, "eval_arxiv_emb_cos_sim_sem": 0.006803201382470282, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.36305570602417, "eval_arxiv_n_ngrams_match_1": 15.482, "eval_arxiv_n_ngrams_match_2": 3.068, "eval_arxiv_n_ngrams_match_3": 0.656, "eval_arxiv_num_pred_words": 40.658, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.877296727472785, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3717343898829476, "eval_arxiv_runtime": 10.1065, "eval_arxiv_samples_per_second": 49.473, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.362411897609555, "eval_arxiv_token_set_f1_sem": 0.004161236094802209, "eval_arxiv_token_set_precision": 0.31616375213373865, "eval_arxiv_token_set_recall": 0.44116641020039854, "eval_arxiv_true_num_tokens": 64.0, "step": 193125 }, { "epoch": 37.08, "eval_python_code_alpaca_accuracy": 0.16078125, "eval_python_code_alpaca_bleu_score": 4.781514928853036, "eval_python_code_alpaca_bleu_score_sem": 0.14685525073175398, "eval_python_code_alpaca_emb_cos_sim": 0.7665096521377563, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007397083405214178, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8710124492645264, "eval_python_code_alpaca_n_ngrams_match_1": 10.064, "eval_python_code_alpaca_n_ngrams_match_2": 3.07, "eval_python_code_alpaca_n_ngrams_match_3": 1.054, "eval_python_code_alpaca_num_pred_words": 43.626, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.65488382843735, "eval_python_code_alpaca_pred_num_tokens": 62.9609375, "eval_python_code_alpaca_rouge_score": 0.3452564879086022, "eval_python_code_alpaca_runtime": 9.8738, "eval_python_code_alpaca_samples_per_second": 50.639, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48209317633085697, "eval_python_code_alpaca_token_set_f1_sem": 0.005186671950039917, "eval_python_code_alpaca_token_set_precision": 0.5547722503496798, "eval_python_code_alpaca_token_set_recall": 0.4487437550030472, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 193125 }, { "epoch": 37.08, "eval_wikibio_accuracy": 0.3285625, "eval_wikibio_bleu_score": 5.926579864580395, "eval_wikibio_bleu_score_sem": 0.21905318042124408, "eval_wikibio_emb_cos_sim": 0.7521117925643921, "eval_wikibio_emb_cos_sim_sem": 0.010075538677139274, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6762471199035645, "eval_wikibio_n_ngrams_match_1": 9.878, "eval_wikibio_n_ngrams_match_2": 3.298, "eval_wikibio_n_ngrams_match_3": 1.24, "eval_wikibio_num_pred_words": 35.422, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.497884752499665, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3503315835033644, "eval_wikibio_runtime": 9.8661, "eval_wikibio_samples_per_second": 50.679, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.31365354782013044, "eval_wikibio_token_set_f1_sem": 0.005838103597768974, "eval_wikibio_token_set_precision": 0.321877752264818, "eval_wikibio_token_set_recall": 0.32082498590336117, "eval_wikibio_true_num_tokens": 61.1328125, "step": 193125 }, { "epoch": 37.08, "eval_nq_accuracy": 0.5366875, "eval_nq_bleu_score": 12.370374831293935, "eval_nq_bleu_score_sem": 0.49273457524295083, "eval_nq_emb_cos_sim": 0.8358243107795715, "eval_nq_emb_cos_sim_sem": 0.00722880355145583, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.131504535675049, "eval_nq_n_ngrams_match_1": 23.678, "eval_nq_n_ngrams_match_2": 8.858, "eval_nq_n_ngrams_match_3": 4.172, "eval_nq_num_pred_words": 49.272, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.427536807599324, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4585536076556796, "eval_nq_runtime": 11.841, "eval_nq_samples_per_second": 42.226, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.4708879282164866, "eval_nq_token_set_f1_sem": 0.004901324214237776, "eval_nq_token_set_precision": 0.4308746724594358, "eval_nq_token_set_recall": 0.5272144650390836, "eval_nq_true_num_tokens": 64.0, "step": 193125 }, { "epoch": 37.08, "learning_rate": 0.001, "loss": 2.4946, "step": 193128 }, { "epoch": 37.09, "learning_rate": 0.001, "loss": 2.4935, "step": 193140 }, { "epoch": 37.09, "learning_rate": 0.001, "loss": 2.4927, "step": 193152 }, { "epoch": 37.09, "learning_rate": 0.001, "loss": 2.5052, "step": 193164 }, { "epoch": 37.09, "learning_rate": 0.001, "loss": 2.4944, "step": 193176 }, { "epoch": 37.09, "learning_rate": 0.001, "loss": 2.4997, "step": 193188 }, { "epoch": 37.1, "learning_rate": 0.001, "loss": 2.4931, "step": 193200 }, { "epoch": 37.1, "learning_rate": 0.001, "loss": 2.5031, "step": 193212 }, { "epoch": 37.1, "learning_rate": 0.001, "loss": 2.5077, "step": 193224 }, { "epoch": 37.1, "learning_rate": 0.001, "loss": 2.5074, "step": 193236 }, { "epoch": 37.11, "learning_rate": 0.001, "loss": 2.4973, "step": 193248 }, { "epoch": 37.11, "learning_rate": 0.001, "loss": 2.4989, "step": 193260 }, { "epoch": 37.11, "learning_rate": 0.001, "loss": 2.5029, "step": 193272 }, { "epoch": 37.11, "learning_rate": 0.001, "loss": 2.4981, "step": 193284 }, { "epoch": 37.12, "learning_rate": 0.001, "loss": 2.5016, "step": 193296 }, { "epoch": 37.12, "learning_rate": 0.001, "loss": 2.4937, "step": 193308 }, { "epoch": 37.12, "learning_rate": 0.001, "loss": 2.495, "step": 193320 }, { "epoch": 37.12, "learning_rate": 0.001, "loss": 2.4956, "step": 193332 }, { "epoch": 37.12, "learning_rate": 0.001, "loss": 2.5048, "step": 193344 }, { "epoch": 37.13, "learning_rate": 0.001, "loss": 2.4972, "step": 193356 }, { "epoch": 37.13, "learning_rate": 0.001, "loss": 2.493, "step": 193368 }, { "epoch": 37.13, "learning_rate": 0.001, "loss": 2.4918, "step": 193380 }, { "epoch": 37.13, "learning_rate": 0.001, "loss": 2.4914, "step": 193392 }, { "epoch": 37.14, "learning_rate": 0.001, "loss": 2.4937, "step": 193404 }, { "epoch": 37.14, "learning_rate": 0.001, "loss": 2.5009, "step": 193416 }, { "epoch": 37.14, "learning_rate": 0.001, "loss": 2.4953, "step": 193428 }, { "epoch": 37.14, "learning_rate": 0.001, "loss": 2.5101, "step": 193440 }, { "epoch": 37.15, "learning_rate": 0.001, "loss": 2.5015, "step": 193452 }, { "epoch": 37.15, "learning_rate": 0.001, "loss": 2.5075, "step": 193464 }, { "epoch": 37.15, "learning_rate": 0.001, "loss": 2.4948, "step": 193476 }, { "epoch": 37.15, "learning_rate": 0.001, "loss": 2.4897, "step": 193488 }, { "epoch": 37.15, "learning_rate": 0.001, "loss": 2.5031, "step": 193500 }, { "epoch": 37.16, "learning_rate": 0.001, "loss": 2.4956, "step": 193512 }, { "epoch": 37.16, "learning_rate": 0.001, "loss": 2.4931, "step": 193524 }, { "epoch": 37.16, "learning_rate": 0.001, "loss": 2.49, "step": 193536 }, { "epoch": 37.16, "learning_rate": 0.001, "loss": 2.4892, "step": 193548 }, { "epoch": 37.17, "learning_rate": 0.001, "loss": 2.5037, "step": 193560 }, { "epoch": 37.17, "learning_rate": 0.001, "loss": 2.495, "step": 193572 }, { "epoch": 37.17, "learning_rate": 0.001, "loss": 2.5015, "step": 193584 }, { "epoch": 37.17, "learning_rate": 0.001, "loss": 2.4922, "step": 193596 }, { "epoch": 37.18, "learning_rate": 0.001, "loss": 2.4919, "step": 193608 }, { "epoch": 37.18, "learning_rate": 0.001, "loss": 2.4932, "step": 193620 }, { "epoch": 37.18, "learning_rate": 0.001, "loss": 2.4889, "step": 193632 }, { "epoch": 37.18, "learning_rate": 0.001, "loss": 2.4997, "step": 193644 }, { "epoch": 37.18, "learning_rate": 0.001, "loss": 2.5007, "step": 193656 }, { "epoch": 37.19, "learning_rate": 0.001, "loss": 2.5014, "step": 193668 }, { "epoch": 37.19, "learning_rate": 0.001, "loss": 2.4886, "step": 193680 }, { "epoch": 37.19, "learning_rate": 0.001, "loss": 2.5022, "step": 193692 }, { "epoch": 37.19, "learning_rate": 0.001, "loss": 2.5053, "step": 193704 }, { "epoch": 37.2, "learning_rate": 0.001, "loss": 2.4933, "step": 193716 }, { "epoch": 37.2, "learning_rate": 0.001, "loss": 2.5031, "step": 193728 }, { "epoch": 37.2, "learning_rate": 0.001, "loss": 2.4995, "step": 193740 }, { "epoch": 37.2, "eval_ag_news_accuracy": 0.32765625, "eval_ag_news_bleu_score": 4.987352437371996, "eval_ag_news_bleu_score_sem": 0.16506155774105166, "eval_ag_news_emb_cos_sim": 0.8157989978790283, "eval_ag_news_emb_cos_sim_sem": 0.007362029081054258, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.49606990814209, "eval_ag_news_n_ngrams_match_1": 14.412, "eval_ag_news_n_ngrams_match_2": 3.232, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.842, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.98556060025125, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3590879086206805, "eval_ag_news_runtime": 11.0347, "eval_ag_news_samples_per_second": 45.312, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.35810359564648675, "eval_ag_news_token_set_f1_sem": 0.0045469070027615195, "eval_ag_news_token_set_precision": 0.34649717777803274, "eval_ag_news_token_set_recall": 0.38418050892403593, "eval_ag_news_true_num_tokens": 56.09375, "step": 193750 }, { "epoch": 37.2, "eval_anthropic_toxic_prompts_accuracy": 0.11609375, "eval_anthropic_toxic_prompts_bleu_score": 3.415877945184848, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1287005073311264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6898690462112427, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0080730760499359, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.209953546524048, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.53, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.102, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.804, "eval_anthropic_toxic_prompts_num_pred_words": 46.88, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.77793517663694, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22442653942444352, "eval_anthropic_toxic_prompts_runtime": 10.7562, "eval_anthropic_toxic_prompts_samples_per_second": 46.485, "eval_anthropic_toxic_prompts_steps_per_second": 0.093, "eval_anthropic_toxic_prompts_token_set_f1": 0.3655782024453944, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006377973840075936, "eval_anthropic_toxic_prompts_token_set_precision": 0.46001993108976014, "eval_anthropic_toxic_prompts_token_set_recall": 0.32589759195678614, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 193750 }, { "epoch": 37.2, "eval_arxiv_accuracy": 0.3536875, "eval_arxiv_bleu_score": 4.618313153989449, "eval_arxiv_bleu_score_sem": 0.1350069263448915, "eval_arxiv_emb_cos_sim": 0.7814196944236755, "eval_arxiv_emb_cos_sim_sem": 0.006535607090007568, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.335975408554077, "eval_arxiv_n_ngrams_match_1": 15.636, "eval_arxiv_n_ngrams_match_2": 3.11, "eval_arxiv_n_ngrams_match_3": 0.732, "eval_arxiv_num_pred_words": 40.66, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.10578448051979, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37455004000824543, "eval_arxiv_runtime": 10.4242, "eval_arxiv_samples_per_second": 47.965, "eval_arxiv_steps_per_second": 0.096, "eval_arxiv_token_set_f1": 0.3672948883127612, "eval_arxiv_token_set_f1_sem": 0.004040077812151361, "eval_arxiv_token_set_precision": 0.31889205301294843, "eval_arxiv_token_set_recall": 0.4494963085270952, "eval_arxiv_true_num_tokens": 64.0, "step": 193750 }, { "epoch": 37.2, "eval_python_code_alpaca_accuracy": 0.16096875, "eval_python_code_alpaca_bleu_score": 4.892885050657554, "eval_python_code_alpaca_bleu_score_sem": 0.15617172133774, "eval_python_code_alpaca_emb_cos_sim": 0.7601250410079956, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008629491499987135, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.856093645095825, "eval_python_code_alpaca_n_ngrams_match_1": 10.082, "eval_python_code_alpaca_n_ngrams_match_2": 3.146, "eval_python_code_alpaca_n_ngrams_match_3": 1.078, "eval_python_code_alpaca_num_pred_words": 43.088, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.393449069921157, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34200803190834106, "eval_python_code_alpaca_runtime": 10.1633, "eval_python_code_alpaca_samples_per_second": 49.197, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4862262850886598, "eval_python_code_alpaca_token_set_f1_sem": 0.005619742519728222, "eval_python_code_alpaca_token_set_precision": 0.5489696423712079, "eval_python_code_alpaca_token_set_recall": 0.4603094019709726, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 193750 }, { "epoch": 37.2, "eval_wikibio_accuracy": 0.3301875, "eval_wikibio_bleu_score": 5.926853942417159, "eval_wikibio_bleu_score_sem": 0.21383495753295081, "eval_wikibio_emb_cos_sim": 0.7365812063217163, "eval_wikibio_emb_cos_sim_sem": 0.010531684798949567, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6643404960632324, "eval_wikibio_n_ngrams_match_1": 9.988, "eval_wikibio_n_ngrams_match_2": 3.378, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 36.072, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.03038697932219, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3587143477871787, "eval_wikibio_runtime": 10.4952, "eval_wikibio_samples_per_second": 47.641, "eval_wikibio_steps_per_second": 0.095, "eval_wikibio_token_set_f1": 0.31864203588405426, "eval_wikibio_token_set_f1_sem": 0.005533096524289964, "eval_wikibio_token_set_precision": 0.326705964008385, "eval_wikibio_token_set_recall": 0.33015486000763916, "eval_wikibio_true_num_tokens": 61.1328125, "step": 193750 }, { "epoch": 37.2, "eval_nq_accuracy": 0.53721875, "eval_nq_bleu_score": 12.245648338199913, "eval_nq_bleu_score_sem": 0.48728367368860276, "eval_nq_emb_cos_sim": 0.8392508029937744, "eval_nq_emb_cos_sim_sem": 0.007142248412593629, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.130819320678711, "eval_nq_n_ngrams_match_1": 23.536, "eval_nq_n_ngrams_match_2": 8.806, "eval_nq_n_ngrams_match_3": 4.084, "eval_nq_num_pred_words": 49.102, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.421764110991576, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4564676868639352, "eval_nq_runtime": 10.7337, "eval_nq_samples_per_second": 46.582, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.47014775712288226, "eval_nq_token_set_f1_sem": 0.0049832179699802765, "eval_nq_token_set_precision": 0.42928421059191796, "eval_nq_token_set_recall": 0.5279550463868034, "eval_nq_true_num_tokens": 64.0, "step": 193750 }, { "epoch": 37.2, "learning_rate": 0.001, "loss": 2.504, "step": 193752 }, { "epoch": 37.21, "learning_rate": 0.001, "loss": 2.4988, "step": 193764 }, { "epoch": 37.21, "learning_rate": 0.001, "loss": 2.4853, "step": 193776 }, { "epoch": 37.21, "learning_rate": 0.001, "loss": 2.4926, "step": 193788 }, { "epoch": 37.21, "learning_rate": 0.001, "loss": 2.4993, "step": 193800 }, { "epoch": 37.21, "learning_rate": 0.001, "loss": 2.5047, "step": 193812 }, { "epoch": 37.22, "learning_rate": 0.001, "loss": 2.497, "step": 193824 }, { "epoch": 37.22, "learning_rate": 0.001, "loss": 2.4949, "step": 193836 }, { "epoch": 37.22, "learning_rate": 0.001, "loss": 2.4997, "step": 193848 }, { "epoch": 37.22, "learning_rate": 0.001, "loss": 2.5074, "step": 193860 }, { "epoch": 37.23, "learning_rate": 0.001, "loss": 2.5011, "step": 193872 }, { "epoch": 37.23, "learning_rate": 0.001, "loss": 2.4944, "step": 193884 }, { "epoch": 37.23, "learning_rate": 0.001, "loss": 2.5037, "step": 193896 }, { "epoch": 37.23, "learning_rate": 0.001, "loss": 2.5067, "step": 193908 }, { "epoch": 37.24, "learning_rate": 0.001, "loss": 2.49, "step": 193920 }, { "epoch": 37.24, "learning_rate": 0.001, "loss": 2.49, "step": 193932 }, { "epoch": 37.24, "learning_rate": 0.001, "loss": 2.4951, "step": 193944 }, { "epoch": 37.24, "learning_rate": 0.001, "loss": 2.5022, "step": 193956 }, { "epoch": 37.24, "learning_rate": 0.001, "loss": 2.495, "step": 193968 }, { "epoch": 37.25, "learning_rate": 0.001, "loss": 2.5077, "step": 193980 }, { "epoch": 37.25, "learning_rate": 0.001, "loss": 2.4982, "step": 193992 }, { "epoch": 37.25, "learning_rate": 0.001, "loss": 2.5044, "step": 194004 }, { "epoch": 37.25, "learning_rate": 0.001, "loss": 2.495, "step": 194016 }, { "epoch": 37.26, "learning_rate": 0.001, "loss": 2.4979, "step": 194028 }, { "epoch": 37.26, "learning_rate": 0.001, "loss": 2.4923, "step": 194040 }, { "epoch": 37.26, "learning_rate": 0.001, "loss": 2.5038, "step": 194052 }, { "epoch": 37.26, "learning_rate": 0.001, "loss": 2.5023, "step": 194064 }, { "epoch": 37.26, "learning_rate": 0.001, "loss": 2.4932, "step": 194076 }, { "epoch": 37.27, "learning_rate": 0.001, "loss": 2.4958, "step": 194088 }, { "epoch": 37.27, "learning_rate": 0.001, "loss": 2.4954, "step": 194100 }, { "epoch": 37.27, "learning_rate": 0.001, "loss": 2.4869, "step": 194112 }, { "epoch": 37.27, "learning_rate": 0.001, "loss": 2.5045, "step": 194124 }, { "epoch": 37.28, "learning_rate": 0.001, "loss": 2.4939, "step": 194136 }, { "epoch": 37.28, "learning_rate": 0.001, "loss": 2.498, "step": 194148 }, { "epoch": 37.28, "learning_rate": 0.001, "loss": 2.4957, "step": 194160 }, { "epoch": 37.28, "learning_rate": 0.001, "loss": 2.5027, "step": 194172 }, { "epoch": 37.29, "learning_rate": 0.001, "loss": 2.4953, "step": 194184 }, { "epoch": 37.29, "learning_rate": 0.001, "loss": 2.4926, "step": 194196 }, { "epoch": 37.29, "learning_rate": 0.001, "loss": 2.5073, "step": 194208 }, { "epoch": 37.29, "learning_rate": 0.001, "loss": 2.4954, "step": 194220 }, { "epoch": 37.29, "learning_rate": 0.001, "loss": 2.5072, "step": 194232 }, { "epoch": 37.3, "learning_rate": 0.001, "loss": 2.4972, "step": 194244 }, { "epoch": 37.3, "learning_rate": 0.001, "loss": 2.4991, "step": 194256 }, { "epoch": 37.3, "learning_rate": 0.001, "loss": 2.4991, "step": 194268 }, { "epoch": 37.3, "learning_rate": 0.001, "loss": 2.498, "step": 194280 }, { "epoch": 37.31, "learning_rate": 0.001, "loss": 2.5065, "step": 194292 }, { "epoch": 37.31, "learning_rate": 0.001, "loss": 2.4951, "step": 194304 }, { "epoch": 37.31, "learning_rate": 0.001, "loss": 2.4935, "step": 194316 }, { "epoch": 37.31, "learning_rate": 0.001, "loss": 2.4935, "step": 194328 }, { "epoch": 37.32, "learning_rate": 0.001, "loss": 2.4883, "step": 194340 }, { "epoch": 37.32, "learning_rate": 0.001, "loss": 2.5012, "step": 194352 }, { "epoch": 37.32, "learning_rate": 0.001, "loss": 2.5056, "step": 194364 }, { "epoch": 37.32, "eval_ag_news_accuracy": 0.3278125, "eval_ag_news_bleu_score": 4.930670692561991, "eval_ag_news_bleu_score_sem": 0.15178758369773337, "eval_ag_news_emb_cos_sim": 0.8194109201431274, "eval_ag_news_emb_cos_sim_sem": 0.006857397619843736, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4890034198760986, "eval_ag_news_n_ngrams_match_1": 14.396, "eval_ag_news_n_ngrams_match_2": 3.212, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.952, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.75329015803918, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3582680219683052, "eval_ag_news_runtime": 10.3699, "eval_ag_news_samples_per_second": 48.216, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.3568136618038535, "eval_ag_news_token_set_f1_sem": 0.0044437308384324145, "eval_ag_news_token_set_precision": 0.34709898326299293, "eval_ag_news_token_set_recall": 0.38049060297805015, "eval_ag_news_true_num_tokens": 56.09375, "step": 194375 }, { "epoch": 37.32, "eval_anthropic_toxic_prompts_accuracy": 0.116875, "eval_anthropic_toxic_prompts_bleu_score": 3.2592883582400756, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12186789282290272, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6781432628631592, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008447857680695912, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1484375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.031548465007086954, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.21248197555542, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.308, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748, "eval_anthropic_toxic_prompts_num_pred_words": 46.324, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.840663696161567, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2171679287748752, "eval_anthropic_toxic_prompts_runtime": 10.3736, "eval_anthropic_toxic_prompts_samples_per_second": 48.199, "eval_anthropic_toxic_prompts_steps_per_second": 0.096, "eval_anthropic_toxic_prompts_token_set_f1": 0.3529143401339154, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006555732752202401, "eval_anthropic_toxic_prompts_token_set_precision": 0.4391528107955721, "eval_anthropic_toxic_prompts_token_set_recall": 0.320556486725126, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 194375 }, { "epoch": 37.32, "eval_arxiv_accuracy": 0.35265625, "eval_arxiv_bleu_score": 4.3999055260186575, "eval_arxiv_bleu_score_sem": 0.12258063090345232, "eval_arxiv_emb_cos_sim": 0.7799035310745239, "eval_arxiv_emb_cos_sim_sem": 0.006211274578370587, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3566486835479736, "eval_arxiv_n_ngrams_match_1": 15.36, "eval_arxiv_n_ngrams_match_2": 3.028, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.45, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.692870680092827, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3701688705609501, "eval_arxiv_runtime": 11.8409, "eval_arxiv_samples_per_second": 42.227, "eval_arxiv_steps_per_second": 0.084, "eval_arxiv_token_set_f1": 0.3582110091680112, "eval_arxiv_token_set_f1_sem": 0.004059473478740534, "eval_arxiv_token_set_precision": 0.3113617155700013, "eval_arxiv_token_set_recall": 0.4401518079863058, "eval_arxiv_true_num_tokens": 64.0, "step": 194375 }, { "epoch": 37.32, "eval_python_code_alpaca_accuracy": 0.16371875, "eval_python_code_alpaca_bleu_score": 4.775014853561377, "eval_python_code_alpaca_bleu_score_sem": 0.15479763267848184, "eval_python_code_alpaca_emb_cos_sim": 0.755107581615448, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008915453257321994, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.833731174468994, "eval_python_code_alpaca_n_ngrams_match_1": 9.934, "eval_python_code_alpaca_n_ngrams_match_2": 2.962, "eval_python_code_alpaca_n_ngrams_match_3": 1.018, "eval_python_code_alpaca_num_pred_words": 43.312, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.008805396671562, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3401846253313796, "eval_python_code_alpaca_runtime": 9.8021, "eval_python_code_alpaca_samples_per_second": 51.009, "eval_python_code_alpaca_steps_per_second": 0.102, "eval_python_code_alpaca_token_set_f1": 0.47675677416632645, "eval_python_code_alpaca_token_set_f1_sem": 0.005513643105916052, "eval_python_code_alpaca_token_set_precision": 0.5411193627439754, "eval_python_code_alpaca_token_set_recall": 0.4497073035938302, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 194375 }, { "epoch": 37.32, "eval_wikibio_accuracy": 0.3295625, "eval_wikibio_bleu_score": 6.203771319594105, "eval_wikibio_bleu_score_sem": 0.22153006970172062, "eval_wikibio_emb_cos_sim": 0.7438701391220093, "eval_wikibio_emb_cos_sim_sem": 0.009452194690197245, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6615147590637207, "eval_wikibio_n_ngrams_match_1": 10.324, "eval_wikibio_n_ngrams_match_2": 3.494, "eval_wikibio_n_ngrams_match_3": 1.322, "eval_wikibio_num_pred_words": 36.582, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.920253048773695, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36229334941439284, "eval_wikibio_runtime": 9.4919, "eval_wikibio_samples_per_second": 52.677, "eval_wikibio_steps_per_second": 0.105, "eval_wikibio_token_set_f1": 0.32509779845142867, "eval_wikibio_token_set_f1_sem": 0.005426929573211413, "eval_wikibio_token_set_precision": 0.33686437038097244, "eval_wikibio_token_set_recall": 0.32875227375568794, "eval_wikibio_true_num_tokens": 61.1328125, "step": 194375 }, { "epoch": 37.32, "eval_nq_accuracy": 0.53740625, "eval_nq_bleu_score": 12.00340341710867, "eval_nq_bleu_score_sem": 0.48171148468188396, "eval_nq_emb_cos_sim": 0.8391791582107544, "eval_nq_emb_cos_sim_sem": 0.006667585336307877, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1303999423980713, "eval_nq_n_ngrams_match_1": 23.3, "eval_nq_n_ngrams_match_2": 8.578, "eval_nq_n_ngrams_match_3": 3.948, "eval_nq_num_pred_words": 49.0, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.41823294653735, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45372407114469504, "eval_nq_runtime": 10.4044, "eval_nq_samples_per_second": 48.057, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46251134527244336, "eval_nq_token_set_f1_sem": 0.0049133807000650565, "eval_nq_token_set_precision": 0.4233934234837151, "eval_nq_token_set_recall": 0.5171948329638827, "eval_nq_true_num_tokens": 64.0, "step": 194375 }, { "epoch": 37.32, "learning_rate": 0.001, "loss": 2.497, "step": 194376 }, { "epoch": 37.32, "learning_rate": 0.001, "loss": 2.5044, "step": 194388 }, { "epoch": 37.33, "learning_rate": 0.001, "loss": 2.5042, "step": 194400 }, { "epoch": 37.33, "learning_rate": 0.001, "loss": 2.4949, "step": 194412 }, { "epoch": 37.33, "learning_rate": 0.001, "loss": 2.5047, "step": 194424 }, { "epoch": 37.33, "learning_rate": 0.001, "loss": 2.501, "step": 194436 }, { "epoch": 37.34, "learning_rate": 0.001, "loss": 2.502, "step": 194448 }, { "epoch": 37.34, "learning_rate": 0.001, "loss": 2.5058, "step": 194460 }, { "epoch": 37.34, "learning_rate": 0.001, "loss": 2.5073, "step": 194472 }, { "epoch": 37.34, "learning_rate": 0.001, "loss": 2.4941, "step": 194484 }, { "epoch": 37.35, "learning_rate": 0.001, "loss": 2.5084, "step": 194496 }, { "epoch": 37.35, "learning_rate": 0.001, "loss": 2.5015, "step": 194508 }, { "epoch": 37.35, "learning_rate": 0.001, "loss": 2.5032, "step": 194520 }, { "epoch": 37.35, "learning_rate": 0.001, "loss": 2.5043, "step": 194532 }, { "epoch": 37.35, "learning_rate": 0.001, "loss": 2.4949, "step": 194544 }, { "epoch": 37.36, "learning_rate": 0.001, "loss": 2.5094, "step": 194556 }, { "epoch": 37.36, "learning_rate": 0.001, "loss": 2.5018, "step": 194568 }, { "epoch": 37.36, "learning_rate": 0.001, "loss": 2.5087, "step": 194580 }, { "epoch": 37.36, "learning_rate": 0.001, "loss": 2.4969, "step": 194592 }, { "epoch": 37.37, "learning_rate": 0.001, "loss": 2.4911, "step": 194604 }, { "epoch": 37.37, "learning_rate": 0.001, "loss": 2.5102, "step": 194616 }, { "epoch": 37.37, "learning_rate": 0.001, "loss": 2.4975, "step": 194628 }, { "epoch": 37.37, "learning_rate": 0.001, "loss": 2.498, "step": 194640 }, { "epoch": 37.38, "learning_rate": 0.001, "loss": 2.4909, "step": 194652 }, { "epoch": 37.38, "learning_rate": 0.001, "loss": 2.5023, "step": 194664 }, { "epoch": 37.38, "learning_rate": 0.001, "loss": 2.5023, "step": 194676 }, { "epoch": 37.38, "learning_rate": 0.001, "loss": 2.4991, "step": 194688 }, { "epoch": 37.38, "learning_rate": 0.001, "loss": 2.5055, "step": 194700 }, { "epoch": 37.39, "learning_rate": 0.001, "loss": 2.4987, "step": 194712 }, { "epoch": 37.39, "learning_rate": 0.001, "loss": 2.5023, "step": 194724 }, { "epoch": 37.39, "learning_rate": 0.001, "loss": 2.4964, "step": 194736 }, { "epoch": 37.39, "learning_rate": 0.001, "loss": 2.5012, "step": 194748 }, { "epoch": 37.4, "learning_rate": 0.001, "loss": 2.5084, "step": 194760 }, { "epoch": 37.4, "learning_rate": 0.001, "loss": 2.5061, "step": 194772 }, { "epoch": 37.4, "learning_rate": 0.001, "loss": 2.5008, "step": 194784 }, { "epoch": 37.4, "learning_rate": 0.001, "loss": 2.5058, "step": 194796 }, { "epoch": 37.41, "learning_rate": 0.001, "loss": 2.5098, "step": 194808 }, { "epoch": 37.41, "learning_rate": 0.001, "loss": 2.5043, "step": 194820 }, { "epoch": 37.41, "learning_rate": 0.001, "loss": 2.4973, "step": 194832 }, { "epoch": 37.41, "learning_rate": 0.001, "loss": 2.4984, "step": 194844 }, { "epoch": 37.41, "learning_rate": 0.001, "loss": 2.4957, "step": 194856 }, { "epoch": 37.42, "learning_rate": 0.001, "loss": 2.4987, "step": 194868 }, { "epoch": 37.42, "learning_rate": 0.001, "loss": 2.4997, "step": 194880 }, { "epoch": 37.42, "learning_rate": 0.001, "loss": 2.504, "step": 194892 }, { "epoch": 37.42, "learning_rate": 0.001, "loss": 2.5045, "step": 194904 }, { "epoch": 37.43, "learning_rate": 0.001, "loss": 2.4958, "step": 194916 }, { "epoch": 37.43, "learning_rate": 0.001, "loss": 2.4978, "step": 194928 }, { "epoch": 37.43, "learning_rate": 0.001, "loss": 2.5051, "step": 194940 }, { "epoch": 37.43, "learning_rate": 0.001, "loss": 2.4967, "step": 194952 }, { "epoch": 37.44, "learning_rate": 0.001, "loss": 2.5037, "step": 194964 }, { "epoch": 37.44, "learning_rate": 0.001, "loss": 2.5047, "step": 194976 }, { "epoch": 37.44, "learning_rate": 0.001, "loss": 2.4989, "step": 194988 }, { "epoch": 37.44, "learning_rate": 0.001, "loss": 2.4962, "step": 195000 }, { "epoch": 37.44, "eval_ag_news_accuracy": 0.32796875, "eval_ag_news_bleu_score": 5.244067875540459, "eval_ag_news_bleu_score_sem": 0.17487987941260041, "eval_ag_news_emb_cos_sim": 0.8198561072349548, "eval_ag_news_emb_cos_sim_sem": 0.007308784440494839, "eval_ag_news_emb_top1_equal": 0.1953125, "eval_ag_news_emb_top1_equal_sem": 0.035178457165496856, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.478668451309204, "eval_ag_news_n_ngrams_match_1": 14.404, "eval_ag_news_n_ngrams_match_2": 3.358, "eval_ag_news_n_ngrams_match_3": 1.02, "eval_ag_news_num_pred_words": 46.516, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.41652913856147, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35791499620070455, "eval_ag_news_runtime": 10.0458, "eval_ag_news_samples_per_second": 49.772, "eval_ag_news_steps_per_second": 0.1, "eval_ag_news_token_set_f1": 0.3576864714670146, "eval_ag_news_token_set_f1_sem": 0.004596480762188387, "eval_ag_news_token_set_precision": 0.3448366794518283, "eval_ag_news_token_set_recall": 0.38447758016454087, "eval_ag_news_true_num_tokens": 56.09375, "step": 195000 }, { "epoch": 37.44, "eval_anthropic_toxic_prompts_accuracy": 0.116, "eval_anthropic_toxic_prompts_bleu_score": 3.230960722920636, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12652715997134178, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6770283579826355, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008723208968918155, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.187702178955078, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.326, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 47.392, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.232681055507367, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875, "eval_anthropic_toxic_prompts_rouge_score": 0.21719043400690022, "eval_anthropic_toxic_prompts_runtime": 9.9488, "eval_anthropic_toxic_prompts_samples_per_second": 50.257, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3599878178488781, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006559571815956049, "eval_anthropic_toxic_prompts_token_set_precision": 0.4446544910892496, "eval_anthropic_toxic_prompts_token_set_recall": 0.32877459401478754, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 195000 }, { "epoch": 37.44, "eval_arxiv_accuracy": 0.35075, "eval_arxiv_bleu_score": 4.421723876239833, "eval_arxiv_bleu_score_sem": 0.13081013770764, "eval_arxiv_emb_cos_sim": 0.7836287021636963, "eval_arxiv_emb_cos_sim_sem": 0.007277140059797254, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3493075370788574, "eval_arxiv_n_ngrams_match_1": 15.398, "eval_arxiv_n_ngrams_match_2": 3.06, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.316, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.483003389590884, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3701232713221733, "eval_arxiv_runtime": 10.1425, "eval_arxiv_samples_per_second": 49.297, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.36066145867072974, "eval_arxiv_token_set_f1_sem": 0.004317172138607544, "eval_arxiv_token_set_precision": 0.31440047985832426, "eval_arxiv_token_set_recall": 0.44132088538840525, "eval_arxiv_true_num_tokens": 64.0, "step": 195000 }, { "epoch": 37.44, "eval_python_code_alpaca_accuracy": 0.1626875, "eval_python_code_alpaca_bleu_score": 4.944235461568967, "eval_python_code_alpaca_bleu_score_sem": 0.16625114340706515, "eval_python_code_alpaca_emb_cos_sim": 0.7681782841682434, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008460871837262667, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8473594188690186, "eval_python_code_alpaca_n_ngrams_match_1": 9.926, "eval_python_code_alpaca_n_ngrams_match_2": 3.064, "eval_python_code_alpaca_n_ngrams_match_3": 1.132, "eval_python_code_alpaca_num_pred_words": 43.318, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.24219226801552, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34094041620934323, "eval_python_code_alpaca_runtime": 9.932, "eval_python_code_alpaca_samples_per_second": 50.342, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48029914494784204, "eval_python_code_alpaca_token_set_f1_sem": 0.0058647336376740225, "eval_python_code_alpaca_token_set_precision": 0.5420172127474987, "eval_python_code_alpaca_token_set_recall": 0.4579217159509164, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 195000 }, { "epoch": 37.44, "eval_wikibio_accuracy": 0.32665625, "eval_wikibio_bleu_score": 5.905575571952946, "eval_wikibio_bleu_score_sem": 0.21851932046204833, "eval_wikibio_emb_cos_sim": 0.7433996796607971, "eval_wikibio_emb_cos_sim_sem": 0.009556348113946268, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.665127992630005, "eval_wikibio_n_ngrams_match_1": 10.19, "eval_wikibio_n_ngrams_match_2": 3.406, "eval_wikibio_n_ngrams_match_3": 1.252, "eval_wikibio_num_pred_words": 36.964, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.06113538060937, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35906320474372844, "eval_wikibio_runtime": 10.362, "eval_wikibio_samples_per_second": 48.253, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.31800409151553855, "eval_wikibio_token_set_f1_sem": 0.005476764499547402, "eval_wikibio_token_set_precision": 0.3291303454959236, "eval_wikibio_token_set_recall": 0.3238962928597888, "eval_wikibio_true_num_tokens": 61.1328125, "step": 195000 }, { "epoch": 37.44, "eval_nq_accuracy": 0.536125, "eval_nq_bleu_score": 12.102546142598007, "eval_nq_bleu_score_sem": 0.48849432140077825, "eval_nq_emb_cos_sim": 0.8412253856658936, "eval_nq_emb_cos_sim_sem": 0.006508606765180276, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1275899410247803, "eval_nq_n_ngrams_match_1": 23.464, "eval_nq_n_ngrams_match_2": 8.688, "eval_nq_n_ngrams_match_3": 4.038, "eval_nq_num_pred_words": 48.968, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.394610904925095, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4571428648214083, "eval_nq_runtime": 10.2017, "eval_nq_samples_per_second": 49.011, "eval_nq_steps_per_second": 0.098, "eval_nq_token_set_f1": 0.471117674143217, "eval_nq_token_set_f1_sem": 0.0048392622866591265, "eval_nq_token_set_precision": 0.428317159728763, "eval_nq_token_set_recall": 0.5313456789986281, "eval_nq_true_num_tokens": 64.0, "step": 195000 }, { "epoch": 37.44, "learning_rate": 0.001, "loss": 2.4993, "step": 195012 }, { "epoch": 37.45, "learning_rate": 0.001, "loss": 2.5137, "step": 195024 }, { "epoch": 37.45, "learning_rate": 0.001, "loss": 2.5003, "step": 195036 }, { "epoch": 37.45, "learning_rate": 0.001, "loss": 2.5028, "step": 195048 }, { "epoch": 37.45, "learning_rate": 0.001, "loss": 2.5027, "step": 195060 }, { "epoch": 37.46, "learning_rate": 0.001, "loss": 2.4997, "step": 195072 }, { "epoch": 37.46, "learning_rate": 0.001, "loss": 2.5054, "step": 195084 }, { "epoch": 37.46, "learning_rate": 0.001, "loss": 2.488, "step": 195096 }, { "epoch": 37.46, "learning_rate": 0.001, "loss": 2.5032, "step": 195108 }, { "epoch": 37.47, "learning_rate": 0.001, "loss": 2.505, "step": 195120 }, { "epoch": 37.47, "learning_rate": 0.001, "loss": 2.5021, "step": 195132 }, { "epoch": 37.47, "learning_rate": 0.001, "loss": 2.5079, "step": 195144 }, { "epoch": 37.47, "learning_rate": 0.001, "loss": 2.5024, "step": 195156 }, { "epoch": 37.47, "learning_rate": 0.001, "loss": 2.4956, "step": 195168 }, { "epoch": 37.48, "learning_rate": 0.001, "loss": 2.5047, "step": 195180 }, { "epoch": 37.48, "learning_rate": 0.001, "loss": 2.4877, "step": 195192 }, { "epoch": 37.48, "learning_rate": 0.001, "loss": 2.5075, "step": 195204 }, { "epoch": 37.48, "learning_rate": 0.001, "loss": 2.5042, "step": 195216 }, { "epoch": 37.49, "learning_rate": 0.001, "loss": 2.4984, "step": 195228 }, { "epoch": 37.49, "learning_rate": 0.001, "loss": 2.5033, "step": 195240 }, { "epoch": 37.49, "learning_rate": 0.001, "loss": 2.4938, "step": 195252 }, { "epoch": 37.49, "learning_rate": 0.001, "loss": 2.4914, "step": 195264 }, { "epoch": 37.5, "learning_rate": 0.001, "loss": 2.4996, "step": 195276 }, { "epoch": 37.5, "learning_rate": 0.001, "loss": 2.5058, "step": 195288 }, { "epoch": 37.5, "learning_rate": 0.001, "loss": 2.4942, "step": 195300 }, { "epoch": 37.5, "learning_rate": 0.001, "loss": 2.5038, "step": 195312 }, { "epoch": 37.5, "learning_rate": 0.001, "loss": 2.4996, "step": 195324 }, { "epoch": 37.51, "learning_rate": 0.001, "loss": 2.5061, "step": 195336 }, { "epoch": 37.51, "learning_rate": 0.001, "loss": 2.5062, "step": 195348 }, { "epoch": 37.51, "learning_rate": 0.001, "loss": 2.4995, "step": 195360 }, { "epoch": 37.51, "learning_rate": 0.001, "loss": 2.4906, "step": 195372 }, { "epoch": 37.52, "learning_rate": 0.001, "loss": 2.5109, "step": 195384 }, { "epoch": 37.52, "learning_rate": 0.001, "loss": 2.4973, "step": 195396 }, { "epoch": 37.52, "learning_rate": 0.001, "loss": 2.4986, "step": 195408 }, { "epoch": 37.52, "learning_rate": 0.001, "loss": 2.498, "step": 195420 }, { "epoch": 37.53, "learning_rate": 0.001, "loss": 2.5054, "step": 195432 }, { "epoch": 37.53, "learning_rate": 0.001, "loss": 2.4908, "step": 195444 }, { "epoch": 37.53, "learning_rate": 0.001, "loss": 2.5037, "step": 195456 }, { "epoch": 37.53, "learning_rate": 0.001, "loss": 2.4987, "step": 195468 }, { "epoch": 37.53, "learning_rate": 0.001, "loss": 2.4942, "step": 195480 }, { "epoch": 37.54, "learning_rate": 0.001, "loss": 2.5064, "step": 195492 }, { "epoch": 37.54, "learning_rate": 0.001, "loss": 2.4968, "step": 195504 }, { "epoch": 37.54, "learning_rate": 0.001, "loss": 2.4998, "step": 195516 }, { "epoch": 37.54, "learning_rate": 0.001, "loss": 2.5067, "step": 195528 }, { "epoch": 37.55, "learning_rate": 0.001, "loss": 2.5001, "step": 195540 }, { "epoch": 37.55, "learning_rate": 0.001, "loss": 2.5012, "step": 195552 }, { "epoch": 37.55, "learning_rate": 0.001, "loss": 2.5052, "step": 195564 }, { "epoch": 37.55, "learning_rate": 0.001, "loss": 2.5052, "step": 195576 }, { "epoch": 37.56, "learning_rate": 0.001, "loss": 2.5019, "step": 195588 }, { "epoch": 37.56, "learning_rate": 0.001, "loss": 2.4972, "step": 195600 }, { "epoch": 37.56, "learning_rate": 0.001, "loss": 2.4981, "step": 195612 }, { "epoch": 37.56, "learning_rate": 0.001, "loss": 2.4918, "step": 195624 }, { "epoch": 37.56, "eval_ag_news_accuracy": 0.32840625, "eval_ag_news_bleu_score": 5.160678237118373, "eval_ag_news_bleu_score_sem": 0.15644910179101512, "eval_ag_news_emb_cos_sim": 0.8232282400131226, "eval_ag_news_emb_cos_sim_sem": 0.00639512110720863, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.479858636856079, "eval_ag_news_n_ngrams_match_1": 14.61, "eval_ag_news_n_ngrams_match_2": 3.34, "eval_ag_news_n_ngrams_match_3": 1.0, "eval_ag_news_num_pred_words": 47.11, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.45513379181512, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3644997735690672, "eval_ag_news_runtime": 10.9843, "eval_ag_news_samples_per_second": 45.519, "eval_ag_news_steps_per_second": 0.091, "eval_ag_news_token_set_f1": 0.3636258052726045, "eval_ag_news_token_set_f1_sem": 0.0043789982790820685, "eval_ag_news_token_set_precision": 0.3518743547836162, "eval_ag_news_token_set_recall": 0.3894813692401069, "eval_ag_news_true_num_tokens": 56.09375, "step": 195625 }, { "epoch": 37.56, "eval_anthropic_toxic_prompts_accuracy": 0.11653125, "eval_anthropic_toxic_prompts_bleu_score": 3.2536863914236536, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12414063209611298, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6823281645774841, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008623482281351793, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2047436237335205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.47, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.046, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 48.036, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.649179741632693, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9296875, "eval_anthropic_toxic_prompts_rouge_score": 0.21857256722718568, "eval_anthropic_toxic_prompts_runtime": 9.9715, "eval_anthropic_toxic_prompts_samples_per_second": 50.143, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.3652828021103452, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006487779074896755, "eval_anthropic_toxic_prompts_token_set_precision": 0.45705235729397486, "eval_anthropic_toxic_prompts_token_set_recall": 0.3291836020040149, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 195625 }, { "epoch": 37.56, "eval_arxiv_accuracy": 0.34915625, "eval_arxiv_bleu_score": 4.5476246633263235, "eval_arxiv_bleu_score_sem": 0.13177119287371816, "eval_arxiv_emb_cos_sim": 0.7904843091964722, "eval_arxiv_emb_cos_sim_sem": 0.006650471081634421, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3637514114379883, "eval_arxiv_n_ngrams_match_1": 15.692, "eval_arxiv_n_ngrams_match_2": 3.126, "eval_arxiv_n_ngrams_match_3": 0.694, "eval_arxiv_num_pred_words": 41.608, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.8973938091562, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3737865356288159, "eval_arxiv_runtime": 10.6201, "eval_arxiv_samples_per_second": 47.081, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.36484636584780045, "eval_arxiv_token_set_f1_sem": 0.004090870558584512, "eval_arxiv_token_set_precision": 0.3208891362951231, "eval_arxiv_token_set_recall": 0.4374011321819683, "eval_arxiv_true_num_tokens": 64.0, "step": 195625 }, { "epoch": 37.56, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.856616076429742, "eval_python_code_alpaca_bleu_score_sem": 0.1576413255673956, "eval_python_code_alpaca_emb_cos_sim": 0.7713407278060913, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007635985572008593, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.827100992202759, "eval_python_code_alpaca_n_ngrams_match_1": 10.092, "eval_python_code_alpaca_n_ngrams_match_2": 3.166, "eval_python_code_alpaca_n_ngrams_match_3": 1.148, "eval_python_code_alpaca_num_pred_words": 45.384, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.896406939635312, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33791876503371887, "eval_python_code_alpaca_runtime": 10.1229, "eval_python_code_alpaca_samples_per_second": 49.393, "eval_python_code_alpaca_steps_per_second": 0.099, "eval_python_code_alpaca_token_set_f1": 0.4841025604174241, "eval_python_code_alpaca_token_set_f1_sem": 0.005484594221514288, "eval_python_code_alpaca_token_set_precision": 0.5530509643743557, "eval_python_code_alpaca_token_set_recall": 0.45028786757907063, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 195625 }, { "epoch": 37.56, "eval_wikibio_accuracy": 0.3275, "eval_wikibio_bleu_score": 6.167448437456132, "eval_wikibio_bleu_score_sem": 0.2218471402877958, "eval_wikibio_emb_cos_sim": 0.7572469711303711, "eval_wikibio_emb_cos_sim_sem": 0.009437384682757222, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6589012145996094, "eval_wikibio_n_ngrams_match_1": 10.308, "eval_wikibio_n_ngrams_match_2": 3.49, "eval_wikibio_n_ngrams_match_3": 1.334, "eval_wikibio_num_pred_words": 36.57, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.81866604577605, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3622507798370945, "eval_wikibio_runtime": 9.9982, "eval_wikibio_samples_per_second": 50.009, "eval_wikibio_steps_per_second": 0.1, "eval_wikibio_token_set_f1": 0.3240266841869486, "eval_wikibio_token_set_f1_sem": 0.005392734000007475, "eval_wikibio_token_set_precision": 0.3332030008260182, "eval_wikibio_token_set_recall": 0.3291543991379892, "eval_wikibio_true_num_tokens": 61.1328125, "step": 195625 }, { "epoch": 37.56, "eval_nq_accuracy": 0.53659375, "eval_nq_bleu_score": 12.054164118492311, "eval_nq_bleu_score_sem": 0.49342112605169397, "eval_nq_emb_cos_sim": 0.8338325619697571, "eval_nq_emb_cos_sim_sem": 0.007536739621933057, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1246421337127686, "eval_nq_n_ngrams_match_1": 23.52, "eval_nq_n_ngrams_match_2": 8.734, "eval_nq_n_ngrams_match_3": 4.018, "eval_nq_num_pred_words": 49.55, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.369901646477157, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4547006044907649, "eval_nq_runtime": 10.4494, "eval_nq_samples_per_second": 47.85, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46850882076678196, "eval_nq_token_set_f1_sem": 0.004857155245198665, "eval_nq_token_set_precision": 0.42857432408107793, "eval_nq_token_set_recall": 0.525536764171176, "eval_nq_true_num_tokens": 64.0, "step": 195625 }, { "epoch": 37.56, "learning_rate": 0.001, "loss": 2.4905, "step": 195636 }, { "epoch": 37.57, "learning_rate": 0.001, "loss": 2.5061, "step": 195648 }, { "epoch": 37.57, "learning_rate": 0.001, "loss": 2.4976, "step": 195660 }, { "epoch": 37.57, "learning_rate": 0.001, "loss": 2.5041, "step": 195672 }, { "epoch": 37.57, "learning_rate": 0.001, "loss": 2.5031, "step": 195684 }, { "epoch": 37.58, "learning_rate": 0.001, "loss": 2.5066, "step": 195696 }, { "epoch": 37.58, "learning_rate": 0.001, "loss": 2.4899, "step": 195708 }, { "epoch": 37.58, "learning_rate": 0.001, "loss": 2.4911, "step": 195720 }, { "epoch": 37.58, "learning_rate": 0.001, "loss": 2.5028, "step": 195732 }, { "epoch": 37.59, "learning_rate": 0.001, "loss": 2.5088, "step": 195744 }, { "epoch": 37.59, "learning_rate": 0.001, "loss": 2.5104, "step": 195756 }, { "epoch": 37.59, "learning_rate": 0.001, "loss": 2.493, "step": 195768 }, { "epoch": 37.59, "learning_rate": 0.001, "loss": 2.4995, "step": 195780 }, { "epoch": 37.59, "learning_rate": 0.001, "loss": 2.5028, "step": 195792 }, { "epoch": 37.6, "learning_rate": 0.001, "loss": 2.5048, "step": 195804 }, { "epoch": 37.6, "learning_rate": 0.001, "loss": 2.5004, "step": 195816 }, { "epoch": 37.6, "learning_rate": 0.001, "loss": 2.5108, "step": 195828 }, { "epoch": 37.6, "learning_rate": 0.001, "loss": 2.5105, "step": 195840 }, { "epoch": 37.61, "learning_rate": 0.001, "loss": 2.4927, "step": 195852 }, { "epoch": 37.61, "learning_rate": 0.001, "loss": 2.5083, "step": 195864 }, { "epoch": 37.61, "learning_rate": 0.001, "loss": 2.5018, "step": 195876 }, { "epoch": 37.61, "learning_rate": 0.001, "loss": 2.5023, "step": 195888 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 2.5055, "step": 195900 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 2.502, "step": 195912 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 2.5072, "step": 195924 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 2.5084, "step": 195936 }, { "epoch": 37.62, "learning_rate": 0.001, "loss": 2.5109, "step": 195948 }, { "epoch": 37.63, "learning_rate": 0.001, "loss": 2.5087, "step": 195960 }, { "epoch": 37.63, "learning_rate": 0.001, "loss": 2.505, "step": 195972 }, { "epoch": 37.63, "learning_rate": 0.001, "loss": 2.5074, "step": 195984 }, { "epoch": 37.63, "learning_rate": 0.001, "loss": 2.5101, "step": 195996 }, { "epoch": 37.64, "learning_rate": 0.001, "loss": 2.5035, "step": 196008 }, { "epoch": 37.64, "learning_rate": 0.001, "loss": 2.5055, "step": 196020 }, { "epoch": 37.64, "learning_rate": 0.001, "loss": 2.5074, "step": 196032 }, { "epoch": 37.64, "learning_rate": 0.001, "loss": 2.5038, "step": 196044 }, { "epoch": 37.65, "learning_rate": 0.001, "loss": 2.4983, "step": 196056 }, { "epoch": 37.65, "learning_rate": 0.001, "loss": 2.5058, "step": 196068 }, { "epoch": 37.65, "learning_rate": 0.001, "loss": 2.51, "step": 196080 }, { "epoch": 37.65, "learning_rate": 0.001, "loss": 2.5117, "step": 196092 }, { "epoch": 37.65, "learning_rate": 0.001, "loss": 2.5021, "step": 196104 }, { "epoch": 37.66, "learning_rate": 0.001, "loss": 2.5222, "step": 196116 }, { "epoch": 37.66, "learning_rate": 0.001, "loss": 2.5106, "step": 196128 }, { "epoch": 37.66, "learning_rate": 0.001, "loss": 2.5134, "step": 196140 }, { "epoch": 37.66, "learning_rate": 0.001, "loss": 2.503, "step": 196152 }, { "epoch": 37.67, "learning_rate": 0.001, "loss": 2.5023, "step": 196164 }, { "epoch": 37.67, "learning_rate": 0.001, "loss": 2.5094, "step": 196176 }, { "epoch": 37.67, "learning_rate": 0.001, "loss": 2.5081, "step": 196188 }, { "epoch": 37.67, "learning_rate": 0.001, "loss": 2.5133, "step": 196200 }, { "epoch": 37.68, "learning_rate": 0.001, "loss": 2.502, "step": 196212 }, { "epoch": 37.68, "learning_rate": 0.001, "loss": 2.4978, "step": 196224 }, { "epoch": 37.68, "learning_rate": 0.001, "loss": 2.4956, "step": 196236 }, { "epoch": 37.68, "learning_rate": 0.001, "loss": 2.495, "step": 196248 }, { "epoch": 37.68, "eval_ag_news_accuracy": 0.328875, "eval_ag_news_bleu_score": 5.191943237557456, "eval_ag_news_bleu_score_sem": 0.1691118400321775, "eval_ag_news_emb_cos_sim": 0.8163424730300903, "eval_ag_news_emb_cos_sim_sem": 0.00759417128847423, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.471841335296631, "eval_ag_news_n_ngrams_match_1": 14.508, "eval_ag_news_n_ngrams_match_2": 3.338, "eval_ag_news_n_ngrams_match_3": 1.0, "eval_ag_news_num_pred_words": 46.768, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.19597147645399, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36278319533389847, "eval_ag_news_runtime": 10.5807, "eval_ag_news_samples_per_second": 47.256, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.3630105993233433, "eval_ag_news_token_set_f1_sem": 0.004483880190431542, "eval_ag_news_token_set_precision": 0.34978741208910036, "eval_ag_news_token_set_recall": 0.3899184770063757, "eval_ag_news_true_num_tokens": 56.09375, "step": 196250 }, { "epoch": 37.68, "eval_anthropic_toxic_prompts_accuracy": 0.11528125, "eval_anthropic_toxic_prompts_bleu_score": 3.3222841018869005, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12694519578173327, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6914851665496826, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008522632116854811, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2299375534057617, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.356, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.058, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.772, "eval_anthropic_toxic_prompts_num_pred_words": 47.128, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.278078391770503, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2169343396150039, "eval_anthropic_toxic_prompts_runtime": 10.1541, "eval_anthropic_toxic_prompts_samples_per_second": 49.241, "eval_anthropic_toxic_prompts_steps_per_second": 0.098, "eval_anthropic_toxic_prompts_token_set_f1": 0.36212943352384325, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544156448989002, "eval_anthropic_toxic_prompts_token_set_precision": 0.45090677936596274, "eval_anthropic_toxic_prompts_token_set_recall": 0.3282954790726491, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 196250 }, { "epoch": 37.68, "eval_arxiv_accuracy": 0.350625, "eval_arxiv_bleu_score": 4.348194417630073, "eval_arxiv_bleu_score_sem": 0.12087874354688675, "eval_arxiv_emb_cos_sim": 0.7824056148529053, "eval_arxiv_emb_cos_sim_sem": 0.006829816458760404, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3426730632781982, "eval_arxiv_n_ngrams_match_1": 15.338, "eval_arxiv_n_ngrams_match_2": 3.0, "eval_arxiv_n_ngrams_match_3": 0.636, "eval_arxiv_num_pred_words": 40.22, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.294659123238933, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3704195679433676, "eval_arxiv_runtime": 10.5147, "eval_arxiv_samples_per_second": 47.552, "eval_arxiv_steps_per_second": 0.095, "eval_arxiv_token_set_f1": 0.35889324267200223, "eval_arxiv_token_set_f1_sem": 0.004168044743930564, "eval_arxiv_token_set_precision": 0.3110964680727157, "eval_arxiv_token_set_recall": 0.44507439509782587, "eval_arxiv_true_num_tokens": 64.0, "step": 196250 }, { "epoch": 37.68, "eval_python_code_alpaca_accuracy": 0.16015625, "eval_python_code_alpaca_bleu_score": 4.470202827015937, "eval_python_code_alpaca_bleu_score_sem": 0.1369550803608607, "eval_python_code_alpaca_emb_cos_sim": 0.7453908920288086, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009904095175564575, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.862358808517456, "eval_python_code_alpaca_n_ngrams_match_1": 9.532, "eval_python_code_alpaca_n_ngrams_match_2": 2.864, "eval_python_code_alpaca_n_ngrams_match_3": 0.958, "eval_python_code_alpaca_num_pred_words": 43.6, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.5027639510361, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3252311283517712, "eval_python_code_alpaca_runtime": 10.0341, "eval_python_code_alpaca_samples_per_second": 49.83, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.47097496401311145, "eval_python_code_alpaca_token_set_f1_sem": 0.005564428237760059, "eval_python_code_alpaca_token_set_precision": 0.5172152055187773, "eval_python_code_alpaca_token_set_recall": 0.4558242906613513, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 196250 }, { "epoch": 37.68, "eval_wikibio_accuracy": 0.32784375, "eval_wikibio_bleu_score": 6.044218406578306, "eval_wikibio_bleu_score_sem": 0.20489907065935145, "eval_wikibio_emb_cos_sim": 0.7580497860908508, "eval_wikibio_emb_cos_sim_sem": 0.00828936643250422, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6197497844696045, "eval_wikibio_n_ngrams_match_1": 10.322, "eval_wikibio_n_ngrams_match_2": 3.464, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 36.502, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.328226551431605, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3640825541645696, "eval_wikibio_runtime": 10.3425, "eval_wikibio_samples_per_second": 48.344, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3264149641025879, "eval_wikibio_token_set_f1_sem": 0.005387873059022707, "eval_wikibio_token_set_precision": 0.33619901291779125, "eval_wikibio_token_set_recall": 0.3336449063810576, "eval_wikibio_true_num_tokens": 61.1328125, "step": 196250 }, { "epoch": 37.68, "eval_nq_accuracy": 0.534875, "eval_nq_bleu_score": 11.945385418077185, "eval_nq_bleu_score_sem": 0.48931319491620795, "eval_nq_emb_cos_sim": 0.8394231200218201, "eval_nq_emb_cos_sim_sem": 0.00646038879485029, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.129955530166626, "eval_nq_n_ngrams_match_1": 23.596, "eval_nq_n_ngrams_match_2": 8.612, "eval_nq_n_ngrams_match_3": 3.958, "eval_nq_num_pred_words": 49.094, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.414492612035515, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45782448353581756, "eval_nq_runtime": 10.419, "eval_nq_samples_per_second": 47.989, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4699820335881326, "eval_nq_token_set_f1_sem": 0.004957978387288264, "eval_nq_token_set_precision": 0.4284136995609277, "eval_nq_token_set_recall": 0.5287816983017264, "eval_nq_true_num_tokens": 64.0, "step": 196250 }, { "epoch": 37.68, "learning_rate": 0.001, "loss": 2.5105, "step": 196260 }, { "epoch": 37.69, "learning_rate": 0.001, "loss": 2.5141, "step": 196272 }, { "epoch": 37.69, "learning_rate": 0.001, "loss": 2.4893, "step": 196284 }, { "epoch": 37.69, "learning_rate": 0.001, "loss": 2.5117, "step": 196296 }, { "epoch": 37.69, "learning_rate": 0.001, "loss": 2.4995, "step": 196308 }, { "epoch": 37.7, "learning_rate": 0.001, "loss": 2.5066, "step": 196320 }, { "epoch": 37.7, "learning_rate": 0.001, "loss": 2.508, "step": 196332 }, { "epoch": 37.7, "learning_rate": 0.001, "loss": 2.504, "step": 196344 }, { "epoch": 37.7, "learning_rate": 0.001, "loss": 2.5158, "step": 196356 }, { "epoch": 37.71, "learning_rate": 0.001, "loss": 2.5004, "step": 196368 }, { "epoch": 37.71, "learning_rate": 0.001, "loss": 2.5082, "step": 196380 }, { "epoch": 37.71, "learning_rate": 0.001, "loss": 2.5096, "step": 196392 }, { "epoch": 37.71, "learning_rate": 0.001, "loss": 2.5096, "step": 196404 }, { "epoch": 37.71, "learning_rate": 0.001, "loss": 2.5088, "step": 196416 }, { "epoch": 37.72, "learning_rate": 0.001, "loss": 2.5057, "step": 196428 }, { "epoch": 37.72, "learning_rate": 0.001, "loss": 2.4962, "step": 196440 }, { "epoch": 37.72, "learning_rate": 0.001, "loss": 2.5054, "step": 196452 }, { "epoch": 37.72, "learning_rate": 0.001, "loss": 2.5043, "step": 196464 }, { "epoch": 37.73, "learning_rate": 0.001, "loss": 2.5033, "step": 196476 }, { "epoch": 37.73, "learning_rate": 0.001, "loss": 2.5048, "step": 196488 }, { "epoch": 37.73, "learning_rate": 0.001, "loss": 2.5022, "step": 196500 }, { "epoch": 37.73, "learning_rate": 0.001, "loss": 2.5115, "step": 196512 }, { "epoch": 37.74, "learning_rate": 0.001, "loss": 2.5017, "step": 196524 }, { "epoch": 37.74, "learning_rate": 0.001, "loss": 2.4968, "step": 196536 }, { "epoch": 37.74, "learning_rate": 0.001, "loss": 2.503, "step": 196548 }, { "epoch": 37.74, "learning_rate": 0.001, "loss": 2.5054, "step": 196560 }, { "epoch": 37.74, "learning_rate": 0.001, "loss": 2.5055, "step": 196572 }, { "epoch": 37.75, "learning_rate": 0.001, "loss": 2.503, "step": 196584 }, { "epoch": 37.75, "learning_rate": 0.001, "loss": 2.4883, "step": 196596 }, { "epoch": 37.75, "learning_rate": 0.001, "loss": 2.5029, "step": 196608 }, { "epoch": 37.75, "learning_rate": 0.001, "loss": 2.507, "step": 196620 }, { "epoch": 37.76, "learning_rate": 0.001, "loss": 2.5024, "step": 196632 }, { "epoch": 37.76, "learning_rate": 0.001, "loss": 2.5048, "step": 196644 }, { "epoch": 37.76, "learning_rate": 0.001, "loss": 2.5113, "step": 196656 }, { "epoch": 37.76, "learning_rate": 0.001, "loss": 2.5095, "step": 196668 }, { "epoch": 37.76, "learning_rate": 0.001, "loss": 2.5036, "step": 196680 }, { "epoch": 37.77, "learning_rate": 0.001, "loss": 2.5055, "step": 196692 }, { "epoch": 37.77, "learning_rate": 0.001, "loss": 2.4991, "step": 196704 }, { "epoch": 37.77, "learning_rate": 0.001, "loss": 2.501, "step": 196716 }, { "epoch": 37.77, "learning_rate": 0.001, "loss": 2.5099, "step": 196728 }, { "epoch": 37.78, "learning_rate": 0.001, "loss": 2.4975, "step": 196740 }, { "epoch": 37.78, "learning_rate": 0.001, "loss": 2.4949, "step": 196752 }, { "epoch": 37.78, "learning_rate": 0.001, "loss": 2.5035, "step": 196764 }, { "epoch": 37.78, "learning_rate": 0.001, "loss": 2.5056, "step": 196776 }, { "epoch": 37.79, "learning_rate": 0.001, "loss": 2.5017, "step": 196788 }, { "epoch": 37.79, "learning_rate": 0.001, "loss": 2.498, "step": 196800 }, { "epoch": 37.79, "learning_rate": 0.001, "loss": 2.5117, "step": 196812 }, { "epoch": 37.79, "learning_rate": 0.001, "loss": 2.5073, "step": 196824 }, { "epoch": 37.79, "learning_rate": 0.001, "loss": 2.5101, "step": 196836 }, { "epoch": 37.8, "learning_rate": 0.001, "loss": 2.497, "step": 196848 }, { "epoch": 37.8, "learning_rate": 0.001, "loss": 2.5096, "step": 196860 }, { "epoch": 37.8, "learning_rate": 0.001, "loss": 2.4981, "step": 196872 }, { "epoch": 37.8, "eval_ag_news_accuracy": 0.32965625, "eval_ag_news_bleu_score": 5.034891070714903, "eval_ag_news_bleu_score_sem": 0.16478578625660043, "eval_ag_news_emb_cos_sim": 0.82633376121521, "eval_ag_news_emb_cos_sim_sem": 0.006461327879318673, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4684040546417236, "eval_ag_news_n_ngrams_match_1": 14.482, "eval_ag_news_n_ngrams_match_2": 3.242, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.71, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.08549486486442, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3626753752710302, "eval_ag_news_runtime": 11.0597, "eval_ag_news_samples_per_second": 45.209, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.3601345675263891, "eval_ag_news_token_set_f1_sem": 0.004389001993053166, "eval_ag_news_token_set_precision": 0.3465120254155709, "eval_ag_news_token_set_recall": 0.3890541211196841, "eval_ag_news_true_num_tokens": 56.09375, "step": 196875 }, { "epoch": 37.8, "eval_anthropic_toxic_prompts_accuracy": 0.11653125, "eval_anthropic_toxic_prompts_bleu_score": 3.297903386605315, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1217828454410264, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6917194724082947, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008183295327147951, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2022769451141357, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.354, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.064, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, "eval_anthropic_toxic_prompts_num_pred_words": 47.47, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.588453064367823, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21787486151933474, "eval_anthropic_toxic_prompts_runtime": 13.2722, "eval_anthropic_toxic_prompts_samples_per_second": 37.673, "eval_anthropic_toxic_prompts_steps_per_second": 0.075, "eval_anthropic_toxic_prompts_token_set_f1": 0.3601464756738046, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006612663323356015, "eval_anthropic_toxic_prompts_token_set_precision": 0.45008079394599904, "eval_anthropic_toxic_prompts_token_set_recall": 0.32665501895132754, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 196875 }, { "epoch": 37.8, "eval_arxiv_accuracy": 0.35278125, "eval_arxiv_bleu_score": 4.485032326418497, "eval_arxiv_bleu_score_sem": 0.13034045040757414, "eval_arxiv_emb_cos_sim": 0.782996416091919, "eval_arxiv_emb_cos_sim_sem": 0.006943852001396215, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3341031074523926, "eval_arxiv_n_ngrams_match_1": 15.504, "eval_arxiv_n_ngrams_match_2": 3.122, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.666, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.053211221116968, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3725994041119809, "eval_arxiv_runtime": 9.9953, "eval_arxiv_samples_per_second": 50.024, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.3624400607030841, "eval_arxiv_token_set_f1_sem": 0.004251640996338694, "eval_arxiv_token_set_precision": 0.31597757531856513, "eval_arxiv_token_set_recall": 0.44393532148918696, "eval_arxiv_true_num_tokens": 64.0, "step": 196875 }, { "epoch": 37.8, "eval_python_code_alpaca_accuracy": 0.16140625, "eval_python_code_alpaca_bleu_score": 4.850083712381379, "eval_python_code_alpaca_bleu_score_sem": 0.14978757826362565, "eval_python_code_alpaca_emb_cos_sim": 0.7660810947418213, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007439076812854712, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8679707050323486, "eval_python_code_alpaca_n_ngrams_match_1": 10.138, "eval_python_code_alpaca_n_ngrams_match_2": 3.116, "eval_python_code_alpaca_n_ngrams_match_3": 1.092, "eval_python_code_alpaca_num_pred_words": 44.432, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.601263777964043, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33948634462601757, "eval_python_code_alpaca_runtime": 9.9249, "eval_python_code_alpaca_samples_per_second": 50.378, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.49157670046570423, "eval_python_code_alpaca_token_set_f1_sem": 0.005254068065986003, "eval_python_code_alpaca_token_set_precision": 0.555358362906108, "eval_python_code_alpaca_token_set_recall": 0.46388114306897893, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 196875 }, { "epoch": 37.8, "eval_wikibio_accuracy": 0.33021875, "eval_wikibio_bleu_score": 6.139729124755908, "eval_wikibio_bleu_score_sem": 0.2158405026929467, "eval_wikibio_emb_cos_sim": 0.7544930577278137, "eval_wikibio_emb_cos_sim_sem": 0.00775497468643345, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6849443912506104, "eval_wikibio_n_ngrams_match_1": 10.18, "eval_wikibio_n_ngrams_match_2": 3.426, "eval_wikibio_n_ngrams_match_3": 1.29, "eval_wikibio_num_pred_words": 36.176, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.84290677403874, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36006466597621756, "eval_wikibio_runtime": 9.935, "eval_wikibio_samples_per_second": 50.327, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32254195008755326, "eval_wikibio_token_set_f1_sem": 0.005338669202045662, "eval_wikibio_token_set_precision": 0.3309832938014916, "eval_wikibio_token_set_recall": 0.32899516700243975, "eval_wikibio_true_num_tokens": 61.1328125, "step": 196875 }, { "epoch": 37.8, "eval_nq_accuracy": 0.535375, "eval_nq_bleu_score": 12.135138218921748, "eval_nq_bleu_score_sem": 0.48553437251501824, "eval_nq_emb_cos_sim": 0.8353713750839233, "eval_nq_emb_cos_sim_sem": 0.007257361334044501, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1225414276123047, "eval_nq_n_ngrams_match_1": 23.664, "eval_nq_n_ngrams_match_2": 8.804, "eval_nq_n_ngrams_match_3": 4.082, "eval_nq_num_pred_words": 49.356, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.352337398149128, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4584016645809208, "eval_nq_runtime": 26.5183, "eval_nq_samples_per_second": 18.855, "eval_nq_steps_per_second": 0.038, "eval_nq_token_set_f1": 0.4715105559238299, "eval_nq_token_set_f1_sem": 0.00483828325260912, "eval_nq_token_set_precision": 0.43156210457097177, "eval_nq_token_set_recall": 0.5291180792544479, "eval_nq_true_num_tokens": 64.0, "step": 196875 }, { "epoch": 37.8, "learning_rate": 0.001, "loss": 2.5051, "step": 196884 }, { "epoch": 37.81, "learning_rate": 0.001, "loss": 2.5069, "step": 196896 }, { "epoch": 37.81, "learning_rate": 0.001, "loss": 2.5071, "step": 196908 }, { "epoch": 37.81, "learning_rate": 0.001, "loss": 2.5061, "step": 196920 }, { "epoch": 37.81, "learning_rate": 0.001, "loss": 2.5085, "step": 196932 }, { "epoch": 37.82, "learning_rate": 0.001, "loss": 2.5031, "step": 196944 }, { "epoch": 37.82, "learning_rate": 0.001, "loss": 2.5057, "step": 196956 }, { "epoch": 37.82, "learning_rate": 0.001, "loss": 2.4908, "step": 196968 }, { "epoch": 37.82, "learning_rate": 0.001, "loss": 2.5014, "step": 196980 }, { "epoch": 37.82, "learning_rate": 0.001, "loss": 2.5001, "step": 196992 }, { "epoch": 37.83, "learning_rate": 0.001, "loss": 2.5052, "step": 197004 }, { "epoch": 37.83, "learning_rate": 0.001, "loss": 2.5057, "step": 197016 }, { "epoch": 37.83, "learning_rate": 0.001, "loss": 2.5001, "step": 197028 }, { "epoch": 37.83, "learning_rate": 0.001, "loss": 2.5075, "step": 197040 }, { "epoch": 37.84, "learning_rate": 0.001, "loss": 2.5087, "step": 197052 }, { "epoch": 37.84, "learning_rate": 0.001, "loss": 2.5112, "step": 197064 }, { "epoch": 37.84, "learning_rate": 0.001, "loss": 2.5027, "step": 197076 }, { "epoch": 37.84, "learning_rate": 0.001, "loss": 2.5139, "step": 197088 }, { "epoch": 37.85, "learning_rate": 0.001, "loss": 2.5062, "step": 197100 }, { "epoch": 37.85, "learning_rate": 0.001, "loss": 2.5088, "step": 197112 }, { "epoch": 37.85, "learning_rate": 0.001, "loss": 2.4982, "step": 197124 }, { "epoch": 37.85, "learning_rate": 0.001, "loss": 2.5188, "step": 197136 }, { "epoch": 37.85, "learning_rate": 0.001, "loss": 2.5116, "step": 197148 }, { "epoch": 37.86, "learning_rate": 0.001, "loss": 2.5012, "step": 197160 }, { "epoch": 37.86, "learning_rate": 0.001, "loss": 2.5074, "step": 197172 }, { "epoch": 37.86, "learning_rate": 0.001, "loss": 2.5086, "step": 197184 }, { "epoch": 37.86, "learning_rate": 0.001, "loss": 2.5072, "step": 197196 }, { "epoch": 37.87, "learning_rate": 0.001, "loss": 2.5062, "step": 197208 }, { "epoch": 37.87, "learning_rate": 0.001, "loss": 2.5081, "step": 197220 }, { "epoch": 37.87, "learning_rate": 0.001, "loss": 2.5029, "step": 197232 }, { "epoch": 37.87, "learning_rate": 0.001, "loss": 2.5056, "step": 197244 }, { "epoch": 37.88, "learning_rate": 0.001, "loss": 2.5012, "step": 197256 }, { "epoch": 37.88, "learning_rate": 0.001, "loss": 2.5094, "step": 197268 }, { "epoch": 37.88, "learning_rate": 0.001, "loss": 2.5047, "step": 197280 }, { "epoch": 37.88, "learning_rate": 0.001, "loss": 2.4975, "step": 197292 }, { "epoch": 37.88, "learning_rate": 0.001, "loss": 2.5057, "step": 197304 }, { "epoch": 37.89, "learning_rate": 0.001, "loss": 2.5012, "step": 197316 }, { "epoch": 37.89, "learning_rate": 0.001, "loss": 2.5097, "step": 197328 }, { "epoch": 37.89, "learning_rate": 0.001, "loss": 2.5105, "step": 197340 }, { "epoch": 37.89, "learning_rate": 0.001, "loss": 2.5003, "step": 197352 }, { "epoch": 37.9, "learning_rate": 0.001, "loss": 2.5104, "step": 197364 }, { "epoch": 37.9, "learning_rate": 0.001, "loss": 2.5011, "step": 197376 }, { "epoch": 37.9, "learning_rate": 0.001, "loss": 2.5033, "step": 197388 }, { "epoch": 37.9, "learning_rate": 0.001, "loss": 2.5005, "step": 197400 }, { "epoch": 37.91, "learning_rate": 0.001, "loss": 2.4963, "step": 197412 }, { "epoch": 37.91, "learning_rate": 0.001, "loss": 2.5069, "step": 197424 }, { "epoch": 37.91, "learning_rate": 0.001, "loss": 2.5056, "step": 197436 }, { "epoch": 37.91, "learning_rate": 0.001, "loss": 2.5103, "step": 197448 }, { "epoch": 37.91, "learning_rate": 0.001, "loss": 2.5027, "step": 197460 }, { "epoch": 37.92, "learning_rate": 0.001, "loss": 2.5046, "step": 197472 }, { "epoch": 37.92, "learning_rate": 0.001, "loss": 2.5009, "step": 197484 }, { "epoch": 37.92, "learning_rate": 0.001, "loss": 2.5115, "step": 197496 }, { "epoch": 37.92, "eval_ag_news_accuracy": 0.32834375, "eval_ag_news_bleu_score": 4.861613269007893, "eval_ag_news_bleu_score_sem": 0.159717906672351, "eval_ag_news_emb_cos_sim": 0.8243966102600098, "eval_ag_news_emb_cos_sim_sem": 0.006059606509826103, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4781150817871094, "eval_ag_news_n_ngrams_match_1": 14.382, "eval_ag_news_n_ngrams_match_2": 3.182, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 47.022, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.39859578166829, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35717536589227183, "eval_ag_news_runtime": 11.6284, "eval_ag_news_samples_per_second": 42.998, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.35674726323291067, "eval_ag_news_token_set_f1_sem": 0.004488013295578366, "eval_ag_news_token_set_precision": 0.3437195601210618, "eval_ag_news_token_set_recall": 0.38333595013475924, "eval_ag_news_true_num_tokens": 56.09375, "step": 197500 }, { "epoch": 37.92, "eval_anthropic_toxic_prompts_accuracy": 0.11721875, "eval_anthropic_toxic_prompts_bleu_score": 3.2591161537375735, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12229344402211764, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847412586212158, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008042622317708746, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2084994316101074, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.376, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.756, "eval_anthropic_toxic_prompts_num_pred_words": 47.182, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.741931394722084, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22045067726602688, "eval_anthropic_toxic_prompts_runtime": 9.7483, "eval_anthropic_toxic_prompts_samples_per_second": 51.291, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.36311138438462426, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006420653005946579, "eval_anthropic_toxic_prompts_token_set_precision": 0.4520250464711051, "eval_anthropic_toxic_prompts_token_set_recall": 0.3285887161972419, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 197500 }, { "epoch": 37.92, "eval_arxiv_accuracy": 0.35121875, "eval_arxiv_bleu_score": 4.468312128326134, "eval_arxiv_bleu_score_sem": 0.13089383375985866, "eval_arxiv_emb_cos_sim": 0.7762400507926941, "eval_arxiv_emb_cos_sim_sem": 0.007188606653233073, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3464467525482178, "eval_arxiv_n_ngrams_match_1": 15.598, "eval_arxiv_n_ngrams_match_2": 3.082, "eval_arxiv_n_ngrams_match_3": 0.69, "eval_arxiv_num_pred_words": 40.828, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.401636096747204, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37067769859652583, "eval_arxiv_runtime": 27.5747, "eval_arxiv_samples_per_second": 18.133, "eval_arxiv_steps_per_second": 0.036, "eval_arxiv_token_set_f1": 0.36355016903606596, "eval_arxiv_token_set_f1_sem": 0.0042682182617868915, "eval_arxiv_token_set_precision": 0.31657228234954976, "eval_arxiv_token_set_recall": 0.4444480168321842, "eval_arxiv_true_num_tokens": 64.0, "step": 197500 }, { "epoch": 37.92, "eval_python_code_alpaca_accuracy": 0.162375, "eval_python_code_alpaca_bleu_score": 4.741918729269589, "eval_python_code_alpaca_bleu_score_sem": 0.14631391556279452, "eval_python_code_alpaca_emb_cos_sim": 0.7607436776161194, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008395980178535066, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8623900413513184, "eval_python_code_alpaca_n_ngrams_match_1": 9.914, "eval_python_code_alpaca_n_ngrams_match_2": 2.994, "eval_python_code_alpaca_n_ngrams_match_3": 1.008, "eval_python_code_alpaca_num_pred_words": 43.538, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.50331062049169, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3392678720857105, "eval_python_code_alpaca_runtime": 27.1786, "eval_python_code_alpaca_samples_per_second": 18.397, "eval_python_code_alpaca_steps_per_second": 0.037, "eval_python_code_alpaca_token_set_f1": 0.4887016008546284, "eval_python_code_alpaca_token_set_f1_sem": 0.005246184540964671, "eval_python_code_alpaca_token_set_precision": 0.5423432729635894, "eval_python_code_alpaca_token_set_recall": 0.4675765806219998, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 197500 }, { "epoch": 37.92, "eval_wikibio_accuracy": 0.32940625, "eval_wikibio_bleu_score": 6.266780440032098, "eval_wikibio_bleu_score_sem": 0.2138327169571218, "eval_wikibio_emb_cos_sim": 0.7499379515647888, "eval_wikibio_emb_cos_sim_sem": 0.008426136907189118, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.647045135498047, "eval_wikibio_n_ngrams_match_1": 10.314, "eval_wikibio_n_ngrams_match_2": 3.534, "eval_wikibio_n_ngrams_match_3": 1.322, "eval_wikibio_num_pred_words": 36.368, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.3611464242512, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3688827438497856, "eval_wikibio_runtime": 10.1861, "eval_wikibio_samples_per_second": 49.086, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.32924503510272013, "eval_wikibio_token_set_f1_sem": 0.005265673392254785, "eval_wikibio_token_set_precision": 0.3362216882441393, "eval_wikibio_token_set_recall": 0.3372910414871243, "eval_wikibio_true_num_tokens": 61.1328125, "step": 197500 }, { "epoch": 37.92, "eval_nq_accuracy": 0.5363125, "eval_nq_bleu_score": 12.215521925519054, "eval_nq_bleu_score_sem": 0.47856955015150066, "eval_nq_emb_cos_sim": 0.8401731252670288, "eval_nq_emb_cos_sim_sem": 0.006614980142465709, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127146005630493, "eval_nq_n_ngrams_match_1": 23.614, "eval_nq_n_ngrams_match_2": 8.824, "eval_nq_n_ngrams_match_3": 4.06, "eval_nq_num_pred_words": 49.22, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.390885067099962, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45807987506689185, "eval_nq_runtime": 10.7838, "eval_nq_samples_per_second": 46.366, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.4720235415433743, "eval_nq_token_set_f1_sem": 0.004795410289763898, "eval_nq_token_set_precision": 0.4301537191288202, "eval_nq_token_set_recall": 0.5307444854441206, "eval_nq_true_num_tokens": 64.0, "step": 197500 }, { "epoch": 37.92, "learning_rate": 0.001, "loss": 2.5086, "step": 197508 }, { "epoch": 37.93, "learning_rate": 0.001, "loss": 2.5157, "step": 197520 }, { "epoch": 37.93, "learning_rate": 0.001, "loss": 2.4954, "step": 197532 }, { "epoch": 37.93, "learning_rate": 0.001, "loss": 2.5115, "step": 197544 }, { "epoch": 37.93, "learning_rate": 0.001, "loss": 2.5018, "step": 197556 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 2.5104, "step": 197568 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 2.5074, "step": 197580 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 2.5075, "step": 197592 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 2.5028, "step": 197604 }, { "epoch": 37.94, "learning_rate": 0.001, "loss": 2.5151, "step": 197616 }, { "epoch": 37.95, "learning_rate": 0.001, "loss": 2.505, "step": 197628 }, { "epoch": 37.95, "learning_rate": 0.001, "loss": 2.496, "step": 197640 }, { "epoch": 37.95, "learning_rate": 0.001, "loss": 2.5097, "step": 197652 }, { "epoch": 37.95, "learning_rate": 0.001, "loss": 2.5132, "step": 197664 }, { "epoch": 37.96, "learning_rate": 0.001, "loss": 2.5036, "step": 197676 }, { "epoch": 37.96, "learning_rate": 0.001, "loss": 2.5034, "step": 197688 }, { "epoch": 37.96, "learning_rate": 0.001, "loss": 2.499, "step": 197700 }, { "epoch": 37.96, "learning_rate": 0.001, "loss": 2.506, "step": 197712 }, { "epoch": 37.97, "learning_rate": 0.001, "loss": 2.504, "step": 197724 }, { "epoch": 37.97, "learning_rate": 0.001, "loss": 2.5077, "step": 197736 }, { "epoch": 37.97, "learning_rate": 0.001, "loss": 2.5041, "step": 197748 }, { "epoch": 37.97, "learning_rate": 0.001, "loss": 2.516, "step": 197760 }, { "epoch": 37.97, "learning_rate": 0.001, "loss": 2.5077, "step": 197772 }, { "epoch": 37.98, "learning_rate": 0.001, "loss": 2.4978, "step": 197784 }, { "epoch": 37.98, "learning_rate": 0.001, "loss": 2.5104, "step": 197796 }, { "epoch": 37.98, "learning_rate": 0.001, "loss": 2.5091, "step": 197808 }, { "epoch": 37.98, "learning_rate": 0.001, "loss": 2.498, "step": 197820 }, { "epoch": 37.99, "learning_rate": 0.001, "loss": 2.5056, "step": 197832 }, { "epoch": 37.99, "learning_rate": 0.001, "loss": 2.5123, "step": 197844 }, { "epoch": 37.99, "learning_rate": 0.001, "loss": 2.5084, "step": 197856 }, { "epoch": 37.99, "learning_rate": 0.001, "loss": 2.5019, "step": 197868 }, { "epoch": 38.0, "learning_rate": 0.001, "loss": 2.511, "step": 197880 }, { "epoch": 38.0, "learning_rate": 0.001, "loss": 2.5085, "step": 197892 }, { "epoch": 38.0, "learning_rate": 0.001, "loss": 2.4936, "step": 197904 }, { "epoch": 38.0, "learning_rate": 0.001, "loss": 2.4949, "step": 197916 }, { "epoch": 38.0, "learning_rate": 0.001, "loss": 2.4887, "step": 197928 }, { "epoch": 38.01, "learning_rate": 0.001, "loss": 2.4896, "step": 197940 }, { "epoch": 38.01, "learning_rate": 0.001, "loss": 2.4919, "step": 197952 }, { "epoch": 38.01, "learning_rate": 0.001, "loss": 2.4899, "step": 197964 }, { "epoch": 38.01, "learning_rate": 0.001, "loss": 2.495, "step": 197976 }, { "epoch": 38.02, "learning_rate": 0.001, "loss": 2.4998, "step": 197988 }, { "epoch": 38.02, "learning_rate": 0.001, "loss": 2.502, "step": 198000 }, { "epoch": 38.02, "learning_rate": 0.001, "loss": 2.5027, "step": 198012 }, { "epoch": 38.02, "learning_rate": 0.001, "loss": 2.498, "step": 198024 }, { "epoch": 38.03, "learning_rate": 0.001, "loss": 2.4971, "step": 198036 }, { "epoch": 38.03, "learning_rate": 0.001, "loss": 2.4879, "step": 198048 }, { "epoch": 38.03, "learning_rate": 0.001, "loss": 2.4933, "step": 198060 }, { "epoch": 38.03, "learning_rate": 0.001, "loss": 2.4915, "step": 198072 }, { "epoch": 38.03, "learning_rate": 0.001, "loss": 2.4941, "step": 198084 }, { "epoch": 38.04, "learning_rate": 0.001, "loss": 2.4966, "step": 198096 }, { "epoch": 38.04, "learning_rate": 0.001, "loss": 2.5091, "step": 198108 }, { "epoch": 38.04, "learning_rate": 0.001, "loss": 2.4987, "step": 198120 }, { "epoch": 38.04, "eval_ag_news_accuracy": 0.3280625, "eval_ag_news_bleu_score": 4.910747375853292, "eval_ag_news_bleu_score_sem": 0.16110935687850506, "eval_ag_news_emb_cos_sim": 0.8183833360671997, "eval_ag_news_emb_cos_sim_sem": 0.006989458184207542, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4828641414642334, "eval_ag_news_n_ngrams_match_1": 14.482, "eval_ag_news_n_ngrams_match_2": 3.31, "eval_ag_news_n_ngrams_match_3": 0.922, "eval_ag_news_num_pred_words": 47.004, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.552824577500765, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3583929650329284, "eval_ag_news_runtime": 10.2549, "eval_ag_news_samples_per_second": 48.757, "eval_ag_news_steps_per_second": 0.098, "eval_ag_news_token_set_f1": 0.3588915372672611, "eval_ag_news_token_set_f1_sem": 0.004475782302359824, "eval_ag_news_token_set_precision": 0.34898361643974435, "eval_ag_news_token_set_recall": 0.382470079100238, "eval_ag_news_true_num_tokens": 56.09375, "step": 198125 }, { "epoch": 38.04, "eval_anthropic_toxic_prompts_accuracy": 0.1168125, "eval_anthropic_toxic_prompts_bleu_score": 3.301207113242068, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12817840580299233, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6753901243209839, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009142267181127879, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.197187662124634, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.386, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 47.264, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.463633359260893, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22114043342891399, "eval_anthropic_toxic_prompts_runtime": 9.6947, "eval_anthropic_toxic_prompts_samples_per_second": 51.575, "eval_anthropic_toxic_prompts_steps_per_second": 0.103, "eval_anthropic_toxic_prompts_token_set_f1": 0.3624377696756126, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065926288693393, "eval_anthropic_toxic_prompts_token_set_precision": 0.4477993879178638, "eval_anthropic_toxic_prompts_token_set_recall": 0.32928570147706593, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 198125 }, { "epoch": 38.04, "eval_arxiv_accuracy": 0.35209375, "eval_arxiv_bleu_score": 4.491844717531946, "eval_arxiv_bleu_score_sem": 0.13040605317656828, "eval_arxiv_emb_cos_sim": 0.7792878746986389, "eval_arxiv_emb_cos_sim_sem": 0.008038685538640025, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3622093200683594, "eval_arxiv_n_ngrams_match_1": 15.868, "eval_arxiv_n_ngrams_match_2": 3.126, "eval_arxiv_n_ngrams_match_3": 0.662, "eval_arxiv_num_pred_words": 41.138, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.852865729566105, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37843141426570354, "eval_arxiv_runtime": 12.4725, "eval_arxiv_samples_per_second": 40.088, "eval_arxiv_steps_per_second": 0.08, "eval_arxiv_token_set_f1": 0.3697806797510946, "eval_arxiv_token_set_f1_sem": 0.004175549794410236, "eval_arxiv_token_set_precision": 0.32548584849093193, "eval_arxiv_token_set_recall": 0.44412619507525863, "eval_arxiv_true_num_tokens": 64.0, "step": 198125 }, { "epoch": 38.04, "eval_python_code_alpaca_accuracy": 0.16153125, "eval_python_code_alpaca_bleu_score": 4.5971349748079025, "eval_python_code_alpaca_bleu_score_sem": 0.14031230680149287, "eval_python_code_alpaca_emb_cos_sim": 0.7603492736816406, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008283170845773118, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8606696128845215, "eval_python_code_alpaca_n_ngrams_match_1": 9.908, "eval_python_code_alpaca_n_ngrams_match_2": 2.896, "eval_python_code_alpaca_n_ngrams_match_3": 0.972, "eval_python_code_alpaca_num_pred_words": 43.838, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.47322331558615, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3385536942912206, "eval_python_code_alpaca_runtime": 9.9127, "eval_python_code_alpaca_samples_per_second": 50.44, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4770394134582058, "eval_python_code_alpaca_token_set_f1_sem": 0.00530785938213543, "eval_python_code_alpaca_token_set_precision": 0.5417601021033696, "eval_python_code_alpaca_token_set_recall": 0.4504018587054074, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 198125 }, { "epoch": 38.04, "eval_wikibio_accuracy": 0.33003125, "eval_wikibio_bleu_score": 6.547435990060557, "eval_wikibio_bleu_score_sem": 0.232108214409257, "eval_wikibio_emb_cos_sim": 0.758441686630249, "eval_wikibio_emb_cos_sim_sem": 0.0074278150432237, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6872265338897705, "eval_wikibio_n_ngrams_match_1": 10.488, "eval_wikibio_n_ngrams_match_2": 3.602, "eval_wikibio_n_ngrams_match_3": 1.388, "eval_wikibio_num_pred_words": 36.29, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.933937803844366, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36917191817795747, "eval_wikibio_runtime": 10.1784, "eval_wikibio_samples_per_second": 49.124, "eval_wikibio_steps_per_second": 0.098, "eval_wikibio_token_set_f1": 0.33310988103124406, "eval_wikibio_token_set_f1_sem": 0.005188332268974138, "eval_wikibio_token_set_precision": 0.3417388783542738, "eval_wikibio_token_set_recall": 0.3388453247062207, "eval_wikibio_true_num_tokens": 61.1328125, "step": 198125 }, { "epoch": 38.04, "eval_nq_accuracy": 0.5355625, "eval_nq_bleu_score": 12.033799415002097, "eval_nq_bleu_score_sem": 0.49216421229279367, "eval_nq_emb_cos_sim": 0.8342651128768921, "eval_nq_emb_cos_sim_sem": 0.007439761699143014, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127384662628174, "eval_nq_n_ngrams_match_1": 23.448, "eval_nq_n_ngrams_match_2": 8.676, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 49.188, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.392887849517473, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4548787648948454, "eval_nq_runtime": 12.62, "eval_nq_samples_per_second": 39.62, "eval_nq_steps_per_second": 0.079, "eval_nq_token_set_f1": 0.4669965182066276, "eval_nq_token_set_f1_sem": 0.00503342605619811, "eval_nq_token_set_precision": 0.42673897101559916, "eval_nq_token_set_recall": 0.5234327128185235, "eval_nq_true_num_tokens": 64.0, "step": 198125 }, { "epoch": 38.04, "learning_rate": 0.001, "loss": 2.5045, "step": 198132 }, { "epoch": 38.05, "learning_rate": 0.001, "loss": 2.5019, "step": 198144 }, { "epoch": 38.05, "learning_rate": 0.001, "loss": 2.4955, "step": 198156 }, { "epoch": 38.05, "learning_rate": 0.001, "loss": 2.505, "step": 198168 }, { "epoch": 38.05, "learning_rate": 0.001, "loss": 2.4914, "step": 198180 }, { "epoch": 38.06, "learning_rate": 0.001, "loss": 2.5056, "step": 198192 }, { "epoch": 38.06, "learning_rate": 0.001, "loss": 2.4971, "step": 198204 }, { "epoch": 38.06, "learning_rate": 0.001, "loss": 2.5082, "step": 198216 }, { "epoch": 38.06, "learning_rate": 0.001, "loss": 2.505, "step": 198228 }, { "epoch": 38.06, "learning_rate": 0.001, "loss": 2.4987, "step": 198240 }, { "epoch": 38.07, "learning_rate": 0.001, "loss": 2.5032, "step": 198252 }, { "epoch": 38.07, "learning_rate": 0.001, "loss": 2.5019, "step": 198264 }, { "epoch": 38.07, "learning_rate": 0.001, "loss": 2.4917, "step": 198276 }, { "epoch": 38.07, "learning_rate": 0.001, "loss": 2.497, "step": 198288 }, { "epoch": 38.08, "learning_rate": 0.001, "loss": 2.4973, "step": 198300 }, { "epoch": 38.08, "learning_rate": 0.001, "loss": 2.5001, "step": 198312 }, { "epoch": 38.08, "learning_rate": 0.001, "loss": 2.4906, "step": 198324 }, { "epoch": 38.08, "learning_rate": 0.001, "loss": 2.4857, "step": 198336 }, { "epoch": 38.09, "learning_rate": 0.001, "loss": 2.4923, "step": 198348 }, { "epoch": 38.09, "learning_rate": 0.001, "loss": 2.4921, "step": 198360 }, { "epoch": 38.09, "learning_rate": 0.001, "loss": 2.498, "step": 198372 }, { "epoch": 38.09, "learning_rate": 0.001, "loss": 2.4905, "step": 198384 }, { "epoch": 38.09, "learning_rate": 0.001, "loss": 2.5061, "step": 198396 }, { "epoch": 38.1, "learning_rate": 0.001, "loss": 2.4902, "step": 198408 }, { "epoch": 38.1, "learning_rate": 0.001, "loss": 2.5041, "step": 198420 }, { "epoch": 38.1, "learning_rate": 0.001, "loss": 2.5027, "step": 198432 }, { "epoch": 38.1, "learning_rate": 0.001, "loss": 2.504, "step": 198444 }, { "epoch": 38.11, "learning_rate": 0.001, "loss": 2.51, "step": 198456 }, { "epoch": 38.11, "learning_rate": 0.001, "loss": 2.5003, "step": 198468 }, { "epoch": 38.11, "learning_rate": 0.001, "loss": 2.5165, "step": 198480 }, { "epoch": 38.11, "learning_rate": 0.001, "loss": 2.5067, "step": 198492 }, { "epoch": 38.12, "learning_rate": 0.001, "loss": 2.5087, "step": 198504 }, { "epoch": 38.12, "learning_rate": 0.001, "loss": 2.4967, "step": 198516 }, { "epoch": 38.12, "learning_rate": 0.001, "loss": 2.5003, "step": 198528 }, { "epoch": 38.12, "learning_rate": 0.001, "loss": 2.5006, "step": 198540 }, { "epoch": 38.12, "learning_rate": 0.001, "loss": 2.5077, "step": 198552 }, { "epoch": 38.13, "learning_rate": 0.001, "loss": 2.5101, "step": 198564 }, { "epoch": 38.13, "learning_rate": 0.001, "loss": 2.5079, "step": 198576 }, { "epoch": 38.13, "learning_rate": 0.001, "loss": 2.5095, "step": 198588 }, { "epoch": 38.13, "learning_rate": 0.001, "loss": 2.5039, "step": 198600 }, { "epoch": 38.14, "learning_rate": 0.001, "loss": 2.5045, "step": 198612 }, { "epoch": 38.14, "learning_rate": 0.001, "loss": 2.5015, "step": 198624 }, { "epoch": 38.14, "learning_rate": 0.001, "loss": 2.5079, "step": 198636 }, { "epoch": 38.14, "learning_rate": 0.001, "loss": 2.5038, "step": 198648 }, { "epoch": 38.15, "learning_rate": 0.001, "loss": 2.4984, "step": 198660 }, { "epoch": 38.15, "learning_rate": 0.001, "loss": 2.5124, "step": 198672 }, { "epoch": 38.15, "learning_rate": 0.001, "loss": 2.4912, "step": 198684 }, { "epoch": 38.15, "learning_rate": 0.001, "loss": 2.5178, "step": 198696 }, { "epoch": 38.15, "learning_rate": 0.001, "loss": 2.5052, "step": 198708 }, { "epoch": 38.16, "learning_rate": 0.001, "loss": 2.5075, "step": 198720 }, { "epoch": 38.16, "learning_rate": 0.001, "loss": 2.5033, "step": 198732 }, { "epoch": 38.16, "learning_rate": 0.001, "loss": 2.5026, "step": 198744 }, { "epoch": 38.16, "eval_ag_news_accuracy": 0.32909375, "eval_ag_news_bleu_score": 4.862735768010554, "eval_ag_news_bleu_score_sem": 0.15753661699455201, "eval_ag_news_emb_cos_sim": 0.8131601214408875, "eval_ag_news_emb_cos_sim_sem": 0.007208148302962952, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.477194309234619, "eval_ag_news_n_ngrams_match_1": 14.306, "eval_ag_news_n_ngrams_match_2": 3.23, "eval_ag_news_n_ngrams_match_3": 0.892, "eval_ag_news_num_pred_words": 46.706, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.36877777384159, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35472643245210533, "eval_ag_news_runtime": 10.3812, "eval_ag_news_samples_per_second": 48.164, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.35454808679556604, "eval_ag_news_token_set_f1_sem": 0.004681066017851176, "eval_ag_news_token_set_precision": 0.3417000046428554, "eval_ag_news_token_set_recall": 0.38523533717129893, "eval_ag_news_true_num_tokens": 56.09375, "step": 198750 }, { "epoch": 38.16, "eval_anthropic_toxic_prompts_accuracy": 0.1173125, "eval_anthropic_toxic_prompts_bleu_score": 3.296478468232397, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12471177678908144, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6765120029449463, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008981230658412833, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2036640644073486, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.35, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.08, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.796, "eval_anthropic_toxic_prompts_num_pred_words": 47.278, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.6225838482655, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21715898210835177, "eval_anthropic_toxic_prompts_runtime": 9.8897, "eval_anthropic_toxic_prompts_samples_per_second": 50.558, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.36368491999903624, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006597898824306445, "eval_anthropic_toxic_prompts_token_set_precision": 0.4442149308136982, "eval_anthropic_toxic_prompts_token_set_recall": 0.33304464530685085, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 198750 }, { "epoch": 38.16, "eval_arxiv_accuracy": 0.35190625, "eval_arxiv_bleu_score": 4.451322987118088, "eval_arxiv_bleu_score_sem": 0.1275649117194889, "eval_arxiv_emb_cos_sim": 0.7822730541229248, "eval_arxiv_emb_cos_sim_sem": 0.007400052782400672, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.34447979927063, "eval_arxiv_n_ngrams_match_1": 15.59, "eval_arxiv_n_ngrams_match_2": 3.084, "eval_arxiv_n_ngrams_match_3": 0.702, "eval_arxiv_num_pred_words": 41.076, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.34582631115176, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36946012229350955, "eval_arxiv_runtime": 10.0215, "eval_arxiv_samples_per_second": 49.893, "eval_arxiv_steps_per_second": 0.1, "eval_arxiv_token_set_f1": 0.3642653446787465, "eval_arxiv_token_set_f1_sem": 0.0040808309403072215, "eval_arxiv_token_set_precision": 0.317690930807214, "eval_arxiv_token_set_recall": 0.44532565597467866, "eval_arxiv_true_num_tokens": 64.0, "step": 198750 }, { "epoch": 38.16, "eval_python_code_alpaca_accuracy": 0.1618125, "eval_python_code_alpaca_bleu_score": 4.845829567155077, "eval_python_code_alpaca_bleu_score_sem": 0.14953287889336236, "eval_python_code_alpaca_emb_cos_sim": 0.7627565860748291, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007619878890432351, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.866325616836548, "eval_python_code_alpaca_n_ngrams_match_1": 9.982, "eval_python_code_alpaca_n_ngrams_match_2": 3.104, "eval_python_code_alpaca_n_ngrams_match_3": 1.12, "eval_python_code_alpaca_num_pred_words": 43.956, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.5723319509202, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3374877262826108, "eval_python_code_alpaca_runtime": 11.1604, "eval_python_code_alpaca_samples_per_second": 44.801, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.48260256452000383, "eval_python_code_alpaca_token_set_f1_sem": 0.005435004351010331, "eval_python_code_alpaca_token_set_precision": 0.544912521425928, "eval_python_code_alpaca_token_set_recall": 0.45590778148872574, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 198750 }, { "epoch": 38.16, "eval_wikibio_accuracy": 0.3309375, "eval_wikibio_bleu_score": 6.292063292076303, "eval_wikibio_bleu_score_sem": 0.22239109030590454, "eval_wikibio_emb_cos_sim": 0.7575108408927917, "eval_wikibio_emb_cos_sim_sem": 0.008050685536664487, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6480793952941895, "eval_wikibio_n_ngrams_match_1": 10.45, "eval_wikibio_n_ngrams_match_2": 3.588, "eval_wikibio_n_ngrams_match_3": 1.386, "eval_wikibio_num_pred_words": 37.068, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.40084234013815, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36807835747819223, "eval_wikibio_runtime": 10.1277, "eval_wikibio_samples_per_second": 49.37, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32839154985238966, "eval_wikibio_token_set_f1_sem": 0.005251312121649202, "eval_wikibio_token_set_precision": 0.33912558808325355, "eval_wikibio_token_set_recall": 0.33362293170263874, "eval_wikibio_true_num_tokens": 61.1328125, "step": 198750 }, { "epoch": 38.16, "eval_nq_accuracy": 0.5350625, "eval_nq_bleu_score": 12.04315443277687, "eval_nq_bleu_score_sem": 0.4933023759468999, "eval_nq_emb_cos_sim": 0.8419560790061951, "eval_nq_emb_cos_sim_sem": 0.006557228423293656, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.133126974105835, "eval_nq_n_ngrams_match_1": 23.534, "eval_nq_n_ngrams_match_2": 8.64, "eval_nq_n_ngrams_match_3": 4.022, "eval_nq_num_pred_words": 49.282, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.441221065123727, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45750899903593667, "eval_nq_runtime": 10.3819, "eval_nq_samples_per_second": 48.161, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.46728131934242007, "eval_nq_token_set_f1_sem": 0.005001053555814796, "eval_nq_token_set_precision": 0.4269902145666502, "eval_nq_token_set_recall": 0.5235735900445585, "eval_nq_true_num_tokens": 64.0, "step": 198750 }, { "epoch": 38.16, "learning_rate": 0.001, "loss": 2.509, "step": 198756 }, { "epoch": 38.17, "learning_rate": 0.001, "loss": 2.5075, "step": 198768 }, { "epoch": 38.17, "learning_rate": 0.001, "loss": 2.5069, "step": 198780 }, { "epoch": 38.17, "learning_rate": 0.001, "loss": 2.5067, "step": 198792 }, { "epoch": 38.17, "learning_rate": 0.001, "loss": 2.5069, "step": 198804 }, { "epoch": 38.18, "learning_rate": 0.001, "loss": 2.5129, "step": 198816 }, { "epoch": 38.18, "learning_rate": 0.001, "loss": 2.5062, "step": 198828 }, { "epoch": 38.18, "learning_rate": 0.001, "loss": 2.5066, "step": 198840 }, { "epoch": 38.18, "learning_rate": 0.001, "loss": 2.5047, "step": 198852 }, { "epoch": 38.18, "learning_rate": 0.001, "loss": 2.5112, "step": 198864 }, { "epoch": 38.19, "learning_rate": 0.001, "loss": 2.5094, "step": 198876 }, { "epoch": 38.19, "learning_rate": 0.001, "loss": 2.503, "step": 198888 }, { "epoch": 38.19, "learning_rate": 0.001, "loss": 2.5014, "step": 198900 }, { "epoch": 38.19, "learning_rate": 0.001, "loss": 2.5023, "step": 198912 }, { "epoch": 38.2, "learning_rate": 0.001, "loss": 2.5043, "step": 198924 }, { "epoch": 38.2, "learning_rate": 0.001, "loss": 2.5033, "step": 198936 }, { "epoch": 38.2, "learning_rate": 0.001, "loss": 2.5087, "step": 198948 }, { "epoch": 38.2, "learning_rate": 0.001, "loss": 2.5137, "step": 198960 }, { "epoch": 38.21, "learning_rate": 0.001, "loss": 2.5046, "step": 198972 }, { "epoch": 38.21, "learning_rate": 0.001, "loss": 2.5006, "step": 198984 }, { "epoch": 38.21, "learning_rate": 0.001, "loss": 2.508, "step": 198996 }, { "epoch": 38.21, "learning_rate": 0.001, "loss": 2.5035, "step": 199008 }, { "epoch": 38.21, "learning_rate": 0.001, "loss": 2.4955, "step": 199020 }, { "epoch": 38.22, "learning_rate": 0.001, "loss": 2.4908, "step": 199032 }, { "epoch": 38.22, "learning_rate": 0.001, "loss": 2.4965, "step": 199044 }, { "epoch": 38.22, "learning_rate": 0.001, "loss": 2.5054, "step": 199056 }, { "epoch": 38.22, "learning_rate": 0.001, "loss": 2.5005, "step": 199068 }, { "epoch": 38.23, "learning_rate": 0.001, "loss": 2.5012, "step": 199080 }, { "epoch": 38.23, "learning_rate": 0.001, "loss": 2.5147, "step": 199092 }, { "epoch": 38.23, "learning_rate": 0.001, "loss": 2.5003, "step": 199104 }, { "epoch": 38.23, "learning_rate": 0.001, "loss": 2.4897, "step": 199116 }, { "epoch": 38.24, "learning_rate": 0.001, "loss": 2.4978, "step": 199128 }, { "epoch": 38.24, "learning_rate": 0.001, "loss": 2.4944, "step": 199140 }, { "epoch": 38.24, "learning_rate": 0.001, "loss": 2.5024, "step": 199152 }, { "epoch": 38.24, "learning_rate": 0.001, "loss": 2.5071, "step": 199164 }, { "epoch": 38.24, "learning_rate": 0.001, "loss": 2.499, "step": 199176 }, { "epoch": 38.25, "learning_rate": 0.001, "loss": 2.5029, "step": 199188 }, { "epoch": 38.25, "learning_rate": 0.001, "loss": 2.4907, "step": 199200 }, { "epoch": 38.25, "learning_rate": 0.001, "loss": 2.5056, "step": 199212 }, { "epoch": 38.25, "learning_rate": 0.001, "loss": 2.5071, "step": 199224 }, { "epoch": 38.26, "learning_rate": 0.001, "loss": 2.5024, "step": 199236 }, { "epoch": 38.26, "learning_rate": 0.001, "loss": 2.5029, "step": 199248 }, { "epoch": 38.26, "learning_rate": 0.001, "loss": 2.5123, "step": 199260 }, { "epoch": 38.26, "learning_rate": 0.001, "loss": 2.4996, "step": 199272 }, { "epoch": 38.26, "learning_rate": 0.001, "loss": 2.5049, "step": 199284 }, { "epoch": 38.27, "learning_rate": 0.001, "loss": 2.5044, "step": 199296 }, { "epoch": 38.27, "learning_rate": 0.001, "loss": 2.4983, "step": 199308 }, { "epoch": 38.27, "learning_rate": 0.001, "loss": 2.5085, "step": 199320 }, { "epoch": 38.27, "learning_rate": 0.001, "loss": 2.4997, "step": 199332 }, { "epoch": 38.28, "learning_rate": 0.001, "loss": 2.5046, "step": 199344 }, { "epoch": 38.28, "learning_rate": 0.001, "loss": 2.5051, "step": 199356 }, { "epoch": 38.28, "learning_rate": 0.001, "loss": 2.5061, "step": 199368 }, { "epoch": 38.28, "eval_ag_news_accuracy": 0.327625, "eval_ag_news_bleu_score": 4.9417311987063215, "eval_ag_news_bleu_score_sem": 0.15424605211228998, "eval_ag_news_emb_cos_sim": 0.8186274766921997, "eval_ag_news_emb_cos_sim_sem": 0.007027419323825698, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.487560510635376, "eval_ag_news_n_ngrams_match_1": 14.42, "eval_ag_news_n_ngrams_match_2": 3.174, "eval_ag_news_n_ngrams_match_3": 0.882, "eval_ag_news_num_pred_words": 46.32, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.70606421257628, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3590849174322741, "eval_ag_news_runtime": 11.8147, "eval_ag_news_samples_per_second": 42.32, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.3580719528062785, "eval_ag_news_token_set_f1_sem": 0.00460088936336261, "eval_ag_news_token_set_precision": 0.34515331744894023, "eval_ag_news_token_set_recall": 0.3874022150564543, "eval_ag_news_true_num_tokens": 56.09375, "step": 199375 }, { "epoch": 38.28, "eval_anthropic_toxic_prompts_accuracy": 0.114625, "eval_anthropic_toxic_prompts_bleu_score": 3.30288252904598, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12912518825207164, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6769576072692871, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008583475043947334, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2431907653808594, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.188, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774, "eval_anthropic_toxic_prompts_num_pred_words": 47.032, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.61532398030485, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9765625, "eval_anthropic_toxic_prompts_rouge_score": 0.21343969703196963, "eval_anthropic_toxic_prompts_runtime": 13.5743, "eval_anthropic_toxic_prompts_samples_per_second": 36.834, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.36012047929585433, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006933007484110533, "eval_anthropic_toxic_prompts_token_set_precision": 0.4408752537883833, "eval_anthropic_toxic_prompts_token_set_recall": 0.32990273330221304, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 199375 }, { "epoch": 38.28, "eval_arxiv_accuracy": 0.3524375, "eval_arxiv_bleu_score": 4.500845138328313, "eval_arxiv_bleu_score_sem": 0.13359574834055554, "eval_arxiv_emb_cos_sim": 0.7729178071022034, "eval_arxiv_emb_cos_sim_sem": 0.008900314636172487, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.327899932861328, "eval_arxiv_n_ngrams_match_1": 15.324, "eval_arxiv_n_ngrams_match_2": 3.052, "eval_arxiv_n_ngrams_match_3": 0.696, "eval_arxiv_num_pred_words": 40.018, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.8797308747948, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3696750277898393, "eval_arxiv_runtime": 13.2087, "eval_arxiv_samples_per_second": 37.854, "eval_arxiv_steps_per_second": 0.076, "eval_arxiv_token_set_f1": 0.35973240314146787, "eval_arxiv_token_set_f1_sem": 0.00427649242879882, "eval_arxiv_token_set_precision": 0.311809219710262, "eval_arxiv_token_set_recall": 0.4420829017756489, "eval_arxiv_true_num_tokens": 64.0, "step": 199375 }, { "epoch": 38.28, "eval_python_code_alpaca_accuracy": 0.1625, "eval_python_code_alpaca_bleu_score": 4.8226810975350025, "eval_python_code_alpaca_bleu_score_sem": 0.15699190851218892, "eval_python_code_alpaca_emb_cos_sim": 0.7646726369857788, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008021080668308124, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8512156009674072, "eval_python_code_alpaca_n_ngrams_match_1": 9.894, "eval_python_code_alpaca_n_ngrams_match_2": 3.056, "eval_python_code_alpaca_n_ngrams_match_3": 1.056, "eval_python_code_alpaca_num_pred_words": 44.07, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.308809663028644, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33691055931155556, "eval_python_code_alpaca_runtime": 11.0349, "eval_python_code_alpaca_samples_per_second": 45.311, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4823490759806321, "eval_python_code_alpaca_token_set_f1_sem": 0.0051191226550350395, "eval_python_code_alpaca_token_set_precision": 0.5451297281940832, "eval_python_code_alpaca_token_set_recall": 0.45611962723311733, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 199375 }, { "epoch": 38.28, "eval_wikibio_accuracy": 0.32953125, "eval_wikibio_bleu_score": 6.042169552848864, "eval_wikibio_bleu_score_sem": 0.2188123058716265, "eval_wikibio_emb_cos_sim": 0.7414339184761047, "eval_wikibio_emb_cos_sim_sem": 0.008044945004188401, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6446495056152344, "eval_wikibio_n_ngrams_match_1": 9.962, "eval_wikibio_n_ngrams_match_2": 3.342, "eval_wikibio_n_ngrams_match_3": 1.214, "eval_wikibio_num_pred_words": 35.222, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.26935730581431, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35563535234548793, "eval_wikibio_runtime": 12.9718, "eval_wikibio_samples_per_second": 38.545, "eval_wikibio_steps_per_second": 0.077, "eval_wikibio_token_set_f1": 0.3181026646514537, "eval_wikibio_token_set_f1_sem": 0.00552625825197284, "eval_wikibio_token_set_precision": 0.323409945130816, "eval_wikibio_token_set_recall": 0.3296236853527382, "eval_wikibio_true_num_tokens": 61.1328125, "step": 199375 }, { "epoch": 38.28, "eval_nq_accuracy": 0.5341875, "eval_nq_bleu_score": 11.837841350754823, "eval_nq_bleu_score_sem": 0.4807961040164993, "eval_nq_emb_cos_sim": 0.8370886445045471, "eval_nq_emb_cos_sim_sem": 0.006791194799000741, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1303882598876953, "eval_nq_n_ngrams_match_1": 23.45, "eval_nq_n_ngrams_match_2": 8.582, "eval_nq_n_ngrams_match_3": 3.936, "eval_nq_num_pred_words": 49.21, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.418134601018068, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4552353713490796, "eval_nq_runtime": 12.048, "eval_nq_samples_per_second": 41.501, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.47160475008168445, "eval_nq_token_set_f1_sem": 0.004894827177870064, "eval_nq_token_set_precision": 0.42939985785051693, "eval_nq_token_set_recall": 0.5324201956013755, "eval_nq_true_num_tokens": 64.0, "step": 199375 }, { "epoch": 38.28, "learning_rate": 0.001, "loss": 2.5042, "step": 199380 }, { "epoch": 38.29, "learning_rate": 0.001, "loss": 2.505, "step": 199392 }, { "epoch": 38.29, "learning_rate": 0.001, "loss": 2.5102, "step": 199404 }, { "epoch": 38.29, "learning_rate": 0.001, "loss": 2.5109, "step": 199416 }, { "epoch": 38.29, "learning_rate": 0.001, "loss": 2.5057, "step": 199428 }, { "epoch": 38.29, "learning_rate": 0.001, "loss": 2.516, "step": 199440 }, { "epoch": 38.3, "learning_rate": 0.001, "loss": 2.5044, "step": 199452 }, { "epoch": 38.3, "learning_rate": 0.001, "loss": 2.5078, "step": 199464 }, { "epoch": 38.3, "learning_rate": 0.001, "loss": 2.5136, "step": 199476 }, { "epoch": 38.3, "learning_rate": 0.001, "loss": 2.5159, "step": 199488 }, { "epoch": 38.31, "learning_rate": 0.001, "loss": 2.5041, "step": 199500 }, { "epoch": 38.31, "learning_rate": 0.001, "loss": 2.5081, "step": 199512 }, { "epoch": 38.31, "learning_rate": 0.001, "loss": 2.5065, "step": 199524 }, { "epoch": 38.31, "learning_rate": 0.001, "loss": 2.5117, "step": 199536 }, { "epoch": 38.32, "learning_rate": 0.001, "loss": 2.5067, "step": 199548 }, { "epoch": 38.32, "learning_rate": 0.001, "loss": 2.5045, "step": 199560 }, { "epoch": 38.32, "learning_rate": 0.001, "loss": 2.5049, "step": 199572 }, { "epoch": 38.32, "learning_rate": 0.001, "loss": 2.506, "step": 199584 }, { "epoch": 38.32, "learning_rate": 0.001, "loss": 2.5113, "step": 199596 }, { "epoch": 38.33, "learning_rate": 0.001, "loss": 2.5016, "step": 199608 }, { "epoch": 38.33, "learning_rate": 0.001, "loss": 2.513, "step": 199620 }, { "epoch": 38.33, "learning_rate": 0.001, "loss": 2.4948, "step": 199632 }, { "epoch": 38.33, "learning_rate": 0.001, "loss": 2.4988, "step": 199644 }, { "epoch": 38.34, "learning_rate": 0.001, "loss": 2.4992, "step": 199656 }, { "epoch": 38.34, "learning_rate": 0.001, "loss": 2.4999, "step": 199668 }, { "epoch": 38.34, "learning_rate": 0.001, "loss": 2.4948, "step": 199680 }, { "epoch": 38.34, "learning_rate": 0.001, "loss": 2.5074, "step": 199692 }, { "epoch": 38.35, "learning_rate": 0.001, "loss": 2.5032, "step": 199704 }, { "epoch": 38.35, "learning_rate": 0.001, "loss": 2.5101, "step": 199716 }, { "epoch": 38.35, "learning_rate": 0.001, "loss": 2.5037, "step": 199728 }, { "epoch": 38.35, "learning_rate": 0.001, "loss": 2.5042, "step": 199740 }, { "epoch": 38.35, "learning_rate": 0.001, "loss": 2.5058, "step": 199752 }, { "epoch": 38.36, "learning_rate": 0.001, "loss": 2.5092, "step": 199764 }, { "epoch": 38.36, "learning_rate": 0.001, "loss": 2.4986, "step": 199776 }, { "epoch": 38.36, "learning_rate": 0.001, "loss": 2.5059, "step": 199788 }, { "epoch": 38.36, "learning_rate": 0.001, "loss": 2.5134, "step": 199800 }, { "epoch": 38.37, "learning_rate": 0.001, "loss": 2.5077, "step": 199812 }, { "epoch": 38.37, "learning_rate": 0.001, "loss": 2.4985, "step": 199824 }, { "epoch": 38.37, "learning_rate": 0.001, "loss": 2.501, "step": 199836 }, { "epoch": 38.37, "learning_rate": 0.001, "loss": 2.505, "step": 199848 }, { "epoch": 38.38, "learning_rate": 0.001, "loss": 2.5074, "step": 199860 }, { "epoch": 38.38, "learning_rate": 0.001, "loss": 2.5045, "step": 199872 }, { "epoch": 38.38, "learning_rate": 0.001, "loss": 2.5086, "step": 199884 }, { "epoch": 38.38, "learning_rate": 0.001, "loss": 2.5046, "step": 199896 }, { "epoch": 38.38, "learning_rate": 0.001, "loss": 2.5125, "step": 199908 }, { "epoch": 38.39, "learning_rate": 0.001, "loss": 2.4982, "step": 199920 }, { "epoch": 38.39, "learning_rate": 0.001, "loss": 2.5028, "step": 199932 }, { "epoch": 38.39, "learning_rate": 0.001, "loss": 2.4973, "step": 199944 }, { "epoch": 38.39, "learning_rate": 0.001, "loss": 2.5103, "step": 199956 }, { "epoch": 38.4, "learning_rate": 0.001, "loss": 2.5031, "step": 199968 }, { "epoch": 38.4, "learning_rate": 0.001, "loss": 2.5133, "step": 199980 }, { "epoch": 38.4, "learning_rate": 0.001, "loss": 2.5095, "step": 199992 }, { "epoch": 38.4, "eval_ag_news_accuracy": 0.32825, "eval_ag_news_bleu_score": 5.172552201596351, "eval_ag_news_bleu_score_sem": 0.16181223343041456, "eval_ag_news_emb_cos_sim": 0.8215479850769043, "eval_ag_news_emb_cos_sim_sem": 0.007498894386148306, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.488349676132202, "eval_ag_news_n_ngrams_match_1": 14.652, "eval_ag_news_n_ngrams_match_2": 3.414, "eval_ag_news_n_ngrams_match_3": 1.0, "eval_ag_news_num_pred_words": 46.966, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.73188489704641, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3624800378888058, "eval_ag_news_runtime": 12.8154, "eval_ag_news_samples_per_second": 39.016, "eval_ag_news_steps_per_second": 0.078, "eval_ag_news_token_set_f1": 0.3634194636723417, "eval_ag_news_token_set_f1_sem": 0.004405233955182402, "eval_ag_news_token_set_precision": 0.3511867546689802, "eval_ag_news_token_set_recall": 0.38956414681062035, "eval_ag_news_true_num_tokens": 56.09375, "step": 200000 }, { "epoch": 38.4, "eval_anthropic_toxic_prompts_accuracy": 0.1160625, "eval_anthropic_toxic_prompts_bleu_score": 3.3744040639541364, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1311318100792003, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6873105764389038, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.007988750742777728, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.232280969619751, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.316, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.826, "eval_anthropic_toxic_prompts_num_pred_words": 47.116, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.337384913323735, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21686406936866284, "eval_anthropic_toxic_prompts_runtime": 9.9469, "eval_anthropic_toxic_prompts_samples_per_second": 50.267, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.3590520345784447, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006762216294785205, "eval_anthropic_toxic_prompts_token_set_precision": 0.44224462054379854, "eval_anthropic_toxic_prompts_token_set_recall": 0.32948871745396646, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 200000 }, { "epoch": 38.4, "eval_arxiv_accuracy": 0.35278125, "eval_arxiv_bleu_score": 4.380235097489727, "eval_arxiv_bleu_score_sem": 0.12933162875966375, "eval_arxiv_emb_cos_sim": 0.7866820096969604, "eval_arxiv_emb_cos_sim_sem": 0.006539146108193467, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.340620756149292, "eval_arxiv_n_ngrams_match_1": 15.772, "eval_arxiv_n_ngrams_match_2": 2.996, "eval_arxiv_n_ngrams_match_3": 0.656, "eval_arxiv_num_pred_words": 40.778, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.236649339917527, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3748535188797494, "eval_arxiv_runtime": 10.1305, "eval_arxiv_samples_per_second": 49.356, "eval_arxiv_steps_per_second": 0.099, "eval_arxiv_token_set_f1": 0.36868663200142515, "eval_arxiv_token_set_f1_sem": 0.004493028079803148, "eval_arxiv_token_set_precision": 0.3241241758652874, "eval_arxiv_token_set_recall": 0.44456752717847087, "eval_arxiv_true_num_tokens": 64.0, "step": 200000 }, { "epoch": 38.4, "eval_python_code_alpaca_accuracy": 0.16425, "eval_python_code_alpaca_bleu_score": 4.915179625615341, "eval_python_code_alpaca_bleu_score_sem": 0.15284102400875269, "eval_python_code_alpaca_emb_cos_sim": 0.7691056132316589, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007020678462241987, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.838472843170166, "eval_python_code_alpaca_n_ngrams_match_1": 10.202, "eval_python_code_alpaca_n_ngrams_match_2": 3.154, "eval_python_code_alpaca_n_ngrams_match_3": 1.076, "eval_python_code_alpaca_num_pred_words": 43.7, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.08964702751432, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34479702451640815, "eval_python_code_alpaca_runtime": 10.5125, "eval_python_code_alpaca_samples_per_second": 47.563, "eval_python_code_alpaca_steps_per_second": 0.095, "eval_python_code_alpaca_token_set_f1": 0.49356710271560095, "eval_python_code_alpaca_token_set_f1_sem": 0.00512612959845217, "eval_python_code_alpaca_token_set_precision": 0.5574205363902113, "eval_python_code_alpaca_token_set_recall": 0.465838193643123, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 200000 }, { "epoch": 38.4, "eval_wikibio_accuracy": 0.32740625, "eval_wikibio_bleu_score": 6.209100645734727, "eval_wikibio_bleu_score_sem": 0.22584343950398492, "eval_wikibio_emb_cos_sim": 0.7476376295089722, "eval_wikibio_emb_cos_sim_sem": 0.009153704123598014, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6737782955169678, "eval_wikibio_n_ngrams_match_1": 10.224, "eval_wikibio_n_ngrams_match_2": 3.502, "eval_wikibio_n_ngrams_match_3": 1.344, "eval_wikibio_num_pred_words": 36.268, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.40049168406325, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3583372177560415, "eval_wikibio_runtime": 9.9414, "eval_wikibio_samples_per_second": 50.294, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.32298246517111684, "eval_wikibio_token_set_f1_sem": 0.00558874538217453, "eval_wikibio_token_set_precision": 0.3335038329257763, "eval_wikibio_token_set_recall": 0.32766481741272097, "eval_wikibio_true_num_tokens": 61.1328125, "step": 200000 }, { "epoch": 38.4, "eval_nq_accuracy": 0.53621875, "eval_nq_bleu_score": 12.159658329574835, "eval_nq_bleu_score_sem": 0.48712114140477525, "eval_nq_emb_cos_sim": 0.8404883146286011, "eval_nq_emb_cos_sim_sem": 0.006631654489407832, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127622604370117, "eval_nq_n_ngrams_match_1": 23.456, "eval_nq_n_ngrams_match_2": 8.694, "eval_nq_n_ngrams_match_3": 4.064, "eval_nq_num_pred_words": 49.04, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.394885105478181, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45384860546852174, "eval_nq_runtime": 10.4328, "eval_nq_samples_per_second": 47.926, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4700611381379461, "eval_nq_token_set_f1_sem": 0.0049640822515344156, "eval_nq_token_set_precision": 0.42943715348694084, "eval_nq_token_set_recall": 0.5279278469408664, "eval_nq_true_num_tokens": 64.0, "step": 200000 }, { "epoch": 38.4, "learning_rate": 0.001, "loss": 2.5025, "step": 200004 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 2.4939, "step": 200016 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 2.5075, "step": 200028 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 2.5059, "step": 200040 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 2.4987, "step": 200052 }, { "epoch": 38.41, "learning_rate": 0.001, "loss": 2.506, "step": 200064 }, { "epoch": 38.42, "learning_rate": 0.001, "loss": 2.4962, "step": 200076 }, { "epoch": 38.42, "learning_rate": 0.001, "loss": 2.4986, "step": 200088 }, { "epoch": 38.42, "learning_rate": 0.001, "loss": 2.5016, "step": 200100 }, { "epoch": 38.42, "learning_rate": 0.001, "loss": 2.5042, "step": 200112 }, { "epoch": 38.43, "learning_rate": 0.001, "loss": 2.508, "step": 200124 }, { "epoch": 38.43, "learning_rate": 0.001, "loss": 2.5056, "step": 200136 }, { "epoch": 38.43, "learning_rate": 0.001, "loss": 2.5063, "step": 200148 }, { "epoch": 38.43, "learning_rate": 0.001, "loss": 2.5123, "step": 200160 }, { "epoch": 38.44, "learning_rate": 0.001, "loss": 2.5059, "step": 200172 }, { "epoch": 38.44, "learning_rate": 0.001, "loss": 2.5142, "step": 200184 }, { "epoch": 38.44, "learning_rate": 0.001, "loss": 2.5072, "step": 200196 }, { "epoch": 38.44, "learning_rate": 0.001, "loss": 2.5013, "step": 200208 }, { "epoch": 38.44, "learning_rate": 0.001, "loss": 2.5075, "step": 200220 }, { "epoch": 38.45, "learning_rate": 0.001, "loss": 2.5065, "step": 200232 }, { "epoch": 38.45, "learning_rate": 0.001, "loss": 2.5085, "step": 200244 }, { "epoch": 38.45, "learning_rate": 0.001, "loss": 2.5048, "step": 200256 }, { "epoch": 38.45, "learning_rate": 0.001, "loss": 2.4989, "step": 200268 }, { "epoch": 38.46, "learning_rate": 0.001, "loss": 2.4999, "step": 200280 }, { "epoch": 38.46, "learning_rate": 0.001, "loss": 2.5048, "step": 200292 }, { "epoch": 38.46, "learning_rate": 0.001, "loss": 2.5187, "step": 200304 }, { "epoch": 38.46, "learning_rate": 0.001, "loss": 2.5133, "step": 200316 }, { "epoch": 38.47, "learning_rate": 0.001, "loss": 2.5046, "step": 200328 }, { "epoch": 38.47, "learning_rate": 0.001, "loss": 2.5046, "step": 200340 }, { "epoch": 38.47, "learning_rate": 0.001, "loss": 2.495, "step": 200352 }, { "epoch": 38.47, "learning_rate": 0.001, "loss": 2.4932, "step": 200364 }, { "epoch": 38.47, "learning_rate": 0.001, "loss": 2.504, "step": 200376 }, { "epoch": 38.48, "learning_rate": 0.001, "loss": 2.4958, "step": 200388 }, { "epoch": 38.48, "learning_rate": 0.001, "loss": 2.5039, "step": 200400 }, { "epoch": 38.48, "learning_rate": 0.001, "loss": 2.5018, "step": 200412 }, { "epoch": 38.48, "learning_rate": 0.001, "loss": 2.5062, "step": 200424 }, { "epoch": 38.49, "learning_rate": 0.001, "loss": 2.5044, "step": 200436 }, { "epoch": 38.49, "learning_rate": 0.001, "loss": 2.502, "step": 200448 }, { "epoch": 38.49, "learning_rate": 0.001, "loss": 2.4996, "step": 200460 }, { "epoch": 38.49, "learning_rate": 0.001, "loss": 2.4987, "step": 200472 }, { "epoch": 38.5, "learning_rate": 0.001, "loss": 2.5062, "step": 200484 }, { "epoch": 38.5, "learning_rate": 0.001, "loss": 2.5077, "step": 200496 }, { "epoch": 38.5, "learning_rate": 0.001, "loss": 2.5089, "step": 200508 }, { "epoch": 38.5, "learning_rate": 0.001, "loss": 2.5003, "step": 200520 }, { "epoch": 38.5, "learning_rate": 0.001, "loss": 2.5085, "step": 200532 }, { "epoch": 38.51, "learning_rate": 0.001, "loss": 2.5103, "step": 200544 }, { "epoch": 38.51, "learning_rate": 0.001, "loss": 2.5022, "step": 200556 }, { "epoch": 38.51, "learning_rate": 0.001, "loss": 2.5005, "step": 200568 }, { "epoch": 38.51, "learning_rate": 0.001, "loss": 2.5033, "step": 200580 }, { "epoch": 38.52, "learning_rate": 0.001, "loss": 2.5065, "step": 200592 }, { "epoch": 38.52, "learning_rate": 0.001, "loss": 2.5077, "step": 200604 }, { "epoch": 38.52, "learning_rate": 0.001, "loss": 2.5049, "step": 200616 }, { "epoch": 38.52, "eval_ag_news_accuracy": 0.32696875, "eval_ag_news_bleu_score": 5.124490805927564, "eval_ag_news_bleu_score_sem": 0.16806403085212412, "eval_ag_news_emb_cos_sim": 0.8210431337356567, "eval_ag_news_emb_cos_sim_sem": 0.006155401686679809, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.482522487640381, "eval_ag_news_n_ngrams_match_1": 14.35, "eval_ag_news_n_ngrams_match_2": 3.246, "eval_ag_news_n_ngrams_match_3": 0.95, "eval_ag_news_num_pred_words": 46.392, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.541704680192545, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3591422003217143, "eval_ag_news_runtime": 11.5848, "eval_ag_news_samples_per_second": 43.16, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3602993094966992, "eval_ag_news_token_set_f1_sem": 0.004505170963543107, "eval_ag_news_token_set_precision": 0.34517211413055204, "eval_ag_news_token_set_recall": 0.39151240323973885, "eval_ag_news_true_num_tokens": 56.09375, "step": 200625 }, { "epoch": 38.52, "eval_anthropic_toxic_prompts_accuracy": 0.1169375, "eval_anthropic_toxic_prompts_bleu_score": 3.3712601061706833, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13318402728049128, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6785003542900085, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008789142445057385, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.224536657333374, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.828, "eval_anthropic_toxic_prompts_num_pred_words": 47.176, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.141922131342337, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21586434959105966, "eval_anthropic_toxic_prompts_runtime": 10.0031, "eval_anthropic_toxic_prompts_samples_per_second": 49.985, "eval_anthropic_toxic_prompts_steps_per_second": 0.1, "eval_anthropic_toxic_prompts_token_set_f1": 0.36242229949343746, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006938019150534992, "eval_anthropic_toxic_prompts_token_set_precision": 0.4416903305306638, "eval_anthropic_toxic_prompts_token_set_recall": 0.3343296823954955, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 200625 }, { "epoch": 38.52, "eval_arxiv_accuracy": 0.35140625, "eval_arxiv_bleu_score": 4.579195338607506, "eval_arxiv_bleu_score_sem": 0.1405080470622127, "eval_arxiv_emb_cos_sim": 0.7835668921470642, "eval_arxiv_emb_cos_sim_sem": 0.006613090119727798, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.343899965286255, "eval_arxiv_n_ngrams_match_1": 15.592, "eval_arxiv_n_ngrams_match_2": 3.15, "eval_arxiv_n_ngrams_match_3": 0.752, "eval_arxiv_num_pred_words": 40.6, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.3293952018595, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37238043762067263, "eval_arxiv_runtime": 10.864, "eval_arxiv_samples_per_second": 46.023, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.365866023498149, "eval_arxiv_token_set_f1_sem": 0.004428000742937261, "eval_arxiv_token_set_precision": 0.31829054517088734, "eval_arxiv_token_set_recall": 0.4480372605531303, "eval_arxiv_true_num_tokens": 64.0, "step": 200625 }, { "epoch": 38.52, "eval_python_code_alpaca_accuracy": 0.16246875, "eval_python_code_alpaca_bleu_score": 4.902811574303316, "eval_python_code_alpaca_bleu_score_sem": 0.1572073012295978, "eval_python_code_alpaca_emb_cos_sim": 0.7700563669204712, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007293403474966395, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8625295162200928, "eval_python_code_alpaca_n_ngrams_match_1": 9.88, "eval_python_code_alpaca_n_ngrams_match_2": 3.012, "eval_python_code_alpaca_n_ngrams_match_3": 1.086, "eval_python_code_alpaca_num_pred_words": 43.388, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.505752062699557, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3385448207373508, "eval_python_code_alpaca_runtime": 9.9187, "eval_python_code_alpaca_samples_per_second": 50.41, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.48832918345736936, "eval_python_code_alpaca_token_set_f1_sem": 0.005340555258537011, "eval_python_code_alpaca_token_set_precision": 0.5415004190520915, "eval_python_code_alpaca_token_set_recall": 0.46614241816551255, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 200625 }, { "epoch": 38.52, "eval_wikibio_accuracy": 0.32828125, "eval_wikibio_bleu_score": 6.021161512863206, "eval_wikibio_bleu_score_sem": 0.22085702483213818, "eval_wikibio_emb_cos_sim": 0.744111955165863, "eval_wikibio_emb_cos_sim_sem": 0.009517643477578601, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.642890691757202, "eval_wikibio_n_ngrams_match_1": 9.96, "eval_wikibio_n_ngrams_match_2": 3.35, "eval_wikibio_n_ngrams_match_3": 1.242, "eval_wikibio_num_pred_words": 35.786, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.202107786875736, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3531122350233396, "eval_wikibio_runtime": 14.9241, "eval_wikibio_samples_per_second": 33.503, "eval_wikibio_steps_per_second": 0.067, "eval_wikibio_token_set_f1": 0.3167796918315341, "eval_wikibio_token_set_f1_sem": 0.005669717027321806, "eval_wikibio_token_set_precision": 0.3235410572755014, "eval_wikibio_token_set_recall": 0.32786112063324696, "eval_wikibio_true_num_tokens": 61.1328125, "step": 200625 }, { "epoch": 38.52, "eval_nq_accuracy": 0.53546875, "eval_nq_bleu_score": 11.67899940002113, "eval_nq_bleu_score_sem": 0.45548232175890974, "eval_nq_emb_cos_sim": 0.8368167877197266, "eval_nq_emb_cos_sim_sem": 0.0069787383967066, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1302053928375244, "eval_nq_n_ngrams_match_1": 23.304, "eval_nq_n_ngrams_match_2": 8.47, "eval_nq_n_ngrams_match_3": 3.836, "eval_nq_num_pred_words": 48.926, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.416595342319775, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45345869442626835, "eval_nq_runtime": 10.2932, "eval_nq_samples_per_second": 48.576, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4690944405401308, "eval_nq_token_set_f1_sem": 0.005093111331519668, "eval_nq_token_set_precision": 0.42536645564826464, "eval_nq_token_set_recall": 0.5313037685433415, "eval_nq_true_num_tokens": 64.0, "step": 200625 }, { "epoch": 38.52, "learning_rate": 0.001, "loss": 2.5001, "step": 200628 }, { "epoch": 38.53, "learning_rate": 0.001, "loss": 2.5057, "step": 200640 }, { "epoch": 38.53, "learning_rate": 0.001, "loss": 2.4998, "step": 200652 }, { "epoch": 38.53, "learning_rate": 0.001, "loss": 2.5063, "step": 200664 }, { "epoch": 38.53, "learning_rate": 0.001, "loss": 2.4956, "step": 200676 }, { "epoch": 38.53, "learning_rate": 0.001, "loss": 2.4931, "step": 200688 }, { "epoch": 38.54, "learning_rate": 0.001, "loss": 2.4999, "step": 200700 }, { "epoch": 38.54, "learning_rate": 0.001, "loss": 2.5019, "step": 200712 }, { "epoch": 38.54, "learning_rate": 0.001, "loss": 2.4955, "step": 200724 }, { "epoch": 38.54, "learning_rate": 0.001, "loss": 2.5033, "step": 200736 }, { "epoch": 38.55, "learning_rate": 0.001, "loss": 2.5017, "step": 200748 }, { "epoch": 38.55, "learning_rate": 0.001, "loss": 2.5108, "step": 200760 }, { "epoch": 38.55, "learning_rate": 0.001, "loss": 2.5146, "step": 200772 }, { "epoch": 38.55, "learning_rate": 0.001, "loss": 2.5142, "step": 200784 }, { "epoch": 38.56, "learning_rate": 0.001, "loss": 2.5035, "step": 200796 }, { "epoch": 38.56, "learning_rate": 0.001, "loss": 2.5006, "step": 200808 }, { "epoch": 38.56, "learning_rate": 0.001, "loss": 2.5109, "step": 200820 }, { "epoch": 38.56, "learning_rate": 0.001, "loss": 2.5032, "step": 200832 }, { "epoch": 38.56, "learning_rate": 0.001, "loss": 2.5027, "step": 200844 }, { "epoch": 38.57, "learning_rate": 0.001, "loss": 2.4963, "step": 200856 }, { "epoch": 38.57, "learning_rate": 0.001, "loss": 2.4946, "step": 200868 }, { "epoch": 38.57, "learning_rate": 0.001, "loss": 2.5009, "step": 200880 }, { "epoch": 38.57, "learning_rate": 0.001, "loss": 2.5096, "step": 200892 }, { "epoch": 38.58, "learning_rate": 0.001, "loss": 2.5075, "step": 200904 }, { "epoch": 38.58, "learning_rate": 0.001, "loss": 2.5071, "step": 200916 }, { "epoch": 38.58, "learning_rate": 0.001, "loss": 2.5054, "step": 200928 }, { "epoch": 38.58, "learning_rate": 0.001, "loss": 2.503, "step": 200940 }, { "epoch": 38.59, "learning_rate": 0.001, "loss": 2.5034, "step": 200952 }, { "epoch": 38.59, "learning_rate": 0.001, "loss": 2.5034, "step": 200964 }, { "epoch": 38.59, "learning_rate": 0.001, "loss": 2.5033, "step": 200976 }, { "epoch": 38.59, "learning_rate": 0.001, "loss": 2.4914, "step": 200988 }, { "epoch": 38.59, "learning_rate": 0.001, "loss": 2.5097, "step": 201000 }, { "epoch": 38.6, "learning_rate": 0.001, "loss": 2.5036, "step": 201012 }, { "epoch": 38.6, "learning_rate": 0.001, "loss": 2.4975, "step": 201024 }, { "epoch": 38.6, "learning_rate": 0.001, "loss": 2.5098, "step": 201036 }, { "epoch": 38.6, "learning_rate": 0.001, "loss": 2.5082, "step": 201048 }, { "epoch": 38.61, "learning_rate": 0.001, "loss": 2.5062, "step": 201060 }, { "epoch": 38.61, "learning_rate": 0.001, "loss": 2.5021, "step": 201072 }, { "epoch": 38.61, "learning_rate": 0.001, "loss": 2.4985, "step": 201084 }, { "epoch": 38.61, "learning_rate": 0.001, "loss": 2.5039, "step": 201096 }, { "epoch": 38.62, "learning_rate": 0.001, "loss": 2.5177, "step": 201108 }, { "epoch": 38.62, "learning_rate": 0.001, "loss": 2.4961, "step": 201120 }, { "epoch": 38.62, "learning_rate": 0.001, "loss": 2.5002, "step": 201132 }, { "epoch": 38.62, "learning_rate": 0.001, "loss": 2.5024, "step": 201144 }, { "epoch": 38.62, "learning_rate": 0.001, "loss": 2.5004, "step": 201156 }, { "epoch": 38.63, "learning_rate": 0.001, "loss": 2.5049, "step": 201168 }, { "epoch": 38.63, "learning_rate": 0.001, "loss": 2.5065, "step": 201180 }, { "epoch": 38.63, "learning_rate": 0.001, "loss": 2.5039, "step": 201192 }, { "epoch": 38.63, "learning_rate": 0.001, "loss": 2.5069, "step": 201204 }, { "epoch": 38.64, "learning_rate": 0.001, "loss": 2.508, "step": 201216 }, { "epoch": 38.64, "learning_rate": 0.001, "loss": 2.5021, "step": 201228 }, { "epoch": 38.64, "learning_rate": 0.001, "loss": 2.5073, "step": 201240 }, { "epoch": 38.64, "eval_ag_news_accuracy": 0.3296875, "eval_ag_news_bleu_score": 4.916234398109618, "eval_ag_news_bleu_score_sem": 0.16556309465932698, "eval_ag_news_emb_cos_sim": 0.8227593898773193, "eval_ag_news_emb_cos_sim_sem": 0.006438031208804276, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4632151126861572, "eval_ag_news_n_ngrams_match_1": 14.44, "eval_ag_news_n_ngrams_match_2": 3.234, "eval_ag_news_n_ngrams_match_3": 0.902, "eval_ag_news_num_pred_words": 46.448, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.919436301114658, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3606588435428477, "eval_ag_news_runtime": 10.4227, "eval_ag_news_samples_per_second": 47.972, "eval_ag_news_steps_per_second": 0.096, "eval_ag_news_token_set_f1": 0.36046218119862616, "eval_ag_news_token_set_f1_sem": 0.00453020238609333, "eval_ag_news_token_set_precision": 0.34579195831015946, "eval_ag_news_token_set_recall": 0.3906633539178233, "eval_ag_news_true_num_tokens": 56.09375, "step": 201250 }, { "epoch": 38.64, "eval_anthropic_toxic_prompts_accuracy": 0.11621875, "eval_anthropic_toxic_prompts_bleu_score": 3.3607888777637753, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13015718057924883, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6823102235794067, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00962869252087071, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2163546085357666, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.37, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778, "eval_anthropic_toxic_prompts_num_pred_words": 46.596, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.937048981602853, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.953125, "eval_anthropic_toxic_prompts_rouge_score": 0.21896378837396635, "eval_anthropic_toxic_prompts_runtime": 9.877, "eval_anthropic_toxic_prompts_samples_per_second": 50.623, "eval_anthropic_toxic_prompts_steps_per_second": 0.101, "eval_anthropic_toxic_prompts_token_set_f1": 0.36041119788731574, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00669103893115273, "eval_anthropic_toxic_prompts_token_set_precision": 0.4449990491056998, "eval_anthropic_toxic_prompts_token_set_recall": 0.3286289655604711, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 201250 }, { "epoch": 38.64, "eval_arxiv_accuracy": 0.3535, "eval_arxiv_bleu_score": 4.318002499163436, "eval_arxiv_bleu_score_sem": 0.13218365332714507, "eval_arxiv_emb_cos_sim": 0.7763446569442749, "eval_arxiv_emb_cos_sim_sem": 0.007534848940650639, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3291878700256348, "eval_arxiv_n_ngrams_match_1": 15.296, "eval_arxiv_n_ngrams_match_2": 2.898, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 39.494, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.915661349449397, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36947079046896836, "eval_arxiv_runtime": 14.7458, "eval_arxiv_samples_per_second": 33.908, "eval_arxiv_steps_per_second": 0.068, "eval_arxiv_token_set_f1": 0.36258928552527303, "eval_arxiv_token_set_f1_sem": 0.0044621756079646765, "eval_arxiv_token_set_precision": 0.31356423140260836, "eval_arxiv_token_set_recall": 0.44947400262918213, "eval_arxiv_true_num_tokens": 64.0, "step": 201250 }, { "epoch": 38.64, "eval_python_code_alpaca_accuracy": 0.1628125, "eval_python_code_alpaca_bleu_score": 4.653238948438494, "eval_python_code_alpaca_bleu_score_sem": 0.1484399888376643, "eval_python_code_alpaca_emb_cos_sim": 0.7610523700714111, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008129999978661418, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8632025718688965, "eval_python_code_alpaca_n_ngrams_match_1": 9.8, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 0.986, "eval_python_code_alpaca_num_pred_words": 43.418, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.51753837398867, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3318641293223034, "eval_python_code_alpaca_runtime": 9.6409, "eval_python_code_alpaca_samples_per_second": 51.862, "eval_python_code_alpaca_steps_per_second": 0.104, "eval_python_code_alpaca_token_set_f1": 0.47870735390552044, "eval_python_code_alpaca_token_set_f1_sem": 0.005561339573159282, "eval_python_code_alpaca_token_set_precision": 0.533145417806957, "eval_python_code_alpaca_token_set_recall": 0.4581533491929219, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 201250 }, { "epoch": 38.64, "eval_wikibio_accuracy": 0.33134375, "eval_wikibio_bleu_score": 6.0463956689639975, "eval_wikibio_bleu_score_sem": 0.21884906055287173, "eval_wikibio_emb_cos_sim": 0.7410796880722046, "eval_wikibio_emb_cos_sim_sem": 0.009447447242839198, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6174113750457764, "eval_wikibio_n_ngrams_match_1": 9.648, "eval_wikibio_n_ngrams_match_2": 3.286, "eval_wikibio_n_ngrams_match_3": 1.216, "eval_wikibio_num_pred_words": 34.904, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.24103985351619, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3474419480356657, "eval_wikibio_runtime": 14.4, "eval_wikibio_samples_per_second": 34.722, "eval_wikibio_steps_per_second": 0.069, "eval_wikibio_token_set_f1": 0.3133670602315977, "eval_wikibio_token_set_f1_sem": 0.005841852625337701, "eval_wikibio_token_set_precision": 0.31555529983383407, "eval_wikibio_token_set_recall": 0.3307085213568827, "eval_wikibio_true_num_tokens": 61.1328125, "step": 201250 }, { "epoch": 38.64, "eval_nq_accuracy": 0.53721875, "eval_nq_bleu_score": 12.095965169289103, "eval_nq_bleu_score_sem": 0.48117336932309007, "eval_nq_emb_cos_sim": 0.8377463221549988, "eval_nq_emb_cos_sim_sem": 0.00777562927638182, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.124088764190674, "eval_nq_n_ngrams_match_1": 23.472, "eval_nq_n_ngrams_match_2": 8.668, "eval_nq_n_ngrams_match_3": 3.992, "eval_nq_num_pred_words": 48.832, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.365271279273268, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4600786336278272, "eval_nq_runtime": 10.457, "eval_nq_samples_per_second": 47.815, "eval_nq_steps_per_second": 0.096, "eval_nq_token_set_f1": 0.4733094220281565, "eval_nq_token_set_f1_sem": 0.005083996611667977, "eval_nq_token_set_precision": 0.42903930774077786, "eval_nq_token_set_recall": 0.5364713139797934, "eval_nq_true_num_tokens": 64.0, "step": 201250 }, { "epoch": 38.64, "learning_rate": 0.001, "loss": 2.4995, "step": 201252 }, { "epoch": 38.65, "learning_rate": 0.001, "loss": 2.5015, "step": 201264 }, { "epoch": 38.65, "learning_rate": 0.001, "loss": 2.5153, "step": 201276 }, { "epoch": 38.65, "learning_rate": 0.001, "loss": 2.5047, "step": 201288 }, { "epoch": 38.65, "learning_rate": 0.001, "loss": 2.5092, "step": 201300 }, { "epoch": 38.65, "learning_rate": 0.001, "loss": 2.508, "step": 201312 }, { "epoch": 38.66, "learning_rate": 0.001, "loss": 2.5107, "step": 201324 }, { "epoch": 38.66, "learning_rate": 0.001, "loss": 2.4973, "step": 201336 }, { "epoch": 38.66, "learning_rate": 0.001, "loss": 2.5124, "step": 201348 }, { "epoch": 38.66, "learning_rate": 0.001, "loss": 2.5041, "step": 201360 }, { "epoch": 38.67, "learning_rate": 0.001, "loss": 2.5095, "step": 201372 }, { "epoch": 38.67, "learning_rate": 0.001, "loss": 2.5113, "step": 201384 }, { "epoch": 38.67, "learning_rate": 0.001, "loss": 2.5082, "step": 201396 }, { "epoch": 38.67, "learning_rate": 0.001, "loss": 2.5095, "step": 201408 }, { "epoch": 38.68, "learning_rate": 0.001, "loss": 2.5092, "step": 201420 }, { "epoch": 38.68, "learning_rate": 0.001, "loss": 2.5093, "step": 201432 }, { "epoch": 38.68, "learning_rate": 0.001, "loss": 2.5033, "step": 201444 }, { "epoch": 38.68, "learning_rate": 0.001, "loss": 2.5017, "step": 201456 }, { "epoch": 38.68, "learning_rate": 0.001, "loss": 2.4977, "step": 201468 }, { "epoch": 38.69, "learning_rate": 0.001, "loss": 2.513, "step": 201480 }, { "epoch": 38.69, "learning_rate": 0.001, "loss": 2.4955, "step": 201492 }, { "epoch": 38.69, "learning_rate": 0.001, "loss": 2.5184, "step": 201504 }, { "epoch": 38.69, "learning_rate": 0.001, "loss": 2.5074, "step": 201516 }, { "epoch": 38.7, "learning_rate": 0.001, "loss": 2.5032, "step": 201528 }, { "epoch": 38.7, "learning_rate": 0.001, "loss": 2.5104, "step": 201540 }, { "epoch": 38.7, "learning_rate": 0.001, "loss": 2.5155, "step": 201552 }, { "epoch": 38.7, "learning_rate": 0.001, "loss": 2.5045, "step": 201564 }, { "epoch": 38.71, "learning_rate": 0.001, "loss": 2.5077, "step": 201576 }, { "epoch": 38.71, "learning_rate": 0.001, "loss": 2.5001, "step": 201588 }, { "epoch": 38.71, "learning_rate": 0.001, "loss": 2.5092, "step": 201600 }, { "epoch": 38.71, "learning_rate": 0.001, "loss": 2.5143, "step": 201612 }, { "epoch": 38.71, "learning_rate": 0.001, "loss": 2.506, "step": 201624 }, { "epoch": 38.72, "learning_rate": 0.001, "loss": 2.5062, "step": 201636 }, { "epoch": 38.72, "learning_rate": 0.001, "loss": 2.507, "step": 201648 }, { "epoch": 38.72, "learning_rate": 0.001, "loss": 2.5105, "step": 201660 }, { "epoch": 38.72, "learning_rate": 0.001, "loss": 2.5066, "step": 201672 }, { "epoch": 38.73, "learning_rate": 0.001, "loss": 2.4964, "step": 201684 }, { "epoch": 38.73, "learning_rate": 0.001, "loss": 2.5115, "step": 201696 }, { "epoch": 38.73, "learning_rate": 0.001, "loss": 2.5186, "step": 201708 }, { "epoch": 38.73, "learning_rate": 0.001, "loss": 2.5066, "step": 201720 }, { "epoch": 38.74, "learning_rate": 0.001, "loss": 2.5118, "step": 201732 }, { "epoch": 38.74, "learning_rate": 0.001, "loss": 2.5188, "step": 201744 }, { "epoch": 38.74, "learning_rate": 0.001, "loss": 2.5094, "step": 201756 }, { "epoch": 38.74, "learning_rate": 0.001, "loss": 2.518, "step": 201768 }, { "epoch": 38.74, "learning_rate": 0.001, "loss": 2.5007, "step": 201780 }, { "epoch": 38.75, "learning_rate": 0.001, "loss": 2.5032, "step": 201792 }, { "epoch": 38.75, "learning_rate": 0.001, "loss": 2.5079, "step": 201804 }, { "epoch": 38.75, "learning_rate": 0.001, "loss": 2.5128, "step": 201816 }, { "epoch": 38.75, "learning_rate": 0.001, "loss": 2.512, "step": 201828 }, { "epoch": 38.76, "learning_rate": 0.001, "loss": 2.5104, "step": 201840 }, { "epoch": 38.76, "learning_rate": 0.001, "loss": 2.5061, "step": 201852 }, { "epoch": 38.76, "learning_rate": 0.001, "loss": 2.512, "step": 201864 }, { "epoch": 38.76, "eval_ag_news_accuracy": 0.3266875, "eval_ag_news_bleu_score": 4.917675719839939, "eval_ag_news_bleu_score_sem": 0.15777813278336045, "eval_ag_news_emb_cos_sim": 0.8131592273712158, "eval_ag_news_emb_cos_sim_sem": 0.0074618334768725625, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.488602876663208, "eval_ag_news_n_ngrams_match_1": 14.318, "eval_ag_news_n_ngrams_match_2": 3.134, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 46.674, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.74017367700063, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.356389705023823, "eval_ag_news_runtime": 10.3043, "eval_ag_news_samples_per_second": 48.524, "eval_ag_news_steps_per_second": 0.097, "eval_ag_news_token_set_f1": 0.3554133112421214, "eval_ag_news_token_set_f1_sem": 0.004590297937167434, "eval_ag_news_token_set_precision": 0.3431914770540108, "eval_ag_news_token_set_recall": 0.3843889410895245, "eval_ag_news_true_num_tokens": 56.09375, "step": 201875 }, { "epoch": 38.76, "eval_anthropic_toxic_prompts_accuracy": 0.1173125, "eval_anthropic_toxic_prompts_bleu_score": 3.242991435867587, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1304039427410768, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.679387629032135, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008760039387616105, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.187056064605713, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766, "eval_anthropic_toxic_prompts_num_pred_words": 47.026, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.217029029597594, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21501595188965897, "eval_anthropic_toxic_prompts_runtime": 10.9332, "eval_anthropic_toxic_prompts_samples_per_second": 45.732, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.36261502689196223, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066560905457200776, "eval_anthropic_toxic_prompts_token_set_precision": 0.4416186025811198, "eval_anthropic_toxic_prompts_token_set_recall": 0.33389050103492146, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 201875 }, { "epoch": 38.76, "eval_arxiv_accuracy": 0.35465625, "eval_arxiv_bleu_score": 4.55229740831341, "eval_arxiv_bleu_score_sem": 0.13688962556401163, "eval_arxiv_emb_cos_sim": 0.783316969871521, "eval_arxiv_emb_cos_sim_sem": 0.0069025401873214435, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3419687747955322, "eval_arxiv_n_ngrams_match_1": 15.376, "eval_arxiv_n_ngrams_match_2": 3.126, "eval_arxiv_n_ngrams_match_3": 0.744, "eval_arxiv_num_pred_words": 40.752, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.27473853644082, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3692863261658166, "eval_arxiv_runtime": 10.1712, "eval_arxiv_samples_per_second": 49.158, "eval_arxiv_steps_per_second": 0.098, "eval_arxiv_token_set_f1": 0.36152102203325975, "eval_arxiv_token_set_f1_sem": 0.004236665988412645, "eval_arxiv_token_set_precision": 0.3137950745892967, "eval_arxiv_token_set_recall": 0.44747270361294245, "eval_arxiv_true_num_tokens": 64.0, "step": 201875 }, { "epoch": 38.76, "eval_python_code_alpaca_accuracy": 0.1630625, "eval_python_code_alpaca_bleu_score": 4.512972974426608, "eval_python_code_alpaca_bleu_score_sem": 0.1402479341852449, "eval_python_code_alpaca_emb_cos_sim": 0.7586859464645386, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00823875992124378, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8622183799743652, "eval_python_code_alpaca_n_ngrams_match_1": 9.832, "eval_python_code_alpaca_n_ngrams_match_2": 2.836, "eval_python_code_alpaca_n_ngrams_match_3": 0.96, "eval_python_code_alpaca_num_pred_words": 44.33, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.500306235965105, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.32899641063380053, "eval_python_code_alpaca_runtime": 10.0203, "eval_python_code_alpaca_samples_per_second": 49.899, "eval_python_code_alpaca_steps_per_second": 0.1, "eval_python_code_alpaca_token_set_f1": 0.48051080934504997, "eval_python_code_alpaca_token_set_f1_sem": 0.005507289813331886, "eval_python_code_alpaca_token_set_precision": 0.536617700549984, "eval_python_code_alpaca_token_set_recall": 0.45787161406975196, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 201875 }, { "epoch": 38.76, "eval_wikibio_accuracy": 0.329, "eval_wikibio_bleu_score": 6.317726898131954, "eval_wikibio_bleu_score_sem": 0.23536335607271658, "eval_wikibio_emb_cos_sim": 0.7388870120048523, "eval_wikibio_emb_cos_sim_sem": 0.00989785810052947, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6539878845214844, "eval_wikibio_n_ngrams_match_1": 10.048, "eval_wikibio_n_ngrams_match_2": 3.446, "eval_wikibio_n_ngrams_match_3": 1.338, "eval_wikibio_num_pred_words": 35.838, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.6284049169189, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35968329711628844, "eval_wikibio_runtime": 11.4744, "eval_wikibio_samples_per_second": 43.575, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.3213910517015467, "eval_wikibio_token_set_f1_sem": 0.005781536706777318, "eval_wikibio_token_set_precision": 0.32744484232953247, "eval_wikibio_token_set_recall": 0.33254251410522806, "eval_wikibio_true_num_tokens": 61.1328125, "step": 201875 }, { "epoch": 38.76, "eval_nq_accuracy": 0.5334375, "eval_nq_bleu_score": 12.264501630654925, "eval_nq_bleu_score_sem": 0.48704479745097623, "eval_nq_emb_cos_sim": 0.8376739025115967, "eval_nq_emb_cos_sim_sem": 0.006610134571975972, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1322991847991943, "eval_nq_n_ngrams_match_1": 23.564, "eval_nq_n_ngrams_match_2": 8.8, "eval_nq_n_ngrams_match_3": 4.11, "eval_nq_num_pred_words": 49.27, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.434236403903807, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45702857873677716, "eval_nq_runtime": 10.7031, "eval_nq_samples_per_second": 46.715, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.47155928446618667, "eval_nq_token_set_f1_sem": 0.004943600773138105, "eval_nq_token_set_precision": 0.4303732457619844, "eval_nq_token_set_recall": 0.5303012318140418, "eval_nq_true_num_tokens": 64.0, "step": 201875 }, { "epoch": 38.76, "learning_rate": 0.001, "loss": 2.5037, "step": 201876 }, { "epoch": 38.76, "learning_rate": 0.001, "loss": 2.5069, "step": 201888 }, { "epoch": 38.77, "learning_rate": 0.001, "loss": 2.5177, "step": 201900 }, { "epoch": 38.77, "learning_rate": 0.001, "loss": 2.5022, "step": 201912 }, { "epoch": 38.77, "learning_rate": 0.001, "loss": 2.5204, "step": 201924 }, { "epoch": 38.77, "learning_rate": 0.001, "loss": 2.5016, "step": 201936 }, { "epoch": 38.78, "learning_rate": 0.001, "loss": 2.4997, "step": 201948 }, { "epoch": 38.78, "learning_rate": 0.001, "loss": 2.5071, "step": 201960 }, { "epoch": 38.78, "learning_rate": 0.001, "loss": 2.5138, "step": 201972 }, { "epoch": 38.78, "learning_rate": 0.001, "loss": 2.5097, "step": 201984 }, { "epoch": 38.79, "learning_rate": 0.001, "loss": 2.5154, "step": 201996 }, { "epoch": 38.79, "learning_rate": 0.001, "loss": 2.5126, "step": 202008 }, { "epoch": 38.79, "learning_rate": 0.001, "loss": 2.501, "step": 202020 }, { "epoch": 38.79, "learning_rate": 0.001, "loss": 2.5098, "step": 202032 }, { "epoch": 38.79, "learning_rate": 0.001, "loss": 2.5159, "step": 202044 }, { "epoch": 38.8, "learning_rate": 0.001, "loss": 2.5162, "step": 202056 }, { "epoch": 38.8, "learning_rate": 0.001, "loss": 2.505, "step": 202068 }, { "epoch": 38.8, "learning_rate": 0.001, "loss": 2.5042, "step": 202080 }, { "epoch": 38.8, "learning_rate": 0.001, "loss": 2.5087, "step": 202092 }, { "epoch": 38.81, "learning_rate": 0.001, "loss": 2.5136, "step": 202104 }, { "epoch": 38.81, "learning_rate": 0.001, "loss": 2.5176, "step": 202116 }, { "epoch": 38.81, "learning_rate": 0.001, "loss": 2.5119, "step": 202128 }, { "epoch": 38.81, "learning_rate": 0.001, "loss": 2.5103, "step": 202140 }, { "epoch": 38.82, "learning_rate": 0.001, "loss": 2.5156, "step": 202152 }, { "epoch": 38.82, "learning_rate": 0.001, "loss": 2.5034, "step": 202164 }, { "epoch": 38.82, "learning_rate": 0.001, "loss": 2.513, "step": 202176 }, { "epoch": 38.82, "learning_rate": 0.001, "loss": 2.5153, "step": 202188 }, { "epoch": 38.82, "learning_rate": 0.001, "loss": 2.518, "step": 202200 }, { "epoch": 38.83, "learning_rate": 0.001, "loss": 2.5091, "step": 202212 }, { "epoch": 38.83, "learning_rate": 0.001, "loss": 2.507, "step": 202224 }, { "epoch": 38.83, "learning_rate": 0.001, "loss": 2.5116, "step": 202236 }, { "epoch": 38.83, "learning_rate": 0.001, "loss": 2.5153, "step": 202248 }, { "epoch": 38.84, "learning_rate": 0.001, "loss": 2.5192, "step": 202260 }, { "epoch": 38.84, "learning_rate": 0.001, "loss": 2.518, "step": 202272 }, { "epoch": 38.84, "learning_rate": 0.001, "loss": 2.5165, "step": 202284 }, { "epoch": 38.84, "learning_rate": 0.001, "loss": 2.529, "step": 202296 }, { "epoch": 38.85, "learning_rate": 0.001, "loss": 2.5186, "step": 202308 }, { "epoch": 38.85, "learning_rate": 0.001, "loss": 2.52, "step": 202320 }, { "epoch": 38.85, "learning_rate": 0.001, "loss": 2.5129, "step": 202332 }, { "epoch": 38.85, "learning_rate": 0.001, "loss": 2.5163, "step": 202344 }, { "epoch": 38.85, "learning_rate": 0.001, "loss": 2.5174, "step": 202356 }, { "epoch": 38.86, "learning_rate": 0.001, "loss": 2.5114, "step": 202368 }, { "epoch": 38.86, "learning_rate": 0.001, "loss": 2.5263, "step": 202380 }, { "epoch": 38.86, "learning_rate": 0.001, "loss": 2.5064, "step": 202392 }, { "epoch": 38.86, "learning_rate": 0.001, "loss": 2.514, "step": 202404 }, { "epoch": 38.87, "learning_rate": 0.001, "loss": 2.5191, "step": 202416 }, { "epoch": 38.87, "learning_rate": 0.001, "loss": 2.5118, "step": 202428 }, { "epoch": 38.87, "learning_rate": 0.001, "loss": 2.5103, "step": 202440 }, { "epoch": 38.87, "learning_rate": 0.001, "loss": 2.5148, "step": 202452 }, { "epoch": 38.88, "learning_rate": 0.001, "loss": 2.5132, "step": 202464 }, { "epoch": 38.88, "learning_rate": 0.001, "loss": 2.5192, "step": 202476 }, { "epoch": 38.88, "learning_rate": 0.001, "loss": 2.5131, "step": 202488 }, { "epoch": 38.88, "learning_rate": 0.001, "loss": 2.5145, "step": 202500 }, { "epoch": 38.88, "eval_ag_news_accuracy": 0.328375, "eval_ag_news_bleu_score": 5.1527155688806054, "eval_ag_news_bleu_score_sem": 0.16033182908607604, "eval_ag_news_emb_cos_sim": 0.8261315822601318, "eval_ag_news_emb_cos_sim_sem": 0.006198742476383676, "eval_ag_news_emb_top1_equal": 0.1875, "eval_ag_news_emb_top1_equal_sem": 0.034634623208270626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4841575622558594, "eval_ag_news_n_ngrams_match_1": 14.65, "eval_ag_news_n_ngrams_match_2": 3.39, "eval_ag_news_n_ngrams_match_3": 0.992, "eval_ag_news_num_pred_words": 47.036, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.5949563187969, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3638596673897808, "eval_ag_news_runtime": 29.9712, "eval_ag_news_samples_per_second": 16.683, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.3621769282116598, "eval_ag_news_token_set_f1_sem": 0.0044052607054207, "eval_ag_news_token_set_precision": 0.3486148355316827, "eval_ag_news_token_set_recall": 0.3895147200352479, "eval_ag_news_true_num_tokens": 56.09375, "step": 202500 }, { "epoch": 38.88, "eval_anthropic_toxic_prompts_accuracy": 0.115375, "eval_anthropic_toxic_prompts_bleu_score": 3.3330506736959085, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12584082822832918, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6926984786987305, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008459507991586636, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2154390811920166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.43, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.066, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.796, "eval_anthropic_toxic_prompts_num_pred_words": 47.64, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.914228879195598, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22039217915760645, "eval_anthropic_toxic_prompts_runtime": 11.1155, "eval_anthropic_toxic_prompts_samples_per_second": 44.982, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.3622944960966976, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00657948807338437, "eval_anthropic_toxic_prompts_token_set_precision": 0.4565711999608672, "eval_anthropic_toxic_prompts_token_set_recall": 0.32324141954753727, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 202500 }, { "epoch": 38.88, "eval_arxiv_accuracy": 0.35040625, "eval_arxiv_bleu_score": 4.576550455069013, "eval_arxiv_bleu_score_sem": 0.13265521502907604, "eval_arxiv_emb_cos_sim": 0.7901520133018494, "eval_arxiv_emb_cos_sim_sem": 0.006274881416763113, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.353154182434082, "eval_arxiv_n_ngrams_match_1": 15.728, "eval_arxiv_n_ngrams_match_2": 3.108, "eval_arxiv_n_ngrams_match_3": 0.726, "eval_arxiv_num_pred_words": 40.996, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.59277839968988, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3747648255053755, "eval_arxiv_runtime": 10.8355, "eval_arxiv_samples_per_second": 46.145, "eval_arxiv_steps_per_second": 0.092, "eval_arxiv_token_set_f1": 0.3652804947432187, "eval_arxiv_token_set_f1_sem": 0.004229053235562321, "eval_arxiv_token_set_precision": 0.31876671598538453, "eval_arxiv_token_set_recall": 0.44378250447759915, "eval_arxiv_true_num_tokens": 64.0, "step": 202500 }, { "epoch": 38.88, "eval_python_code_alpaca_accuracy": 0.16284375, "eval_python_code_alpaca_bleu_score": 4.501096018473689, "eval_python_code_alpaca_bleu_score_sem": 0.13955968690839765, "eval_python_code_alpaca_emb_cos_sim": 0.7713341116905212, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007082156884784714, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.865617275238037, "eval_python_code_alpaca_n_ngrams_match_1": 10.062, "eval_python_code_alpaca_n_ngrams_match_2": 2.938, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 45.6, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.559889144615443, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33501372485379877, "eval_python_code_alpaca_runtime": 10.1982, "eval_python_code_alpaca_samples_per_second": 49.028, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4856092523507604, "eval_python_code_alpaca_token_set_f1_sem": 0.005396022704446391, "eval_python_code_alpaca_token_set_precision": 0.5490716266038246, "eval_python_code_alpaca_token_set_recall": 0.45765075648728376, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 202500 }, { "epoch": 38.88, "eval_wikibio_accuracy": 0.32696875, "eval_wikibio_bleu_score": 6.145897708698071, "eval_wikibio_bleu_score_sem": 0.2171454817931754, "eval_wikibio_emb_cos_sim": 0.7525253295898438, "eval_wikibio_emb_cos_sim_sem": 0.008551091776311791, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6499736309051514, "eval_wikibio_n_ngrams_match_1": 10.256, "eval_wikibio_n_ngrams_match_2": 3.462, "eval_wikibio_n_ngrams_match_3": 1.272, "eval_wikibio_num_pred_words": 36.164, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.47365152028997, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.361788703810096, "eval_wikibio_runtime": 9.9363, "eval_wikibio_samples_per_second": 50.32, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.3209350589139348, "eval_wikibio_token_set_f1_sem": 0.005467771706450652, "eval_wikibio_token_set_precision": 0.3316562099364624, "eval_wikibio_token_set_recall": 0.3265698305645069, "eval_wikibio_true_num_tokens": 61.1328125, "step": 202500 }, { "epoch": 38.88, "eval_nq_accuracy": 0.53553125, "eval_nq_bleu_score": 12.218923377094882, "eval_nq_bleu_score_sem": 0.4981483004349057, "eval_nq_emb_cos_sim": 0.8463374376296997, "eval_nq_emb_cos_sim_sem": 0.0064939376862649615, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.129981517791748, "eval_nq_n_ngrams_match_1": 23.772, "eval_nq_n_ngrams_match_2": 8.866, "eval_nq_n_ngrams_match_3": 4.08, "eval_nq_num_pred_words": 49.104, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.414711287556525, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46200736068215403, "eval_nq_runtime": 10.6016, "eval_nq_samples_per_second": 47.163, "eval_nq_steps_per_second": 0.094, "eval_nq_token_set_f1": 0.475367129495662, "eval_nq_token_set_f1_sem": 0.0048558998034506995, "eval_nq_token_set_precision": 0.4335902694750858, "eval_nq_token_set_recall": 0.5337332485971581, "eval_nq_true_num_tokens": 64.0, "step": 202500 }, { "epoch": 38.88, "learning_rate": 0.001, "loss": 2.5191, "step": 202512 }, { "epoch": 38.89, "learning_rate": 0.001, "loss": 2.5076, "step": 202524 }, { "epoch": 38.89, "learning_rate": 0.001, "loss": 2.52, "step": 202536 }, { "epoch": 38.89, "learning_rate": 0.001, "loss": 2.5101, "step": 202548 }, { "epoch": 38.89, "learning_rate": 0.001, "loss": 2.5146, "step": 202560 }, { "epoch": 38.9, "learning_rate": 0.001, "loss": 2.5122, "step": 202572 }, { "epoch": 38.9, "learning_rate": 0.001, "loss": 2.5176, "step": 202584 }, { "epoch": 38.9, "learning_rate": 0.001, "loss": 2.5065, "step": 202596 }, { "epoch": 38.9, "learning_rate": 0.001, "loss": 2.5172, "step": 202608 }, { "epoch": 38.91, "learning_rate": 0.001, "loss": 2.5115, "step": 202620 }, { "epoch": 38.91, "learning_rate": 0.001, "loss": 2.5062, "step": 202632 }, { "epoch": 38.91, "learning_rate": 0.001, "loss": 2.5095, "step": 202644 }, { "epoch": 38.91, "learning_rate": 0.001, "loss": 2.5126, "step": 202656 }, { "epoch": 38.91, "learning_rate": 0.001, "loss": 2.5152, "step": 202668 }, { "epoch": 38.92, "learning_rate": 0.001, "loss": 2.5165, "step": 202680 }, { "epoch": 38.92, "learning_rate": 0.001, "loss": 2.5106, "step": 202692 }, { "epoch": 38.92, "learning_rate": 0.001, "loss": 2.5098, "step": 202704 }, { "epoch": 38.92, "learning_rate": 0.001, "loss": 2.5117, "step": 202716 }, { "epoch": 38.93, "learning_rate": 0.001, "loss": 2.5146, "step": 202728 }, { "epoch": 38.93, "learning_rate": 0.001, "loss": 2.5004, "step": 202740 }, { "epoch": 38.93, "learning_rate": 0.001, "loss": 2.515, "step": 202752 }, { "epoch": 38.93, "learning_rate": 0.001, "loss": 2.502, "step": 202764 }, { "epoch": 38.94, "learning_rate": 0.001, "loss": 2.5187, "step": 202776 }, { "epoch": 38.94, "learning_rate": 0.001, "loss": 2.5104, "step": 202788 }, { "epoch": 38.94, "learning_rate": 0.001, "loss": 2.5074, "step": 202800 }, { "epoch": 38.94, "learning_rate": 0.001, "loss": 2.5021, "step": 202812 }, { "epoch": 38.94, "learning_rate": 0.001, "loss": 2.5143, "step": 202824 }, { "epoch": 38.95, "learning_rate": 0.001, "loss": 2.5199, "step": 202836 }, { "epoch": 38.95, "learning_rate": 0.001, "loss": 2.5087, "step": 202848 }, { "epoch": 38.95, "learning_rate": 0.001, "loss": 2.5112, "step": 202860 }, { "epoch": 38.95, "learning_rate": 0.001, "loss": 2.5031, "step": 202872 }, { "epoch": 38.96, "learning_rate": 0.001, "loss": 2.5183, "step": 202884 }, { "epoch": 38.96, "learning_rate": 0.001, "loss": 2.5072, "step": 202896 }, { "epoch": 38.96, "learning_rate": 0.001, "loss": 2.5059, "step": 202908 }, { "epoch": 38.96, "learning_rate": 0.001, "loss": 2.5115, "step": 202920 }, { "epoch": 38.97, "learning_rate": 0.001, "loss": 2.5133, "step": 202932 }, { "epoch": 38.97, "learning_rate": 0.001, "loss": 2.5035, "step": 202944 }, { "epoch": 38.97, "learning_rate": 0.001, "loss": 2.5205, "step": 202956 }, { "epoch": 38.97, "learning_rate": 0.001, "loss": 2.5173, "step": 202968 }, { "epoch": 38.97, "learning_rate": 0.001, "loss": 2.5073, "step": 202980 }, { "epoch": 38.98, "learning_rate": 0.001, "loss": 2.5107, "step": 202992 }, { "epoch": 38.98, "learning_rate": 0.001, "loss": 2.5146, "step": 203004 }, { "epoch": 38.98, "learning_rate": 0.001, "loss": 2.5094, "step": 203016 }, { "epoch": 38.98, "learning_rate": 0.001, "loss": 2.4934, "step": 203028 }, { "epoch": 38.99, "learning_rate": 0.001, "loss": 2.5081, "step": 203040 }, { "epoch": 38.99, "learning_rate": 0.001, "loss": 2.5076, "step": 203052 }, { "epoch": 38.99, "learning_rate": 0.001, "loss": 2.5168, "step": 203064 }, { "epoch": 38.99, "learning_rate": 0.001, "loss": 2.5112, "step": 203076 }, { "epoch": 39.0, "learning_rate": 0.001, "loss": 2.5042, "step": 203088 }, { "epoch": 39.0, "learning_rate": 0.001, "loss": 2.5118, "step": 203100 }, { "epoch": 39.0, "learning_rate": 0.001, "loss": 2.5241, "step": 203112 }, { "epoch": 39.0, "learning_rate": 0.001, "loss": 2.4926, "step": 203124 }, { "epoch": 39.0, "eval_ag_news_accuracy": 0.32684375, "eval_ag_news_bleu_score": 4.930483665400879, "eval_ag_news_bleu_score_sem": 0.1525254372087238, "eval_ag_news_emb_cos_sim": 0.8217231035232544, "eval_ag_news_emb_cos_sim_sem": 0.006145553812107435, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.483769416809082, "eval_ag_news_n_ngrams_match_1": 14.442, "eval_ag_news_n_ngrams_match_2": 3.224, "eval_ag_news_n_ngrams_match_3": 0.912, "eval_ag_news_num_pred_words": 46.874, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.58230718992355, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3589650751193681, "eval_ag_news_runtime": 10.4713, "eval_ag_news_samples_per_second": 47.75, "eval_ag_news_steps_per_second": 0.095, "eval_ag_news_token_set_f1": 0.35973880612370873, "eval_ag_news_token_set_f1_sem": 0.004445421841233903, "eval_ag_news_token_set_precision": 0.3482240008313258, "eval_ag_news_token_set_recall": 0.38569646833944277, "eval_ag_news_true_num_tokens": 56.09375, "step": 203125 }, { "epoch": 39.0, "eval_anthropic_toxic_prompts_accuracy": 0.11621875, "eval_anthropic_toxic_prompts_bleu_score": 3.2150605163469677, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1231294136668318, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6852802038192749, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008253135947853043, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2176363468170166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.406, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.992, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 47.69, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.969032244611608, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21995527486306599, "eval_anthropic_toxic_prompts_runtime": 9.8172, "eval_anthropic_toxic_prompts_samples_per_second": 50.931, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3630440112872896, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006359723865418526, "eval_anthropic_toxic_prompts_token_set_precision": 0.45075556344855516, "eval_anthropic_toxic_prompts_token_set_recall": 0.33287990041219423, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 203125 }, { "epoch": 39.0, "eval_arxiv_accuracy": 0.351125, "eval_arxiv_bleu_score": 4.744306671947361, "eval_arxiv_bleu_score_sem": 0.13858429074770123, "eval_arxiv_emb_cos_sim": 0.788700520992279, "eval_arxiv_emb_cos_sim_sem": 0.00591725487043605, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3417587280273438, "eval_arxiv_n_ngrams_match_1": 15.732, "eval_arxiv_n_ngrams_match_2": 3.208, "eval_arxiv_n_ngrams_match_3": 0.774, "eval_arxiv_num_pred_words": 41.684, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.26880014268191, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3719481559322388, "eval_arxiv_runtime": 10.6133, "eval_arxiv_samples_per_second": 47.111, "eval_arxiv_steps_per_second": 0.094, "eval_arxiv_token_set_f1": 0.3664804712129069, "eval_arxiv_token_set_f1_sem": 0.00401123671147056, "eval_arxiv_token_set_precision": 0.3214731191284198, "eval_arxiv_token_set_recall": 0.4385559628883223, "eval_arxiv_true_num_tokens": 64.0, "step": 203125 }, { "epoch": 39.0, "eval_python_code_alpaca_accuracy": 0.1634375, "eval_python_code_alpaca_bleu_score": 4.646376557469093, "eval_python_code_alpaca_bleu_score_sem": 0.14656820083468533, "eval_python_code_alpaca_emb_cos_sim": 0.7661327123641968, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007154286606199553, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8423428535461426, "eval_python_code_alpaca_n_ngrams_match_1": 9.934, "eval_python_code_alpaca_n_ngrams_match_2": 2.978, "eval_python_code_alpaca_n_ngrams_match_3": 1.008, "eval_python_code_alpaca_num_pred_words": 44.414, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.15591227973498, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33536381990607, "eval_python_code_alpaca_runtime": 18.6613, "eval_python_code_alpaca_samples_per_second": 26.793, "eval_python_code_alpaca_steps_per_second": 0.054, "eval_python_code_alpaca_token_set_f1": 0.48069464667014317, "eval_python_code_alpaca_token_set_f1_sem": 0.005202922550663726, "eval_python_code_alpaca_token_set_precision": 0.5388164144056561, "eval_python_code_alpaca_token_set_recall": 0.4559315599631794, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 203125 }, { "epoch": 39.0, "eval_wikibio_accuracy": 0.32928125, "eval_wikibio_bleu_score": 6.274163216293377, "eval_wikibio_bleu_score_sem": 0.22766949167906855, "eval_wikibio_emb_cos_sim": 0.7473706007003784, "eval_wikibio_emb_cos_sim_sem": 0.008597789825917355, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.662121057510376, "eval_wikibio_n_ngrams_match_1": 10.224, "eval_wikibio_n_ngrams_match_2": 3.522, "eval_wikibio_n_ngrams_match_3": 1.31, "eval_wikibio_num_pred_words": 36.074, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.943857492686355, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3627067170521525, "eval_wikibio_runtime": 29.8847, "eval_wikibio_samples_per_second": 16.731, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.32544113241180084, "eval_wikibio_token_set_f1_sem": 0.0053832983976861174, "eval_wikibio_token_set_precision": 0.3316719645727253, "eval_wikibio_token_set_recall": 0.3361491030281298, "eval_wikibio_true_num_tokens": 61.1328125, "step": 203125 }, { "epoch": 39.0, "eval_nq_accuracy": 0.536, "eval_nq_bleu_score": 12.234332571274141, "eval_nq_bleu_score_sem": 0.49121778967894003, "eval_nq_emb_cos_sim": 0.8396992683410645, "eval_nq_emb_cos_sim_sem": 0.006906599455668649, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.128368616104126, "eval_nq_n_ngrams_match_1": 23.756, "eval_nq_n_ngrams_match_2": 8.774, "eval_nq_n_ngrams_match_3": 4.074, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.401150124870956, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4587641294054045, "eval_nq_runtime": 30.3867, "eval_nq_samples_per_second": 16.455, "eval_nq_steps_per_second": 0.033, "eval_nq_token_set_f1": 0.47121116363681237, "eval_nq_token_set_f1_sem": 0.0046953435380699795, "eval_nq_token_set_precision": 0.43083229617691476, "eval_nq_token_set_recall": 0.5270876495552392, "eval_nq_true_num_tokens": 64.0, "step": 203125 }, { "epoch": 39.0, "learning_rate": 0.001, "loss": 2.5027, "step": 203136 }, { "epoch": 39.01, "learning_rate": 0.001, "loss": 2.484, "step": 203148 }, { "epoch": 39.01, "learning_rate": 0.001, "loss": 2.4969, "step": 203160 }, { "epoch": 39.01, "learning_rate": 0.001, "loss": 2.4952, "step": 203172 }, { "epoch": 39.01, "learning_rate": 0.001, "loss": 2.5022, "step": 203184 }, { "epoch": 39.02, "learning_rate": 0.001, "loss": 2.4886, "step": 203196 }, { "epoch": 39.02, "learning_rate": 0.001, "loss": 2.4855, "step": 203208 }, { "epoch": 39.02, "learning_rate": 0.001, "loss": 2.4983, "step": 203220 }, { "epoch": 39.02, "learning_rate": 0.001, "loss": 2.5006, "step": 203232 }, { "epoch": 39.03, "learning_rate": 0.001, "loss": 2.4899, "step": 203244 }, { "epoch": 39.03, "learning_rate": 0.001, "loss": 2.4945, "step": 203256 }, { "epoch": 39.03, "learning_rate": 0.001, "loss": 2.4842, "step": 203268 }, { "epoch": 39.03, "learning_rate": 0.001, "loss": 2.4999, "step": 203280 }, { "epoch": 39.03, "learning_rate": 0.001, "loss": 2.5004, "step": 203292 }, { "epoch": 39.04, "learning_rate": 0.001, "loss": 2.4989, "step": 203304 }, { "epoch": 39.04, "learning_rate": 0.001, "loss": 2.5004, "step": 203316 }, { "epoch": 39.04, "learning_rate": 0.001, "loss": 2.4967, "step": 203328 }, { "epoch": 39.04, "learning_rate": 0.001, "loss": 2.4882, "step": 203340 }, { "epoch": 39.05, "learning_rate": 0.001, "loss": 2.5003, "step": 203352 }, { "epoch": 39.05, "learning_rate": 0.001, "loss": 2.4892, "step": 203364 }, { "epoch": 39.05, "learning_rate": 0.001, "loss": 2.4933, "step": 203376 }, { "epoch": 39.05, "learning_rate": 0.001, "loss": 2.4942, "step": 203388 }, { "epoch": 39.06, "learning_rate": 0.001, "loss": 2.495, "step": 203400 }, { "epoch": 39.06, "learning_rate": 0.001, "loss": 2.4855, "step": 203412 }, { "epoch": 39.06, "learning_rate": 0.001, "loss": 2.4938, "step": 203424 }, { "epoch": 39.06, "learning_rate": 0.001, "loss": 2.4877, "step": 203436 }, { "epoch": 39.06, "learning_rate": 0.001, "loss": 2.4968, "step": 203448 }, { "epoch": 39.07, "learning_rate": 0.001, "loss": 2.4891, "step": 203460 }, { "epoch": 39.07, "learning_rate": 0.001, "loss": 2.4992, "step": 203472 }, { "epoch": 39.07, "learning_rate": 0.001, "loss": 2.4955, "step": 203484 }, { "epoch": 39.07, "learning_rate": 0.001, "loss": 2.4977, "step": 203496 }, { "epoch": 39.08, "learning_rate": 0.001, "loss": 2.4842, "step": 203508 }, { "epoch": 39.08, "learning_rate": 0.001, "loss": 2.4958, "step": 203520 }, { "epoch": 39.08, "learning_rate": 0.001, "loss": 2.4998, "step": 203532 }, { "epoch": 39.08, "learning_rate": 0.001, "loss": 2.5019, "step": 203544 }, { "epoch": 39.09, "learning_rate": 0.001, "loss": 2.4997, "step": 203556 }, { "epoch": 39.09, "learning_rate": 0.001, "loss": 2.5044, "step": 203568 }, { "epoch": 39.09, "learning_rate": 0.001, "loss": 2.5021, "step": 203580 }, { "epoch": 39.09, "learning_rate": 0.001, "loss": 2.4936, "step": 203592 }, { "epoch": 39.09, "learning_rate": 0.001, "loss": 2.511, "step": 203604 }, { "epoch": 39.1, "learning_rate": 0.001, "loss": 2.5019, "step": 203616 }, { "epoch": 39.1, "learning_rate": 0.001, "loss": 2.5036, "step": 203628 }, { "epoch": 39.1, "learning_rate": 0.001, "loss": 2.5105, "step": 203640 }, { "epoch": 39.1, "learning_rate": 0.001, "loss": 2.5141, "step": 203652 }, { "epoch": 39.11, "learning_rate": 0.001, "loss": 2.4925, "step": 203664 }, { "epoch": 39.11, "learning_rate": 0.001, "loss": 2.4912, "step": 203676 }, { "epoch": 39.11, "learning_rate": 0.001, "loss": 2.493, "step": 203688 }, { "epoch": 39.11, "learning_rate": 0.001, "loss": 2.4884, "step": 203700 }, { "epoch": 39.12, "learning_rate": 0.001, "loss": 2.5084, "step": 203712 }, { "epoch": 39.12, "learning_rate": 0.001, "loss": 2.492, "step": 203724 }, { "epoch": 39.12, "learning_rate": 0.001, "loss": 2.4973, "step": 203736 }, { "epoch": 39.12, "learning_rate": 0.001, "loss": 2.4996, "step": 203748 }, { "epoch": 39.12, "eval_ag_news_accuracy": 0.32875, "eval_ag_news_bleu_score": 4.9473473248805, "eval_ag_news_bleu_score_sem": 0.16153919793596638, "eval_ag_news_emb_cos_sim": 0.8240107893943787, "eval_ag_news_emb_cos_sim_sem": 0.0062118824149514544, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.472358465194702, "eval_ag_news_n_ngrams_match_1": 14.482, "eval_ag_news_n_ngrams_match_2": 3.24, "eval_ag_news_n_ngrams_match_3": 0.9, "eval_ag_news_num_pred_words": 46.866, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.212625281621065, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3573148274794353, "eval_ag_news_runtime": 31.1177, "eval_ag_news_samples_per_second": 16.068, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.358938561080545, "eval_ag_news_token_set_f1_sem": 0.004421656460898802, "eval_ag_news_token_set_precision": 0.3436742059885483, "eval_ag_news_token_set_recall": 0.3899739805137839, "eval_ag_news_true_num_tokens": 56.09375, "step": 203750 }, { "epoch": 39.12, "eval_anthropic_toxic_prompts_accuracy": 0.1155625, "eval_anthropic_toxic_prompts_bleu_score": 3.335496479194916, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13568852905974288, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6861573457717896, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008483738478213352, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2237801551818848, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.326, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 47.106, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.12290940564809, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21685699248327955, "eval_anthropic_toxic_prompts_runtime": 29.42, "eval_anthropic_toxic_prompts_samples_per_second": 16.995, "eval_anthropic_toxic_prompts_steps_per_second": 0.034, "eval_anthropic_toxic_prompts_token_set_f1": 0.3638831479368107, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006794694963805851, "eval_anthropic_toxic_prompts_token_set_precision": 0.44146976634021673, "eval_anthropic_toxic_prompts_token_set_recall": 0.33590747740380744, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 203750 }, { "epoch": 39.12, "eval_arxiv_accuracy": 0.35075, "eval_arxiv_bleu_score": 4.260086412890832, "eval_arxiv_bleu_score_sem": 0.12473198770508127, "eval_arxiv_emb_cos_sim": 0.7841321229934692, "eval_arxiv_emb_cos_sim_sem": 0.008339556743635615, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3372983932495117, "eval_arxiv_n_ngrams_match_1": 15.346, "eval_arxiv_n_ngrams_match_2": 2.978, "eval_arxiv_n_ngrams_match_3": 0.614, "eval_arxiv_num_pred_words": 40.292, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.14299261070701, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3702762396043655, "eval_arxiv_runtime": 31.5376, "eval_arxiv_samples_per_second": 15.854, "eval_arxiv_steps_per_second": 0.032, "eval_arxiv_token_set_f1": 0.35932383291701797, "eval_arxiv_token_set_f1_sem": 0.004228270528906881, "eval_arxiv_token_set_precision": 0.3108394176820865, "eval_arxiv_token_set_recall": 0.44266503946504865, "eval_arxiv_true_num_tokens": 64.0, "step": 203750 }, { "epoch": 39.12, "eval_python_code_alpaca_accuracy": 0.16259375, "eval_python_code_alpaca_bleu_score": 4.964613270381475, "eval_python_code_alpaca_bleu_score_sem": 0.16002868777585713, "eval_python_code_alpaca_emb_cos_sim": 0.7728931903839111, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007140004751454932, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8552279472351074, "eval_python_code_alpaca_n_ngrams_match_1": 9.98, "eval_python_code_alpaca_n_ngrams_match_2": 3.12, "eval_python_code_alpaca_n_ngrams_match_3": 1.13, "eval_python_code_alpaca_num_pred_words": 43.832, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.378398114000966, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3398192028790994, "eval_python_code_alpaca_runtime": 30.4794, "eval_python_code_alpaca_samples_per_second": 16.405, "eval_python_code_alpaca_steps_per_second": 0.033, "eval_python_code_alpaca_token_set_f1": 0.48745195903516697, "eval_python_code_alpaca_token_set_f1_sem": 0.005331404605602125, "eval_python_code_alpaca_token_set_precision": 0.5450476333221513, "eval_python_code_alpaca_token_set_recall": 0.460519920061498, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 203750 }, { "epoch": 39.12, "eval_wikibio_accuracy": 0.32909375, "eval_wikibio_bleu_score": 5.943904832216061, "eval_wikibio_bleu_score_sem": 0.21462632240466614, "eval_wikibio_emb_cos_sim": 0.7392363548278809, "eval_wikibio_emb_cos_sim_sem": 0.010813693972147514, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.675292730331421, "eval_wikibio_n_ngrams_match_1": 10.082, "eval_wikibio_n_ngrams_match_2": 3.346, "eval_wikibio_n_ngrams_match_3": 1.25, "eval_wikibio_num_pred_words": 36.616, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.46020636595973, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3569254595229421, "eval_wikibio_runtime": 30.7658, "eval_wikibio_samples_per_second": 16.252, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.31921761263118037, "eval_wikibio_token_set_f1_sem": 0.005456603898546164, "eval_wikibio_token_set_precision": 0.3264479250203626, "eval_wikibio_token_set_recall": 0.32812690487486335, "eval_wikibio_true_num_tokens": 61.1328125, "step": 203750 }, { "epoch": 39.12, "eval_nq_accuracy": 0.534875, "eval_nq_bleu_score": 11.846666092724572, "eval_nq_bleu_score_sem": 0.47097923772056954, "eval_nq_emb_cos_sim": 0.8380686044692993, "eval_nq_emb_cos_sim_sem": 0.007021667596092977, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127319574356079, "eval_nq_n_ngrams_match_1": 23.394, "eval_nq_n_ngrams_match_2": 8.606, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 49.172, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.392341588727241, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4520691614068224, "eval_nq_runtime": 29.1737, "eval_nq_samples_per_second": 17.139, "eval_nq_steps_per_second": 0.034, "eval_nq_token_set_f1": 0.466333941026941, "eval_nq_token_set_f1_sem": 0.004832038928368858, "eval_nq_token_set_precision": 0.4244480202142972, "eval_nq_token_set_recall": 0.5258094064124522, "eval_nq_true_num_tokens": 64.0, "step": 203750 }, { "epoch": 39.12, "learning_rate": 0.001, "loss": 2.4968, "step": 203760 }, { "epoch": 39.13, "learning_rate": 0.001, "loss": 2.4894, "step": 203772 }, { "epoch": 39.13, "learning_rate": 0.001, "loss": 2.4904, "step": 203784 }, { "epoch": 39.13, "learning_rate": 0.001, "loss": 2.4917, "step": 203796 }, { "epoch": 39.13, "learning_rate": 0.001, "loss": 2.5055, "step": 203808 }, { "epoch": 39.14, "learning_rate": 0.001, "loss": 2.4984, "step": 203820 }, { "epoch": 39.14, "learning_rate": 0.001, "loss": 2.4934, "step": 203832 }, { "epoch": 39.14, "learning_rate": 0.001, "loss": 2.4994, "step": 203844 }, { "epoch": 39.14, "learning_rate": 0.001, "loss": 2.4935, "step": 203856 }, { "epoch": 39.15, "learning_rate": 0.001, "loss": 2.5123, "step": 203868 }, { "epoch": 39.15, "learning_rate": 0.001, "loss": 2.4898, "step": 203880 }, { "epoch": 39.15, "learning_rate": 0.001, "loss": 2.5025, "step": 203892 }, { "epoch": 39.15, "learning_rate": 0.001, "loss": 2.5069, "step": 203904 }, { "epoch": 39.15, "learning_rate": 0.001, "loss": 2.495, "step": 203916 }, { "epoch": 39.16, "learning_rate": 0.001, "loss": 2.4915, "step": 203928 }, { "epoch": 39.16, "learning_rate": 0.001, "loss": 2.4976, "step": 203940 }, { "epoch": 39.16, "learning_rate": 0.001, "loss": 2.5005, "step": 203952 }, { "epoch": 39.16, "learning_rate": 0.001, "loss": 2.4911, "step": 203964 }, { "epoch": 39.17, "learning_rate": 0.001, "loss": 2.4942, "step": 203976 }, { "epoch": 39.17, "learning_rate": 0.001, "loss": 2.4993, "step": 203988 }, { "epoch": 39.17, "learning_rate": 0.001, "loss": 2.4962, "step": 204000 }, { "epoch": 39.17, "learning_rate": 0.001, "loss": 2.5043, "step": 204012 }, { "epoch": 39.18, "learning_rate": 0.001, "loss": 2.4936, "step": 204024 }, { "epoch": 39.18, "learning_rate": 0.001, "loss": 2.5006, "step": 204036 }, { "epoch": 39.18, "learning_rate": 0.001, "loss": 2.4998, "step": 204048 }, { "epoch": 39.18, "learning_rate": 0.001, "loss": 2.4979, "step": 204060 }, { "epoch": 39.18, "learning_rate": 0.001, "loss": 2.5006, "step": 204072 }, { "epoch": 39.19, "learning_rate": 0.001, "loss": 2.501, "step": 204084 }, { "epoch": 39.19, "learning_rate": 0.001, "loss": 2.4919, "step": 204096 }, { "epoch": 39.19, "learning_rate": 0.001, "loss": 2.504, "step": 204108 }, { "epoch": 39.19, "learning_rate": 0.001, "loss": 2.508, "step": 204120 }, { "epoch": 39.2, "learning_rate": 0.001, "loss": 2.4901, "step": 204132 }, { "epoch": 39.2, "learning_rate": 0.001, "loss": 2.4971, "step": 204144 }, { "epoch": 39.2, "learning_rate": 0.001, "loss": 2.5054, "step": 204156 }, { "epoch": 39.2, "learning_rate": 0.001, "loss": 2.4941, "step": 204168 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 2.4984, "step": 204180 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 2.4974, "step": 204192 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 2.4904, "step": 204204 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 2.5072, "step": 204216 }, { "epoch": 39.21, "learning_rate": 0.001, "loss": 2.4894, "step": 204228 }, { "epoch": 39.22, "learning_rate": 0.001, "loss": 2.505, "step": 204240 }, { "epoch": 39.22, "learning_rate": 0.001, "loss": 2.504, "step": 204252 }, { "epoch": 39.22, "learning_rate": 0.001, "loss": 2.5045, "step": 204264 }, { "epoch": 39.22, "learning_rate": 0.001, "loss": 2.4913, "step": 204276 }, { "epoch": 39.23, "learning_rate": 0.001, "loss": 2.4944, "step": 204288 }, { "epoch": 39.23, "learning_rate": 0.001, "loss": 2.5013, "step": 204300 }, { "epoch": 39.23, "learning_rate": 0.001, "loss": 2.5086, "step": 204312 }, { "epoch": 39.23, "learning_rate": 0.001, "loss": 2.5024, "step": 204324 }, { "epoch": 39.24, "learning_rate": 0.001, "loss": 2.4878, "step": 204336 }, { "epoch": 39.24, "learning_rate": 0.001, "loss": 2.5007, "step": 204348 }, { "epoch": 39.24, "learning_rate": 0.001, "loss": 2.4936, "step": 204360 }, { "epoch": 39.24, "learning_rate": 0.001, "loss": 2.4986, "step": 204372 }, { "epoch": 39.24, "eval_ag_news_accuracy": 0.32759375, "eval_ag_news_bleu_score": 4.812495436231566, "eval_ag_news_bleu_score_sem": 0.15096311663611467, "eval_ag_news_emb_cos_sim": 0.8207637667655945, "eval_ag_news_emb_cos_sim_sem": 0.00670883129593185, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4924023151397705, "eval_ag_news_n_ngrams_match_1": 14.48, "eval_ag_news_n_ngrams_match_2": 3.228, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 47.206, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.864804566368434, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35893640554544903, "eval_ag_news_runtime": 31.7315, "eval_ag_news_samples_per_second": 15.757, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.3602045285771965, "eval_ag_news_token_set_f1_sem": 0.004520542981490912, "eval_ag_news_token_set_precision": 0.3464958004265387, "eval_ag_news_token_set_recall": 0.3897373353445049, "eval_ag_news_true_num_tokens": 56.09375, "step": 204375 }, { "epoch": 39.24, "eval_anthropic_toxic_prompts_accuracy": 0.11640625, "eval_anthropic_toxic_prompts_bleu_score": 3.275336911045023, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12975173034238963, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682098925113678, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008458190244026161, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2052226066589355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.336, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.034, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.784, "eval_anthropic_toxic_prompts_num_pred_words": 47.042, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.660989105870556, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21746538669019225, "eval_anthropic_toxic_prompts_runtime": 11.5974, "eval_anthropic_toxic_prompts_samples_per_second": 43.113, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.36053459678352867, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0068794499294335745, "eval_anthropic_toxic_prompts_token_set_precision": 0.4444711280808593, "eval_anthropic_toxic_prompts_token_set_recall": 0.3300423361431506, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 204375 }, { "epoch": 39.24, "eval_arxiv_accuracy": 0.35346875, "eval_arxiv_bleu_score": 4.539169202684817, "eval_arxiv_bleu_score_sem": 0.13164209352990341, "eval_arxiv_emb_cos_sim": 0.7800302505493164, "eval_arxiv_emb_cos_sim_sem": 0.006690054216375739, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.339123010635376, "eval_arxiv_n_ngrams_match_1": 15.482, "eval_arxiv_n_ngrams_match_2": 3.072, "eval_arxiv_n_ngrams_match_3": 0.734, "eval_arxiv_num_pred_words": 41.21, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.194389680048527, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3691642799597068, "eval_arxiv_runtime": 10.7989, "eval_arxiv_samples_per_second": 46.301, "eval_arxiv_steps_per_second": 0.093, "eval_arxiv_token_set_f1": 0.362099512088158, "eval_arxiv_token_set_f1_sem": 0.0040664446910102505, "eval_arxiv_token_set_precision": 0.31570788709091974, "eval_arxiv_token_set_recall": 0.44104465378559937, "eval_arxiv_true_num_tokens": 64.0, "step": 204375 }, { "epoch": 39.24, "eval_python_code_alpaca_accuracy": 0.16290625, "eval_python_code_alpaca_bleu_score": 4.549148174301849, "eval_python_code_alpaca_bleu_score_sem": 0.14668840249181564, "eval_python_code_alpaca_emb_cos_sim": 0.7676301002502441, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006873681449892624, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8761374950408936, "eval_python_code_alpaca_n_ngrams_match_1": 9.74, "eval_python_code_alpaca_n_ngrams_match_2": 2.838, "eval_python_code_alpaca_n_ngrams_match_3": 0.958, "eval_python_code_alpaca_num_pred_words": 43.51, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.745598175265357, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33312843667493164, "eval_python_code_alpaca_runtime": 9.9423, "eval_python_code_alpaca_samples_per_second": 50.29, "eval_python_code_alpaca_steps_per_second": 0.101, "eval_python_code_alpaca_token_set_f1": 0.4818592495707684, "eval_python_code_alpaca_token_set_f1_sem": 0.005614890313518356, "eval_python_code_alpaca_token_set_precision": 0.5306696535406978, "eval_python_code_alpaca_token_set_recall": 0.46558071392198613, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 204375 }, { "epoch": 39.24, "eval_wikibio_accuracy": 0.32940625, "eval_wikibio_bleu_score": 6.3679576527504285, "eval_wikibio_bleu_score_sem": 0.2383635099425456, "eval_wikibio_emb_cos_sim": 0.745114803314209, "eval_wikibio_emb_cos_sim_sem": 0.009390952026588377, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6570096015930176, "eval_wikibio_n_ngrams_match_1": 10.156, "eval_wikibio_n_ngrams_match_2": 3.47, "eval_wikibio_n_ngrams_match_3": 1.348, "eval_wikibio_num_pred_words": 36.118, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.745305558885775, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3614692441132036, "eval_wikibio_runtime": 10.1342, "eval_wikibio_samples_per_second": 49.338, "eval_wikibio_steps_per_second": 0.099, "eval_wikibio_token_set_f1": 0.32359679767768096, "eval_wikibio_token_set_f1_sem": 0.0056194700104635, "eval_wikibio_token_set_precision": 0.3303050107824481, "eval_wikibio_token_set_recall": 0.33378729511068844, "eval_wikibio_true_num_tokens": 61.1328125, "step": 204375 }, { "epoch": 39.24, "eval_nq_accuracy": 0.53715625, "eval_nq_bleu_score": 12.009216620232321, "eval_nq_bleu_score_sem": 0.4765669954716235, "eval_nq_emb_cos_sim": 0.8332507610321045, "eval_nq_emb_cos_sim_sem": 0.007234609279838205, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.128572463989258, "eval_nq_n_ngrams_match_1": 23.572, "eval_nq_n_ngrams_match_2": 8.698, "eval_nq_n_ngrams_match_3": 4.028, "eval_nq_num_pred_words": 49.216, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.402862856118976, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45629042965050415, "eval_nq_runtime": 11.1603, "eval_nq_samples_per_second": 44.802, "eval_nq_steps_per_second": 0.09, "eval_nq_token_set_f1": 0.472371337056887, "eval_nq_token_set_f1_sem": 0.004851472801572849, "eval_nq_token_set_precision": 0.43052903678363896, "eval_nq_token_set_recall": 0.5312989999453681, "eval_nq_true_num_tokens": 64.0, "step": 204375 }, { "epoch": 39.24, "learning_rate": 0.001, "loss": 2.5063, "step": 204384 }, { "epoch": 39.25, "learning_rate": 0.001, "loss": 2.4995, "step": 204396 }, { "epoch": 39.25, "learning_rate": 0.001, "loss": 2.5021, "step": 204408 }, { "epoch": 39.25, "learning_rate": 0.001, "loss": 2.5004, "step": 204420 }, { "epoch": 39.25, "learning_rate": 0.001, "loss": 2.4947, "step": 204432 }, { "epoch": 39.26, "learning_rate": 0.001, "loss": 2.5092, "step": 204444 }, { "epoch": 39.26, "learning_rate": 0.001, "loss": 2.5051, "step": 204456 }, { "epoch": 39.26, "learning_rate": 0.001, "loss": 2.5066, "step": 204468 }, { "epoch": 39.26, "learning_rate": 0.001, "loss": 2.4857, "step": 204480 }, { "epoch": 39.26, "learning_rate": 0.001, "loss": 2.4905, "step": 204492 }, { "epoch": 39.27, "learning_rate": 0.001, "loss": 2.4974, "step": 204504 }, { "epoch": 39.27, "learning_rate": 0.001, "loss": 2.498, "step": 204516 }, { "epoch": 39.27, "learning_rate": 0.001, "loss": 2.4956, "step": 204528 }, { "epoch": 39.27, "learning_rate": 0.001, "loss": 2.498, "step": 204540 }, { "epoch": 39.28, "learning_rate": 0.001, "loss": 2.4949, "step": 204552 }, { "epoch": 39.28, "learning_rate": 0.001, "loss": 2.4957, "step": 204564 }, { "epoch": 39.28, "learning_rate": 0.001, "loss": 2.5067, "step": 204576 }, { "epoch": 39.28, "learning_rate": 0.001, "loss": 2.5018, "step": 204588 }, { "epoch": 39.29, "learning_rate": 0.001, "loss": 2.5094, "step": 204600 }, { "epoch": 39.29, "learning_rate": 0.001, "loss": 2.4971, "step": 204612 }, { "epoch": 39.29, "learning_rate": 0.001, "loss": 2.5062, "step": 204624 }, { "epoch": 39.29, "learning_rate": 0.001, "loss": 2.4946, "step": 204636 }, { "epoch": 39.29, "learning_rate": 0.001, "loss": 2.4868, "step": 204648 }, { "epoch": 39.3, "learning_rate": 0.001, "loss": 2.4985, "step": 204660 }, { "epoch": 39.3, "learning_rate": 0.001, "loss": 2.4981, "step": 204672 }, { "epoch": 39.3, "learning_rate": 0.001, "loss": 2.4952, "step": 204684 }, { "epoch": 39.3, "learning_rate": 0.001, "loss": 2.4956, "step": 204696 }, { "epoch": 39.31, "learning_rate": 0.001, "loss": 2.5049, "step": 204708 }, { "epoch": 39.31, "learning_rate": 0.001, "loss": 2.5029, "step": 204720 }, { "epoch": 39.31, "learning_rate": 0.001, "loss": 2.5103, "step": 204732 }, { "epoch": 39.31, "learning_rate": 0.001, "loss": 2.5033, "step": 204744 }, { "epoch": 39.32, "learning_rate": 0.001, "loss": 2.499, "step": 204756 }, { "epoch": 39.32, "learning_rate": 0.001, "loss": 2.4949, "step": 204768 }, { "epoch": 39.32, "learning_rate": 0.001, "loss": 2.5106, "step": 204780 }, { "epoch": 39.32, "learning_rate": 0.001, "loss": 2.5117, "step": 204792 }, { "epoch": 39.32, "learning_rate": 0.001, "loss": 2.4972, "step": 204804 }, { "epoch": 39.33, "learning_rate": 0.001, "loss": 2.5085, "step": 204816 }, { "epoch": 39.33, "learning_rate": 0.001, "loss": 2.5053, "step": 204828 }, { "epoch": 39.33, "learning_rate": 0.001, "loss": 2.4931, "step": 204840 }, { "epoch": 39.33, "learning_rate": 0.001, "loss": 2.4947, "step": 204852 }, { "epoch": 39.34, "learning_rate": 0.001, "loss": 2.5002, "step": 204864 }, { "epoch": 39.34, "learning_rate": 0.001, "loss": 2.4936, "step": 204876 }, { "epoch": 39.34, "learning_rate": 0.001, "loss": 2.5096, "step": 204888 }, { "epoch": 39.34, "learning_rate": 0.001, "loss": 2.5068, "step": 204900 }, { "epoch": 39.35, "learning_rate": 0.001, "loss": 2.5079, "step": 204912 }, { "epoch": 39.35, "learning_rate": 0.001, "loss": 2.4988, "step": 204924 }, { "epoch": 39.35, "learning_rate": 0.001, "loss": 2.4999, "step": 204936 }, { "epoch": 39.35, "learning_rate": 0.001, "loss": 2.4973, "step": 204948 }, { "epoch": 39.35, "learning_rate": 0.001, "loss": 2.4935, "step": 204960 }, { "epoch": 39.36, "learning_rate": 0.001, "loss": 2.5, "step": 204972 }, { "epoch": 39.36, "learning_rate": 0.001, "loss": 2.4952, "step": 204984 }, { "epoch": 39.36, "learning_rate": 0.001, "loss": 2.4909, "step": 204996 }, { "epoch": 39.36, "eval_ag_news_accuracy": 0.32890625, "eval_ag_news_bleu_score": 4.972089043523076, "eval_ag_news_bleu_score_sem": 0.15269235607015177, "eval_ag_news_emb_cos_sim": 0.8183099031448364, "eval_ag_news_emb_cos_sim_sem": 0.00684426294963202, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.465911626815796, "eval_ag_news_n_ngrams_match_1": 14.298, "eval_ag_news_n_ngrams_match_2": 3.22, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 46.454, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.005623662606034, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3569282486716935, "eval_ag_news_runtime": 11.0802, "eval_ag_news_samples_per_second": 45.126, "eval_ag_news_steps_per_second": 0.09, "eval_ag_news_token_set_f1": 0.35690292047766675, "eval_ag_news_token_set_f1_sem": 0.00436639409262066, "eval_ag_news_token_set_precision": 0.34312029375680186, "eval_ag_news_token_set_recall": 0.386596390311987, "eval_ag_news_true_num_tokens": 56.09375, "step": 205000 }, { "epoch": 39.36, "eval_anthropic_toxic_prompts_accuracy": 0.11609375, "eval_anthropic_toxic_prompts_bleu_score": 3.195115955667645, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12260503525819286, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771164536476135, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009827696768648992, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.20070743560791, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.192, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.99, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732, "eval_anthropic_toxic_prompts_num_pred_words": 46.492, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.549891522808853, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21549260545630755, "eval_anthropic_toxic_prompts_runtime": 27.6955, "eval_anthropic_toxic_prompts_samples_per_second": 18.053, "eval_anthropic_toxic_prompts_steps_per_second": 0.036, "eval_anthropic_toxic_prompts_token_set_f1": 0.35354395396152655, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006709371489207184, "eval_anthropic_toxic_prompts_token_set_precision": 0.43306553432635153, "eval_anthropic_toxic_prompts_token_set_recall": 0.3236904524492781, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 205000 }, { "epoch": 39.36, "eval_arxiv_accuracy": 0.3525, "eval_arxiv_bleu_score": 4.426560125726239, "eval_arxiv_bleu_score_sem": 0.13234200413543656, "eval_arxiv_emb_cos_sim": 0.7778306603431702, "eval_arxiv_emb_cos_sim_sem": 0.007646142962499719, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.323012113571167, "eval_arxiv_n_ngrams_match_1": 15.5, "eval_arxiv_n_ngrams_match_2": 3.05, "eval_arxiv_n_ngrams_match_3": 0.666, "eval_arxiv_num_pred_words": 39.892, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.743792280701502, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3725569742263649, "eval_arxiv_runtime": 29.1768, "eval_arxiv_samples_per_second": 17.137, "eval_arxiv_steps_per_second": 0.034, "eval_arxiv_token_set_f1": 0.3633950572510826, "eval_arxiv_token_set_f1_sem": 0.004459922468865658, "eval_arxiv_token_set_precision": 0.31620526554235473, "eval_arxiv_token_set_recall": 0.4464147904051689, "eval_arxiv_true_num_tokens": 64.0, "step": 205000 }, { "epoch": 39.36, "eval_python_code_alpaca_accuracy": 0.16215625, "eval_python_code_alpaca_bleu_score": 4.8797278329786495, "eval_python_code_alpaca_bleu_score_sem": 0.1540356088352136, "eval_python_code_alpaca_emb_cos_sim": 0.7656465172767639, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007427849946082622, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.865713357925415, "eval_python_code_alpaca_n_ngrams_match_1": 9.878, "eval_python_code_alpaca_n_ngrams_match_2": 3.07, "eval_python_code_alpaca_n_ngrams_match_3": 1.102, "eval_python_code_alpaca_num_pred_words": 43.934, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.56157642701253, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33499245697364344, "eval_python_code_alpaca_runtime": 30.4813, "eval_python_code_alpaca_samples_per_second": 16.403, "eval_python_code_alpaca_steps_per_second": 0.033, "eval_python_code_alpaca_token_set_f1": 0.48794981355749495, "eval_python_code_alpaca_token_set_f1_sem": 0.005702809367852026, "eval_python_code_alpaca_token_set_precision": 0.5412429496627671, "eval_python_code_alpaca_token_set_recall": 0.46541709498234735, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 205000 }, { "epoch": 39.36, "eval_wikibio_accuracy": 0.33003125, "eval_wikibio_bleu_score": 6.252296456260381, "eval_wikibio_bleu_score_sem": 0.23739503021767203, "eval_wikibio_emb_cos_sim": 0.7440297603607178, "eval_wikibio_emb_cos_sim_sem": 0.009114521383917551, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.63110089302063, "eval_wikibio_n_ngrams_match_1": 9.842, "eval_wikibio_n_ngrams_match_2": 3.398, "eval_wikibio_n_ngrams_match_3": 1.314, "eval_wikibio_num_pred_words": 34.428, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.754357255487804, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34894261351182987, "eval_wikibio_runtime": 11.1381, "eval_wikibio_samples_per_second": 44.891, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.31831396800720774, "eval_wikibio_token_set_f1_sem": 0.005887976119773076, "eval_wikibio_token_set_precision": 0.31992828361884634, "eval_wikibio_token_set_recall": 0.33741098380256, "eval_wikibio_true_num_tokens": 61.1328125, "step": 205000 }, { "epoch": 39.36, "eval_nq_accuracy": 0.534375, "eval_nq_bleu_score": 12.085231910151327, "eval_nq_bleu_score_sem": 0.4852618752677989, "eval_nq_emb_cos_sim": 0.8346148133277893, "eval_nq_emb_cos_sim_sem": 0.006960163490314439, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127124547958374, "eval_nq_n_ngrams_match_1": 23.294, "eval_nq_n_ngrams_match_2": 8.686, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 48.678, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.390705020171104, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45202098172641186, "eval_nq_runtime": 10.937, "eval_nq_samples_per_second": 45.716, "eval_nq_steps_per_second": 0.091, "eval_nq_token_set_f1": 0.46591854253911574, "eval_nq_token_set_f1_sem": 0.004851088053524866, "eval_nq_token_set_precision": 0.42455476004033643, "eval_nq_token_set_recall": 0.5256758889383211, "eval_nq_true_num_tokens": 64.0, "step": 205000 }, { "epoch": 39.36, "learning_rate": 0.001, "loss": 2.4967, "step": 205008 }, { "epoch": 39.37, "learning_rate": 0.001, "loss": 2.5013, "step": 205020 }, { "epoch": 39.37, "learning_rate": 0.001, "loss": 2.5044, "step": 205032 }, { "epoch": 39.37, "learning_rate": 0.001, "loss": 2.4934, "step": 205044 }, { "epoch": 39.37, "learning_rate": 0.001, "loss": 2.5084, "step": 205056 }, { "epoch": 39.38, "learning_rate": 0.001, "loss": 2.4945, "step": 205068 }, { "epoch": 39.38, "learning_rate": 0.001, "loss": 2.4971, "step": 205080 }, { "epoch": 39.38, "learning_rate": 0.001, "loss": 2.5032, "step": 205092 }, { "epoch": 39.38, "learning_rate": 0.001, "loss": 2.5005, "step": 205104 }, { "epoch": 39.38, "learning_rate": 0.001, "loss": 2.4945, "step": 205116 }, { "epoch": 39.39, "learning_rate": 0.001, "loss": 2.506, "step": 205128 }, { "epoch": 39.39, "learning_rate": 0.001, "loss": 2.5022, "step": 205140 }, { "epoch": 39.39, "learning_rate": 0.001, "loss": 2.4984, "step": 205152 }, { "epoch": 39.39, "learning_rate": 0.001, "loss": 2.5015, "step": 205164 }, { "epoch": 39.4, "learning_rate": 0.001, "loss": 2.5006, "step": 205176 }, { "epoch": 39.4, "learning_rate": 0.001, "loss": 2.5022, "step": 205188 }, { "epoch": 39.4, "learning_rate": 0.001, "loss": 2.4949, "step": 205200 }, { "epoch": 39.4, "learning_rate": 0.001, "loss": 2.504, "step": 205212 }, { "epoch": 39.41, "learning_rate": 0.001, "loss": 2.5034, "step": 205224 }, { "epoch": 39.41, "learning_rate": 0.001, "loss": 2.5016, "step": 205236 }, { "epoch": 39.41, "learning_rate": 0.001, "loss": 2.4946, "step": 205248 }, { "epoch": 39.41, "learning_rate": 0.001, "loss": 2.5013, "step": 205260 }, { "epoch": 39.41, "learning_rate": 0.001, "loss": 2.4957, "step": 205272 }, { "epoch": 39.42, "learning_rate": 0.001, "loss": 2.4946, "step": 205284 }, { "epoch": 39.42, "learning_rate": 0.001, "loss": 2.4995, "step": 205296 }, { "epoch": 39.42, "learning_rate": 0.001, "loss": 2.4933, "step": 205308 }, { "epoch": 39.42, "learning_rate": 0.001, "loss": 2.5081, "step": 205320 }, { "epoch": 39.43, "learning_rate": 0.001, "loss": 2.5092, "step": 205332 }, { "epoch": 39.43, "learning_rate": 0.001, "loss": 2.497, "step": 205344 }, { "epoch": 39.43, "learning_rate": 0.001, "loss": 2.4944, "step": 205356 }, { "epoch": 39.43, "learning_rate": 0.001, "loss": 2.5077, "step": 205368 }, { "epoch": 39.44, "learning_rate": 0.001, "loss": 2.4906, "step": 205380 }, { "epoch": 39.44, "learning_rate": 0.001, "loss": 2.5063, "step": 205392 }, { "epoch": 39.44, "learning_rate": 0.001, "loss": 2.4959, "step": 205404 }, { "epoch": 39.44, "learning_rate": 0.001, "loss": 2.5001, "step": 205416 }, { "epoch": 39.44, "learning_rate": 0.001, "loss": 2.4963, "step": 205428 }, { "epoch": 39.45, "learning_rate": 0.001, "loss": 2.5043, "step": 205440 }, { "epoch": 39.45, "learning_rate": 0.001, "loss": 2.4984, "step": 205452 }, { "epoch": 39.45, "learning_rate": 0.001, "loss": 2.5096, "step": 205464 }, { "epoch": 39.45, "learning_rate": 0.001, "loss": 2.5054, "step": 205476 }, { "epoch": 39.46, "learning_rate": 0.001, "loss": 2.5018, "step": 205488 }, { "epoch": 39.46, "learning_rate": 0.001, "loss": 2.5052, "step": 205500 }, { "epoch": 39.46, "learning_rate": 0.001, "loss": 2.5021, "step": 205512 }, { "epoch": 39.46, "learning_rate": 0.001, "loss": 2.5012, "step": 205524 }, { "epoch": 39.47, "learning_rate": 0.001, "loss": 2.5129, "step": 205536 }, { "epoch": 39.47, "learning_rate": 0.001, "loss": 2.5116, "step": 205548 }, { "epoch": 39.47, "learning_rate": 0.001, "loss": 2.5019, "step": 205560 }, { "epoch": 39.47, "learning_rate": 0.001, "loss": 2.4961, "step": 205572 }, { "epoch": 39.47, "learning_rate": 0.001, "loss": 2.4991, "step": 205584 }, { "epoch": 39.48, "learning_rate": 0.001, "loss": 2.5008, "step": 205596 }, { "epoch": 39.48, "learning_rate": 0.001, "loss": 2.4948, "step": 205608 }, { "epoch": 39.48, "learning_rate": 0.001, "loss": 2.4981, "step": 205620 }, { "epoch": 39.48, "eval_ag_news_accuracy": 0.32765625, "eval_ag_news_bleu_score": 4.758995001411007, "eval_ag_news_bleu_score_sem": 0.1511947463405855, "eval_ag_news_emb_cos_sim": 0.8204512596130371, "eval_ag_news_emb_cos_sim_sem": 0.007165907282587416, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4871253967285156, "eval_ag_news_n_ngrams_match_1": 14.36, "eval_ag_news_n_ngrams_match_2": 3.142, "eval_ag_news_n_ngrams_match_3": 0.87, "eval_ag_news_num_pred_words": 46.626, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.69183644477301, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3563840613557255, "eval_ag_news_runtime": 22.9417, "eval_ag_news_samples_per_second": 21.794, "eval_ag_news_steps_per_second": 0.044, "eval_ag_news_token_set_f1": 0.35745356870307254, "eval_ag_news_token_set_f1_sem": 0.004486212538546395, "eval_ag_news_token_set_precision": 0.3436420648906715, "eval_ag_news_token_set_recall": 0.3863338606938275, "eval_ag_news_true_num_tokens": 56.09375, "step": 205625 }, { "epoch": 39.48, "eval_anthropic_toxic_prompts_accuracy": 0.11615625, "eval_anthropic_toxic_prompts_bleu_score": 3.293664304154164, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12637628066087048, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6872742772102356, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008042833710495809, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1951358318328857, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.27, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.8, "eval_anthropic_toxic_prompts_num_pred_words": 47.214, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.41348959612635, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2154265671085718, "eval_anthropic_toxic_prompts_runtime": 9.837, "eval_anthropic_toxic_prompts_samples_per_second": 50.828, "eval_anthropic_toxic_prompts_steps_per_second": 0.102, "eval_anthropic_toxic_prompts_token_set_f1": 0.3579592530906661, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006544063633919236, "eval_anthropic_toxic_prompts_token_set_precision": 0.44162066025403757, "eval_anthropic_toxic_prompts_token_set_recall": 0.3285182505153322, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 205625 }, { "epoch": 39.48, "eval_arxiv_accuracy": 0.35128125, "eval_arxiv_bleu_score": 4.420155284020418, "eval_arxiv_bleu_score_sem": 0.13189450532811015, "eval_arxiv_emb_cos_sim": 0.7840715050697327, "eval_arxiv_emb_cos_sim_sem": 0.006795955417248948, "eval_arxiv_emb_top1_equal": 0.21875, "eval_arxiv_emb_top1_equal_sem": 0.03668319712192295, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.35141921043396, "eval_arxiv_n_ngrams_match_1": 15.516, "eval_arxiv_n_ngrams_match_2": 3.014, "eval_arxiv_n_ngrams_match_3": 0.656, "eval_arxiv_num_pred_words": 40.762, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.5432137388431, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36983822139778655, "eval_arxiv_runtime": 10.2965, "eval_arxiv_samples_per_second": 48.56, "eval_arxiv_steps_per_second": 0.097, "eval_arxiv_token_set_f1": 0.3616314861062852, "eval_arxiv_token_set_f1_sem": 0.0042738420918988045, "eval_arxiv_token_set_precision": 0.31616064222926643, "eval_arxiv_token_set_recall": 0.43917531572056145, "eval_arxiv_true_num_tokens": 64.0, "step": 205625 }, { "epoch": 39.48, "eval_python_code_alpaca_accuracy": 0.161875, "eval_python_code_alpaca_bleu_score": 4.653499177932582, "eval_python_code_alpaca_bleu_score_sem": 0.1508105252176043, "eval_python_code_alpaca_emb_cos_sim": 0.7711987495422363, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007043589884219214, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8417410850524902, "eval_python_code_alpaca_n_ngrams_match_1": 9.842, "eval_python_code_alpaca_n_ngrams_match_2": 2.87, "eval_python_code_alpaca_n_ngrams_match_3": 0.956, "eval_python_code_alpaca_num_pred_words": 43.16, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.14559149791729, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33916703918604696, "eval_python_code_alpaca_runtime": 10.1591, "eval_python_code_alpaca_samples_per_second": 49.217, "eval_python_code_alpaca_steps_per_second": 0.098, "eval_python_code_alpaca_token_set_f1": 0.4796166415804258, "eval_python_code_alpaca_token_set_f1_sem": 0.005395598108368435, "eval_python_code_alpaca_token_set_precision": 0.5369490874312501, "eval_python_code_alpaca_token_set_recall": 0.4539022940867618, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 205625 }, { "epoch": 39.48, "eval_wikibio_accuracy": 0.32878125, "eval_wikibio_bleu_score": 5.947674251706092, "eval_wikibio_bleu_score_sem": 0.21861251412515462, "eval_wikibio_emb_cos_sim": 0.7443200349807739, "eval_wikibio_emb_cos_sim_sem": 0.0097666088629995, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.673942804336548, "eval_wikibio_n_ngrams_match_1": 10.05, "eval_wikibio_n_ngrams_match_2": 3.418, "eval_wikibio_n_ngrams_match_3": 1.244, "eval_wikibio_num_pred_words": 35.824, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 39.40697394562105, "eval_wikibio_pred_num_tokens": 62.875, "eval_wikibio_rouge_score": 0.35283360241892536, "eval_wikibio_runtime": 10.3324, "eval_wikibio_samples_per_second": 48.392, "eval_wikibio_steps_per_second": 0.097, "eval_wikibio_token_set_f1": 0.3192528381879088, "eval_wikibio_token_set_f1_sem": 0.005905905546061624, "eval_wikibio_token_set_precision": 0.3262740030259134, "eval_wikibio_token_set_recall": 0.3307886234579299, "eval_wikibio_true_num_tokens": 61.1328125, "step": 205625 }, { "epoch": 39.48, "eval_nq_accuracy": 0.53646875, "eval_nq_bleu_score": 11.96204997457386, "eval_nq_bleu_score_sem": 0.47779306403289684, "eval_nq_emb_cos_sim": 0.8318569660186768, "eval_nq_emb_cos_sim_sem": 0.007318271432676838, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1247787475585938, "eval_nq_n_ngrams_match_1": 23.398, "eval_nq_n_ngrams_match_2": 8.666, "eval_nq_n_ngrams_match_3": 3.998, "eval_nq_num_pred_words": 49.308, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.37104516903899, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45392492208944635, "eval_nq_runtime": 10.6998, "eval_nq_samples_per_second": 46.73, "eval_nq_steps_per_second": 0.093, "eval_nq_token_set_f1": 0.46797845611138295, "eval_nq_token_set_f1_sem": 0.0049779903721139055, "eval_nq_token_set_precision": 0.42704323674352884, "eval_nq_token_set_recall": 0.5263336533959055, "eval_nq_true_num_tokens": 64.0, "step": 205625 }, { "epoch": 39.48, "learning_rate": 0.001, "loss": 2.4903, "step": 205632 }, { "epoch": 39.49, "learning_rate": 0.001, "loss": 2.5015, "step": 205644 }, { "epoch": 39.49, "learning_rate": 0.001, "loss": 2.5084, "step": 205656 }, { "epoch": 39.49, "learning_rate": 0.001, "loss": 2.5009, "step": 205668 }, { "epoch": 39.49, "learning_rate": 0.001, "loss": 2.5023, "step": 205680 }, { "epoch": 39.5, "learning_rate": 0.001, "loss": 2.503, "step": 205692 }, { "epoch": 39.5, "learning_rate": 0.001, "loss": 2.5, "step": 205704 }, { "epoch": 39.5, "learning_rate": 0.001, "loss": 2.496, "step": 205716 }, { "epoch": 39.5, "learning_rate": 0.001, "loss": 2.4933, "step": 205728 }, { "epoch": 39.5, "learning_rate": 0.001, "loss": 2.505, "step": 205740 }, { "epoch": 39.51, "learning_rate": 0.001, "loss": 2.4971, "step": 205752 }, { "epoch": 39.51, "learning_rate": 0.001, "loss": 2.5072, "step": 205764 }, { "epoch": 39.51, "learning_rate": 0.001, "loss": 2.5012, "step": 205776 }, { "epoch": 39.51, "learning_rate": 0.001, "loss": 2.4991, "step": 205788 }, { "epoch": 39.52, "learning_rate": 0.001, "loss": 2.5005, "step": 205800 }, { "epoch": 39.52, "learning_rate": 0.001, "loss": 2.4954, "step": 205812 }, { "epoch": 39.52, "learning_rate": 0.001, "loss": 2.4958, "step": 205824 }, { "epoch": 39.52, "learning_rate": 0.001, "loss": 2.4922, "step": 205836 }, { "epoch": 39.53, "learning_rate": 0.001, "loss": 2.5041, "step": 205848 }, { "epoch": 39.53, "learning_rate": 0.001, "loss": 2.5157, "step": 205860 }, { "epoch": 39.53, "learning_rate": 0.001, "loss": 2.5029, "step": 205872 }, { "epoch": 39.53, "learning_rate": 0.001, "loss": 2.4971, "step": 205884 }, { "epoch": 39.53, "learning_rate": 0.001, "loss": 2.4989, "step": 205896 }, { "epoch": 39.54, "learning_rate": 0.001, "loss": 2.4992, "step": 205908 }, { "epoch": 39.54, "learning_rate": 0.001, "loss": 2.5048, "step": 205920 }, { "epoch": 39.54, "learning_rate": 0.001, "loss": 2.5092, "step": 205932 }, { "epoch": 39.54, "learning_rate": 0.001, "loss": 2.4954, "step": 205944 }, { "epoch": 39.55, "learning_rate": 0.001, "loss": 2.5045, "step": 205956 }, { "epoch": 39.55, "learning_rate": 0.001, "loss": 2.5002, "step": 205968 }, { "epoch": 39.55, "learning_rate": 0.001, "loss": 2.4896, "step": 205980 }, { "epoch": 39.55, "learning_rate": 0.001, "loss": 2.5122, "step": 205992 }, { "epoch": 39.56, "learning_rate": 0.001, "loss": 2.5043, "step": 206004 }, { "epoch": 39.56, "learning_rate": 0.001, "loss": 2.498, "step": 206016 }, { "epoch": 39.56, "learning_rate": 0.001, "loss": 2.5086, "step": 206028 }, { "epoch": 39.56, "learning_rate": 0.001, "loss": 2.5051, "step": 206040 }, { "epoch": 39.56, "learning_rate": 0.001, "loss": 2.5078, "step": 206052 }, { "epoch": 39.57, "learning_rate": 0.001, "loss": 2.4995, "step": 206064 }, { "epoch": 39.57, "learning_rate": 0.001, "loss": 2.4985, "step": 206076 }, { "epoch": 39.57, "learning_rate": 0.001, "loss": 2.4993, "step": 206088 }, { "epoch": 39.57, "learning_rate": 0.001, "loss": 2.4893, "step": 206100 }, { "epoch": 39.58, "learning_rate": 0.001, "loss": 2.5048, "step": 206112 }, { "epoch": 39.58, "learning_rate": 0.001, "loss": 2.4951, "step": 206124 }, { "epoch": 39.58, "learning_rate": 0.001, "loss": 2.5083, "step": 206136 }, { "epoch": 39.58, "learning_rate": 0.001, "loss": 2.5053, "step": 206148 }, { "epoch": 39.59, "learning_rate": 0.001, "loss": 2.5015, "step": 206160 }, { "epoch": 39.59, "learning_rate": 0.001, "loss": 2.5077, "step": 206172 }, { "epoch": 39.59, "learning_rate": 0.001, "loss": 2.4862, "step": 206184 }, { "epoch": 39.59, "learning_rate": 0.001, "loss": 2.5049, "step": 206196 }, { "epoch": 39.59, "learning_rate": 0.001, "loss": 2.5034, "step": 206208 }, { "epoch": 39.6, "learning_rate": 0.001, "loss": 2.5002, "step": 206220 }, { "epoch": 39.6, "learning_rate": 0.001, "loss": 2.503, "step": 206232 }, { "epoch": 39.6, "learning_rate": 0.001, "loss": 2.496, "step": 206244 }, { "epoch": 39.6, "eval_ag_news_accuracy": 0.32921875, "eval_ag_news_bleu_score": 4.884203174248183, "eval_ag_news_bleu_score_sem": 0.15401255351595175, "eval_ag_news_emb_cos_sim": 0.822045624256134, "eval_ag_news_emb_cos_sim_sem": 0.006645820440319048, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4755210876464844, "eval_ag_news_n_ngrams_match_1": 14.374, "eval_ag_news_n_ngrams_match_2": 3.19, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 46.452, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.31466292178353, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3585548501456335, "eval_ag_news_runtime": 28.1772, "eval_ag_news_samples_per_second": 17.745, "eval_ag_news_steps_per_second": 0.035, "eval_ag_news_token_set_f1": 0.35929547386418587, "eval_ag_news_token_set_f1_sem": 0.004474480874861666, "eval_ag_news_token_set_precision": 0.3449567106477639, "eval_ag_news_token_set_recall": 0.39057899466923424, "eval_ag_news_true_num_tokens": 56.09375, "step": 206250 }, { "epoch": 39.6, "eval_anthropic_toxic_prompts_accuracy": 0.116, "eval_anthropic_toxic_prompts_bleu_score": 3.219723676354917, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12922401070142656, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6761558055877686, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008352271262451237, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.188995122909546, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.338, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.974, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 47.342, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.26403281764672, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2167797885681345, "eval_anthropic_toxic_prompts_runtime": 19.4513, "eval_anthropic_toxic_prompts_samples_per_second": 25.705, "eval_anthropic_toxic_prompts_steps_per_second": 0.051, "eval_anthropic_toxic_prompts_token_set_f1": 0.356023255124861, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553045984357989, "eval_anthropic_toxic_prompts_token_set_precision": 0.44371696485369744, "eval_anthropic_toxic_prompts_token_set_recall": 0.3210130147500456, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 206250 }, { "epoch": 39.6, "eval_arxiv_accuracy": 0.351625, "eval_arxiv_bleu_score": 4.416606303699746, "eval_arxiv_bleu_score_sem": 0.13074291373240818, "eval_arxiv_emb_cos_sim": 0.7801247239112854, "eval_arxiv_emb_cos_sim_sem": 0.006129065174716564, "eval_arxiv_emb_top1_equal": 0.328125, "eval_arxiv_emb_top1_equal_sem": 0.041664103776406315, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.350700616836548, "eval_arxiv_n_ngrams_match_1": 15.32, "eval_arxiv_n_ngrams_match_2": 2.974, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 39.632, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.52271013596198, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37201024135752503, "eval_arxiv_runtime": 12.3612, "eval_arxiv_samples_per_second": 40.449, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.3590328214546753, "eval_arxiv_token_set_f1_sem": 0.004241096979003804, "eval_arxiv_token_set_precision": 0.3130754958349433, "eval_arxiv_token_set_recall": 0.4406278496156961, "eval_arxiv_true_num_tokens": 64.0, "step": 206250 }, { "epoch": 39.6, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.768408780132859, "eval_python_code_alpaca_bleu_score_sem": 0.15065037302790762, "eval_python_code_alpaca_emb_cos_sim": 0.7646748423576355, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007684574302896574, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.829019069671631, "eval_python_code_alpaca_n_ngrams_match_1": 9.888, "eval_python_code_alpaca_n_ngrams_match_2": 2.978, "eval_python_code_alpaca_n_ngrams_match_3": 1.012, "eval_python_code_alpaca_num_pred_words": 43.214, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.92884665809212, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34163067713443895, "eval_python_code_alpaca_runtime": 14.698, "eval_python_code_alpaca_samples_per_second": 34.018, "eval_python_code_alpaca_steps_per_second": 0.068, "eval_python_code_alpaca_token_set_f1": 0.4790692803649071, "eval_python_code_alpaca_token_set_f1_sem": 0.005652008607884245, "eval_python_code_alpaca_token_set_precision": 0.5426187165129907, "eval_python_code_alpaca_token_set_recall": 0.45155630006571473, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 206250 }, { "epoch": 39.6, "eval_wikibio_accuracy": 0.33040625, "eval_wikibio_bleu_score": 6.024209986316364, "eval_wikibio_bleu_score_sem": 0.21936321620038762, "eval_wikibio_emb_cos_sim": 0.714040219783783, "eval_wikibio_emb_cos_sim_sem": 0.011807102435538876, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6262500286102295, "eval_wikibio_n_ngrams_match_1": 9.524, "eval_wikibio_n_ngrams_match_2": 3.28, "eval_wikibio_n_ngrams_match_3": 1.252, "eval_wikibio_num_pred_words": 34.202, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.57165946689315, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34425747474653673, "eval_wikibio_runtime": 9.8873, "eval_wikibio_samples_per_second": 50.57, "eval_wikibio_steps_per_second": 0.101, "eval_wikibio_token_set_f1": 0.30739173183559565, "eval_wikibio_token_set_f1_sem": 0.005977212403921819, "eval_wikibio_token_set_precision": 0.30895612659226857, "eval_wikibio_token_set_recall": 0.32452173528551265, "eval_wikibio_true_num_tokens": 61.1328125, "step": 206250 }, { "epoch": 39.6, "eval_nq_accuracy": 0.5366875, "eval_nq_bleu_score": 12.075448880867258, "eval_nq_bleu_score_sem": 0.4823829974590332, "eval_nq_emb_cos_sim": 0.8319193124771118, "eval_nq_emb_cos_sim_sem": 0.007117690629346445, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.127696990966797, "eval_nq_n_ngrams_match_1": 23.326, "eval_nq_n_ngrams_match_2": 8.69, "eval_nq_n_ngrams_match_3": 4.026, "eval_nq_num_pred_words": 48.624, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.395509595637256, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.455331581858606, "eval_nq_runtime": 10.3617, "eval_nq_samples_per_second": 48.255, "eval_nq_steps_per_second": 0.097, "eval_nq_token_set_f1": 0.4683670883532629, "eval_nq_token_set_f1_sem": 0.004948844677081082, "eval_nq_token_set_precision": 0.4257490155971734, "eval_nq_token_set_recall": 0.5319635142423168, "eval_nq_true_num_tokens": 64.0, "step": 206250 }, { "epoch": 39.6, "learning_rate": 0.001, "loss": 2.5062, "step": 206256 }, { "epoch": 39.61, "learning_rate": 0.001, "loss": 2.4938, "step": 206268 }, { "epoch": 39.61, "learning_rate": 0.001, "loss": 2.5, "step": 206280 }, { "epoch": 39.61, "learning_rate": 0.001, "loss": 2.5026, "step": 206292 }, { "epoch": 39.61, "learning_rate": 0.001, "loss": 2.4946, "step": 206304 }, { "epoch": 39.62, "learning_rate": 0.001, "loss": 2.5001, "step": 206316 }, { "epoch": 39.62, "learning_rate": 0.001, "loss": 2.5049, "step": 206328 }, { "epoch": 39.62, "learning_rate": 0.001, "loss": 2.4948, "step": 206340 }, { "epoch": 39.62, "learning_rate": 0.001, "loss": 2.4984, "step": 206352 }, { "epoch": 39.62, "learning_rate": 0.001, "loss": 2.4934, "step": 206364 }, { "epoch": 39.63, "learning_rate": 0.001, "loss": 2.5015, "step": 206376 }, { "epoch": 39.63, "learning_rate": 0.001, "loss": 2.5024, "step": 206388 }, { "epoch": 39.63, "learning_rate": 0.001, "loss": 2.5005, "step": 206400 }, { "epoch": 39.63, "learning_rate": 0.001, "loss": 2.4967, "step": 206412 }, { "epoch": 39.64, "learning_rate": 0.001, "loss": 2.5052, "step": 206424 }, { "epoch": 39.64, "learning_rate": 0.001, "loss": 2.5008, "step": 206436 }, { "epoch": 39.64, "learning_rate": 0.001, "loss": 2.5102, "step": 206448 }, { "epoch": 39.64, "learning_rate": 0.001, "loss": 2.4943, "step": 206460 }, { "epoch": 39.65, "learning_rate": 0.001, "loss": 2.4998, "step": 206472 }, { "epoch": 39.65, "learning_rate": 0.001, "loss": 2.5028, "step": 206484 }, { "epoch": 39.65, "learning_rate": 0.001, "loss": 2.5008, "step": 206496 }, { "epoch": 39.65, "learning_rate": 0.001, "loss": 2.5079, "step": 206508 }, { "epoch": 39.65, "learning_rate": 0.001, "loss": 2.4989, "step": 206520 }, { "epoch": 39.66, "learning_rate": 0.001, "loss": 2.5035, "step": 206532 }, { "epoch": 39.66, "learning_rate": 0.001, "loss": 2.5076, "step": 206544 }, { "epoch": 39.66, "learning_rate": 0.001, "loss": 2.496, "step": 206556 }, { "epoch": 39.66, "learning_rate": 0.001, "loss": 2.4981, "step": 206568 }, { "epoch": 39.67, "learning_rate": 0.001, "loss": 2.5064, "step": 206580 }, { "epoch": 39.67, "learning_rate": 0.001, "loss": 2.5084, "step": 206592 }, { "epoch": 39.67, "learning_rate": 0.001, "loss": 2.4939, "step": 206604 }, { "epoch": 39.67, "learning_rate": 0.001, "loss": 2.5035, "step": 206616 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 2.5004, "step": 206628 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 2.5087, "step": 206640 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 2.5028, "step": 206652 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 2.492, "step": 206664 }, { "epoch": 39.68, "learning_rate": 0.001, "loss": 2.5117, "step": 206676 }, { "epoch": 39.69, "learning_rate": 0.001, "loss": 2.4969, "step": 206688 }, { "epoch": 39.69, "learning_rate": 0.001, "loss": 2.5026, "step": 206700 }, { "epoch": 39.69, "learning_rate": 0.001, "loss": 2.4985, "step": 206712 }, { "epoch": 39.69, "learning_rate": 0.001, "loss": 2.4958, "step": 206724 }, { "epoch": 39.7, "learning_rate": 0.001, "loss": 2.5047, "step": 206736 }, { "epoch": 39.7, "learning_rate": 0.001, "loss": 2.5041, "step": 206748 }, { "epoch": 39.7, "learning_rate": 0.001, "loss": 2.4969, "step": 206760 }, { "epoch": 39.7, "learning_rate": 0.001, "loss": 2.5077, "step": 206772 }, { "epoch": 39.71, "learning_rate": 0.001, "loss": 2.5085, "step": 206784 }, { "epoch": 39.71, "learning_rate": 0.001, "loss": 2.4917, "step": 206796 }, { "epoch": 39.71, "learning_rate": 0.001, "loss": 2.5005, "step": 206808 }, { "epoch": 39.71, "learning_rate": 0.001, "loss": 2.5011, "step": 206820 }, { "epoch": 39.71, "learning_rate": 0.001, "loss": 2.5037, "step": 206832 }, { "epoch": 39.72, "learning_rate": 0.001, "loss": 2.5054, "step": 206844 }, { "epoch": 39.72, "learning_rate": 0.001, "loss": 2.4979, "step": 206856 }, { "epoch": 39.72, "learning_rate": 0.001, "loss": 2.4997, "step": 206868 }, { "epoch": 39.72, "eval_ag_news_accuracy": 0.33003125, "eval_ag_news_bleu_score": 5.0389521107507935, "eval_ag_news_bleu_score_sem": 0.15799023083732622, "eval_ag_news_emb_cos_sim": 0.815719723701477, "eval_ag_news_emb_cos_sim_sem": 0.006692581710197377, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.480170965194702, "eval_ag_news_n_ngrams_match_1": 14.488, "eval_ag_news_n_ngrams_match_2": 3.346, "eval_ag_news_n_ngrams_match_3": 0.984, "eval_ag_news_num_pred_words": 47.4, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.465272032979705, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3583619457881534, "eval_ag_news_runtime": 30.5734, "eval_ag_news_samples_per_second": 16.354, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.35789815677468456, "eval_ag_news_token_set_f1_sem": 0.004543490543371862, "eval_ag_news_token_set_precision": 0.34557749570836926, "eval_ag_news_token_set_recall": 0.3856556188351927, "eval_ag_news_true_num_tokens": 56.09375, "step": 206875 }, { "epoch": 39.72, "eval_anthropic_toxic_prompts_accuracy": 0.11675, "eval_anthropic_toxic_prompts_bleu_score": 3.2455844921988, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12387271822905203, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6814239025115967, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00827429695852805, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1947433948516846, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.29, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 47.258, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.40391071964428, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2164375772817482, "eval_anthropic_toxic_prompts_runtime": 22.4068, "eval_anthropic_toxic_prompts_samples_per_second": 22.315, "eval_anthropic_toxic_prompts_steps_per_second": 0.045, "eval_anthropic_toxic_prompts_token_set_f1": 0.36258321846646896, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006652217700374055, "eval_anthropic_toxic_prompts_token_set_precision": 0.4451837827278284, "eval_anthropic_toxic_prompts_token_set_recall": 0.3351589472883476, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 206875 }, { "epoch": 39.72, "eval_arxiv_accuracy": 0.35040625, "eval_arxiv_bleu_score": 4.585184052532934, "eval_arxiv_bleu_score_sem": 0.14132065716788414, "eval_arxiv_emb_cos_sim": 0.7889276146888733, "eval_arxiv_emb_cos_sim_sem": 0.006292711509316745, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3383877277374268, "eval_arxiv_n_ngrams_match_1": 15.538, "eval_arxiv_n_ngrams_match_2": 3.134, "eval_arxiv_n_ngrams_match_3": 0.754, "eval_arxiv_num_pred_words": 40.716, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.173666447151692, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37131630759312895, "eval_arxiv_runtime": 11.4428, "eval_arxiv_samples_per_second": 43.695, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.3652076707006868, "eval_arxiv_token_set_f1_sem": 0.004213045804341446, "eval_arxiv_token_set_precision": 0.3178682300620975, "eval_arxiv_token_set_recall": 0.44796678101824844, "eval_arxiv_true_num_tokens": 64.0, "step": 206875 }, { "epoch": 39.72, "eval_python_code_alpaca_accuracy": 0.16375, "eval_python_code_alpaca_bleu_score": 5.053320707424584, "eval_python_code_alpaca_bleu_score_sem": 0.16023282589836868, "eval_python_code_alpaca_emb_cos_sim": 0.763613224029541, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00747103070947105, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8547465801239014, "eval_python_code_alpaca_n_ngrams_match_1": 9.928, "eval_python_code_alpaca_n_ngrams_match_2": 3.088, "eval_python_code_alpaca_n_ngrams_match_3": 1.086, "eval_python_code_alpaca_num_pred_words": 42.504, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.370034737792057, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3442229310993092, "eval_python_code_alpaca_runtime": 11.0653, "eval_python_code_alpaca_samples_per_second": 45.186, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.48335399384702316, "eval_python_code_alpaca_token_set_f1_sem": 0.005360806054945681, "eval_python_code_alpaca_token_set_precision": 0.5444294017872537, "eval_python_code_alpaca_token_set_recall": 0.4543502961454699, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 206875 }, { "epoch": 39.72, "eval_wikibio_accuracy": 0.33128125, "eval_wikibio_bleu_score": 6.448642743853439, "eval_wikibio_bleu_score_sem": 0.22362736590252225, "eval_wikibio_emb_cos_sim": 0.7393869161605835, "eval_wikibio_emb_cos_sim_sem": 0.010121785623756867, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6404178142547607, "eval_wikibio_n_ngrams_match_1": 10.3, "eval_wikibio_n_ngrams_match_2": 3.6, "eval_wikibio_n_ngrams_match_3": 1.374, "eval_wikibio_num_pred_words": 35.588, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.107755363058196, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3654155712334787, "eval_wikibio_runtime": 10.9774, "eval_wikibio_samples_per_second": 45.548, "eval_wikibio_steps_per_second": 0.091, "eval_wikibio_token_set_f1": 0.3291532832861972, "eval_wikibio_token_set_f1_sem": 0.005412232160063852, "eval_wikibio_token_set_precision": 0.3356559131664926, "eval_wikibio_token_set_recall": 0.3401851146789689, "eval_wikibio_true_num_tokens": 61.1328125, "step": 206875 }, { "epoch": 39.72, "eval_nq_accuracy": 0.537125, "eval_nq_bleu_score": 12.091661840283177, "eval_nq_bleu_score_sem": 0.48048349052468936, "eval_nq_emb_cos_sim": 0.8348127007484436, "eval_nq_emb_cos_sim_sem": 0.007264742959434478, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.122670888900757, "eval_nq_n_ngrams_match_1": 23.528, "eval_nq_n_ngrams_match_2": 8.684, "eval_nq_n_ngrams_match_3": 4.042, "eval_nq_num_pred_words": 48.902, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.353418772506828, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4552505289813184, "eval_nq_runtime": 11.6148, "eval_nq_samples_per_second": 43.049, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4709065843451083, "eval_nq_token_set_f1_sem": 0.004988196633602711, "eval_nq_token_set_precision": 0.4293234465148486, "eval_nq_token_set_recall": 0.5294117325746042, "eval_nq_true_num_tokens": 64.0, "step": 206875 }, { "epoch": 39.72, "learning_rate": 0.001, "loss": 2.5048, "step": 206880 }, { "epoch": 39.73, "learning_rate": 0.001, "loss": 2.4898, "step": 206892 }, { "epoch": 39.73, "learning_rate": 0.001, "loss": 2.4991, "step": 206904 }, { "epoch": 39.73, "learning_rate": 0.001, "loss": 2.499, "step": 206916 }, { "epoch": 39.73, "learning_rate": 0.001, "loss": 2.5029, "step": 206928 }, { "epoch": 39.74, "learning_rate": 0.001, "loss": 2.5007, "step": 206940 }, { "epoch": 39.74, "learning_rate": 0.001, "loss": 2.503, "step": 206952 }, { "epoch": 39.74, "learning_rate": 0.001, "loss": 2.5001, "step": 206964 }, { "epoch": 39.74, "learning_rate": 0.001, "loss": 2.4924, "step": 206976 }, { "epoch": 39.74, "learning_rate": 0.001, "loss": 2.4926, "step": 206988 }, { "epoch": 39.75, "learning_rate": 0.001, "loss": 2.4964, "step": 207000 }, { "epoch": 39.75, "learning_rate": 0.001, "loss": 2.4925, "step": 207012 }, { "epoch": 39.75, "learning_rate": 0.001, "loss": 2.4977, "step": 207024 }, { "epoch": 39.75, "learning_rate": 0.001, "loss": 2.4973, "step": 207036 }, { "epoch": 39.76, "learning_rate": 0.001, "loss": 2.5039, "step": 207048 }, { "epoch": 39.76, "learning_rate": 0.001, "loss": 2.4997, "step": 207060 }, { "epoch": 39.76, "learning_rate": 0.001, "loss": 2.5032, "step": 207072 }, { "epoch": 39.76, "learning_rate": 0.001, "loss": 2.5021, "step": 207084 }, { "epoch": 39.76, "learning_rate": 0.001, "loss": 2.495, "step": 207096 }, { "epoch": 39.77, "learning_rate": 0.001, "loss": 2.5043, "step": 207108 }, { "epoch": 39.77, "learning_rate": 0.001, "loss": 2.502, "step": 207120 }, { "epoch": 39.77, "learning_rate": 0.001, "loss": 2.5042, "step": 207132 }, { "epoch": 39.77, "learning_rate": 0.001, "loss": 2.4992, "step": 207144 }, { "epoch": 39.78, "learning_rate": 0.001, "loss": 2.5049, "step": 207156 }, { "epoch": 39.78, "learning_rate": 0.001, "loss": 2.4958, "step": 207168 }, { "epoch": 39.78, "learning_rate": 0.001, "loss": 2.5004, "step": 207180 }, { "epoch": 39.78, "learning_rate": 0.001, "loss": 2.5109, "step": 207192 }, { "epoch": 39.79, "learning_rate": 0.001, "loss": 2.5003, "step": 207204 }, { "epoch": 39.79, "learning_rate": 0.001, "loss": 2.5068, "step": 207216 }, { "epoch": 39.79, "learning_rate": 0.001, "loss": 2.5014, "step": 207228 }, { "epoch": 39.79, "learning_rate": 0.001, "loss": 2.5022, "step": 207240 }, { "epoch": 39.79, "learning_rate": 0.001, "loss": 2.4948, "step": 207252 }, { "epoch": 39.8, "learning_rate": 0.001, "loss": 2.5, "step": 207264 }, { "epoch": 39.8, "learning_rate": 0.001, "loss": 2.5018, "step": 207276 }, { "epoch": 39.8, "learning_rate": 0.001, "loss": 2.5037, "step": 207288 }, { "epoch": 39.8, "learning_rate": 0.001, "loss": 2.5039, "step": 207300 }, { "epoch": 39.81, "learning_rate": 0.001, "loss": 2.5069, "step": 207312 }, { "epoch": 39.81, "learning_rate": 0.001, "loss": 2.4991, "step": 207324 }, { "epoch": 39.81, "learning_rate": 0.001, "loss": 2.504, "step": 207336 }, { "epoch": 39.81, "learning_rate": 0.001, "loss": 2.5095, "step": 207348 }, { "epoch": 39.82, "learning_rate": 0.001, "loss": 2.499, "step": 207360 }, { "epoch": 39.82, "learning_rate": 0.001, "loss": 2.5018, "step": 207372 }, { "epoch": 39.82, "learning_rate": 0.001, "loss": 2.5138, "step": 207384 }, { "epoch": 39.82, "learning_rate": 0.001, "loss": 2.4976, "step": 207396 }, { "epoch": 39.82, "learning_rate": 0.001, "loss": 2.4985, "step": 207408 }, { "epoch": 39.83, "learning_rate": 0.001, "loss": 2.5028, "step": 207420 }, { "epoch": 39.83, "learning_rate": 0.001, "loss": 2.5023, "step": 207432 }, { "epoch": 39.83, "learning_rate": 0.001, "loss": 2.4992, "step": 207444 }, { "epoch": 39.83, "learning_rate": 0.001, "loss": 2.5168, "step": 207456 }, { "epoch": 39.84, "learning_rate": 0.001, "loss": 2.4968, "step": 207468 }, { "epoch": 39.84, "learning_rate": 0.001, "loss": 2.5063, "step": 207480 }, { "epoch": 39.84, "learning_rate": 0.001, "loss": 2.5039, "step": 207492 }, { "epoch": 39.84, "eval_ag_news_accuracy": 0.32965625, "eval_ag_news_bleu_score": 4.974430581480158, "eval_ag_news_bleu_score_sem": 0.15357801433268847, "eval_ag_news_emb_cos_sim": 0.8197583556175232, "eval_ag_news_emb_cos_sim_sem": 0.007557454139431654, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4721226692199707, "eval_ag_news_n_ngrams_match_1": 14.406, "eval_ag_news_n_ngrams_match_2": 3.306, "eval_ag_news_n_ngrams_match_3": 0.956, "eval_ag_news_num_pred_words": 46.86, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.205030569680574, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35729248455953744, "eval_ag_news_runtime": 29.8942, "eval_ag_news_samples_per_second": 16.726, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.3589743507365347, "eval_ag_news_token_set_f1_sem": 0.0045018404347702, "eval_ag_news_token_set_precision": 0.34413415343274584, "eval_ag_news_token_set_recall": 0.3897680680992472, "eval_ag_news_true_num_tokens": 56.09375, "step": 207500 }, { "epoch": 39.84, "eval_anthropic_toxic_prompts_accuracy": 0.11621875, "eval_anthropic_toxic_prompts_bleu_score": 3.180429764852126, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12068592920934888, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6749402284622192, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008165359867473042, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.202605962753296, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.262, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.926, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748, "eval_anthropic_toxic_prompts_num_pred_words": 47.024, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.596544430173704, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21634137899409395, "eval_anthropic_toxic_prompts_runtime": 19.5564, "eval_anthropic_toxic_prompts_samples_per_second": 25.567, "eval_anthropic_toxic_prompts_steps_per_second": 0.051, "eval_anthropic_toxic_prompts_token_set_f1": 0.359952372641166, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006684297985333396, "eval_anthropic_toxic_prompts_token_set_precision": 0.4437842390260149, "eval_anthropic_toxic_prompts_token_set_recall": 0.3302393170791224, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 207500 }, { "epoch": 39.84, "eval_arxiv_accuracy": 0.353625, "eval_arxiv_bleu_score": 4.4837706668470565, "eval_arxiv_bleu_score_sem": 0.13277773425486106, "eval_arxiv_emb_cos_sim": 0.785962700843811, "eval_arxiv_emb_cos_sim_sem": 0.006809564239506411, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.327991247177124, "eval_arxiv_n_ngrams_match_1": 15.388, "eval_arxiv_n_ngrams_match_2": 3.092, "eval_arxiv_n_ngrams_match_3": 0.736, "eval_arxiv_num_pred_words": 40.114, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.88227680958258, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3718989531713206, "eval_arxiv_runtime": 12.3234, "eval_arxiv_samples_per_second": 40.573, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.36151538580594356, "eval_arxiv_token_set_f1_sem": 0.004413554332080352, "eval_arxiv_token_set_precision": 0.31321404093939337, "eval_arxiv_token_set_recall": 0.44813877961170784, "eval_arxiv_true_num_tokens": 64.0, "step": 207500 }, { "epoch": 39.84, "eval_python_code_alpaca_accuracy": 0.16325, "eval_python_code_alpaca_bleu_score": 4.81153566685242, "eval_python_code_alpaca_bleu_score_sem": 0.14915521931042197, "eval_python_code_alpaca_emb_cos_sim": 0.7589878439903259, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007668823893899151, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.857896566390991, "eval_python_code_alpaca_n_ngrams_match_1": 9.894, "eval_python_code_alpaca_n_ngrams_match_2": 2.998, "eval_python_code_alpaca_n_ngrams_match_3": 1.03, "eval_python_code_alpaca_num_pred_words": 42.856, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.42483637556447, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3362432062250609, "eval_python_code_alpaca_runtime": 18.2458, "eval_python_code_alpaca_samples_per_second": 27.404, "eval_python_code_alpaca_steps_per_second": 0.055, "eval_python_code_alpaca_token_set_f1": 0.4818597306786629, "eval_python_code_alpaca_token_set_f1_sem": 0.005618661827092928, "eval_python_code_alpaca_token_set_precision": 0.5397537359191535, "eval_python_code_alpaca_token_set_recall": 0.4600229760708083, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 207500 }, { "epoch": 39.84, "eval_wikibio_accuracy": 0.332875, "eval_wikibio_bleu_score": 6.378910879757371, "eval_wikibio_bleu_score_sem": 0.2347065515807592, "eval_wikibio_emb_cos_sim": 0.7445087432861328, "eval_wikibio_emb_cos_sim_sem": 0.008973689665251923, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.60418963432312, "eval_wikibio_n_ngrams_match_1": 10.088, "eval_wikibio_n_ngrams_match_2": 3.48, "eval_wikibio_n_ngrams_match_3": 1.308, "eval_wikibio_num_pred_words": 35.004, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.7518893169753, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36013711711533297, "eval_wikibio_runtime": 12.503, "eval_wikibio_samples_per_second": 39.99, "eval_wikibio_steps_per_second": 0.08, "eval_wikibio_token_set_f1": 0.3248800530088759, "eval_wikibio_token_set_f1_sem": 0.005520006533132245, "eval_wikibio_token_set_precision": 0.3289126554818462, "eval_wikibio_token_set_recall": 0.3396739876203575, "eval_wikibio_true_num_tokens": 61.1328125, "step": 207500 }, { "epoch": 39.84, "eval_nq_accuracy": 0.5355, "eval_nq_bleu_score": 12.053846841680388, "eval_nq_bleu_score_sem": 0.48184386502529686, "eval_nq_emb_cos_sim": 0.8340513706207275, "eval_nq_emb_cos_sim_sem": 0.0069508845981969625, "eval_nq_emb_top1_equal": 0.2421875, "eval_nq_emb_top1_equal_sem": 0.038014990119662626, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.122384786605835, "eval_nq_n_ngrams_match_1": 23.456, "eval_nq_n_ngrams_match_2": 8.716, "eval_nq_n_ngrams_match_3": 4.034, "eval_nq_num_pred_words": 49.132, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.351029182075523, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4557837840848542, "eval_nq_runtime": 31.412, "eval_nq_samples_per_second": 15.917, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.46939636316425354, "eval_nq_token_set_f1_sem": 0.004888744949717722, "eval_nq_token_set_precision": 0.4279788650858496, "eval_nq_token_set_recall": 0.5278032065376848, "eval_nq_true_num_tokens": 64.0, "step": 207500 }, { "epoch": 39.84, "learning_rate": 0.001, "loss": 2.4973, "step": 207504 }, { "epoch": 39.85, "learning_rate": 0.001, "loss": 2.4949, "step": 207516 }, { "epoch": 39.85, "learning_rate": 0.001, "loss": 2.5172, "step": 207528 }, { "epoch": 39.85, "learning_rate": 0.001, "loss": 2.494, "step": 207540 }, { "epoch": 39.85, "learning_rate": 0.001, "loss": 2.5018, "step": 207552 }, { "epoch": 39.85, "learning_rate": 0.001, "loss": 2.4915, "step": 207564 }, { "epoch": 39.86, "learning_rate": 0.001, "loss": 2.5157, "step": 207576 }, { "epoch": 39.86, "learning_rate": 0.001, "loss": 2.4959, "step": 207588 }, { "epoch": 39.86, "learning_rate": 0.001, "loss": 2.5001, "step": 207600 }, { "epoch": 39.86, "learning_rate": 0.001, "loss": 2.5006, "step": 207612 }, { "epoch": 39.87, "learning_rate": 0.001, "loss": 2.4946, "step": 207624 }, { "epoch": 39.87, "learning_rate": 0.001, "loss": 2.503, "step": 207636 }, { "epoch": 39.87, "learning_rate": 0.001, "loss": 2.5118, "step": 207648 }, { "epoch": 39.87, "learning_rate": 0.001, "loss": 2.4951, "step": 207660 }, { "epoch": 39.88, "learning_rate": 0.001, "loss": 2.4916, "step": 207672 }, { "epoch": 39.88, "learning_rate": 0.001, "loss": 2.497, "step": 207684 }, { "epoch": 39.88, "learning_rate": 0.001, "loss": 2.5058, "step": 207696 }, { "epoch": 39.88, "learning_rate": 0.001, "loss": 2.4901, "step": 207708 }, { "epoch": 39.88, "learning_rate": 0.001, "loss": 2.4976, "step": 207720 }, { "epoch": 39.89, "learning_rate": 0.001, "loss": 2.4941, "step": 207732 }, { "epoch": 39.89, "learning_rate": 0.001, "loss": 2.4911, "step": 207744 }, { "epoch": 39.89, "learning_rate": 0.001, "loss": 2.4976, "step": 207756 }, { "epoch": 39.89, "learning_rate": 0.001, "loss": 2.5133, "step": 207768 }, { "epoch": 39.9, "learning_rate": 0.001, "loss": 2.4997, "step": 207780 }, { "epoch": 39.9, "learning_rate": 0.001, "loss": 2.4981, "step": 207792 }, { "epoch": 39.9, "learning_rate": 0.001, "loss": 2.5085, "step": 207804 }, { "epoch": 39.9, "learning_rate": 0.001, "loss": 2.4968, "step": 207816 }, { "epoch": 39.91, "learning_rate": 0.001, "loss": 2.512, "step": 207828 }, { "epoch": 39.91, "learning_rate": 0.001, "loss": 2.5072, "step": 207840 }, { "epoch": 39.91, "learning_rate": 0.001, "loss": 2.5055, "step": 207852 }, { "epoch": 39.91, "learning_rate": 0.001, "loss": 2.4934, "step": 207864 }, { "epoch": 39.91, "learning_rate": 0.001, "loss": 2.4991, "step": 207876 }, { "epoch": 39.92, "learning_rate": 0.001, "loss": 2.5069, "step": 207888 }, { "epoch": 39.92, "learning_rate": 0.001, "loss": 2.5074, "step": 207900 }, { "epoch": 39.92, "learning_rate": 0.001, "loss": 2.4977, "step": 207912 }, { "epoch": 39.92, "learning_rate": 0.001, "loss": 2.504, "step": 207924 }, { "epoch": 39.93, "learning_rate": 0.001, "loss": 2.488, "step": 207936 }, { "epoch": 39.93, "learning_rate": 0.001, "loss": 2.4991, "step": 207948 }, { "epoch": 39.93, "learning_rate": 0.001, "loss": 2.4975, "step": 207960 }, { "epoch": 39.93, "learning_rate": 0.001, "loss": 2.5077, "step": 207972 }, { "epoch": 39.94, "learning_rate": 0.001, "loss": 2.5031, "step": 207984 }, { "epoch": 39.94, "learning_rate": 0.001, "loss": 2.5033, "step": 207996 }, { "epoch": 39.94, "learning_rate": 0.001, "loss": 2.4879, "step": 208008 }, { "epoch": 39.94, "learning_rate": 0.001, "loss": 2.5076, "step": 208020 }, { "epoch": 39.94, "learning_rate": 0.001, "loss": 2.498, "step": 208032 }, { "epoch": 39.95, "learning_rate": 0.001, "loss": 2.5131, "step": 208044 }, { "epoch": 39.95, "learning_rate": 0.001, "loss": 2.5099, "step": 208056 }, { "epoch": 39.95, "learning_rate": 0.001, "loss": 2.5012, "step": 208068 }, { "epoch": 39.95, "learning_rate": 0.001, "loss": 2.5042, "step": 208080 }, { "epoch": 39.96, "learning_rate": 0.001, "loss": 2.5033, "step": 208092 }, { "epoch": 39.96, "learning_rate": 0.001, "loss": 2.5018, "step": 208104 }, { "epoch": 39.96, "learning_rate": 0.001, "loss": 2.507, "step": 208116 }, { "epoch": 39.96, "eval_ag_news_accuracy": 0.32915625, "eval_ag_news_bleu_score": 4.957941693897078, "eval_ag_news_bleu_score_sem": 0.15837617001652107, "eval_ag_news_emb_cos_sim": 0.82401442527771, "eval_ag_news_emb_cos_sim_sem": 0.006690185925277335, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.481919527053833, "eval_ag_news_n_ngrams_match_1": 14.496, "eval_ag_news_n_ngrams_match_2": 3.234, "eval_ag_news_n_ngrams_match_3": 0.91, "eval_ag_news_num_pred_words": 46.938, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.52208922911754, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35757241730470135, "eval_ag_news_runtime": 11.8374, "eval_ag_news_samples_per_second": 42.239, "eval_ag_news_steps_per_second": 0.084, "eval_ag_news_token_set_f1": 0.3621840493357217, "eval_ag_news_token_set_f1_sem": 0.004443586697038994, "eval_ag_news_token_set_precision": 0.34816432493038174, "eval_ag_news_token_set_recall": 0.3933903747258254, "eval_ag_news_true_num_tokens": 56.09375, "step": 208125 }, { "epoch": 39.96, "eval_anthropic_toxic_prompts_accuracy": 0.1175, "eval_anthropic_toxic_prompts_bleu_score": 3.204275397146809, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12370260761442398, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681277871131897, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00874392414496126, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2054481506347656, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.32, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.958, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.742, "eval_anthropic_toxic_prompts_num_pred_words": 47.162, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.666551870701863, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2163057434799895, "eval_anthropic_toxic_prompts_runtime": 16.3233, "eval_anthropic_toxic_prompts_samples_per_second": 30.631, "eval_anthropic_toxic_prompts_steps_per_second": 0.061, "eval_anthropic_toxic_prompts_token_set_f1": 0.36520158966222227, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065147292234425174, "eval_anthropic_toxic_prompts_token_set_precision": 0.44653479021577364, "eval_anthropic_toxic_prompts_token_set_recall": 0.3343949383960443, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 208125 }, { "epoch": 39.96, "eval_arxiv_accuracy": 0.351625, "eval_arxiv_bleu_score": 4.428123532189055, "eval_arxiv_bleu_score_sem": 0.12908601435282482, "eval_arxiv_emb_cos_sim": 0.7822293639183044, "eval_arxiv_emb_cos_sim_sem": 0.006974802934726895, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3303310871124268, "eval_arxiv_n_ngrams_match_1": 15.374, "eval_arxiv_n_ngrams_match_2": 3.042, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 40.46, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.947593259568045, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36891843774496025, "eval_arxiv_runtime": 13.0162, "eval_arxiv_samples_per_second": 38.414, "eval_arxiv_steps_per_second": 0.077, "eval_arxiv_token_set_f1": 0.362821681666496, "eval_arxiv_token_set_f1_sem": 0.004411087348191074, "eval_arxiv_token_set_precision": 0.31337042526699593, "eval_arxiv_token_set_recall": 0.4524449360911324, "eval_arxiv_true_num_tokens": 64.0, "step": 208125 }, { "epoch": 39.96, "eval_python_code_alpaca_accuracy": 0.1635625, "eval_python_code_alpaca_bleu_score": 4.82214657008156, "eval_python_code_alpaca_bleu_score_sem": 0.15081900208830815, "eval_python_code_alpaca_emb_cos_sim": 0.7682631015777588, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007369671490069397, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8760225772857666, "eval_python_code_alpaca_n_ngrams_match_1": 9.924, "eval_python_code_alpaca_n_ngrams_match_2": 2.968, "eval_python_code_alpaca_n_ngrams_match_3": 1.02, "eval_python_code_alpaca_num_pred_words": 42.676, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.74355900813017, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33868127528350833, "eval_python_code_alpaca_runtime": 10.896, "eval_python_code_alpaca_samples_per_second": 45.888, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.48473469986698553, "eval_python_code_alpaca_token_set_f1_sem": 0.005543439296479784, "eval_python_code_alpaca_token_set_precision": 0.5427007918187946, "eval_python_code_alpaca_token_set_recall": 0.46240800856523556, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 208125 }, { "epoch": 39.96, "eval_wikibio_accuracy": 0.32965625, "eval_wikibio_bleu_score": 6.263466919901522, "eval_wikibio_bleu_score_sem": 0.22157059711157456, "eval_wikibio_emb_cos_sim": 0.7542101144790649, "eval_wikibio_emb_cos_sim_sem": 0.008448111878875994, "eval_wikibio_emb_top1_equal": 0.2578125, "eval_wikibio_emb_top1_equal_sem": 0.038815656435002115, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.624647855758667, "eval_wikibio_n_ngrams_match_1": 10.376, "eval_wikibio_n_ngrams_match_2": 3.536, "eval_wikibio_n_ngrams_match_3": 1.336, "eval_wikibio_num_pred_words": 36.55, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.51151137079693, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36463240139282665, "eval_wikibio_runtime": 10.7165, "eval_wikibio_samples_per_second": 46.657, "eval_wikibio_steps_per_second": 0.093, "eval_wikibio_token_set_f1": 0.3266759581809525, "eval_wikibio_token_set_f1_sem": 0.005431868781735417, "eval_wikibio_token_set_precision": 0.3381562381859853, "eval_wikibio_token_set_recall": 0.3308893163261011, "eval_wikibio_true_num_tokens": 61.1328125, "step": 208125 }, { "epoch": 39.96, "eval_nq_accuracy": 0.536375, "eval_nq_bleu_score": 12.16683357808994, "eval_nq_bleu_score_sem": 0.46993456627543373, "eval_nq_emb_cos_sim": 0.8329232931137085, "eval_nq_emb_cos_sim_sem": 0.007862013193671946, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.12465238571167, "eval_nq_n_ngrams_match_1": 23.45, "eval_nq_n_ngrams_match_2": 8.714, "eval_nq_n_ngrams_match_3": 4.078, "eval_nq_num_pred_words": 49.314, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.369987455139494, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45513151392729906, "eval_nq_runtime": 15.7634, "eval_nq_samples_per_second": 31.719, "eval_nq_steps_per_second": 0.063, "eval_nq_token_set_f1": 0.4673853917081633, "eval_nq_token_set_f1_sem": 0.0050488571731012065, "eval_nq_token_set_precision": 0.4244986155749227, "eval_nq_token_set_recall": 0.5287762809407234, "eval_nq_true_num_tokens": 64.0, "step": 208125 }, { "epoch": 39.96, "learning_rate": 0.001, "loss": 2.5111, "step": 208128 }, { "epoch": 39.97, "learning_rate": 0.001, "loss": 2.4984, "step": 208140 }, { "epoch": 39.97, "learning_rate": 0.001, "loss": 2.5121, "step": 208152 }, { "epoch": 39.97, "learning_rate": 0.001, "loss": 2.4984, "step": 208164 }, { "epoch": 39.97, "learning_rate": 0.001, "loss": 2.5047, "step": 208176 }, { "epoch": 39.97, "learning_rate": 0.001, "loss": 2.5061, "step": 208188 }, { "epoch": 39.98, "learning_rate": 0.001, "loss": 2.509, "step": 208200 }, { "epoch": 39.98, "learning_rate": 0.001, "loss": 2.489, "step": 208212 }, { "epoch": 39.98, "learning_rate": 0.001, "loss": 2.4996, "step": 208224 }, { "epoch": 39.98, "learning_rate": 0.001, "loss": 2.501, "step": 208236 }, { "epoch": 39.99, "learning_rate": 0.001, "loss": 2.5011, "step": 208248 }, { "epoch": 39.99, "learning_rate": 0.001, "loss": 2.4998, "step": 208260 }, { "epoch": 39.99, "learning_rate": 0.001, "loss": 2.5021, "step": 208272 }, { "epoch": 39.99, "learning_rate": 0.001, "loss": 2.5105, "step": 208284 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 2.5113, "step": 208296 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 2.5038, "step": 208308 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 2.5078, "step": 208320 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 2.4925, "step": 208332 }, { "epoch": 40.0, "learning_rate": 0.001, "loss": 2.4829, "step": 208344 }, { "epoch": 40.01, "learning_rate": 0.001, "loss": 2.4909, "step": 208356 }, { "epoch": 40.01, "learning_rate": 0.001, "loss": 2.4872, "step": 208368 }, { "epoch": 40.01, "learning_rate": 0.001, "loss": 2.4833, "step": 208380 }, { "epoch": 40.01, "learning_rate": 0.001, "loss": 2.4857, "step": 208392 }, { "epoch": 40.02, "learning_rate": 0.001, "loss": 2.4834, "step": 208404 }, { "epoch": 40.02, "learning_rate": 0.001, "loss": 2.4881, "step": 208416 }, { "epoch": 40.02, "learning_rate": 0.001, "loss": 2.4887, "step": 208428 }, { "epoch": 40.02, "learning_rate": 0.001, "loss": 2.48, "step": 208440 }, { "epoch": 40.03, "learning_rate": 0.001, "loss": 2.4926, "step": 208452 }, { "epoch": 40.03, "learning_rate": 0.001, "loss": 2.4912, "step": 208464 }, { "epoch": 40.03, "learning_rate": 0.001, "loss": 2.484, "step": 208476 }, { "epoch": 40.03, "learning_rate": 0.001, "loss": 2.4929, "step": 208488 }, { "epoch": 40.03, "learning_rate": 0.001, "loss": 2.4869, "step": 208500 }, { "epoch": 40.04, "learning_rate": 0.001, "loss": 2.4808, "step": 208512 }, { "epoch": 40.04, "learning_rate": 0.001, "loss": 2.4849, "step": 208524 }, { "epoch": 40.04, "learning_rate": 0.001, "loss": 2.494, "step": 208536 }, { "epoch": 40.04, "learning_rate": 0.001, "loss": 2.493, "step": 208548 }, { "epoch": 40.05, "learning_rate": 0.001, "loss": 2.4947, "step": 208560 }, { "epoch": 40.05, "learning_rate": 0.001, "loss": 2.4966, "step": 208572 }, { "epoch": 40.05, "learning_rate": 0.001, "loss": 2.4982, "step": 208584 }, { "epoch": 40.05, "learning_rate": 0.001, "loss": 2.4981, "step": 208596 }, { "epoch": 40.06, "learning_rate": 0.001, "loss": 2.4866, "step": 208608 }, { "epoch": 40.06, "learning_rate": 0.001, "loss": 2.4872, "step": 208620 }, { "epoch": 40.06, "learning_rate": 0.001, "loss": 2.4903, "step": 208632 }, { "epoch": 40.06, "learning_rate": 0.001, "loss": 2.4858, "step": 208644 }, { "epoch": 40.06, "learning_rate": 0.001, "loss": 2.4941, "step": 208656 }, { "epoch": 40.07, "learning_rate": 0.001, "loss": 2.4916, "step": 208668 }, { "epoch": 40.07, "learning_rate": 0.001, "loss": 2.4971, "step": 208680 }, { "epoch": 40.07, "learning_rate": 0.001, "loss": 2.491, "step": 208692 }, { "epoch": 40.07, "learning_rate": 0.001, "loss": 2.4889, "step": 208704 }, { "epoch": 40.08, "learning_rate": 0.001, "loss": 2.4882, "step": 208716 }, { "epoch": 40.08, "learning_rate": 0.001, "loss": 2.4907, "step": 208728 }, { "epoch": 40.08, "learning_rate": 0.001, "loss": 2.4951, "step": 208740 }, { "epoch": 40.08, "eval_ag_news_accuracy": 0.33096875, "eval_ag_news_bleu_score": 4.935941367708634, "eval_ag_news_bleu_score_sem": 0.1538459294985076, "eval_ag_news_emb_cos_sim": 0.8213269114494324, "eval_ag_news_emb_cos_sim_sem": 0.0065055465088516805, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.483672857284546, "eval_ag_news_n_ngrams_match_1": 14.498, "eval_ag_news_n_ngrams_match_2": 3.258, "eval_ag_news_n_ngrams_match_3": 0.894, "eval_ag_news_num_pred_words": 46.594, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.57916120972262, "eval_ag_news_pred_num_tokens": 62.828125, "eval_ag_news_rouge_score": 0.3610071342974589, "eval_ag_news_runtime": 11.5752, "eval_ag_news_samples_per_second": 43.196, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3621832274607168, "eval_ag_news_token_set_f1_sem": 0.0045705840055422714, "eval_ag_news_token_set_precision": 0.34763943650042556, "eval_ag_news_token_set_recall": 0.3930541701857484, "eval_ag_news_true_num_tokens": 56.09375, "step": 208750 }, { "epoch": 40.08, "eval_anthropic_toxic_prompts_accuracy": 0.11525, "eval_anthropic_toxic_prompts_bleu_score": 3.097525101761242, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11663056039354772, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789708137512207, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00862111020403404, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2116432189941406, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.234, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.7, "eval_anthropic_toxic_prompts_num_pred_words": 47.18, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.81983716191698, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21516375380994998, "eval_anthropic_toxic_prompts_runtime": 17.3061, "eval_anthropic_toxic_prompts_samples_per_second": 28.892, "eval_anthropic_toxic_prompts_steps_per_second": 0.058, "eval_anthropic_toxic_prompts_token_set_f1": 0.354691382890654, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006530206880100251, "eval_anthropic_toxic_prompts_token_set_precision": 0.44011877541032673, "eval_anthropic_toxic_prompts_token_set_recall": 0.3234596627295621, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 208750 }, { "epoch": 40.08, "eval_arxiv_accuracy": 0.3536875, "eval_arxiv_bleu_score": 4.448229909673317, "eval_arxiv_bleu_score_sem": 0.130967657186042, "eval_arxiv_emb_cos_sim": 0.7863626480102539, "eval_arxiv_emb_cos_sim_sem": 0.006658712107607318, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.340484857559204, "eval_arxiv_n_ngrams_match_1": 15.592, "eval_arxiv_n_ngrams_match_2": 3.068, "eval_arxiv_n_ngrams_match_3": 0.672, "eval_arxiv_num_pred_words": 39.946, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.232812279814862, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37596768707277856, "eval_arxiv_runtime": 11.5887, "eval_arxiv_samples_per_second": 43.145, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.36347388815320153, "eval_arxiv_token_set_f1_sem": 0.004275748206608021, "eval_arxiv_token_set_precision": 0.3164097050357384, "eval_arxiv_token_set_recall": 0.44445140774480296, "eval_arxiv_true_num_tokens": 64.0, "step": 208750 }, { "epoch": 40.08, "eval_python_code_alpaca_accuracy": 0.16396875, "eval_python_code_alpaca_bleu_score": 4.885110576187877, "eval_python_code_alpaca_bleu_score_sem": 0.15907377143308932, "eval_python_code_alpaca_emb_cos_sim": 0.7705926895141602, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008294998305136489, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.860717535018921, "eval_python_code_alpaca_n_ngrams_match_1": 9.988, "eval_python_code_alpaca_n_ngrams_match_2": 3.104, "eval_python_code_alpaca_n_ngrams_match_3": 1.092, "eval_python_code_alpaca_num_pred_words": 43.784, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.47406068980649, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33739495106213235, "eval_python_code_alpaca_runtime": 10.9683, "eval_python_code_alpaca_samples_per_second": 45.586, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.48477394076078506, "eval_python_code_alpaca_token_set_f1_sem": 0.005571838075289238, "eval_python_code_alpaca_token_set_precision": 0.5453718842198467, "eval_python_code_alpaca_token_set_recall": 0.45541301161504594, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 208750 }, { "epoch": 40.08, "eval_wikibio_accuracy": 0.33278125, "eval_wikibio_bleu_score": 6.3016171814692585, "eval_wikibio_bleu_score_sem": 0.21304977920731905, "eval_wikibio_emb_cos_sim": 0.7512096166610718, "eval_wikibio_emb_cos_sim_sem": 0.009017273457879236, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6376986503601074, "eval_wikibio_n_ngrams_match_1": 10.122, "eval_wikibio_n_ngrams_match_2": 3.532, "eval_wikibio_n_ngrams_match_3": 1.308, "eval_wikibio_num_pred_words": 35.194, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.00427488451915, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36452884895941184, "eval_wikibio_runtime": 10.8683, "eval_wikibio_samples_per_second": 46.005, "eval_wikibio_steps_per_second": 0.092, "eval_wikibio_token_set_f1": 0.32566865170184894, "eval_wikibio_token_set_f1_sem": 0.005296164996467761, "eval_wikibio_token_set_precision": 0.33206117543770086, "eval_wikibio_token_set_recall": 0.3365955105087792, "eval_wikibio_true_num_tokens": 61.1328125, "step": 208750 }, { "epoch": 40.08, "eval_nq_accuracy": 0.53803125, "eval_nq_bleu_score": 12.16726096911051, "eval_nq_bleu_score_sem": 0.4802400977228546, "eval_nq_emb_cos_sim": 0.837855339050293, "eval_nq_emb_cos_sim_sem": 0.007137091350551616, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1221439838409424, "eval_nq_n_ngrams_match_1": 23.38, "eval_nq_n_ngrams_match_2": 8.702, "eval_nq_n_ngrams_match_3": 4.084, "eval_nq_num_pred_words": 48.842, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.349018473260617, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.455708917268529, "eval_nq_runtime": 14.7363, "eval_nq_samples_per_second": 33.93, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.46982223235919346, "eval_nq_token_set_f1_sem": 0.005105655711209726, "eval_nq_token_set_precision": 0.4263708186276958, "eval_nq_token_set_recall": 0.5329242638413485, "eval_nq_true_num_tokens": 64.0, "step": 208750 }, { "epoch": 40.08, "learning_rate": 0.001, "loss": 2.4833, "step": 208752 }, { "epoch": 40.09, "learning_rate": 0.001, "loss": 2.4887, "step": 208764 }, { "epoch": 40.09, "learning_rate": 0.001, "loss": 2.4933, "step": 208776 }, { "epoch": 40.09, "learning_rate": 0.001, "loss": 2.5015, "step": 208788 }, { "epoch": 40.09, "learning_rate": 0.001, "loss": 2.489, "step": 208800 }, { "epoch": 40.09, "learning_rate": 0.001, "loss": 2.4893, "step": 208812 }, { "epoch": 40.1, "learning_rate": 0.001, "loss": 2.4956, "step": 208824 }, { "epoch": 40.1, "learning_rate": 0.001, "loss": 2.4961, "step": 208836 }, { "epoch": 40.1, "learning_rate": 0.001, "loss": 2.4892, "step": 208848 }, { "epoch": 40.1, "learning_rate": 0.001, "loss": 2.4886, "step": 208860 }, { "epoch": 40.11, "learning_rate": 0.001, "loss": 2.4931, "step": 208872 }, { "epoch": 40.11, "learning_rate": 0.001, "loss": 2.4817, "step": 208884 }, { "epoch": 40.11, "learning_rate": 0.001, "loss": 2.5012, "step": 208896 }, { "epoch": 40.11, "learning_rate": 0.001, "loss": 2.4917, "step": 208908 }, { "epoch": 40.12, "learning_rate": 0.001, "loss": 2.4963, "step": 208920 }, { "epoch": 40.12, "learning_rate": 0.001, "loss": 2.4891, "step": 208932 }, { "epoch": 40.12, "learning_rate": 0.001, "loss": 2.4914, "step": 208944 }, { "epoch": 40.12, "learning_rate": 0.001, "loss": 2.4903, "step": 208956 }, { "epoch": 40.12, "learning_rate": 0.001, "loss": 2.4879, "step": 208968 }, { "epoch": 40.13, "learning_rate": 0.001, "loss": 2.4952, "step": 208980 }, { "epoch": 40.13, "learning_rate": 0.001, "loss": 2.4798, "step": 208992 }, { "epoch": 40.13, "learning_rate": 0.001, "loss": 2.486, "step": 209004 }, { "epoch": 40.13, "learning_rate": 0.001, "loss": 2.4872, "step": 209016 }, { "epoch": 40.14, "learning_rate": 0.001, "loss": 2.4923, "step": 209028 }, { "epoch": 40.14, "learning_rate": 0.001, "loss": 2.5015, "step": 209040 }, { "epoch": 40.14, "learning_rate": 0.001, "loss": 2.4986, "step": 209052 }, { "epoch": 40.14, "learning_rate": 0.001, "loss": 2.4891, "step": 209064 }, { "epoch": 40.15, "learning_rate": 0.001, "loss": 2.493, "step": 209076 }, { "epoch": 40.15, "learning_rate": 0.001, "loss": 2.4851, "step": 209088 }, { "epoch": 40.15, "learning_rate": 0.001, "loss": 2.4873, "step": 209100 }, { "epoch": 40.15, "learning_rate": 0.001, "loss": 2.4876, "step": 209112 }, { "epoch": 40.15, "learning_rate": 0.001, "loss": 2.4863, "step": 209124 }, { "epoch": 40.16, "learning_rate": 0.001, "loss": 2.4904, "step": 209136 }, { "epoch": 40.16, "learning_rate": 0.001, "loss": 2.4834, "step": 209148 }, { "epoch": 40.16, "learning_rate": 0.001, "loss": 2.4986, "step": 209160 }, { "epoch": 40.16, "learning_rate": 0.001, "loss": 2.4931, "step": 209172 }, { "epoch": 40.17, "learning_rate": 0.001, "loss": 2.4881, "step": 209184 }, { "epoch": 40.17, "learning_rate": 0.001, "loss": 2.4928, "step": 209196 }, { "epoch": 40.17, "learning_rate": 0.001, "loss": 2.493, "step": 209208 }, { "epoch": 40.17, "learning_rate": 0.001, "loss": 2.5013, "step": 209220 }, { "epoch": 40.18, "learning_rate": 0.001, "loss": 2.5006, "step": 209232 }, { "epoch": 40.18, "learning_rate": 0.001, "loss": 2.4961, "step": 209244 }, { "epoch": 40.18, "learning_rate": 0.001, "loss": 2.4886, "step": 209256 }, { "epoch": 40.18, "learning_rate": 0.001, "loss": 2.4916, "step": 209268 }, { "epoch": 40.18, "learning_rate": 0.001, "loss": 2.4919, "step": 209280 }, { "epoch": 40.19, "learning_rate": 0.001, "loss": 2.4939, "step": 209292 }, { "epoch": 40.19, "learning_rate": 0.001, "loss": 2.485, "step": 209304 }, { "epoch": 40.19, "learning_rate": 0.001, "loss": 2.4927, "step": 209316 }, { "epoch": 40.19, "learning_rate": 0.001, "loss": 2.4931, "step": 209328 }, { "epoch": 40.2, "learning_rate": 0.001, "loss": 2.4862, "step": 209340 }, { "epoch": 40.2, "learning_rate": 0.001, "loss": 2.4988, "step": 209352 }, { "epoch": 40.2, "learning_rate": 0.001, "loss": 2.49, "step": 209364 }, { "epoch": 40.2, "eval_ag_news_accuracy": 0.3309375, "eval_ag_news_bleu_score": 4.93318129896034, "eval_ag_news_bleu_score_sem": 0.14533783890757565, "eval_ag_news_emb_cos_sim": 0.8203567862510681, "eval_ag_news_emb_cos_sim_sem": 0.006500523131344789, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4776253700256348, "eval_ag_news_n_ngrams_match_1": 14.488, "eval_ag_news_n_ngrams_match_2": 3.296, "eval_ag_news_n_ngrams_match_3": 0.918, "eval_ag_news_num_pred_words": 47.072, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.38273369250156, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35990483215168867, "eval_ag_news_runtime": 12.1129, "eval_ag_news_samples_per_second": 41.278, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3590367377856191, "eval_ag_news_token_set_f1_sem": 0.004419847666648293, "eval_ag_news_token_set_precision": 0.3464645656915929, "eval_ag_news_token_set_recall": 0.3877209631188465, "eval_ag_news_true_num_tokens": 56.09375, "step": 209375 }, { "epoch": 40.2, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.194501195207348, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12205462381697112, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6905585527420044, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008911199059800427, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.20143723487854, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.276, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.958, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.73, "eval_anthropic_toxic_prompts_num_pred_words": 47.008, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.567814555048678, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21425931687079672, "eval_anthropic_toxic_prompts_runtime": 11.5404, "eval_anthropic_toxic_prompts_samples_per_second": 43.326, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.35961137061853843, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006596553207682877, "eval_anthropic_toxic_prompts_token_set_precision": 0.43994764336799463, "eval_anthropic_toxic_prompts_token_set_recall": 0.3301284961525737, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 209375 }, { "epoch": 40.2, "eval_arxiv_accuracy": 0.35215625, "eval_arxiv_bleu_score": 4.452252721548929, "eval_arxiv_bleu_score_sem": 0.13454578809668796, "eval_arxiv_emb_cos_sim": 0.7796381711959839, "eval_arxiv_emb_cos_sim_sem": 0.00762268692421439, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3422701358795166, "eval_arxiv_n_ngrams_match_1": 15.41, "eval_arxiv_n_ngrams_match_2": 3.11, "eval_arxiv_n_ngrams_match_3": 0.722, "eval_arxiv_num_pred_words": 40.47, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.283260726359238, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3708061192118505, "eval_arxiv_runtime": 12.7394, "eval_arxiv_samples_per_second": 39.248, "eval_arxiv_steps_per_second": 0.078, "eval_arxiv_token_set_f1": 0.3624315697913239, "eval_arxiv_token_set_f1_sem": 0.004341473938293956, "eval_arxiv_token_set_precision": 0.3137251735981584, "eval_arxiv_token_set_recall": 0.44873390944587865, "eval_arxiv_true_num_tokens": 64.0, "step": 209375 }, { "epoch": 40.2, "eval_python_code_alpaca_accuracy": 0.16234375, "eval_python_code_alpaca_bleu_score": 4.58743821535871, "eval_python_code_alpaca_bleu_score_sem": 0.1477392500255441, "eval_python_code_alpaca_emb_cos_sim": 0.7528814077377319, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008492655829395944, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8829312324523926, "eval_python_code_alpaca_n_ngrams_match_1": 9.81, "eval_python_code_alpaca_n_ngrams_match_2": 2.876, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 43.536, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.866567561323127, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3308213686440464, "eval_python_code_alpaca_runtime": 11.4652, "eval_python_code_alpaca_samples_per_second": 43.61, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.47938306180475776, "eval_python_code_alpaca_token_set_f1_sem": 0.0053381083713815326, "eval_python_code_alpaca_token_set_precision": 0.5344018065093825, "eval_python_code_alpaca_token_set_recall": 0.457908204204466, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 209375 }, { "epoch": 40.2, "eval_wikibio_accuracy": 0.33221875, "eval_wikibio_bleu_score": 6.122986326382544, "eval_wikibio_bleu_score_sem": 0.2108694764063836, "eval_wikibio_emb_cos_sim": 0.745849609375, "eval_wikibio_emb_cos_sim_sem": 0.00949193258289794, "eval_wikibio_emb_top1_equal": 0.2890625, "eval_wikibio_emb_top1_equal_sem": 0.04022626667363519, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6151468753814697, "eval_wikibio_n_ngrams_match_1": 10.006, "eval_wikibio_n_ngrams_match_2": 3.364, "eval_wikibio_n_ngrams_match_3": 1.246, "eval_wikibio_num_pred_words": 35.648, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.15680294449252, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.356341779339303, "eval_wikibio_runtime": 12.0963, "eval_wikibio_samples_per_second": 41.335, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.3223301674895014, "eval_wikibio_token_set_f1_sem": 0.005406018528662601, "eval_wikibio_token_set_precision": 0.32741973151775566, "eval_wikibio_token_set_recall": 0.335332420786525, "eval_wikibio_true_num_tokens": 61.1328125, "step": 209375 }, { "epoch": 40.2, "eval_nq_accuracy": 0.5385, "eval_nq_bleu_score": 11.904968635578221, "eval_nq_bleu_score_sem": 0.4694043231357101, "eval_nq_emb_cos_sim": 0.8366987705230713, "eval_nq_emb_cos_sim_sem": 0.006706703538626558, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1219308376312256, "eval_nq_n_ngrams_match_1": 23.432, "eval_nq_n_ngrams_match_2": 8.68, "eval_nq_n_ngrams_match_3": 3.964, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.347239101258122, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4552547819811946, "eval_nq_runtime": 12.2671, "eval_nq_samples_per_second": 40.759, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.4698852687233232, "eval_nq_token_set_f1_sem": 0.004933105360904908, "eval_nq_token_set_precision": 0.42628357462327926, "eval_nq_token_set_recall": 0.5323010559740611, "eval_nq_true_num_tokens": 64.0, "step": 209375 }, { "epoch": 40.2, "learning_rate": 0.001, "loss": 2.4962, "step": 209376 }, { "epoch": 40.21, "learning_rate": 0.001, "loss": 2.4984, "step": 209388 }, { "epoch": 40.21, "learning_rate": 0.001, "loss": 2.5088, "step": 209400 }, { "epoch": 40.21, "learning_rate": 0.001, "loss": 2.4959, "step": 209412 }, { "epoch": 40.21, "learning_rate": 0.001, "loss": 2.4842, "step": 209424 }, { "epoch": 40.21, "learning_rate": 0.001, "loss": 2.4953, "step": 209436 }, { "epoch": 40.22, "learning_rate": 0.001, "loss": 2.4906, "step": 209448 }, { "epoch": 40.22, "learning_rate": 0.001, "loss": 2.4912, "step": 209460 }, { "epoch": 40.22, "learning_rate": 0.001, "loss": 2.4849, "step": 209472 }, { "epoch": 40.22, "learning_rate": 0.001, "loss": 2.5075, "step": 209484 }, { "epoch": 40.23, "learning_rate": 0.001, "loss": 2.4945, "step": 209496 }, { "epoch": 40.23, "learning_rate": 0.001, "loss": 2.492, "step": 209508 }, { "epoch": 40.23, "learning_rate": 0.001, "loss": 2.4938, "step": 209520 }, { "epoch": 40.23, "learning_rate": 0.001, "loss": 2.4954, "step": 209532 }, { "epoch": 40.24, "learning_rate": 0.001, "loss": 2.4865, "step": 209544 }, { "epoch": 40.24, "learning_rate": 0.001, "loss": 2.4852, "step": 209556 }, { "epoch": 40.24, "learning_rate": 0.001, "loss": 2.4981, "step": 209568 }, { "epoch": 40.24, "learning_rate": 0.001, "loss": 2.4969, "step": 209580 }, { "epoch": 40.24, "learning_rate": 0.001, "loss": 2.4839, "step": 209592 }, { "epoch": 40.25, "learning_rate": 0.001, "loss": 2.5008, "step": 209604 }, { "epoch": 40.25, "learning_rate": 0.001, "loss": 2.4896, "step": 209616 }, { "epoch": 40.25, "learning_rate": 0.001, "loss": 2.494, "step": 209628 }, { "epoch": 40.25, "learning_rate": 0.001, "loss": 2.4956, "step": 209640 }, { "epoch": 40.26, "learning_rate": 0.001, "loss": 2.4879, "step": 209652 }, { "epoch": 40.26, "learning_rate": 0.001, "loss": 2.4991, "step": 209664 }, { "epoch": 40.26, "learning_rate": 0.001, "loss": 2.5009, "step": 209676 }, { "epoch": 40.26, "learning_rate": 0.001, "loss": 2.4913, "step": 209688 }, { "epoch": 40.26, "learning_rate": 0.001, "loss": 2.4911, "step": 209700 }, { "epoch": 40.27, "learning_rate": 0.001, "loss": 2.5031, "step": 209712 }, { "epoch": 40.27, "learning_rate": 0.001, "loss": 2.481, "step": 209724 }, { "epoch": 40.27, "learning_rate": 0.001, "loss": 2.4987, "step": 209736 }, { "epoch": 40.27, "learning_rate": 0.001, "loss": 2.501, "step": 209748 }, { "epoch": 40.28, "learning_rate": 0.001, "loss": 2.4854, "step": 209760 }, { "epoch": 40.28, "learning_rate": 0.001, "loss": 2.4991, "step": 209772 }, { "epoch": 40.28, "learning_rate": 0.001, "loss": 2.4913, "step": 209784 }, { "epoch": 40.28, "learning_rate": 0.001, "loss": 2.5034, "step": 209796 }, { "epoch": 40.29, "learning_rate": 0.001, "loss": 2.4982, "step": 209808 }, { "epoch": 40.29, "learning_rate": 0.001, "loss": 2.5002, "step": 209820 }, { "epoch": 40.29, "learning_rate": 0.001, "loss": 2.4911, "step": 209832 }, { "epoch": 40.29, "learning_rate": 0.001, "loss": 2.4936, "step": 209844 }, { "epoch": 40.29, "learning_rate": 0.001, "loss": 2.4979, "step": 209856 }, { "epoch": 40.3, "learning_rate": 0.001, "loss": 2.4885, "step": 209868 }, { "epoch": 40.3, "learning_rate": 0.001, "loss": 2.4951, "step": 209880 }, { "epoch": 40.3, "learning_rate": 0.001, "loss": 2.48, "step": 209892 }, { "epoch": 40.3, "learning_rate": 0.001, "loss": 2.4951, "step": 209904 }, { "epoch": 40.31, "learning_rate": 0.001, "loss": 2.5018, "step": 209916 }, { "epoch": 40.31, "learning_rate": 0.001, "loss": 2.4855, "step": 209928 }, { "epoch": 40.31, "learning_rate": 0.001, "loss": 2.4949, "step": 209940 }, { "epoch": 40.31, "learning_rate": 0.001, "loss": 2.489, "step": 209952 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 2.4968, "step": 209964 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 2.4877, "step": 209976 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 2.4998, "step": 209988 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 2.4889, "step": 210000 }, { "epoch": 40.32, "eval_ag_news_accuracy": 0.32790625, "eval_ag_news_bleu_score": 4.80583148150224, "eval_ag_news_bleu_score_sem": 0.14883307171151608, "eval_ag_news_emb_cos_sim": 0.8192859292030334, "eval_ag_news_emb_cos_sim_sem": 0.006844483562042194, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4955286979675293, "eval_ag_news_n_ngrams_match_1": 14.352, "eval_ag_news_n_ngrams_match_2": 3.178, "eval_ag_news_n_ngrams_match_3": 0.862, "eval_ag_news_num_pred_words": 46.744, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.96771330924418, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3562794744503931, "eval_ag_news_runtime": 11.7802, "eval_ag_news_samples_per_second": 42.444, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.35605547466433163, "eval_ag_news_token_set_f1_sem": 0.00449296281684269, "eval_ag_news_token_set_precision": 0.34346409945266354, "eval_ag_news_token_set_recall": 0.3853818908189327, "eval_ag_news_true_num_tokens": 56.09375, "step": 210000 }, { "epoch": 40.32, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.343361252546647, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13079115699973715, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6850472092628479, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008981338001167634, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2146494388580322, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.812, "eval_anthropic_toxic_prompts_num_pred_words": 46.922, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.894563314756958, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22100386270129746, "eval_anthropic_toxic_prompts_runtime": 11.1469, "eval_anthropic_toxic_prompts_samples_per_second": 44.856, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.3556333968218707, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006553275851375397, "eval_anthropic_toxic_prompts_token_set_precision": 0.4440497811808492, "eval_anthropic_toxic_prompts_token_set_recall": 0.3227870203113184, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 210000 }, { "epoch": 40.32, "eval_arxiv_accuracy": 0.351375, "eval_arxiv_bleu_score": 4.426529597736935, "eval_arxiv_bleu_score_sem": 0.12933387731600798, "eval_arxiv_emb_cos_sim": 0.7829951047897339, "eval_arxiv_emb_cos_sim_sem": 0.006709320594501281, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.35552978515625, "eval_arxiv_n_ngrams_match_1": 15.472, "eval_arxiv_n_ngrams_match_2": 3.062, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.708, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.660784227322647, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37010746942971606, "eval_arxiv_runtime": 12.8819, "eval_arxiv_samples_per_second": 38.814, "eval_arxiv_steps_per_second": 0.078, "eval_arxiv_token_set_f1": 0.36062436532149145, "eval_arxiv_token_set_f1_sem": 0.004291935635760588, "eval_arxiv_token_set_precision": 0.3148144243616023, "eval_arxiv_token_set_recall": 0.43898269360850617, "eval_arxiv_true_num_tokens": 64.0, "step": 210000 }, { "epoch": 40.32, "eval_python_code_alpaca_accuracy": 0.1619375, "eval_python_code_alpaca_bleu_score": 4.8255838334559575, "eval_python_code_alpaca_bleu_score_sem": 0.1555355140434654, "eval_python_code_alpaca_emb_cos_sim": 0.7616056203842163, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009041864168351817, "eval_python_code_alpaca_emb_top1_equal": 0.109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.027695207821224692, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8678431510925293, "eval_python_code_alpaca_n_ngrams_match_1": 10.03, "eval_python_code_alpaca_n_ngrams_match_2": 3.062, "eval_python_code_alpaca_n_ngrams_match_3": 1.042, "eval_python_code_alpaca_num_pred_words": 43.044, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.599018810603624, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3436341632969879, "eval_python_code_alpaca_runtime": 11.2051, "eval_python_code_alpaca_samples_per_second": 44.623, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4795116049501576, "eval_python_code_alpaca_token_set_f1_sem": 0.005590784476489017, "eval_python_code_alpaca_token_set_precision": 0.5465008965189574, "eval_python_code_alpaca_token_set_recall": 0.4483892761876059, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 210000 }, { "epoch": 40.32, "eval_wikibio_accuracy": 0.3320625, "eval_wikibio_bleu_score": 6.137502554745872, "eval_wikibio_bleu_score_sem": 0.21732014561507812, "eval_wikibio_emb_cos_sim": 0.7399588823318481, "eval_wikibio_emb_cos_sim_sem": 0.009743779100541262, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.634054660797119, "eval_wikibio_n_ngrams_match_1": 10.158, "eval_wikibio_n_ngrams_match_2": 3.422, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 35.796, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.8660397202038, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35955470888347896, "eval_wikibio_runtime": 11.2374, "eval_wikibio_samples_per_second": 44.494, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.32491266562385956, "eval_wikibio_token_set_f1_sem": 0.005370835752497085, "eval_wikibio_token_set_precision": 0.33108912271739827, "eval_wikibio_token_set_recall": 0.3356568324562391, "eval_wikibio_true_num_tokens": 61.1328125, "step": 210000 }, { "epoch": 40.32, "eval_nq_accuracy": 0.5374375, "eval_nq_bleu_score": 12.006375269684451, "eval_nq_bleu_score_sem": 0.47693535751820915, "eval_nq_emb_cos_sim": 0.8355339765548706, "eval_nq_emb_cos_sim_sem": 0.007053370587251773, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1234610080718994, "eval_nq_n_ngrams_match_1": 23.29, "eval_nq_n_ngrams_match_2": 8.666, "eval_nq_n_ngrams_match_3": 3.988, "eval_nq_num_pred_words": 48.744, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.360021576981262, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4538973253726808, "eval_nq_runtime": 11.7989, "eval_nq_samples_per_second": 42.377, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.46452968421213203, "eval_nq_token_set_f1_sem": 0.004976013658556225, "eval_nq_token_set_precision": 0.42438179938582704, "eval_nq_token_set_recall": 0.5202218240750814, "eval_nq_true_num_tokens": 64.0, "step": 210000 }, { "epoch": 40.32, "learning_rate": 0.001, "loss": 2.495, "step": 210012 }, { "epoch": 40.33, "learning_rate": 0.001, "loss": 2.4981, "step": 210024 }, { "epoch": 40.33, "learning_rate": 0.001, "loss": 2.4994, "step": 210036 }, { "epoch": 40.33, "learning_rate": 0.001, "loss": 2.4988, "step": 210048 }, { "epoch": 40.33, "learning_rate": 0.001, "loss": 2.4969, "step": 210060 }, { "epoch": 40.34, "learning_rate": 0.001, "loss": 2.4906, "step": 210072 }, { "epoch": 40.34, "learning_rate": 0.001, "loss": 2.4912, "step": 210084 }, { "epoch": 40.34, "learning_rate": 0.001, "loss": 2.4957, "step": 210096 }, { "epoch": 40.34, "learning_rate": 0.001, "loss": 2.4957, "step": 210108 }, { "epoch": 40.35, "learning_rate": 0.001, "loss": 2.4965, "step": 210120 }, { "epoch": 40.35, "learning_rate": 0.001, "loss": 2.4946, "step": 210132 }, { "epoch": 40.35, "learning_rate": 0.001, "loss": 2.497, "step": 210144 }, { "epoch": 40.35, "learning_rate": 0.001, "loss": 2.4925, "step": 210156 }, { "epoch": 40.35, "learning_rate": 0.001, "loss": 2.4966, "step": 210168 }, { "epoch": 40.36, "learning_rate": 0.001, "loss": 2.5001, "step": 210180 }, { "epoch": 40.36, "learning_rate": 0.001, "loss": 2.4943, "step": 210192 }, { "epoch": 40.36, "learning_rate": 0.001, "loss": 2.4923, "step": 210204 }, { "epoch": 40.36, "learning_rate": 0.001, "loss": 2.4911, "step": 210216 }, { "epoch": 40.37, "learning_rate": 0.001, "loss": 2.4894, "step": 210228 }, { "epoch": 40.37, "learning_rate": 0.001, "loss": 2.4941, "step": 210240 }, { "epoch": 40.37, "learning_rate": 0.001, "loss": 2.4959, "step": 210252 }, { "epoch": 40.37, "learning_rate": 0.001, "loss": 2.4988, "step": 210264 }, { "epoch": 40.38, "learning_rate": 0.001, "loss": 2.4937, "step": 210276 }, { "epoch": 40.38, "learning_rate": 0.001, "loss": 2.5067, "step": 210288 }, { "epoch": 40.38, "learning_rate": 0.001, "loss": 2.4982, "step": 210300 }, { "epoch": 40.38, "learning_rate": 0.001, "loss": 2.4943, "step": 210312 }, { "epoch": 40.38, "learning_rate": 0.001, "loss": 2.4969, "step": 210324 }, { "epoch": 40.39, "learning_rate": 0.001, "loss": 2.5031, "step": 210336 }, { "epoch": 40.39, "learning_rate": 0.001, "loss": 2.5006, "step": 210348 }, { "epoch": 40.39, "learning_rate": 0.001, "loss": 2.4944, "step": 210360 }, { "epoch": 40.39, "learning_rate": 0.001, "loss": 2.4933, "step": 210372 }, { "epoch": 40.4, "learning_rate": 0.001, "loss": 2.4974, "step": 210384 }, { "epoch": 40.4, "learning_rate": 0.001, "loss": 2.5061, "step": 210396 }, { "epoch": 40.4, "learning_rate": 0.001, "loss": 2.4884, "step": 210408 }, { "epoch": 40.4, "learning_rate": 0.001, "loss": 2.4967, "step": 210420 }, { "epoch": 40.41, "learning_rate": 0.001, "loss": 2.4899, "step": 210432 }, { "epoch": 40.41, "learning_rate": 0.001, "loss": 2.5007, "step": 210444 }, { "epoch": 40.41, "learning_rate": 0.001, "loss": 2.501, "step": 210456 }, { "epoch": 40.41, "learning_rate": 0.001, "loss": 2.4941, "step": 210468 }, { "epoch": 40.41, "learning_rate": 0.001, "loss": 2.4993, "step": 210480 }, { "epoch": 40.42, "learning_rate": 0.001, "loss": 2.4834, "step": 210492 }, { "epoch": 40.42, "learning_rate": 0.001, "loss": 2.4981, "step": 210504 }, { "epoch": 40.42, "learning_rate": 0.001, "loss": 2.4917, "step": 210516 }, { "epoch": 40.42, "learning_rate": 0.001, "loss": 2.5036, "step": 210528 }, { "epoch": 40.43, "learning_rate": 0.001, "loss": 2.4883, "step": 210540 }, { "epoch": 40.43, "learning_rate": 0.001, "loss": 2.5039, "step": 210552 }, { "epoch": 40.43, "learning_rate": 0.001, "loss": 2.4921, "step": 210564 }, { "epoch": 40.43, "learning_rate": 0.001, "loss": 2.5046, "step": 210576 }, { "epoch": 40.44, "learning_rate": 0.001, "loss": 2.4961, "step": 210588 }, { "epoch": 40.44, "learning_rate": 0.001, "loss": 2.4977, "step": 210600 }, { "epoch": 40.44, "learning_rate": 0.001, "loss": 2.4921, "step": 210612 }, { "epoch": 40.44, "learning_rate": 0.001, "loss": 2.4956, "step": 210624 }, { "epoch": 40.44, "eval_ag_news_accuracy": 0.32871875, "eval_ag_news_bleu_score": 4.876870398780209, "eval_ag_news_bleu_score_sem": 0.16241775785277915, "eval_ag_news_emb_cos_sim": 0.8137305974960327, "eval_ag_news_emb_cos_sim_sem": 0.00768045247282111, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4638900756835938, "eval_ag_news_n_ngrams_match_1": 14.358, "eval_ag_news_n_ngrams_match_2": 3.22, "eval_ag_news_n_ngrams_match_3": 0.864, "eval_ag_news_num_pred_words": 46.612, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.940988012002432, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35835874044039284, "eval_ag_news_runtime": 11.7547, "eval_ag_news_samples_per_second": 42.536, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.35823911175592515, "eval_ag_news_token_set_f1_sem": 0.004558974469125087, "eval_ag_news_token_set_precision": 0.3435106189570707, "eval_ag_news_token_set_recall": 0.3883246332005096, "eval_ag_news_true_num_tokens": 56.09375, "step": 210625 }, { "epoch": 40.44, "eval_anthropic_toxic_prompts_accuracy": 0.1143125, "eval_anthropic_toxic_prompts_bleu_score": 3.1847753912167023, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1245610900900696, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6829575896263123, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008382243598643055, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.201392889022827, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.216, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.902, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 47.05, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.566725098445858, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2157115303601157, "eval_anthropic_toxic_prompts_runtime": 10.9879, "eval_anthropic_toxic_prompts_samples_per_second": 45.504, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3595051590713531, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006581567480878276, "eval_anthropic_toxic_prompts_token_set_precision": 0.43888456085591127, "eval_anthropic_toxic_prompts_token_set_recall": 0.3318332029080335, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 210625 }, { "epoch": 40.44, "eval_arxiv_accuracy": 0.35271875, "eval_arxiv_bleu_score": 4.338077270623405, "eval_arxiv_bleu_score_sem": 0.12700943965583625, "eval_arxiv_emb_cos_sim": 0.7769384384155273, "eval_arxiv_emb_cos_sim_sem": 0.006924786479345607, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3294804096221924, "eval_arxiv_n_ngrams_match_1": 15.314, "eval_arxiv_n_ngrams_match_2": 2.94, "eval_arxiv_n_ngrams_match_3": 0.672, "eval_arxiv_num_pred_words": 40.278, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.923828980377685, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36808475488458736, "eval_arxiv_runtime": 12.9984, "eval_arxiv_samples_per_second": 38.466, "eval_arxiv_steps_per_second": 0.077, "eval_arxiv_token_set_f1": 0.3600837828079317, "eval_arxiv_token_set_f1_sem": 0.0042502580140088885, "eval_arxiv_token_set_precision": 0.31187614693706206, "eval_arxiv_token_set_recall": 0.44790754064248495, "eval_arxiv_true_num_tokens": 64.0, "step": 210625 }, { "epoch": 40.44, "eval_python_code_alpaca_accuracy": 0.16315625, "eval_python_code_alpaca_bleu_score": 4.907965020543179, "eval_python_code_alpaca_bleu_score_sem": 0.15581937041705646, "eval_python_code_alpaca_emb_cos_sim": 0.7710789442062378, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235904636885408, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.850116729736328, "eval_python_code_alpaca_n_ngrams_match_1": 10.08, "eval_python_code_alpaca_n_ngrams_match_2": 3.158, "eval_python_code_alpaca_n_ngrams_match_3": 1.126, "eval_python_code_alpaca_num_pred_words": 44.12, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.28979995656837, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3388770115783863, "eval_python_code_alpaca_runtime": 11.5358, "eval_python_code_alpaca_samples_per_second": 43.343, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.4852917335105785, "eval_python_code_alpaca_token_set_f1_sem": 0.005660214690095791, "eval_python_code_alpaca_token_set_precision": 0.5487585492618948, "eval_python_code_alpaca_token_set_recall": 0.4561044806844883, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 210625 }, { "epoch": 40.44, "eval_wikibio_accuracy": 0.332, "eval_wikibio_bleu_score": 5.828536278432289, "eval_wikibio_bleu_score_sem": 0.22581346194596846, "eval_wikibio_emb_cos_sim": 0.7287882566452026, "eval_wikibio_emb_cos_sim_sem": 0.010525572188826472, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6336963176727295, "eval_wikibio_n_ngrams_match_1": 9.594, "eval_wikibio_n_ngrams_match_2": 3.22, "eval_wikibio_n_ngrams_match_3": 1.24, "eval_wikibio_num_pred_words": 34.668, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.852473116117025, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3414386348689038, "eval_wikibio_runtime": 11.1153, "eval_wikibio_samples_per_second": 44.983, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.30615204940000623, "eval_wikibio_token_set_f1_sem": 0.006072646876585744, "eval_wikibio_token_set_precision": 0.312479302205344, "eval_wikibio_token_set_recall": 0.32157828067846506, "eval_wikibio_true_num_tokens": 61.1328125, "step": 210625 }, { "epoch": 40.44, "eval_nq_accuracy": 0.53746875, "eval_nq_bleu_score": 12.100714747366794, "eval_nq_bleu_score_sem": 0.4895064940662709, "eval_nq_emb_cos_sim": 0.8344129323959351, "eval_nq_emb_cos_sim_sem": 0.007559017524093605, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1229686737060547, "eval_nq_n_ngrams_match_1": 23.416, "eval_nq_n_ngrams_match_2": 8.66, "eval_nq_n_ngrams_match_3": 4.064, "eval_nq_num_pred_words": 49.138, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.355906664099342, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.453806256172518, "eval_nq_runtime": 13.3251, "eval_nq_samples_per_second": 37.523, "eval_nq_steps_per_second": 0.075, "eval_nq_token_set_f1": 0.46764387993165013, "eval_nq_token_set_f1_sem": 0.005044550602033417, "eval_nq_token_set_precision": 0.426408823021345, "eval_nq_token_set_recall": 0.5270325611115051, "eval_nq_true_num_tokens": 64.0, "step": 210625 }, { "epoch": 40.44, "learning_rate": 0.001, "loss": 2.492, "step": 210636 }, { "epoch": 40.45, "learning_rate": 0.001, "loss": 2.4994, "step": 210648 }, { "epoch": 40.45, "learning_rate": 0.001, "loss": 2.4972, "step": 210660 }, { "epoch": 40.45, "learning_rate": 0.001, "loss": 2.4869, "step": 210672 }, { "epoch": 40.45, "learning_rate": 0.001, "loss": 2.4948, "step": 210684 }, { "epoch": 40.46, "learning_rate": 0.001, "loss": 2.4881, "step": 210696 }, { "epoch": 40.46, "learning_rate": 0.001, "loss": 2.5013, "step": 210708 }, { "epoch": 40.46, "learning_rate": 0.001, "loss": 2.4981, "step": 210720 }, { "epoch": 40.46, "learning_rate": 0.001, "loss": 2.4971, "step": 210732 }, { "epoch": 40.47, "learning_rate": 0.001, "loss": 2.4905, "step": 210744 }, { "epoch": 40.47, "learning_rate": 0.001, "loss": 2.4942, "step": 210756 }, { "epoch": 40.47, "learning_rate": 0.001, "loss": 2.4934, "step": 210768 }, { "epoch": 40.47, "learning_rate": 0.001, "loss": 2.5043, "step": 210780 }, { "epoch": 40.47, "learning_rate": 0.001, "loss": 2.4997, "step": 210792 }, { "epoch": 40.48, "learning_rate": 0.001, "loss": 2.4995, "step": 210804 }, { "epoch": 40.48, "learning_rate": 0.001, "loss": 2.4828, "step": 210816 }, { "epoch": 40.48, "learning_rate": 0.001, "loss": 2.4928, "step": 210828 }, { "epoch": 40.48, "learning_rate": 0.001, "loss": 2.5021, "step": 210840 }, { "epoch": 40.49, "learning_rate": 0.001, "loss": 2.4906, "step": 210852 }, { "epoch": 40.49, "learning_rate": 0.001, "loss": 2.4952, "step": 210864 }, { "epoch": 40.49, "learning_rate": 0.001, "loss": 2.5012, "step": 210876 }, { "epoch": 40.49, "learning_rate": 0.001, "loss": 2.4972, "step": 210888 }, { "epoch": 40.5, "learning_rate": 0.001, "loss": 2.5052, "step": 210900 }, { "epoch": 40.5, "learning_rate": 0.001, "loss": 2.4957, "step": 210912 }, { "epoch": 40.5, "learning_rate": 0.001, "loss": 2.4983, "step": 210924 }, { "epoch": 40.5, "learning_rate": 0.001, "loss": 2.4913, "step": 210936 }, { "epoch": 40.5, "learning_rate": 0.001, "loss": 2.493, "step": 210948 }, { "epoch": 40.51, "learning_rate": 0.001, "loss": 2.4934, "step": 210960 }, { "epoch": 40.51, "learning_rate": 0.001, "loss": 2.4925, "step": 210972 }, { "epoch": 40.51, "learning_rate": 0.001, "loss": 2.4823, "step": 210984 }, { "epoch": 40.51, "learning_rate": 0.001, "loss": 2.497, "step": 210996 }, { "epoch": 40.52, "learning_rate": 0.001, "loss": 2.4963, "step": 211008 }, { "epoch": 40.52, "learning_rate": 0.001, "loss": 2.5011, "step": 211020 }, { "epoch": 40.52, "learning_rate": 0.001, "loss": 2.4966, "step": 211032 }, { "epoch": 40.52, "learning_rate": 0.001, "loss": 2.4983, "step": 211044 }, { "epoch": 40.53, "learning_rate": 0.001, "loss": 2.5001, "step": 211056 }, { "epoch": 40.53, "learning_rate": 0.001, "loss": 2.4891, "step": 211068 }, { "epoch": 40.53, "learning_rate": 0.001, "loss": 2.4943, "step": 211080 }, { "epoch": 40.53, "learning_rate": 0.001, "loss": 2.4964, "step": 211092 }, { "epoch": 40.53, "learning_rate": 0.001, "loss": 2.4974, "step": 211104 }, { "epoch": 40.54, "learning_rate": 0.001, "loss": 2.4976, "step": 211116 }, { "epoch": 40.54, "learning_rate": 0.001, "loss": 2.4939, "step": 211128 }, { "epoch": 40.54, "learning_rate": 0.001, "loss": 2.4975, "step": 211140 }, { "epoch": 40.54, "learning_rate": 0.001, "loss": 2.4961, "step": 211152 }, { "epoch": 40.55, "learning_rate": 0.001, "loss": 2.5008, "step": 211164 }, { "epoch": 40.55, "learning_rate": 0.001, "loss": 2.4949, "step": 211176 }, { "epoch": 40.55, "learning_rate": 0.001, "loss": 2.4964, "step": 211188 }, { "epoch": 40.55, "learning_rate": 0.001, "loss": 2.4949, "step": 211200 }, { "epoch": 40.56, "learning_rate": 0.001, "loss": 2.4976, "step": 211212 }, { "epoch": 40.56, "learning_rate": 0.001, "loss": 2.5006, "step": 211224 }, { "epoch": 40.56, "learning_rate": 0.001, "loss": 2.5006, "step": 211236 }, { "epoch": 40.56, "learning_rate": 0.001, "loss": 2.495, "step": 211248 }, { "epoch": 40.56, "eval_ag_news_accuracy": 0.330625, "eval_ag_news_bleu_score": 4.965679517767526, "eval_ag_news_bleu_score_sem": 0.15529164176969681, "eval_ag_news_emb_cos_sim": 0.8184776902198792, "eval_ag_news_emb_cos_sim_sem": 0.006984504612418496, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4651143550872803, "eval_ag_news_n_ngrams_match_1": 14.518, "eval_ag_news_n_ngrams_match_2": 3.28, "eval_ag_news_n_ngrams_match_3": 0.914, "eval_ag_news_num_pred_words": 46.82, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.980116653066233, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3601847621843889, "eval_ag_news_runtime": 12.4137, "eval_ag_news_samples_per_second": 40.278, "eval_ag_news_steps_per_second": 0.081, "eval_ag_news_token_set_f1": 0.35821609908491003, "eval_ag_news_token_set_f1_sem": 0.004377098564057673, "eval_ag_news_token_set_precision": 0.34504259764844536, "eval_ag_news_token_set_recall": 0.38826048122376766, "eval_ag_news_true_num_tokens": 56.09375, "step": 211250 }, { "epoch": 40.56, "eval_anthropic_toxic_prompts_accuracy": 0.114375, "eval_anthropic_toxic_prompts_bleu_score": 3.229517019320023, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1241547252813052, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804816722869873, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008320458294359607, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.219191312789917, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.732, "eval_anthropic_toxic_prompts_num_pred_words": 46.672, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.007888442331616, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22019143045454434, "eval_anthropic_toxic_prompts_runtime": 12.1742, "eval_anthropic_toxic_prompts_samples_per_second": 41.07, "eval_anthropic_toxic_prompts_steps_per_second": 0.082, "eval_anthropic_toxic_prompts_token_set_f1": 0.3629537146769006, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00648898051165861, "eval_anthropic_toxic_prompts_token_set_precision": 0.4443750867682475, "eval_anthropic_toxic_prompts_token_set_recall": 0.3334166465013034, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 211250 }, { "epoch": 40.56, "eval_arxiv_accuracy": 0.3526875, "eval_arxiv_bleu_score": 4.517229432060944, "eval_arxiv_bleu_score_sem": 0.13048247904619614, "eval_arxiv_emb_cos_sim": 0.7832765579223633, "eval_arxiv_emb_cos_sim_sem": 0.00662021622984868, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3431384563446045, "eval_arxiv_n_ngrams_match_1": 15.442, "eval_arxiv_n_ngrams_match_2": 3.068, "eval_arxiv_n_ngrams_match_3": 0.716, "eval_arxiv_num_pred_words": 40.462, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.307830326066725, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37094627656375234, "eval_arxiv_runtime": 11.6376, "eval_arxiv_samples_per_second": 42.964, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.3637689808034788, "eval_arxiv_token_set_f1_sem": 0.0041864600168504094, "eval_arxiv_token_set_precision": 0.31430136831508204, "eval_arxiv_token_set_recall": 0.4516966199853352, "eval_arxiv_true_num_tokens": 64.0, "step": 211250 }, { "epoch": 40.56, "eval_python_code_alpaca_accuracy": 0.162875, "eval_python_code_alpaca_bleu_score": 5.001133545530016, "eval_python_code_alpaca_bleu_score_sem": 0.16316511733482023, "eval_python_code_alpaca_emb_cos_sim": 0.7707309722900391, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00773646168322504, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.880523920059204, "eval_python_code_alpaca_n_ngrams_match_1": 10.1, "eval_python_code_alpaca_n_ngrams_match_2": 3.12, "eval_python_code_alpaca_n_ngrams_match_3": 1.148, "eval_python_code_alpaca_num_pred_words": 43.604, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.823608880037952, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3442481904266813, "eval_python_code_alpaca_runtime": 16.3009, "eval_python_code_alpaca_samples_per_second": 30.673, "eval_python_code_alpaca_steps_per_second": 0.061, "eval_python_code_alpaca_token_set_f1": 0.48811303618622753, "eval_python_code_alpaca_token_set_f1_sem": 0.0053038756078575226, "eval_python_code_alpaca_token_set_precision": 0.5529669880239189, "eval_python_code_alpaca_token_set_recall": 0.4554527505613025, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 211250 }, { "epoch": 40.56, "eval_wikibio_accuracy": 0.3328125, "eval_wikibio_bleu_score": 6.106486569138538, "eval_wikibio_bleu_score_sem": 0.21862486709111842, "eval_wikibio_emb_cos_sim": 0.7377462387084961, "eval_wikibio_emb_cos_sim_sem": 0.010332872187623108, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6214442253112793, "eval_wikibio_n_ngrams_match_1": 10.036, "eval_wikibio_n_ngrams_match_2": 3.37, "eval_wikibio_n_ngrams_match_3": 1.26, "eval_wikibio_num_pred_words": 35.068, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.39153064041837, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3529416400819134, "eval_wikibio_runtime": 12.5853, "eval_wikibio_samples_per_second": 39.729, "eval_wikibio_steps_per_second": 0.079, "eval_wikibio_token_set_f1": 0.3216823173080126, "eval_wikibio_token_set_f1_sem": 0.0057506740642656, "eval_wikibio_token_set_precision": 0.3279516456954584, "eval_wikibio_token_set_recall": 0.33241708463340364, "eval_wikibio_true_num_tokens": 61.1328125, "step": 211250 }, { "epoch": 40.56, "eval_nq_accuracy": 0.53803125, "eval_nq_bleu_score": 12.029555287348698, "eval_nq_bleu_score_sem": 0.4873451969618991, "eval_nq_emb_cos_sim": 0.8419448137283325, "eval_nq_emb_cos_sim_sem": 0.006734322895291353, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1206815242767334, "eval_nq_n_ngrams_match_1": 23.474, "eval_nq_n_ngrams_match_2": 8.722, "eval_nq_n_ngrams_match_3": 4.03, "eval_nq_num_pred_words": 48.762, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.336817295381927, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45793415195912957, "eval_nq_runtime": 12.8976, "eval_nq_samples_per_second": 38.767, "eval_nq_steps_per_second": 0.078, "eval_nq_token_set_f1": 0.4699142640313866, "eval_nq_token_set_f1_sem": 0.004932251966649148, "eval_nq_token_set_precision": 0.42773513140946984, "eval_nq_token_set_recall": 0.5297096013171274, "eval_nq_true_num_tokens": 64.0, "step": 211250 }, { "epoch": 40.56, "learning_rate": 0.001, "loss": 2.4972, "step": 211260 }, { "epoch": 40.57, "learning_rate": 0.001, "loss": 2.4879, "step": 211272 }, { "epoch": 40.57, "learning_rate": 0.001, "loss": 2.4899, "step": 211284 }, { "epoch": 40.57, "learning_rate": 0.001, "loss": 2.5088, "step": 211296 }, { "epoch": 40.57, "learning_rate": 0.001, "loss": 2.4872, "step": 211308 }, { "epoch": 40.58, "learning_rate": 0.001, "loss": 2.4866, "step": 211320 }, { "epoch": 40.58, "learning_rate": 0.001, "loss": 2.4954, "step": 211332 }, { "epoch": 40.58, "learning_rate": 0.001, "loss": 2.4885, "step": 211344 }, { "epoch": 40.58, "learning_rate": 0.001, "loss": 2.4885, "step": 211356 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 2.4959, "step": 211368 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 2.4951, "step": 211380 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 2.5028, "step": 211392 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 2.5044, "step": 211404 }, { "epoch": 40.59, "learning_rate": 0.001, "loss": 2.4948, "step": 211416 }, { "epoch": 40.6, "learning_rate": 0.001, "loss": 2.4938, "step": 211428 }, { "epoch": 40.6, "learning_rate": 0.001, "loss": 2.4973, "step": 211440 }, { "epoch": 40.6, "learning_rate": 0.001, "loss": 2.5054, "step": 211452 }, { "epoch": 40.6, "learning_rate": 0.001, "loss": 2.5031, "step": 211464 }, { "epoch": 40.61, "learning_rate": 0.001, "loss": 2.4952, "step": 211476 }, { "epoch": 40.61, "learning_rate": 0.001, "loss": 2.498, "step": 211488 }, { "epoch": 40.61, "learning_rate": 0.001, "loss": 2.4941, "step": 211500 }, { "epoch": 40.61, "learning_rate": 0.001, "loss": 2.5051, "step": 211512 }, { "epoch": 40.62, "learning_rate": 0.001, "loss": 2.4979, "step": 211524 }, { "epoch": 40.62, "learning_rate": 0.001, "loss": 2.5072, "step": 211536 }, { "epoch": 40.62, "learning_rate": 0.001, "loss": 2.492, "step": 211548 }, { "epoch": 40.62, "learning_rate": 0.001, "loss": 2.4809, "step": 211560 }, { "epoch": 40.62, "learning_rate": 0.001, "loss": 2.5034, "step": 211572 }, { "epoch": 40.63, "learning_rate": 0.001, "loss": 2.4977, "step": 211584 }, { "epoch": 40.63, "learning_rate": 0.001, "loss": 2.4972, "step": 211596 }, { "epoch": 40.63, "learning_rate": 0.001, "loss": 2.5017, "step": 211608 }, { "epoch": 40.63, "learning_rate": 0.001, "loss": 2.4981, "step": 211620 }, { "epoch": 40.64, "learning_rate": 0.001, "loss": 2.5049, "step": 211632 }, { "epoch": 40.64, "learning_rate": 0.001, "loss": 2.5152, "step": 211644 }, { "epoch": 40.64, "learning_rate": 0.001, "loss": 2.4919, "step": 211656 }, { "epoch": 40.64, "learning_rate": 0.001, "loss": 2.5047, "step": 211668 }, { "epoch": 40.65, "learning_rate": 0.001, "loss": 2.4889, "step": 211680 }, { "epoch": 40.65, "learning_rate": 0.001, "loss": 2.5006, "step": 211692 }, { "epoch": 40.65, "learning_rate": 0.001, "loss": 2.5043, "step": 211704 }, { "epoch": 40.65, "learning_rate": 0.001, "loss": 2.5145, "step": 211716 }, { "epoch": 40.65, "learning_rate": 0.001, "loss": 2.4987, "step": 211728 }, { "epoch": 40.66, "learning_rate": 0.001, "loss": 2.4935, "step": 211740 }, { "epoch": 40.66, "learning_rate": 0.001, "loss": 2.5008, "step": 211752 }, { "epoch": 40.66, "learning_rate": 0.001, "loss": 2.4926, "step": 211764 }, { "epoch": 40.66, "learning_rate": 0.001, "loss": 2.4896, "step": 211776 }, { "epoch": 40.67, "learning_rate": 0.001, "loss": 2.4963, "step": 211788 }, { "epoch": 40.67, "learning_rate": 0.001, "loss": 2.5058, "step": 211800 }, { "epoch": 40.67, "learning_rate": 0.001, "loss": 2.5102, "step": 211812 }, { "epoch": 40.67, "learning_rate": 0.001, "loss": 2.5032, "step": 211824 }, { "epoch": 40.68, "learning_rate": 0.001, "loss": 2.5014, "step": 211836 }, { "epoch": 40.68, "learning_rate": 0.001, "loss": 2.5016, "step": 211848 }, { "epoch": 40.68, "learning_rate": 0.001, "loss": 2.5096, "step": 211860 }, { "epoch": 40.68, "learning_rate": 0.001, "loss": 2.4947, "step": 211872 }, { "epoch": 40.68, "eval_ag_news_accuracy": 0.32915625, "eval_ag_news_bleu_score": 4.963234534569914, "eval_ag_news_bleu_score_sem": 0.15203737496492756, "eval_ag_news_emb_cos_sim": 0.8209718465805054, "eval_ag_news_emb_cos_sim_sem": 0.007165501619170499, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4764633178710938, "eval_ag_news_n_ngrams_match_1": 14.484, "eval_ag_news_n_ngrams_match_2": 3.296, "eval_ag_news_n_ngrams_match_3": 0.972, "eval_ag_news_num_pred_words": 47.052, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.34512512283607, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35927382853782835, "eval_ag_news_runtime": 20.4873, "eval_ag_news_samples_per_second": 24.405, "eval_ag_news_steps_per_second": 0.049, "eval_ag_news_token_set_f1": 0.3579980060028512, "eval_ag_news_token_set_f1_sem": 0.004656055630614914, "eval_ag_news_token_set_precision": 0.34694101676323597, "eval_ag_news_token_set_recall": 0.3829433068540424, "eval_ag_news_true_num_tokens": 56.09375, "step": 211875 }, { "epoch": 40.68, "eval_anthropic_toxic_prompts_accuracy": 0.11615625, "eval_anthropic_toxic_prompts_bleu_score": 3.376417473185111, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12940743142741185, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6825761198997498, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008438446421132331, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.190232753753662, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.03, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.812, "eval_anthropic_toxic_prompts_num_pred_words": 46.524, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.29408132370793, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22075433661742308, "eval_anthropic_toxic_prompts_runtime": 11.4515, "eval_anthropic_toxic_prompts_samples_per_second": 43.662, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.35710008178358466, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0063458533161736864, "eval_anthropic_toxic_prompts_token_set_precision": 0.4496051392700254, "eval_anthropic_toxic_prompts_token_set_recall": 0.3225220007767997, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 211875 }, { "epoch": 40.68, "eval_arxiv_accuracy": 0.35378125, "eval_arxiv_bleu_score": 4.480396452198746, "eval_arxiv_bleu_score_sem": 0.1302892952645812, "eval_arxiv_emb_cos_sim": 0.7887937426567078, "eval_arxiv_emb_cos_sim_sem": 0.006822198415892059, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3442654609680176, "eval_arxiv_n_ngrams_match_1": 15.622, "eval_arxiv_n_ngrams_match_2": 3.06, "eval_arxiv_n_ngrams_match_3": 0.688, "eval_arxiv_num_pred_words": 41.222, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.339751365924066, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3742452813832927, "eval_arxiv_runtime": 26.3743, "eval_arxiv_samples_per_second": 18.958, "eval_arxiv_steps_per_second": 0.038, "eval_arxiv_token_set_f1": 0.367423677481192, "eval_arxiv_token_set_f1_sem": 0.0041650053318129715, "eval_arxiv_token_set_precision": 0.3198981863267829, "eval_arxiv_token_set_recall": 0.4484298164276399, "eval_arxiv_true_num_tokens": 64.0, "step": 211875 }, { "epoch": 40.68, "eval_python_code_alpaca_accuracy": 0.16390625, "eval_python_code_alpaca_bleu_score": 4.834341631975883, "eval_python_code_alpaca_bleu_score_sem": 0.15402381560850545, "eval_python_code_alpaca_emb_cos_sim": 0.7638465166091919, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008511363761778717, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.861112117767334, "eval_python_code_alpaca_n_ngrams_match_1": 10.108, "eval_python_code_alpaca_n_ngrams_match_2": 3.126, "eval_python_code_alpaca_n_ngrams_match_3": 1.05, "eval_python_code_alpaca_num_pred_words": 43.992, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.480957013195052, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34007841519793347, "eval_python_code_alpaca_runtime": 26.318, "eval_python_code_alpaca_samples_per_second": 18.998, "eval_python_code_alpaca_steps_per_second": 0.038, "eval_python_code_alpaca_token_set_f1": 0.4885419612175739, "eval_python_code_alpaca_token_set_f1_sem": 0.0052848286232211, "eval_python_code_alpaca_token_set_precision": 0.5541858940783554, "eval_python_code_alpaca_token_set_recall": 0.45751941396856666, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 211875 }, { "epoch": 40.68, "eval_wikibio_accuracy": 0.3301875, "eval_wikibio_bleu_score": 6.323464112115377, "eval_wikibio_bleu_score_sem": 0.2315125932544155, "eval_wikibio_emb_cos_sim": 0.747446596622467, "eval_wikibio_emb_cos_sim_sem": 0.008913036398977697, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6288974285125732, "eval_wikibio_n_ngrams_match_1": 10.132, "eval_wikibio_n_ngrams_match_2": 3.516, "eval_wikibio_n_ngrams_match_3": 1.32, "eval_wikibio_num_pred_words": 35.56, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.671258455501224, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3644290219383398, "eval_wikibio_runtime": 16.338, "eval_wikibio_samples_per_second": 30.603, "eval_wikibio_steps_per_second": 0.061, "eval_wikibio_token_set_f1": 0.32812273040857015, "eval_wikibio_token_set_f1_sem": 0.005243822949007464, "eval_wikibio_token_set_precision": 0.3338040701242717, "eval_wikibio_token_set_recall": 0.34078901371443304, "eval_wikibio_true_num_tokens": 61.1328125, "step": 211875 }, { "epoch": 40.68, "eval_nq_accuracy": 0.53696875, "eval_nq_bleu_score": 11.799364175949483, "eval_nq_bleu_score_sem": 0.4728314380828865, "eval_nq_emb_cos_sim": 0.8347982168197632, "eval_nq_emb_cos_sim_sem": 0.007110457835015271, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1179277896881104, "eval_nq_n_ngrams_match_1": 23.418, "eval_nq_n_ngrams_match_2": 8.656, "eval_nq_n_ngrams_match_3": 3.922, "eval_nq_num_pred_words": 49.098, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.313891493510692, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.455719516647935, "eval_nq_runtime": 16.0765, "eval_nq_samples_per_second": 31.101, "eval_nq_steps_per_second": 0.062, "eval_nq_token_set_f1": 0.4669491230711507, "eval_nq_token_set_f1_sem": 0.004873035977873829, "eval_nq_token_set_precision": 0.42769601919580363, "eval_nq_token_set_recall": 0.5229055021960608, "eval_nq_true_num_tokens": 64.0, "step": 211875 }, { "epoch": 40.68, "learning_rate": 0.001, "loss": 2.4934, "step": 211884 }, { "epoch": 40.69, "learning_rate": 0.001, "loss": 2.5003, "step": 211896 }, { "epoch": 40.69, "learning_rate": 0.001, "loss": 2.4955, "step": 211908 }, { "epoch": 40.69, "learning_rate": 0.001, "loss": 2.497, "step": 211920 }, { "epoch": 40.69, "learning_rate": 0.001, "loss": 2.4994, "step": 211932 }, { "epoch": 40.7, "learning_rate": 0.001, "loss": 2.4985, "step": 211944 }, { "epoch": 40.7, "learning_rate": 0.001, "loss": 2.4938, "step": 211956 }, { "epoch": 40.7, "learning_rate": 0.001, "loss": 2.4953, "step": 211968 }, { "epoch": 40.7, "learning_rate": 0.001, "loss": 2.4882, "step": 211980 }, { "epoch": 40.71, "learning_rate": 0.001, "loss": 2.4949, "step": 211992 }, { "epoch": 40.71, "learning_rate": 0.001, "loss": 2.4999, "step": 212004 }, { "epoch": 40.71, "learning_rate": 0.001, "loss": 2.4917, "step": 212016 }, { "epoch": 40.71, "learning_rate": 0.001, "loss": 2.488, "step": 212028 }, { "epoch": 40.71, "learning_rate": 0.001, "loss": 2.4934, "step": 212040 }, { "epoch": 40.72, "learning_rate": 0.001, "loss": 2.497, "step": 212052 }, { "epoch": 40.72, "learning_rate": 0.001, "loss": 2.4884, "step": 212064 }, { "epoch": 40.72, "learning_rate": 0.001, "loss": 2.5069, "step": 212076 }, { "epoch": 40.72, "learning_rate": 0.001, "loss": 2.4908, "step": 212088 }, { "epoch": 40.73, "learning_rate": 0.001, "loss": 2.5007, "step": 212100 }, { "epoch": 40.73, "learning_rate": 0.001, "loss": 2.4938, "step": 212112 }, { "epoch": 40.73, "learning_rate": 0.001, "loss": 2.5021, "step": 212124 }, { "epoch": 40.73, "learning_rate": 0.001, "loss": 2.4996, "step": 212136 }, { "epoch": 40.74, "learning_rate": 0.001, "loss": 2.4915, "step": 212148 }, { "epoch": 40.74, "learning_rate": 0.001, "loss": 2.4958, "step": 212160 }, { "epoch": 40.74, "learning_rate": 0.001, "loss": 2.4996, "step": 212172 }, { "epoch": 40.74, "learning_rate": 0.001, "loss": 2.5015, "step": 212184 }, { "epoch": 40.74, "learning_rate": 0.001, "loss": 2.5064, "step": 212196 }, { "epoch": 40.75, "learning_rate": 0.001, "loss": 2.4932, "step": 212208 }, { "epoch": 40.75, "learning_rate": 0.001, "loss": 2.4986, "step": 212220 }, { "epoch": 40.75, "learning_rate": 0.001, "loss": 2.4984, "step": 212232 }, { "epoch": 40.75, "learning_rate": 0.001, "loss": 2.5046, "step": 212244 }, { "epoch": 40.76, "learning_rate": 0.001, "loss": 2.4927, "step": 212256 }, { "epoch": 40.76, "learning_rate": 0.001, "loss": 2.4955, "step": 212268 }, { "epoch": 40.76, "learning_rate": 0.001, "loss": 2.5014, "step": 212280 }, { "epoch": 40.76, "learning_rate": 0.001, "loss": 2.5026, "step": 212292 }, { "epoch": 40.76, "learning_rate": 0.001, "loss": 2.4996, "step": 212304 }, { "epoch": 40.77, "learning_rate": 0.001, "loss": 2.489, "step": 212316 }, { "epoch": 40.77, "learning_rate": 0.001, "loss": 2.4987, "step": 212328 }, { "epoch": 40.77, "learning_rate": 0.001, "loss": 2.5016, "step": 212340 }, { "epoch": 40.77, "learning_rate": 0.001, "loss": 2.4975, "step": 212352 }, { "epoch": 40.78, "learning_rate": 0.001, "loss": 2.5102, "step": 212364 }, { "epoch": 40.78, "learning_rate": 0.001, "loss": 2.5007, "step": 212376 }, { "epoch": 40.78, "learning_rate": 0.001, "loss": 2.5162, "step": 212388 }, { "epoch": 40.78, "learning_rate": 0.001, "loss": 2.4996, "step": 212400 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 2.4987, "step": 212412 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 2.5069, "step": 212424 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 2.5003, "step": 212436 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 2.4959, "step": 212448 }, { "epoch": 40.79, "learning_rate": 0.001, "loss": 2.4978, "step": 212460 }, { "epoch": 40.8, "learning_rate": 0.001, "loss": 2.497, "step": 212472 }, { "epoch": 40.8, "learning_rate": 0.001, "loss": 2.5003, "step": 212484 }, { "epoch": 40.8, "learning_rate": 0.001, "loss": 2.4951, "step": 212496 }, { "epoch": 40.8, "eval_ag_news_accuracy": 0.32740625, "eval_ag_news_bleu_score": 5.073949490606959, "eval_ag_news_bleu_score_sem": 0.16189017421293195, "eval_ag_news_emb_cos_sim": 0.8260356187820435, "eval_ag_news_emb_cos_sim_sem": 0.006121638109755535, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.47394061088562, "eval_ag_news_n_ngrams_match_1": 14.348, "eval_ag_news_n_ngrams_match_2": 3.276, "eval_ag_news_n_ngrams_match_3": 0.982, "eval_ag_news_num_pred_words": 46.828, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.26363068625445, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35707282301627, "eval_ag_news_runtime": 26.7904, "eval_ag_news_samples_per_second": 18.663, "eval_ag_news_steps_per_second": 0.037, "eval_ag_news_token_set_f1": 0.3575342032398384, "eval_ag_news_token_set_f1_sem": 0.004328800450529145, "eval_ag_news_token_set_precision": 0.3437397156310016, "eval_ag_news_token_set_recall": 0.38607168588250695, "eval_ag_news_true_num_tokens": 56.09375, "step": 212500 }, { "epoch": 40.8, "eval_anthropic_toxic_prompts_accuracy": 0.11584375, "eval_anthropic_toxic_prompts_bleu_score": 3.19271293440135, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12423840186488055, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6878588199615479, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00838356068765902, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1866867542266846, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.252, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.962, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 46.862, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.208087080706633, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21844141793597383, "eval_anthropic_toxic_prompts_runtime": 11.4823, "eval_anthropic_toxic_prompts_samples_per_second": 43.545, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.3653211047980055, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006858512618122479, "eval_anthropic_toxic_prompts_token_set_precision": 0.4382184239518757, "eval_anthropic_toxic_prompts_token_set_recall": 0.34065660787244145, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 212500 }, { "epoch": 40.8, "eval_arxiv_accuracy": 0.35275, "eval_arxiv_bleu_score": 4.513127925483866, "eval_arxiv_bleu_score_sem": 0.13612349484737696, "eval_arxiv_emb_cos_sim": 0.7828484773635864, "eval_arxiv_emb_cos_sim_sem": 0.006214240004290033, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3370327949523926, "eval_arxiv_n_ngrams_match_1": 15.622, "eval_arxiv_n_ngrams_match_2": 3.19, "eval_arxiv_n_ngrams_match_3": 0.702, "eval_arxiv_num_pred_words": 40.366, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.1355188723438, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3742057430507527, "eval_arxiv_runtime": 11.7402, "eval_arxiv_samples_per_second": 42.589, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.3688973608504254, "eval_arxiv_token_set_f1_sem": 0.00418976189933825, "eval_arxiv_token_set_precision": 0.31977340586560815, "eval_arxiv_token_set_recall": 0.457249922942953, "eval_arxiv_true_num_tokens": 64.0, "step": 212500 }, { "epoch": 40.8, "eval_python_code_alpaca_accuracy": 0.16359375, "eval_python_code_alpaca_bleu_score": 4.545406947168154, "eval_python_code_alpaca_bleu_score_sem": 0.1547740315439224, "eval_python_code_alpaca_emb_cos_sim": 0.7672972083091736, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0075867962485293315, "eval_python_code_alpaca_emb_top1_equal": 0.09375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.025864720141013958, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.838118076324463, "eval_python_code_alpaca_n_ngrams_match_1": 9.884, "eval_python_code_alpaca_n_ngrams_match_2": 2.832, "eval_python_code_alpaca_n_ngrams_match_3": 0.968, "eval_python_code_alpaca_num_pred_words": 44.42, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.083585262664368, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3330134344414717, "eval_python_code_alpaca_runtime": 32.4593, "eval_python_code_alpaca_samples_per_second": 15.404, "eval_python_code_alpaca_steps_per_second": 0.031, "eval_python_code_alpaca_token_set_f1": 0.4852851572535872, "eval_python_code_alpaca_token_set_f1_sem": 0.0054490215000587185, "eval_python_code_alpaca_token_set_precision": 0.5401105876679381, "eval_python_code_alpaca_token_set_recall": 0.4626911120535161, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 212500 }, { "epoch": 40.8, "eval_wikibio_accuracy": 0.3296875, "eval_wikibio_bleu_score": 6.147854636696207, "eval_wikibio_bleu_score_sem": 0.2178778092055402, "eval_wikibio_emb_cos_sim": 0.7475197315216064, "eval_wikibio_emb_cos_sim_sem": 0.009752180811374106, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6456449031829834, "eval_wikibio_n_ngrams_match_1": 9.974, "eval_wikibio_n_ngrams_match_2": 3.418, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 35.402, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.30746949623982, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35711404812557335, "eval_wikibio_runtime": 21.0921, "eval_wikibio_samples_per_second": 23.706, "eval_wikibio_steps_per_second": 0.047, "eval_wikibio_token_set_f1": 0.32296697502696603, "eval_wikibio_token_set_f1_sem": 0.00566851255044716, "eval_wikibio_token_set_precision": 0.3255429715328488, "eval_wikibio_token_set_recall": 0.33631945041421923, "eval_wikibio_true_num_tokens": 61.1328125, "step": 212500 }, { "epoch": 40.8, "eval_nq_accuracy": 0.53625, "eval_nq_bleu_score": 12.112637320579347, "eval_nq_bleu_score_sem": 0.49196328655898786, "eval_nq_emb_cos_sim": 0.8384393453598022, "eval_nq_emb_cos_sim_sem": 0.007336762045371977, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1238081455230713, "eval_nq_n_ngrams_match_1": 23.568, "eval_nq_n_ngrams_match_2": 8.814, "eval_nq_n_ngrams_match_3": 4.056, "eval_nq_num_pred_words": 49.302, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.362924157331262, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4588801477255383, "eval_nq_runtime": 11.7801, "eval_nq_samples_per_second": 42.445, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.47183306649742235, "eval_nq_token_set_f1_sem": 0.0048326883239400275, "eval_nq_token_set_precision": 0.4294888847624133, "eval_nq_token_set_recall": 0.5321096533546643, "eval_nq_true_num_tokens": 64.0, "step": 212500 }, { "epoch": 40.8, "learning_rate": 0.001, "loss": 2.5024, "step": 212508 }, { "epoch": 40.81, "learning_rate": 0.001, "loss": 2.4855, "step": 212520 }, { "epoch": 40.81, "learning_rate": 0.001, "loss": 2.502, "step": 212532 }, { "epoch": 40.81, "learning_rate": 0.001, "loss": 2.4964, "step": 212544 }, { "epoch": 40.81, "learning_rate": 0.001, "loss": 2.497, "step": 212556 }, { "epoch": 40.82, "learning_rate": 0.001, "loss": 2.5005, "step": 212568 }, { "epoch": 40.82, "learning_rate": 0.001, "loss": 2.4897, "step": 212580 }, { "epoch": 40.82, "learning_rate": 0.001, "loss": 2.4962, "step": 212592 }, { "epoch": 40.82, "learning_rate": 0.001, "loss": 2.5058, "step": 212604 }, { "epoch": 40.82, "learning_rate": 0.001, "loss": 2.5049, "step": 212616 }, { "epoch": 40.83, "learning_rate": 0.001, "loss": 2.4935, "step": 212628 }, { "epoch": 40.83, "learning_rate": 0.001, "loss": 2.505, "step": 212640 }, { "epoch": 40.83, "learning_rate": 0.001, "loss": 2.4965, "step": 212652 }, { "epoch": 40.83, "learning_rate": 0.001, "loss": 2.5106, "step": 212664 }, { "epoch": 40.84, "learning_rate": 0.001, "loss": 2.4992, "step": 212676 }, { "epoch": 40.84, "learning_rate": 0.001, "loss": 2.494, "step": 212688 }, { "epoch": 40.84, "learning_rate": 0.001, "loss": 2.496, "step": 212700 }, { "epoch": 40.84, "learning_rate": 0.001, "loss": 2.4952, "step": 212712 }, { "epoch": 40.85, "learning_rate": 0.001, "loss": 2.4912, "step": 212724 }, { "epoch": 40.85, "learning_rate": 0.001, "loss": 2.4887, "step": 212736 }, { "epoch": 40.85, "learning_rate": 0.001, "loss": 2.4902, "step": 212748 }, { "epoch": 40.85, "learning_rate": 0.001, "loss": 2.4976, "step": 212760 }, { "epoch": 40.85, "learning_rate": 0.001, "loss": 2.4966, "step": 212772 }, { "epoch": 40.86, "learning_rate": 0.001, "loss": 2.5008, "step": 212784 }, { "epoch": 40.86, "learning_rate": 0.001, "loss": 2.5015, "step": 212796 }, { "epoch": 40.86, "learning_rate": 0.001, "loss": 2.4955, "step": 212808 }, { "epoch": 40.86, "learning_rate": 0.001, "loss": 2.4985, "step": 212820 }, { "epoch": 40.87, "learning_rate": 0.001, "loss": 2.4951, "step": 212832 }, { "epoch": 40.87, "learning_rate": 0.001, "loss": 2.4967, "step": 212844 }, { "epoch": 40.87, "learning_rate": 0.001, "loss": 2.4977, "step": 212856 }, { "epoch": 40.87, "learning_rate": 0.001, "loss": 2.5028, "step": 212868 }, { "epoch": 40.88, "learning_rate": 0.001, "loss": 2.5021, "step": 212880 }, { "epoch": 40.88, "learning_rate": 0.001, "loss": 2.5063, "step": 212892 }, { "epoch": 40.88, "learning_rate": 0.001, "loss": 2.4876, "step": 212904 }, { "epoch": 40.88, "learning_rate": 0.001, "loss": 2.5101, "step": 212916 }, { "epoch": 40.88, "learning_rate": 0.001, "loss": 2.5051, "step": 212928 }, { "epoch": 40.89, "learning_rate": 0.001, "loss": 2.5, "step": 212940 }, { "epoch": 40.89, "learning_rate": 0.001, "loss": 2.4999, "step": 212952 }, { "epoch": 40.89, "learning_rate": 0.001, "loss": 2.4971, "step": 212964 }, { "epoch": 40.89, "learning_rate": 0.001, "loss": 2.5031, "step": 212976 }, { "epoch": 40.9, "learning_rate": 0.001, "loss": 2.4901, "step": 212988 }, { "epoch": 40.9, "learning_rate": 0.001, "loss": 2.5052, "step": 213000 }, { "epoch": 40.9, "learning_rate": 0.001, "loss": 2.5029, "step": 213012 }, { "epoch": 40.9, "learning_rate": 0.001, "loss": 2.4926, "step": 213024 }, { "epoch": 40.91, "learning_rate": 0.001, "loss": 2.4972, "step": 213036 }, { "epoch": 40.91, "learning_rate": 0.001, "loss": 2.5053, "step": 213048 }, { "epoch": 40.91, "learning_rate": 0.001, "loss": 2.5054, "step": 213060 }, { "epoch": 40.91, "learning_rate": 0.001, "loss": 2.5007, "step": 213072 }, { "epoch": 40.91, "learning_rate": 0.001, "loss": 2.5036, "step": 213084 }, { "epoch": 40.92, "learning_rate": 0.001, "loss": 2.5072, "step": 213096 }, { "epoch": 40.92, "learning_rate": 0.001, "loss": 2.5095, "step": 213108 }, { "epoch": 40.92, "learning_rate": 0.001, "loss": 2.4957, "step": 213120 }, { "epoch": 40.92, "eval_ag_news_accuracy": 0.3271875, "eval_ag_news_bleu_score": 5.169108115754358, "eval_ag_news_bleu_score_sem": 0.16503923262174292, "eval_ag_news_emb_cos_sim": 0.8210017681121826, "eval_ag_news_emb_cos_sim_sem": 0.007500360964767584, "eval_ag_news_emb_top1_equal": 0.21875, "eval_ag_news_emb_top1_equal_sem": 0.03668319712192295, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4736239910125732, "eval_ag_news_n_ngrams_match_1": 14.552, "eval_ag_news_n_ngrams_match_2": 3.316, "eval_ag_news_n_ngrams_match_3": 1.028, "eval_ag_news_num_pred_words": 46.538, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.25341699661642, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3636627801571863, "eval_ag_news_runtime": 15.3268, "eval_ag_news_samples_per_second": 32.623, "eval_ag_news_steps_per_second": 0.065, "eval_ag_news_token_set_f1": 0.361360187616445, "eval_ag_news_token_set_f1_sem": 0.00446827457414915, "eval_ag_news_token_set_precision": 0.34878617505293585, "eval_ag_news_token_set_recall": 0.38898972593450143, "eval_ag_news_true_num_tokens": 56.09375, "step": 213125 }, { "epoch": 40.92, "eval_anthropic_toxic_prompts_accuracy": 0.11546875, "eval_anthropic_toxic_prompts_bleu_score": 3.2853693684322023, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12805168652778529, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6821027994155884, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009127290561927336, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.219939947128296, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.272, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.966, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 46.388, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.02661721597671, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22074704688960617, "eval_anthropic_toxic_prompts_runtime": 32.4558, "eval_anthropic_toxic_prompts_samples_per_second": 15.406, "eval_anthropic_toxic_prompts_steps_per_second": 0.031, "eval_anthropic_toxic_prompts_token_set_f1": 0.35853575371725, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006844027464354096, "eval_anthropic_toxic_prompts_token_set_precision": 0.43916205786315576, "eval_anthropic_toxic_prompts_token_set_recall": 0.3275761487719574, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 213125 }, { "epoch": 40.92, "eval_arxiv_accuracy": 0.35296875, "eval_arxiv_bleu_score": 4.548372064096796, "eval_arxiv_bleu_score_sem": 0.13928834346354316, "eval_arxiv_emb_cos_sim": 0.7800613641738892, "eval_arxiv_emb_cos_sim_sem": 0.0066088161658709905, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3394787311553955, "eval_arxiv_n_ngrams_match_1": 15.768, "eval_arxiv_n_ngrams_match_2": 3.052, "eval_arxiv_n_ngrams_match_3": 0.704, "eval_arxiv_num_pred_words": 40.804, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.204420787036668, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3755981361737335, "eval_arxiv_runtime": 28.3775, "eval_arxiv_samples_per_second": 17.62, "eval_arxiv_steps_per_second": 0.035, "eval_arxiv_token_set_f1": 0.36887950641780737, "eval_arxiv_token_set_f1_sem": 0.0042013310633415, "eval_arxiv_token_set_precision": 0.32464162611819186, "eval_arxiv_token_set_recall": 0.44361441413087405, "eval_arxiv_true_num_tokens": 64.0, "step": 213125 }, { "epoch": 40.92, "eval_python_code_alpaca_accuracy": 0.16378125, "eval_python_code_alpaca_bleu_score": 4.705942055721906, "eval_python_code_alpaca_bleu_score_sem": 0.1418070028514918, "eval_python_code_alpaca_emb_cos_sim": 0.7664552927017212, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007452085042520895, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8642737865448, "eval_python_code_alpaca_n_ngrams_match_1": 9.968, "eval_python_code_alpaca_n_ngrams_match_2": 2.934, "eval_python_code_alpaca_n_ngrams_match_3": 1.006, "eval_python_code_alpaca_num_pred_words": 42.916, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.536313472465732, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34459987645758366, "eval_python_code_alpaca_runtime": 12.2157, "eval_python_code_alpaca_samples_per_second": 40.931, "eval_python_code_alpaca_steps_per_second": 0.082, "eval_python_code_alpaca_token_set_f1": 0.48284114138059797, "eval_python_code_alpaca_token_set_f1_sem": 0.005379991978509389, "eval_python_code_alpaca_token_set_precision": 0.542330542676281, "eval_python_code_alpaca_token_set_recall": 0.4588993503726998, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 213125 }, { "epoch": 40.92, "eval_wikibio_accuracy": 0.3299375, "eval_wikibio_bleu_score": 6.295265732135231, "eval_wikibio_bleu_score_sem": 0.2197365531556376, "eval_wikibio_emb_cos_sim": 0.7429490089416504, "eval_wikibio_emb_cos_sim_sem": 0.008816129598994516, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.620128870010376, "eval_wikibio_n_ngrams_match_1": 10.27, "eval_wikibio_n_ngrams_match_2": 3.51, "eval_wikibio_n_ngrams_match_3": 1.348, "eval_wikibio_num_pred_words": 36.102, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.34237982486106, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36273745766227117, "eval_wikibio_runtime": 28.0698, "eval_wikibio_samples_per_second": 17.813, "eval_wikibio_steps_per_second": 0.036, "eval_wikibio_token_set_f1": 0.32618856732670787, "eval_wikibio_token_set_f1_sem": 0.005601880546756807, "eval_wikibio_token_set_precision": 0.33269052837779683, "eval_wikibio_token_set_recall": 0.33711766931600834, "eval_wikibio_true_num_tokens": 61.1328125, "step": 213125 }, { "epoch": 40.92, "eval_nq_accuracy": 0.537375, "eval_nq_bleu_score": 12.43024418502401, "eval_nq_bleu_score_sem": 0.5122337884247374, "eval_nq_emb_cos_sim": 0.8392021656036377, "eval_nq_emb_cos_sim_sem": 0.0067571704384513055, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1223866939544678, "eval_nq_n_ngrams_match_1": 23.656, "eval_nq_n_ngrams_match_2": 8.896, "eval_nq_n_ngrams_match_3": 4.224, "eval_nq_num_pred_words": 49.348, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.351045110414807, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4572380599396225, "eval_nq_runtime": 33.0366, "eval_nq_samples_per_second": 15.135, "eval_nq_steps_per_second": 0.03, "eval_nq_token_set_f1": 0.4705994427112349, "eval_nq_token_set_f1_sem": 0.005034228319134849, "eval_nq_token_set_precision": 0.43036213735376244, "eval_nq_token_set_recall": 0.5268105817699161, "eval_nq_true_num_tokens": 64.0, "step": 213125 }, { "epoch": 40.92, "learning_rate": 0.001, "loss": 2.5095, "step": 213132 }, { "epoch": 40.93, "learning_rate": 0.001, "loss": 2.486, "step": 213144 }, { "epoch": 40.93, "learning_rate": 0.001, "loss": 2.505, "step": 213156 }, { "epoch": 40.93, "learning_rate": 0.001, "loss": 2.4973, "step": 213168 }, { "epoch": 40.93, "learning_rate": 0.001, "loss": 2.4981, "step": 213180 }, { "epoch": 40.94, "learning_rate": 0.001, "loss": 2.4981, "step": 213192 }, { "epoch": 40.94, "learning_rate": 0.001, "loss": 2.4916, "step": 213204 }, { "epoch": 40.94, "learning_rate": 0.001, "loss": 2.5043, "step": 213216 }, { "epoch": 40.94, "learning_rate": 0.001, "loss": 2.4997, "step": 213228 }, { "epoch": 40.94, "learning_rate": 0.001, "loss": 2.4959, "step": 213240 }, { "epoch": 40.95, "learning_rate": 0.001, "loss": 2.495, "step": 213252 }, { "epoch": 40.95, "learning_rate": 0.001, "loss": 2.4889, "step": 213264 }, { "epoch": 40.95, "learning_rate": 0.001, "loss": 2.4981, "step": 213276 }, { "epoch": 40.95, "learning_rate": 0.001, "loss": 2.4916, "step": 213288 }, { "epoch": 40.96, "learning_rate": 0.001, "loss": 2.4957, "step": 213300 }, { "epoch": 40.96, "learning_rate": 0.001, "loss": 2.4963, "step": 213312 }, { "epoch": 40.96, "learning_rate": 0.001, "loss": 2.499, "step": 213324 }, { "epoch": 40.96, "learning_rate": 0.001, "loss": 2.5075, "step": 213336 }, { "epoch": 40.97, "learning_rate": 0.001, "loss": 2.5006, "step": 213348 }, { "epoch": 40.97, "learning_rate": 0.001, "loss": 2.5003, "step": 213360 }, { "epoch": 40.97, "learning_rate": 0.001, "loss": 2.5009, "step": 213372 }, { "epoch": 40.97, "learning_rate": 0.001, "loss": 2.489, "step": 213384 }, { "epoch": 40.97, "learning_rate": 0.001, "loss": 2.5017, "step": 213396 }, { "epoch": 40.98, "learning_rate": 0.001, "loss": 2.499, "step": 213408 }, { "epoch": 40.98, "learning_rate": 0.001, "loss": 2.4959, "step": 213420 }, { "epoch": 40.98, "learning_rate": 0.001, "loss": 2.5032, "step": 213432 }, { "epoch": 40.98, "learning_rate": 0.001, "loss": 2.4935, "step": 213444 }, { "epoch": 40.99, "learning_rate": 0.001, "loss": 2.5032, "step": 213456 }, { "epoch": 40.99, "learning_rate": 0.001, "loss": 2.4987, "step": 213468 }, { "epoch": 40.99, "learning_rate": 0.001, "loss": 2.4983, "step": 213480 }, { "epoch": 40.99, "learning_rate": 0.001, "loss": 2.5033, "step": 213492 }, { "epoch": 41.0, "learning_rate": 0.001, "loss": 2.4971, "step": 213504 }, { "epoch": 41.0, "learning_rate": 0.001, "loss": 2.5026, "step": 213516 }, { "epoch": 41.0, "learning_rate": 0.001, "loss": 2.5, "step": 213528 }, { "epoch": 41.0, "learning_rate": 0.001, "loss": 2.4786, "step": 213540 }, { "epoch": 41.0, "learning_rate": 0.001, "loss": 2.4699, "step": 213552 }, { "epoch": 41.01, "learning_rate": 0.001, "loss": 2.4927, "step": 213564 }, { "epoch": 41.01, "learning_rate": 0.001, "loss": 2.4773, "step": 213576 }, { "epoch": 41.01, "learning_rate": 0.001, "loss": 2.4786, "step": 213588 }, { "epoch": 41.01, "learning_rate": 0.001, "loss": 2.4773, "step": 213600 }, { "epoch": 41.02, "learning_rate": 0.001, "loss": 2.4748, "step": 213612 }, { "epoch": 41.02, "learning_rate": 0.001, "loss": 2.4832, "step": 213624 }, { "epoch": 41.02, "learning_rate": 0.001, "loss": 2.4879, "step": 213636 }, { "epoch": 41.02, "learning_rate": 0.001, "loss": 2.4792, "step": 213648 }, { "epoch": 41.03, "learning_rate": 0.001, "loss": 2.4926, "step": 213660 }, { "epoch": 41.03, "learning_rate": 0.001, "loss": 2.4888, "step": 213672 }, { "epoch": 41.03, "learning_rate": 0.001, "loss": 2.4781, "step": 213684 }, { "epoch": 41.03, "learning_rate": 0.001, "loss": 2.4839, "step": 213696 }, { "epoch": 41.03, "learning_rate": 0.001, "loss": 2.4831, "step": 213708 }, { "epoch": 41.04, "learning_rate": 0.001, "loss": 2.4796, "step": 213720 }, { "epoch": 41.04, "learning_rate": 0.001, "loss": 2.4789, "step": 213732 }, { "epoch": 41.04, "learning_rate": 0.001, "loss": 2.4845, "step": 213744 }, { "epoch": 41.04, "eval_ag_news_accuracy": 0.33009375, "eval_ag_news_bleu_score": 5.021169210371341, "eval_ag_news_bleu_score_sem": 0.15638525229831882, "eval_ag_news_emb_cos_sim": 0.8251120448112488, "eval_ag_news_emb_cos_sim_sem": 0.0059141320523791955, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.470844030380249, "eval_ag_news_n_ngrams_match_1": 14.6, "eval_ag_news_n_ngrams_match_2": 3.308, "eval_ag_news_n_ngrams_match_3": 0.956, "eval_ag_news_num_pred_words": 47.136, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.163878281823266, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36112328265591154, "eval_ag_news_runtime": 28.624, "eval_ag_news_samples_per_second": 17.468, "eval_ag_news_steps_per_second": 0.035, "eval_ag_news_token_set_f1": 0.36095395962152366, "eval_ag_news_token_set_f1_sem": 0.004518493952908497, "eval_ag_news_token_set_precision": 0.34960525361713585, "eval_ag_news_token_set_recall": 0.3858555758987579, "eval_ag_news_true_num_tokens": 56.09375, "step": 213750 }, { "epoch": 41.04, "eval_anthropic_toxic_prompts_accuracy": 0.1164375, "eval_anthropic_toxic_prompts_bleu_score": 3.2782936240360314, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12083623366515364, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6817408800125122, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009024195419202642, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1967227458953857, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778, "eval_anthropic_toxic_prompts_num_pred_words": 47.138, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.45226246254783, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2198347171276814, "eval_anthropic_toxic_prompts_runtime": 29.821, "eval_anthropic_toxic_prompts_samples_per_second": 16.767, "eval_anthropic_toxic_prompts_steps_per_second": 0.034, "eval_anthropic_toxic_prompts_token_set_f1": 0.3570814592866485, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006393325882212609, "eval_anthropic_toxic_prompts_token_set_precision": 0.44961072120712753, "eval_anthropic_toxic_prompts_token_set_recall": 0.32034972562121095, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 213750 }, { "epoch": 41.04, "eval_arxiv_accuracy": 0.35471875, "eval_arxiv_bleu_score": 4.395262377424749, "eval_arxiv_bleu_score_sem": 0.12285051178584978, "eval_arxiv_emb_cos_sim": 0.7829989790916443, "eval_arxiv_emb_cos_sim_sem": 0.007437658307984517, "eval_arxiv_emb_top1_equal": 0.34375, "eval_arxiv_emb_top1_equal_sem": 0.04214578430296913, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.333362579345703, "eval_arxiv_n_ngrams_match_1": 15.882, "eval_arxiv_n_ngrams_match_2": 3.064, "eval_arxiv_n_ngrams_match_3": 0.628, "eval_arxiv_num_pred_words": 41.092, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.032444719762797, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3798240210446542, "eval_arxiv_runtime": 28.8102, "eval_arxiv_samples_per_second": 17.355, "eval_arxiv_steps_per_second": 0.035, "eval_arxiv_token_set_f1": 0.36853587317567066, "eval_arxiv_token_set_f1_sem": 0.004078921244420197, "eval_arxiv_token_set_precision": 0.3225207895017115, "eval_arxiv_token_set_recall": 0.4442408650522827, "eval_arxiv_true_num_tokens": 64.0, "step": 213750 }, { "epoch": 41.04, "eval_python_code_alpaca_accuracy": 0.16084375, "eval_python_code_alpaca_bleu_score": 4.601435887369982, "eval_python_code_alpaca_bleu_score_sem": 0.13867339875978243, "eval_python_code_alpaca_emb_cos_sim": 0.7690881490707397, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007758508436263286, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8940842151641846, "eval_python_code_alpaca_n_ngrams_match_1": 9.992, "eval_python_code_alpaca_n_ngrams_match_2": 2.936, "eval_python_code_alpaca_n_ngrams_match_3": 0.948, "eval_python_code_alpaca_num_pred_words": 43.204, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 18.066948425516664, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34104713195191094, "eval_python_code_alpaca_runtime": 32.3754, "eval_python_code_alpaca_samples_per_second": 15.444, "eval_python_code_alpaca_steps_per_second": 0.031, "eval_python_code_alpaca_token_set_f1": 0.4810809371042541, "eval_python_code_alpaca_token_set_f1_sem": 0.005493721279410755, "eval_python_code_alpaca_token_set_precision": 0.5410872989249331, "eval_python_code_alpaca_token_set_recall": 0.45792149389525905, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 213750 }, { "epoch": 41.04, "eval_wikibio_accuracy": 0.3305625, "eval_wikibio_bleu_score": 6.193185836495677, "eval_wikibio_bleu_score_sem": 0.2152254404163372, "eval_wikibio_emb_cos_sim": 0.7507250905036926, "eval_wikibio_emb_cos_sim_sem": 0.008622931079598611, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6053049564361572, "eval_wikibio_n_ngrams_match_1": 10.338, "eval_wikibio_n_ngrams_match_2": 3.484, "eval_wikibio_n_ngrams_match_3": 1.318, "eval_wikibio_num_pred_words": 36.028, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.7929023789624, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3664291891461958, "eval_wikibio_runtime": 23.4455, "eval_wikibio_samples_per_second": 21.326, "eval_wikibio_steps_per_second": 0.043, "eval_wikibio_token_set_f1": 0.3289246859551673, "eval_wikibio_token_set_f1_sem": 0.005232198835774688, "eval_wikibio_token_set_precision": 0.337068872071351, "eval_wikibio_token_set_recall": 0.3361057086222319, "eval_wikibio_true_num_tokens": 61.1328125, "step": 213750 }, { "epoch": 41.04, "eval_nq_accuracy": 0.5374375, "eval_nq_bleu_score": 12.379697384449983, "eval_nq_bleu_score_sem": 0.49119424103565773, "eval_nq_emb_cos_sim": 0.8429065942764282, "eval_nq_emb_cos_sim_sem": 0.006428461898558779, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.113908290863037, "eval_nq_n_ngrams_match_1": 23.766, "eval_nq_n_ngrams_match_2": 8.868, "eval_nq_n_ngrams_match_3": 4.178, "eval_nq_num_pred_words": 49.446, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.28054088768326, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4611481037840349, "eval_nq_runtime": 31.6604, "eval_nq_samples_per_second": 15.793, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.4730361680173183, "eval_nq_token_set_f1_sem": 0.005038475836783473, "eval_nq_token_set_precision": 0.4335635735805142, "eval_nq_token_set_recall": 0.5272752864067703, "eval_nq_true_num_tokens": 64.0, "step": 213750 }, { "epoch": 41.04, "learning_rate": 0.001, "loss": 2.4884, "step": 213756 }, { "epoch": 41.05, "learning_rate": 0.001, "loss": 2.4833, "step": 213768 }, { "epoch": 41.05, "learning_rate": 0.001, "loss": 2.4934, "step": 213780 }, { "epoch": 41.05, "learning_rate": 0.001, "loss": 2.4945, "step": 213792 }, { "epoch": 41.05, "learning_rate": 0.001, "loss": 2.4871, "step": 213804 }, { "epoch": 41.06, "learning_rate": 0.001, "loss": 2.4818, "step": 213816 }, { "epoch": 41.06, "learning_rate": 0.001, "loss": 2.4852, "step": 213828 }, { "epoch": 41.06, "learning_rate": 0.001, "loss": 2.4806, "step": 213840 }, { "epoch": 41.06, "learning_rate": 0.001, "loss": 2.4818, "step": 213852 }, { "epoch": 41.06, "learning_rate": 0.001, "loss": 2.4854, "step": 213864 }, { "epoch": 41.07, "learning_rate": 0.001, "loss": 2.4884, "step": 213876 }, { "epoch": 41.07, "learning_rate": 0.001, "loss": 2.4881, "step": 213888 }, { "epoch": 41.07, "learning_rate": 0.001, "loss": 2.4837, "step": 213900 }, { "epoch": 41.07, "learning_rate": 0.001, "loss": 2.4941, "step": 213912 }, { "epoch": 41.08, "learning_rate": 0.001, "loss": 2.4933, "step": 213924 }, { "epoch": 41.08, "learning_rate": 0.001, "loss": 2.4844, "step": 213936 }, { "epoch": 41.08, "learning_rate": 0.001, "loss": 2.4826, "step": 213948 }, { "epoch": 41.08, "learning_rate": 0.001, "loss": 2.4823, "step": 213960 }, { "epoch": 41.09, "learning_rate": 0.001, "loss": 2.4817, "step": 213972 }, { "epoch": 41.09, "learning_rate": 0.001, "loss": 2.4832, "step": 213984 }, { "epoch": 41.09, "learning_rate": 0.001, "loss": 2.4915, "step": 213996 }, { "epoch": 41.09, "learning_rate": 0.001, "loss": 2.4804, "step": 214008 }, { "epoch": 41.09, "learning_rate": 0.001, "loss": 2.4854, "step": 214020 }, { "epoch": 41.1, "learning_rate": 0.001, "loss": 2.4814, "step": 214032 }, { "epoch": 41.1, "learning_rate": 0.001, "loss": 2.4821, "step": 214044 }, { "epoch": 41.1, "learning_rate": 0.001, "loss": 2.4818, "step": 214056 }, { "epoch": 41.1, "learning_rate": 0.001, "loss": 2.4817, "step": 214068 }, { "epoch": 41.11, "learning_rate": 0.001, "loss": 2.4862, "step": 214080 }, { "epoch": 41.11, "learning_rate": 0.001, "loss": 2.488, "step": 214092 }, { "epoch": 41.11, "learning_rate": 0.001, "loss": 2.4796, "step": 214104 }, { "epoch": 41.11, "learning_rate": 0.001, "loss": 2.4825, "step": 214116 }, { "epoch": 41.12, "learning_rate": 0.001, "loss": 2.4911, "step": 214128 }, { "epoch": 41.12, "learning_rate": 0.001, "loss": 2.4936, "step": 214140 }, { "epoch": 41.12, "learning_rate": 0.001, "loss": 2.4788, "step": 214152 }, { "epoch": 41.12, "learning_rate": 0.001, "loss": 2.4853, "step": 214164 }, { "epoch": 41.12, "learning_rate": 0.001, "loss": 2.4811, "step": 214176 }, { "epoch": 41.13, "learning_rate": 0.001, "loss": 2.4826, "step": 214188 }, { "epoch": 41.13, "learning_rate": 0.001, "loss": 2.4925, "step": 214200 }, { "epoch": 41.13, "learning_rate": 0.001, "loss": 2.489, "step": 214212 }, { "epoch": 41.13, "learning_rate": 0.001, "loss": 2.4808, "step": 214224 }, { "epoch": 41.14, "learning_rate": 0.001, "loss": 2.4916, "step": 214236 }, { "epoch": 41.14, "learning_rate": 0.001, "loss": 2.4872, "step": 214248 }, { "epoch": 41.14, "learning_rate": 0.001, "loss": 2.4914, "step": 214260 }, { "epoch": 41.14, "learning_rate": 0.001, "loss": 2.4915, "step": 214272 }, { "epoch": 41.15, "learning_rate": 0.001, "loss": 2.4862, "step": 214284 }, { "epoch": 41.15, "learning_rate": 0.001, "loss": 2.48, "step": 214296 }, { "epoch": 41.15, "learning_rate": 0.001, "loss": 2.484, "step": 214308 }, { "epoch": 41.15, "learning_rate": 0.001, "loss": 2.4872, "step": 214320 }, { "epoch": 41.15, "learning_rate": 0.001, "loss": 2.4854, "step": 214332 }, { "epoch": 41.16, "learning_rate": 0.001, "loss": 2.485, "step": 214344 }, { "epoch": 41.16, "learning_rate": 0.001, "loss": 2.4852, "step": 214356 }, { "epoch": 41.16, "learning_rate": 0.001, "loss": 2.4856, "step": 214368 }, { "epoch": 41.16, "eval_ag_news_accuracy": 0.32984375, "eval_ag_news_bleu_score": 5.0500648045856416, "eval_ag_news_bleu_score_sem": 0.15145892087035598, "eval_ag_news_emb_cos_sim": 0.8266647458076477, "eval_ag_news_emb_cos_sim_sem": 0.006768569843884487, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4688570499420166, "eval_ag_news_n_ngrams_match_1": 14.59, "eval_ag_news_n_ngrams_match_2": 3.314, "eval_ag_news_n_ngrams_match_3": 0.962, "eval_ag_news_num_pred_words": 46.63, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.10003273579078, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3645330028525393, "eval_ag_news_runtime": 14.5066, "eval_ag_news_samples_per_second": 34.467, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36426206718323795, "eval_ag_news_token_set_f1_sem": 0.004448607279050028, "eval_ag_news_token_set_precision": 0.35096366226675657, "eval_ag_news_token_set_recall": 0.3927617778315918, "eval_ag_news_true_num_tokens": 56.09375, "step": 214375 }, { "epoch": 41.16, "eval_anthropic_toxic_prompts_accuracy": 0.11621875, "eval_anthropic_toxic_prompts_bleu_score": 3.1590549159351045, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11324066800428384, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6883586049079895, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008477277498045532, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1922554969787598, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.72, "eval_anthropic_toxic_prompts_num_pred_words": 47.438, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.343271745139432, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21667823639138195, "eval_anthropic_toxic_prompts_runtime": 16.8899, "eval_anthropic_toxic_prompts_samples_per_second": 29.603, "eval_anthropic_toxic_prompts_steps_per_second": 0.059, "eval_anthropic_toxic_prompts_token_set_f1": 0.3609796755322638, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0062846575196215115, "eval_anthropic_toxic_prompts_token_set_precision": 0.44631097934195413, "eval_anthropic_toxic_prompts_token_set_recall": 0.3291285110048185, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 214375 }, { "epoch": 41.16, "eval_arxiv_accuracy": 0.35440625, "eval_arxiv_bleu_score": 4.517363137791126, "eval_arxiv_bleu_score_sem": 0.14084658776327824, "eval_arxiv_emb_cos_sim": 0.7761874794960022, "eval_arxiv_emb_cos_sim_sem": 0.007372744258743644, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3308255672454834, "eval_arxiv_n_ngrams_match_1": 15.536, "eval_arxiv_n_ngrams_match_2": 3.086, "eval_arxiv_n_ngrams_match_3": 0.706, "eval_arxiv_num_pred_words": 39.976, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.961416206506318, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3733102903340708, "eval_arxiv_runtime": 18.8546, "eval_arxiv_samples_per_second": 26.519, "eval_arxiv_steps_per_second": 0.053, "eval_arxiv_token_set_f1": 0.36435647260772625, "eval_arxiv_token_set_f1_sem": 0.004353442177504389, "eval_arxiv_token_set_precision": 0.31615323023331926, "eval_arxiv_token_set_recall": 0.4490499772382298, "eval_arxiv_true_num_tokens": 64.0, "step": 214375 }, { "epoch": 41.16, "eval_python_code_alpaca_accuracy": 0.16225, "eval_python_code_alpaca_bleu_score": 4.7667569386599205, "eval_python_code_alpaca_bleu_score_sem": 0.15668912231222806, "eval_python_code_alpaca_emb_cos_sim": 0.760850191116333, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008738534616707929, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.859790802001953, "eval_python_code_alpaca_n_ngrams_match_1": 9.938, "eval_python_code_alpaca_n_ngrams_match_2": 3.014, "eval_python_code_alpaca_n_ngrams_match_3": 1.044, "eval_python_code_alpaca_num_pred_words": 43.26, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.45787440216678, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33944286564659687, "eval_python_code_alpaca_runtime": 28.3846, "eval_python_code_alpaca_samples_per_second": 17.615, "eval_python_code_alpaca_steps_per_second": 0.035, "eval_python_code_alpaca_token_set_f1": 0.4826520430671495, "eval_python_code_alpaca_token_set_f1_sem": 0.005614066338067491, "eval_python_code_alpaca_token_set_precision": 0.543396768205565, "eval_python_code_alpaca_token_set_recall": 0.4575205354507373, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 214375 }, { "epoch": 41.16, "eval_wikibio_accuracy": 0.32696875, "eval_wikibio_bleu_score": 6.3706395457308505, "eval_wikibio_bleu_score_sem": 0.2218275256674258, "eval_wikibio_emb_cos_sim": 0.7428708672523499, "eval_wikibio_emb_cos_sim_sem": 0.009724521283494323, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6633644104003906, "eval_wikibio_n_ngrams_match_1": 10.508, "eval_wikibio_n_ngrams_match_2": 3.566, "eval_wikibio_n_ngrams_match_3": 1.326, "eval_wikibio_num_pred_words": 36.516, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.99230856509684, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3714831456641676, "eval_wikibio_runtime": 30.2693, "eval_wikibio_samples_per_second": 16.518, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.33239982870428225, "eval_wikibio_token_set_f1_sem": 0.005208510645716343, "eval_wikibio_token_set_precision": 0.3424158931721413, "eval_wikibio_token_set_recall": 0.3388729467404177, "eval_wikibio_true_num_tokens": 61.1328125, "step": 214375 }, { "epoch": 41.16, "eval_nq_accuracy": 0.536125, "eval_nq_bleu_score": 11.95739626544232, "eval_nq_bleu_score_sem": 0.4800209515731253, "eval_nq_emb_cos_sim": 0.8345988392829895, "eval_nq_emb_cos_sim_sem": 0.0072397742444143135, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1194307804107666, "eval_nq_n_ngrams_match_1": 23.534, "eval_nq_n_ngrams_match_2": 8.686, "eval_nq_n_ngrams_match_3": 3.956, "eval_nq_num_pred_words": 48.94, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.326396590462897, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4577215320803478, "eval_nq_runtime": 19.8451, "eval_nq_samples_per_second": 25.195, "eval_nq_steps_per_second": 0.05, "eval_nq_token_set_f1": 0.4708188621334759, "eval_nq_token_set_f1_sem": 0.0049491715082104135, "eval_nq_token_set_precision": 0.429296364658933, "eval_nq_token_set_recall": 0.5289907161663872, "eval_nq_true_num_tokens": 64.0, "step": 214375 }, { "epoch": 41.16, "learning_rate": 0.001, "loss": 2.4881, "step": 214380 }, { "epoch": 41.17, "learning_rate": 0.001, "loss": 2.4882, "step": 214392 }, { "epoch": 41.17, "learning_rate": 0.001, "loss": 2.4927, "step": 214404 }, { "epoch": 41.17, "learning_rate": 0.001, "loss": 2.4843, "step": 214416 }, { "epoch": 41.17, "learning_rate": 0.001, "loss": 2.4956, "step": 214428 }, { "epoch": 41.18, "learning_rate": 0.001, "loss": 2.4861, "step": 214440 }, { "epoch": 41.18, "learning_rate": 0.001, "loss": 2.4861, "step": 214452 }, { "epoch": 41.18, "learning_rate": 0.001, "loss": 2.4906, "step": 214464 }, { "epoch": 41.18, "learning_rate": 0.001, "loss": 2.4928, "step": 214476 }, { "epoch": 41.18, "learning_rate": 0.001, "loss": 2.4807, "step": 214488 }, { "epoch": 41.19, "learning_rate": 0.001, "loss": 2.4943, "step": 214500 }, { "epoch": 41.19, "learning_rate": 0.001, "loss": 2.4871, "step": 214512 }, { "epoch": 41.19, "learning_rate": 0.001, "loss": 2.5022, "step": 214524 }, { "epoch": 41.19, "learning_rate": 0.001, "loss": 2.4959, "step": 214536 }, { "epoch": 41.2, "learning_rate": 0.001, "loss": 2.4906, "step": 214548 }, { "epoch": 41.2, "learning_rate": 0.001, "loss": 2.4894, "step": 214560 }, { "epoch": 41.2, "learning_rate": 0.001, "loss": 2.4936, "step": 214572 }, { "epoch": 41.2, "learning_rate": 0.001, "loss": 2.4886, "step": 214584 }, { "epoch": 41.21, "learning_rate": 0.001, "loss": 2.4827, "step": 214596 }, { "epoch": 41.21, "learning_rate": 0.001, "loss": 2.4926, "step": 214608 }, { "epoch": 41.21, "learning_rate": 0.001, "loss": 2.494, "step": 214620 }, { "epoch": 41.21, "learning_rate": 0.001, "loss": 2.4905, "step": 214632 }, { "epoch": 41.21, "learning_rate": 0.001, "loss": 2.4908, "step": 214644 }, { "epoch": 41.22, "learning_rate": 0.001, "loss": 2.4879, "step": 214656 }, { "epoch": 41.22, "learning_rate": 0.001, "loss": 2.4878, "step": 214668 }, { "epoch": 41.22, "learning_rate": 0.001, "loss": 2.4845, "step": 214680 }, { "epoch": 41.22, "learning_rate": 0.001, "loss": 2.4912, "step": 214692 }, { "epoch": 41.23, "learning_rate": 0.001, "loss": 2.481, "step": 214704 }, { "epoch": 41.23, "learning_rate": 0.001, "loss": 2.4861, "step": 214716 }, { "epoch": 41.23, "learning_rate": 0.001, "loss": 2.4959, "step": 214728 }, { "epoch": 41.23, "learning_rate": 0.001, "loss": 2.4886, "step": 214740 }, { "epoch": 41.24, "learning_rate": 0.001, "loss": 2.4915, "step": 214752 }, { "epoch": 41.24, "learning_rate": 0.001, "loss": 2.4826, "step": 214764 }, { "epoch": 41.24, "learning_rate": 0.001, "loss": 2.4907, "step": 214776 }, { "epoch": 41.24, "learning_rate": 0.001, "loss": 2.4911, "step": 214788 }, { "epoch": 41.24, "learning_rate": 0.001, "loss": 2.4851, "step": 214800 }, { "epoch": 41.25, "learning_rate": 0.001, "loss": 2.4806, "step": 214812 }, { "epoch": 41.25, "learning_rate": 0.001, "loss": 2.4917, "step": 214824 }, { "epoch": 41.25, "learning_rate": 0.001, "loss": 2.4946, "step": 214836 }, { "epoch": 41.25, "learning_rate": 0.001, "loss": 2.4933, "step": 214848 }, { "epoch": 41.26, "learning_rate": 0.001, "loss": 2.4866, "step": 214860 }, { "epoch": 41.26, "learning_rate": 0.001, "loss": 2.4895, "step": 214872 }, { "epoch": 41.26, "learning_rate": 0.001, "loss": 2.4984, "step": 214884 }, { "epoch": 41.26, "learning_rate": 0.001, "loss": 2.4864, "step": 214896 }, { "epoch": 41.26, "learning_rate": 0.001, "loss": 2.4893, "step": 214908 }, { "epoch": 41.27, "learning_rate": 0.001, "loss": 2.4911, "step": 214920 }, { "epoch": 41.27, "learning_rate": 0.001, "loss": 2.4968, "step": 214932 }, { "epoch": 41.27, "learning_rate": 0.001, "loss": 2.4851, "step": 214944 }, { "epoch": 41.27, "learning_rate": 0.001, "loss": 2.4888, "step": 214956 }, { "epoch": 41.28, "learning_rate": 0.001, "loss": 2.483, "step": 214968 }, { "epoch": 41.28, "learning_rate": 0.001, "loss": 2.4931, "step": 214980 }, { "epoch": 41.28, "learning_rate": 0.001, "loss": 2.4943, "step": 214992 }, { "epoch": 41.28, "eval_ag_news_accuracy": 0.3286875, "eval_ag_news_bleu_score": 4.911106548055742, "eval_ag_news_bleu_score_sem": 0.14961079662746388, "eval_ag_news_emb_cos_sim": 0.8296934366226196, "eval_ag_news_emb_cos_sim_sem": 0.0058848492122872366, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4643592834472656, "eval_ag_news_n_ngrams_match_1": 14.346, "eval_ag_news_n_ngrams_match_2": 3.224, "eval_ag_news_n_ngrams_match_3": 0.902, "eval_ag_news_num_pred_words": 46.742, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.955978488105877, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3581928938976931, "eval_ag_news_runtime": 15.8479, "eval_ag_news_samples_per_second": 31.55, "eval_ag_news_steps_per_second": 0.063, "eval_ag_news_token_set_f1": 0.3568847740016171, "eval_ag_news_token_set_f1_sem": 0.004431182206229447, "eval_ag_news_token_set_precision": 0.3438032532362278, "eval_ag_news_token_set_recall": 0.3854205918315783, "eval_ag_news_true_num_tokens": 56.09375, "step": 215000 }, { "epoch": 41.28, "eval_anthropic_toxic_prompts_accuracy": 0.1150625, "eval_anthropic_toxic_prompts_bleu_score": 3.0956788601167107, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11819492799749814, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6837218403816223, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009700210454437634, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.20171856880188, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.32, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, "eval_anthropic_toxic_prompts_num_pred_words": 47.362, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.574727287052745, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21645741784773337, "eval_anthropic_toxic_prompts_runtime": 27.1114, "eval_anthropic_toxic_prompts_samples_per_second": 18.442, "eval_anthropic_toxic_prompts_steps_per_second": 0.037, "eval_anthropic_toxic_prompts_token_set_f1": 0.35735044911598524, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006285558259126718, "eval_anthropic_toxic_prompts_token_set_precision": 0.44404995470070413, "eval_anthropic_toxic_prompts_token_set_recall": 0.3261554018538246, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 215000 }, { "epoch": 41.28, "eval_arxiv_accuracy": 0.353625, "eval_arxiv_bleu_score": 4.539136264421966, "eval_arxiv_bleu_score_sem": 0.12932764220855839, "eval_arxiv_emb_cos_sim": 0.7805837988853455, "eval_arxiv_emb_cos_sim_sem": 0.006615535295485938, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3242313861846924, "eval_arxiv_n_ngrams_match_1": 15.794, "eval_arxiv_n_ngrams_match_2": 3.102, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 40.87, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.777640157526058, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37585548490655346, "eval_arxiv_runtime": 33.5625, "eval_arxiv_samples_per_second": 14.898, "eval_arxiv_steps_per_second": 0.03, "eval_arxiv_token_set_f1": 0.3692386947731102, "eval_arxiv_token_set_f1_sem": 0.0042843514083066355, "eval_arxiv_token_set_precision": 0.32221066340479737, "eval_arxiv_token_set_recall": 0.447671855288058, "eval_arxiv_true_num_tokens": 64.0, "step": 215000 }, { "epoch": 41.28, "eval_python_code_alpaca_accuracy": 0.164625, "eval_python_code_alpaca_bleu_score": 4.733642276204919, "eval_python_code_alpaca_bleu_score_sem": 0.15469169785408335, "eval_python_code_alpaca_emb_cos_sim": 0.7663385272026062, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007827949320452032, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8512251377105713, "eval_python_code_alpaca_n_ngrams_match_1": 10.014, "eval_python_code_alpaca_n_ngrams_match_2": 2.952, "eval_python_code_alpaca_n_ngrams_match_3": 1.024, "eval_python_code_alpaca_num_pred_words": 43.668, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.308974733487993, "eval_python_code_alpaca_pred_num_tokens": 62.9921875, "eval_python_code_alpaca_rouge_score": 0.3413899149257977, "eval_python_code_alpaca_runtime": 16.8678, "eval_python_code_alpaca_samples_per_second": 29.642, "eval_python_code_alpaca_steps_per_second": 0.059, "eval_python_code_alpaca_token_set_f1": 0.4822155775355044, "eval_python_code_alpaca_token_set_f1_sem": 0.0054798389208696825, "eval_python_code_alpaca_token_set_precision": 0.5454143116803508, "eval_python_code_alpaca_token_set_recall": 0.45402405975938065, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 215000 }, { "epoch": 41.28, "eval_wikibio_accuracy": 0.33246875, "eval_wikibio_bleu_score": 6.354835710628173, "eval_wikibio_bleu_score_sem": 0.2192777670804854, "eval_wikibio_emb_cos_sim": 0.7575595378875732, "eval_wikibio_emb_cos_sim_sem": 0.008641644280337448, "eval_wikibio_emb_top1_equal": 0.265625, "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6177446842193604, "eval_wikibio_n_ngrams_match_1": 10.304, "eval_wikibio_n_ngrams_match_2": 3.564, "eval_wikibio_n_ngrams_match_3": 1.338, "eval_wikibio_num_pred_words": 36.224, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.25345470260978, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36866684470874084, "eval_wikibio_runtime": 17.4541, "eval_wikibio_samples_per_second": 28.646, "eval_wikibio_steps_per_second": 0.057, "eval_wikibio_token_set_f1": 0.32775999538201855, "eval_wikibio_token_set_f1_sem": 0.005387724717493739, "eval_wikibio_token_set_precision": 0.33604664044376237, "eval_wikibio_token_set_recall": 0.3355184819412624, "eval_wikibio_true_num_tokens": 61.1328125, "step": 215000 }, { "epoch": 41.28, "eval_nq_accuracy": 0.53690625, "eval_nq_bleu_score": 12.167901512556057, "eval_nq_bleu_score_sem": 0.48271683559208606, "eval_nq_emb_cos_sim": 0.8382583260536194, "eval_nq_emb_cos_sim_sem": 0.007045135488179449, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1166770458221436, "eval_nq_n_ngrams_match_1": 23.418, "eval_nq_n_ngrams_match_2": 8.758, "eval_nq_n_ngrams_match_3": 4.096, "eval_nq_num_pred_words": 49.038, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.303499444973093, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45391975960684405, "eval_nq_runtime": 19.1117, "eval_nq_samples_per_second": 26.162, "eval_nq_steps_per_second": 0.052, "eval_nq_token_set_f1": 0.46770886768053815, "eval_nq_token_set_f1_sem": 0.004929054691161523, "eval_nq_token_set_precision": 0.42591227686410665, "eval_nq_token_set_recall": 0.5272969797722891, "eval_nq_true_num_tokens": 64.0, "step": 215000 }, { "epoch": 41.28, "learning_rate": 0.001, "loss": 2.4999, "step": 215004 }, { "epoch": 41.29, "learning_rate": 0.001, "loss": 2.5016, "step": 215016 }, { "epoch": 41.29, "learning_rate": 0.001, "loss": 2.4892, "step": 215028 }, { "epoch": 41.29, "learning_rate": 0.001, "loss": 2.4952, "step": 215040 }, { "epoch": 41.29, "learning_rate": 0.001, "loss": 2.4998, "step": 215052 }, { "epoch": 41.29, "learning_rate": 0.001, "loss": 2.499, "step": 215064 }, { "epoch": 41.3, "learning_rate": 0.001, "loss": 2.4907, "step": 215076 }, { "epoch": 41.3, "learning_rate": 0.001, "loss": 2.5003, "step": 215088 }, { "epoch": 41.3, "learning_rate": 0.001, "loss": 2.4891, "step": 215100 }, { "epoch": 41.3, "learning_rate": 0.001, "loss": 2.4913, "step": 215112 }, { "epoch": 41.31, "learning_rate": 0.001, "loss": 2.4921, "step": 215124 }, { "epoch": 41.31, "learning_rate": 0.001, "loss": 2.4873, "step": 215136 }, { "epoch": 41.31, "learning_rate": 0.001, "loss": 2.4981, "step": 215148 }, { "epoch": 41.31, "learning_rate": 0.001, "loss": 2.4957, "step": 215160 }, { "epoch": 41.32, "learning_rate": 0.001, "loss": 2.4914, "step": 215172 }, { "epoch": 41.32, "learning_rate": 0.001, "loss": 2.4928, "step": 215184 }, { "epoch": 41.32, "learning_rate": 0.001, "loss": 2.4921, "step": 215196 }, { "epoch": 41.32, "learning_rate": 0.001, "loss": 2.5002, "step": 215208 }, { "epoch": 41.32, "learning_rate": 0.001, "loss": 2.4881, "step": 215220 }, { "epoch": 41.33, "learning_rate": 0.001, "loss": 2.498, "step": 215232 }, { "epoch": 41.33, "learning_rate": 0.001, "loss": 2.4904, "step": 215244 }, { "epoch": 41.33, "learning_rate": 0.001, "loss": 2.4847, "step": 215256 }, { "epoch": 41.33, "learning_rate": 0.001, "loss": 2.4878, "step": 215268 }, { "epoch": 41.34, "learning_rate": 0.001, "loss": 2.4922, "step": 215280 }, { "epoch": 41.34, "learning_rate": 0.001, "loss": 2.4931, "step": 215292 }, { "epoch": 41.34, "learning_rate": 0.001, "loss": 2.5043, "step": 215304 }, { "epoch": 41.34, "learning_rate": 0.001, "loss": 2.4951, "step": 215316 }, { "epoch": 41.35, "learning_rate": 0.001, "loss": 2.4905, "step": 215328 }, { "epoch": 41.35, "learning_rate": 0.001, "loss": 2.495, "step": 215340 }, { "epoch": 41.35, "learning_rate": 0.001, "loss": 2.4877, "step": 215352 }, { "epoch": 41.35, "learning_rate": 0.001, "loss": 2.488, "step": 215364 }, { "epoch": 41.35, "learning_rate": 0.001, "loss": 2.4932, "step": 215376 }, { "epoch": 41.36, "learning_rate": 0.001, "loss": 2.4934, "step": 215388 }, { "epoch": 41.36, "learning_rate": 0.001, "loss": 2.4879, "step": 215400 }, { "epoch": 41.36, "learning_rate": 0.001, "loss": 2.4966, "step": 215412 }, { "epoch": 41.36, "learning_rate": 0.001, "loss": 2.4932, "step": 215424 }, { "epoch": 41.37, "learning_rate": 0.001, "loss": 2.4862, "step": 215436 }, { "epoch": 41.37, "learning_rate": 0.001, "loss": 2.4944, "step": 215448 }, { "epoch": 41.37, "learning_rate": 0.001, "loss": 2.4851, "step": 215460 }, { "epoch": 41.37, "learning_rate": 0.001, "loss": 2.4911, "step": 215472 }, { "epoch": 41.38, "learning_rate": 0.001, "loss": 2.4921, "step": 215484 }, { "epoch": 41.38, "learning_rate": 0.001, "loss": 2.4922, "step": 215496 }, { "epoch": 41.38, "learning_rate": 0.001, "loss": 2.4923, "step": 215508 }, { "epoch": 41.38, "learning_rate": 0.001, "loss": 2.5, "step": 215520 }, { "epoch": 41.38, "learning_rate": 0.001, "loss": 2.4849, "step": 215532 }, { "epoch": 41.39, "learning_rate": 0.001, "loss": 2.4989, "step": 215544 }, { "epoch": 41.39, "learning_rate": 0.001, "loss": 2.4926, "step": 215556 }, { "epoch": 41.39, "learning_rate": 0.001, "loss": 2.4834, "step": 215568 }, { "epoch": 41.39, "learning_rate": 0.001, "loss": 2.4939, "step": 215580 }, { "epoch": 41.4, "learning_rate": 0.001, "loss": 2.4952, "step": 215592 }, { "epoch": 41.4, "learning_rate": 0.001, "loss": 2.4946, "step": 215604 }, { "epoch": 41.4, "learning_rate": 0.001, "loss": 2.4889, "step": 215616 }, { "epoch": 41.4, "eval_ag_news_accuracy": 0.32859375, "eval_ag_news_bleu_score": 4.894812245394238, "eval_ag_news_bleu_score_sem": 0.1531967238026543, "eval_ag_news_emb_cos_sim": 0.8191208243370056, "eval_ag_news_emb_cos_sim_sem": 0.006856653464549715, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4753947257995605, "eval_ag_news_n_ngrams_match_1": 14.278, "eval_ag_news_n_ngrams_match_2": 3.196, "eval_ag_news_n_ngrams_match_3": 0.934, "eval_ag_news_num_pred_words": 46.552, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.310579839272364, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35686990856875433, "eval_ag_news_runtime": 18.7791, "eval_ag_news_samples_per_second": 26.625, "eval_ag_news_steps_per_second": 0.053, "eval_ag_news_token_set_f1": 0.355913359291341, "eval_ag_news_token_set_f1_sem": 0.00457827121597649, "eval_ag_news_token_set_precision": 0.34176478967173335, "eval_ag_news_token_set_recall": 0.3846986494841942, "eval_ag_news_true_num_tokens": 56.09375, "step": 215625 }, { "epoch": 41.4, "eval_anthropic_toxic_prompts_accuracy": 0.11503125, "eval_anthropic_toxic_prompts_bleu_score": 3.0877630092569657, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1131846609571181, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6885554790496826, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009313639559895713, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.178467273712158, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.258, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.948, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.714, "eval_anthropic_toxic_prompts_num_pred_words": 47.186, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.009924692248806, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21387175087449456, "eval_anthropic_toxic_prompts_runtime": 10.8721, "eval_anthropic_toxic_prompts_samples_per_second": 45.989, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.35715467711469967, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067049714524682075, "eval_anthropic_toxic_prompts_token_set_precision": 0.43857833331093654, "eval_anthropic_toxic_prompts_token_set_recall": 0.32829719280060493, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 215625 }, { "epoch": 41.4, "eval_arxiv_accuracy": 0.35271875, "eval_arxiv_bleu_score": 4.370168208140656, "eval_arxiv_bleu_score_sem": 0.12384095741061274, "eval_arxiv_emb_cos_sim": 0.7805910110473633, "eval_arxiv_emb_cos_sim_sem": 0.0067356505210194465, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.338520050048828, "eval_arxiv_n_ngrams_match_1": 15.608, "eval_arxiv_n_ngrams_match_2": 2.986, "eval_arxiv_n_ngrams_match_3": 0.652, "eval_arxiv_num_pred_words": 40.362, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.177394698476608, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37313406751281053, "eval_arxiv_runtime": 19.0748, "eval_arxiv_samples_per_second": 26.213, "eval_arxiv_steps_per_second": 0.052, "eval_arxiv_token_set_f1": 0.3670502874880372, "eval_arxiv_token_set_f1_sem": 0.004256057944297466, "eval_arxiv_token_set_precision": 0.3192087679268113, "eval_arxiv_token_set_recall": 0.4474315570824344, "eval_arxiv_true_num_tokens": 64.0, "step": 215625 }, { "epoch": 41.4, "eval_python_code_alpaca_accuracy": 0.163375, "eval_python_code_alpaca_bleu_score": 4.609929005459897, "eval_python_code_alpaca_bleu_score_sem": 0.15030918062664822, "eval_python_code_alpaca_emb_cos_sim": 0.7570693492889404, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008847097654482404, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8363194465637207, "eval_python_code_alpaca_n_ngrams_match_1": 9.792, "eval_python_code_alpaca_n_ngrams_match_2": 2.898, "eval_python_code_alpaca_n_ngrams_match_3": 0.99, "eval_python_code_alpaca_num_pred_words": 43.442, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.052885834519543, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3333159120233796, "eval_python_code_alpaca_runtime": 22.6613, "eval_python_code_alpaca_samples_per_second": 22.064, "eval_python_code_alpaca_steps_per_second": 0.044, "eval_python_code_alpaca_token_set_f1": 0.48357523990704065, "eval_python_code_alpaca_token_set_f1_sem": 0.005586495130759468, "eval_python_code_alpaca_token_set_precision": 0.5363463926542905, "eval_python_code_alpaca_token_set_recall": 0.46347628627462434, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 215625 }, { "epoch": 41.4, "eval_wikibio_accuracy": 0.3279375, "eval_wikibio_bleu_score": 6.334611971164382, "eval_wikibio_bleu_score_sem": 0.22534530571102912, "eval_wikibio_emb_cos_sim": 0.7648400068283081, "eval_wikibio_emb_cos_sim_sem": 0.00739425890781944, "eval_wikibio_emb_top1_equal": 0.2578125, "eval_wikibio_emb_top1_equal_sem": 0.038815656435002115, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6433780193328857, "eval_wikibio_n_ngrams_match_1": 10.366, "eval_wikibio_n_ngrams_match_2": 3.534, "eval_wikibio_n_ngrams_match_3": 1.392, "eval_wikibio_num_pred_words": 36.416, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.22072926446075, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.364576780067592, "eval_wikibio_runtime": 12.419, "eval_wikibio_samples_per_second": 40.261, "eval_wikibio_steps_per_second": 0.081, "eval_wikibio_token_set_f1": 0.32913321843529936, "eval_wikibio_token_set_f1_sem": 0.005290374367772688, "eval_wikibio_token_set_precision": 0.33872084585488477, "eval_wikibio_token_set_recall": 0.3356276853334591, "eval_wikibio_true_num_tokens": 61.1328125, "step": 215625 }, { "epoch": 41.4, "eval_nq_accuracy": 0.5360625, "eval_nq_bleu_score": 12.354282355795416, "eval_nq_bleu_score_sem": 0.4942688778690195, "eval_nq_emb_cos_sim": 0.8390867710113525, "eval_nq_emb_cos_sim_sem": 0.007167073564911053, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.117419958114624, "eval_nq_n_ngrams_match_1": 23.732, "eval_nq_n_ngrams_match_2": 8.874, "eval_nq_n_ngrams_match_3": 4.138, "eval_nq_num_pred_words": 49.026, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.30967050877712, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46196326280853994, "eval_nq_runtime": 15.8886, "eval_nq_samples_per_second": 31.469, "eval_nq_steps_per_second": 0.063, "eval_nq_token_set_f1": 0.47229959673054156, "eval_nq_token_set_f1_sem": 0.004810090559089124, "eval_nq_token_set_precision": 0.43130310019788043, "eval_nq_token_set_recall": 0.529068390268912, "eval_nq_true_num_tokens": 64.0, "step": 215625 }, { "epoch": 41.4, "learning_rate": 0.001, "loss": 2.4934, "step": 215628 }, { "epoch": 41.41, "learning_rate": 0.001, "loss": 2.4925, "step": 215640 }, { "epoch": 41.41, "learning_rate": 0.001, "loss": 2.5066, "step": 215652 }, { "epoch": 41.41, "learning_rate": 0.001, "loss": 2.497, "step": 215664 }, { "epoch": 41.41, "learning_rate": 0.001, "loss": 2.5014, "step": 215676 }, { "epoch": 41.41, "learning_rate": 0.001, "loss": 2.4979, "step": 215688 }, { "epoch": 41.42, "learning_rate": 0.001, "loss": 2.4957, "step": 215700 }, { "epoch": 41.42, "learning_rate": 0.001, "loss": 2.4964, "step": 215712 }, { "epoch": 41.42, "learning_rate": 0.001, "loss": 2.4857, "step": 215724 }, { "epoch": 41.42, "learning_rate": 0.001, "loss": 2.4943, "step": 215736 }, { "epoch": 41.43, "learning_rate": 0.001, "loss": 2.4816, "step": 215748 }, { "epoch": 41.43, "learning_rate": 0.001, "loss": 2.4904, "step": 215760 }, { "epoch": 41.43, "learning_rate": 0.001, "loss": 2.5005, "step": 215772 }, { "epoch": 41.43, "learning_rate": 0.001, "loss": 2.5018, "step": 215784 }, { "epoch": 41.44, "learning_rate": 0.001, "loss": 2.4957, "step": 215796 }, { "epoch": 41.44, "learning_rate": 0.001, "loss": 2.4928, "step": 215808 }, { "epoch": 41.44, "learning_rate": 0.001, "loss": 2.4946, "step": 215820 }, { "epoch": 41.44, "learning_rate": 0.001, "loss": 2.4956, "step": 215832 }, { "epoch": 41.44, "learning_rate": 0.001, "loss": 2.501, "step": 215844 }, { "epoch": 41.45, "learning_rate": 0.001, "loss": 2.4967, "step": 215856 }, { "epoch": 41.45, "learning_rate": 0.001, "loss": 2.4867, "step": 215868 }, { "epoch": 41.45, "learning_rate": 0.001, "loss": 2.4938, "step": 215880 }, { "epoch": 41.45, "learning_rate": 0.001, "loss": 2.5051, "step": 215892 }, { "epoch": 41.46, "learning_rate": 0.001, "loss": 2.5038, "step": 215904 }, { "epoch": 41.46, "learning_rate": 0.001, "loss": 2.4873, "step": 215916 }, { "epoch": 41.46, "learning_rate": 0.001, "loss": 2.4908, "step": 215928 }, { "epoch": 41.46, "learning_rate": 0.001, "loss": 2.4857, "step": 215940 }, { "epoch": 41.47, "learning_rate": 0.001, "loss": 2.4987, "step": 215952 }, { "epoch": 41.47, "learning_rate": 0.001, "loss": 2.4982, "step": 215964 }, { "epoch": 41.47, "learning_rate": 0.001, "loss": 2.4896, "step": 215976 }, { "epoch": 41.47, "learning_rate": 0.001, "loss": 2.4756, "step": 215988 }, { "epoch": 41.47, "learning_rate": 0.001, "loss": 2.4973, "step": 216000 }, { "epoch": 41.48, "learning_rate": 0.001, "loss": 2.5027, "step": 216012 }, { "epoch": 41.48, "learning_rate": 0.001, "loss": 2.493, "step": 216024 }, { "epoch": 41.48, "learning_rate": 0.001, "loss": 2.4894, "step": 216036 }, { "epoch": 41.48, "learning_rate": 0.001, "loss": 2.4945, "step": 216048 }, { "epoch": 41.49, "learning_rate": 0.001, "loss": 2.5012, "step": 216060 }, { "epoch": 41.49, "learning_rate": 0.001, "loss": 2.5093, "step": 216072 }, { "epoch": 41.49, "learning_rate": 0.001, "loss": 2.4894, "step": 216084 }, { "epoch": 41.49, "learning_rate": 0.001, "loss": 2.4885, "step": 216096 }, { "epoch": 41.5, "learning_rate": 0.001, "loss": 2.4896, "step": 216108 }, { "epoch": 41.5, "learning_rate": 0.001, "loss": 2.4943, "step": 216120 }, { "epoch": 41.5, "learning_rate": 0.001, "loss": 2.4981, "step": 216132 }, { "epoch": 41.5, "learning_rate": 0.001, "loss": 2.4963, "step": 216144 }, { "epoch": 41.5, "learning_rate": 0.001, "loss": 2.4938, "step": 216156 }, { "epoch": 41.51, "learning_rate": 0.001, "loss": 2.4992, "step": 216168 }, { "epoch": 41.51, "learning_rate": 0.001, "loss": 2.5087, "step": 216180 }, { "epoch": 41.51, "learning_rate": 0.001, "loss": 2.4987, "step": 216192 }, { "epoch": 41.51, "learning_rate": 0.001, "loss": 2.4862, "step": 216204 }, { "epoch": 41.52, "learning_rate": 0.001, "loss": 2.4928, "step": 216216 }, { "epoch": 41.52, "learning_rate": 0.001, "loss": 2.4911, "step": 216228 }, { "epoch": 41.52, "learning_rate": 0.001, "loss": 2.4846, "step": 216240 }, { "epoch": 41.52, "eval_ag_news_accuracy": 0.32965625, "eval_ag_news_bleu_score": 4.975507926974853, "eval_ag_news_bleu_score_sem": 0.16282958648646803, "eval_ag_news_emb_cos_sim": 0.819546103477478, "eval_ag_news_emb_cos_sim_sem": 0.006797881659934797, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.461921453475952, "eval_ag_news_n_ngrams_match_1": 14.562, "eval_ag_news_n_ngrams_match_2": 3.268, "eval_ag_news_n_ngrams_match_3": 0.958, "eval_ag_news_num_pred_words": 46.948, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.878170126297825, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36062709276859495, "eval_ag_news_runtime": 16.6294, "eval_ag_news_samples_per_second": 30.067, "eval_ag_news_steps_per_second": 0.06, "eval_ag_news_token_set_f1": 0.3587889977136666, "eval_ag_news_token_set_f1_sem": 0.004344475204137445, "eval_ag_news_token_set_precision": 0.34800964407998963, "eval_ag_news_token_set_recall": 0.38283276987761256, "eval_ag_news_true_num_tokens": 56.09375, "step": 216250 }, { "epoch": 41.52, "eval_anthropic_toxic_prompts_accuracy": 0.11634375, "eval_anthropic_toxic_prompts_bleu_score": 3.154174211135589, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12056889723607998, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6729702353477478, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009214522025999235, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.179611921310425, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.166, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.924, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.726, "eval_anthropic_toxic_prompts_num_pred_words": 46.774, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.037423330004746, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21408151939438294, "eval_anthropic_toxic_prompts_runtime": 11.596, "eval_anthropic_toxic_prompts_samples_per_second": 43.118, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.35509446971774833, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006623083033777618, "eval_anthropic_toxic_prompts_token_set_precision": 0.4339126628588507, "eval_anthropic_toxic_prompts_token_set_recall": 0.3270439195951161, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 216250 }, { "epoch": 41.52, "eval_arxiv_accuracy": 0.35446875, "eval_arxiv_bleu_score": 4.6568916794792266, "eval_arxiv_bleu_score_sem": 0.13509631913722348, "eval_arxiv_emb_cos_sim": 0.7880792617797852, "eval_arxiv_emb_cos_sim_sem": 0.007674974699603709, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.325824499130249, "eval_arxiv_n_ngrams_match_1": 15.91, "eval_arxiv_n_ngrams_match_2": 3.252, "eval_arxiv_n_ngrams_match_3": 0.754, "eval_arxiv_num_pred_words": 41.314, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.82192834433291, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3792920545631109, "eval_arxiv_runtime": 12.6402, "eval_arxiv_samples_per_second": 39.556, "eval_arxiv_steps_per_second": 0.079, "eval_arxiv_token_set_f1": 0.3700239828577745, "eval_arxiv_token_set_f1_sem": 0.00429736415187324, "eval_arxiv_token_set_precision": 0.3253997234839459, "eval_arxiv_token_set_recall": 0.44514193106922234, "eval_arxiv_true_num_tokens": 64.0, "step": 216250 }, { "epoch": 41.52, "eval_python_code_alpaca_accuracy": 0.16440625, "eval_python_code_alpaca_bleu_score": 4.758354599599039, "eval_python_code_alpaca_bleu_score_sem": 0.14804902514791038, "eval_python_code_alpaca_emb_cos_sim": 0.7701547145843506, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007143228985366015, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8359646797180176, "eval_python_code_alpaca_n_ngrams_match_1": 9.98, "eval_python_code_alpaca_n_ngrams_match_2": 2.964, "eval_python_code_alpaca_n_ngrams_match_3": 0.992, "eval_python_code_alpaca_num_pred_words": 42.522, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.04683710900897, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.341995131297409, "eval_python_code_alpaca_runtime": 12.2223, "eval_python_code_alpaca_samples_per_second": 40.909, "eval_python_code_alpaca_steps_per_second": 0.082, "eval_python_code_alpaca_token_set_f1": 0.48032662453752833, "eval_python_code_alpaca_token_set_f1_sem": 0.005443734611848907, "eval_python_code_alpaca_token_set_precision": 0.545062432059744, "eval_python_code_alpaca_token_set_recall": 0.4513832325149613, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 216250 }, { "epoch": 41.52, "eval_wikibio_accuracy": 0.33096875, "eval_wikibio_bleu_score": 6.133919956002912, "eval_wikibio_bleu_score_sem": 0.22972306645858473, "eval_wikibio_emb_cos_sim": 0.7307310104370117, "eval_wikibio_emb_cos_sim_sem": 0.010424130651361321, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6156070232391357, "eval_wikibio_n_ngrams_match_1": 10.212, "eval_wikibio_n_ngrams_match_2": 3.484, "eval_wikibio_n_ngrams_match_3": 1.32, "eval_wikibio_num_pred_words": 35.858, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.17390450208594, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3567853559953841, "eval_wikibio_runtime": 11.6035, "eval_wikibio_samples_per_second": 43.09, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.32210554516742845, "eval_wikibio_token_set_f1_sem": 0.005619655367232886, "eval_wikibio_token_set_precision": 0.3316311373522093, "eval_wikibio_token_set_recall": 0.330802615239244, "eval_wikibio_true_num_tokens": 61.1328125, "step": 216250 }, { "epoch": 41.52, "eval_nq_accuracy": 0.537375, "eval_nq_bleu_score": 12.47472674728733, "eval_nq_bleu_score_sem": 0.5044904634964353, "eval_nq_emb_cos_sim": 0.8439424633979797, "eval_nq_emb_cos_sim_sem": 0.006934180616751981, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1153202056884766, "eval_nq_n_ngrams_match_1": 23.6, "eval_nq_n_ngrams_match_2": 8.864, "eval_nq_n_ngrams_match_3": 4.226, "eval_nq_num_pred_words": 49.226, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.292240563654603, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4573645291481141, "eval_nq_runtime": 12.5055, "eval_nq_samples_per_second": 39.982, "eval_nq_steps_per_second": 0.08, "eval_nq_token_set_f1": 0.46811466852358713, "eval_nq_token_set_f1_sem": 0.004987963608760948, "eval_nq_token_set_precision": 0.429554826873254, "eval_nq_token_set_recall": 0.5219791979977839, "eval_nq_true_num_tokens": 64.0, "step": 216250 }, { "epoch": 41.52, "learning_rate": 0.001, "loss": 2.4899, "step": 216252 }, { "epoch": 41.53, "learning_rate": 0.001, "loss": 2.4971, "step": 216264 }, { "epoch": 41.53, "learning_rate": 0.001, "loss": 2.4948, "step": 216276 }, { "epoch": 41.53, "learning_rate": 0.001, "loss": 2.4992, "step": 216288 }, { "epoch": 41.53, "learning_rate": 0.001, "loss": 2.4904, "step": 216300 }, { "epoch": 41.53, "learning_rate": 0.001, "loss": 2.49, "step": 216312 }, { "epoch": 41.54, "learning_rate": 0.001, "loss": 2.4907, "step": 216324 }, { "epoch": 41.54, "learning_rate": 0.001, "loss": 2.4955, "step": 216336 }, { "epoch": 41.54, "learning_rate": 0.001, "loss": 2.5025, "step": 216348 }, { "epoch": 41.54, "learning_rate": 0.001, "loss": 2.5004, "step": 216360 }, { "epoch": 41.55, "learning_rate": 0.001, "loss": 2.4932, "step": 216372 }, { "epoch": 41.55, "learning_rate": 0.001, "loss": 2.4889, "step": 216384 }, { "epoch": 41.55, "learning_rate": 0.001, "loss": 2.4913, "step": 216396 }, { "epoch": 41.55, "learning_rate": 0.001, "loss": 2.4963, "step": 216408 }, { "epoch": 41.56, "learning_rate": 0.001, "loss": 2.4861, "step": 216420 }, { "epoch": 41.56, "learning_rate": 0.001, "loss": 2.4901, "step": 216432 }, { "epoch": 41.56, "learning_rate": 0.001, "loss": 2.4909, "step": 216444 }, { "epoch": 41.56, "learning_rate": 0.001, "loss": 2.4957, "step": 216456 }, { "epoch": 41.56, "learning_rate": 0.001, "loss": 2.4923, "step": 216468 }, { "epoch": 41.57, "learning_rate": 0.001, "loss": 2.4891, "step": 216480 }, { "epoch": 41.57, "learning_rate": 0.001, "loss": 2.4929, "step": 216492 }, { "epoch": 41.57, "learning_rate": 0.001, "loss": 2.5013, "step": 216504 }, { "epoch": 41.57, "learning_rate": 0.001, "loss": 2.506, "step": 216516 }, { "epoch": 41.58, "learning_rate": 0.001, "loss": 2.4921, "step": 216528 }, { "epoch": 41.58, "learning_rate": 0.001, "loss": 2.4916, "step": 216540 }, { "epoch": 41.58, "learning_rate": 0.001, "loss": 2.4908, "step": 216552 }, { "epoch": 41.58, "learning_rate": 0.001, "loss": 2.5005, "step": 216564 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 2.4812, "step": 216576 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 2.5033, "step": 216588 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 2.4965, "step": 216600 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 2.4955, "step": 216612 }, { "epoch": 41.59, "learning_rate": 0.001, "loss": 2.4804, "step": 216624 }, { "epoch": 41.6, "learning_rate": 0.001, "loss": 2.5018, "step": 216636 }, { "epoch": 41.6, "learning_rate": 0.001, "loss": 2.4963, "step": 216648 }, { "epoch": 41.6, "learning_rate": 0.001, "loss": 2.492, "step": 216660 }, { "epoch": 41.6, "learning_rate": 0.001, "loss": 2.4948, "step": 216672 }, { "epoch": 41.61, "learning_rate": 0.001, "loss": 2.4933, "step": 216684 }, { "epoch": 41.61, "learning_rate": 0.001, "loss": 2.4903, "step": 216696 }, { "epoch": 41.61, "learning_rate": 0.001, "loss": 2.5001, "step": 216708 }, { "epoch": 41.61, "learning_rate": 0.001, "loss": 2.495, "step": 216720 }, { "epoch": 41.62, "learning_rate": 0.001, "loss": 2.4974, "step": 216732 }, { "epoch": 41.62, "learning_rate": 0.001, "loss": 2.4927, "step": 216744 }, { "epoch": 41.62, "learning_rate": 0.001, "loss": 2.4954, "step": 216756 }, { "epoch": 41.62, "learning_rate": 0.001, "loss": 2.4925, "step": 216768 }, { "epoch": 41.62, "learning_rate": 0.001, "loss": 2.4975, "step": 216780 }, { "epoch": 41.63, "learning_rate": 0.001, "loss": 2.5017, "step": 216792 }, { "epoch": 41.63, "learning_rate": 0.001, "loss": 2.4896, "step": 216804 }, { "epoch": 41.63, "learning_rate": 0.001, "loss": 2.4893, "step": 216816 }, { "epoch": 41.63, "learning_rate": 0.001, "loss": 2.495, "step": 216828 }, { "epoch": 41.64, "learning_rate": 0.001, "loss": 2.4962, "step": 216840 }, { "epoch": 41.64, "learning_rate": 0.001, "loss": 2.4943, "step": 216852 }, { "epoch": 41.64, "learning_rate": 0.001, "loss": 2.5014, "step": 216864 }, { "epoch": 41.64, "eval_ag_news_accuracy": 0.32953125, "eval_ag_news_bleu_score": 4.887446146470891, "eval_ag_news_bleu_score_sem": 0.15133356751567484, "eval_ag_news_emb_cos_sim": 0.8248995542526245, "eval_ag_news_emb_cos_sim_sem": 0.006077720435062675, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.46960186958313, "eval_ag_news_n_ngrams_match_1": 14.484, "eval_ag_news_n_ngrams_match_2": 3.22, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 47.306, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.123950376711434, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3605940607231198, "eval_ag_news_runtime": 13.4946, "eval_ag_news_samples_per_second": 37.052, "eval_ag_news_steps_per_second": 0.074, "eval_ag_news_token_set_f1": 0.35857620706777227, "eval_ag_news_token_set_f1_sem": 0.00450405740569866, "eval_ag_news_token_set_precision": 0.34563491624333487, "eval_ag_news_token_set_recall": 0.3842417877920185, "eval_ag_news_true_num_tokens": 56.09375, "step": 216875 }, { "epoch": 41.64, "eval_anthropic_toxic_prompts_accuracy": 0.1150625, "eval_anthropic_toxic_prompts_bleu_score": 3.2513411431816186, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.126892913681169, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6807286143302917, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008860512206110012, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.204263210296631, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 46.892, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.637340788500286, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2195885444428198, "eval_anthropic_toxic_prompts_runtime": 11.038, "eval_anthropic_toxic_prompts_samples_per_second": 45.298, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.36068211898830227, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065099890695187665, "eval_anthropic_toxic_prompts_token_set_precision": 0.4471870388666266, "eval_anthropic_toxic_prompts_token_set_recall": 0.32668230122323505, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 216875 }, { "epoch": 41.64, "eval_arxiv_accuracy": 0.35315625, "eval_arxiv_bleu_score": 4.477254602161336, "eval_arxiv_bleu_score_sem": 0.12972708225207144, "eval_arxiv_emb_cos_sim": 0.7866698503494263, "eval_arxiv_emb_cos_sim_sem": 0.00649461071875212, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.334129571914673, "eval_arxiv_n_ngrams_match_1": 15.7, "eval_arxiv_n_ngrams_match_2": 3.066, "eval_arxiv_n_ngrams_match_3": 0.68, "eval_arxiv_num_pred_words": 41.072, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.053953644091038, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3719643842534974, "eval_arxiv_runtime": 12.3489, "eval_arxiv_samples_per_second": 40.49, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.3657464384572065, "eval_arxiv_token_set_f1_sem": 0.004237910241118956, "eval_arxiv_token_set_precision": 0.31950822512058513, "eval_arxiv_token_set_recall": 0.44483633726071475, "eval_arxiv_true_num_tokens": 64.0, "step": 216875 }, { "epoch": 41.64, "eval_python_code_alpaca_accuracy": 0.1628125, "eval_python_code_alpaca_bleu_score": 4.724090232263869, "eval_python_code_alpaca_bleu_score_sem": 0.14421954033702714, "eval_python_code_alpaca_emb_cos_sim": 0.7643148899078369, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0076521653520252204, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8600780963897705, "eval_python_code_alpaca_n_ngrams_match_1": 10.016, "eval_python_code_alpaca_n_ngrams_match_2": 2.97, "eval_python_code_alpaca_n_ngrams_match_3": 0.988, "eval_python_code_alpaca_num_pred_words": 42.9, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.46289067204433, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34516917348252074, "eval_python_code_alpaca_runtime": 15.4619, "eval_python_code_alpaca_samples_per_second": 32.338, "eval_python_code_alpaca_steps_per_second": 0.065, "eval_python_code_alpaca_token_set_f1": 0.4861581536419704, "eval_python_code_alpaca_token_set_f1_sem": 0.005400821756556567, "eval_python_code_alpaca_token_set_precision": 0.5497370924112553, "eval_python_code_alpaca_token_set_recall": 0.4574293568838186, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 216875 }, { "epoch": 41.64, "eval_wikibio_accuracy": 0.32853125, "eval_wikibio_bleu_score": 6.049140432372366, "eval_wikibio_bleu_score_sem": 0.2281769783081609, "eval_wikibio_emb_cos_sim": 0.7500406503677368, "eval_wikibio_emb_cos_sim_sem": 0.010314869556408384, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.629960298538208, "eval_wikibio_n_ngrams_match_1": 10.076, "eval_wikibio_n_ngrams_match_2": 3.428, "eval_wikibio_n_ngrams_match_3": 1.28, "eval_wikibio_num_pred_words": 35.682, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.71131939295495, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3565092139045687, "eval_wikibio_runtime": 11.28, "eval_wikibio_samples_per_second": 44.326, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3204860351212683, "eval_wikibio_token_set_f1_sem": 0.005776137581377932, "eval_wikibio_token_set_precision": 0.32676062238929077, "eval_wikibio_token_set_recall": 0.3340286328140021, "eval_wikibio_true_num_tokens": 61.1328125, "step": 216875 }, { "epoch": 41.64, "eval_nq_accuracy": 0.5380625, "eval_nq_bleu_score": 12.163553386979748, "eval_nq_bleu_score_sem": 0.490096609792054, "eval_nq_emb_cos_sim": 0.8416872024536133, "eval_nq_emb_cos_sim_sem": 0.0067169327104690535, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.117141008377075, "eval_nq_n_ngrams_match_1": 23.504, "eval_nq_n_ngrams_match_2": 8.782, "eval_nq_n_ngrams_match_3": 4.054, "eval_nq_num_pred_words": 49.368, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.307352851639534, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4560129468395967, "eval_nq_runtime": 11.6033, "eval_nq_samples_per_second": 43.091, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4682685039993215, "eval_nq_token_set_f1_sem": 0.004955362930816411, "eval_nq_token_set_precision": 0.4261815413465682, "eval_nq_token_set_recall": 0.5267050085226828, "eval_nq_true_num_tokens": 64.0, "step": 216875 }, { "epoch": 41.64, "learning_rate": 0.001, "loss": 2.4909, "step": 216876 }, { "epoch": 41.65, "learning_rate": 0.001, "loss": 2.5017, "step": 216888 }, { "epoch": 41.65, "learning_rate": 0.001, "loss": 2.4952, "step": 216900 }, { "epoch": 41.65, "learning_rate": 0.001, "loss": 2.4849, "step": 216912 }, { "epoch": 41.65, "learning_rate": 0.001, "loss": 2.5016, "step": 216924 }, { "epoch": 41.65, "learning_rate": 0.001, "loss": 2.4996, "step": 216936 }, { "epoch": 41.66, "learning_rate": 0.001, "loss": 2.4939, "step": 216948 }, { "epoch": 41.66, "learning_rate": 0.001, "loss": 2.4929, "step": 216960 }, { "epoch": 41.66, "learning_rate": 0.001, "loss": 2.4932, "step": 216972 }, { "epoch": 41.66, "learning_rate": 0.001, "loss": 2.4961, "step": 216984 }, { "epoch": 41.67, "learning_rate": 0.001, "loss": 2.5025, "step": 216996 }, { "epoch": 41.67, "learning_rate": 0.001, "loss": 2.4897, "step": 217008 }, { "epoch": 41.67, "learning_rate": 0.001, "loss": 2.4989, "step": 217020 }, { "epoch": 41.67, "learning_rate": 0.001, "loss": 2.4848, "step": 217032 }, { "epoch": 41.68, "learning_rate": 0.001, "loss": 2.4932, "step": 217044 }, { "epoch": 41.68, "learning_rate": 0.001, "loss": 2.5023, "step": 217056 }, { "epoch": 41.68, "learning_rate": 0.001, "loss": 2.4987, "step": 217068 }, { "epoch": 41.68, "learning_rate": 0.001, "loss": 2.5045, "step": 217080 }, { "epoch": 41.68, "learning_rate": 0.001, "loss": 2.4976, "step": 217092 }, { "epoch": 41.69, "learning_rate": 0.001, "loss": 2.499, "step": 217104 }, { "epoch": 41.69, "learning_rate": 0.001, "loss": 2.4902, "step": 217116 }, { "epoch": 41.69, "learning_rate": 0.001, "loss": 2.4946, "step": 217128 }, { "epoch": 41.69, "learning_rate": 0.001, "loss": 2.5054, "step": 217140 }, { "epoch": 41.7, "learning_rate": 0.001, "loss": 2.4918, "step": 217152 }, { "epoch": 41.7, "learning_rate": 0.001, "loss": 2.4848, "step": 217164 }, { "epoch": 41.7, "learning_rate": 0.001, "loss": 2.4976, "step": 217176 }, { "epoch": 41.7, "learning_rate": 0.001, "loss": 2.5, "step": 217188 }, { "epoch": 41.71, "learning_rate": 0.001, "loss": 2.5003, "step": 217200 }, { "epoch": 41.71, "learning_rate": 0.001, "loss": 2.5036, "step": 217212 }, { "epoch": 41.71, "learning_rate": 0.001, "loss": 2.4982, "step": 217224 }, { "epoch": 41.71, "learning_rate": 0.001, "loss": 2.4861, "step": 217236 }, { "epoch": 41.71, "learning_rate": 0.001, "loss": 2.4874, "step": 217248 }, { "epoch": 41.72, "learning_rate": 0.001, "loss": 2.4999, "step": 217260 }, { "epoch": 41.72, "learning_rate": 0.001, "loss": 2.4939, "step": 217272 }, { "epoch": 41.72, "learning_rate": 0.001, "loss": 2.4943, "step": 217284 }, { "epoch": 41.72, "learning_rate": 0.001, "loss": 2.5002, "step": 217296 }, { "epoch": 41.73, "learning_rate": 0.001, "loss": 2.4919, "step": 217308 }, { "epoch": 41.73, "learning_rate": 0.001, "loss": 2.4904, "step": 217320 }, { "epoch": 41.73, "learning_rate": 0.001, "loss": 2.4987, "step": 217332 }, { "epoch": 41.73, "learning_rate": 0.001, "loss": 2.5019, "step": 217344 }, { "epoch": 41.74, "learning_rate": 0.001, "loss": 2.4954, "step": 217356 }, { "epoch": 41.74, "learning_rate": 0.001, "loss": 2.5048, "step": 217368 }, { "epoch": 41.74, "learning_rate": 0.001, "loss": 2.5052, "step": 217380 }, { "epoch": 41.74, "learning_rate": 0.001, "loss": 2.4931, "step": 217392 }, { "epoch": 41.74, "learning_rate": 0.001, "loss": 2.4942, "step": 217404 }, { "epoch": 41.75, "learning_rate": 0.001, "loss": 2.4974, "step": 217416 }, { "epoch": 41.75, "learning_rate": 0.001, "loss": 2.4979, "step": 217428 }, { "epoch": 41.75, "learning_rate": 0.001, "loss": 2.4901, "step": 217440 }, { "epoch": 41.75, "learning_rate": 0.001, "loss": 2.4973, "step": 217452 }, { "epoch": 41.76, "learning_rate": 0.001, "loss": 2.4932, "step": 217464 }, { "epoch": 41.76, "learning_rate": 0.001, "loss": 2.4917, "step": 217476 }, { "epoch": 41.76, "learning_rate": 0.001, "loss": 2.4906, "step": 217488 }, { "epoch": 41.76, "learning_rate": 0.001, "loss": 2.4964, "step": 217500 }, { "epoch": 41.76, "eval_ag_news_accuracy": 0.32859375, "eval_ag_news_bleu_score": 4.995583157547209, "eval_ag_news_bleu_score_sem": 0.1596756936655915, "eval_ag_news_emb_cos_sim": 0.8249098062515259, "eval_ag_news_emb_cos_sim_sem": 0.006520750325907478, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4698293209075928, "eval_ag_news_n_ngrams_match_1": 14.552, "eval_ag_news_n_ngrams_match_2": 3.266, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 47.264, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.13125784278652, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3614383583803509, "eval_ag_news_runtime": 16.9613, "eval_ag_news_samples_per_second": 29.479, "eval_ag_news_steps_per_second": 0.059, "eval_ag_news_token_set_f1": 0.3588871070810427, "eval_ag_news_token_set_f1_sem": 0.004512316867045151, "eval_ag_news_token_set_precision": 0.3472791618544491, "eval_ag_news_token_set_recall": 0.38478773813664385, "eval_ag_news_true_num_tokens": 56.09375, "step": 217500 }, { "epoch": 41.76, "eval_anthropic_toxic_prompts_accuracy": 0.11625, "eval_anthropic_toxic_prompts_bleu_score": 3.1188430433525545, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11125796945082382, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6867282390594482, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009132653089855839, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1896812915802, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.292, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.71, "eval_anthropic_toxic_prompts_num_pred_words": 47.836, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.280687750182906, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21645242415468985, "eval_anthropic_toxic_prompts_runtime": 11.2998, "eval_anthropic_toxic_prompts_samples_per_second": 44.249, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.3606826922836969, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006406130219475097, "eval_anthropic_toxic_prompts_token_set_precision": 0.44734410281782194, "eval_anthropic_toxic_prompts_token_set_recall": 0.32859232847931197, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 217500 }, { "epoch": 41.76, "eval_arxiv_accuracy": 0.35484375, "eval_arxiv_bleu_score": 4.349381927634081, "eval_arxiv_bleu_score_sem": 0.12261669041775945, "eval_arxiv_emb_cos_sim": 0.7795267105102539, "eval_arxiv_emb_cos_sim_sem": 0.006579561643192874, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3339083194732666, "eval_arxiv_n_ngrams_match_1": 15.524, "eval_arxiv_n_ngrams_match_2": 2.992, "eval_arxiv_n_ngrams_match_3": 0.63, "eval_arxiv_num_pred_words": 41.052, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.047747324963133, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3708006821196053, "eval_arxiv_runtime": 11.5819, "eval_arxiv_samples_per_second": 43.171, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.36223340003084625, "eval_arxiv_token_set_f1_sem": 0.004068027199631216, "eval_arxiv_token_set_precision": 0.31482838317608736, "eval_arxiv_token_set_recall": 0.4432528440056383, "eval_arxiv_true_num_tokens": 64.0, "step": 217500 }, { "epoch": 41.76, "eval_python_code_alpaca_accuracy": 0.16503125, "eval_python_code_alpaca_bleu_score": 4.934751460771646, "eval_python_code_alpaca_bleu_score_sem": 0.1560997454553954, "eval_python_code_alpaca_emb_cos_sim": 0.7645458579063416, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007737975677048893, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.819257974624634, "eval_python_code_alpaca_n_ngrams_match_1": 10.146, "eval_python_code_alpaca_n_ngrams_match_2": 3.216, "eval_python_code_alpaca_n_ngrams_match_3": 1.152, "eval_python_code_alpaca_num_pred_words": 44.776, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.76440644076113, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3379222997135297, "eval_python_code_alpaca_runtime": 11.3243, "eval_python_code_alpaca_samples_per_second": 44.153, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.4850597834122426, "eval_python_code_alpaca_token_set_f1_sem": 0.005418792979596099, "eval_python_code_alpaca_token_set_precision": 0.5567010566560711, "eval_python_code_alpaca_token_set_recall": 0.4485105211120921, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 217500 }, { "epoch": 41.76, "eval_wikibio_accuracy": 0.32784375, "eval_wikibio_bleu_score": 6.1123385861585655, "eval_wikibio_bleu_score_sem": 0.22337905477189268, "eval_wikibio_emb_cos_sim": 0.7536593675613403, "eval_wikibio_emb_cos_sim_sem": 0.009892973017369255, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6195919513702393, "eval_wikibio_n_ngrams_match_1": 10.164, "eval_wikibio_n_ngrams_match_2": 3.456, "eval_wikibio_n_ngrams_match_3": 1.268, "eval_wikibio_num_pred_words": 36.198, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.322335386663816, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36250165984266763, "eval_wikibio_runtime": 11.5349, "eval_wikibio_samples_per_second": 43.347, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.3232677307099517, "eval_wikibio_token_set_f1_sem": 0.005515271360753612, "eval_wikibio_token_set_precision": 0.3308972985618345, "eval_wikibio_token_set_recall": 0.3323116846259824, "eval_wikibio_true_num_tokens": 61.1328125, "step": 217500 }, { "epoch": 41.76, "eval_nq_accuracy": 0.5384375, "eval_nq_bleu_score": 12.172826344679459, "eval_nq_bleu_score_sem": 0.49596154815355714, "eval_nq_emb_cos_sim": 0.8395051956176758, "eval_nq_emb_cos_sim_sem": 0.007189846033997096, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.113952159881592, "eval_nq_n_ngrams_match_1": 23.578, "eval_nq_n_ngrams_match_2": 8.722, "eval_nq_n_ngrams_match_3": 4.05, "eval_nq_num_pred_words": 49.268, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.280904154853133, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4568493131470111, "eval_nq_runtime": 11.7851, "eval_nq_samples_per_second": 42.426, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.46812867341981784, "eval_nq_token_set_f1_sem": 0.004992514109594052, "eval_nq_token_set_precision": 0.42765309099848514, "eval_nq_token_set_recall": 0.5251882959590899, "eval_nq_true_num_tokens": 64.0, "step": 217500 }, { "epoch": 41.76, "learning_rate": 0.001, "loss": 2.4909, "step": 217512 }, { "epoch": 41.77, "learning_rate": 0.001, "loss": 2.5007, "step": 217524 }, { "epoch": 41.77, "learning_rate": 0.001, "loss": 2.4953, "step": 217536 }, { "epoch": 41.77, "learning_rate": 0.001, "loss": 2.4964, "step": 217548 }, { "epoch": 41.77, "learning_rate": 0.001, "loss": 2.505, "step": 217560 }, { "epoch": 41.78, "learning_rate": 0.001, "loss": 2.4982, "step": 217572 }, { "epoch": 41.78, "learning_rate": 0.001, "loss": 2.4969, "step": 217584 }, { "epoch": 41.78, "learning_rate": 0.001, "loss": 2.4883, "step": 217596 }, { "epoch": 41.78, "learning_rate": 0.001, "loss": 2.4952, "step": 217608 }, { "epoch": 41.79, "learning_rate": 0.001, "loss": 2.4916, "step": 217620 }, { "epoch": 41.79, "learning_rate": 0.001, "loss": 2.4938, "step": 217632 }, { "epoch": 41.79, "learning_rate": 0.001, "loss": 2.503, "step": 217644 }, { "epoch": 41.79, "learning_rate": 0.001, "loss": 2.4935, "step": 217656 }, { "epoch": 41.79, "learning_rate": 0.001, "loss": 2.4977, "step": 217668 }, { "epoch": 41.8, "learning_rate": 0.001, "loss": 2.5007, "step": 217680 }, { "epoch": 41.8, "learning_rate": 0.001, "loss": 2.494, "step": 217692 }, { "epoch": 41.8, "learning_rate": 0.001, "loss": 2.5008, "step": 217704 }, { "epoch": 41.8, "learning_rate": 0.001, "loss": 2.5012, "step": 217716 }, { "epoch": 41.81, "learning_rate": 0.001, "loss": 2.4973, "step": 217728 }, { "epoch": 41.81, "learning_rate": 0.001, "loss": 2.4982, "step": 217740 }, { "epoch": 41.81, "learning_rate": 0.001, "loss": 2.4948, "step": 217752 }, { "epoch": 41.81, "learning_rate": 0.001, "loss": 2.4992, "step": 217764 }, { "epoch": 41.82, "learning_rate": 0.001, "loss": 2.4899, "step": 217776 }, { "epoch": 41.82, "learning_rate": 0.001, "loss": 2.5042, "step": 217788 }, { "epoch": 41.82, "learning_rate": 0.001, "loss": 2.4941, "step": 217800 }, { "epoch": 41.82, "learning_rate": 0.001, "loss": 2.4998, "step": 217812 }, { "epoch": 41.82, "learning_rate": 0.001, "loss": 2.4964, "step": 217824 }, { "epoch": 41.83, "learning_rate": 0.001, "loss": 2.4965, "step": 217836 }, { "epoch": 41.83, "learning_rate": 0.001, "loss": 2.5, "step": 217848 }, { "epoch": 41.83, "learning_rate": 0.001, "loss": 2.491, "step": 217860 }, { "epoch": 41.83, "learning_rate": 0.001, "loss": 2.5027, "step": 217872 }, { "epoch": 41.84, "learning_rate": 0.001, "loss": 2.491, "step": 217884 }, { "epoch": 41.84, "learning_rate": 0.001, "loss": 2.4938, "step": 217896 }, { "epoch": 41.84, "learning_rate": 0.001, "loss": 2.5026, "step": 217908 }, { "epoch": 41.84, "learning_rate": 0.001, "loss": 2.4898, "step": 217920 }, { "epoch": 41.85, "learning_rate": 0.001, "loss": 2.4919, "step": 217932 }, { "epoch": 41.85, "learning_rate": 0.001, "loss": 2.4905, "step": 217944 }, { "epoch": 41.85, "learning_rate": 0.001, "loss": 2.4972, "step": 217956 }, { "epoch": 41.85, "learning_rate": 0.001, "loss": 2.491, "step": 217968 }, { "epoch": 41.85, "learning_rate": 0.001, "loss": 2.5036, "step": 217980 }, { "epoch": 41.86, "learning_rate": 0.001, "loss": 2.4929, "step": 217992 }, { "epoch": 41.86, "learning_rate": 0.001, "loss": 2.4933, "step": 218004 }, { "epoch": 41.86, "learning_rate": 0.001, "loss": 2.4928, "step": 218016 }, { "epoch": 41.86, "learning_rate": 0.001, "loss": 2.5059, "step": 218028 }, { "epoch": 41.87, "learning_rate": 0.001, "loss": 2.4944, "step": 218040 }, { "epoch": 41.87, "learning_rate": 0.001, "loss": 2.4975, "step": 218052 }, { "epoch": 41.87, "learning_rate": 0.001, "loss": 2.4972, "step": 218064 }, { "epoch": 41.87, "learning_rate": 0.001, "loss": 2.4938, "step": 218076 }, { "epoch": 41.88, "learning_rate": 0.001, "loss": 2.498, "step": 218088 }, { "epoch": 41.88, "learning_rate": 0.001, "loss": 2.4986, "step": 218100 }, { "epoch": 41.88, "learning_rate": 0.001, "loss": 2.5018, "step": 218112 }, { "epoch": 41.88, "learning_rate": 0.001, "loss": 2.4975, "step": 218124 }, { "epoch": 41.88, "eval_ag_news_accuracy": 0.3290625, "eval_ag_news_bleu_score": 5.09083726930891, "eval_ag_news_bleu_score_sem": 0.15831070234832995, "eval_ag_news_emb_cos_sim": 0.8283110857009888, "eval_ag_news_emb_cos_sim_sem": 0.005814721466176664, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4672694206237793, "eval_ag_news_n_ngrams_match_1": 14.598, "eval_ag_news_n_ngrams_match_2": 3.33, "eval_ag_news_n_ngrams_match_3": 0.968, "eval_ag_news_num_pred_words": 46.674, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.049110216441534, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3625376925528374, "eval_ag_news_runtime": 12.3344, "eval_ag_news_samples_per_second": 40.537, "eval_ag_news_steps_per_second": 0.081, "eval_ag_news_token_set_f1": 0.3626152811554046, "eval_ag_news_token_set_f1_sem": 0.004335993821420875, "eval_ag_news_token_set_precision": 0.34905871442261815, "eval_ag_news_token_set_recall": 0.38912115175818046, "eval_ag_news_true_num_tokens": 56.09375, "step": 218125 }, { "epoch": 41.88, "eval_anthropic_toxic_prompts_accuracy": 0.1161875, "eval_anthropic_toxic_prompts_bleu_score": 3.249123508989452, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12067826666755373, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6899729371070862, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00905122274435476, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1833913326263428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.418, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 47.594, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.128442530899807, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2208748617019638, "eval_anthropic_toxic_prompts_runtime": 11.1337, "eval_anthropic_toxic_prompts_samples_per_second": 44.909, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.3657788793315223, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006582693540461152, "eval_anthropic_toxic_prompts_token_set_precision": 0.4565585661968616, "eval_anthropic_toxic_prompts_token_set_recall": 0.33091982162053835, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 218125 }, { "epoch": 41.88, "eval_arxiv_accuracy": 0.353375, "eval_arxiv_bleu_score": 4.370257328071202, "eval_arxiv_bleu_score_sem": 0.1334749586855463, "eval_arxiv_emb_cos_sim": 0.7769792079925537, "eval_arxiv_emb_cos_sim_sem": 0.007613431739699201, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.322906732559204, "eval_arxiv_n_ngrams_match_1": 15.376, "eval_arxiv_n_ngrams_match_2": 3.012, "eval_arxiv_n_ngrams_match_3": 0.686, "eval_arxiv_num_pred_words": 40.202, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.740868765839455, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36781700390140043, "eval_arxiv_runtime": 13.6106, "eval_arxiv_samples_per_second": 36.736, "eval_arxiv_steps_per_second": 0.073, "eval_arxiv_token_set_f1": 0.36137802291831084, "eval_arxiv_token_set_f1_sem": 0.0043563007602366866, "eval_arxiv_token_set_precision": 0.31262204657687714, "eval_arxiv_token_set_recall": 0.4488679895227799, "eval_arxiv_true_num_tokens": 64.0, "step": 218125 }, { "epoch": 41.88, "eval_python_code_alpaca_accuracy": 0.16365625, "eval_python_code_alpaca_bleu_score": 4.749160743792896, "eval_python_code_alpaca_bleu_score_sem": 0.14927533323434977, "eval_python_code_alpaca_emb_cos_sim": 0.7650162577629089, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007978047418889484, "eval_python_code_alpaca_emb_top1_equal": 0.1015625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.026804565886848545, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8195087909698486, "eval_python_code_alpaca_n_ngrams_match_1": 9.836, "eval_python_code_alpaca_n_ngrams_match_2": 2.984, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 42.47, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.76861175527306, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3406014475336757, "eval_python_code_alpaca_runtime": 16.6532, "eval_python_code_alpaca_samples_per_second": 30.024, "eval_python_code_alpaca_steps_per_second": 0.06, "eval_python_code_alpaca_token_set_f1": 0.48095663814900064, "eval_python_code_alpaca_token_set_f1_sem": 0.005580266254246061, "eval_python_code_alpaca_token_set_precision": 0.5385783192099336, "eval_python_code_alpaca_token_set_recall": 0.45711145748608706, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 218125 }, { "epoch": 41.88, "eval_wikibio_accuracy": 0.32821875, "eval_wikibio_bleu_score": 6.353924405723115, "eval_wikibio_bleu_score_sem": 0.22379463979548128, "eval_wikibio_emb_cos_sim": 0.7559791803359985, "eval_wikibio_emb_cos_sim_sem": 0.009643657944814617, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.631709098815918, "eval_wikibio_n_ngrams_match_1": 10.236, "eval_wikibio_n_ngrams_match_2": 3.512, "eval_wikibio_n_ngrams_match_3": 1.356, "eval_wikibio_num_pred_words": 35.81, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.77732665872199, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36379702726230145, "eval_wikibio_runtime": 12.6298, "eval_wikibio_samples_per_second": 39.589, "eval_wikibio_steps_per_second": 0.079, "eval_wikibio_token_set_f1": 0.3260040864467825, "eval_wikibio_token_set_f1_sem": 0.005482797619439678, "eval_wikibio_token_set_precision": 0.334243061565777, "eval_wikibio_token_set_recall": 0.33423554439435343, "eval_wikibio_true_num_tokens": 61.1328125, "step": 218125 }, { "epoch": 41.88, "eval_nq_accuracy": 0.5383125, "eval_nq_bleu_score": 12.320078189725463, "eval_nq_bleu_score_sem": 0.5029188997077199, "eval_nq_emb_cos_sim": 0.8397891521453857, "eval_nq_emb_cos_sim_sem": 0.007033134173065972, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.116191864013672, "eval_nq_n_ngrams_match_1": 23.524, "eval_nq_n_ngrams_match_2": 8.742, "eval_nq_n_ngrams_match_3": 4.15, "eval_nq_num_pred_words": 49.086, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.29947171526534, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45623088664809053, "eval_nq_runtime": 12.9591, "eval_nq_samples_per_second": 38.583, "eval_nq_steps_per_second": 0.077, "eval_nq_token_set_f1": 0.4718029099495432, "eval_nq_token_set_f1_sem": 0.004986500594134429, "eval_nq_token_set_precision": 0.4298493940720233, "eval_nq_token_set_recall": 0.5313249893367641, "eval_nq_true_num_tokens": 64.0, "step": 218125 }, { "epoch": 41.88, "learning_rate": 0.001, "loss": 2.495, "step": 218136 }, { "epoch": 41.89, "learning_rate": 0.001, "loss": 2.4974, "step": 218148 }, { "epoch": 41.89, "learning_rate": 0.001, "loss": 2.5099, "step": 218160 }, { "epoch": 41.89, "learning_rate": 0.001, "loss": 2.4894, "step": 218172 }, { "epoch": 41.89, "learning_rate": 0.001, "loss": 2.5059, "step": 218184 }, { "epoch": 41.9, "learning_rate": 0.001, "loss": 2.4936, "step": 218196 }, { "epoch": 41.9, "learning_rate": 0.001, "loss": 2.5019, "step": 218208 }, { "epoch": 41.9, "learning_rate": 0.001, "loss": 2.5011, "step": 218220 }, { "epoch": 41.9, "learning_rate": 0.001, "loss": 2.4985, "step": 218232 }, { "epoch": 41.91, "learning_rate": 0.001, "loss": 2.503, "step": 218244 }, { "epoch": 41.91, "learning_rate": 0.001, "loss": 2.4943, "step": 218256 }, { "epoch": 41.91, "learning_rate": 0.001, "loss": 2.4942, "step": 218268 }, { "epoch": 41.91, "learning_rate": 0.001, "loss": 2.4905, "step": 218280 }, { "epoch": 41.91, "learning_rate": 0.001, "loss": 2.5003, "step": 218292 }, { "epoch": 41.92, "learning_rate": 0.001, "loss": 2.4985, "step": 218304 }, { "epoch": 41.92, "learning_rate": 0.001, "loss": 2.5054, "step": 218316 }, { "epoch": 41.92, "learning_rate": 0.001, "loss": 2.5015, "step": 218328 }, { "epoch": 41.92, "learning_rate": 0.001, "loss": 2.4952, "step": 218340 }, { "epoch": 41.93, "learning_rate": 0.001, "loss": 2.4981, "step": 218352 }, { "epoch": 41.93, "learning_rate": 0.001, "loss": 2.5031, "step": 218364 }, { "epoch": 41.93, "learning_rate": 0.001, "loss": 2.4884, "step": 218376 }, { "epoch": 41.93, "learning_rate": 0.001, "loss": 2.4912, "step": 218388 }, { "epoch": 41.94, "learning_rate": 0.001, "loss": 2.4987, "step": 218400 }, { "epoch": 41.94, "learning_rate": 0.001, "loss": 2.4995, "step": 218412 }, { "epoch": 41.94, "learning_rate": 0.001, "loss": 2.4977, "step": 218424 }, { "epoch": 41.94, "learning_rate": 0.001, "loss": 2.5057, "step": 218436 }, { "epoch": 41.94, "learning_rate": 0.001, "loss": 2.5041, "step": 218448 }, { "epoch": 41.95, "learning_rate": 0.001, "loss": 2.504, "step": 218460 }, { "epoch": 41.95, "learning_rate": 0.001, "loss": 2.4908, "step": 218472 }, { "epoch": 41.95, "learning_rate": 0.001, "loss": 2.5023, "step": 218484 }, { "epoch": 41.95, "learning_rate": 0.001, "loss": 2.4975, "step": 218496 }, { "epoch": 41.96, "learning_rate": 0.001, "loss": 2.5001, "step": 218508 }, { "epoch": 41.96, "learning_rate": 0.001, "loss": 2.487, "step": 218520 }, { "epoch": 41.96, "learning_rate": 0.001, "loss": 2.4783, "step": 218532 }, { "epoch": 41.96, "learning_rate": 0.001, "loss": 2.4949, "step": 218544 }, { "epoch": 41.97, "learning_rate": 0.001, "loss": 2.4913, "step": 218556 }, { "epoch": 41.97, "learning_rate": 0.001, "loss": 2.5015, "step": 218568 }, { "epoch": 41.97, "learning_rate": 0.001, "loss": 2.4977, "step": 218580 }, { "epoch": 41.97, "learning_rate": 0.001, "loss": 2.499, "step": 218592 }, { "epoch": 41.97, "learning_rate": 0.001, "loss": 2.4929, "step": 218604 }, { "epoch": 41.98, "learning_rate": 0.001, "loss": 2.4905, "step": 218616 }, { "epoch": 41.98, "learning_rate": 0.001, "loss": 2.4911, "step": 218628 }, { "epoch": 41.98, "learning_rate": 0.001, "loss": 2.5036, "step": 218640 }, { "epoch": 41.98, "learning_rate": 0.001, "loss": 2.5041, "step": 218652 }, { "epoch": 41.99, "learning_rate": 0.001, "loss": 2.4994, "step": 218664 }, { "epoch": 41.99, "learning_rate": 0.001, "loss": 2.5013, "step": 218676 }, { "epoch": 41.99, "learning_rate": 0.001, "loss": 2.4987, "step": 218688 }, { "epoch": 41.99, "learning_rate": 0.001, "loss": 2.5003, "step": 218700 }, { "epoch": 42.0, "learning_rate": 0.001, "loss": 2.4922, "step": 218712 }, { "epoch": 42.0, "learning_rate": 0.001, "loss": 2.5044, "step": 218724 }, { "epoch": 42.0, "learning_rate": 0.001, "loss": 2.4992, "step": 218736 }, { "epoch": 42.0, "learning_rate": 0.001, "loss": 2.4789, "step": 218748 }, { "epoch": 42.0, "eval_ag_news_accuracy": 0.32928125, "eval_ag_news_bleu_score": 4.891300621489941, "eval_ag_news_bleu_score_sem": 0.1568683167658272, "eval_ag_news_emb_cos_sim": 0.8244635462760925, "eval_ag_news_emb_cos_sim_sem": 0.0069586988873286855, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4658851623535156, "eval_ag_news_n_ngrams_match_1": 14.512, "eval_ag_news_n_ngrams_match_2": 3.192, "eval_ag_news_n_ngrams_match_3": 0.886, "eval_ag_news_num_pred_words": 46.956, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.00477666219361, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3598947344764221, "eval_ag_news_runtime": 17.1695, "eval_ag_news_samples_per_second": 29.121, "eval_ag_news_steps_per_second": 0.058, "eval_ag_news_token_set_f1": 0.3594154402224956, "eval_ag_news_token_set_f1_sem": 0.004417977714066989, "eval_ag_news_token_set_precision": 0.34828672300609675, "eval_ag_news_token_set_recall": 0.3852051015499847, "eval_ag_news_true_num_tokens": 56.09375, "step": 218750 }, { "epoch": 42.0, "eval_anthropic_toxic_prompts_accuracy": 0.1168125, "eval_anthropic_toxic_prompts_bleu_score": 3.1829855031755563, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12178573539836313, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6733123660087585, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009087960308277084, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.188302755355835, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.112, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.746, "eval_anthropic_toxic_prompts_num_pred_words": 46.092, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.247239003018652, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21379264199978543, "eval_anthropic_toxic_prompts_runtime": 12.2803, "eval_anthropic_toxic_prompts_samples_per_second": 40.716, "eval_anthropic_toxic_prompts_steps_per_second": 0.081, "eval_anthropic_toxic_prompts_token_set_f1": 0.35017008278401085, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006562992112271465, "eval_anthropic_toxic_prompts_token_set_precision": 0.4270487698133057, "eval_anthropic_toxic_prompts_token_set_recall": 0.3230413062917486, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 218750 }, { "epoch": 42.0, "eval_arxiv_accuracy": 0.35415625, "eval_arxiv_bleu_score": 4.431849600988413, "eval_arxiv_bleu_score_sem": 0.13175663319751654, "eval_arxiv_emb_cos_sim": 0.7872753143310547, "eval_arxiv_emb_cos_sim_sem": 0.006389407575057372, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.329688787460327, "eval_arxiv_n_ngrams_match_1": 15.66, "eval_arxiv_n_ngrams_match_2": 3.054, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 41.062, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.929648293779977, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37350832353546937, "eval_arxiv_runtime": 12.5477, "eval_arxiv_samples_per_second": 39.848, "eval_arxiv_steps_per_second": 0.08, "eval_arxiv_token_set_f1": 0.3669420986340655, "eval_arxiv_token_set_f1_sem": 0.0041141701218152405, "eval_arxiv_token_set_precision": 0.32124725225562717, "eval_arxiv_token_set_recall": 0.44572007610761427, "eval_arxiv_true_num_tokens": 64.0, "step": 218750 }, { "epoch": 42.0, "eval_python_code_alpaca_accuracy": 0.16371875, "eval_python_code_alpaca_bleu_score": 4.875315255716432, "eval_python_code_alpaca_bleu_score_sem": 0.16528973634850896, "eval_python_code_alpaca_emb_cos_sim": 0.7545474767684937, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008491577133491869, "eval_python_code_alpaca_emb_top1_equal": 0.2109375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03620184850179216, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.826129674911499, "eval_python_code_alpaca_n_ngrams_match_1": 9.728, "eval_python_code_alpaca_n_ngrams_match_2": 2.974, "eval_python_code_alpaca_n_ngrams_match_3": 1.01, "eval_python_code_alpaca_num_pred_words": 41.06, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.880003135353743, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34356245234797383, "eval_python_code_alpaca_runtime": 12.6955, "eval_python_code_alpaca_samples_per_second": 39.384, "eval_python_code_alpaca_steps_per_second": 0.079, "eval_python_code_alpaca_token_set_f1": 0.4770808737470414, "eval_python_code_alpaca_token_set_f1_sem": 0.005691738362953203, "eval_python_code_alpaca_token_set_precision": 0.5291690114545474, "eval_python_code_alpaca_token_set_recall": 0.46001131629935305, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 218750 }, { "epoch": 42.0, "eval_wikibio_accuracy": 0.33125, "eval_wikibio_bleu_score": 6.2129605813949595, "eval_wikibio_bleu_score_sem": 0.22906099806481456, "eval_wikibio_emb_cos_sim": 0.7505396604537964, "eval_wikibio_emb_cos_sim_sem": 0.009477432749921176, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5982353687286377, "eval_wikibio_n_ngrams_match_1": 10.306, "eval_wikibio_n_ngrams_match_2": 3.488, "eval_wikibio_n_ngrams_match_3": 1.31, "eval_wikibio_num_pred_words": 35.792, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.533709003250955, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3629200904000062, "eval_wikibio_runtime": 12.2875, "eval_wikibio_samples_per_second": 40.692, "eval_wikibio_steps_per_second": 0.081, "eval_wikibio_token_set_f1": 0.32620654746320593, "eval_wikibio_token_set_f1_sem": 0.005560672309609381, "eval_wikibio_token_set_precision": 0.33631133800354357, "eval_wikibio_token_set_recall": 0.3338787244091316, "eval_wikibio_true_num_tokens": 61.1328125, "step": 218750 }, { "epoch": 42.0, "eval_nq_accuracy": 0.538, "eval_nq_bleu_score": 12.353996683991667, "eval_nq_bleu_score_sem": 0.49252432605877994, "eval_nq_emb_cos_sim": 0.8390676975250244, "eval_nq_emb_cos_sim_sem": 0.007353442977759179, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1182310581207275, "eval_nq_n_ngrams_match_1": 23.764, "eval_nq_n_ngrams_match_2": 8.856, "eval_nq_n_ngrams_match_3": 4.15, "eval_nq_num_pred_words": 49.232, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.316413216713073, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4603732456239009, "eval_nq_runtime": 12.8752, "eval_nq_samples_per_second": 38.834, "eval_nq_steps_per_second": 0.078, "eval_nq_token_set_f1": 0.4729860581120128, "eval_nq_token_set_f1_sem": 0.0049555346143281475, "eval_nq_token_set_precision": 0.43302175884646615, "eval_nq_token_set_recall": 0.5286954614644978, "eval_nq_true_num_tokens": 64.0, "step": 218750 }, { "epoch": 42.0, "learning_rate": 0.001, "loss": 2.4787, "step": 218760 }, { "epoch": 42.01, "learning_rate": 0.001, "loss": 2.4757, "step": 218772 }, { "epoch": 42.01, "learning_rate": 0.001, "loss": 2.4868, "step": 218784 }, { "epoch": 42.01, "learning_rate": 0.001, "loss": 2.4872, "step": 218796 }, { "epoch": 42.01, "learning_rate": 0.001, "loss": 2.4886, "step": 218808 }, { "epoch": 42.02, "learning_rate": 0.001, "loss": 2.4827, "step": 218820 }, { "epoch": 42.02, "learning_rate": 0.001, "loss": 2.4819, "step": 218832 }, { "epoch": 42.02, "learning_rate": 0.001, "loss": 2.4843, "step": 218844 }, { "epoch": 42.02, "learning_rate": 0.001, "loss": 2.4746, "step": 218856 }, { "epoch": 42.03, "learning_rate": 0.001, "loss": 2.4775, "step": 218868 }, { "epoch": 42.03, "learning_rate": 0.001, "loss": 2.4868, "step": 218880 }, { "epoch": 42.03, "learning_rate": 0.001, "loss": 2.487, "step": 218892 }, { "epoch": 42.03, "learning_rate": 0.001, "loss": 2.4888, "step": 218904 }, { "epoch": 42.03, "learning_rate": 0.001, "loss": 2.4864, "step": 218916 }, { "epoch": 42.04, "learning_rate": 0.001, "loss": 2.4878, "step": 218928 }, { "epoch": 42.04, "learning_rate": 0.001, "loss": 2.4854, "step": 218940 }, { "epoch": 42.04, "learning_rate": 0.001, "loss": 2.4797, "step": 218952 }, { "epoch": 42.04, "learning_rate": 0.001, "loss": 2.4799, "step": 218964 }, { "epoch": 42.05, "learning_rate": 0.001, "loss": 2.4896, "step": 218976 }, { "epoch": 42.05, "learning_rate": 0.001, "loss": 2.4841, "step": 218988 }, { "epoch": 42.05, "learning_rate": 0.001, "loss": 2.4751, "step": 219000 }, { "epoch": 42.05, "learning_rate": 0.001, "loss": 2.4909, "step": 219012 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 2.4897, "step": 219024 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 2.4802, "step": 219036 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 2.4865, "step": 219048 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 2.4826, "step": 219060 }, { "epoch": 42.06, "learning_rate": 0.001, "loss": 2.4834, "step": 219072 }, { "epoch": 42.07, "learning_rate": 0.001, "loss": 2.4766, "step": 219084 }, { "epoch": 42.07, "learning_rate": 0.001, "loss": 2.4736, "step": 219096 }, { "epoch": 42.07, "learning_rate": 0.001, "loss": 2.4831, "step": 219108 }, { "epoch": 42.07, "learning_rate": 0.001, "loss": 2.478, "step": 219120 }, { "epoch": 42.08, "learning_rate": 0.001, "loss": 2.4764, "step": 219132 }, { "epoch": 42.08, "learning_rate": 0.001, "loss": 2.4736, "step": 219144 }, { "epoch": 42.08, "learning_rate": 0.001, "loss": 2.4886, "step": 219156 }, { "epoch": 42.08, "learning_rate": 0.001, "loss": 2.4852, "step": 219168 }, { "epoch": 42.09, "learning_rate": 0.001, "loss": 2.4773, "step": 219180 }, { "epoch": 42.09, "learning_rate": 0.001, "loss": 2.492, "step": 219192 }, { "epoch": 42.09, "learning_rate": 0.001, "loss": 2.4844, "step": 219204 }, { "epoch": 42.09, "learning_rate": 0.001, "loss": 2.4845, "step": 219216 }, { "epoch": 42.09, "learning_rate": 0.001, "loss": 2.4776, "step": 219228 }, { "epoch": 42.1, "learning_rate": 0.001, "loss": 2.4837, "step": 219240 }, { "epoch": 42.1, "learning_rate": 0.001, "loss": 2.4804, "step": 219252 }, { "epoch": 42.1, "learning_rate": 0.001, "loss": 2.4896, "step": 219264 }, { "epoch": 42.1, "learning_rate": 0.001, "loss": 2.4778, "step": 219276 }, { "epoch": 42.11, "learning_rate": 0.001, "loss": 2.4787, "step": 219288 }, { "epoch": 42.11, "learning_rate": 0.001, "loss": 2.4822, "step": 219300 }, { "epoch": 42.11, "learning_rate": 0.001, "loss": 2.4924, "step": 219312 }, { "epoch": 42.11, "learning_rate": 0.001, "loss": 2.4941, "step": 219324 }, { "epoch": 42.12, "learning_rate": 0.001, "loss": 2.4764, "step": 219336 }, { "epoch": 42.12, "learning_rate": 0.001, "loss": 2.4773, "step": 219348 }, { "epoch": 42.12, "learning_rate": 0.001, "loss": 2.4799, "step": 219360 }, { "epoch": 42.12, "learning_rate": 0.001, "loss": 2.485, "step": 219372 }, { "epoch": 42.12, "eval_ag_news_accuracy": 0.32878125, "eval_ag_news_bleu_score": 5.114055657840925, "eval_ag_news_bleu_score_sem": 0.16190495147979014, "eval_ag_news_emb_cos_sim": 0.8220123648643494, "eval_ag_news_emb_cos_sim_sem": 0.006110483682879323, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4679293632507324, "eval_ag_news_n_ngrams_match_1": 14.56, "eval_ag_news_n_ngrams_match_2": 3.366, "eval_ag_news_n_ngrams_match_3": 0.984, "eval_ag_news_num_pred_words": 46.9, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.07026777104748, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3620513106323513, "eval_ag_news_runtime": 13.2317, "eval_ag_news_samples_per_second": 37.788, "eval_ag_news_steps_per_second": 0.076, "eval_ag_news_token_set_f1": 0.36444018936295913, "eval_ag_news_token_set_f1_sem": 0.0044494099152123006, "eval_ag_news_token_set_precision": 0.349512383981734, "eval_ag_news_token_set_recall": 0.39478669348646933, "eval_ag_news_true_num_tokens": 56.09375, "step": 219375 }, { "epoch": 42.12, "eval_anthropic_toxic_prompts_accuracy": 0.1151875, "eval_anthropic_toxic_prompts_bleu_score": 3.2868874737624294, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12704543539871685, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6880490779876709, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009382956637716959, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.196662187576294, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, "eval_anthropic_toxic_prompts_num_pred_words": 47.414, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.450781719471212, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21942776916217527, "eval_anthropic_toxic_prompts_runtime": 11.4068, "eval_anthropic_toxic_prompts_samples_per_second": 43.833, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.3635019726377188, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0064806904481003276, "eval_anthropic_toxic_prompts_token_set_precision": 0.44988551061375176, "eval_anthropic_toxic_prompts_token_set_recall": 0.32919869055265, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 219375 }, { "epoch": 42.12, "eval_arxiv_accuracy": 0.35246875, "eval_arxiv_bleu_score": 4.6956854491173035, "eval_arxiv_bleu_score_sem": 0.1368974649284805, "eval_arxiv_emb_cos_sim": 0.7871077656745911, "eval_arxiv_emb_cos_sim_sem": 0.006543580088365714, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3312876224517822, "eval_arxiv_n_ngrams_match_1": 15.858, "eval_arxiv_n_ngrams_match_2": 3.182, "eval_arxiv_n_ngrams_match_3": 0.748, "eval_arxiv_num_pred_words": 40.916, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.974338909711257, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3790483686474596, "eval_arxiv_runtime": 18.2242, "eval_arxiv_samples_per_second": 27.436, "eval_arxiv_steps_per_second": 0.055, "eval_arxiv_token_set_f1": 0.3694431494366954, "eval_arxiv_token_set_f1_sem": 0.004190429298376747, "eval_arxiv_token_set_precision": 0.32267483759534554, "eval_arxiv_token_set_recall": 0.44660387072495344, "eval_arxiv_true_num_tokens": 64.0, "step": 219375 }, { "epoch": 42.12, "eval_python_code_alpaca_accuracy": 0.16253125, "eval_python_code_alpaca_bleu_score": 4.720674963220425, "eval_python_code_alpaca_bleu_score_sem": 0.13931244265866943, "eval_python_code_alpaca_emb_cos_sim": 0.7657042741775513, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007414526932141625, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8257317543029785, "eval_python_code_alpaca_n_ngrams_match_1": 9.92, "eval_python_code_alpaca_n_ngrams_match_2": 2.93, "eval_python_code_alpaca_n_ngrams_match_3": 0.966, "eval_python_code_alpaca_num_pred_words": 42.614, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.873287570453744, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3425676640681087, "eval_python_code_alpaca_runtime": 11.8163, "eval_python_code_alpaca_samples_per_second": 42.314, "eval_python_code_alpaca_steps_per_second": 0.085, "eval_python_code_alpaca_token_set_f1": 0.48370720603233985, "eval_python_code_alpaca_token_set_f1_sem": 0.00529003908739912, "eval_python_code_alpaca_token_set_precision": 0.5418550952657617, "eval_python_code_alpaca_token_set_recall": 0.4564278057183093, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 219375 }, { "epoch": 42.12, "eval_wikibio_accuracy": 0.3291875, "eval_wikibio_bleu_score": 6.206690478962144, "eval_wikibio_bleu_score_sem": 0.21878327489493257, "eval_wikibio_emb_cos_sim": 0.7545444369316101, "eval_wikibio_emb_cos_sim_sem": 0.00849759688883934, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6319236755371094, "eval_wikibio_n_ngrams_match_1": 10.206, "eval_wikibio_n_ngrams_match_2": 3.424, "eval_wikibio_n_ngrams_match_3": 1.292, "eval_wikibio_num_pred_words": 35.738, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.78543366336793, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3640439718529963, "eval_wikibio_runtime": 11.3557, "eval_wikibio_samples_per_second": 44.031, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.3267677720836948, "eval_wikibio_token_set_f1_sem": 0.005445527984017967, "eval_wikibio_token_set_precision": 0.3333888306024795, "eval_wikibio_token_set_recall": 0.3353166533567367, "eval_wikibio_true_num_tokens": 61.1328125, "step": 219375 }, { "epoch": 42.12, "eval_nq_accuracy": 0.5368125, "eval_nq_bleu_score": 12.255347474851035, "eval_nq_bleu_score_sem": 0.4991846005084359, "eval_nq_emb_cos_sim": 0.8426265716552734, "eval_nq_emb_cos_sim_sem": 0.007043752544712686, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.117433547973633, "eval_nq_n_ngrams_match_1": 23.562, "eval_nq_n_ngrams_match_2": 8.758, "eval_nq_n_ngrams_match_3": 4.092, "eval_nq_num_pred_words": 49.042, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.309783436795081, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45805297250126337, "eval_nq_runtime": 11.9233, "eval_nq_samples_per_second": 41.935, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.47193520466452954, "eval_nq_token_set_f1_sem": 0.005136736506925819, "eval_nq_token_set_precision": 0.42909744487728674, "eval_nq_token_set_recall": 0.532314281496284, "eval_nq_true_num_tokens": 64.0, "step": 219375 }, { "epoch": 42.12, "learning_rate": 0.001, "loss": 2.4889, "step": 219384 }, { "epoch": 42.13, "learning_rate": 0.001, "loss": 2.4926, "step": 219396 }, { "epoch": 42.13, "learning_rate": 0.001, "loss": 2.4858, "step": 219408 }, { "epoch": 42.13, "learning_rate": 0.001, "loss": 2.487, "step": 219420 }, { "epoch": 42.13, "learning_rate": 0.001, "loss": 2.4821, "step": 219432 }, { "epoch": 42.14, "learning_rate": 0.001, "loss": 2.4885, "step": 219444 }, { "epoch": 42.14, "learning_rate": 0.001, "loss": 2.4848, "step": 219456 }, { "epoch": 42.14, "learning_rate": 0.001, "loss": 2.4963, "step": 219468 }, { "epoch": 42.14, "learning_rate": 0.001, "loss": 2.4846, "step": 219480 }, { "epoch": 42.15, "learning_rate": 0.001, "loss": 2.4854, "step": 219492 }, { "epoch": 42.15, "learning_rate": 0.001, "loss": 2.4906, "step": 219504 }, { "epoch": 42.15, "learning_rate": 0.001, "loss": 2.4797, "step": 219516 }, { "epoch": 42.15, "learning_rate": 0.001, "loss": 2.492, "step": 219528 }, { "epoch": 42.15, "learning_rate": 0.001, "loss": 2.4924, "step": 219540 }, { "epoch": 42.16, "learning_rate": 0.001, "loss": 2.4921, "step": 219552 }, { "epoch": 42.16, "learning_rate": 0.001, "loss": 2.4914, "step": 219564 }, { "epoch": 42.16, "learning_rate": 0.001, "loss": 2.4905, "step": 219576 }, { "epoch": 42.16, "learning_rate": 0.001, "loss": 2.4887, "step": 219588 }, { "epoch": 42.17, "learning_rate": 0.001, "loss": 2.4852, "step": 219600 }, { "epoch": 42.17, "learning_rate": 0.001, "loss": 2.4886, "step": 219612 }, { "epoch": 42.17, "learning_rate": 0.001, "loss": 2.4894, "step": 219624 }, { "epoch": 42.17, "learning_rate": 0.001, "loss": 2.479, "step": 219636 }, { "epoch": 42.18, "learning_rate": 0.001, "loss": 2.4916, "step": 219648 }, { "epoch": 42.18, "learning_rate": 0.001, "loss": 2.4848, "step": 219660 }, { "epoch": 42.18, "learning_rate": 0.001, "loss": 2.4874, "step": 219672 }, { "epoch": 42.18, "learning_rate": 0.001, "loss": 2.4853, "step": 219684 }, { "epoch": 42.18, "learning_rate": 0.001, "loss": 2.49, "step": 219696 }, { "epoch": 42.19, "learning_rate": 0.001, "loss": 2.4933, "step": 219708 }, { "epoch": 42.19, "learning_rate": 0.001, "loss": 2.4869, "step": 219720 }, { "epoch": 42.19, "learning_rate": 0.001, "loss": 2.5017, "step": 219732 }, { "epoch": 42.19, "learning_rate": 0.001, "loss": 2.4857, "step": 219744 }, { "epoch": 42.2, "learning_rate": 0.001, "loss": 2.4916, "step": 219756 }, { "epoch": 42.2, "learning_rate": 0.001, "loss": 2.4807, "step": 219768 }, { "epoch": 42.2, "learning_rate": 0.001, "loss": 2.4923, "step": 219780 }, { "epoch": 42.2, "learning_rate": 0.001, "loss": 2.4864, "step": 219792 }, { "epoch": 42.21, "learning_rate": 0.001, "loss": 2.4824, "step": 219804 }, { "epoch": 42.21, "learning_rate": 0.001, "loss": 2.4801, "step": 219816 }, { "epoch": 42.21, "learning_rate": 0.001, "loss": 2.4904, "step": 219828 }, { "epoch": 42.21, "learning_rate": 0.001, "loss": 2.4901, "step": 219840 }, { "epoch": 42.21, "learning_rate": 0.001, "loss": 2.4928, "step": 219852 }, { "epoch": 42.22, "learning_rate": 0.001, "loss": 2.4876, "step": 219864 }, { "epoch": 42.22, "learning_rate": 0.001, "loss": 2.4819, "step": 219876 }, { "epoch": 42.22, "learning_rate": 0.001, "loss": 2.4843, "step": 219888 }, { "epoch": 42.22, "learning_rate": 0.001, "loss": 2.4732, "step": 219900 }, { "epoch": 42.23, "learning_rate": 0.001, "loss": 2.4917, "step": 219912 }, { "epoch": 42.23, "learning_rate": 0.001, "loss": 2.4858, "step": 219924 }, { "epoch": 42.23, "learning_rate": 0.001, "loss": 2.487, "step": 219936 }, { "epoch": 42.23, "learning_rate": 0.001, "loss": 2.4817, "step": 219948 }, { "epoch": 42.24, "learning_rate": 0.001, "loss": 2.491, "step": 219960 }, { "epoch": 42.24, "learning_rate": 0.001, "loss": 2.4836, "step": 219972 }, { "epoch": 42.24, "learning_rate": 0.001, "loss": 2.4956, "step": 219984 }, { "epoch": 42.24, "learning_rate": 0.001, "loss": 2.4866, "step": 219996 }, { "epoch": 42.24, "eval_ag_news_accuracy": 0.32940625, "eval_ag_news_bleu_score": 5.135285862552853, "eval_ag_news_bleu_score_sem": 0.16595851834441983, "eval_ag_news_emb_cos_sim": 0.821050226688385, "eval_ag_news_emb_cos_sim_sem": 0.0068386936387280104, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4684741497039795, "eval_ag_news_n_ngrams_match_1": 14.552, "eval_ag_news_n_ngrams_match_2": 3.33, "eval_ag_news_n_ngrams_match_3": 0.97, "eval_ag_news_num_pred_words": 46.664, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.087743978449446, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3603146158118864, "eval_ag_news_runtime": 26.1457, "eval_ag_news_samples_per_second": 19.124, "eval_ag_news_steps_per_second": 0.038, "eval_ag_news_token_set_f1": 0.3619990764633275, "eval_ag_news_token_set_f1_sem": 0.004462129280319432, "eval_ag_news_token_set_precision": 0.3492692383491201, "eval_ag_news_token_set_recall": 0.3900119421807912, "eval_ag_news_true_num_tokens": 56.09375, "step": 220000 }, { "epoch": 42.24, "eval_anthropic_toxic_prompts_accuracy": 0.11453125, "eval_anthropic_toxic_prompts_bleu_score": 3.324966670128112, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1391054499045368, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6809598207473755, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009369215448013392, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.210698127746582, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.252, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.962, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762, "eval_anthropic_toxic_prompts_num_pred_words": 46.896, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.79639123206612, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21624338666186077, "eval_anthropic_toxic_prompts_runtime": 14.8659, "eval_anthropic_toxic_prompts_samples_per_second": 33.634, "eval_anthropic_toxic_prompts_steps_per_second": 0.067, "eval_anthropic_toxic_prompts_token_set_f1": 0.3519435219790378, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006549468492754389, "eval_anthropic_toxic_prompts_token_set_precision": 0.43621953616353665, "eval_anthropic_toxic_prompts_token_set_recall": 0.3205663962370167, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 220000 }, { "epoch": 42.24, "eval_arxiv_accuracy": 0.3528125, "eval_arxiv_bleu_score": 4.68695975337914, "eval_arxiv_bleu_score_sem": 0.1430225954552886, "eval_arxiv_emb_cos_sim": 0.7879422903060913, "eval_arxiv_emb_cos_sim_sem": 0.00641595218708514, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.336101770401001, "eval_arxiv_n_ngrams_match_1": 15.572, "eval_arxiv_n_ngrams_match_2": 3.19, "eval_arxiv_n_ngrams_match_3": 0.794, "eval_arxiv_num_pred_words": 40.648, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.10933620375242, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37193294828325585, "eval_arxiv_runtime": 11.6836, "eval_arxiv_samples_per_second": 42.795, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.36365067851263927, "eval_arxiv_token_set_f1_sem": 0.004470472987750251, "eval_arxiv_token_set_precision": 0.31817153992694336, "eval_arxiv_token_set_recall": 0.4402469038092684, "eval_arxiv_true_num_tokens": 64.0, "step": 220000 }, { "epoch": 42.24, "eval_python_code_alpaca_accuracy": 0.1643125, "eval_python_code_alpaca_bleu_score": 4.671901115767498, "eval_python_code_alpaca_bleu_score_sem": 0.14884401069773442, "eval_python_code_alpaca_emb_cos_sim": 0.765325129032135, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00721018583967065, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8322031497955322, "eval_python_code_alpaca_n_ngrams_match_1": 9.838, "eval_python_code_alpaca_n_ngrams_match_2": 2.88, "eval_python_code_alpaca_n_ngrams_match_3": 0.956, "eval_python_code_alpaca_num_pred_words": 42.322, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.982835368834017, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34017329993780643, "eval_python_code_alpaca_runtime": 11.3858, "eval_python_code_alpaca_samples_per_second": 43.914, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.4764276407302975, "eval_python_code_alpaca_token_set_f1_sem": 0.005412071359097353, "eval_python_code_alpaca_token_set_precision": 0.5376840820111548, "eval_python_code_alpaca_token_set_recall": 0.44977870489549737, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 220000 }, { "epoch": 42.24, "eval_wikibio_accuracy": 0.32946875, "eval_wikibio_bleu_score": 6.1818557344923555, "eval_wikibio_bleu_score_sem": 0.2245235752092188, "eval_wikibio_emb_cos_sim": 0.7415040731430054, "eval_wikibio_emb_cos_sim_sem": 0.008858960016704697, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6164400577545166, "eval_wikibio_n_ngrams_match_1": 9.936, "eval_wikibio_n_ngrams_match_2": 3.396, "eval_wikibio_n_ngrams_match_3": 1.304, "eval_wikibio_num_pred_words": 35.448, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.204884549540516, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35385059861310764, "eval_wikibio_runtime": 21.3734, "eval_wikibio_samples_per_second": 23.394, "eval_wikibio_steps_per_second": 0.047, "eval_wikibio_token_set_f1": 0.3195351922274151, "eval_wikibio_token_set_f1_sem": 0.005707029002276072, "eval_wikibio_token_set_precision": 0.32460346752223623, "eval_wikibio_token_set_recall": 0.3310073936346742, "eval_wikibio_true_num_tokens": 61.1328125, "step": 220000 }, { "epoch": 42.24, "eval_nq_accuracy": 0.53684375, "eval_nq_bleu_score": 12.067484525172185, "eval_nq_bleu_score_sem": 0.49549202108640733, "eval_nq_emb_cos_sim": 0.8371459245681763, "eval_nq_emb_cos_sim_sem": 0.0076807949159652614, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.115579605102539, "eval_nq_n_ngrams_match_1": 23.578, "eval_nq_n_ngrams_match_2": 8.74, "eval_nq_n_ngrams_match_3": 4.02, "eval_nq_num_pred_words": 49.024, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.29439184500658, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45796722880794505, "eval_nq_runtime": 16.4204, "eval_nq_samples_per_second": 30.45, "eval_nq_steps_per_second": 0.061, "eval_nq_token_set_f1": 0.4707648551827902, "eval_nq_token_set_f1_sem": 0.005015695960147839, "eval_nq_token_set_precision": 0.4303408368698218, "eval_nq_token_set_recall": 0.5277757233312265, "eval_nq_true_num_tokens": 64.0, "step": 220000 }, { "epoch": 42.24, "learning_rate": 0.001, "loss": 2.4859, "step": 220008 }, { "epoch": 42.25, "learning_rate": 0.001, "loss": 2.4816, "step": 220020 }, { "epoch": 42.25, "learning_rate": 0.001, "loss": 2.4957, "step": 220032 }, { "epoch": 42.25, "learning_rate": 0.001, "loss": 2.4816, "step": 220044 }, { "epoch": 42.25, "learning_rate": 0.001, "loss": 2.4827, "step": 220056 }, { "epoch": 42.26, "learning_rate": 0.001, "loss": 2.4906, "step": 220068 }, { "epoch": 42.26, "learning_rate": 0.001, "loss": 2.4908, "step": 220080 }, { "epoch": 42.26, "learning_rate": 0.001, "loss": 2.4913, "step": 220092 }, { "epoch": 42.26, "learning_rate": 0.001, "loss": 2.4832, "step": 220104 }, { "epoch": 42.26, "learning_rate": 0.001, "loss": 2.4919, "step": 220116 }, { "epoch": 42.27, "learning_rate": 0.001, "loss": 2.4939, "step": 220128 }, { "epoch": 42.27, "learning_rate": 0.001, "loss": 2.4907, "step": 220140 }, { "epoch": 42.27, "learning_rate": 0.001, "loss": 2.4827, "step": 220152 }, { "epoch": 42.27, "learning_rate": 0.001, "loss": 2.4874, "step": 220164 }, { "epoch": 42.28, "learning_rate": 0.001, "loss": 2.4892, "step": 220176 }, { "epoch": 42.28, "learning_rate": 0.001, "loss": 2.4895, "step": 220188 }, { "epoch": 42.28, "learning_rate": 0.001, "loss": 2.4901, "step": 220200 }, { "epoch": 42.28, "learning_rate": 0.001, "loss": 2.4892, "step": 220212 }, { "epoch": 42.29, "learning_rate": 0.001, "loss": 2.4874, "step": 220224 }, { "epoch": 42.29, "learning_rate": 0.001, "loss": 2.4909, "step": 220236 }, { "epoch": 42.29, "learning_rate": 0.001, "loss": 2.4812, "step": 220248 }, { "epoch": 42.29, "learning_rate": 0.001, "loss": 2.4969, "step": 220260 }, { "epoch": 42.29, "learning_rate": 0.001, "loss": 2.4959, "step": 220272 }, { "epoch": 42.3, "learning_rate": 0.001, "loss": 2.4832, "step": 220284 }, { "epoch": 42.3, "learning_rate": 0.001, "loss": 2.4848, "step": 220296 }, { "epoch": 42.3, "learning_rate": 0.001, "loss": 2.4895, "step": 220308 }, { "epoch": 42.3, "learning_rate": 0.001, "loss": 2.4848, "step": 220320 }, { "epoch": 42.31, "learning_rate": 0.001, "loss": 2.4824, "step": 220332 }, { "epoch": 42.31, "learning_rate": 0.001, "loss": 2.4938, "step": 220344 }, { "epoch": 42.31, "learning_rate": 0.001, "loss": 2.4978, "step": 220356 }, { "epoch": 42.31, "learning_rate": 0.001, "loss": 2.4825, "step": 220368 }, { "epoch": 42.32, "learning_rate": 0.001, "loss": 2.4894, "step": 220380 }, { "epoch": 42.32, "learning_rate": 0.001, "loss": 2.4951, "step": 220392 }, { "epoch": 42.32, "learning_rate": 0.001, "loss": 2.4949, "step": 220404 }, { "epoch": 42.32, "learning_rate": 0.001, "loss": 2.4819, "step": 220416 }, { "epoch": 42.32, "learning_rate": 0.001, "loss": 2.4848, "step": 220428 }, { "epoch": 42.33, "learning_rate": 0.001, "loss": 2.4862, "step": 220440 }, { "epoch": 42.33, "learning_rate": 0.001, "loss": 2.4857, "step": 220452 }, { "epoch": 42.33, "learning_rate": 0.001, "loss": 2.4872, "step": 220464 }, { "epoch": 42.33, "learning_rate": 0.001, "loss": 2.4996, "step": 220476 }, { "epoch": 42.34, "learning_rate": 0.001, "loss": 2.4955, "step": 220488 }, { "epoch": 42.34, "learning_rate": 0.001, "loss": 2.4863, "step": 220500 }, { "epoch": 42.34, "learning_rate": 0.001, "loss": 2.4893, "step": 220512 }, { "epoch": 42.34, "learning_rate": 0.001, "loss": 2.4971, "step": 220524 }, { "epoch": 42.35, "learning_rate": 0.001, "loss": 2.4875, "step": 220536 }, { "epoch": 42.35, "learning_rate": 0.001, "loss": 2.4854, "step": 220548 }, { "epoch": 42.35, "learning_rate": 0.001, "loss": 2.4892, "step": 220560 }, { "epoch": 42.35, "learning_rate": 0.001, "loss": 2.4917, "step": 220572 }, { "epoch": 42.35, "learning_rate": 0.001, "loss": 2.4821, "step": 220584 }, { "epoch": 42.36, "learning_rate": 0.001, "loss": 2.4875, "step": 220596 }, { "epoch": 42.36, "learning_rate": 0.001, "loss": 2.4892, "step": 220608 }, { "epoch": 42.36, "learning_rate": 0.001, "loss": 2.4733, "step": 220620 }, { "epoch": 42.36, "eval_ag_news_accuracy": 0.3286875, "eval_ag_news_bleu_score": 4.913234779707554, "eval_ag_news_bleu_score_sem": 0.15325418224317922, "eval_ag_news_emb_cos_sim": 0.82138991355896, "eval_ag_news_emb_cos_sim_sem": 0.006465480002441504, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4734721183776855, "eval_ag_news_n_ngrams_match_1": 14.586, "eval_ag_news_n_ngrams_match_2": 3.304, "eval_ag_news_n_ngrams_match_3": 0.922, "eval_ag_news_num_pred_words": 46.986, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.24851895714151, "eval_ag_news_pred_num_tokens": 62.9921875, "eval_ag_news_rouge_score": 0.3621328017137787, "eval_ag_news_runtime": 14.4859, "eval_ag_news_samples_per_second": 34.516, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.3589860213247812, "eval_ag_news_token_set_f1_sem": 0.004384040337489738, "eval_ag_news_token_set_precision": 0.3479854436396126, "eval_ag_news_token_set_recall": 0.3850237597480226, "eval_ag_news_true_num_tokens": 56.09375, "step": 220625 }, { "epoch": 42.36, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.2037401470384888, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1185331013462973, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6836092472076416, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008281643022514598, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2020986080169678, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.018, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.74, "eval_anthropic_toxic_prompts_num_pred_words": 47.23, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.584068422008286, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21838122962380246, "eval_anthropic_toxic_prompts_runtime": 11.6961, "eval_anthropic_toxic_prompts_samples_per_second": 42.749, "eval_anthropic_toxic_prompts_steps_per_second": 0.085, "eval_anthropic_toxic_prompts_token_set_f1": 0.35550090106319376, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006414531472548422, "eval_anthropic_toxic_prompts_token_set_precision": 0.4481323566292048, "eval_anthropic_toxic_prompts_token_set_recall": 0.3191696051240382, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 220625 }, { "epoch": 42.36, "eval_arxiv_accuracy": 0.35146875, "eval_arxiv_bleu_score": 4.384790524916859, "eval_arxiv_bleu_score_sem": 0.12775255899959273, "eval_arxiv_emb_cos_sim": 0.779697835445404, "eval_arxiv_emb_cos_sim_sem": 0.006949149991962936, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3369674682617188, "eval_arxiv_n_ngrams_match_1": 15.404, "eval_arxiv_n_ngrams_match_2": 3.042, "eval_arxiv_n_ngrams_match_3": 0.66, "eval_arxiv_num_pred_words": 40.726, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.133680932039407, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3685214404913864, "eval_arxiv_runtime": 11.7416, "eval_arxiv_samples_per_second": 42.583, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.3619226048821038, "eval_arxiv_token_set_f1_sem": 0.004209469035555156, "eval_arxiv_token_set_precision": 0.315969204757354, "eval_arxiv_token_set_recall": 0.4399705064946582, "eval_arxiv_true_num_tokens": 64.0, "step": 220625 }, { "epoch": 42.36, "eval_python_code_alpaca_accuracy": 0.16396875, "eval_python_code_alpaca_bleu_score": 5.05735299034849, "eval_python_code_alpaca_bleu_score_sem": 0.1673952538857668, "eval_python_code_alpaca_emb_cos_sim": 0.7657151222229004, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009257155538901527, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8134825229644775, "eval_python_code_alpaca_n_ngrams_match_1": 10.068, "eval_python_code_alpaca_n_ngrams_match_2": 3.156, "eval_python_code_alpaca_n_ngrams_match_3": 1.15, "eval_python_code_alpaca_num_pred_words": 43.436, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.667863479709474, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3423279830812419, "eval_python_code_alpaca_runtime": 11.2088, "eval_python_code_alpaca_samples_per_second": 44.608, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4855619679600474, "eval_python_code_alpaca_token_set_f1_sem": 0.005572310532327872, "eval_python_code_alpaca_token_set_precision": 0.5522563722957164, "eval_python_code_alpaca_token_set_recall": 0.45931423398540877, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 220625 }, { "epoch": 42.36, "eval_wikibio_accuracy": 0.33259375, "eval_wikibio_bleu_score": 6.074715963795568, "eval_wikibio_bleu_score_sem": 0.22514948267978072, "eval_wikibio_emb_cos_sim": 0.736054003238678, "eval_wikibio_emb_cos_sim_sem": 0.010288878121767323, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.631265163421631, "eval_wikibio_n_ngrams_match_1": 10.006, "eval_wikibio_n_ngrams_match_2": 3.396, "eval_wikibio_n_ngrams_match_3": 1.24, "eval_wikibio_num_pred_words": 35.608, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.76055968831781, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3518882309754411, "eval_wikibio_runtime": 11.181, "eval_wikibio_samples_per_second": 44.719, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3202194496593714, "eval_wikibio_token_set_f1_sem": 0.005526329548682177, "eval_wikibio_token_set_precision": 0.32614185234820303, "eval_wikibio_token_set_recall": 0.3333646391040292, "eval_wikibio_true_num_tokens": 61.1328125, "step": 220625 }, { "epoch": 42.36, "eval_nq_accuracy": 0.53759375, "eval_nq_bleu_score": 12.222270247136262, "eval_nq_bleu_score_sem": 0.48775874131268104, "eval_nq_emb_cos_sim": 0.8407552242279053, "eval_nq_emb_cos_sim_sem": 0.007149006396334548, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.115541458129883, "eval_nq_n_ngrams_match_1": 23.468, "eval_nq_n_ngrams_match_2": 8.758, "eval_nq_n_ngrams_match_3": 4.104, "eval_nq_num_pred_words": 49.096, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.294075445102555, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4557016637875644, "eval_nq_runtime": 14.2294, "eval_nq_samples_per_second": 35.139, "eval_nq_steps_per_second": 0.07, "eval_nq_token_set_f1": 0.46947814500519336, "eval_nq_token_set_f1_sem": 0.0049933643153937815, "eval_nq_token_set_precision": 0.42926104990642944, "eval_nq_token_set_recall": 0.5266418809022172, "eval_nq_true_num_tokens": 64.0, "step": 220625 }, { "epoch": 42.36, "learning_rate": 0.001, "loss": 2.482, "step": 220632 }, { "epoch": 42.37, "learning_rate": 0.001, "loss": 2.4894, "step": 220644 }, { "epoch": 42.37, "learning_rate": 0.001, "loss": 2.4907, "step": 220656 }, { "epoch": 42.37, "learning_rate": 0.001, "loss": 2.4839, "step": 220668 }, { "epoch": 42.37, "learning_rate": 0.001, "loss": 2.5, "step": 220680 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 2.4933, "step": 220692 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 2.493, "step": 220704 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 2.4875, "step": 220716 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 2.4913, "step": 220728 }, { "epoch": 42.38, "learning_rate": 0.001, "loss": 2.488, "step": 220740 }, { "epoch": 42.39, "learning_rate": 0.001, "loss": 2.4912, "step": 220752 }, { "epoch": 42.39, "learning_rate": 0.001, "loss": 2.4937, "step": 220764 }, { "epoch": 42.39, "learning_rate": 0.001, "loss": 2.4784, "step": 220776 }, { "epoch": 42.39, "learning_rate": 0.001, "loss": 2.4809, "step": 220788 }, { "epoch": 42.4, "learning_rate": 0.001, "loss": 2.4874, "step": 220800 }, { "epoch": 42.4, "learning_rate": 0.001, "loss": 2.4906, "step": 220812 }, { "epoch": 42.4, "learning_rate": 0.001, "loss": 2.4938, "step": 220824 }, { "epoch": 42.4, "learning_rate": 0.001, "loss": 2.4941, "step": 220836 }, { "epoch": 42.41, "learning_rate": 0.001, "loss": 2.4918, "step": 220848 }, { "epoch": 42.41, "learning_rate": 0.001, "loss": 2.4837, "step": 220860 }, { "epoch": 42.41, "learning_rate": 0.001, "loss": 2.4847, "step": 220872 }, { "epoch": 42.41, "learning_rate": 0.001, "loss": 2.4845, "step": 220884 }, { "epoch": 42.41, "learning_rate": 0.001, "loss": 2.4936, "step": 220896 }, { "epoch": 42.42, "learning_rate": 0.001, "loss": 2.489, "step": 220908 }, { "epoch": 42.42, "learning_rate": 0.001, "loss": 2.5041, "step": 220920 }, { "epoch": 42.42, "learning_rate": 0.001, "loss": 2.4937, "step": 220932 }, { "epoch": 42.42, "learning_rate": 0.001, "loss": 2.4901, "step": 220944 }, { "epoch": 42.43, "learning_rate": 0.001, "loss": 2.4788, "step": 220956 }, { "epoch": 42.43, "learning_rate": 0.001, "loss": 2.4869, "step": 220968 }, { "epoch": 42.43, "learning_rate": 0.001, "loss": 2.4905, "step": 220980 }, { "epoch": 42.43, "learning_rate": 0.001, "loss": 2.5006, "step": 220992 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 2.4864, "step": 221004 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 2.4858, "step": 221016 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 2.4887, "step": 221028 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 2.4902, "step": 221040 }, { "epoch": 42.44, "learning_rate": 0.001, "loss": 2.494, "step": 221052 }, { "epoch": 42.45, "learning_rate": 0.001, "loss": 2.4999, "step": 221064 }, { "epoch": 42.45, "learning_rate": 0.001, "loss": 2.4785, "step": 221076 }, { "epoch": 42.45, "learning_rate": 0.001, "loss": 2.5008, "step": 221088 }, { "epoch": 42.45, "learning_rate": 0.001, "loss": 2.4887, "step": 221100 }, { "epoch": 42.46, "learning_rate": 0.001, "loss": 2.4879, "step": 221112 }, { "epoch": 42.46, "learning_rate": 0.001, "loss": 2.4955, "step": 221124 }, { "epoch": 42.46, "learning_rate": 0.001, "loss": 2.4915, "step": 221136 }, { "epoch": 42.46, "learning_rate": 0.001, "loss": 2.4936, "step": 221148 }, { "epoch": 42.47, "learning_rate": 0.001, "loss": 2.4819, "step": 221160 }, { "epoch": 42.47, "learning_rate": 0.001, "loss": 2.499, "step": 221172 }, { "epoch": 42.47, "learning_rate": 0.001, "loss": 2.4982, "step": 221184 }, { "epoch": 42.47, "learning_rate": 0.001, "loss": 2.49, "step": 221196 }, { "epoch": 42.47, "learning_rate": 0.001, "loss": 2.4861, "step": 221208 }, { "epoch": 42.48, "learning_rate": 0.001, "loss": 2.4945, "step": 221220 }, { "epoch": 42.48, "learning_rate": 0.001, "loss": 2.4859, "step": 221232 }, { "epoch": 42.48, "learning_rate": 0.001, "loss": 2.4977, "step": 221244 }, { "epoch": 42.48, "eval_ag_news_accuracy": 0.33, "eval_ag_news_bleu_score": 4.964829589879801, "eval_ag_news_bleu_score_sem": 0.16182026323247872, "eval_ag_news_emb_cos_sim": 0.8185749053955078, "eval_ag_news_emb_cos_sim_sem": 0.007249959952319283, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.456552267074585, "eval_ag_news_n_ngrams_match_1": 14.35, "eval_ag_news_n_ngrams_match_2": 3.244, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 46.246, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.70746896202341, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35957403037743746, "eval_ag_news_runtime": 17.2026, "eval_ag_news_samples_per_second": 29.065, "eval_ag_news_steps_per_second": 0.058, "eval_ag_news_token_set_f1": 0.35961498353933935, "eval_ag_news_token_set_f1_sem": 0.004551093483797757, "eval_ag_news_token_set_precision": 0.34512260518668486, "eval_ag_news_token_set_recall": 0.38994156327149254, "eval_ag_news_true_num_tokens": 56.09375, "step": 221250 }, { "epoch": 42.48, "eval_anthropic_toxic_prompts_accuracy": 0.116125, "eval_anthropic_toxic_prompts_bleu_score": 3.048372009850658, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11413168733340211, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6789529323577881, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009790593712524737, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1827456951141357, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.214, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.86, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.686, "eval_anthropic_toxic_prompts_num_pred_words": 46.83, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.11286933115267, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21617793979208577, "eval_anthropic_toxic_prompts_runtime": 16.3683, "eval_anthropic_toxic_prompts_samples_per_second": 30.547, "eval_anthropic_toxic_prompts_steps_per_second": 0.061, "eval_anthropic_toxic_prompts_token_set_f1": 0.354242424612089, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066501806268472845, "eval_anthropic_toxic_prompts_token_set_precision": 0.438145795568399, "eval_anthropic_toxic_prompts_token_set_recall": 0.32164073704444873, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 221250 }, { "epoch": 42.48, "eval_arxiv_accuracy": 0.35203125, "eval_arxiv_bleu_score": 4.501454770698891, "eval_arxiv_bleu_score_sem": 0.1312538419078761, "eval_arxiv_emb_cos_sim": 0.7829389572143555, "eval_arxiv_emb_cos_sim_sem": 0.006906137157424045, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.328486442565918, "eval_arxiv_n_ngrams_match_1": 15.558, "eval_arxiv_n_ngrams_match_2": 3.088, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.954, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.896087403676802, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.371630732942738, "eval_arxiv_runtime": 12.1434, "eval_arxiv_samples_per_second": 41.175, "eval_arxiv_steps_per_second": 0.082, "eval_arxiv_token_set_f1": 0.36436814703754683, "eval_arxiv_token_set_f1_sem": 0.004064773325112701, "eval_arxiv_token_set_precision": 0.3164516263537375, "eval_arxiv_token_set_recall": 0.4460516047433709, "eval_arxiv_true_num_tokens": 64.0, "step": 221250 }, { "epoch": 42.48, "eval_python_code_alpaca_accuracy": 0.163625, "eval_python_code_alpaca_bleu_score": 4.863885875968682, "eval_python_code_alpaca_bleu_score_sem": 0.1701844172174006, "eval_python_code_alpaca_emb_cos_sim": 0.7633397579193115, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007786308234123268, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8259034156799316, "eval_python_code_alpaca_n_ngrams_match_1": 9.764, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 1.016, "eval_python_code_alpaca_num_pred_words": 42.318, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.876184310853922, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33722470945843286, "eval_python_code_alpaca_runtime": 11.545, "eval_python_code_alpaca_samples_per_second": 43.309, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.47371346511410956, "eval_python_code_alpaca_token_set_f1_sem": 0.0059670758444496565, "eval_python_code_alpaca_token_set_precision": 0.5317447831843691, "eval_python_code_alpaca_token_set_recall": 0.44921655113881387, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 221250 }, { "epoch": 42.48, "eval_wikibio_accuracy": 0.3293125, "eval_wikibio_bleu_score": 6.241991363100509, "eval_wikibio_bleu_score_sem": 0.2148057709867536, "eval_wikibio_emb_cos_sim": 0.7499901652336121, "eval_wikibio_emb_cos_sim_sem": 0.0094670567226534, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6126034259796143, "eval_wikibio_n_ngrams_match_1": 10.35, "eval_wikibio_n_ngrams_match_2": 3.526, "eval_wikibio_n_ngrams_match_3": 1.3, "eval_wikibio_num_pred_words": 36.06, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.06241658062203, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3647641482252846, "eval_wikibio_runtime": 16.1776, "eval_wikibio_samples_per_second": 30.907, "eval_wikibio_steps_per_second": 0.062, "eval_wikibio_token_set_f1": 0.32839988170065676, "eval_wikibio_token_set_f1_sem": 0.005525039758339359, "eval_wikibio_token_set_precision": 0.33716157196704494, "eval_wikibio_token_set_recall": 0.3387721449018883, "eval_wikibio_true_num_tokens": 61.1328125, "step": 221250 }, { "epoch": 42.48, "eval_nq_accuracy": 0.5365625, "eval_nq_bleu_score": 12.330745345704118, "eval_nq_bleu_score_sem": 0.48487521165461384, "eval_nq_emb_cos_sim": 0.8425266742706299, "eval_nq_emb_cos_sim_sem": 0.0063902017797339995, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1134583950042725, "eval_nq_n_ngrams_match_1": 23.716, "eval_nq_n_ngrams_match_2": 8.846, "eval_nq_n_ngrams_match_3": 4.17, "eval_nq_num_pred_words": 48.884, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.276816344520654, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46056870759133217, "eval_nq_runtime": 16.7213, "eval_nq_samples_per_second": 29.902, "eval_nq_steps_per_second": 0.06, "eval_nq_token_set_f1": 0.4712618744237596, "eval_nq_token_set_f1_sem": 0.0049357688121397765, "eval_nq_token_set_precision": 0.4312205787074414, "eval_nq_token_set_recall": 0.5283920948121197, "eval_nq_true_num_tokens": 64.0, "step": 221250 }, { "epoch": 42.48, "learning_rate": 0.001, "loss": 2.4895, "step": 221256 }, { "epoch": 42.49, "learning_rate": 0.001, "loss": 2.4808, "step": 221268 }, { "epoch": 42.49, "learning_rate": 0.001, "loss": 2.4876, "step": 221280 }, { "epoch": 42.49, "learning_rate": 0.001, "loss": 2.4813, "step": 221292 }, { "epoch": 42.49, "learning_rate": 0.001, "loss": 2.4916, "step": 221304 }, { "epoch": 42.5, "learning_rate": 0.001, "loss": 2.4841, "step": 221316 }, { "epoch": 42.5, "learning_rate": 0.001, "loss": 2.4829, "step": 221328 }, { "epoch": 42.5, "learning_rate": 0.001, "loss": 2.4951, "step": 221340 }, { "epoch": 42.5, "learning_rate": 0.001, "loss": 2.4926, "step": 221352 }, { "epoch": 42.5, "learning_rate": 0.001, "loss": 2.4879, "step": 221364 }, { "epoch": 42.51, "learning_rate": 0.001, "loss": 2.5077, "step": 221376 }, { "epoch": 42.51, "learning_rate": 0.001, "loss": 2.5, "step": 221388 }, { "epoch": 42.51, "learning_rate": 0.001, "loss": 2.4837, "step": 221400 }, { "epoch": 42.51, "learning_rate": 0.001, "loss": 2.4955, "step": 221412 }, { "epoch": 42.52, "learning_rate": 0.001, "loss": 2.499, "step": 221424 }, { "epoch": 42.52, "learning_rate": 0.001, "loss": 2.4981, "step": 221436 }, { "epoch": 42.52, "learning_rate": 0.001, "loss": 2.4979, "step": 221448 }, { "epoch": 42.52, "learning_rate": 0.001, "loss": 2.4957, "step": 221460 }, { "epoch": 42.53, "learning_rate": 0.001, "loss": 2.4968, "step": 221472 }, { "epoch": 42.53, "learning_rate": 0.001, "loss": 2.4996, "step": 221484 }, { "epoch": 42.53, "learning_rate": 0.001, "loss": 2.4917, "step": 221496 }, { "epoch": 42.53, "learning_rate": 0.001, "loss": 2.4899, "step": 221508 }, { "epoch": 42.53, "learning_rate": 0.001, "loss": 2.4755, "step": 221520 }, { "epoch": 42.54, "learning_rate": 0.001, "loss": 2.4937, "step": 221532 }, { "epoch": 42.54, "learning_rate": 0.001, "loss": 2.4897, "step": 221544 }, { "epoch": 42.54, "learning_rate": 0.001, "loss": 2.4946, "step": 221556 }, { "epoch": 42.54, "learning_rate": 0.001, "loss": 2.4818, "step": 221568 }, { "epoch": 42.55, "learning_rate": 0.001, "loss": 2.4883, "step": 221580 }, { "epoch": 42.55, "learning_rate": 0.001, "loss": 2.4844, "step": 221592 }, { "epoch": 42.55, "learning_rate": 0.001, "loss": 2.4941, "step": 221604 }, { "epoch": 42.55, "learning_rate": 0.001, "loss": 2.4896, "step": 221616 }, { "epoch": 42.56, "learning_rate": 0.001, "loss": 2.4895, "step": 221628 }, { "epoch": 42.56, "learning_rate": 0.001, "loss": 2.486, "step": 221640 }, { "epoch": 42.56, "learning_rate": 0.001, "loss": 2.4953, "step": 221652 }, { "epoch": 42.56, "learning_rate": 0.001, "loss": 2.4936, "step": 221664 }, { "epoch": 42.56, "learning_rate": 0.001, "loss": 2.4725, "step": 221676 }, { "epoch": 42.57, "learning_rate": 0.001, "loss": 2.4938, "step": 221688 }, { "epoch": 42.57, "learning_rate": 0.001, "loss": 2.4979, "step": 221700 }, { "epoch": 42.57, "learning_rate": 0.001, "loss": 2.4929, "step": 221712 }, { "epoch": 42.57, "learning_rate": 0.001, "loss": 2.4905, "step": 221724 }, { "epoch": 42.58, "learning_rate": 0.001, "loss": 2.497, "step": 221736 }, { "epoch": 42.58, "learning_rate": 0.001, "loss": 2.486, "step": 221748 }, { "epoch": 42.58, "learning_rate": 0.001, "loss": 2.4935, "step": 221760 }, { "epoch": 42.58, "learning_rate": 0.001, "loss": 2.4879, "step": 221772 }, { "epoch": 42.59, "learning_rate": 0.001, "loss": 2.4922, "step": 221784 }, { "epoch": 42.59, "learning_rate": 0.001, "loss": 2.4919, "step": 221796 }, { "epoch": 42.59, "learning_rate": 0.001, "loss": 2.489, "step": 221808 }, { "epoch": 42.59, "learning_rate": 0.001, "loss": 2.5003, "step": 221820 }, { "epoch": 42.59, "learning_rate": 0.001, "loss": 2.4852, "step": 221832 }, { "epoch": 42.6, "learning_rate": 0.001, "loss": 2.5075, "step": 221844 }, { "epoch": 42.6, "learning_rate": 0.001, "loss": 2.4947, "step": 221856 }, { "epoch": 42.6, "learning_rate": 0.001, "loss": 2.4823, "step": 221868 }, { "epoch": 42.6, "eval_ag_news_accuracy": 0.3305, "eval_ag_news_bleu_score": 5.048784266999561, "eval_ag_news_bleu_score_sem": 0.16230843330009173, "eval_ag_news_emb_cos_sim": 0.8227436542510986, "eval_ag_news_emb_cos_sim_sem": 0.007101127576426172, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4607505798339844, "eval_ag_news_n_ngrams_match_1": 14.472, "eval_ag_news_n_ngrams_match_2": 3.342, "eval_ag_news_n_ngrams_match_3": 0.97, "eval_ag_news_num_pred_words": 46.588, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.840866660227118, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3608835502818998, "eval_ag_news_runtime": 14.6856, "eval_ag_news_samples_per_second": 34.047, "eval_ag_news_steps_per_second": 0.068, "eval_ag_news_token_set_f1": 0.36106274917629305, "eval_ag_news_token_set_f1_sem": 0.004426777841001614, "eval_ag_news_token_set_precision": 0.34680449603887004, "eval_ag_news_token_set_recall": 0.39176875324193483, "eval_ag_news_true_num_tokens": 56.09375, "step": 221875 }, { "epoch": 42.6, "eval_anthropic_toxic_prompts_accuracy": 0.11553125, "eval_anthropic_toxic_prompts_bleu_score": 3.2235452102844517, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12004648662748921, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.684950590133667, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008897843777178536, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1883480548858643, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.332, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76, "eval_anthropic_toxic_prompts_num_pred_words": 47.476, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.248337416428615, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21765704624815113, "eval_anthropic_toxic_prompts_runtime": 11.6409, "eval_anthropic_toxic_prompts_samples_per_second": 42.952, "eval_anthropic_toxic_prompts_steps_per_second": 0.086, "eval_anthropic_toxic_prompts_token_set_f1": 0.3647078563306401, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066032271600874725, "eval_anthropic_toxic_prompts_token_set_precision": 0.4453078299479659, "eval_anthropic_toxic_prompts_token_set_recall": 0.3354832502442887, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 221875 }, { "epoch": 42.6, "eval_arxiv_accuracy": 0.35296875, "eval_arxiv_bleu_score": 4.534394328629161, "eval_arxiv_bleu_score_sem": 0.13158237441585904, "eval_arxiv_emb_cos_sim": 0.7838080525398254, "eval_arxiv_emb_cos_sim_sem": 0.007122407783657126, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3306009769439697, "eval_arxiv_n_ngrams_match_1": 15.654, "eval_arxiv_n_ngrams_match_2": 3.18, "eval_arxiv_n_ngrams_match_3": 0.708, "eval_arxiv_num_pred_words": 40.408, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.95513704875511, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3742262138438309, "eval_arxiv_runtime": 11.9256, "eval_arxiv_samples_per_second": 41.927, "eval_arxiv_steps_per_second": 0.084, "eval_arxiv_token_set_f1": 0.3705539020906193, "eval_arxiv_token_set_f1_sem": 0.00426309678106626, "eval_arxiv_token_set_precision": 0.322085981949491, "eval_arxiv_token_set_recall": 0.455209287825526, "eval_arxiv_true_num_tokens": 64.0, "step": 221875 }, { "epoch": 42.6, "eval_python_code_alpaca_accuracy": 0.1619375, "eval_python_code_alpaca_bleu_score": 4.9155830441087, "eval_python_code_alpaca_bleu_score_sem": 0.1696741164803445, "eval_python_code_alpaca_emb_cos_sim": 0.7698432207107544, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0065826884125167765, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.839216709136963, "eval_python_code_alpaca_n_ngrams_match_1": 10.05, "eval_python_code_alpaca_n_ngrams_match_2": 3.086, "eval_python_code_alpaca_n_ngrams_match_3": 1.128, "eval_python_code_alpaca_num_pred_words": 44.22, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.102364163658642, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34120464766921027, "eval_python_code_alpaca_runtime": 11.4107, "eval_python_code_alpaca_samples_per_second": 43.818, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.4888048798247338, "eval_python_code_alpaca_token_set_f1_sem": 0.005491730440376083, "eval_python_code_alpaca_token_set_precision": 0.5493986762846206, "eval_python_code_alpaca_token_set_recall": 0.4604317583363051, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 221875 }, { "epoch": 42.6, "eval_wikibio_accuracy": 0.32846875, "eval_wikibio_bleu_score": 6.021263570128182, "eval_wikibio_bleu_score_sem": 0.21727071297480205, "eval_wikibio_emb_cos_sim": 0.7418859601020813, "eval_wikibio_emb_cos_sim_sem": 0.010263068445410464, "eval_wikibio_emb_top1_equal": 0.265625, "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6263315677642822, "eval_wikibio_n_ngrams_match_1": 10.074, "eval_wikibio_n_ngrams_match_2": 3.432, "eval_wikibio_n_ngrams_match_3": 1.246, "eval_wikibio_num_pred_words": 35.63, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.57472315312593, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3582569460966102, "eval_wikibio_runtime": 12.5989, "eval_wikibio_samples_per_second": 39.686, "eval_wikibio_steps_per_second": 0.079, "eval_wikibio_token_set_f1": 0.31979591122633777, "eval_wikibio_token_set_f1_sem": 0.00554928472390408, "eval_wikibio_token_set_precision": 0.32734950004508384, "eval_wikibio_token_set_recall": 0.33355790427242027, "eval_wikibio_true_num_tokens": 61.1328125, "step": 221875 }, { "epoch": 42.6, "eval_nq_accuracy": 0.5363125, "eval_nq_bleu_score": 12.380004795370223, "eval_nq_bleu_score_sem": 0.4742240830847173, "eval_nq_emb_cos_sim": 0.8485004901885986, "eval_nq_emb_cos_sim_sem": 0.006622361109311179, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.114377498626709, "eval_nq_n_ngrams_match_1": 23.794, "eval_nq_n_ngrams_match_2": 8.91, "eval_nq_n_ngrams_match_3": 4.146, "eval_nq_num_pred_words": 49.326, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.284427093402813, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4616164035626542, "eval_nq_runtime": 12.6641, "eval_nq_samples_per_second": 39.482, "eval_nq_steps_per_second": 0.079, "eval_nq_token_set_f1": 0.4738818381738094, "eval_nq_token_set_f1_sem": 0.004879494647536723, "eval_nq_token_set_precision": 0.4329971561392119, "eval_nq_token_set_recall": 0.5316145906152132, "eval_nq_true_num_tokens": 64.0, "step": 221875 }, { "epoch": 42.6, "learning_rate": 0.001, "loss": 2.4933, "step": 221880 }, { "epoch": 42.61, "learning_rate": 0.001, "loss": 2.4891, "step": 221892 }, { "epoch": 42.61, "learning_rate": 0.001, "loss": 2.4878, "step": 221904 }, { "epoch": 42.61, "learning_rate": 0.001, "loss": 2.4844, "step": 221916 }, { "epoch": 42.61, "learning_rate": 0.001, "loss": 2.4849, "step": 221928 }, { "epoch": 42.62, "learning_rate": 0.001, "loss": 2.4939, "step": 221940 }, { "epoch": 42.62, "learning_rate": 0.001, "loss": 2.491, "step": 221952 }, { "epoch": 42.62, "learning_rate": 0.001, "loss": 2.4909, "step": 221964 }, { "epoch": 42.62, "learning_rate": 0.001, "loss": 2.4922, "step": 221976 }, { "epoch": 42.62, "learning_rate": 0.001, "loss": 2.486, "step": 221988 }, { "epoch": 42.63, "learning_rate": 0.001, "loss": 2.4929, "step": 222000 }, { "epoch": 42.63, "learning_rate": 0.001, "loss": 2.486, "step": 222012 }, { "epoch": 42.63, "learning_rate": 0.001, "loss": 2.4931, "step": 222024 }, { "epoch": 42.63, "learning_rate": 0.001, "loss": 2.4866, "step": 222036 }, { "epoch": 42.64, "learning_rate": 0.001, "loss": 2.4946, "step": 222048 }, { "epoch": 42.64, "learning_rate": 0.001, "loss": 2.4972, "step": 222060 }, { "epoch": 42.64, "learning_rate": 0.001, "loss": 2.4828, "step": 222072 }, { "epoch": 42.64, "learning_rate": 0.001, "loss": 2.4884, "step": 222084 }, { "epoch": 42.65, "learning_rate": 0.001, "loss": 2.4868, "step": 222096 }, { "epoch": 42.65, "learning_rate": 0.001, "loss": 2.4875, "step": 222108 }, { "epoch": 42.65, "learning_rate": 0.001, "loss": 2.4861, "step": 222120 }, { "epoch": 42.65, "learning_rate": 0.001, "loss": 2.4994, "step": 222132 }, { "epoch": 42.65, "learning_rate": 0.001, "loss": 2.4888, "step": 222144 }, { "epoch": 42.66, "learning_rate": 0.001, "loss": 2.49, "step": 222156 }, { "epoch": 42.66, "learning_rate": 0.001, "loss": 2.4876, "step": 222168 }, { "epoch": 42.66, "learning_rate": 0.001, "loss": 2.4892, "step": 222180 }, { "epoch": 42.66, "learning_rate": 0.001, "loss": 2.4983, "step": 222192 }, { "epoch": 42.67, "learning_rate": 0.001, "loss": 2.4931, "step": 222204 }, { "epoch": 42.67, "learning_rate": 0.001, "loss": 2.4908, "step": 222216 }, { "epoch": 42.67, "learning_rate": 0.001, "loss": 2.4932, "step": 222228 }, { "epoch": 42.67, "learning_rate": 0.001, "loss": 2.4958, "step": 222240 }, { "epoch": 42.68, "learning_rate": 0.001, "loss": 2.4879, "step": 222252 }, { "epoch": 42.68, "learning_rate": 0.001, "loss": 2.4874, "step": 222264 }, { "epoch": 42.68, "learning_rate": 0.001, "loss": 2.4899, "step": 222276 }, { "epoch": 42.68, "learning_rate": 0.001, "loss": 2.4774, "step": 222288 }, { "epoch": 42.68, "learning_rate": 0.001, "loss": 2.492, "step": 222300 }, { "epoch": 42.69, "learning_rate": 0.001, "loss": 2.4904, "step": 222312 }, { "epoch": 42.69, "learning_rate": 0.001, "loss": 2.4995, "step": 222324 }, { "epoch": 42.69, "learning_rate": 0.001, "loss": 2.4873, "step": 222336 }, { "epoch": 42.69, "learning_rate": 0.001, "loss": 2.5018, "step": 222348 }, { "epoch": 42.7, "learning_rate": 0.001, "loss": 2.4944, "step": 222360 }, { "epoch": 42.7, "learning_rate": 0.001, "loss": 2.4814, "step": 222372 }, { "epoch": 42.7, "learning_rate": 0.001, "loss": 2.4875, "step": 222384 }, { "epoch": 42.7, "learning_rate": 0.001, "loss": 2.4927, "step": 222396 }, { "epoch": 42.71, "learning_rate": 0.001, "loss": 2.4881, "step": 222408 }, { "epoch": 42.71, "learning_rate": 0.001, "loss": 2.4801, "step": 222420 }, { "epoch": 42.71, "learning_rate": 0.001, "loss": 2.4963, "step": 222432 }, { "epoch": 42.71, "learning_rate": 0.001, "loss": 2.4879, "step": 222444 }, { "epoch": 42.71, "learning_rate": 0.001, "loss": 2.4814, "step": 222456 }, { "epoch": 42.72, "learning_rate": 0.001, "loss": 2.4909, "step": 222468 }, { "epoch": 42.72, "learning_rate": 0.001, "loss": 2.4906, "step": 222480 }, { "epoch": 42.72, "learning_rate": 0.001, "loss": 2.4832, "step": 222492 }, { "epoch": 42.72, "eval_ag_news_accuracy": 0.3299375, "eval_ag_news_bleu_score": 4.957281724314242, "eval_ag_news_bleu_score_sem": 0.1576714803685843, "eval_ag_news_emb_cos_sim": 0.8234381079673767, "eval_ag_news_emb_cos_sim_sem": 0.0070108898566753316, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4598145484924316, "eval_ag_news_n_ngrams_match_1": 14.418, "eval_ag_news_n_ngrams_match_2": 3.27, "eval_ag_news_n_ngrams_match_3": 0.916, "eval_ag_news_num_pred_words": 46.814, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.81107655550185, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36017806505021455, "eval_ag_news_runtime": 12.0675, "eval_ag_news_samples_per_second": 41.433, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3598791809241332, "eval_ag_news_token_set_f1_sem": 0.0044815531472226855, "eval_ag_news_token_set_precision": 0.34719809803693247, "eval_ag_news_token_set_recall": 0.38810075036977765, "eval_ag_news_true_num_tokens": 56.09375, "step": 222500 }, { "epoch": 42.72, "eval_anthropic_toxic_prompts_accuracy": 0.11553125, "eval_anthropic_toxic_prompts_bleu_score": 3.243367604952888, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12098285126094625, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762825846672058, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00964989567839423, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1925556659698486, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.288, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 46.55, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.350579937250426, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21756804837210206, "eval_anthropic_toxic_prompts_runtime": 11.5352, "eval_anthropic_toxic_prompts_samples_per_second": 43.345, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.3608361362194086, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006606240899031136, "eval_anthropic_toxic_prompts_token_set_precision": 0.44403121576391713, "eval_anthropic_toxic_prompts_token_set_recall": 0.33108519344120535, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 222500 }, { "epoch": 42.72, "eval_arxiv_accuracy": 0.35440625, "eval_arxiv_bleu_score": 4.711477931346029, "eval_arxiv_bleu_score_sem": 0.13771927043094787, "eval_arxiv_emb_cos_sim": 0.7978407144546509, "eval_arxiv_emb_cos_sim_sem": 0.00543953548908875, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.32669997215271, "eval_arxiv_n_ngrams_match_1": 15.898, "eval_arxiv_n_ngrams_match_2": 3.302, "eval_arxiv_n_ngrams_match_3": 0.78, "eval_arxiv_num_pred_words": 41.862, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.846296357243762, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37944273905750814, "eval_arxiv_runtime": 11.7566, "eval_arxiv_samples_per_second": 42.529, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.3715815188929824, "eval_arxiv_token_set_f1_sem": 0.004152283815241085, "eval_arxiv_token_set_precision": 0.3245473206361563, "eval_arxiv_token_set_recall": 0.4483371909096113, "eval_arxiv_true_num_tokens": 64.0, "step": 222500 }, { "epoch": 42.72, "eval_python_code_alpaca_accuracy": 0.1616875, "eval_python_code_alpaca_bleu_score": 4.64954031901907, "eval_python_code_alpaca_bleu_score_sem": 0.1478891727972229, "eval_python_code_alpaca_emb_cos_sim": 0.7676364183425903, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007114602714148514, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8402645587921143, "eval_python_code_alpaca_n_ngrams_match_1": 9.83, "eval_python_code_alpaca_n_ngrams_match_2": 2.95, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 43.75, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.120294262433127, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33306373010805557, "eval_python_code_alpaca_runtime": 11.2868, "eval_python_code_alpaca_samples_per_second": 44.299, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.48378070866892925, "eval_python_code_alpaca_token_set_f1_sem": 0.005431199677241341, "eval_python_code_alpaca_token_set_precision": 0.5361468376670413, "eval_python_code_alpaca_token_set_recall": 0.4645847218685237, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 222500 }, { "epoch": 42.72, "eval_wikibio_accuracy": 0.33321875, "eval_wikibio_bleu_score": 6.246396044227107, "eval_wikibio_bleu_score_sem": 0.23267457994467938, "eval_wikibio_emb_cos_sim": 0.7441648244857788, "eval_wikibio_emb_cos_sim_sem": 0.009467850927330027, "eval_wikibio_emb_top1_equal": 0.25, "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6119182109832764, "eval_wikibio_n_ngrams_match_1": 10.27, "eval_wikibio_n_ngrams_match_2": 3.496, "eval_wikibio_n_ngrams_match_3": 1.312, "eval_wikibio_num_pred_words": 36.118, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.03702955575884, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3639763860351517, "eval_wikibio_runtime": 12.2257, "eval_wikibio_samples_per_second": 40.897, "eval_wikibio_steps_per_second": 0.082, "eval_wikibio_token_set_f1": 0.32728343043332825, "eval_wikibio_token_set_f1_sem": 0.005672756190866395, "eval_wikibio_token_set_precision": 0.33440982020200843, "eval_wikibio_token_set_recall": 0.33643955165193756, "eval_wikibio_true_num_tokens": 61.1328125, "step": 222500 }, { "epoch": 42.72, "eval_nq_accuracy": 0.5366875, "eval_nq_bleu_score": 12.435255426330572, "eval_nq_bleu_score_sem": 0.5029275654245562, "eval_nq_emb_cos_sim": 0.8402013182640076, "eval_nq_emb_cos_sim_sem": 0.006868461167577845, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1157002449035645, "eval_nq_n_ngrams_match_1": 23.73, "eval_nq_n_ngrams_match_2": 8.882, "eval_nq_n_ngrams_match_3": 4.206, "eval_nq_num_pred_words": 49.256, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.295392539148946, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45838952591589066, "eval_nq_runtime": 13.0358, "eval_nq_samples_per_second": 38.356, "eval_nq_steps_per_second": 0.077, "eval_nq_token_set_f1": 0.4714923727026808, "eval_nq_token_set_f1_sem": 0.004964865144185788, "eval_nq_token_set_precision": 0.43091017470444565, "eval_nq_token_set_recall": 0.5288284008955988, "eval_nq_true_num_tokens": 64.0, "step": 222500 }, { "epoch": 42.72, "learning_rate": 0.001, "loss": 2.4905, "step": 222504 }, { "epoch": 42.73, "learning_rate": 0.001, "loss": 2.4955, "step": 222516 }, { "epoch": 42.73, "learning_rate": 0.001, "loss": 2.4828, "step": 222528 }, { "epoch": 42.73, "learning_rate": 0.001, "loss": 2.494, "step": 222540 }, { "epoch": 42.73, "learning_rate": 0.001, "loss": 2.4946, "step": 222552 }, { "epoch": 42.74, "learning_rate": 0.001, "loss": 2.4911, "step": 222564 }, { "epoch": 42.74, "learning_rate": 0.001, "loss": 2.4903, "step": 222576 }, { "epoch": 42.74, "learning_rate": 0.001, "loss": 2.4933, "step": 222588 }, { "epoch": 42.74, "learning_rate": 0.001, "loss": 2.5013, "step": 222600 }, { "epoch": 42.74, "learning_rate": 0.001, "loss": 2.4892, "step": 222612 }, { "epoch": 42.75, "learning_rate": 0.001, "loss": 2.4926, "step": 222624 }, { "epoch": 42.75, "learning_rate": 0.001, "loss": 2.4936, "step": 222636 }, { "epoch": 42.75, "learning_rate": 0.001, "loss": 2.4972, "step": 222648 }, { "epoch": 42.75, "learning_rate": 0.001, "loss": 2.4952, "step": 222660 }, { "epoch": 42.76, "learning_rate": 0.001, "loss": 2.4941, "step": 222672 }, { "epoch": 42.76, "learning_rate": 0.001, "loss": 2.499, "step": 222684 }, { "epoch": 42.76, "learning_rate": 0.001, "loss": 2.4929, "step": 222696 }, { "epoch": 42.76, "learning_rate": 0.001, "loss": 2.4908, "step": 222708 }, { "epoch": 42.76, "learning_rate": 0.001, "loss": 2.4992, "step": 222720 }, { "epoch": 42.77, "learning_rate": 0.001, "loss": 2.4933, "step": 222732 }, { "epoch": 42.77, "learning_rate": 0.001, "loss": 2.499, "step": 222744 }, { "epoch": 42.77, "learning_rate": 0.001, "loss": 2.4932, "step": 222756 }, { "epoch": 42.77, "learning_rate": 0.001, "loss": 2.5032, "step": 222768 }, { "epoch": 42.78, "learning_rate": 0.001, "loss": 2.494, "step": 222780 }, { "epoch": 42.78, "learning_rate": 0.001, "loss": 2.5074, "step": 222792 }, { "epoch": 42.78, "learning_rate": 0.001, "loss": 2.4933, "step": 222804 }, { "epoch": 42.78, "learning_rate": 0.001, "loss": 2.4944, "step": 222816 }, { "epoch": 42.79, "learning_rate": 0.001, "loss": 2.4957, "step": 222828 }, { "epoch": 42.79, "learning_rate": 0.001, "loss": 2.4873, "step": 222840 }, { "epoch": 42.79, "learning_rate": 0.001, "loss": 2.4938, "step": 222852 }, { "epoch": 42.79, "learning_rate": 0.001, "loss": 2.5011, "step": 222864 }, { "epoch": 42.79, "learning_rate": 0.001, "loss": 2.4959, "step": 222876 }, { "epoch": 42.8, "learning_rate": 0.001, "loss": 2.5005, "step": 222888 }, { "epoch": 42.8, "learning_rate": 0.001, "loss": 2.5024, "step": 222900 }, { "epoch": 42.8, "learning_rate": 0.001, "loss": 2.4885, "step": 222912 }, { "epoch": 42.8, "learning_rate": 0.001, "loss": 2.503, "step": 222924 }, { "epoch": 42.81, "learning_rate": 0.001, "loss": 2.4825, "step": 222936 }, { "epoch": 42.81, "learning_rate": 0.001, "loss": 2.4974, "step": 222948 }, { "epoch": 42.81, "learning_rate": 0.001, "loss": 2.5021, "step": 222960 }, { "epoch": 42.81, "learning_rate": 0.001, "loss": 2.4969, "step": 222972 }, { "epoch": 42.82, "learning_rate": 0.001, "loss": 2.4894, "step": 222984 }, { "epoch": 42.82, "learning_rate": 0.001, "loss": 2.4979, "step": 222996 }, { "epoch": 42.82, "learning_rate": 0.001, "loss": 2.4892, "step": 223008 }, { "epoch": 42.82, "learning_rate": 0.001, "loss": 2.4888, "step": 223020 }, { "epoch": 42.82, "learning_rate": 0.001, "loss": 2.4944, "step": 223032 }, { "epoch": 42.83, "learning_rate": 0.001, "loss": 2.4994, "step": 223044 }, { "epoch": 42.83, "learning_rate": 0.001, "loss": 2.4959, "step": 223056 }, { "epoch": 42.83, "learning_rate": 0.001, "loss": 2.492, "step": 223068 }, { "epoch": 42.83, "learning_rate": 0.001, "loss": 2.4981, "step": 223080 }, { "epoch": 42.84, "learning_rate": 0.001, "loss": 2.5015, "step": 223092 }, { "epoch": 42.84, "learning_rate": 0.001, "loss": 2.4949, "step": 223104 }, { "epoch": 42.84, "learning_rate": 0.001, "loss": 2.4931, "step": 223116 }, { "epoch": 42.84, "eval_ag_news_accuracy": 0.329625, "eval_ag_news_bleu_score": 5.0845973097105235, "eval_ag_news_bleu_score_sem": 0.16688284002521692, "eval_ag_news_emb_cos_sim": 0.8280059099197388, "eval_ag_news_emb_cos_sim_sem": 0.006455267952756216, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4598050117492676, "eval_ag_news_n_ngrams_match_1": 14.43, "eval_ag_news_n_ngrams_match_2": 3.336, "eval_ag_news_n_ngrams_match_3": 0.954, "eval_ag_news_num_pred_words": 46.694, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.81077318288156, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36014041634686855, "eval_ag_news_runtime": 13.8194, "eval_ag_news_samples_per_second": 36.181, "eval_ag_news_steps_per_second": 0.072, "eval_ag_news_token_set_f1": 0.3611360384805379, "eval_ag_news_token_set_f1_sem": 0.004262939844210537, "eval_ag_news_token_set_precision": 0.34730017644891503, "eval_ag_news_token_set_recall": 0.3891751187326431, "eval_ag_news_true_num_tokens": 56.09375, "step": 223125 }, { "epoch": 42.84, "eval_anthropic_toxic_prompts_accuracy": 0.115125, "eval_anthropic_toxic_prompts_bleu_score": 3.10029998843007, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11299300448272312, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6762984991073608, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009655929263176368, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1868436336517334, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.34, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.708, "eval_anthropic_toxic_prompts_num_pred_words": 47.294, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.211885129399445, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21753376397019134, "eval_anthropic_toxic_prompts_runtime": 14.0482, "eval_anthropic_toxic_prompts_samples_per_second": 35.592, "eval_anthropic_toxic_prompts_steps_per_second": 0.071, "eval_anthropic_toxic_prompts_token_set_f1": 0.35845704877558726, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006505158292638676, "eval_anthropic_toxic_prompts_token_set_precision": 0.4431830318466098, "eval_anthropic_toxic_prompts_token_set_recall": 0.32807588811699495, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 223125 }, { "epoch": 42.84, "eval_arxiv_accuracy": 0.35278125, "eval_arxiv_bleu_score": 4.479037340466978, "eval_arxiv_bleu_score_sem": 0.12893957314213272, "eval_arxiv_emb_cos_sim": 0.7784118056297302, "eval_arxiv_emb_cos_sim_sem": 0.007566188415241029, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3270487785339355, "eval_arxiv_n_ngrams_match_1": 15.732, "eval_arxiv_n_ngrams_match_2": 3.084, "eval_arxiv_n_ngrams_match_3": 0.694, "eval_arxiv_num_pred_words": 41.168, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.856011017275875, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3737216125632223, "eval_arxiv_runtime": 13.3723, "eval_arxiv_samples_per_second": 37.391, "eval_arxiv_steps_per_second": 0.075, "eval_arxiv_token_set_f1": 0.3697991330743102, "eval_arxiv_token_set_f1_sem": 0.004085243099712074, "eval_arxiv_token_set_precision": 0.322918915249449, "eval_arxiv_token_set_recall": 0.44783622908033627, "eval_arxiv_true_num_tokens": 64.0, "step": 223125 }, { "epoch": 42.84, "eval_python_code_alpaca_accuracy": 0.1625625, "eval_python_code_alpaca_bleu_score": 4.564035888879271, "eval_python_code_alpaca_bleu_score_sem": 0.1499901868215338, "eval_python_code_alpaca_emb_cos_sim": 0.7590149641036987, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00816076717807437, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8386123180389404, "eval_python_code_alpaca_n_ngrams_match_1": 9.664, "eval_python_code_alpaca_n_ngrams_match_2": 2.886, "eval_python_code_alpaca_n_ngrams_match_3": 0.974, "eval_python_code_alpaca_num_pred_words": 43.176, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.092030770023108, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33195857535070894, "eval_python_code_alpaca_runtime": 11.2848, "eval_python_code_alpaca_samples_per_second": 44.308, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4719959598813166, "eval_python_code_alpaca_token_set_f1_sem": 0.005649899719237582, "eval_python_code_alpaca_token_set_precision": 0.5290801912013479, "eval_python_code_alpaca_token_set_recall": 0.45008603053897156, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 223125 }, { "epoch": 42.84, "eval_wikibio_accuracy": 0.33184375, "eval_wikibio_bleu_score": 6.034415948747848, "eval_wikibio_bleu_score_sem": 0.21408917625230753, "eval_wikibio_emb_cos_sim": 0.7679538726806641, "eval_wikibio_emb_cos_sim_sem": 0.007276560540630229, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.604398012161255, "eval_wikibio_n_ngrams_match_1": 10.258, "eval_wikibio_n_ngrams_match_2": 3.46, "eval_wikibio_n_ngrams_match_3": 1.284, "eval_wikibio_num_pred_words": 36.104, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.7595483941818, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3629422883793961, "eval_wikibio_runtime": 13.8163, "eval_wikibio_samples_per_second": 36.189, "eval_wikibio_steps_per_second": 0.072, "eval_wikibio_token_set_f1": 0.32519809615490564, "eval_wikibio_token_set_f1_sem": 0.0053623959011498915, "eval_wikibio_token_set_precision": 0.3337896443538468, "eval_wikibio_token_set_recall": 0.33441333025866604, "eval_wikibio_true_num_tokens": 61.1328125, "step": 223125 }, { "epoch": 42.84, "eval_nq_accuracy": 0.5374375, "eval_nq_bleu_score": 12.29211278980963, "eval_nq_bleu_score_sem": 0.4991774962922198, "eval_nq_emb_cos_sim": 0.8381524682044983, "eval_nq_emb_cos_sim_sem": 0.007257072233005496, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.115370035171509, "eval_nq_n_ngrams_match_1": 23.59, "eval_nq_n_ngrams_match_2": 8.888, "eval_nq_n_ngrams_match_3": 4.13, "eval_nq_num_pred_words": 49.33, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.292653772009963, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4579031525484577, "eval_nq_runtime": 12.2839, "eval_nq_samples_per_second": 40.704, "eval_nq_steps_per_second": 0.081, "eval_nq_token_set_f1": 0.4719038445739792, "eval_nq_token_set_f1_sem": 0.004926442645843498, "eval_nq_token_set_precision": 0.43049878518161966, "eval_nq_token_set_recall": 0.5304848033198145, "eval_nq_true_num_tokens": 64.0, "step": 223125 }, { "epoch": 42.84, "learning_rate": 0.001, "loss": 2.5022, "step": 223128 }, { "epoch": 42.85, "learning_rate": 0.001, "loss": 2.5021, "step": 223140 }, { "epoch": 42.85, "learning_rate": 0.001, "loss": 2.4945, "step": 223152 }, { "epoch": 42.85, "learning_rate": 0.001, "loss": 2.4923, "step": 223164 }, { "epoch": 42.85, "learning_rate": 0.001, "loss": 2.4947, "step": 223176 }, { "epoch": 42.85, "learning_rate": 0.001, "loss": 2.5024, "step": 223188 }, { "epoch": 42.86, "learning_rate": 0.001, "loss": 2.5001, "step": 223200 }, { "epoch": 42.86, "learning_rate": 0.001, "loss": 2.4959, "step": 223212 }, { "epoch": 42.86, "learning_rate": 0.001, "loss": 2.4997, "step": 223224 }, { "epoch": 42.86, "learning_rate": 0.001, "loss": 2.5043, "step": 223236 }, { "epoch": 42.87, "learning_rate": 0.001, "loss": 2.5008, "step": 223248 }, { "epoch": 42.87, "learning_rate": 0.001, "loss": 2.4956, "step": 223260 }, { "epoch": 42.87, "learning_rate": 0.001, "loss": 2.4939, "step": 223272 }, { "epoch": 42.87, "learning_rate": 0.001, "loss": 2.4914, "step": 223284 }, { "epoch": 42.88, "learning_rate": 0.001, "loss": 2.4862, "step": 223296 }, { "epoch": 42.88, "learning_rate": 0.001, "loss": 2.495, "step": 223308 }, { "epoch": 42.88, "learning_rate": 0.001, "loss": 2.5024, "step": 223320 }, { "epoch": 42.88, "learning_rate": 0.001, "loss": 2.4967, "step": 223332 }, { "epoch": 42.88, "learning_rate": 0.001, "loss": 2.5049, "step": 223344 }, { "epoch": 42.89, "learning_rate": 0.001, "loss": 2.5024, "step": 223356 }, { "epoch": 42.89, "learning_rate": 0.001, "loss": 2.5034, "step": 223368 }, { "epoch": 42.89, "learning_rate": 0.001, "loss": 2.4931, "step": 223380 }, { "epoch": 42.89, "learning_rate": 0.001, "loss": 2.4871, "step": 223392 }, { "epoch": 42.9, "learning_rate": 0.001, "loss": 2.4927, "step": 223404 }, { "epoch": 42.9, "learning_rate": 0.001, "loss": 2.4896, "step": 223416 }, { "epoch": 42.9, "learning_rate": 0.001, "loss": 2.4987, "step": 223428 }, { "epoch": 42.9, "learning_rate": 0.001, "loss": 2.5005, "step": 223440 }, { "epoch": 42.91, "learning_rate": 0.001, "loss": 2.5011, "step": 223452 }, { "epoch": 42.91, "learning_rate": 0.001, "loss": 2.5093, "step": 223464 }, { "epoch": 42.91, "learning_rate": 0.001, "loss": 2.503, "step": 223476 }, { "epoch": 42.91, "learning_rate": 0.001, "loss": 2.4922, "step": 223488 }, { "epoch": 42.91, "learning_rate": 0.001, "loss": 2.4977, "step": 223500 }, { "epoch": 42.92, "learning_rate": 0.001, "loss": 2.4908, "step": 223512 }, { "epoch": 42.92, "learning_rate": 0.001, "loss": 2.5022, "step": 223524 }, { "epoch": 42.92, "learning_rate": 0.001, "loss": 2.494, "step": 223536 }, { "epoch": 42.92, "learning_rate": 0.001, "loss": 2.5054, "step": 223548 }, { "epoch": 42.93, "learning_rate": 0.001, "loss": 2.491, "step": 223560 }, { "epoch": 42.93, "learning_rate": 0.001, "loss": 2.4942, "step": 223572 }, { "epoch": 42.93, "learning_rate": 0.001, "loss": 2.4974, "step": 223584 }, { "epoch": 42.93, "learning_rate": 0.001, "loss": 2.4978, "step": 223596 }, { "epoch": 42.94, "learning_rate": 0.001, "loss": 2.4857, "step": 223608 }, { "epoch": 42.94, "learning_rate": 0.001, "loss": 2.4962, "step": 223620 }, { "epoch": 42.94, "learning_rate": 0.001, "loss": 2.5055, "step": 223632 }, { "epoch": 42.94, "learning_rate": 0.001, "loss": 2.4945, "step": 223644 }, { "epoch": 42.94, "learning_rate": 0.001, "loss": 2.5004, "step": 223656 }, { "epoch": 42.95, "learning_rate": 0.001, "loss": 2.5036, "step": 223668 }, { "epoch": 42.95, "learning_rate": 0.001, "loss": 2.5014, "step": 223680 }, { "epoch": 42.95, "learning_rate": 0.001, "loss": 2.501, "step": 223692 }, { "epoch": 42.95, "learning_rate": 0.001, "loss": 2.4881, "step": 223704 }, { "epoch": 42.96, "learning_rate": 0.001, "loss": 2.5019, "step": 223716 }, { "epoch": 42.96, "learning_rate": 0.001, "loss": 2.4905, "step": 223728 }, { "epoch": 42.96, "learning_rate": 0.001, "loss": 2.5061, "step": 223740 }, { "epoch": 42.96, "eval_ag_news_accuracy": 0.32746875, "eval_ag_news_bleu_score": 4.947097241774946, "eval_ag_news_bleu_score_sem": 0.1586502852056849, "eval_ag_news_emb_cos_sim": 0.8249166011810303, "eval_ag_news_emb_cos_sim_sem": 0.005872600942983266, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4830172061920166, "eval_ag_news_n_ngrams_match_1": 14.486, "eval_ag_news_n_ngrams_match_2": 3.228, "eval_ag_news_n_ngrams_match_3": 0.948, "eval_ag_news_num_pred_words": 47.162, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.557807648089735, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3581210894144573, "eval_ag_news_runtime": 12.0003, "eval_ag_news_samples_per_second": 41.666, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3576250521731259, "eval_ag_news_token_set_f1_sem": 0.004230513746466198, "eval_ag_news_token_set_precision": 0.34621165517510566, "eval_ag_news_token_set_recall": 0.38173293351991555, "eval_ag_news_true_num_tokens": 56.09375, "step": 223750 }, { "epoch": 42.96, "eval_anthropic_toxic_prompts_accuracy": 0.1156875, "eval_anthropic_toxic_prompts_bleu_score": 3.1715246929844176, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11695843213029491, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847228407859802, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.0092048341777423, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1856446266174316, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.496, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.76, "eval_anthropic_toxic_prompts_num_pred_words": 47.972, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.182872305581288, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2197862691566087, "eval_anthropic_toxic_prompts_runtime": 11.1205, "eval_anthropic_toxic_prompts_samples_per_second": 44.962, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.36024315246900496, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006542771600281045, "eval_anthropic_toxic_prompts_token_set_precision": 0.4580979423416351, "eval_anthropic_toxic_prompts_token_set_recall": 0.32226907155964357, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 223750 }, { "epoch": 42.96, "eval_arxiv_accuracy": 0.352375, "eval_arxiv_bleu_score": 4.4939667133590175, "eval_arxiv_bleu_score_sem": 0.1291008210131496, "eval_arxiv_emb_cos_sim": 0.7866687774658203, "eval_arxiv_emb_cos_sim_sem": 0.006545410842097907, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.330798864364624, "eval_arxiv_n_ngrams_match_1": 15.634, "eval_arxiv_n_ngrams_match_2": 3.092, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 41.036, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.960669566109463, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.372381418176841, "eval_arxiv_runtime": 12.9733, "eval_arxiv_samples_per_second": 38.541, "eval_arxiv_steps_per_second": 0.077, "eval_arxiv_token_set_f1": 0.3645117870270614, "eval_arxiv_token_set_f1_sem": 0.004115987123984987, "eval_arxiv_token_set_precision": 0.3182601159337704, "eval_arxiv_token_set_recall": 0.4422560015471147, "eval_arxiv_true_num_tokens": 64.0, "step": 223750 }, { "epoch": 42.96, "eval_python_code_alpaca_accuracy": 0.1630625, "eval_python_code_alpaca_bleu_score": 4.851000368179831, "eval_python_code_alpaca_bleu_score_sem": 0.15934825409434514, "eval_python_code_alpaca_emb_cos_sim": 0.7696998715400696, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007736320096155824, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.846017599105835, "eval_python_code_alpaca_n_ngrams_match_1": 10.116, "eval_python_code_alpaca_n_ngrams_match_2": 3.044, "eval_python_code_alpaca_n_ngrams_match_3": 1.066, "eval_python_code_alpaca_num_pred_words": 43.738, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.21907186884294, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.345137885265147, "eval_python_code_alpaca_runtime": 10.997, "eval_python_code_alpaca_samples_per_second": 45.467, "eval_python_code_alpaca_steps_per_second": 0.091, "eval_python_code_alpaca_token_set_f1": 0.4828662986513079, "eval_python_code_alpaca_token_set_f1_sem": 0.005473060353367226, "eval_python_code_alpaca_token_set_precision": 0.5516071864190307, "eval_python_code_alpaca_token_set_recall": 0.44712544174120716, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 223750 }, { "epoch": 42.96, "eval_wikibio_accuracy": 0.328625, "eval_wikibio_bleu_score": 6.278204611582496, "eval_wikibio_bleu_score_sem": 0.22794619497576218, "eval_wikibio_emb_cos_sim": 0.7612966895103455, "eval_wikibio_emb_cos_sim_sem": 0.00870326099722685, "eval_wikibio_emb_top1_equal": 0.265625, "eval_wikibio_emb_top1_equal_sem": 0.03919146934646163, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.65120267868042, "eval_wikibio_n_ngrams_match_1": 10.618, "eval_wikibio_n_ngrams_match_2": 3.63, "eval_wikibio_n_ngrams_match_3": 1.366, "eval_wikibio_num_pred_words": 37.756, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.52096654635519, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.37142049220597984, "eval_wikibio_runtime": 11.4259, "eval_wikibio_samples_per_second": 43.76, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.329697247099366, "eval_wikibio_token_set_f1_sem": 0.004978581454158171, "eval_wikibio_token_set_precision": 0.34606009155171524, "eval_wikibio_token_set_recall": 0.32908385631534526, "eval_wikibio_true_num_tokens": 61.1328125, "step": 223750 }, { "epoch": 42.96, "eval_nq_accuracy": 0.537, "eval_nq_bleu_score": 11.926327075427237, "eval_nq_bleu_score_sem": 0.48943310267388396, "eval_nq_emb_cos_sim": 0.8354673385620117, "eval_nq_emb_cos_sim_sem": 0.007611759695193432, "eval_nq_emb_top1_equal": 0.234375, "eval_nq_emb_top1_equal_sem": 0.03758909358128201, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.117015838623047, "eval_nq_n_ngrams_match_1": 23.436, "eval_nq_n_ngrams_match_2": 8.714, "eval_nq_n_ngrams_match_3": 3.988, "eval_nq_num_pred_words": 49.166, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.306313087401342, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.454532471944197, "eval_nq_runtime": 12.5535, "eval_nq_samples_per_second": 39.83, "eval_nq_steps_per_second": 0.08, "eval_nq_token_set_f1": 0.46579723138269247, "eval_nq_token_set_f1_sem": 0.0050853466978272865, "eval_nq_token_set_precision": 0.4265639900605402, "eval_nq_token_set_recall": 0.5221931161974545, "eval_nq_true_num_tokens": 64.0, "step": 223750 }, { "epoch": 42.96, "learning_rate": 0.001, "loss": 2.5118, "step": 223752 }, { "epoch": 42.97, "learning_rate": 0.001, "loss": 2.5042, "step": 223764 }, { "epoch": 42.97, "learning_rate": 0.001, "loss": 2.4995, "step": 223776 }, { "epoch": 42.97, "learning_rate": 0.001, "loss": 2.5067, "step": 223788 }, { "epoch": 42.97, "learning_rate": 0.001, "loss": 2.4888, "step": 223800 }, { "epoch": 42.97, "learning_rate": 0.001, "loss": 2.506, "step": 223812 }, { "epoch": 42.98, "learning_rate": 0.001, "loss": 2.4975, "step": 223824 }, { "epoch": 42.98, "learning_rate": 0.001, "loss": 2.4995, "step": 223836 }, { "epoch": 42.98, "learning_rate": 0.001, "loss": 2.4892, "step": 223848 }, { "epoch": 42.98, "learning_rate": 0.001, "loss": 2.502, "step": 223860 }, { "epoch": 42.99, "learning_rate": 0.001, "loss": 2.4868, "step": 223872 }, { "epoch": 42.99, "learning_rate": 0.001, "loss": 2.4865, "step": 223884 }, { "epoch": 42.99, "learning_rate": 0.001, "loss": 2.4866, "step": 223896 }, { "epoch": 42.99, "learning_rate": 0.001, "loss": 2.5021, "step": 223908 }, { "epoch": 43.0, "learning_rate": 0.001, "loss": 2.495, "step": 223920 }, { "epoch": 43.0, "learning_rate": 0.001, "loss": 2.496, "step": 223932 }, { "epoch": 43.0, "learning_rate": 0.001, "loss": 2.4994, "step": 223944 }, { "epoch": 43.0, "learning_rate": 0.001, "loss": 2.4875, "step": 223956 }, { "epoch": 43.0, "learning_rate": 0.001, "loss": 2.4875, "step": 223968 }, { "epoch": 43.01, "learning_rate": 0.001, "loss": 2.4927, "step": 223980 }, { "epoch": 43.01, "learning_rate": 0.001, "loss": 2.486, "step": 223992 }, { "epoch": 43.01, "learning_rate": 0.001, "loss": 2.4894, "step": 224004 }, { "epoch": 43.01, "learning_rate": 0.001, "loss": 2.4851, "step": 224016 }, { "epoch": 43.02, "learning_rate": 0.001, "loss": 2.488, "step": 224028 }, { "epoch": 43.02, "learning_rate": 0.001, "loss": 2.4863, "step": 224040 }, { "epoch": 43.02, "learning_rate": 0.001, "loss": 2.4851, "step": 224052 }, { "epoch": 43.02, "learning_rate": 0.001, "loss": 2.4823, "step": 224064 }, { "epoch": 43.03, "learning_rate": 0.001, "loss": 2.4769, "step": 224076 }, { "epoch": 43.03, "learning_rate": 0.001, "loss": 2.4853, "step": 224088 }, { "epoch": 43.03, "learning_rate": 0.001, "loss": 2.4896, "step": 224100 }, { "epoch": 43.03, "learning_rate": 0.001, "loss": 2.486, "step": 224112 }, { "epoch": 43.03, "learning_rate": 0.001, "loss": 2.4921, "step": 224124 }, { "epoch": 43.04, "learning_rate": 0.001, "loss": 2.4873, "step": 224136 }, { "epoch": 43.04, "learning_rate": 0.001, "loss": 2.4853, "step": 224148 }, { "epoch": 43.04, "learning_rate": 0.001, "loss": 2.4892, "step": 224160 }, { "epoch": 43.04, "learning_rate": 0.001, "loss": 2.4863, "step": 224172 }, { "epoch": 43.05, "learning_rate": 0.001, "loss": 2.4672, "step": 224184 }, { "epoch": 43.05, "learning_rate": 0.001, "loss": 2.4839, "step": 224196 }, { "epoch": 43.05, "learning_rate": 0.001, "loss": 2.4793, "step": 224208 }, { "epoch": 43.05, "learning_rate": 0.001, "loss": 2.4791, "step": 224220 }, { "epoch": 43.06, "learning_rate": 0.001, "loss": 2.4855, "step": 224232 }, { "epoch": 43.06, "learning_rate": 0.001, "loss": 2.473, "step": 224244 }, { "epoch": 43.06, "learning_rate": 0.001, "loss": 2.4867, "step": 224256 }, { "epoch": 43.06, "learning_rate": 0.001, "loss": 2.4875, "step": 224268 }, { "epoch": 43.06, "learning_rate": 0.001, "loss": 2.4837, "step": 224280 }, { "epoch": 43.07, "learning_rate": 0.001, "loss": 2.482, "step": 224292 }, { "epoch": 43.07, "learning_rate": 0.001, "loss": 2.4925, "step": 224304 }, { "epoch": 43.07, "learning_rate": 0.001, "loss": 2.4876, "step": 224316 }, { "epoch": 43.07, "learning_rate": 0.001, "loss": 2.4741, "step": 224328 }, { "epoch": 43.08, "learning_rate": 0.001, "loss": 2.487, "step": 224340 }, { "epoch": 43.08, "learning_rate": 0.001, "loss": 2.4893, "step": 224352 }, { "epoch": 43.08, "learning_rate": 0.001, "loss": 2.4827, "step": 224364 }, { "epoch": 43.08, "eval_ag_news_accuracy": 0.32909375, "eval_ag_news_bleu_score": 5.01400026409122, "eval_ag_news_bleu_score_sem": 0.15273230151708805, "eval_ag_news_emb_cos_sim": 0.8270438313484192, "eval_ag_news_emb_cos_sim_sem": 0.006000475798454334, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4638290405273438, "eval_ag_news_n_ngrams_match_1": 14.546, "eval_ag_news_n_ngrams_match_2": 3.248, "eval_ag_news_n_ngrams_match_3": 0.952, "eval_ag_news_num_pred_words": 46.74, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.939038548301856, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36438620367713326, "eval_ag_news_runtime": 14.4062, "eval_ag_news_samples_per_second": 34.707, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36149512209529017, "eval_ag_news_token_set_f1_sem": 0.004451765491663545, "eval_ag_news_token_set_precision": 0.35044511747507473, "eval_ag_news_token_set_recall": 0.3856160775602759, "eval_ag_news_true_num_tokens": 56.09375, "step": 224375 }, { "epoch": 43.08, "eval_anthropic_toxic_prompts_accuracy": 0.11690625, "eval_anthropic_toxic_prompts_bleu_score": 3.2510326416432935, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12059571502026227, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830965280532837, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008142744790524407, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1818060874938965, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.394, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.004, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 47.2, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.090223336224096, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21946284326705479, "eval_anthropic_toxic_prompts_runtime": 13.856, "eval_anthropic_toxic_prompts_samples_per_second": 36.085, "eval_anthropic_toxic_prompts_steps_per_second": 0.072, "eval_anthropic_toxic_prompts_token_set_f1": 0.36363135023222964, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006391148538094834, "eval_anthropic_toxic_prompts_token_set_precision": 0.4533600216063258, "eval_anthropic_toxic_prompts_token_set_recall": 0.328446911816258, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 224375 }, { "epoch": 43.08, "eval_arxiv_accuracy": 0.353625, "eval_arxiv_bleu_score": 4.445070120704116, "eval_arxiv_bleu_score_sem": 0.1327820442948291, "eval_arxiv_emb_cos_sim": 0.7907286882400513, "eval_arxiv_emb_cos_sim_sem": 0.0059995018111270275, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3163387775421143, "eval_arxiv_n_ngrams_match_1": 15.802, "eval_arxiv_n_ngrams_match_2": 3.086, "eval_arxiv_n_ngrams_match_3": 0.648, "eval_arxiv_num_pred_words": 40.676, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.559265023092298, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37799241386009425, "eval_arxiv_runtime": 14.3861, "eval_arxiv_samples_per_second": 34.756, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.3725749817647709, "eval_arxiv_token_set_f1_sem": 0.0043395176400665, "eval_arxiv_token_set_precision": 0.3257702584999924, "eval_arxiv_token_set_recall": 0.4500097548103646, "eval_arxiv_true_num_tokens": 64.0, "step": 224375 }, { "epoch": 43.08, "eval_python_code_alpaca_accuracy": 0.16240625, "eval_python_code_alpaca_bleu_score": 4.999721520727722, "eval_python_code_alpaca_bleu_score_sem": 0.1708161318990044, "eval_python_code_alpaca_emb_cos_sim": 0.7690632939338684, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006823445699190179, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.817096471786499, "eval_python_code_alpaca_n_ngrams_match_1": 9.884, "eval_python_code_alpaca_n_ngrams_match_2": 3.048, "eval_python_code_alpaca_n_ngrams_match_3": 1.132, "eval_python_code_alpaca_num_pred_words": 42.718, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.72820926290417, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3441966316221813, "eval_python_code_alpaca_runtime": 13.8038, "eval_python_code_alpaca_samples_per_second": 36.222, "eval_python_code_alpaca_steps_per_second": 0.072, "eval_python_code_alpaca_token_set_f1": 0.47988166748242034, "eval_python_code_alpaca_token_set_f1_sem": 0.005569815825814766, "eval_python_code_alpaca_token_set_precision": 0.5432677588892975, "eval_python_code_alpaca_token_set_recall": 0.45137410805920786, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 224375 }, { "epoch": 43.08, "eval_wikibio_accuracy": 0.33221875, "eval_wikibio_bleu_score": 6.163750457787334, "eval_wikibio_bleu_score_sem": 0.21996558162262686, "eval_wikibio_emb_cos_sim": 0.7439298629760742, "eval_wikibio_emb_cos_sim_sem": 0.009441347145061755, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.613781213760376, "eval_wikibio_n_ngrams_match_1": 10.222, "eval_wikibio_n_ngrams_match_2": 3.54, "eval_wikibio_n_ngrams_match_3": 1.316, "eval_wikibio_num_pred_words": 36.32, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.10609395828799, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35885054629553403, "eval_wikibio_runtime": 14.7465, "eval_wikibio_samples_per_second": 33.906, "eval_wikibio_steps_per_second": 0.068, "eval_wikibio_token_set_f1": 0.32249625868086373, "eval_wikibio_token_set_f1_sem": 0.0055730423306869295, "eval_wikibio_token_set_precision": 0.33306748991328516, "eval_wikibio_token_set_recall": 0.3294017772412334, "eval_wikibio_true_num_tokens": 61.1328125, "step": 224375 }, { "epoch": 43.08, "eval_nq_accuracy": 0.53696875, "eval_nq_bleu_score": 12.32116511533254, "eval_nq_bleu_score_sem": 0.5030054763386901, "eval_nq_emb_cos_sim": 0.8386412858963013, "eval_nq_emb_cos_sim_sem": 0.007426268780718956, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1135826110839844, "eval_nq_n_ngrams_match_1": 23.592, "eval_nq_n_ngrams_match_2": 8.828, "eval_nq_n_ngrams_match_3": 4.15, "eval_nq_num_pred_words": 48.634, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.277844522056236, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46018523243806136, "eval_nq_runtime": 14.6245, "eval_nq_samples_per_second": 34.189, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.4715927619269125, "eval_nq_token_set_f1_sem": 0.005178589142448564, "eval_nq_token_set_precision": 0.4307494796876743, "eval_nq_token_set_recall": 0.5300929339626383, "eval_nq_true_num_tokens": 64.0, "step": 224375 }, { "epoch": 43.08, "learning_rate": 0.001, "loss": 2.4869, "step": 224376 }, { "epoch": 43.09, "learning_rate": 0.001, "loss": 2.4797, "step": 224388 }, { "epoch": 43.09, "learning_rate": 0.001, "loss": 2.4936, "step": 224400 }, { "epoch": 43.09, "learning_rate": 0.001, "loss": 2.4816, "step": 224412 }, { "epoch": 43.09, "learning_rate": 0.001, "loss": 2.4812, "step": 224424 }, { "epoch": 43.09, "learning_rate": 0.001, "loss": 2.4953, "step": 224436 }, { "epoch": 43.1, "learning_rate": 0.001, "loss": 2.4932, "step": 224448 }, { "epoch": 43.1, "learning_rate": 0.001, "loss": 2.4783, "step": 224460 }, { "epoch": 43.1, "learning_rate": 0.001, "loss": 2.4944, "step": 224472 }, { "epoch": 43.1, "learning_rate": 0.001, "loss": 2.4851, "step": 224484 }, { "epoch": 43.11, "learning_rate": 0.001, "loss": 2.485, "step": 224496 }, { "epoch": 43.11, "learning_rate": 0.001, "loss": 2.4843, "step": 224508 }, { "epoch": 43.11, "learning_rate": 0.001, "loss": 2.4743, "step": 224520 }, { "epoch": 43.11, "learning_rate": 0.001, "loss": 2.4843, "step": 224532 }, { "epoch": 43.12, "learning_rate": 0.001, "loss": 2.4974, "step": 224544 }, { "epoch": 43.12, "learning_rate": 0.001, "loss": 2.4938, "step": 224556 }, { "epoch": 43.12, "learning_rate": 0.001, "loss": 2.4849, "step": 224568 }, { "epoch": 43.12, "learning_rate": 0.001, "loss": 2.4895, "step": 224580 }, { "epoch": 43.12, "learning_rate": 0.001, "loss": 2.492, "step": 224592 }, { "epoch": 43.13, "learning_rate": 0.001, "loss": 2.4811, "step": 224604 }, { "epoch": 43.13, "learning_rate": 0.001, "loss": 2.4854, "step": 224616 }, { "epoch": 43.13, "learning_rate": 0.001, "loss": 2.4885, "step": 224628 }, { "epoch": 43.13, "learning_rate": 0.001, "loss": 2.4839, "step": 224640 }, { "epoch": 43.14, "learning_rate": 0.001, "loss": 2.4865, "step": 224652 }, { "epoch": 43.14, "learning_rate": 0.001, "loss": 2.4858, "step": 224664 }, { "epoch": 43.14, "learning_rate": 0.001, "loss": 2.4787, "step": 224676 }, { "epoch": 43.14, "learning_rate": 0.001, "loss": 2.4872, "step": 224688 }, { "epoch": 43.15, "learning_rate": 0.001, "loss": 2.4821, "step": 224700 }, { "epoch": 43.15, "learning_rate": 0.001, "loss": 2.4899, "step": 224712 }, { "epoch": 43.15, "learning_rate": 0.001, "loss": 2.4864, "step": 224724 }, { "epoch": 43.15, "learning_rate": 0.001, "loss": 2.484, "step": 224736 }, { "epoch": 43.15, "learning_rate": 0.001, "loss": 2.4917, "step": 224748 }, { "epoch": 43.16, "learning_rate": 0.001, "loss": 2.482, "step": 224760 }, { "epoch": 43.16, "learning_rate": 0.001, "loss": 2.4844, "step": 224772 }, { "epoch": 43.16, "learning_rate": 0.001, "loss": 2.4873, "step": 224784 }, { "epoch": 43.16, "learning_rate": 0.001, "loss": 2.4899, "step": 224796 }, { "epoch": 43.17, "learning_rate": 0.001, "loss": 2.5012, "step": 224808 }, { "epoch": 43.17, "learning_rate": 0.001, "loss": 2.4927, "step": 224820 }, { "epoch": 43.17, "learning_rate": 0.001, "loss": 2.4807, "step": 224832 }, { "epoch": 43.17, "learning_rate": 0.001, "loss": 2.4821, "step": 224844 }, { "epoch": 43.18, "learning_rate": 0.001, "loss": 2.4898, "step": 224856 }, { "epoch": 43.18, "learning_rate": 0.001, "loss": 2.488, "step": 224868 }, { "epoch": 43.18, "learning_rate": 0.001, "loss": 2.4859, "step": 224880 }, { "epoch": 43.18, "learning_rate": 0.001, "loss": 2.4887, "step": 224892 }, { "epoch": 43.18, "learning_rate": 0.001, "loss": 2.4995, "step": 224904 }, { "epoch": 43.19, "learning_rate": 0.001, "loss": 2.4725, "step": 224916 }, { "epoch": 43.19, "learning_rate": 0.001, "loss": 2.4814, "step": 224928 }, { "epoch": 43.19, "learning_rate": 0.001, "loss": 2.4845, "step": 224940 }, { "epoch": 43.19, "learning_rate": 0.001, "loss": 2.4969, "step": 224952 }, { "epoch": 43.2, "learning_rate": 0.001, "loss": 2.4945, "step": 224964 }, { "epoch": 43.2, "learning_rate": 0.001, "loss": 2.4926, "step": 224976 }, { "epoch": 43.2, "learning_rate": 0.001, "loss": 2.4985, "step": 224988 }, { "epoch": 43.2, "learning_rate": 0.001, "loss": 2.4971, "step": 225000 }, { "epoch": 43.2, "eval_ag_news_accuracy": 0.32753125, "eval_ag_news_bleu_score": 5.211276532848724, "eval_ag_news_bleu_score_sem": 0.17252462750866365, "eval_ag_news_emb_cos_sim": 0.8245654702186584, "eval_ag_news_emb_cos_sim_sem": 0.006948924111196698, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.474595308303833, "eval_ag_news_n_ngrams_match_1": 14.406, "eval_ag_news_n_ngrams_match_2": 3.338, "eval_ag_news_n_ngrams_match_3": 0.998, "eval_ag_news_num_pred_words": 46.486, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.28476051803534, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3598653902184827, "eval_ag_news_runtime": 14.3423, "eval_ag_news_samples_per_second": 34.862, "eval_ag_news_steps_per_second": 0.07, "eval_ag_news_token_set_f1": 0.36276770394489916, "eval_ag_news_token_set_f1_sem": 0.004413243434977232, "eval_ag_news_token_set_precision": 0.34711509780671834, "eval_ag_news_token_set_recall": 0.39569878543589954, "eval_ag_news_true_num_tokens": 56.09375, "step": 225000 }, { "epoch": 43.2, "eval_anthropic_toxic_prompts_accuracy": 0.11671875, "eval_anthropic_toxic_prompts_bleu_score": 3.0833680660235285, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11897582499138239, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6771214604377747, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009058558930173688, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1975910663604736, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.22, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.884, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.698, "eval_anthropic_toxic_prompts_num_pred_words": 46.792, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.473504083394158, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21431399798056927, "eval_anthropic_toxic_prompts_runtime": 13.4659, "eval_anthropic_toxic_prompts_samples_per_second": 37.131, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.3579694121206704, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006631391204199331, "eval_anthropic_toxic_prompts_token_set_precision": 0.4361442732108506, "eval_anthropic_toxic_prompts_token_set_recall": 0.3305398430656689, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 225000 }, { "epoch": 43.2, "eval_arxiv_accuracy": 0.352375, "eval_arxiv_bleu_score": 4.40550909040396, "eval_arxiv_bleu_score_sem": 0.13550989774980318, "eval_arxiv_emb_cos_sim": 0.7769677639007568, "eval_arxiv_emb_cos_sim_sem": 0.007418074511406127, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.324664831161499, "eval_arxiv_n_ngrams_match_1": 15.142, "eval_arxiv_n_ngrams_match_2": 3.048, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 39.704, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.789682845852717, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36540441175968996, "eval_arxiv_runtime": 14.2562, "eval_arxiv_samples_per_second": 35.072, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.3599004113280265, "eval_arxiv_token_set_f1_sem": 0.004475997533823196, "eval_arxiv_token_set_precision": 0.30979782520523863, "eval_arxiv_token_set_recall": 0.4507911260698354, "eval_arxiv_true_num_tokens": 64.0, "step": 225000 }, { "epoch": 43.2, "eval_python_code_alpaca_accuracy": 0.1651875, "eval_python_code_alpaca_bleu_score": 4.8531095230752275, "eval_python_code_alpaca_bleu_score_sem": 0.15412118315054213, "eval_python_code_alpaca_emb_cos_sim": 0.7674447298049927, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007438374804409203, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8156230449676514, "eval_python_code_alpaca_n_ngrams_match_1": 9.856, "eval_python_code_alpaca_n_ngrams_match_2": 3.028, "eval_python_code_alpaca_n_ngrams_match_3": 1.066, "eval_python_code_alpaca_num_pred_words": 43.416, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.70357962018885, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3400893734969571, "eval_python_code_alpaca_runtime": 13.8528, "eval_python_code_alpaca_samples_per_second": 36.094, "eval_python_code_alpaca_steps_per_second": 0.072, "eval_python_code_alpaca_token_set_f1": 0.4799522378756545, "eval_python_code_alpaca_token_set_f1_sem": 0.005705710467907103, "eval_python_code_alpaca_token_set_precision": 0.5392585395325675, "eval_python_code_alpaca_token_set_recall": 0.4538301922814778, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 225000 }, { "epoch": 43.2, "eval_wikibio_accuracy": 0.32965625, "eval_wikibio_bleu_score": 5.801037856657294, "eval_wikibio_bleu_score_sem": 0.2178655353761267, "eval_wikibio_emb_cos_sim": 0.7327523827552795, "eval_wikibio_emb_cos_sim_sem": 0.010329804687304925, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.601663112640381, "eval_wikibio_n_ngrams_match_1": 9.698, "eval_wikibio_n_ngrams_match_2": 3.238, "eval_wikibio_n_ngrams_match_3": 1.196, "eval_wikibio_num_pred_words": 34.858, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.65915207239483, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34634169436391915, "eval_wikibio_runtime": 14.9041, "eval_wikibio_samples_per_second": 33.548, "eval_wikibio_steps_per_second": 0.067, "eval_wikibio_token_set_f1": 0.3080959545079406, "eval_wikibio_token_set_f1_sem": 0.005920249124020165, "eval_wikibio_token_set_precision": 0.3151915658575392, "eval_wikibio_token_set_recall": 0.32133225752904643, "eval_wikibio_true_num_tokens": 61.1328125, "step": 225000 }, { "epoch": 43.2, "eval_nq_accuracy": 0.536625, "eval_nq_bleu_score": 11.7493652192864, "eval_nq_bleu_score_sem": 0.48137986508988995, "eval_nq_emb_cos_sim": 0.8319485187530518, "eval_nq_emb_cos_sim_sem": 0.00709220627397653, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1160738468170166, "eval_nq_n_ngrams_match_1": 23.244, "eval_nq_n_ngrams_match_2": 8.56, "eval_nq_n_ngrams_match_3": 3.894, "eval_nq_num_pred_words": 48.84, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.298492292675277, "eval_nq_pred_num_tokens": 62.9609375, "eval_nq_rouge_score": 0.45197330265622393, "eval_nq_runtime": 14.8599, "eval_nq_samples_per_second": 33.648, "eval_nq_steps_per_second": 0.067, "eval_nq_token_set_f1": 0.46469942027029504, "eval_nq_token_set_f1_sem": 0.004887356329479464, "eval_nq_token_set_precision": 0.42270919429197945, "eval_nq_token_set_recall": 0.5245490890095952, "eval_nq_true_num_tokens": 64.0, "step": 225000 }, { "epoch": 43.21, "learning_rate": 0.001, "loss": 2.4849, "step": 225012 }, { "epoch": 43.21, "learning_rate": 0.001, "loss": 2.4849, "step": 225024 }, { "epoch": 43.21, "learning_rate": 0.001, "loss": 2.4939, "step": 225036 }, { "epoch": 43.21, "learning_rate": 0.001, "loss": 2.4851, "step": 225048 }, { "epoch": 43.21, "learning_rate": 0.001, "loss": 2.4804, "step": 225060 }, { "epoch": 43.22, "learning_rate": 0.001, "loss": 2.4871, "step": 225072 }, { "epoch": 43.22, "learning_rate": 0.001, "loss": 2.4916, "step": 225084 }, { "epoch": 43.22, "learning_rate": 0.001, "loss": 2.4863, "step": 225096 }, { "epoch": 43.22, "learning_rate": 0.001, "loss": 2.4949, "step": 225108 }, { "epoch": 43.23, "learning_rate": 0.001, "loss": 2.4913, "step": 225120 }, { "epoch": 43.23, "learning_rate": 0.001, "loss": 2.4847, "step": 225132 }, { "epoch": 43.23, "learning_rate": 0.001, "loss": 2.4938, "step": 225144 }, { "epoch": 43.23, "learning_rate": 0.001, "loss": 2.4783, "step": 225156 }, { "epoch": 43.24, "learning_rate": 0.001, "loss": 2.482, "step": 225168 }, { "epoch": 43.24, "learning_rate": 0.001, "loss": 2.4865, "step": 225180 }, { "epoch": 43.24, "learning_rate": 0.001, "loss": 2.482, "step": 225192 }, { "epoch": 43.24, "learning_rate": 0.001, "loss": 2.5016, "step": 225204 }, { "epoch": 43.24, "learning_rate": 0.001, "loss": 2.4863, "step": 225216 }, { "epoch": 43.25, "learning_rate": 0.001, "loss": 2.4894, "step": 225228 }, { "epoch": 43.25, "learning_rate": 0.001, "loss": 2.4785, "step": 225240 }, { "epoch": 43.25, "learning_rate": 0.001, "loss": 2.4854, "step": 225252 }, { "epoch": 43.25, "learning_rate": 0.001, "loss": 2.4821, "step": 225264 }, { "epoch": 43.26, "learning_rate": 0.001, "loss": 2.4842, "step": 225276 }, { "epoch": 43.26, "learning_rate": 0.001, "loss": 2.4786, "step": 225288 }, { "epoch": 43.26, "learning_rate": 0.001, "loss": 2.4882, "step": 225300 }, { "epoch": 43.26, "learning_rate": 0.001, "loss": 2.4883, "step": 225312 }, { "epoch": 43.26, "learning_rate": 0.001, "loss": 2.4834, "step": 225324 }, { "epoch": 43.27, "learning_rate": 0.001, "loss": 2.4815, "step": 225336 }, { "epoch": 43.27, "learning_rate": 0.001, "loss": 2.4958, "step": 225348 }, { "epoch": 43.27, "learning_rate": 0.001, "loss": 2.4777, "step": 225360 }, { "epoch": 43.27, "learning_rate": 0.001, "loss": 2.486, "step": 225372 }, { "epoch": 43.28, "learning_rate": 0.001, "loss": 2.4927, "step": 225384 }, { "epoch": 43.28, "learning_rate": 0.001, "loss": 2.4834, "step": 225396 }, { "epoch": 43.28, "learning_rate": 0.001, "loss": 2.4858, "step": 225408 }, { "epoch": 43.28, "learning_rate": 0.001, "loss": 2.4873, "step": 225420 }, { "epoch": 43.29, "learning_rate": 0.001, "loss": 2.4909, "step": 225432 }, { "epoch": 43.29, "learning_rate": 0.001, "loss": 2.4895, "step": 225444 }, { "epoch": 43.29, "learning_rate": 0.001, "loss": 2.4966, "step": 225456 }, { "epoch": 43.29, "learning_rate": 0.001, "loss": 2.4887, "step": 225468 }, { "epoch": 43.29, "learning_rate": 0.001, "loss": 2.4959, "step": 225480 }, { "epoch": 43.3, "learning_rate": 0.001, "loss": 2.4885, "step": 225492 }, { "epoch": 43.3, "learning_rate": 0.001, "loss": 2.4793, "step": 225504 }, { "epoch": 43.3, "learning_rate": 0.001, "loss": 2.4823, "step": 225516 }, { "epoch": 43.3, "learning_rate": 0.001, "loss": 2.4876, "step": 225528 }, { "epoch": 43.31, "learning_rate": 0.001, "loss": 2.4878, "step": 225540 }, { "epoch": 43.31, "learning_rate": 0.001, "loss": 2.4941, "step": 225552 }, { "epoch": 43.31, "learning_rate": 0.001, "loss": 2.4939, "step": 225564 }, { "epoch": 43.31, "learning_rate": 0.001, "loss": 2.4925, "step": 225576 }, { "epoch": 43.32, "learning_rate": 0.001, "loss": 2.4919, "step": 225588 }, { "epoch": 43.32, "learning_rate": 0.001, "loss": 2.4894, "step": 225600 }, { "epoch": 43.32, "learning_rate": 0.001, "loss": 2.4865, "step": 225612 }, { "epoch": 43.32, "learning_rate": 0.001, "loss": 2.4821, "step": 225624 }, { "epoch": 43.32, "eval_ag_news_accuracy": 0.329625, "eval_ag_news_bleu_score": 5.007209680712018, "eval_ag_news_bleu_score_sem": 0.15787546553026713, "eval_ag_news_emb_cos_sim": 0.8209670782089233, "eval_ag_news_emb_cos_sim_sem": 0.006888270844922473, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4715840816497803, "eval_ag_news_n_ngrams_match_1": 14.374, "eval_ag_news_n_ngrams_match_2": 3.198, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 46.454, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.18769001064209, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3594543254780415, "eval_ag_news_runtime": 14.1571, "eval_ag_news_samples_per_second": 35.318, "eval_ag_news_steps_per_second": 0.071, "eval_ag_news_token_set_f1": 0.35642959025874726, "eval_ag_news_token_set_f1_sem": 0.004416355497977135, "eval_ag_news_token_set_precision": 0.34342920762898865, "eval_ag_news_token_set_recall": 0.38472586235289224, "eval_ag_news_true_num_tokens": 56.09375, "step": 225625 }, { "epoch": 43.32, "eval_anthropic_toxic_prompts_accuracy": 0.11671875, "eval_anthropic_toxic_prompts_bleu_score": 3.177581048954006, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11991898957594833, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6827985644340515, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009172370567676785, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2150938510894775, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.244, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.934, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 46.874, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.905629221920645, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21636918304036196, "eval_anthropic_toxic_prompts_runtime": 13.6736, "eval_anthropic_toxic_prompts_samples_per_second": 36.567, "eval_anthropic_toxic_prompts_steps_per_second": 0.073, "eval_anthropic_toxic_prompts_token_set_f1": 0.3588128479446289, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006505869840539364, "eval_anthropic_toxic_prompts_token_set_precision": 0.44135623305194505, "eval_anthropic_toxic_prompts_token_set_recall": 0.32780924040399356, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 225625 }, { "epoch": 43.32, "eval_arxiv_accuracy": 0.35303125, "eval_arxiv_bleu_score": 4.732146080760891, "eval_arxiv_bleu_score_sem": 0.13805026088782762, "eval_arxiv_emb_cos_sim": 0.7880035638809204, "eval_arxiv_emb_cos_sim_sem": 0.00648094855438951, "eval_arxiv_emb_top1_equal": 0.2265625, "eval_arxiv_emb_top1_equal_sem": 0.03714537682851538, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3333144187927246, "eval_arxiv_n_ngrams_match_1": 15.832, "eval_arxiv_n_ngrams_match_2": 3.256, "eval_arxiv_n_ngrams_match_3": 0.788, "eval_arxiv_num_pred_words": 40.95, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.031094694233, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37843801869766436, "eval_arxiv_runtime": 13.7925, "eval_arxiv_samples_per_second": 36.252, "eval_arxiv_steps_per_second": 0.073, "eval_arxiv_token_set_f1": 0.37021707299515594, "eval_arxiv_token_set_f1_sem": 0.004351910512045278, "eval_arxiv_token_set_precision": 0.32470253058240534, "eval_arxiv_token_set_recall": 0.44605730211835404, "eval_arxiv_true_num_tokens": 64.0, "step": 225625 }, { "epoch": 43.32, "eval_python_code_alpaca_accuracy": 0.16225, "eval_python_code_alpaca_bleu_score": 4.50693783852999, "eval_python_code_alpaca_bleu_score_sem": 0.13918322989300988, "eval_python_code_alpaca_emb_cos_sim": 0.75345778465271, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008189933455788417, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.847130060195923, "eval_python_code_alpaca_n_ngrams_match_1": 9.782, "eval_python_code_alpaca_n_ngrams_match_2": 2.868, "eval_python_code_alpaca_n_ngrams_match_3": 0.918, "eval_python_code_alpaca_num_pred_words": 42.844, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.238238075157213, "eval_python_code_alpaca_pred_num_tokens": 62.9921875, "eval_python_code_alpaca_rouge_score": 0.3344091486976238, "eval_python_code_alpaca_runtime": 15.4286, "eval_python_code_alpaca_samples_per_second": 32.407, "eval_python_code_alpaca_steps_per_second": 0.065, "eval_python_code_alpaca_token_set_f1": 0.47689932860094325, "eval_python_code_alpaca_token_set_f1_sem": 0.005593843710542998, "eval_python_code_alpaca_token_set_precision": 0.5333945379294924, "eval_python_code_alpaca_token_set_recall": 0.4560878928593189, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 225625 }, { "epoch": 43.32, "eval_wikibio_accuracy": 0.32946875, "eval_wikibio_bleu_score": 6.262314292456134, "eval_wikibio_bleu_score_sem": 0.22480394580126178, "eval_wikibio_emb_cos_sim": 0.7432674765586853, "eval_wikibio_emb_cos_sim_sem": 0.009720175548286147, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6118967533111572, "eval_wikibio_n_ngrams_match_1": 10.194, "eval_wikibio_n_ngrams_match_2": 3.538, "eval_wikibio_n_ngrams_match_3": 1.36, "eval_wikibio_num_pred_words": 36.174, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.036234835848816, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3594818662399503, "eval_wikibio_runtime": 13.6846, "eval_wikibio_samples_per_second": 36.537, "eval_wikibio_steps_per_second": 0.073, "eval_wikibio_token_set_f1": 0.3231579133748274, "eval_wikibio_token_set_f1_sem": 0.005596099174528171, "eval_wikibio_token_set_precision": 0.33064577235064085, "eval_wikibio_token_set_recall": 0.3356395695646333, "eval_wikibio_true_num_tokens": 61.1328125, "step": 225625 }, { "epoch": 43.32, "eval_nq_accuracy": 0.536625, "eval_nq_bleu_score": 12.052785832010397, "eval_nq_bleu_score_sem": 0.48510503214570616, "eval_nq_emb_cos_sim": 0.8349946737289429, "eval_nq_emb_cos_sim_sem": 0.007090471009197996, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.112039566040039, "eval_nq_n_ngrams_match_1": 23.56, "eval_nq_n_ngrams_match_2": 8.664, "eval_nq_n_ngrams_match_3": 4.04, "eval_nq_num_pred_words": 49.128, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.265081284749389, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4570524754387344, "eval_nq_runtime": 15.1352, "eval_nq_samples_per_second": 33.036, "eval_nq_steps_per_second": 0.066, "eval_nq_token_set_f1": 0.4705470357107136, "eval_nq_token_set_f1_sem": 0.004879032714876459, "eval_nq_token_set_precision": 0.4311862561023063, "eval_nq_token_set_recall": 0.5261074936810667, "eval_nq_true_num_tokens": 64.0, "step": 225625 }, { "epoch": 43.32, "learning_rate": 0.001, "loss": 2.4848, "step": 225636 }, { "epoch": 43.33, "learning_rate": 0.001, "loss": 2.4822, "step": 225648 }, { "epoch": 43.33, "learning_rate": 0.001, "loss": 2.482, "step": 225660 }, { "epoch": 43.33, "learning_rate": 0.001, "loss": 2.4874, "step": 225672 }, { "epoch": 43.33, "learning_rate": 0.001, "loss": 2.4837, "step": 225684 }, { "epoch": 43.34, "learning_rate": 0.001, "loss": 2.4887, "step": 225696 }, { "epoch": 43.34, "learning_rate": 0.001, "loss": 2.4845, "step": 225708 }, { "epoch": 43.34, "learning_rate": 0.001, "loss": 2.4826, "step": 225720 }, { "epoch": 43.34, "learning_rate": 0.001, "loss": 2.4921, "step": 225732 }, { "epoch": 43.35, "learning_rate": 0.001, "loss": 2.484, "step": 225744 }, { "epoch": 43.35, "learning_rate": 0.001, "loss": 2.4909, "step": 225756 }, { "epoch": 43.35, "learning_rate": 0.001, "loss": 2.4858, "step": 225768 }, { "epoch": 43.35, "learning_rate": 0.001, "loss": 2.4974, "step": 225780 }, { "epoch": 43.35, "learning_rate": 0.001, "loss": 2.4867, "step": 225792 }, { "epoch": 43.36, "learning_rate": 0.001, "loss": 2.4893, "step": 225804 }, { "epoch": 43.36, "learning_rate": 0.001, "loss": 2.4868, "step": 225816 }, { "epoch": 43.36, "learning_rate": 0.001, "loss": 2.4924, "step": 225828 }, { "epoch": 43.36, "learning_rate": 0.001, "loss": 2.4828, "step": 225840 }, { "epoch": 43.37, "learning_rate": 0.001, "loss": 2.4825, "step": 225852 }, { "epoch": 43.37, "learning_rate": 0.001, "loss": 2.4872, "step": 225864 }, { "epoch": 43.37, "learning_rate": 0.001, "loss": 2.4841, "step": 225876 }, { "epoch": 43.37, "learning_rate": 0.001, "loss": 2.4877, "step": 225888 }, { "epoch": 43.38, "learning_rate": 0.001, "loss": 2.4932, "step": 225900 }, { "epoch": 43.38, "learning_rate": 0.001, "loss": 2.4829, "step": 225912 }, { "epoch": 43.38, "learning_rate": 0.001, "loss": 2.4906, "step": 225924 }, { "epoch": 43.38, "learning_rate": 0.001, "loss": 2.4914, "step": 225936 }, { "epoch": 43.38, "learning_rate": 0.001, "loss": 2.4917, "step": 225948 }, { "epoch": 43.39, "learning_rate": 0.001, "loss": 2.4894, "step": 225960 }, { "epoch": 43.39, "learning_rate": 0.001, "loss": 2.4762, "step": 225972 }, { "epoch": 43.39, "learning_rate": 0.001, "loss": 2.4856, "step": 225984 }, { "epoch": 43.39, "learning_rate": 0.001, "loss": 2.4869, "step": 225996 }, { "epoch": 43.4, "learning_rate": 0.001, "loss": 2.4842, "step": 226008 }, { "epoch": 43.4, "learning_rate": 0.001, "loss": 2.495, "step": 226020 }, { "epoch": 43.4, "learning_rate": 0.001, "loss": 2.4875, "step": 226032 }, { "epoch": 43.4, "learning_rate": 0.001, "loss": 2.4864, "step": 226044 }, { "epoch": 43.41, "learning_rate": 0.001, "loss": 2.4947, "step": 226056 }, { "epoch": 43.41, "learning_rate": 0.001, "loss": 2.4903, "step": 226068 }, { "epoch": 43.41, "learning_rate": 0.001, "loss": 2.4833, "step": 226080 }, { "epoch": 43.41, "learning_rate": 0.001, "loss": 2.498, "step": 226092 }, { "epoch": 43.41, "learning_rate": 0.001, "loss": 2.4903, "step": 226104 }, { "epoch": 43.42, "learning_rate": 0.001, "loss": 2.4988, "step": 226116 }, { "epoch": 43.42, "learning_rate": 0.001, "loss": 2.4925, "step": 226128 }, { "epoch": 43.42, "learning_rate": 0.001, "loss": 2.4941, "step": 226140 }, { "epoch": 43.42, "learning_rate": 0.001, "loss": 2.4852, "step": 226152 }, { "epoch": 43.43, "learning_rate": 0.001, "loss": 2.499, "step": 226164 }, { "epoch": 43.43, "learning_rate": 0.001, "loss": 2.5042, "step": 226176 }, { "epoch": 43.43, "learning_rate": 0.001, "loss": 2.4803, "step": 226188 }, { "epoch": 43.43, "learning_rate": 0.001, "loss": 2.4825, "step": 226200 }, { "epoch": 43.44, "learning_rate": 0.001, "loss": 2.4942, "step": 226212 }, { "epoch": 43.44, "learning_rate": 0.001, "loss": 2.4888, "step": 226224 }, { "epoch": 43.44, "learning_rate": 0.001, "loss": 2.4943, "step": 226236 }, { "epoch": 43.44, "learning_rate": 0.001, "loss": 2.4892, "step": 226248 }, { "epoch": 43.44, "eval_ag_news_accuracy": 0.32978125, "eval_ag_news_bleu_score": 5.093676275547452, "eval_ag_news_bleu_score_sem": 0.16487175943824905, "eval_ag_news_emb_cos_sim": 0.8234910368919373, "eval_ag_news_emb_cos_sim_sem": 0.006803550411059513, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4731106758117676, "eval_ag_news_n_ngrams_match_1": 14.422, "eval_ag_news_n_ngrams_match_2": 3.276, "eval_ag_news_n_ngrams_match_3": 0.948, "eval_ag_news_num_pred_words": 46.11, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.23686507593382, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3623755152636988, "eval_ag_news_runtime": 14.5685, "eval_ag_news_samples_per_second": 34.321, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36232721276381796, "eval_ag_news_token_set_f1_sem": 0.0043315819491174315, "eval_ag_news_token_set_precision": 0.3472226476482931, "eval_ag_news_token_set_recall": 0.39376838695199123, "eval_ag_news_true_num_tokens": 56.09375, "step": 226250 }, { "epoch": 43.44, "eval_anthropic_toxic_prompts_accuracy": 0.11546875, "eval_anthropic_toxic_prompts_bleu_score": 3.1573747343869236, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11883577326787367, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6744633913040161, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.011031932987914956, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.20275616645813, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.348, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.954, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.734, "eval_anthropic_toxic_prompts_num_pred_words": 47.214, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.600239199750323, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21727550300527126, "eval_anthropic_toxic_prompts_runtime": 18.2577, "eval_anthropic_toxic_prompts_samples_per_second": 27.386, "eval_anthropic_toxic_prompts_steps_per_second": 0.055, "eval_anthropic_toxic_prompts_token_set_f1": 0.35934558975278397, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00662503788648407, "eval_anthropic_toxic_prompts_token_set_precision": 0.44541038352047774, "eval_anthropic_toxic_prompts_token_set_recall": 0.32663969420704203, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 226250 }, { "epoch": 43.44, "eval_arxiv_accuracy": 0.3524375, "eval_arxiv_bleu_score": 4.478099066227136, "eval_arxiv_bleu_score_sem": 0.1369632925480413, "eval_arxiv_emb_cos_sim": 0.7743604183197021, "eval_arxiv_emb_cos_sim_sem": 0.008632045335589091, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3318378925323486, "eval_arxiv_n_ngrams_match_1": 15.556, "eval_arxiv_n_ngrams_match_2": 3.076, "eval_arxiv_n_ngrams_match_3": 0.714, "eval_arxiv_num_pred_words": 40.726, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.98973658748906, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36978181886539296, "eval_arxiv_runtime": 19.7036, "eval_arxiv_samples_per_second": 25.376, "eval_arxiv_steps_per_second": 0.051, "eval_arxiv_token_set_f1": 0.3635901665162223, "eval_arxiv_token_set_f1_sem": 0.004390762577594476, "eval_arxiv_token_set_precision": 0.3169895731092615, "eval_arxiv_token_set_recall": 0.444598054307409, "eval_arxiv_true_num_tokens": 64.0, "step": 226250 }, { "epoch": 43.44, "eval_python_code_alpaca_accuracy": 0.1635, "eval_python_code_alpaca_bleu_score": 4.708267183592054, "eval_python_code_alpaca_bleu_score_sem": 0.14815326850460622, "eval_python_code_alpaca_emb_cos_sim": 0.7634607553482056, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00831298183856048, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.816610813140869, "eval_python_code_alpaca_n_ngrams_match_1": 9.89, "eval_python_code_alpaca_n_ngrams_match_2": 2.938, "eval_python_code_alpaca_n_ngrams_match_3": 0.974, "eval_python_code_alpaca_num_pred_words": 42.422, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.720087035924255, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34077917287328185, "eval_python_code_alpaca_runtime": 21.2743, "eval_python_code_alpaca_samples_per_second": 23.503, "eval_python_code_alpaca_steps_per_second": 0.047, "eval_python_code_alpaca_token_set_f1": 0.4823328657908486, "eval_python_code_alpaca_token_set_f1_sem": 0.005527231693143531, "eval_python_code_alpaca_token_set_precision": 0.5427530424492416, "eval_python_code_alpaca_token_set_recall": 0.4568862758040123, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 226250 }, { "epoch": 43.44, "eval_wikibio_accuracy": 0.33146875, "eval_wikibio_bleu_score": 6.2041723659798595, "eval_wikibio_bleu_score_sem": 0.21787592440732506, "eval_wikibio_emb_cos_sim": 0.7340708374977112, "eval_wikibio_emb_cos_sim_sem": 0.009662068215079783, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6151347160339355, "eval_wikibio_n_ngrams_match_1": 9.918, "eval_wikibio_n_ngrams_match_2": 3.428, "eval_wikibio_n_ngrams_match_3": 1.326, "eval_wikibio_num_pred_words": 35.022, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.15635114475906, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35082655990224754, "eval_wikibio_runtime": 20.946, "eval_wikibio_samples_per_second": 23.871, "eval_wikibio_steps_per_second": 0.048, "eval_wikibio_token_set_f1": 0.3189537830900528, "eval_wikibio_token_set_f1_sem": 0.005816014437589874, "eval_wikibio_token_set_precision": 0.3236274637690813, "eval_wikibio_token_set_recall": 0.3344237077877823, "eval_wikibio_true_num_tokens": 61.1328125, "step": 226250 }, { "epoch": 43.44, "eval_nq_accuracy": 0.5366875, "eval_nq_bleu_score": 12.050509228876638, "eval_nq_bleu_score_sem": 0.4944644192355625, "eval_nq_emb_cos_sim": 0.834195613861084, "eval_nq_emb_cos_sim_sem": 0.007457853892410824, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1161155700683594, "eval_nq_n_ngrams_match_1": 23.55, "eval_nq_n_ngrams_match_2": 8.74, "eval_nq_n_ngrams_match_3": 4.052, "eval_nq_num_pred_words": 49.232, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.298838539978203, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45668360891146675, "eval_nq_runtime": 14.6674, "eval_nq_samples_per_second": 34.089, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.4686929296459197, "eval_nq_token_set_f1_sem": 0.004831937761692989, "eval_nq_token_set_precision": 0.42789368343855533, "eval_nq_token_set_recall": 0.5263362551753112, "eval_nq_true_num_tokens": 64.0, "step": 226250 }, { "epoch": 43.44, "learning_rate": 0.001, "loss": 2.4955, "step": 226260 }, { "epoch": 43.45, "learning_rate": 0.001, "loss": 2.491, "step": 226272 }, { "epoch": 43.45, "learning_rate": 0.001, "loss": 2.4913, "step": 226284 }, { "epoch": 43.45, "learning_rate": 0.001, "loss": 2.4905, "step": 226296 }, { "epoch": 43.45, "learning_rate": 0.001, "loss": 2.4845, "step": 226308 }, { "epoch": 43.46, "learning_rate": 0.001, "loss": 2.4824, "step": 226320 }, { "epoch": 43.46, "learning_rate": 0.001, "loss": 2.4927, "step": 226332 }, { "epoch": 43.46, "learning_rate": 0.001, "loss": 2.4934, "step": 226344 }, { "epoch": 43.46, "learning_rate": 0.001, "loss": 2.493, "step": 226356 }, { "epoch": 43.47, "learning_rate": 0.001, "loss": 2.4929, "step": 226368 }, { "epoch": 43.47, "learning_rate": 0.001, "loss": 2.495, "step": 226380 }, { "epoch": 43.47, "learning_rate": 0.001, "loss": 2.4924, "step": 226392 }, { "epoch": 43.47, "learning_rate": 0.001, "loss": 2.4883, "step": 226404 }, { "epoch": 43.47, "learning_rate": 0.001, "loss": 2.4942, "step": 226416 }, { "epoch": 43.48, "learning_rate": 0.001, "loss": 2.5025, "step": 226428 }, { "epoch": 43.48, "learning_rate": 0.001, "loss": 2.4824, "step": 226440 }, { "epoch": 43.48, "learning_rate": 0.001, "loss": 2.4816, "step": 226452 }, { "epoch": 43.48, "learning_rate": 0.001, "loss": 2.4933, "step": 226464 }, { "epoch": 43.49, "learning_rate": 0.001, "loss": 2.491, "step": 226476 }, { "epoch": 43.49, "learning_rate": 0.001, "loss": 2.493, "step": 226488 }, { "epoch": 43.49, "learning_rate": 0.001, "loss": 2.4891, "step": 226500 }, { "epoch": 43.49, "learning_rate": 0.001, "loss": 2.4997, "step": 226512 }, { "epoch": 43.5, "learning_rate": 0.001, "loss": 2.4935, "step": 226524 }, { "epoch": 43.5, "learning_rate": 0.001, "loss": 2.4895, "step": 226536 }, { "epoch": 43.5, "learning_rate": 0.001, "loss": 2.4991, "step": 226548 }, { "epoch": 43.5, "learning_rate": 0.001, "loss": 2.4898, "step": 226560 }, { "epoch": 43.5, "learning_rate": 0.001, "loss": 2.4879, "step": 226572 }, { "epoch": 43.51, "learning_rate": 0.001, "loss": 2.4925, "step": 226584 }, { "epoch": 43.51, "learning_rate": 0.001, "loss": 2.4963, "step": 226596 }, { "epoch": 43.51, "learning_rate": 0.001, "loss": 2.4957, "step": 226608 }, { "epoch": 43.51, "learning_rate": 0.001, "loss": 2.4988, "step": 226620 }, { "epoch": 43.52, "learning_rate": 0.001, "loss": 2.4859, "step": 226632 }, { "epoch": 43.52, "learning_rate": 0.001, "loss": 2.497, "step": 226644 }, { "epoch": 43.52, "learning_rate": 0.001, "loss": 2.491, "step": 226656 }, { "epoch": 43.52, "learning_rate": 0.001, "loss": 2.4951, "step": 226668 }, { "epoch": 43.53, "learning_rate": 0.001, "loss": 2.4941, "step": 226680 }, { "epoch": 43.53, "learning_rate": 0.001, "loss": 2.4967, "step": 226692 }, { "epoch": 43.53, "learning_rate": 0.001, "loss": 2.4932, "step": 226704 }, { "epoch": 43.53, "learning_rate": 0.001, "loss": 2.4808, "step": 226716 }, { "epoch": 43.53, "learning_rate": 0.001, "loss": 2.4908, "step": 226728 }, { "epoch": 43.54, "learning_rate": 0.001, "loss": 2.4903, "step": 226740 }, { "epoch": 43.54, "learning_rate": 0.001, "loss": 2.4883, "step": 226752 }, { "epoch": 43.54, "learning_rate": 0.001, "loss": 2.4861, "step": 226764 }, { "epoch": 43.54, "learning_rate": 0.001, "loss": 2.493, "step": 226776 }, { "epoch": 43.55, "learning_rate": 0.001, "loss": 2.5003, "step": 226788 }, { "epoch": 43.55, "learning_rate": 0.001, "loss": 2.4928, "step": 226800 }, { "epoch": 43.55, "learning_rate": 0.001, "loss": 2.4797, "step": 226812 }, { "epoch": 43.55, "learning_rate": 0.001, "loss": 2.4882, "step": 226824 }, { "epoch": 43.56, "learning_rate": 0.001, "loss": 2.4933, "step": 226836 }, { "epoch": 43.56, "learning_rate": 0.001, "loss": 2.4915, "step": 226848 }, { "epoch": 43.56, "learning_rate": 0.001, "loss": 2.4951, "step": 226860 }, { "epoch": 43.56, "learning_rate": 0.001, "loss": 2.4945, "step": 226872 }, { "epoch": 43.56, "eval_ag_news_accuracy": 0.33009375, "eval_ag_news_bleu_score": 5.18683700258882, "eval_ag_news_bleu_score_sem": 0.16658588927471646, "eval_ag_news_emb_cos_sim": 0.8264328241348267, "eval_ag_news_emb_cos_sim_sem": 0.006069782998107959, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4740750789642334, "eval_ag_news_n_ngrams_match_1": 14.654, "eval_ag_news_n_ngrams_match_2": 3.392, "eval_ag_news_n_ngrams_match_3": 0.994, "eval_ag_news_num_pred_words": 47.484, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.26796940638506, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3635652825803162, "eval_ag_news_runtime": 15.423, "eval_ag_news_samples_per_second": 32.419, "eval_ag_news_steps_per_second": 0.065, "eval_ag_news_token_set_f1": 0.36300772399473347, "eval_ag_news_token_set_f1_sem": 0.004374503068819877, "eval_ag_news_token_set_precision": 0.35095653459588133, "eval_ag_news_token_set_recall": 0.3905892320102781, "eval_ag_news_true_num_tokens": 56.09375, "step": 226875 }, { "epoch": 43.56, "eval_anthropic_toxic_prompts_accuracy": 0.11665625, "eval_anthropic_toxic_prompts_bleu_score": 3.2283255266058406, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12286237103688849, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6831443309783936, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008379857033346125, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.191474199295044, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.48, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.028, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 47.892, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.324259831260143, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22156940193532354, "eval_anthropic_toxic_prompts_runtime": 20.7528, "eval_anthropic_toxic_prompts_samples_per_second": 24.093, "eval_anthropic_toxic_prompts_steps_per_second": 0.048, "eval_anthropic_toxic_prompts_token_set_f1": 0.3656214365592688, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066540252995934655, "eval_anthropic_toxic_prompts_token_set_precision": 0.4576689392150905, "eval_anthropic_toxic_prompts_token_set_recall": 0.32922829780264373, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 226875 }, { "epoch": 43.56, "eval_arxiv_accuracy": 0.35121875, "eval_arxiv_bleu_score": 4.775489376411273, "eval_arxiv_bleu_score_sem": 0.1408563374964588, "eval_arxiv_emb_cos_sim": 0.7955030202865601, "eval_arxiv_emb_cos_sim_sem": 0.005562560822803033, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3303866386413574, "eval_arxiv_n_ngrams_match_1": 16.012, "eval_arxiv_n_ngrams_match_2": 3.28, "eval_arxiv_n_ngrams_match_3": 0.792, "eval_arxiv_num_pred_words": 41.938, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.949145834227096, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37894337090325375, "eval_arxiv_runtime": 14.2924, "eval_arxiv_samples_per_second": 34.984, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.3711074617169576, "eval_arxiv_token_set_f1_sem": 0.00393800402090114, "eval_arxiv_token_set_precision": 0.32621156166361315, "eval_arxiv_token_set_recall": 0.44156517817519597, "eval_arxiv_true_num_tokens": 64.0, "step": 226875 }, { "epoch": 43.56, "eval_python_code_alpaca_accuracy": 0.16315625, "eval_python_code_alpaca_bleu_score": 4.59472098123169, "eval_python_code_alpaca_bleu_score_sem": 0.13986066710656597, "eval_python_code_alpaca_emb_cos_sim": 0.7682456374168396, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006673759191070215, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.842379093170166, "eval_python_code_alpaca_n_ngrams_match_1": 9.988, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 0.97, "eval_python_code_alpaca_num_pred_words": 43.942, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.156534014811434, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33862023276952685, "eval_python_code_alpaca_runtime": 14.7645, "eval_python_code_alpaca_samples_per_second": 33.865, "eval_python_code_alpaca_steps_per_second": 0.068, "eval_python_code_alpaca_token_set_f1": 0.48263569981446547, "eval_python_code_alpaca_token_set_f1_sem": 0.0054130602868705255, "eval_python_code_alpaca_token_set_precision": 0.5457914115905191, "eval_python_code_alpaca_token_set_recall": 0.45169799507626196, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 226875 }, { "epoch": 43.56, "eval_wikibio_accuracy": 0.32953125, "eval_wikibio_bleu_score": 6.245418193755065, "eval_wikibio_bleu_score_sem": 0.21070933554292692, "eval_wikibio_emb_cos_sim": 0.7470102310180664, "eval_wikibio_emb_cos_sim_sem": 0.008467278158236323, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.647217273712158, "eval_wikibio_n_ngrams_match_1": 10.476, "eval_wikibio_n_ngrams_match_2": 3.57, "eval_wikibio_n_ngrams_match_3": 1.324, "eval_wikibio_num_pred_words": 36.954, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.36775041187095, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36972855906308055, "eval_wikibio_runtime": 13.4114, "eval_wikibio_samples_per_second": 37.282, "eval_wikibio_steps_per_second": 0.075, "eval_wikibio_token_set_f1": 0.3321133123839618, "eval_wikibio_token_set_f1_sem": 0.005077694500044495, "eval_wikibio_token_set_precision": 0.34170982891271984, "eval_wikibio_token_set_recall": 0.3383033697172483, "eval_wikibio_true_num_tokens": 61.1328125, "step": 226875 }, { "epoch": 43.56, "eval_nq_accuracy": 0.536375, "eval_nq_bleu_score": 12.183205604592198, "eval_nq_bleu_score_sem": 0.4936275432903302, "eval_nq_emb_cos_sim": 0.838262677192688, "eval_nq_emb_cos_sim_sem": 0.006786116103755178, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.113842248916626, "eval_nq_n_ngrams_match_1": 23.624, "eval_nq_n_ngrams_match_2": 8.814, "eval_nq_n_ngrams_match_3": 4.112, "eval_nq_num_pred_words": 48.95, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.279994042703253, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45859884429649944, "eval_nq_runtime": 15.0148, "eval_nq_samples_per_second": 33.3, "eval_nq_steps_per_second": 0.067, "eval_nq_token_set_f1": 0.47102018114009236, "eval_nq_token_set_f1_sem": 0.00488656495573706, "eval_nq_token_set_precision": 0.42948122136816175, "eval_nq_token_set_recall": 0.5287281667290759, "eval_nq_true_num_tokens": 64.0, "step": 226875 }, { "epoch": 43.56, "learning_rate": 0.001, "loss": 2.4964, "step": 226884 }, { "epoch": 43.57, "learning_rate": 0.001, "loss": 2.4964, "step": 226896 }, { "epoch": 43.57, "learning_rate": 0.001, "loss": 2.4819, "step": 226908 }, { "epoch": 43.57, "learning_rate": 0.001, "loss": 2.4918, "step": 226920 }, { "epoch": 43.57, "learning_rate": 0.001, "loss": 2.4955, "step": 226932 }, { "epoch": 43.58, "learning_rate": 0.001, "loss": 2.4933, "step": 226944 }, { "epoch": 43.58, "learning_rate": 0.001, "loss": 2.4844, "step": 226956 }, { "epoch": 43.58, "learning_rate": 0.001, "loss": 2.4911, "step": 226968 }, { "epoch": 43.58, "learning_rate": 0.001, "loss": 2.4893, "step": 226980 }, { "epoch": 43.59, "learning_rate": 0.001, "loss": 2.4926, "step": 226992 }, { "epoch": 43.59, "learning_rate": 0.001, "loss": 2.4849, "step": 227004 }, { "epoch": 43.59, "learning_rate": 0.001, "loss": 2.4972, "step": 227016 }, { "epoch": 43.59, "learning_rate": 0.001, "loss": 2.4904, "step": 227028 }, { "epoch": 43.59, "learning_rate": 0.001, "loss": 2.4847, "step": 227040 }, { "epoch": 43.6, "learning_rate": 0.001, "loss": 2.4967, "step": 227052 }, { "epoch": 43.6, "learning_rate": 0.001, "loss": 2.4927, "step": 227064 }, { "epoch": 43.6, "learning_rate": 0.001, "loss": 2.4886, "step": 227076 }, { "epoch": 43.6, "learning_rate": 0.001, "loss": 2.4924, "step": 227088 }, { "epoch": 43.61, "learning_rate": 0.001, "loss": 2.4918, "step": 227100 }, { "epoch": 43.61, "learning_rate": 0.001, "loss": 2.4936, "step": 227112 }, { "epoch": 43.61, "learning_rate": 0.001, "loss": 2.4959, "step": 227124 }, { "epoch": 43.61, "learning_rate": 0.001, "loss": 2.4958, "step": 227136 }, { "epoch": 43.62, "learning_rate": 0.001, "loss": 2.4919, "step": 227148 }, { "epoch": 43.62, "learning_rate": 0.001, "loss": 2.4952, "step": 227160 }, { "epoch": 43.62, "learning_rate": 0.001, "loss": 2.4994, "step": 227172 }, { "epoch": 43.62, "learning_rate": 0.001, "loss": 2.4994, "step": 227184 }, { "epoch": 43.62, "learning_rate": 0.001, "loss": 2.4902, "step": 227196 }, { "epoch": 43.63, "learning_rate": 0.001, "loss": 2.4859, "step": 227208 }, { "epoch": 43.63, "learning_rate": 0.001, "loss": 2.4906, "step": 227220 }, { "epoch": 43.63, "learning_rate": 0.001, "loss": 2.493, "step": 227232 }, { "epoch": 43.63, "learning_rate": 0.001, "loss": 2.4837, "step": 227244 }, { "epoch": 43.64, "learning_rate": 0.001, "loss": 2.4945, "step": 227256 }, { "epoch": 43.64, "learning_rate": 0.001, "loss": 2.4883, "step": 227268 }, { "epoch": 43.64, "learning_rate": 0.001, "loss": 2.4877, "step": 227280 }, { "epoch": 43.64, "learning_rate": 0.001, "loss": 2.5003, "step": 227292 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 2.4878, "step": 227304 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 2.4961, "step": 227316 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 2.4949, "step": 227328 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 2.4941, "step": 227340 }, { "epoch": 43.65, "learning_rate": 0.001, "loss": 2.4849, "step": 227352 }, { "epoch": 43.66, "learning_rate": 0.001, "loss": 2.4806, "step": 227364 }, { "epoch": 43.66, "learning_rate": 0.001, "loss": 2.4924, "step": 227376 }, { "epoch": 43.66, "learning_rate": 0.001, "loss": 2.496, "step": 227388 }, { "epoch": 43.66, "learning_rate": 0.001, "loss": 2.4899, "step": 227400 }, { "epoch": 43.67, "learning_rate": 0.001, "loss": 2.4922, "step": 227412 }, { "epoch": 43.67, "learning_rate": 0.001, "loss": 2.5019, "step": 227424 }, { "epoch": 43.67, "learning_rate": 0.001, "loss": 2.4828, "step": 227436 }, { "epoch": 43.67, "learning_rate": 0.001, "loss": 2.488, "step": 227448 }, { "epoch": 43.68, "learning_rate": 0.001, "loss": 2.4967, "step": 227460 }, { "epoch": 43.68, "learning_rate": 0.001, "loss": 2.4885, "step": 227472 }, { "epoch": 43.68, "learning_rate": 0.001, "loss": 2.4862, "step": 227484 }, { "epoch": 43.68, "learning_rate": 0.001, "loss": 2.4945, "step": 227496 }, { "epoch": 43.68, "eval_ag_news_accuracy": 0.328875, "eval_ag_news_bleu_score": 5.286246946761519, "eval_ag_news_bleu_score_sem": 0.17030936643136907, "eval_ag_news_emb_cos_sim": 0.8202008008956909, "eval_ag_news_emb_cos_sim_sem": 0.0076707830638104, "eval_ag_news_emb_top1_equal": 0.3046875, "eval_ag_news_emb_top1_equal_sem": 0.04084279867618665, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4668161869049072, "eval_ag_news_n_ngrams_match_1": 14.472, "eval_ag_news_n_ngrams_match_2": 3.366, "eval_ag_news_n_ngrams_match_3": 0.982, "eval_ag_news_num_pred_words": 46.132, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.03458777031133, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3630351606190766, "eval_ag_news_runtime": 14.7917, "eval_ag_news_samples_per_second": 33.803, "eval_ag_news_steps_per_second": 0.068, "eval_ag_news_token_set_f1": 0.3604192821392029, "eval_ag_news_token_set_f1_sem": 0.004494872211112179, "eval_ag_news_token_set_precision": 0.3464709988923379, "eval_ag_news_token_set_recall": 0.39039131299477525, "eval_ag_news_true_num_tokens": 56.09375, "step": 227500 }, { "epoch": 43.68, "eval_anthropic_toxic_prompts_accuracy": 0.11578125, "eval_anthropic_toxic_prompts_bleu_score": 3.3022331245236614, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12581546669826427, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830217242240906, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008841839176586163, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.210660219192505, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.414, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.048, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.786, "eval_anthropic_toxic_prompts_num_pred_words": 47.246, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.79545125454489, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22190634144100985, "eval_anthropic_toxic_prompts_runtime": 14.2403, "eval_anthropic_toxic_prompts_samples_per_second": 35.112, "eval_anthropic_toxic_prompts_steps_per_second": 0.07, "eval_anthropic_toxic_prompts_token_set_f1": 0.3617547289111145, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00640227351897671, "eval_anthropic_toxic_prompts_token_set_precision": 0.44939880893583517, "eval_anthropic_toxic_prompts_token_set_recall": 0.3286458522756195, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 227500 }, { "epoch": 43.68, "eval_arxiv_accuracy": 0.35290625, "eval_arxiv_bleu_score": 4.65388300473452, "eval_arxiv_bleu_score_sem": 0.14106568293216215, "eval_arxiv_emb_cos_sim": 0.7914077639579773, "eval_arxiv_emb_cos_sim_sem": 0.007106011342497372, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3320858478546143, "eval_arxiv_n_ngrams_match_1": 15.488, "eval_arxiv_n_ngrams_match_2": 3.122, "eval_arxiv_n_ngrams_match_3": 0.778, "eval_arxiv_num_pred_words": 40.636, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.99667765214614, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37164831168300744, "eval_arxiv_runtime": 13.837, "eval_arxiv_samples_per_second": 36.135, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.36418839645459333, "eval_arxiv_token_set_f1_sem": 0.00426535713540038, "eval_arxiv_token_set_precision": 0.31721653582457804, "eval_arxiv_token_set_recall": 0.44178076341031575, "eval_arxiv_true_num_tokens": 64.0, "step": 227500 }, { "epoch": 43.68, "eval_python_code_alpaca_accuracy": 0.16353125, "eval_python_code_alpaca_bleu_score": 4.67672936228896, "eval_python_code_alpaca_bleu_score_sem": 0.15054175835514544, "eval_python_code_alpaca_emb_cos_sim": 0.7713525295257568, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007235496997834966, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8360798358917236, "eval_python_code_alpaca_n_ngrams_match_1": 9.832, "eval_python_code_alpaca_n_ngrams_match_2": 2.89, "eval_python_code_alpaca_n_ngrams_match_3": 0.978, "eval_python_code_alpaca_num_pred_words": 43.102, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.04880027057715, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3408123858704276, "eval_python_code_alpaca_runtime": 15.0396, "eval_python_code_alpaca_samples_per_second": 33.246, "eval_python_code_alpaca_steps_per_second": 0.066, "eval_python_code_alpaca_token_set_f1": 0.4810487994843043, "eval_python_code_alpaca_token_set_f1_sem": 0.005479756175395937, "eval_python_code_alpaca_token_set_precision": 0.5396984804612589, "eval_python_code_alpaca_token_set_recall": 0.45620973247543833, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 227500 }, { "epoch": 43.68, "eval_wikibio_accuracy": 0.33303125, "eval_wikibio_bleu_score": 5.925104478803174, "eval_wikibio_bleu_score_sem": 0.218554434822167, "eval_wikibio_emb_cos_sim": 0.7368950843811035, "eval_wikibio_emb_cos_sim_sem": 0.01096662512905831, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6045265197753906, "eval_wikibio_n_ngrams_match_1": 9.858, "eval_wikibio_n_ngrams_match_2": 3.32, "eval_wikibio_n_ngrams_match_3": 1.228, "eval_wikibio_num_pred_words": 35.218, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.76427257958304, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34979286954505556, "eval_wikibio_runtime": 14.1959, "eval_wikibio_samples_per_second": 35.221, "eval_wikibio_steps_per_second": 0.07, "eval_wikibio_token_set_f1": 0.3159993821913835, "eval_wikibio_token_set_f1_sem": 0.005814667657070419, "eval_wikibio_token_set_precision": 0.3223637468900714, "eval_wikibio_token_set_recall": 0.32918955818752543, "eval_wikibio_true_num_tokens": 61.1328125, "step": 227500 }, { "epoch": 43.68, "eval_nq_accuracy": 0.537375, "eval_nq_bleu_score": 12.204728865234786, "eval_nq_bleu_score_sem": 0.48995800363840586, "eval_nq_emb_cos_sim": 0.8363226056098938, "eval_nq_emb_cos_sim_sem": 0.006792588279179632, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1125874519348145, "eval_nq_n_ngrams_match_1": 23.714, "eval_nq_n_ngrams_match_2": 8.848, "eval_nq_n_ngrams_match_3": 4.156, "eval_nq_num_pred_words": 48.752, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.269610846932784, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46150773248573296, "eval_nq_runtime": 14.5262, "eval_nq_samples_per_second": 34.421, "eval_nq_steps_per_second": 0.069, "eval_nq_token_set_f1": 0.472206477608648, "eval_nq_token_set_f1_sem": 0.00487267736465192, "eval_nq_token_set_precision": 0.43085782040690107, "eval_nq_token_set_recall": 0.529571015474834, "eval_nq_true_num_tokens": 64.0, "step": 227500 }, { "epoch": 43.68, "learning_rate": 0.001, "loss": 2.4968, "step": 227508 }, { "epoch": 43.69, "learning_rate": 0.001, "loss": 2.4846, "step": 227520 }, { "epoch": 43.69, "learning_rate": 0.001, "loss": 2.4928, "step": 227532 }, { "epoch": 43.69, "learning_rate": 0.001, "loss": 2.4865, "step": 227544 }, { "epoch": 43.69, "learning_rate": 0.001, "loss": 2.4851, "step": 227556 }, { "epoch": 43.7, "learning_rate": 0.001, "loss": 2.5007, "step": 227568 }, { "epoch": 43.7, "learning_rate": 0.001, "loss": 2.4921, "step": 227580 }, { "epoch": 43.7, "learning_rate": 0.001, "loss": 2.4789, "step": 227592 }, { "epoch": 43.7, "learning_rate": 0.001, "loss": 2.4913, "step": 227604 }, { "epoch": 43.71, "learning_rate": 0.001, "loss": 2.49, "step": 227616 }, { "epoch": 43.71, "learning_rate": 0.001, "loss": 2.4911, "step": 227628 }, { "epoch": 43.71, "learning_rate": 0.001, "loss": 2.4978, "step": 227640 }, { "epoch": 43.71, "learning_rate": 0.001, "loss": 2.4908, "step": 227652 }, { "epoch": 43.71, "learning_rate": 0.001, "loss": 2.4929, "step": 227664 }, { "epoch": 43.72, "learning_rate": 0.001, "loss": 2.4863, "step": 227676 }, { "epoch": 43.72, "learning_rate": 0.001, "loss": 2.4903, "step": 227688 }, { "epoch": 43.72, "learning_rate": 0.001, "loss": 2.4911, "step": 227700 }, { "epoch": 43.72, "learning_rate": 0.001, "loss": 2.4965, "step": 227712 }, { "epoch": 43.73, "learning_rate": 0.001, "loss": 2.4908, "step": 227724 }, { "epoch": 43.73, "learning_rate": 0.001, "loss": 2.4874, "step": 227736 }, { "epoch": 43.73, "learning_rate": 0.001, "loss": 2.4934, "step": 227748 }, { "epoch": 43.73, "learning_rate": 0.001, "loss": 2.4773, "step": 227760 }, { "epoch": 43.74, "learning_rate": 0.001, "loss": 2.4879, "step": 227772 }, { "epoch": 43.74, "learning_rate": 0.001, "loss": 2.495, "step": 227784 }, { "epoch": 43.74, "learning_rate": 0.001, "loss": 2.4853, "step": 227796 }, { "epoch": 43.74, "learning_rate": 0.001, "loss": 2.4861, "step": 227808 }, { "epoch": 43.74, "learning_rate": 0.001, "loss": 2.4913, "step": 227820 }, { "epoch": 43.75, "learning_rate": 0.001, "loss": 2.4984, "step": 227832 }, { "epoch": 43.75, "learning_rate": 0.001, "loss": 2.485, "step": 227844 }, { "epoch": 43.75, "learning_rate": 0.001, "loss": 2.4958, "step": 227856 }, { "epoch": 43.75, "learning_rate": 0.001, "loss": 2.4875, "step": 227868 }, { "epoch": 43.76, "learning_rate": 0.001, "loss": 2.4972, "step": 227880 }, { "epoch": 43.76, "learning_rate": 0.001, "loss": 2.4864, "step": 227892 }, { "epoch": 43.76, "learning_rate": 0.001, "loss": 2.4881, "step": 227904 }, { "epoch": 43.76, "learning_rate": 0.001, "loss": 2.4894, "step": 227916 }, { "epoch": 43.76, "learning_rate": 0.001, "loss": 2.5012, "step": 227928 }, { "epoch": 43.77, "learning_rate": 0.001, "loss": 2.4894, "step": 227940 }, { "epoch": 43.77, "learning_rate": 0.001, "loss": 2.5086, "step": 227952 }, { "epoch": 43.77, "learning_rate": 0.001, "loss": 2.4863, "step": 227964 }, { "epoch": 43.77, "learning_rate": 0.001, "loss": 2.4847, "step": 227976 }, { "epoch": 43.78, "learning_rate": 0.001, "loss": 2.4902, "step": 227988 }, { "epoch": 43.78, "learning_rate": 0.001, "loss": 2.4918, "step": 228000 }, { "epoch": 43.78, "learning_rate": 0.001, "loss": 2.4971, "step": 228012 }, { "epoch": 43.78, "learning_rate": 0.001, "loss": 2.4767, "step": 228024 }, { "epoch": 43.79, "learning_rate": 0.001, "loss": 2.4875, "step": 228036 }, { "epoch": 43.79, "learning_rate": 0.001, "loss": 2.4923, "step": 228048 }, { "epoch": 43.79, "learning_rate": 0.001, "loss": 2.4829, "step": 228060 }, { "epoch": 43.79, "learning_rate": 0.001, "loss": 2.4964, "step": 228072 }, { "epoch": 43.79, "learning_rate": 0.001, "loss": 2.489, "step": 228084 }, { "epoch": 43.8, "learning_rate": 0.001, "loss": 2.4972, "step": 228096 }, { "epoch": 43.8, "learning_rate": 0.001, "loss": 2.4957, "step": 228108 }, { "epoch": 43.8, "learning_rate": 0.001, "loss": 2.4864, "step": 228120 }, { "epoch": 43.8, "eval_ag_news_accuracy": 0.329375, "eval_ag_news_bleu_score": 5.218972618216884, "eval_ag_news_bleu_score_sem": 0.16391128781218012, "eval_ag_news_emb_cos_sim": 0.8281189203262329, "eval_ag_news_emb_cos_sim_sem": 0.0063615096540657005, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.458378791809082, "eval_ag_news_n_ngrams_match_1": 14.552, "eval_ag_news_n_ngrams_match_2": 3.332, "eval_ag_news_n_ngrams_match_3": 0.986, "eval_ag_news_num_pred_words": 46.716, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.765436361679612, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3652443826359567, "eval_ag_news_runtime": 14.1764, "eval_ag_news_samples_per_second": 35.27, "eval_ag_news_steps_per_second": 0.071, "eval_ag_news_token_set_f1": 0.36284572280796107, "eval_ag_news_token_set_f1_sem": 0.004418511882036516, "eval_ag_news_token_set_precision": 0.3495190759914665, "eval_ag_news_token_set_recall": 0.39328671755470007, "eval_ag_news_true_num_tokens": 56.09375, "step": 228125 }, { "epoch": 43.8, "eval_anthropic_toxic_prompts_accuracy": 0.1163125, "eval_anthropic_toxic_prompts_bleu_score": 3.2897430739701234, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1206368963141211, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6725733280181885, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009591677051166017, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1959078311920166, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.312, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 46.248, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.432344071333684, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21941529448734093, "eval_anthropic_toxic_prompts_runtime": 13.7867, "eval_anthropic_toxic_prompts_samples_per_second": 36.267, "eval_anthropic_toxic_prompts_steps_per_second": 0.073, "eval_anthropic_toxic_prompts_token_set_f1": 0.35693994853124955, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006663661397444088, "eval_anthropic_toxic_prompts_token_set_precision": 0.44442798369884906, "eval_anthropic_toxic_prompts_token_set_recall": 0.3271182805600298, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 228125 }, { "epoch": 43.8, "eval_arxiv_accuracy": 0.35428125, "eval_arxiv_bleu_score": 4.411160135841091, "eval_arxiv_bleu_score_sem": 0.12380017158173169, "eval_arxiv_emb_cos_sim": 0.7858262062072754, "eval_arxiv_emb_cos_sim_sem": 0.006228739837266796, "eval_arxiv_emb_top1_equal": 0.3125, "eval_arxiv_emb_top1_equal_sem": 0.041130074229814934, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.311851739883423, "eval_arxiv_n_ngrams_match_1": 15.628, "eval_arxiv_n_ngrams_match_2": 3.124, "eval_arxiv_n_ngrams_match_3": 0.702, "eval_arxiv_num_pred_words": 39.984, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.435882580930063, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37460315893574037, "eval_arxiv_runtime": 14.2604, "eval_arxiv_samples_per_second": 35.062, "eval_arxiv_steps_per_second": 0.07, "eval_arxiv_token_set_f1": 0.3687741371131204, "eval_arxiv_token_set_f1_sem": 0.0041142125403771344, "eval_arxiv_token_set_precision": 0.31997554079365725, "eval_arxiv_token_set_recall": 0.45575428448005867, "eval_arxiv_true_num_tokens": 64.0, "step": 228125 }, { "epoch": 43.8, "eval_python_code_alpaca_accuracy": 0.16203125, "eval_python_code_alpaca_bleu_score": 4.534021089959997, "eval_python_code_alpaca_bleu_score_sem": 0.14310167297927237, "eval_python_code_alpaca_emb_cos_sim": 0.762942373752594, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007049628078812907, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.852388858795166, "eval_python_code_alpaca_n_ngrams_match_1": 9.664, "eval_python_code_alpaca_n_ngrams_match_2": 2.772, "eval_python_code_alpaca_n_ngrams_match_3": 0.892, "eval_python_code_alpaca_num_pred_words": 42.334, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.329129277197286, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34003918799971394, "eval_python_code_alpaca_runtime": 15.8406, "eval_python_code_alpaca_samples_per_second": 31.564, "eval_python_code_alpaca_steps_per_second": 0.063, "eval_python_code_alpaca_token_set_f1": 0.4756593455806314, "eval_python_code_alpaca_token_set_f1_sem": 0.005354015323394554, "eval_python_code_alpaca_token_set_precision": 0.5266720470534918, "eval_python_code_alpaca_token_set_recall": 0.4557619544848569, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 228125 }, { "epoch": 43.8, "eval_wikibio_accuracy": 0.334125, "eval_wikibio_bleu_score": 6.091998676287523, "eval_wikibio_bleu_score_sem": 0.21867120931586123, "eval_wikibio_emb_cos_sim": 0.7392401099205017, "eval_wikibio_emb_cos_sim_sem": 0.00929029613272125, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5713040828704834, "eval_wikibio_n_ngrams_match_1": 10.034, "eval_wikibio_n_ngrams_match_2": 3.384, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 35.45, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 35.56293994583742, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35867961807613336, "eval_wikibio_runtime": 14.1776, "eval_wikibio_samples_per_second": 35.267, "eval_wikibio_steps_per_second": 0.071, "eval_wikibio_token_set_f1": 0.32150157306675037, "eval_wikibio_token_set_f1_sem": 0.005495988730047056, "eval_wikibio_token_set_precision": 0.3296388393259386, "eval_wikibio_token_set_recall": 0.33309311224423366, "eval_wikibio_true_num_tokens": 61.1328125, "step": 228125 }, { "epoch": 43.8, "eval_nq_accuracy": 0.53803125, "eval_nq_bleu_score": 12.22990612075819, "eval_nq_bleu_score_sem": 0.486046002316013, "eval_nq_emb_cos_sim": 0.8454262018203735, "eval_nq_emb_cos_sim_sem": 0.006555047323883218, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1076643466949463, "eval_nq_n_ngrams_match_1": 23.516, "eval_nq_n_ngrams_match_2": 8.832, "eval_nq_n_ngrams_match_3": 4.096, "eval_nq_num_pred_words": 49.03, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.228998733321404, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45847912262809987, "eval_nq_runtime": 15.165, "eval_nq_samples_per_second": 32.971, "eval_nq_steps_per_second": 0.066, "eval_nq_token_set_f1": 0.47190121862554396, "eval_nq_token_set_f1_sem": 0.004918196606022275, "eval_nq_token_set_precision": 0.4297765646887952, "eval_nq_token_set_recall": 0.5326213888032779, "eval_nq_true_num_tokens": 64.0, "step": 228125 }, { "epoch": 43.8, "learning_rate": 0.001, "loss": 2.488, "step": 228132 }, { "epoch": 43.81, "learning_rate": 0.001, "loss": 2.4964, "step": 228144 }, { "epoch": 43.81, "learning_rate": 0.001, "loss": 2.4913, "step": 228156 }, { "epoch": 43.81, "learning_rate": 0.001, "loss": 2.4906, "step": 228168 }, { "epoch": 43.81, "learning_rate": 0.001, "loss": 2.4798, "step": 228180 }, { "epoch": 43.82, "learning_rate": 0.001, "loss": 2.4935, "step": 228192 }, { "epoch": 43.82, "learning_rate": 0.001, "loss": 2.4941, "step": 228204 }, { "epoch": 43.82, "learning_rate": 0.001, "loss": 2.4916, "step": 228216 }, { "epoch": 43.82, "learning_rate": 0.001, "loss": 2.4896, "step": 228228 }, { "epoch": 43.82, "learning_rate": 0.001, "loss": 2.4917, "step": 228240 }, { "epoch": 43.83, "learning_rate": 0.001, "loss": 2.4952, "step": 228252 }, { "epoch": 43.83, "learning_rate": 0.001, "loss": 2.4865, "step": 228264 }, { "epoch": 43.83, "learning_rate": 0.001, "loss": 2.4912, "step": 228276 }, { "epoch": 43.83, "learning_rate": 0.001, "loss": 2.4981, "step": 228288 }, { "epoch": 43.84, "learning_rate": 0.001, "loss": 2.4932, "step": 228300 }, { "epoch": 43.84, "learning_rate": 0.001, "loss": 2.4995, "step": 228312 }, { "epoch": 43.84, "learning_rate": 0.001, "loss": 2.4906, "step": 228324 }, { "epoch": 43.84, "learning_rate": 0.001, "loss": 2.4885, "step": 228336 }, { "epoch": 43.85, "learning_rate": 0.001, "loss": 2.4986, "step": 228348 }, { "epoch": 43.85, "learning_rate": 0.001, "loss": 2.5008, "step": 228360 }, { "epoch": 43.85, "learning_rate": 0.001, "loss": 2.4961, "step": 228372 }, { "epoch": 43.85, "learning_rate": 0.001, "loss": 2.4985, "step": 228384 }, { "epoch": 43.85, "learning_rate": 0.001, "loss": 2.4949, "step": 228396 }, { "epoch": 43.86, "learning_rate": 0.001, "loss": 2.4904, "step": 228408 }, { "epoch": 43.86, "learning_rate": 0.001, "loss": 2.4824, "step": 228420 }, { "epoch": 43.86, "learning_rate": 0.001, "loss": 2.4962, "step": 228432 }, { "epoch": 43.86, "learning_rate": 0.001, "loss": 2.4946, "step": 228444 }, { "epoch": 43.87, "learning_rate": 0.001, "loss": 2.4916, "step": 228456 }, { "epoch": 43.87, "learning_rate": 0.001, "loss": 2.4953, "step": 228468 }, { "epoch": 43.87, "learning_rate": 0.001, "loss": 2.4932, "step": 228480 }, { "epoch": 43.87, "learning_rate": 0.001, "loss": 2.4829, "step": 228492 }, { "epoch": 43.88, "learning_rate": 0.001, "loss": 2.4964, "step": 228504 }, { "epoch": 43.88, "learning_rate": 0.001, "loss": 2.4867, "step": 228516 }, { "epoch": 43.88, "learning_rate": 0.001, "loss": 2.4924, "step": 228528 }, { "epoch": 43.88, "learning_rate": 0.001, "loss": 2.4875, "step": 228540 }, { "epoch": 43.88, "learning_rate": 0.001, "loss": 2.4978, "step": 228552 }, { "epoch": 43.89, "learning_rate": 0.001, "loss": 2.4874, "step": 228564 }, { "epoch": 43.89, "learning_rate": 0.001, "loss": 2.4885, "step": 228576 }, { "epoch": 43.89, "learning_rate": 0.001, "loss": 2.4903, "step": 228588 }, { "epoch": 43.89, "learning_rate": 0.001, "loss": 2.4886, "step": 228600 }, { "epoch": 43.9, "learning_rate": 0.001, "loss": 2.4924, "step": 228612 }, { "epoch": 43.9, "learning_rate": 0.001, "loss": 2.4908, "step": 228624 }, { "epoch": 43.9, "learning_rate": 0.001, "loss": 2.5114, "step": 228636 }, { "epoch": 43.9, "learning_rate": 0.001, "loss": 2.4893, "step": 228648 }, { "epoch": 43.91, "learning_rate": 0.001, "loss": 2.4994, "step": 228660 }, { "epoch": 43.91, "learning_rate": 0.001, "loss": 2.485, "step": 228672 }, { "epoch": 43.91, "learning_rate": 0.001, "loss": 2.4948, "step": 228684 }, { "epoch": 43.91, "learning_rate": 0.001, "loss": 2.492, "step": 228696 }, { "epoch": 43.91, "learning_rate": 0.001, "loss": 2.4995, "step": 228708 }, { "epoch": 43.92, "learning_rate": 0.001, "loss": 2.4959, "step": 228720 }, { "epoch": 43.92, "learning_rate": 0.001, "loss": 2.495, "step": 228732 }, { "epoch": 43.92, "learning_rate": 0.001, "loss": 2.4848, "step": 228744 }, { "epoch": 43.92, "eval_ag_news_accuracy": 0.32903125, "eval_ag_news_bleu_score": 5.122464930551099, "eval_ag_news_bleu_score_sem": 0.15941103150767458, "eval_ag_news_emb_cos_sim": 0.826166570186615, "eval_ag_news_emb_cos_sim_sem": 0.006659244870114276, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.45980167388916, "eval_ag_news_n_ngrams_match_1": 14.496, "eval_ag_news_n_ngrams_match_2": 3.378, "eval_ag_news_n_ngrams_match_3": 0.972, "eval_ag_news_num_pred_words": 46.59, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.810667003147977, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3639347275150985, "eval_ag_news_runtime": 14.209, "eval_ag_news_samples_per_second": 35.189, "eval_ag_news_steps_per_second": 0.07, "eval_ag_news_token_set_f1": 0.3605539755511987, "eval_ag_news_token_set_f1_sem": 0.0043332377796799285, "eval_ag_news_token_set_precision": 0.34775428086718624, "eval_ag_news_token_set_recall": 0.3895662581363894, "eval_ag_news_true_num_tokens": 56.09375, "step": 228750 }, { "epoch": 43.92, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.279443862478997, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12529580128001774, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6832237243652344, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009008737403966762, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.197946071624756, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.378, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.016, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.774, "eval_anthropic_toxic_prompts_num_pred_words": 46.818, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.482193848544117, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2201173881010906, "eval_anthropic_toxic_prompts_runtime": 13.5463, "eval_anthropic_toxic_prompts_samples_per_second": 36.91, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.3684695633841047, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0067072920874347375, "eval_anthropic_toxic_prompts_token_set_precision": 0.44838591841292746, "eval_anthropic_toxic_prompts_token_set_recall": 0.3385878893256619, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 228750 }, { "epoch": 43.92, "eval_arxiv_accuracy": 0.35428125, "eval_arxiv_bleu_score": 4.603703466028948, "eval_arxiv_bleu_score_sem": 0.13452632685735333, "eval_arxiv_emb_cos_sim": 0.7816855907440186, "eval_arxiv_emb_cos_sim_sem": 0.007311415984348738, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3170077800750732, "eval_arxiv_n_ngrams_match_1": 15.546, "eval_arxiv_n_ngrams_match_2": 3.206, "eval_arxiv_n_ngrams_match_3": 0.758, "eval_arxiv_num_pred_words": 39.946, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.57770840984758, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3739435874807563, "eval_arxiv_runtime": 15.0488, "eval_arxiv_samples_per_second": 33.225, "eval_arxiv_steps_per_second": 0.066, "eval_arxiv_token_set_f1": 0.3662341742629446, "eval_arxiv_token_set_f1_sem": 0.00450767857689167, "eval_arxiv_token_set_precision": 0.31821886913500225, "eval_arxiv_token_set_recall": 0.4488378199357054, "eval_arxiv_true_num_tokens": 64.0, "step": 228750 }, { "epoch": 43.92, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.74719281991187, "eval_python_code_alpaca_bleu_score_sem": 0.15097743269272845, "eval_python_code_alpaca_emb_cos_sim": 0.7565107345581055, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008246849481979839, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.812595844268799, "eval_python_code_alpaca_n_ngrams_match_1": 9.754, "eval_python_code_alpaca_n_ngrams_match_2": 2.99, "eval_python_code_alpaca_n_ngrams_match_3": 0.984, "eval_python_code_alpaca_num_pred_words": 42.192, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.65309099045313, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3398873282204268, "eval_python_code_alpaca_runtime": 13.4391, "eval_python_code_alpaca_samples_per_second": 37.205, "eval_python_code_alpaca_steps_per_second": 0.074, "eval_python_code_alpaca_token_set_f1": 0.47850209079121614, "eval_python_code_alpaca_token_set_f1_sem": 0.005644283108384785, "eval_python_code_alpaca_token_set_precision": 0.5340946357203107, "eval_python_code_alpaca_token_set_recall": 0.459099245729098, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 228750 }, { "epoch": 43.92, "eval_wikibio_accuracy": 0.3329375, "eval_wikibio_bleu_score": 6.23548243281245, "eval_wikibio_bleu_score_sem": 0.22414342021633893, "eval_wikibio_emb_cos_sim": 0.7375278472900391, "eval_wikibio_emb_cos_sim_sem": 0.009473264821730153, "eval_wikibio_emb_top1_equal": 0.15625, "eval_wikibio_emb_top1_equal_sem": 0.03221922156442571, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.595562219619751, "eval_wikibio_n_ngrams_match_1": 10.052, "eval_wikibio_n_ngrams_match_2": 3.432, "eval_wikibio_n_ngrams_match_3": 1.314, "eval_wikibio_num_pred_words": 35.474, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.43617936529365, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35709053868168017, "eval_wikibio_runtime": 13.9262, "eval_wikibio_samples_per_second": 35.904, "eval_wikibio_steps_per_second": 0.072, "eval_wikibio_token_set_f1": 0.3209013043262027, "eval_wikibio_token_set_f1_sem": 0.005646028256609949, "eval_wikibio_token_set_precision": 0.32596328260921825, "eval_wikibio_token_set_recall": 0.3337972872543651, "eval_wikibio_true_num_tokens": 61.1328125, "step": 228750 }, { "epoch": 43.92, "eval_nq_accuracy": 0.53684375, "eval_nq_bleu_score": 12.445183379049432, "eval_nq_bleu_score_sem": 0.5044772499170779, "eval_nq_emb_cos_sim": 0.8396387100219727, "eval_nq_emb_cos_sim_sem": 0.0067118592835795545, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1118662357330322, "eval_nq_n_ngrams_match_1": 23.416, "eval_nq_n_ngrams_match_2": 8.888, "eval_nq_n_ngrams_match_3": 4.206, "eval_nq_num_pred_words": 48.918, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.263648819821245, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45463262098736623, "eval_nq_runtime": 14.6789, "eval_nq_samples_per_second": 34.062, "eval_nq_steps_per_second": 0.068, "eval_nq_token_set_f1": 0.4687988198064519, "eval_nq_token_set_f1_sem": 0.004966175925347379, "eval_nq_token_set_precision": 0.4260689416833156, "eval_nq_token_set_recall": 0.5291435665839637, "eval_nq_true_num_tokens": 64.0, "step": 228750 }, { "epoch": 43.92, "learning_rate": 0.001, "loss": 2.496, "step": 228756 }, { "epoch": 43.93, "learning_rate": 0.001, "loss": 2.4962, "step": 228768 }, { "epoch": 43.93, "learning_rate": 0.001, "loss": 2.4997, "step": 228780 }, { "epoch": 43.93, "learning_rate": 0.001, "loss": 2.4858, "step": 228792 }, { "epoch": 43.93, "learning_rate": 0.001, "loss": 2.4988, "step": 228804 }, { "epoch": 43.94, "learning_rate": 0.001, "loss": 2.4994, "step": 228816 }, { "epoch": 43.94, "learning_rate": 0.001, "loss": 2.4931, "step": 228828 }, { "epoch": 43.94, "learning_rate": 0.001, "loss": 2.5002, "step": 228840 }, { "epoch": 43.94, "learning_rate": 0.001, "loss": 2.5085, "step": 228852 }, { "epoch": 43.94, "learning_rate": 0.001, "loss": 2.4924, "step": 228864 }, { "epoch": 43.95, "learning_rate": 0.001, "loss": 2.4948, "step": 228876 }, { "epoch": 43.95, "learning_rate": 0.001, "loss": 2.4935, "step": 228888 }, { "epoch": 43.95, "learning_rate": 0.001, "loss": 2.4944, "step": 228900 }, { "epoch": 43.95, "learning_rate": 0.001, "loss": 2.488, "step": 228912 }, { "epoch": 43.96, "learning_rate": 0.001, "loss": 2.4869, "step": 228924 }, { "epoch": 43.96, "learning_rate": 0.001, "loss": 2.4941, "step": 228936 }, { "epoch": 43.96, "learning_rate": 0.001, "loss": 2.4929, "step": 228948 }, { "epoch": 43.96, "learning_rate": 0.001, "loss": 2.4989, "step": 228960 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 2.5007, "step": 228972 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 2.491, "step": 228984 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 2.5008, "step": 228996 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 2.4988, "step": 229008 }, { "epoch": 43.97, "learning_rate": 0.001, "loss": 2.4821, "step": 229020 }, { "epoch": 43.98, "learning_rate": 0.001, "loss": 2.4989, "step": 229032 }, { "epoch": 43.98, "learning_rate": 0.001, "loss": 2.4961, "step": 229044 }, { "epoch": 43.98, "learning_rate": 0.001, "loss": 2.506, "step": 229056 }, { "epoch": 43.98, "learning_rate": 0.001, "loss": 2.4899, "step": 229068 }, { "epoch": 43.99, "learning_rate": 0.001, "loss": 2.4999, "step": 229080 }, { "epoch": 43.99, "learning_rate": 0.001, "loss": 2.4953, "step": 229092 }, { "epoch": 43.99, "learning_rate": 0.001, "loss": 2.5012, "step": 229104 }, { "epoch": 43.99, "learning_rate": 0.001, "loss": 2.4966, "step": 229116 }, { "epoch": 44.0, "learning_rate": 0.001, "loss": 2.5057, "step": 229128 }, { "epoch": 44.0, "learning_rate": 0.001, "loss": 2.4899, "step": 229140 }, { "epoch": 44.0, "learning_rate": 0.001, "loss": 2.4947, "step": 229152 }, { "epoch": 44.0, "learning_rate": 0.001, "loss": 2.4887, "step": 229164 }, { "epoch": 44.0, "learning_rate": 0.001, "loss": 2.4797, "step": 229176 }, { "epoch": 44.01, "learning_rate": 0.001, "loss": 2.4715, "step": 229188 }, { "epoch": 44.01, "learning_rate": 0.001, "loss": 2.4862, "step": 229200 }, { "epoch": 44.01, "learning_rate": 0.001, "loss": 2.483, "step": 229212 }, { "epoch": 44.01, "learning_rate": 0.001, "loss": 2.4846, "step": 229224 }, { "epoch": 44.02, "learning_rate": 0.001, "loss": 2.4829, "step": 229236 }, { "epoch": 44.02, "learning_rate": 0.001, "loss": 2.4755, "step": 229248 }, { "epoch": 44.02, "learning_rate": 0.001, "loss": 2.4859, "step": 229260 }, { "epoch": 44.02, "learning_rate": 0.001, "loss": 2.4794, "step": 229272 }, { "epoch": 44.03, "learning_rate": 0.001, "loss": 2.4852, "step": 229284 }, { "epoch": 44.03, "learning_rate": 0.001, "loss": 2.4847, "step": 229296 }, { "epoch": 44.03, "learning_rate": 0.001, "loss": 2.4606, "step": 229308 }, { "epoch": 44.03, "learning_rate": 0.001, "loss": 2.4801, "step": 229320 }, { "epoch": 44.03, "learning_rate": 0.001, "loss": 2.4811, "step": 229332 }, { "epoch": 44.04, "learning_rate": 0.001, "loss": 2.4746, "step": 229344 }, { "epoch": 44.04, "learning_rate": 0.001, "loss": 2.4775, "step": 229356 }, { "epoch": 44.04, "learning_rate": 0.001, "loss": 2.4881, "step": 229368 }, { "epoch": 44.04, "eval_ag_news_accuracy": 0.33, "eval_ag_news_bleu_score": 5.103983653986498, "eval_ag_news_bleu_score_sem": 0.1590122618243574, "eval_ag_news_emb_cos_sim": 0.8271461725234985, "eval_ag_news_emb_cos_sim_sem": 0.006241688139382752, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4644253253936768, "eval_ag_news_n_ngrams_match_1": 14.578, "eval_ag_news_n_ngrams_match_2": 3.364, "eval_ag_news_n_ngrams_match_3": 0.996, "eval_ag_news_num_pred_words": 46.808, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.958088992814854, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36493808296155045, "eval_ag_news_runtime": 18.7889, "eval_ag_news_samples_per_second": 26.611, "eval_ag_news_steps_per_second": 0.053, "eval_ag_news_token_set_f1": 0.36322957537705597, "eval_ag_news_token_set_f1_sem": 0.004421643630767436, "eval_ag_news_token_set_precision": 0.3502102824614946, "eval_ag_news_token_set_recall": 0.3909909421278378, "eval_ag_news_true_num_tokens": 56.09375, "step": 229375 }, { "epoch": 44.04, "eval_anthropic_toxic_prompts_accuracy": 0.1175625, "eval_anthropic_toxic_prompts_bleu_score": 3.3095375669636113, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12708507076971015, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6871490478515625, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009180323809699692, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1824052333831787, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.054, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 46.796, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.10466121927512, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2212104118596558, "eval_anthropic_toxic_prompts_runtime": 18.136, "eval_anthropic_toxic_prompts_samples_per_second": 27.57, "eval_anthropic_toxic_prompts_steps_per_second": 0.055, "eval_anthropic_toxic_prompts_token_set_f1": 0.3600745721199604, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006589481405737642, "eval_anthropic_toxic_prompts_token_set_precision": 0.452521602022359, "eval_anthropic_toxic_prompts_token_set_recall": 0.3250802004853819, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 229375 }, { "epoch": 44.04, "eval_arxiv_accuracy": 0.35415625, "eval_arxiv_bleu_score": 4.567469995224752, "eval_arxiv_bleu_score_sem": 0.13634272388419386, "eval_arxiv_emb_cos_sim": 0.7857984304428101, "eval_arxiv_emb_cos_sim_sem": 0.006438691728945783, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3286006450653076, "eval_arxiv_n_ngrams_match_1": 15.826, "eval_arxiv_n_ngrams_match_2": 3.078, "eval_arxiv_n_ngrams_match_3": 0.726, "eval_arxiv_num_pred_words": 40.838, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.899273388501747, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37881624225660093, "eval_arxiv_runtime": 18.9059, "eval_arxiv_samples_per_second": 26.447, "eval_arxiv_steps_per_second": 0.053, "eval_arxiv_token_set_f1": 0.3689119879339914, "eval_arxiv_token_set_f1_sem": 0.0042171397295453685, "eval_arxiv_token_set_precision": 0.32291774537253054, "eval_arxiv_token_set_recall": 0.44560393164832557, "eval_arxiv_true_num_tokens": 64.0, "step": 229375 }, { "epoch": 44.04, "eval_python_code_alpaca_accuracy": 0.16428125, "eval_python_code_alpaca_bleu_score": 4.751828922133397, "eval_python_code_alpaca_bleu_score_sem": 0.15112619433172716, "eval_python_code_alpaca_emb_cos_sim": 0.7750812768936157, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006498989381185697, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8149120807647705, "eval_python_code_alpaca_n_ngrams_match_1": 10.072, "eval_python_code_alpaca_n_ngrams_match_2": 2.962, "eval_python_code_alpaca_n_ngrams_match_3": 0.964, "eval_python_code_alpaca_num_pred_words": 42.562, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.691708193598654, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3500105254019824, "eval_python_code_alpaca_runtime": 14.1792, "eval_python_code_alpaca_samples_per_second": 35.263, "eval_python_code_alpaca_steps_per_second": 0.071, "eval_python_code_alpaca_token_set_f1": 0.4865076752927683, "eval_python_code_alpaca_token_set_f1_sem": 0.005367249972759482, "eval_python_code_alpaca_token_set_precision": 0.5536583859166705, "eval_python_code_alpaca_token_set_recall": 0.4552995905336038, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 229375 }, { "epoch": 44.04, "eval_wikibio_accuracy": 0.33190625, "eval_wikibio_bleu_score": 6.200101019233036, "eval_wikibio_bleu_score_sem": 0.21423849953372273, "eval_wikibio_emb_cos_sim": 0.7459069490432739, "eval_wikibio_emb_cos_sim_sem": 0.008310609761242727, "eval_wikibio_emb_top1_equal": 0.1484375, "eval_wikibio_emb_top1_equal_sem": 0.031548465007086954, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.622779607772827, "eval_wikibio_n_ngrams_match_1": 10.402, "eval_wikibio_n_ngrams_match_2": 3.514, "eval_wikibio_n_ngrams_match_3": 1.304, "eval_wikibio_num_pred_words": 37.074, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.44149598864582, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3701234283837513, "eval_wikibio_runtime": 20.7974, "eval_wikibio_samples_per_second": 24.041, "eval_wikibio_steps_per_second": 0.048, "eval_wikibio_token_set_f1": 0.3283242409291427, "eval_wikibio_token_set_f1_sem": 0.004969229621705694, "eval_wikibio_token_set_precision": 0.3386983843491412, "eval_wikibio_token_set_recall": 0.3341984644878862, "eval_wikibio_true_num_tokens": 61.1328125, "step": 229375 }, { "epoch": 44.04, "eval_nq_accuracy": 0.53759375, "eval_nq_bleu_score": 12.1953972848761, "eval_nq_bleu_score_sem": 0.49263520022599105, "eval_nq_emb_cos_sim": 0.8387718796730042, "eval_nq_emb_cos_sim_sem": 0.007476274040843608, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1147685050964355, "eval_nq_n_ngrams_match_1": 23.472, "eval_nq_n_ngrams_match_2": 8.808, "eval_nq_n_ngrams_match_3": 4.108, "eval_nq_num_pred_words": 48.778, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.287666991363565, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4580419596826657, "eval_nq_runtime": 14.01, "eval_nq_samples_per_second": 35.689, "eval_nq_steps_per_second": 0.071, "eval_nq_token_set_f1": 0.47118302928665534, "eval_nq_token_set_f1_sem": 0.005037996171470476, "eval_nq_token_set_precision": 0.42921038369893383, "eval_nq_token_set_recall": 0.5316751312424766, "eval_nq_true_num_tokens": 64.0, "step": 229375 }, { "epoch": 44.04, "learning_rate": 0.001, "loss": 2.4838, "step": 229380 }, { "epoch": 44.05, "learning_rate": 0.001, "loss": 2.4858, "step": 229392 }, { "epoch": 44.05, "learning_rate": 0.001, "loss": 2.479, "step": 229404 }, { "epoch": 44.05, "learning_rate": 0.001, "loss": 2.49, "step": 229416 }, { "epoch": 44.05, "learning_rate": 0.001, "loss": 2.4854, "step": 229428 }, { "epoch": 44.06, "learning_rate": 0.001, "loss": 2.4797, "step": 229440 }, { "epoch": 44.06, "learning_rate": 0.001, "loss": 2.4802, "step": 229452 }, { "epoch": 44.06, "learning_rate": 0.001, "loss": 2.4842, "step": 229464 }, { "epoch": 44.06, "learning_rate": 0.001, "loss": 2.4898, "step": 229476 }, { "epoch": 44.06, "learning_rate": 0.001, "loss": 2.4953, "step": 229488 }, { "epoch": 44.07, "learning_rate": 0.001, "loss": 2.4806, "step": 229500 }, { "epoch": 44.07, "learning_rate": 0.001, "loss": 2.4805, "step": 229512 }, { "epoch": 44.07, "learning_rate": 0.001, "loss": 2.4769, "step": 229524 }, { "epoch": 44.07, "learning_rate": 0.001, "loss": 2.491, "step": 229536 }, { "epoch": 44.08, "learning_rate": 0.001, "loss": 2.4787, "step": 229548 }, { "epoch": 44.08, "learning_rate": 0.001, "loss": 2.4894, "step": 229560 }, { "epoch": 44.08, "learning_rate": 0.001, "loss": 2.4824, "step": 229572 }, { "epoch": 44.08, "learning_rate": 0.001, "loss": 2.4893, "step": 229584 }, { "epoch": 44.09, "learning_rate": 0.001, "loss": 2.4813, "step": 229596 }, { "epoch": 44.09, "learning_rate": 0.001, "loss": 2.4841, "step": 229608 }, { "epoch": 44.09, "learning_rate": 0.001, "loss": 2.4793, "step": 229620 }, { "epoch": 44.09, "learning_rate": 0.001, "loss": 2.495, "step": 229632 }, { "epoch": 44.09, "learning_rate": 0.001, "loss": 2.48, "step": 229644 }, { "epoch": 44.1, "learning_rate": 0.001, "loss": 2.4785, "step": 229656 }, { "epoch": 44.1, "learning_rate": 0.001, "loss": 2.4749, "step": 229668 }, { "epoch": 44.1, "learning_rate": 0.001, "loss": 2.4793, "step": 229680 }, { "epoch": 44.1, "learning_rate": 0.001, "loss": 2.4742, "step": 229692 }, { "epoch": 44.11, "learning_rate": 0.001, "loss": 2.4924, "step": 229704 }, { "epoch": 44.11, "learning_rate": 0.001, "loss": 2.4865, "step": 229716 }, { "epoch": 44.11, "learning_rate": 0.001, "loss": 2.4803, "step": 229728 }, { "epoch": 44.11, "learning_rate": 0.001, "loss": 2.4817, "step": 229740 }, { "epoch": 44.12, "learning_rate": 0.001, "loss": 2.4779, "step": 229752 }, { "epoch": 44.12, "learning_rate": 0.001, "loss": 2.4959, "step": 229764 }, { "epoch": 44.12, "learning_rate": 0.001, "loss": 2.4802, "step": 229776 }, { "epoch": 44.12, "learning_rate": 0.001, "loss": 2.4892, "step": 229788 }, { "epoch": 44.12, "learning_rate": 0.001, "loss": 2.4855, "step": 229800 }, { "epoch": 44.13, "learning_rate": 0.001, "loss": 2.4786, "step": 229812 }, { "epoch": 44.13, "learning_rate": 0.001, "loss": 2.4892, "step": 229824 }, { "epoch": 44.13, "learning_rate": 0.001, "loss": 2.4821, "step": 229836 }, { "epoch": 44.13, "learning_rate": 0.001, "loss": 2.4797, "step": 229848 }, { "epoch": 44.14, "learning_rate": 0.001, "loss": 2.4747, "step": 229860 }, { "epoch": 44.14, "learning_rate": 0.001, "loss": 2.4869, "step": 229872 }, { "epoch": 44.14, "learning_rate": 0.001, "loss": 2.4893, "step": 229884 }, { "epoch": 44.14, "learning_rate": 0.001, "loss": 2.4879, "step": 229896 }, { "epoch": 44.15, "learning_rate": 0.001, "loss": 2.4934, "step": 229908 }, { "epoch": 44.15, "learning_rate": 0.001, "loss": 2.4865, "step": 229920 }, { "epoch": 44.15, "learning_rate": 0.001, "loss": 2.4746, "step": 229932 }, { "epoch": 44.15, "learning_rate": 0.001, "loss": 2.4702, "step": 229944 }, { "epoch": 44.15, "learning_rate": 0.001, "loss": 2.488, "step": 229956 }, { "epoch": 44.16, "learning_rate": 0.001, "loss": 2.4855, "step": 229968 }, { "epoch": 44.16, "learning_rate": 0.001, "loss": 2.4828, "step": 229980 }, { "epoch": 44.16, "learning_rate": 0.001, "loss": 2.4904, "step": 229992 }, { "epoch": 44.16, "eval_ag_news_accuracy": 0.3304375, "eval_ag_news_bleu_score": 5.227999143175835, "eval_ag_news_bleu_score_sem": 0.16745849992881312, "eval_ag_news_emb_cos_sim": 0.8245687484741211, "eval_ag_news_emb_cos_sim_sem": 0.006772942579417492, "eval_ag_news_emb_top1_equal": 0.3125, "eval_ag_news_emb_top1_equal_sem": 0.041130074229814934, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.467036247253418, "eval_ag_news_n_ngrams_match_1": 14.582, "eval_ag_news_n_ngrams_match_2": 3.316, "eval_ag_news_n_ngrams_match_3": 0.958, "eval_ag_news_num_pred_words": 46.686, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.04163808857976, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.363437163091331, "eval_ag_news_runtime": 22.7358, "eval_ag_news_samples_per_second": 21.992, "eval_ag_news_steps_per_second": 0.044, "eval_ag_news_token_set_f1": 0.36366623360661227, "eval_ag_news_token_set_f1_sem": 0.004404099270188432, "eval_ag_news_token_set_precision": 0.3501714071343526, "eval_ag_news_token_set_recall": 0.3909533064624547, "eval_ag_news_true_num_tokens": 56.09375, "step": 230000 }, { "epoch": 44.16, "eval_anthropic_toxic_prompts_accuracy": 0.11678125, "eval_anthropic_toxic_prompts_bleu_score": 3.2214823884686625, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11874571914592888, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6864318251609802, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008864945527737752, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2236599922180176, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.4, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758, "eval_anthropic_toxic_prompts_num_pred_words": 46.558, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.11989074376225, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21994995218263774, "eval_anthropic_toxic_prompts_runtime": 13.4739, "eval_anthropic_toxic_prompts_samples_per_second": 37.109, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.36254226339893136, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065839280307947775, "eval_anthropic_toxic_prompts_token_set_precision": 0.4506151153465686, "eval_anthropic_toxic_prompts_token_set_recall": 0.32877529479373396, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 230000 }, { "epoch": 44.16, "eval_arxiv_accuracy": 0.3510625, "eval_arxiv_bleu_score": 4.666007287703487, "eval_arxiv_bleu_score_sem": 0.14038904396579627, "eval_arxiv_emb_cos_sim": 0.7872946858406067, "eval_arxiv_emb_cos_sim_sem": 0.006174981531991151, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.332165002822876, "eval_arxiv_n_ngrams_match_1": 15.68, "eval_arxiv_n_ngrams_match_2": 3.21, "eval_arxiv_n_ngrams_match_3": 0.784, "eval_arxiv_num_pred_words": 40.328, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.99889381598616, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37623712352710803, "eval_arxiv_runtime": 13.8737, "eval_arxiv_samples_per_second": 36.039, "eval_arxiv_steps_per_second": 0.072, "eval_arxiv_token_set_f1": 0.3670198305060281, "eval_arxiv_token_set_f1_sem": 0.0043571585470539385, "eval_arxiv_token_set_precision": 0.3207681752154741, "eval_arxiv_token_set_recall": 0.44425783673944136, "eval_arxiv_true_num_tokens": 64.0, "step": 230000 }, { "epoch": 44.16, "eval_python_code_alpaca_accuracy": 0.16125, "eval_python_code_alpaca_bleu_score": 4.6031537781665355, "eval_python_code_alpaca_bleu_score_sem": 0.14470936890987762, "eval_python_code_alpaca_emb_cos_sim": 0.7584249377250671, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008684214572966975, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.825847625732422, "eval_python_code_alpaca_n_ngrams_match_1": 9.668, "eval_python_code_alpaca_n_ngrams_match_2": 2.826, "eval_python_code_alpaca_n_ngrams_match_3": 0.932, "eval_python_code_alpaca_num_pred_words": 41.94, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.875242815680284, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3410002505428078, "eval_python_code_alpaca_runtime": 22.5054, "eval_python_code_alpaca_samples_per_second": 22.217, "eval_python_code_alpaca_steps_per_second": 0.044, "eval_python_code_alpaca_token_set_f1": 0.47470388797143237, "eval_python_code_alpaca_token_set_f1_sem": 0.005552881437807432, "eval_python_code_alpaca_token_set_precision": 0.5300151500823808, "eval_python_code_alpaca_token_set_recall": 0.4538208418854002, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 230000 }, { "epoch": 44.16, "eval_wikibio_accuracy": 0.3335, "eval_wikibio_bleu_score": 6.479867851690654, "eval_wikibio_bleu_score_sem": 0.22298200379478933, "eval_wikibio_emb_cos_sim": 0.7464686036109924, "eval_wikibio_emb_cos_sim_sem": 0.008253369731153376, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6139187812805176, "eval_wikibio_n_ngrams_match_1": 10.244, "eval_wikibio_n_ngrams_match_2": 3.55, "eval_wikibio_n_ngrams_match_3": 1.356, "eval_wikibio_num_pred_words": 35.666, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.1111989027452, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3695829595970824, "eval_wikibio_runtime": 13.9807, "eval_wikibio_samples_per_second": 35.764, "eval_wikibio_steps_per_second": 0.072, "eval_wikibio_token_set_f1": 0.32897327881917576, "eval_wikibio_token_set_f1_sem": 0.005215906713604864, "eval_wikibio_token_set_precision": 0.33438402705040965, "eval_wikibio_token_set_recall": 0.3402201625076424, "eval_wikibio_true_num_tokens": 61.1328125, "step": 230000 }, { "epoch": 44.16, "eval_nq_accuracy": 0.53596875, "eval_nq_bleu_score": 12.269807454845486, "eval_nq_bleu_score_sem": 0.5018900023960086, "eval_nq_emb_cos_sim": 0.8384106755256653, "eval_nq_emb_cos_sim_sem": 0.006962078537743653, "eval_nq_emb_top1_equal": 0.265625, "eval_nq_emb_top1_equal_sem": 0.03919146934646163, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1142802238464355, "eval_nq_n_ngrams_match_1": 23.356, "eval_nq_n_ngrams_match_2": 8.762, "eval_nq_n_ngrams_match_3": 4.11, "eval_nq_num_pred_words": 48.806, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.28362126677155, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4551664252869843, "eval_nq_runtime": 14.513, "eval_nq_samples_per_second": 34.452, "eval_nq_steps_per_second": 0.069, "eval_nq_token_set_f1": 0.4669954403632132, "eval_nq_token_set_f1_sem": 0.004963082345214547, "eval_nq_token_set_precision": 0.424774802542662, "eval_nq_token_set_recall": 0.52720961576976, "eval_nq_true_num_tokens": 64.0, "step": 230000 }, { "epoch": 44.16, "learning_rate": 0.001, "loss": 2.4836, "step": 230004 }, { "epoch": 44.17, "learning_rate": 0.001, "loss": 2.4943, "step": 230016 }, { "epoch": 44.17, "learning_rate": 0.001, "loss": 2.4861, "step": 230028 }, { "epoch": 44.17, "learning_rate": 0.001, "loss": 2.4832, "step": 230040 }, { "epoch": 44.17, "learning_rate": 0.001, "loss": 2.4942, "step": 230052 }, { "epoch": 44.18, "learning_rate": 0.001, "loss": 2.495, "step": 230064 }, { "epoch": 44.18, "learning_rate": 0.001, "loss": 2.4878, "step": 230076 }, { "epoch": 44.18, "learning_rate": 0.001, "loss": 2.488, "step": 230088 }, { "epoch": 44.18, "learning_rate": 0.001, "loss": 2.4965, "step": 230100 }, { "epoch": 44.18, "learning_rate": 0.001, "loss": 2.4863, "step": 230112 }, { "epoch": 44.19, "learning_rate": 0.001, "loss": 2.4848, "step": 230124 }, { "epoch": 44.19, "learning_rate": 0.001, "loss": 2.4865, "step": 230136 }, { "epoch": 44.19, "learning_rate": 0.001, "loss": 2.4772, "step": 230148 }, { "epoch": 44.19, "learning_rate": 0.001, "loss": 2.4883, "step": 230160 }, { "epoch": 44.2, "learning_rate": 0.001, "loss": 2.4858, "step": 230172 }, { "epoch": 44.2, "learning_rate": 0.001, "loss": 2.4845, "step": 230184 }, { "epoch": 44.2, "learning_rate": 0.001, "loss": 2.494, "step": 230196 }, { "epoch": 44.2, "learning_rate": 0.001, "loss": 2.4845, "step": 230208 }, { "epoch": 44.21, "learning_rate": 0.001, "loss": 2.4921, "step": 230220 }, { "epoch": 44.21, "learning_rate": 0.001, "loss": 2.4992, "step": 230232 }, { "epoch": 44.21, "learning_rate": 0.001, "loss": 2.489, "step": 230244 }, { "epoch": 44.21, "learning_rate": 0.001, "loss": 2.4854, "step": 230256 }, { "epoch": 44.21, "learning_rate": 0.001, "loss": 2.4881, "step": 230268 }, { "epoch": 44.22, "learning_rate": 0.001, "loss": 2.4949, "step": 230280 }, { "epoch": 44.22, "learning_rate": 0.001, "loss": 2.4929, "step": 230292 }, { "epoch": 44.22, "learning_rate": 0.001, "loss": 2.4845, "step": 230304 }, { "epoch": 44.22, "learning_rate": 0.001, "loss": 2.4878, "step": 230316 }, { "epoch": 44.23, "learning_rate": 0.001, "loss": 2.489, "step": 230328 }, { "epoch": 44.23, "learning_rate": 0.001, "loss": 2.4785, "step": 230340 }, { "epoch": 44.23, "learning_rate": 0.001, "loss": 2.4805, "step": 230352 }, { "epoch": 44.23, "learning_rate": 0.001, "loss": 2.4917, "step": 230364 }, { "epoch": 44.24, "learning_rate": 0.001, "loss": 2.4909, "step": 230376 }, { "epoch": 44.24, "learning_rate": 0.001, "loss": 2.4934, "step": 230388 }, { "epoch": 44.24, "learning_rate": 0.001, "loss": 2.4958, "step": 230400 }, { "epoch": 44.24, "learning_rate": 0.001, "loss": 2.4902, "step": 230412 }, { "epoch": 44.24, "learning_rate": 0.001, "loss": 2.4888, "step": 230424 }, { "epoch": 44.25, "learning_rate": 0.001, "loss": 2.4843, "step": 230436 }, { "epoch": 44.25, "learning_rate": 0.001, "loss": 2.4913, "step": 230448 }, { "epoch": 44.25, "learning_rate": 0.001, "loss": 2.4842, "step": 230460 }, { "epoch": 44.25, "learning_rate": 0.001, "loss": 2.4829, "step": 230472 }, { "epoch": 44.26, "learning_rate": 0.001, "loss": 2.4792, "step": 230484 }, { "epoch": 44.26, "learning_rate": 0.001, "loss": 2.4895, "step": 230496 }, { "epoch": 44.26, "learning_rate": 0.001, "loss": 2.4934, "step": 230508 }, { "epoch": 44.26, "learning_rate": 0.001, "loss": 2.4979, "step": 230520 }, { "epoch": 44.26, "learning_rate": 0.001, "loss": 2.4842, "step": 230532 }, { "epoch": 44.27, "learning_rate": 0.001, "loss": 2.4885, "step": 230544 }, { "epoch": 44.27, "learning_rate": 0.001, "loss": 2.4814, "step": 230556 }, { "epoch": 44.27, "learning_rate": 0.001, "loss": 2.4938, "step": 230568 }, { "epoch": 44.27, "learning_rate": 0.001, "loss": 2.4975, "step": 230580 }, { "epoch": 44.28, "learning_rate": 0.001, "loss": 2.4834, "step": 230592 }, { "epoch": 44.28, "learning_rate": 0.001, "loss": 2.4883, "step": 230604 }, { "epoch": 44.28, "learning_rate": 0.001, "loss": 2.4846, "step": 230616 }, { "epoch": 44.28, "eval_ag_news_accuracy": 0.32865625, "eval_ag_news_bleu_score": 5.151042038723306, "eval_ag_news_bleu_score_sem": 0.1679491783122144, "eval_ag_news_emb_cos_sim": 0.8254836797714233, "eval_ag_news_emb_cos_sim_sem": 0.007335676764022821, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4625508785247803, "eval_ag_news_n_ngrams_match_1": 14.63, "eval_ag_news_n_ngrams_match_2": 3.39, "eval_ag_news_n_ngrams_match_3": 0.978, "eval_ag_news_num_pred_words": 46.71, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.898241361092445, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36539840279800617, "eval_ag_news_runtime": 14.1975, "eval_ag_news_samples_per_second": 35.218, "eval_ag_news_steps_per_second": 0.07, "eval_ag_news_token_set_f1": 0.3653697064151061, "eval_ag_news_token_set_f1_sem": 0.0045366294122285485, "eval_ag_news_token_set_precision": 0.3517690523920131, "eval_ag_news_token_set_recall": 0.3963370629402755, "eval_ag_news_true_num_tokens": 56.09375, "step": 230625 }, { "epoch": 44.28, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.2309547844401014, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11684793739141351, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881632804870605, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009371160129945464, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.176680088043213, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.472, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.036, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78, "eval_anthropic_toxic_prompts_num_pred_words": 47.614, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.967052820335084, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21984921097908108, "eval_anthropic_toxic_prompts_runtime": 13.6023, "eval_anthropic_toxic_prompts_samples_per_second": 36.758, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.3666556033599894, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006796563987067125, "eval_anthropic_toxic_prompts_token_set_precision": 0.45299614163070895, "eval_anthropic_toxic_prompts_token_set_recall": 0.3333300874950141, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 230625 }, { "epoch": 44.28, "eval_arxiv_accuracy": 0.35125, "eval_arxiv_bleu_score": 4.502675640221707, "eval_arxiv_bleu_score_sem": 0.1318155761835477, "eval_arxiv_emb_cos_sim": 0.7878255844116211, "eval_arxiv_emb_cos_sim_sem": 0.006188094470234103, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.337618589401245, "eval_arxiv_n_ngrams_match_1": 15.674, "eval_arxiv_n_ngrams_match_2": 3.144, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.596, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.152005331481472, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3734648833536512, "eval_arxiv_runtime": 15.0698, "eval_arxiv_samples_per_second": 33.179, "eval_arxiv_steps_per_second": 0.066, "eval_arxiv_token_set_f1": 0.36629161983045455, "eval_arxiv_token_set_f1_sem": 0.0041715945315500625, "eval_arxiv_token_set_precision": 0.3203715424232131, "eval_arxiv_token_set_recall": 0.4439270992429467, "eval_arxiv_true_num_tokens": 64.0, "step": 230625 }, { "epoch": 44.28, "eval_python_code_alpaca_accuracy": 0.16471875, "eval_python_code_alpaca_bleu_score": 4.987518028366514, "eval_python_code_alpaca_bleu_score_sem": 0.15173951075981082, "eval_python_code_alpaca_emb_cos_sim": 0.776828408241272, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007575471258625553, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8173060417175293, "eval_python_code_alpaca_n_ngrams_match_1": 10.264, "eval_python_code_alpaca_n_ngrams_match_2": 3.202, "eval_python_code_alpaca_n_ngrams_match_3": 1.096, "eval_python_code_alpaca_num_pred_words": 43.38, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.73171535993908, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35011357808652255, "eval_python_code_alpaca_runtime": 13.252, "eval_python_code_alpaca_samples_per_second": 37.73, "eval_python_code_alpaca_steps_per_second": 0.075, "eval_python_code_alpaca_token_set_f1": 0.4869528704480582, "eval_python_code_alpaca_token_set_f1_sem": 0.0053927974542409265, "eval_python_code_alpaca_token_set_precision": 0.5611929984632298, "eval_python_code_alpaca_token_set_recall": 0.448868216022795, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 230625 }, { "epoch": 44.28, "eval_wikibio_accuracy": 0.3320625, "eval_wikibio_bleu_score": 6.361335782866247, "eval_wikibio_bleu_score_sem": 0.22093950249641842, "eval_wikibio_emb_cos_sim": 0.7376881241798401, "eval_wikibio_emb_cos_sim_sem": 0.011141980385010424, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6032822132110596, "eval_wikibio_n_ngrams_match_1": 10.324, "eval_wikibio_n_ngrams_match_2": 3.592, "eval_wikibio_n_ngrams_match_3": 1.374, "eval_wikibio_num_pred_words": 36.288, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.71855500311837, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3634289849159927, "eval_wikibio_runtime": 13.4004, "eval_wikibio_samples_per_second": 37.312, "eval_wikibio_steps_per_second": 0.075, "eval_wikibio_token_set_f1": 0.3273452590282704, "eval_wikibio_token_set_f1_sem": 0.0054884067259554185, "eval_wikibio_token_set_precision": 0.3363029694057662, "eval_wikibio_token_set_recall": 0.3353293070713061, "eval_wikibio_true_num_tokens": 61.1328125, "step": 230625 }, { "epoch": 44.28, "eval_nq_accuracy": 0.53790625, "eval_nq_bleu_score": 12.297011020912475, "eval_nq_bleu_score_sem": 0.4863772620800528, "eval_nq_emb_cos_sim": 0.842413067817688, "eval_nq_emb_cos_sim_sem": 0.007522692209033278, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1123740673065186, "eval_nq_n_ngrams_match_1": 23.608, "eval_nq_n_ngrams_match_2": 8.794, "eval_nq_n_ngrams_match_3": 4.094, "eval_nq_num_pred_words": 48.95, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.267846427352762, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4590508098826642, "eval_nq_runtime": 14.9576, "eval_nq_samples_per_second": 33.428, "eval_nq_steps_per_second": 0.067, "eval_nq_token_set_f1": 0.4735377685738816, "eval_nq_token_set_f1_sem": 0.004958284323717352, "eval_nq_token_set_precision": 0.4317609500410873, "eval_nq_token_set_recall": 0.5332976598623803, "eval_nq_true_num_tokens": 64.0, "step": 230625 }, { "epoch": 44.28, "learning_rate": 0.001, "loss": 2.4855, "step": 230628 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 2.4918, "step": 230640 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 2.485, "step": 230652 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 2.4851, "step": 230664 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 2.484, "step": 230676 }, { "epoch": 44.29, "learning_rate": 0.001, "loss": 2.4806, "step": 230688 }, { "epoch": 44.3, "learning_rate": 0.001, "loss": 2.4916, "step": 230700 }, { "epoch": 44.3, "learning_rate": 0.001, "loss": 2.4789, "step": 230712 }, { "epoch": 44.3, "learning_rate": 0.001, "loss": 2.4875, "step": 230724 }, { "epoch": 44.3, "learning_rate": 0.001, "loss": 2.4885, "step": 230736 }, { "epoch": 44.31, "learning_rate": 0.001, "loss": 2.4871, "step": 230748 }, { "epoch": 44.31, "learning_rate": 0.001, "loss": 2.4852, "step": 230760 }, { "epoch": 44.31, "learning_rate": 0.001, "loss": 2.4845, "step": 230772 }, { "epoch": 44.31, "learning_rate": 0.001, "loss": 2.4886, "step": 230784 }, { "epoch": 44.32, "learning_rate": 0.001, "loss": 2.479, "step": 230796 }, { "epoch": 44.32, "learning_rate": 0.001, "loss": 2.4923, "step": 230808 }, { "epoch": 44.32, "learning_rate": 0.001, "loss": 2.4863, "step": 230820 }, { "epoch": 44.32, "learning_rate": 0.001, "loss": 2.4971, "step": 230832 }, { "epoch": 44.32, "learning_rate": 0.001, "loss": 2.4983, "step": 230844 }, { "epoch": 44.33, "learning_rate": 0.001, "loss": 2.4914, "step": 230856 }, { "epoch": 44.33, "learning_rate": 0.001, "loss": 2.4882, "step": 230868 }, { "epoch": 44.33, "learning_rate": 0.001, "loss": 2.4903, "step": 230880 }, { "epoch": 44.33, "learning_rate": 0.001, "loss": 2.4846, "step": 230892 }, { "epoch": 44.34, "learning_rate": 0.001, "loss": 2.4811, "step": 230904 }, { "epoch": 44.34, "learning_rate": 0.001, "loss": 2.4955, "step": 230916 }, { "epoch": 44.34, "learning_rate": 0.001, "loss": 2.4858, "step": 230928 }, { "epoch": 44.34, "learning_rate": 0.001, "loss": 2.49, "step": 230940 }, { "epoch": 44.35, "learning_rate": 0.001, "loss": 2.4833, "step": 230952 }, { "epoch": 44.35, "learning_rate": 0.001, "loss": 2.4818, "step": 230964 }, { "epoch": 44.35, "learning_rate": 0.001, "loss": 2.4866, "step": 230976 }, { "epoch": 44.35, "learning_rate": 0.001, "loss": 2.492, "step": 230988 }, { "epoch": 44.35, "learning_rate": 0.001, "loss": 2.4892, "step": 231000 }, { "epoch": 44.36, "learning_rate": 0.001, "loss": 2.4872, "step": 231012 }, { "epoch": 44.36, "learning_rate": 0.001, "loss": 2.4952, "step": 231024 }, { "epoch": 44.36, "learning_rate": 0.001, "loss": 2.4915, "step": 231036 }, { "epoch": 44.36, "learning_rate": 0.001, "loss": 2.4882, "step": 231048 }, { "epoch": 44.37, "learning_rate": 0.001, "loss": 2.4878, "step": 231060 }, { "epoch": 44.37, "learning_rate": 0.001, "loss": 2.4882, "step": 231072 }, { "epoch": 44.37, "learning_rate": 0.001, "loss": 2.477, "step": 231084 }, { "epoch": 44.37, "learning_rate": 0.001, "loss": 2.4904, "step": 231096 }, { "epoch": 44.38, "learning_rate": 0.001, "loss": 2.4841, "step": 231108 }, { "epoch": 44.38, "learning_rate": 0.001, "loss": 2.4888, "step": 231120 }, { "epoch": 44.38, "learning_rate": 0.001, "loss": 2.4949, "step": 231132 }, { "epoch": 44.38, "learning_rate": 0.001, "loss": 2.489, "step": 231144 }, { "epoch": 44.38, "learning_rate": 0.001, "loss": 2.5006, "step": 231156 }, { "epoch": 44.39, "learning_rate": 0.001, "loss": 2.5023, "step": 231168 }, { "epoch": 44.39, "learning_rate": 0.001, "loss": 2.4854, "step": 231180 }, { "epoch": 44.39, "learning_rate": 0.001, "loss": 2.4963, "step": 231192 }, { "epoch": 44.39, "learning_rate": 0.001, "loss": 2.492, "step": 231204 }, { "epoch": 44.4, "learning_rate": 0.001, "loss": 2.497, "step": 231216 }, { "epoch": 44.4, "learning_rate": 0.001, "loss": 2.4861, "step": 231228 }, { "epoch": 44.4, "learning_rate": 0.001, "loss": 2.4956, "step": 231240 }, { "epoch": 44.4, "eval_ag_news_accuracy": 0.329125, "eval_ag_news_bleu_score": 4.951772545080421, "eval_ag_news_bleu_score_sem": 0.15895500546693048, "eval_ag_news_emb_cos_sim": 0.8184164762496948, "eval_ag_news_emb_cos_sim_sem": 0.007088071931555415, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4644315242767334, "eval_ag_news_n_ngrams_match_1": 14.28, "eval_ag_news_n_ngrams_match_2": 3.292, "eval_ag_news_n_ngrams_match_3": 0.912, "eval_ag_news_num_pred_words": 46.56, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.95828709788525, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3584141121283838, "eval_ag_news_runtime": 14.7439, "eval_ag_news_samples_per_second": 33.912, "eval_ag_news_steps_per_second": 0.068, "eval_ag_news_token_set_f1": 0.3572673046241237, "eval_ag_news_token_set_f1_sem": 0.004575753007493886, "eval_ag_news_token_set_precision": 0.34227980742384423, "eval_ag_news_token_set_recall": 0.3887265408129691, "eval_ag_news_true_num_tokens": 56.09375, "step": 231250 }, { "epoch": 44.4, "eval_anthropic_toxic_prompts_accuracy": 0.1178125, "eval_anthropic_toxic_prompts_bleu_score": 3.257911478221794, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12478898503307423, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6780132055282593, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008386806653538867, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.196791410446167, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.766, "eval_anthropic_toxic_prompts_num_pred_words": 46.754, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.453941523810737, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21806154708980463, "eval_anthropic_toxic_prompts_runtime": 13.3856, "eval_anthropic_toxic_prompts_samples_per_second": 37.354, "eval_anthropic_toxic_prompts_steps_per_second": 0.075, "eval_anthropic_toxic_prompts_token_set_f1": 0.36287035858831085, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006630035259195912, "eval_anthropic_toxic_prompts_token_set_precision": 0.4432813115155612, "eval_anthropic_toxic_prompts_token_set_recall": 0.3343028740591016, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 231250 }, { "epoch": 44.4, "eval_arxiv_accuracy": 0.35334375, "eval_arxiv_bleu_score": 4.617478853905073, "eval_arxiv_bleu_score_sem": 0.1362489432586617, "eval_arxiv_emb_cos_sim": 0.7854287028312683, "eval_arxiv_emb_cos_sim_sem": 0.0062751474687443385, "eval_arxiv_emb_top1_equal": 0.234375, "eval_arxiv_emb_top1_equal_sem": 0.03758909358128201, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.327728509902954, "eval_arxiv_n_ngrams_match_1": 15.55, "eval_arxiv_n_ngrams_match_2": 3.128, "eval_arxiv_n_ngrams_match_3": 0.754, "eval_arxiv_num_pred_words": 40.706, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.87495205846069, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3750254383457203, "eval_arxiv_runtime": 14.8707, "eval_arxiv_samples_per_second": 33.623, "eval_arxiv_steps_per_second": 0.067, "eval_arxiv_token_set_f1": 0.3648252787311752, "eval_arxiv_token_set_f1_sem": 0.00403940420609417, "eval_arxiv_token_set_precision": 0.3173998898279539, "eval_arxiv_token_set_recall": 0.4444589702833801, "eval_arxiv_true_num_tokens": 64.0, "step": 231250 }, { "epoch": 44.4, "eval_python_code_alpaca_accuracy": 0.16209375, "eval_python_code_alpaca_bleu_score": 4.6041812163336795, "eval_python_code_alpaca_bleu_score_sem": 0.1483186692461661, "eval_python_code_alpaca_emb_cos_sim": 0.7744543552398682, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007144725198488152, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8420767784118652, "eval_python_code_alpaca_n_ngrams_match_1": 10.006, "eval_python_code_alpaca_n_ngrams_match_2": 2.946, "eval_python_code_alpaca_n_ngrams_match_3": 0.98, "eval_python_code_alpaca_num_pred_words": 44.422, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.15134812530243, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3393222654256284, "eval_python_code_alpaca_runtime": 13.7062, "eval_python_code_alpaca_samples_per_second": 36.48, "eval_python_code_alpaca_steps_per_second": 0.073, "eval_python_code_alpaca_token_set_f1": 0.4868389731976632, "eval_python_code_alpaca_token_set_f1_sem": 0.005375971006064705, "eval_python_code_alpaca_token_set_precision": 0.5486301922005524, "eval_python_code_alpaca_token_set_recall": 0.45746959759716543, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 231250 }, { "epoch": 44.4, "eval_wikibio_accuracy": 0.334125, "eval_wikibio_bleu_score": 6.014058014255632, "eval_wikibio_bleu_score_sem": 0.21447243333362348, "eval_wikibio_emb_cos_sim": 0.7384083271026611, "eval_wikibio_emb_cos_sim_sem": 0.010786422326982934, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.579582452774048, "eval_wikibio_n_ngrams_match_1": 9.94, "eval_wikibio_n_ngrams_match_2": 3.372, "eval_wikibio_n_ngrams_match_3": 1.26, "eval_wikibio_num_pred_words": 35.614, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 35.85856507636032, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.353314695923721, "eval_wikibio_runtime": 13.7511, "eval_wikibio_samples_per_second": 36.361, "eval_wikibio_steps_per_second": 0.073, "eval_wikibio_token_set_f1": 0.317845479825668, "eval_wikibio_token_set_f1_sem": 0.005635497103008044, "eval_wikibio_token_set_precision": 0.3245144502863291, "eval_wikibio_token_set_recall": 0.3308307688927712, "eval_wikibio_true_num_tokens": 61.1328125, "step": 231250 }, { "epoch": 44.4, "eval_nq_accuracy": 0.5359375, "eval_nq_bleu_score": 12.087189298239092, "eval_nq_bleu_score_sem": 0.4775757830859531, "eval_nq_emb_cos_sim": 0.8411356210708618, "eval_nq_emb_cos_sim_sem": 0.00651645990843797, "eval_nq_emb_top1_equal": 0.2578125, "eval_nq_emb_top1_equal_sem": 0.038815656435002115, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1131131649017334, "eval_nq_n_ngrams_match_1": 23.498, "eval_nq_n_ngrams_match_2": 8.732, "eval_nq_n_ngrams_match_3": 4.024, "eval_nq_num_pred_words": 48.72, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.273959431539895, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45961685494641735, "eval_nq_runtime": 14.3703, "eval_nq_samples_per_second": 34.794, "eval_nq_steps_per_second": 0.07, "eval_nq_token_set_f1": 0.4710749317682711, "eval_nq_token_set_f1_sem": 0.0047465441645887325, "eval_nq_token_set_precision": 0.42948145801591264, "eval_nq_token_set_recall": 0.5303811677895016, "eval_nq_true_num_tokens": 64.0, "step": 231250 }, { "epoch": 44.4, "learning_rate": 0.001, "loss": 2.5001, "step": 231252 }, { "epoch": 44.41, "learning_rate": 0.001, "loss": 2.4908, "step": 231264 }, { "epoch": 44.41, "learning_rate": 0.001, "loss": 2.4957, "step": 231276 }, { "epoch": 44.41, "learning_rate": 0.001, "loss": 2.4911, "step": 231288 }, { "epoch": 44.41, "learning_rate": 0.001, "loss": 2.4958, "step": 231300 }, { "epoch": 44.41, "learning_rate": 0.001, "loss": 2.4905, "step": 231312 }, { "epoch": 44.42, "learning_rate": 0.001, "loss": 2.4916, "step": 231324 }, { "epoch": 44.42, "learning_rate": 0.001, "loss": 2.4964, "step": 231336 }, { "epoch": 44.42, "learning_rate": 0.001, "loss": 2.4841, "step": 231348 }, { "epoch": 44.42, "learning_rate": 0.001, "loss": 2.4938, "step": 231360 }, { "epoch": 44.43, "learning_rate": 0.001, "loss": 2.4989, "step": 231372 }, { "epoch": 44.43, "learning_rate": 0.001, "loss": 2.4926, "step": 231384 }, { "epoch": 44.43, "learning_rate": 0.001, "loss": 2.4973, "step": 231396 }, { "epoch": 44.43, "learning_rate": 0.001, "loss": 2.4891, "step": 231408 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 2.4914, "step": 231420 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 2.492, "step": 231432 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 2.4894, "step": 231444 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 2.4914, "step": 231456 }, { "epoch": 44.44, "learning_rate": 0.001, "loss": 2.4849, "step": 231468 }, { "epoch": 44.45, "learning_rate": 0.001, "loss": 2.4924, "step": 231480 }, { "epoch": 44.45, "learning_rate": 0.001, "loss": 2.4928, "step": 231492 }, { "epoch": 44.45, "learning_rate": 0.001, "loss": 2.4882, "step": 231504 }, { "epoch": 44.45, "learning_rate": 0.001, "loss": 2.4934, "step": 231516 }, { "epoch": 44.46, "learning_rate": 0.001, "loss": 2.4847, "step": 231528 }, { "epoch": 44.46, "learning_rate": 0.001, "loss": 2.4904, "step": 231540 }, { "epoch": 44.46, "learning_rate": 0.001, "loss": 2.4957, "step": 231552 }, { "epoch": 44.46, "learning_rate": 0.001, "loss": 2.4823, "step": 231564 }, { "epoch": 44.47, "learning_rate": 0.001, "loss": 2.4848, "step": 231576 }, { "epoch": 44.47, "learning_rate": 0.001, "loss": 2.4855, "step": 231588 }, { "epoch": 44.47, "learning_rate": 0.001, "loss": 2.4924, "step": 231600 }, { "epoch": 44.47, "learning_rate": 0.001, "loss": 2.4915, "step": 231612 }, { "epoch": 44.47, "learning_rate": 0.001, "loss": 2.4781, "step": 231624 }, { "epoch": 44.48, "learning_rate": 0.001, "loss": 2.4931, "step": 231636 }, { "epoch": 44.48, "learning_rate": 0.001, "loss": 2.5002, "step": 231648 }, { "epoch": 44.48, "learning_rate": 0.001, "loss": 2.4839, "step": 231660 }, { "epoch": 44.48, "learning_rate": 0.001, "loss": 2.4857, "step": 231672 }, { "epoch": 44.49, "learning_rate": 0.001, "loss": 2.501, "step": 231684 }, { "epoch": 44.49, "learning_rate": 0.001, "loss": 2.4912, "step": 231696 }, { "epoch": 44.49, "learning_rate": 0.001, "loss": 2.4922, "step": 231708 }, { "epoch": 44.49, "learning_rate": 0.001, "loss": 2.4849, "step": 231720 }, { "epoch": 44.5, "learning_rate": 0.001, "loss": 2.4878, "step": 231732 }, { "epoch": 44.5, "learning_rate": 0.001, "loss": 2.481, "step": 231744 }, { "epoch": 44.5, "learning_rate": 0.001, "loss": 2.494, "step": 231756 }, { "epoch": 44.5, "learning_rate": 0.001, "loss": 2.4853, "step": 231768 }, { "epoch": 44.5, "learning_rate": 0.001, "loss": 2.492, "step": 231780 }, { "epoch": 44.51, "learning_rate": 0.001, "loss": 2.489, "step": 231792 }, { "epoch": 44.51, "learning_rate": 0.001, "loss": 2.4821, "step": 231804 }, { "epoch": 44.51, "learning_rate": 0.001, "loss": 2.4872, "step": 231816 }, { "epoch": 44.51, "learning_rate": 0.001, "loss": 2.4877, "step": 231828 }, { "epoch": 44.52, "learning_rate": 0.001, "loss": 2.4904, "step": 231840 }, { "epoch": 44.52, "learning_rate": 0.001, "loss": 2.4939, "step": 231852 }, { "epoch": 44.52, "learning_rate": 0.001, "loss": 2.488, "step": 231864 }, { "epoch": 44.52, "eval_ag_news_accuracy": 0.32746875, "eval_ag_news_bleu_score": 5.032148010511632, "eval_ag_news_bleu_score_sem": 0.15772902792673343, "eval_ag_news_emb_cos_sim": 0.8267014026641846, "eval_ag_news_emb_cos_sim_sem": 0.006599245538536478, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4720215797424316, "eval_ag_news_n_ngrams_match_1": 14.466, "eval_ag_news_n_ngrams_match_2": 3.332, "eval_ag_news_n_ngrams_match_3": 0.968, "eval_ag_news_num_pred_words": 46.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.20177514451354, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3605934341228475, "eval_ag_news_runtime": 12.4031, "eval_ag_news_samples_per_second": 40.313, "eval_ag_news_steps_per_second": 0.081, "eval_ag_news_token_set_f1": 0.3593330210601828, "eval_ag_news_token_set_f1_sem": 0.004519197976533824, "eval_ag_news_token_set_precision": 0.3459731448610272, "eval_ag_news_token_set_recall": 0.387712768205289, "eval_ag_news_true_num_tokens": 56.09375, "step": 231875 }, { "epoch": 44.52, "eval_anthropic_toxic_prompts_accuracy": 0.116625, "eval_anthropic_toxic_prompts_bleu_score": 3.3787512868093175, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.14053752954488988, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6918679475784302, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008374250843949669, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2191762924194336, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.416, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 46.642, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.007512817403224, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2248723645176024, "eval_anthropic_toxic_prompts_runtime": 11.1823, "eval_anthropic_toxic_prompts_samples_per_second": 44.714, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.36663493969040806, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006519405853883953, "eval_anthropic_toxic_prompts_token_set_precision": 0.4525412478375378, "eval_anthropic_toxic_prompts_token_set_recall": 0.3326264750523705, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 231875 }, { "epoch": 44.52, "eval_arxiv_accuracy": 0.3543125, "eval_arxiv_bleu_score": 4.517749544071756, "eval_arxiv_bleu_score_sem": 0.13442668578636985, "eval_arxiv_emb_cos_sim": 0.7815765738487244, "eval_arxiv_emb_cos_sim_sem": 0.007420977375597315, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3189609050750732, "eval_arxiv_n_ngrams_match_1": 15.42, "eval_arxiv_n_ngrams_match_2": 3.106, "eval_arxiv_n_ngrams_match_3": 0.712, "eval_arxiv_num_pred_words": 39.85, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.63162375615171, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37318881767804524, "eval_arxiv_runtime": 11.4412, "eval_arxiv_samples_per_second": 43.702, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.36440001264371186, "eval_arxiv_token_set_f1_sem": 0.004088611910439568, "eval_arxiv_token_set_precision": 0.3144086455781148, "eval_arxiv_token_set_recall": 0.45718424629530285, "eval_arxiv_true_num_tokens": 64.0, "step": 231875 }, { "epoch": 44.52, "eval_python_code_alpaca_accuracy": 0.161125, "eval_python_code_alpaca_bleu_score": 4.654814475943952, "eval_python_code_alpaca_bleu_score_sem": 0.1477308756024803, "eval_python_code_alpaca_emb_cos_sim": 0.7707570791244507, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0081884800480593, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8380470275878906, "eval_python_code_alpaca_n_ngrams_match_1": 9.91, "eval_python_code_alpaca_n_ngrams_match_2": 2.934, "eval_python_code_alpaca_n_ngrams_match_3": 0.944, "eval_python_code_alpaca_num_pred_words": 43.174, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.082371538632618, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34218812762441414, "eval_python_code_alpaca_runtime": 11.3825, "eval_python_code_alpaca_samples_per_second": 43.927, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.49155034642244394, "eval_python_code_alpaca_token_set_f1_sem": 0.005474163445380063, "eval_python_code_alpaca_token_set_precision": 0.5414658855911652, "eval_python_code_alpaca_token_set_recall": 0.47145215086983455, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 231875 }, { "epoch": 44.52, "eval_wikibio_accuracy": 0.33109375, "eval_wikibio_bleu_score": 6.068280358273803, "eval_wikibio_bleu_score_sem": 0.21390801141292687, "eval_wikibio_emb_cos_sim": 0.7426877021789551, "eval_wikibio_emb_cos_sim_sem": 0.00934576401953962, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6402812004089355, "eval_wikibio_n_ngrams_match_1": 10.294, "eval_wikibio_n_ngrams_match_2": 3.416, "eval_wikibio_n_ngrams_match_3": 1.23, "eval_wikibio_num_pred_words": 36.14, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.102549671635146, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36616732745850755, "eval_wikibio_runtime": 11.4382, "eval_wikibio_samples_per_second": 43.713, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.3231692667263347, "eval_wikibio_token_set_f1_sem": 0.0053967739013447685, "eval_wikibio_token_set_precision": 0.3315140182844297, "eval_wikibio_token_set_recall": 0.3296530079213008, "eval_wikibio_true_num_tokens": 61.1328125, "step": 231875 }, { "epoch": 44.52, "eval_nq_accuracy": 0.5378125, "eval_nq_bleu_score": 12.337385287495966, "eval_nq_bleu_score_sem": 0.49999971679454874, "eval_nq_emb_cos_sim": 0.8437784910202026, "eval_nq_emb_cos_sim_sem": 0.00671438085050062, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1097679138183594, "eval_nq_n_ngrams_match_1": 23.646, "eval_nq_n_ngrams_match_2": 8.904, "eval_nq_n_ngrams_match_3": 4.178, "eval_nq_num_pred_words": 49.086, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.246327203926093, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4599864372969777, "eval_nq_runtime": 11.4574, "eval_nq_samples_per_second": 43.64, "eval_nq_steps_per_second": 0.087, "eval_nq_token_set_f1": 0.4727454867206314, "eval_nq_token_set_f1_sem": 0.004996086940878568, "eval_nq_token_set_precision": 0.43139192327672027, "eval_nq_token_set_recall": 0.5297096802434696, "eval_nq_true_num_tokens": 64.0, "step": 231875 }, { "epoch": 44.52, "learning_rate": 0.001, "loss": 2.4922, "step": 231876 }, { "epoch": 44.53, "learning_rate": 0.001, "loss": 2.4821, "step": 231888 }, { "epoch": 44.53, "learning_rate": 0.001, "loss": 2.4949, "step": 231900 }, { "epoch": 44.53, "learning_rate": 0.001, "loss": 2.489, "step": 231912 }, { "epoch": 44.53, "learning_rate": 0.001, "loss": 2.4918, "step": 231924 }, { "epoch": 44.53, "learning_rate": 0.001, "loss": 2.4991, "step": 231936 }, { "epoch": 44.54, "learning_rate": 0.001, "loss": 2.4894, "step": 231948 }, { "epoch": 44.54, "learning_rate": 0.001, "loss": 2.4919, "step": 231960 }, { "epoch": 44.54, "learning_rate": 0.001, "loss": 2.4897, "step": 231972 }, { "epoch": 44.54, "learning_rate": 0.001, "loss": 2.4943, "step": 231984 }, { "epoch": 44.55, "learning_rate": 0.001, "loss": 2.4947, "step": 231996 }, { "epoch": 44.55, "learning_rate": 0.001, "loss": 2.4868, "step": 232008 }, { "epoch": 44.55, "learning_rate": 0.001, "loss": 2.4854, "step": 232020 }, { "epoch": 44.55, "learning_rate": 0.001, "loss": 2.4944, "step": 232032 }, { "epoch": 44.56, "learning_rate": 0.001, "loss": 2.4931, "step": 232044 }, { "epoch": 44.56, "learning_rate": 0.001, "loss": 2.5003, "step": 232056 }, { "epoch": 44.56, "learning_rate": 0.001, "loss": 2.4947, "step": 232068 }, { "epoch": 44.56, "learning_rate": 0.001, "loss": 2.4864, "step": 232080 }, { "epoch": 44.56, "learning_rate": 0.001, "loss": 2.4951, "step": 232092 }, { "epoch": 44.57, "learning_rate": 0.001, "loss": 2.4849, "step": 232104 }, { "epoch": 44.57, "learning_rate": 0.001, "loss": 2.4905, "step": 232116 }, { "epoch": 44.57, "learning_rate": 0.001, "loss": 2.4921, "step": 232128 }, { "epoch": 44.57, "learning_rate": 0.001, "loss": 2.4855, "step": 232140 }, { "epoch": 44.58, "learning_rate": 0.001, "loss": 2.4927, "step": 232152 }, { "epoch": 44.58, "learning_rate": 0.001, "loss": 2.492, "step": 232164 }, { "epoch": 44.58, "learning_rate": 0.001, "loss": 2.483, "step": 232176 }, { "epoch": 44.58, "learning_rate": 0.001, "loss": 2.49, "step": 232188 }, { "epoch": 44.59, "learning_rate": 0.001, "loss": 2.4878, "step": 232200 }, { "epoch": 44.59, "learning_rate": 0.001, "loss": 2.49, "step": 232212 }, { "epoch": 44.59, "learning_rate": 0.001, "loss": 2.4951, "step": 232224 }, { "epoch": 44.59, "learning_rate": 0.001, "loss": 2.4924, "step": 232236 }, { "epoch": 44.59, "learning_rate": 0.001, "loss": 2.4841, "step": 232248 }, { "epoch": 44.6, "learning_rate": 0.001, "loss": 2.4931, "step": 232260 }, { "epoch": 44.6, "learning_rate": 0.001, "loss": 2.4812, "step": 232272 }, { "epoch": 44.6, "learning_rate": 0.001, "loss": 2.5003, "step": 232284 }, { "epoch": 44.6, "learning_rate": 0.001, "loss": 2.4977, "step": 232296 }, { "epoch": 44.61, "learning_rate": 0.001, "loss": 2.4868, "step": 232308 }, { "epoch": 44.61, "learning_rate": 0.001, "loss": 2.4899, "step": 232320 }, { "epoch": 44.61, "learning_rate": 0.001, "loss": 2.4935, "step": 232332 }, { "epoch": 44.61, "learning_rate": 0.001, "loss": 2.496, "step": 232344 }, { "epoch": 44.62, "learning_rate": 0.001, "loss": 2.4982, "step": 232356 }, { "epoch": 44.62, "learning_rate": 0.001, "loss": 2.4922, "step": 232368 }, { "epoch": 44.62, "learning_rate": 0.001, "loss": 2.4838, "step": 232380 }, { "epoch": 44.62, "learning_rate": 0.001, "loss": 2.491, "step": 232392 }, { "epoch": 44.62, "learning_rate": 0.001, "loss": 2.4991, "step": 232404 }, { "epoch": 44.63, "learning_rate": 0.001, "loss": 2.4852, "step": 232416 }, { "epoch": 44.63, "learning_rate": 0.001, "loss": 2.4855, "step": 232428 }, { "epoch": 44.63, "learning_rate": 0.001, "loss": 2.492, "step": 232440 }, { "epoch": 44.63, "learning_rate": 0.001, "loss": 2.4869, "step": 232452 }, { "epoch": 44.64, "learning_rate": 0.001, "loss": 2.4909, "step": 232464 }, { "epoch": 44.64, "learning_rate": 0.001, "loss": 2.4806, "step": 232476 }, { "epoch": 44.64, "learning_rate": 0.001, "loss": 2.4917, "step": 232488 }, { "epoch": 44.64, "learning_rate": 0.001, "loss": 2.4985, "step": 232500 }, { "epoch": 44.64, "eval_ag_news_accuracy": 0.3278125, "eval_ag_news_bleu_score": 4.833847274796979, "eval_ag_news_bleu_score_sem": 0.15604819210561066, "eval_ag_news_emb_cos_sim": 0.8204124569892883, "eval_ag_news_emb_cos_sim_sem": 0.0062024290085393625, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4684019088745117, "eval_ag_news_n_ngrams_match_1": 14.374, "eval_ag_news_n_ngrams_match_2": 3.168, "eval_ag_news_n_ngrams_match_3": 0.88, "eval_ag_news_num_pred_words": 46.616, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.08542601693543, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3577536070579498, "eval_ag_news_runtime": 12.0782, "eval_ag_news_samples_per_second": 41.397, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.35668751572364166, "eval_ag_news_token_set_f1_sem": 0.004468901815282063, "eval_ag_news_token_set_precision": 0.344271097338136, "eval_ag_news_token_set_recall": 0.38491437490998465, "eval_ag_news_true_num_tokens": 56.09375, "step": 232500 }, { "epoch": 44.64, "eval_anthropic_toxic_prompts_accuracy": 0.1176875, "eval_anthropic_toxic_prompts_bleu_score": 3.17252314798791, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11957468490942567, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.681765079498291, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009270485796832114, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1847341060638428, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.33, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.96, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.752, "eval_anthropic_toxic_prompts_num_pred_words": 47.508, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.16086332463754, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21582066676314207, "eval_anthropic_toxic_prompts_runtime": 11.438, "eval_anthropic_toxic_prompts_samples_per_second": 43.714, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.3632119220779932, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006928794261231546, "eval_anthropic_toxic_prompts_token_set_precision": 0.4458300876755742, "eval_anthropic_toxic_prompts_token_set_recall": 0.3312882521265975, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 232500 }, { "epoch": 44.64, "eval_arxiv_accuracy": 0.35475, "eval_arxiv_bleu_score": 4.456119312620294, "eval_arxiv_bleu_score_sem": 0.1360193654027934, "eval_arxiv_emb_cos_sim": 0.7783864736557007, "eval_arxiv_emb_cos_sim_sem": 0.00699144698862165, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.31887149810791, "eval_arxiv_n_ngrams_match_1": 15.348, "eval_arxiv_n_ngrams_match_2": 3.044, "eval_arxiv_n_ngrams_match_3": 0.706, "eval_arxiv_num_pred_words": 40.098, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.629153406908745, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36868529982167786, "eval_arxiv_runtime": 11.8737, "eval_arxiv_samples_per_second": 42.11, "eval_arxiv_steps_per_second": 0.084, "eval_arxiv_token_set_f1": 0.36319481193290903, "eval_arxiv_token_set_f1_sem": 0.004378341711286714, "eval_arxiv_token_set_precision": 0.3110319725875156, "eval_arxiv_token_set_recall": 0.4569955362034434, "eval_arxiv_true_num_tokens": 64.0, "step": 232500 }, { "epoch": 44.64, "eval_python_code_alpaca_accuracy": 0.16259375, "eval_python_code_alpaca_bleu_score": 4.660162667988266, "eval_python_code_alpaca_bleu_score_sem": 0.15117254687198411, "eval_python_code_alpaca_emb_cos_sim": 0.768064558506012, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007732874591290058, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.851837396621704, "eval_python_code_alpaca_n_ngrams_match_1": 9.874, "eval_python_code_alpaca_n_ngrams_match_2": 2.822, "eval_python_code_alpaca_n_ngrams_match_3": 0.988, "eval_python_code_alpaca_num_pred_words": 43.028, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.319575552402913, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3415741618862518, "eval_python_code_alpaca_runtime": 11.8385, "eval_python_code_alpaca_samples_per_second": 42.235, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.48227694070062255, "eval_python_code_alpaca_token_set_f1_sem": 0.005521130966249993, "eval_python_code_alpaca_token_set_precision": 0.5358386821741956, "eval_python_code_alpaca_token_set_recall": 0.46013190161250833, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 232500 }, { "epoch": 44.64, "eval_wikibio_accuracy": 0.33040625, "eval_wikibio_bleu_score": 6.305004954218946, "eval_wikibio_bleu_score_sem": 0.2239023075215015, "eval_wikibio_emb_cos_sim": 0.7458434104919434, "eval_wikibio_emb_cos_sim_sem": 0.008974877020999816, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6182138919830322, "eval_wikibio_n_ngrams_match_1": 10.018, "eval_wikibio_n_ngrams_match_2": 3.4, "eval_wikibio_n_ngrams_match_3": 1.324, "eval_wikibio_num_pred_words": 35.022, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.270938414205695, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35632726064714937, "eval_wikibio_runtime": 11.2664, "eval_wikibio_samples_per_second": 44.38, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.3219415595057358, "eval_wikibio_token_set_f1_sem": 0.005475142763128616, "eval_wikibio_token_set_precision": 0.32635907410889786, "eval_wikibio_token_set_recall": 0.3365921153126336, "eval_wikibio_true_num_tokens": 61.1328125, "step": 232500 }, { "epoch": 44.64, "eval_nq_accuracy": 0.5371875, "eval_nq_bleu_score": 12.446293493753974, "eval_nq_bleu_score_sem": 0.5001599804606712, "eval_nq_emb_cos_sim": 0.8429065346717834, "eval_nq_emb_cos_sim_sem": 0.0067761174224904765, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1108951568603516, "eval_nq_n_ngrams_match_1": 23.608, "eval_nq_n_ngrams_match_2": 8.974, "eval_nq_n_ngrams_match_3": 4.198, "eval_nq_num_pred_words": 48.92, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.25562806006653, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46122095192437024, "eval_nq_runtime": 11.962, "eval_nq_samples_per_second": 41.799, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.47409383964277885, "eval_nq_token_set_f1_sem": 0.004876826908985697, "eval_nq_token_set_precision": 0.43193271966593727, "eval_nq_token_set_recall": 0.5332972341018406, "eval_nq_true_num_tokens": 64.0, "step": 232500 }, { "epoch": 44.65, "learning_rate": 0.001, "loss": 2.4978, "step": 232512 }, { "epoch": 44.65, "learning_rate": 0.001, "loss": 2.4956, "step": 232524 }, { "epoch": 44.65, "learning_rate": 0.001, "loss": 2.4954, "step": 232536 }, { "epoch": 44.65, "learning_rate": 0.001, "loss": 2.4879, "step": 232548 }, { "epoch": 44.65, "learning_rate": 0.001, "loss": 2.4887, "step": 232560 }, { "epoch": 44.66, "learning_rate": 0.001, "loss": 2.4876, "step": 232572 }, { "epoch": 44.66, "learning_rate": 0.001, "loss": 2.4919, "step": 232584 }, { "epoch": 44.66, "learning_rate": 0.001, "loss": 2.491, "step": 232596 }, { "epoch": 44.66, "learning_rate": 0.001, "loss": 2.4926, "step": 232608 }, { "epoch": 44.67, "learning_rate": 0.001, "loss": 2.491, "step": 232620 }, { "epoch": 44.67, "learning_rate": 0.001, "loss": 2.504, "step": 232632 }, { "epoch": 44.67, "learning_rate": 0.001, "loss": 2.493, "step": 232644 }, { "epoch": 44.67, "learning_rate": 0.001, "loss": 2.4892, "step": 232656 }, { "epoch": 44.68, "learning_rate": 0.001, "loss": 2.4941, "step": 232668 }, { "epoch": 44.68, "learning_rate": 0.001, "loss": 2.4988, "step": 232680 }, { "epoch": 44.68, "learning_rate": 0.001, "loss": 2.4859, "step": 232692 }, { "epoch": 44.68, "learning_rate": 0.001, "loss": 2.4912, "step": 232704 }, { "epoch": 44.68, "learning_rate": 0.001, "loss": 2.484, "step": 232716 }, { "epoch": 44.69, "learning_rate": 0.001, "loss": 2.5034, "step": 232728 }, { "epoch": 44.69, "learning_rate": 0.001, "loss": 2.4979, "step": 232740 }, { "epoch": 44.69, "learning_rate": 0.001, "loss": 2.4966, "step": 232752 }, { "epoch": 44.69, "learning_rate": 0.001, "loss": 2.4883, "step": 232764 }, { "epoch": 44.7, "learning_rate": 0.001, "loss": 2.4914, "step": 232776 }, { "epoch": 44.7, "learning_rate": 0.001, "loss": 2.494, "step": 232788 }, { "epoch": 44.7, "learning_rate": 0.001, "loss": 2.4871, "step": 232800 }, { "epoch": 44.7, "learning_rate": 0.001, "loss": 2.4986, "step": 232812 }, { "epoch": 44.71, "learning_rate": 0.001, "loss": 2.4946, "step": 232824 }, { "epoch": 44.71, "learning_rate": 0.001, "loss": 2.4859, "step": 232836 }, { "epoch": 44.71, "learning_rate": 0.001, "loss": 2.4864, "step": 232848 }, { "epoch": 44.71, "learning_rate": 0.001, "loss": 2.4911, "step": 232860 }, { "epoch": 44.71, "learning_rate": 0.001, "loss": 2.4868, "step": 232872 }, { "epoch": 44.72, "learning_rate": 0.001, "loss": 2.4954, "step": 232884 }, { "epoch": 44.72, "learning_rate": 0.001, "loss": 2.4874, "step": 232896 }, { "epoch": 44.72, "learning_rate": 0.001, "loss": 2.4935, "step": 232908 }, { "epoch": 44.72, "learning_rate": 0.001, "loss": 2.4977, "step": 232920 }, { "epoch": 44.73, "learning_rate": 0.001, "loss": 2.4899, "step": 232932 }, { "epoch": 44.73, "learning_rate": 0.001, "loss": 2.4925, "step": 232944 }, { "epoch": 44.73, "learning_rate": 0.001, "loss": 2.4916, "step": 232956 }, { "epoch": 44.73, "learning_rate": 0.001, "loss": 2.4826, "step": 232968 }, { "epoch": 44.74, "learning_rate": 0.001, "loss": 2.4876, "step": 232980 }, { "epoch": 44.74, "learning_rate": 0.001, "loss": 2.4839, "step": 232992 }, { "epoch": 44.74, "learning_rate": 0.001, "loss": 2.4833, "step": 233004 }, { "epoch": 44.74, "learning_rate": 0.001, "loss": 2.4929, "step": 233016 }, { "epoch": 44.74, "learning_rate": 0.001, "loss": 2.4872, "step": 233028 }, { "epoch": 44.75, "learning_rate": 0.001, "loss": 2.4992, "step": 233040 }, { "epoch": 44.75, "learning_rate": 0.001, "loss": 2.495, "step": 233052 }, { "epoch": 44.75, "learning_rate": 0.001, "loss": 2.4779, "step": 233064 }, { "epoch": 44.75, "learning_rate": 0.001, "loss": 2.4853, "step": 233076 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 2.4904, "step": 233088 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 2.4922, "step": 233100 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 2.4922, "step": 233112 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 2.4876, "step": 233124 }, { "epoch": 44.76, "eval_ag_news_accuracy": 0.329875, "eval_ag_news_bleu_score": 4.911330238378152, "eval_ag_news_bleu_score_sem": 0.1583020659705351, "eval_ag_news_emb_cos_sim": 0.8199710845947266, "eval_ag_news_emb_cos_sim_sem": 0.007175698522332103, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.467978000640869, "eval_ag_news_n_ngrams_match_1": 14.572, "eval_ag_news_n_ngrams_match_2": 3.216, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 47.002, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.07182762310611, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3611572503418052, "eval_ag_news_runtime": 11.8256, "eval_ag_news_samples_per_second": 42.281, "eval_ag_news_steps_per_second": 0.085, "eval_ag_news_token_set_f1": 0.35761217906127024, "eval_ag_news_token_set_f1_sem": 0.00448286920762013, "eval_ag_news_token_set_precision": 0.34624476285537886, "eval_ag_news_token_set_recall": 0.38470179548590044, "eval_ag_news_true_num_tokens": 56.09375, "step": 233125 }, { "epoch": 44.76, "eval_anthropic_toxic_prompts_accuracy": 0.116125, "eval_anthropic_toxic_prompts_bleu_score": 3.056392732103471, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.10660535293173316, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6825622916221619, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009403752814734545, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.182234525680542, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.49, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.0, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.684, "eval_anthropic_toxic_prompts_num_pred_words": 47.746, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.100546719133966, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22057785534846946, "eval_anthropic_toxic_prompts_runtime": 11.4665, "eval_anthropic_toxic_prompts_samples_per_second": 43.605, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.3658021762998863, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00675121611005975, "eval_anthropic_toxic_prompts_token_set_precision": 0.45522586502478696, "eval_anthropic_toxic_prompts_token_set_recall": 0.3318392893769312, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 233125 }, { "epoch": 44.76, "eval_arxiv_accuracy": 0.35034375, "eval_arxiv_bleu_score": 4.569612237114931, "eval_arxiv_bleu_score_sem": 0.12911174905259093, "eval_arxiv_emb_cos_sim": 0.7890654802322388, "eval_arxiv_emb_cos_sim_sem": 0.006090167584808057, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3350062370300293, "eval_arxiv_n_ngrams_match_1": 15.634, "eval_arxiv_n_ngrams_match_2": 3.152, "eval_arxiv_n_ngrams_match_3": 0.74, "eval_arxiv_num_pred_words": 41.216, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 28.078558350066526, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.37022475451948533, "eval_arxiv_runtime": 12.2, "eval_arxiv_samples_per_second": 40.984, "eval_arxiv_steps_per_second": 0.082, "eval_arxiv_token_set_f1": 0.36813210363390825, "eval_arxiv_token_set_f1_sem": 0.0038958119374668844, "eval_arxiv_token_set_precision": 0.3210678923740764, "eval_arxiv_token_set_recall": 0.44954983845232976, "eval_arxiv_true_num_tokens": 64.0, "step": 233125 }, { "epoch": 44.76, "eval_python_code_alpaca_accuracy": 0.16184375, "eval_python_code_alpaca_bleu_score": 4.804922149779122, "eval_python_code_alpaca_bleu_score_sem": 0.15457295137870475, "eval_python_code_alpaca_emb_cos_sim": 0.7771536707878113, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006405953505820438, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8460724353790283, "eval_python_code_alpaca_n_ngrams_match_1": 10.156, "eval_python_code_alpaca_n_ngrams_match_2": 3.036, "eval_python_code_alpaca_n_ngrams_match_3": 1.048, "eval_python_code_alpaca_num_pred_words": 43.692, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.22001612446157, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3481268633301894, "eval_python_code_alpaca_runtime": 11.4053, "eval_python_code_alpaca_samples_per_second": 43.839, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.49094832460599663, "eval_python_code_alpaca_token_set_f1_sem": 0.005440607741651078, "eval_python_code_alpaca_token_set_precision": 0.5537041849175821, "eval_python_code_alpaca_token_set_recall": 0.45950289975435893, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 233125 }, { "epoch": 44.76, "eval_wikibio_accuracy": 0.328125, "eval_wikibio_bleu_score": 6.379971239059081, "eval_wikibio_bleu_score_sem": 0.22358192028378598, "eval_wikibio_emb_cos_sim": 0.7499635219573975, "eval_wikibio_emb_cos_sim_sem": 0.00844267427687358, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.65299129486084, "eval_wikibio_n_ngrams_match_1": 10.352, "eval_wikibio_n_ngrams_match_2": 3.552, "eval_wikibio_n_ngrams_match_3": 1.382, "eval_wikibio_num_pred_words": 36.47, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.589927424291794, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36666395165759413, "eval_wikibio_runtime": 11.13, "eval_wikibio_samples_per_second": 44.924, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.32686702242703836, "eval_wikibio_token_set_f1_sem": 0.005317875481153925, "eval_wikibio_token_set_precision": 0.33673488188853756, "eval_wikibio_token_set_recall": 0.3327432570358703, "eval_wikibio_true_num_tokens": 61.1328125, "step": 233125 }, { "epoch": 44.76, "eval_nq_accuracy": 0.53709375, "eval_nq_bleu_score": 12.168191358668201, "eval_nq_bleu_score_sem": 0.4940694869119727, "eval_nq_emb_cos_sim": 0.8457506895065308, "eval_nq_emb_cos_sim_sem": 0.006445194855962112, "eval_nq_emb_top1_equal": 0.3515625, "eval_nq_emb_top1_equal_sem": 0.04236756101983345, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1120073795318604, "eval_nq_n_ngrams_match_1": 23.566, "eval_nq_n_ngrams_match_2": 8.744, "eval_nq_n_ngrams_match_3": 4.114, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.264815264924167, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4578527317584956, "eval_nq_runtime": 17.739, "eval_nq_samples_per_second": 28.187, "eval_nq_steps_per_second": 0.056, "eval_nq_token_set_f1": 0.4692111460883984, "eval_nq_token_set_f1_sem": 0.004946791072729553, "eval_nq_token_set_precision": 0.4293232239549887, "eval_nq_token_set_recall": 0.5262780931430423, "eval_nq_true_num_tokens": 64.0, "step": 233125 }, { "epoch": 44.76, "learning_rate": 0.001, "loss": 2.496, "step": 233136 }, { "epoch": 44.77, "learning_rate": 0.001, "loss": 2.4939, "step": 233148 }, { "epoch": 44.77, "learning_rate": 0.001, "loss": 2.4871, "step": 233160 }, { "epoch": 44.77, "learning_rate": 0.001, "loss": 2.5019, "step": 233172 }, { "epoch": 44.77, "learning_rate": 0.001, "loss": 2.4989, "step": 233184 }, { "epoch": 44.78, "learning_rate": 0.001, "loss": 2.4917, "step": 233196 }, { "epoch": 44.78, "learning_rate": 0.001, "loss": 2.4912, "step": 233208 }, { "epoch": 44.78, "learning_rate": 0.001, "loss": 2.4954, "step": 233220 }, { "epoch": 44.78, "learning_rate": 0.001, "loss": 2.5069, "step": 233232 }, { "epoch": 44.79, "learning_rate": 0.001, "loss": 2.4995, "step": 233244 }, { "epoch": 44.79, "learning_rate": 0.001, "loss": 2.4963, "step": 233256 }, { "epoch": 44.79, "learning_rate": 0.001, "loss": 2.4936, "step": 233268 }, { "epoch": 44.79, "learning_rate": 0.001, "loss": 2.4811, "step": 233280 }, { "epoch": 44.79, "learning_rate": 0.001, "loss": 2.4866, "step": 233292 }, { "epoch": 44.8, "learning_rate": 0.001, "loss": 2.4928, "step": 233304 }, { "epoch": 44.8, "learning_rate": 0.001, "loss": 2.4913, "step": 233316 }, { "epoch": 44.8, "learning_rate": 0.001, "loss": 2.4912, "step": 233328 }, { "epoch": 44.8, "learning_rate": 0.001, "loss": 2.4866, "step": 233340 }, { "epoch": 44.81, "learning_rate": 0.001, "loss": 2.496, "step": 233352 }, { "epoch": 44.81, "learning_rate": 0.001, "loss": 2.4946, "step": 233364 }, { "epoch": 44.81, "learning_rate": 0.001, "loss": 2.4947, "step": 233376 }, { "epoch": 44.81, "learning_rate": 0.001, "loss": 2.4964, "step": 233388 }, { "epoch": 44.82, "learning_rate": 0.001, "loss": 2.4905, "step": 233400 }, { "epoch": 44.82, "learning_rate": 0.001, "loss": 2.4919, "step": 233412 }, { "epoch": 44.82, "learning_rate": 0.001, "loss": 2.4884, "step": 233424 }, { "epoch": 44.82, "learning_rate": 0.001, "loss": 2.4922, "step": 233436 }, { "epoch": 44.82, "learning_rate": 0.001, "loss": 2.4922, "step": 233448 }, { "epoch": 44.83, "learning_rate": 0.001, "loss": 2.4896, "step": 233460 }, { "epoch": 44.83, "learning_rate": 0.001, "loss": 2.4943, "step": 233472 }, { "epoch": 44.83, "learning_rate": 0.001, "loss": 2.4897, "step": 233484 }, { "epoch": 44.83, "learning_rate": 0.001, "loss": 2.4891, "step": 233496 }, { "epoch": 44.84, "learning_rate": 0.001, "loss": 2.4918, "step": 233508 }, { "epoch": 44.84, "learning_rate": 0.001, "loss": 2.4839, "step": 233520 }, { "epoch": 44.84, "learning_rate": 0.001, "loss": 2.4916, "step": 233532 }, { "epoch": 44.84, "learning_rate": 0.001, "loss": 2.498, "step": 233544 }, { "epoch": 44.85, "learning_rate": 0.001, "loss": 2.5056, "step": 233556 }, { "epoch": 44.85, "learning_rate": 0.001, "loss": 2.5037, "step": 233568 }, { "epoch": 44.85, "learning_rate": 0.001, "loss": 2.4808, "step": 233580 }, { "epoch": 44.85, "learning_rate": 0.001, "loss": 2.4852, "step": 233592 }, { "epoch": 44.85, "learning_rate": 0.001, "loss": 2.4888, "step": 233604 }, { "epoch": 44.86, "learning_rate": 0.001, "loss": 2.4991, "step": 233616 }, { "epoch": 44.86, "learning_rate": 0.001, "loss": 2.4974, "step": 233628 }, { "epoch": 44.86, "learning_rate": 0.001, "loss": 2.4895, "step": 233640 }, { "epoch": 44.86, "learning_rate": 0.001, "loss": 2.4957, "step": 233652 }, { "epoch": 44.87, "learning_rate": 0.001, "loss": 2.5009, "step": 233664 }, { "epoch": 44.87, "learning_rate": 0.001, "loss": 2.4995, "step": 233676 }, { "epoch": 44.87, "learning_rate": 0.001, "loss": 2.5021, "step": 233688 }, { "epoch": 44.87, "learning_rate": 0.001, "loss": 2.4988, "step": 233700 }, { "epoch": 44.88, "learning_rate": 0.001, "loss": 2.4998, "step": 233712 }, { "epoch": 44.88, "learning_rate": 0.001, "loss": 2.4975, "step": 233724 }, { "epoch": 44.88, "learning_rate": 0.001, "loss": 2.4963, "step": 233736 }, { "epoch": 44.88, "learning_rate": 0.001, "loss": 2.4928, "step": 233748 }, { "epoch": 44.88, "eval_ag_news_accuracy": 0.329, "eval_ag_news_bleu_score": 4.943506182483064, "eval_ag_news_bleu_score_sem": 0.1591658295276275, "eval_ag_news_emb_cos_sim": 0.8237883448600769, "eval_ag_news_emb_cos_sim_sem": 0.006359255456214875, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4656832218170166, "eval_ag_news_n_ngrams_match_1": 14.462, "eval_ag_news_n_ngrams_match_2": 3.232, "eval_ag_news_n_ngrams_match_3": 0.884, "eval_ag_news_num_pred_words": 47.176, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.998314252957076, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.359527296304229, "eval_ag_news_runtime": 11.5777, "eval_ag_news_samples_per_second": 43.186, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.3616526399749545, "eval_ag_news_token_set_f1_sem": 0.004410987666602623, "eval_ag_news_token_set_precision": 0.34805479304943676, "eval_ag_news_token_set_recall": 0.39039921147470874, "eval_ag_news_true_num_tokens": 56.09375, "step": 233750 }, { "epoch": 44.88, "eval_anthropic_toxic_prompts_accuracy": 0.11665625, "eval_anthropic_toxic_prompts_bleu_score": 3.184933979162106, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11634765300963519, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6902061700820923, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008550291644734592, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.194072961807251, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.516, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.762, "eval_anthropic_toxic_prompts_num_pred_words": 48.248, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.38755501479955, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2202184690163958, "eval_anthropic_toxic_prompts_runtime": 11.3634, "eval_anthropic_toxic_prompts_samples_per_second": 44.001, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.3637381932820986, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503239882816224, "eval_anthropic_toxic_prompts_token_set_precision": 0.4596421768650673, "eval_anthropic_toxic_prompts_token_set_recall": 0.3256649891482098, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 233750 }, { "epoch": 44.88, "eval_arxiv_accuracy": 0.35296875, "eval_arxiv_bleu_score": 4.454903348532187, "eval_arxiv_bleu_score_sem": 0.12382288765593584, "eval_arxiv_emb_cos_sim": 0.7839063405990601, "eval_arxiv_emb_cos_sim_sem": 0.006648951819454505, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3281784057617188, "eval_arxiv_n_ngrams_match_1": 15.612, "eval_arxiv_n_ngrams_match_2": 3.132, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.982, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.887495705410885, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3710272278631155, "eval_arxiv_runtime": 11.9622, "eval_arxiv_samples_per_second": 41.798, "eval_arxiv_steps_per_second": 0.084, "eval_arxiv_token_set_f1": 0.3629057381987348, "eval_arxiv_token_set_f1_sem": 0.0039950649701458955, "eval_arxiv_token_set_precision": 0.31690083419007586, "eval_arxiv_token_set_recall": 0.44308012254810736, "eval_arxiv_true_num_tokens": 64.0, "step": 233750 }, { "epoch": 44.88, "eval_python_code_alpaca_accuracy": 0.1655625, "eval_python_code_alpaca_bleu_score": 4.945063139850272, "eval_python_code_alpaca_bleu_score_sem": 0.15143180572827783, "eval_python_code_alpaca_emb_cos_sim": 0.7870513200759888, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006154739849449286, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8081815242767334, "eval_python_code_alpaca_n_ngrams_match_1": 10.462, "eval_python_code_alpaca_n_ngrams_match_2": 3.16, "eval_python_code_alpaca_n_ngrams_match_3": 1.128, "eval_python_code_alpaca_num_pred_words": 44.406, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.579740932387736, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35518891420517174, "eval_python_code_alpaca_runtime": 11.8525, "eval_python_code_alpaca_samples_per_second": 42.185, "eval_python_code_alpaca_steps_per_second": 0.084, "eval_python_code_alpaca_token_set_f1": 0.4918032318275741, "eval_python_code_alpaca_token_set_f1_sem": 0.0052360135457094606, "eval_python_code_alpaca_token_set_precision": 0.576674056168738, "eval_python_code_alpaca_token_set_recall": 0.44628443025629405, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 233750 }, { "epoch": 44.88, "eval_wikibio_accuracy": 0.3314375, "eval_wikibio_bleu_score": 6.261302634399697, "eval_wikibio_bleu_score_sem": 0.2242887877383055, "eval_wikibio_emb_cos_sim": 0.7429580688476562, "eval_wikibio_emb_cos_sim_sem": 0.009413135098339775, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.590331554412842, "eval_wikibio_n_ngrams_match_1": 10.24, "eval_wikibio_n_ngrams_match_2": 3.548, "eval_wikibio_n_ngrams_match_3": 1.372, "eval_wikibio_num_pred_words": 36.704, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.24609148604096, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35995438205822317, "eval_wikibio_runtime": 14.7028, "eval_wikibio_samples_per_second": 34.007, "eval_wikibio_steps_per_second": 0.068, "eval_wikibio_token_set_f1": 0.3221930267981259, "eval_wikibio_token_set_f1_sem": 0.00556499696088582, "eval_wikibio_token_set_precision": 0.3322789642795646, "eval_wikibio_token_set_recall": 0.3300855178762037, "eval_wikibio_true_num_tokens": 61.1328125, "step": 233750 }, { "epoch": 44.88, "eval_nq_accuracy": 0.53653125, "eval_nq_bleu_score": 12.311711459786745, "eval_nq_bleu_score_sem": 0.48828531255976293, "eval_nq_emb_cos_sim": 0.8420907258987427, "eval_nq_emb_cos_sim_sem": 0.006898221452438093, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.113239288330078, "eval_nq_n_ngrams_match_1": 23.698, "eval_nq_n_ngrams_match_2": 8.886, "eval_nq_n_ngrams_match_3": 4.17, "eval_nq_num_pred_words": 49.252, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.275003037479582, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46208923471860586, "eval_nq_runtime": 29.667, "eval_nq_samples_per_second": 16.854, "eval_nq_steps_per_second": 0.034, "eval_nq_token_set_f1": 0.47194581999061097, "eval_nq_token_set_f1_sem": 0.004866567798190057, "eval_nq_token_set_precision": 0.4309898560778709, "eval_nq_token_set_recall": 0.5296490932118251, "eval_nq_true_num_tokens": 64.0, "step": 233750 }, { "epoch": 44.88, "learning_rate": 0.001, "loss": 2.4936, "step": 233760 }, { "epoch": 44.89, "learning_rate": 0.001, "loss": 2.5009, "step": 233772 }, { "epoch": 44.89, "learning_rate": 0.001, "loss": 2.4977, "step": 233784 }, { "epoch": 44.89, "learning_rate": 0.001, "loss": 2.4999, "step": 233796 }, { "epoch": 44.89, "learning_rate": 0.001, "loss": 2.4979, "step": 233808 }, { "epoch": 44.9, "learning_rate": 0.001, "loss": 2.4917, "step": 233820 }, { "epoch": 44.9, "learning_rate": 0.001, "loss": 2.4928, "step": 233832 }, { "epoch": 44.9, "learning_rate": 0.001, "loss": 2.4927, "step": 233844 }, { "epoch": 44.9, "learning_rate": 0.001, "loss": 2.4866, "step": 233856 }, { "epoch": 44.91, "learning_rate": 0.001, "loss": 2.502, "step": 233868 }, { "epoch": 44.91, "learning_rate": 0.001, "loss": 2.5062, "step": 233880 }, { "epoch": 44.91, "learning_rate": 0.001, "loss": 2.4851, "step": 233892 }, { "epoch": 44.91, "learning_rate": 0.001, "loss": 2.4965, "step": 233904 }, { "epoch": 44.91, "learning_rate": 0.001, "loss": 2.4966, "step": 233916 }, { "epoch": 44.92, "learning_rate": 0.001, "loss": 2.4893, "step": 233928 }, { "epoch": 44.92, "learning_rate": 0.001, "loss": 2.5014, "step": 233940 }, { "epoch": 44.92, "learning_rate": 0.001, "loss": 2.4996, "step": 233952 }, { "epoch": 44.92, "learning_rate": 0.001, "loss": 2.4869, "step": 233964 }, { "epoch": 44.93, "learning_rate": 0.001, "loss": 2.499, "step": 233976 }, { "epoch": 44.93, "learning_rate": 0.001, "loss": 2.4837, "step": 233988 }, { "epoch": 44.93, "learning_rate": 0.001, "loss": 2.4941, "step": 234000 }, { "epoch": 44.93, "learning_rate": 0.001, "loss": 2.4946, "step": 234012 }, { "epoch": 44.94, "learning_rate": 0.001, "loss": 2.5021, "step": 234024 }, { "epoch": 44.94, "learning_rate": 0.001, "loss": 2.4807, "step": 234036 }, { "epoch": 44.94, "learning_rate": 0.001, "loss": 2.4962, "step": 234048 }, { "epoch": 44.94, "learning_rate": 0.001, "loss": 2.488, "step": 234060 }, { "epoch": 44.94, "learning_rate": 0.001, "loss": 2.4986, "step": 234072 }, { "epoch": 44.95, "learning_rate": 0.001, "loss": 2.485, "step": 234084 }, { "epoch": 44.95, "learning_rate": 0.001, "loss": 2.5094, "step": 234096 }, { "epoch": 44.95, "learning_rate": 0.001, "loss": 2.5016, "step": 234108 }, { "epoch": 44.95, "learning_rate": 0.001, "loss": 2.4944, "step": 234120 }, { "epoch": 44.96, "learning_rate": 0.001, "loss": 2.4911, "step": 234132 }, { "epoch": 44.96, "learning_rate": 0.001, "loss": 2.4909, "step": 234144 }, { "epoch": 44.96, "learning_rate": 0.001, "loss": 2.4912, "step": 234156 }, { "epoch": 44.96, "learning_rate": 0.001, "loss": 2.4871, "step": 234168 }, { "epoch": 44.97, "learning_rate": 0.001, "loss": 2.4906, "step": 234180 }, { "epoch": 44.97, "learning_rate": 0.001, "loss": 2.485, "step": 234192 }, { "epoch": 44.97, "learning_rate": 0.001, "loss": 2.4915, "step": 234204 }, { "epoch": 44.97, "learning_rate": 0.001, "loss": 2.5007, "step": 234216 }, { "epoch": 44.97, "learning_rate": 0.001, "loss": 2.4984, "step": 234228 }, { "epoch": 44.98, "learning_rate": 0.001, "loss": 2.4972, "step": 234240 }, { "epoch": 44.98, "learning_rate": 0.001, "loss": 2.494, "step": 234252 }, { "epoch": 44.98, "learning_rate": 0.001, "loss": 2.4944, "step": 234264 }, { "epoch": 44.98, "learning_rate": 0.001, "loss": 2.5054, "step": 234276 }, { "epoch": 44.99, "learning_rate": 0.001, "loss": 2.4946, "step": 234288 }, { "epoch": 44.99, "learning_rate": 0.001, "loss": 2.4825, "step": 234300 }, { "epoch": 44.99, "learning_rate": 0.001, "loss": 2.5043, "step": 234312 }, { "epoch": 44.99, "learning_rate": 0.001, "loss": 2.4869, "step": 234324 }, { "epoch": 45.0, "learning_rate": 0.001, "loss": 2.4982, "step": 234336 }, { "epoch": 45.0, "learning_rate": 0.001, "loss": 2.493, "step": 234348 }, { "epoch": 45.0, "learning_rate": 0.001, "loss": 2.4946, "step": 234360 }, { "epoch": 45.0, "learning_rate": 0.001, "loss": 2.4846, "step": 234372 }, { "epoch": 45.0, "eval_ag_news_accuracy": 0.32984375, "eval_ag_news_bleu_score": 5.050963469957959, "eval_ag_news_bleu_score_sem": 0.16074897571484204, "eval_ag_news_emb_cos_sim": 0.8206876516342163, "eval_ag_news_emb_cos_sim_sem": 0.007737273668603383, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4585185050964355, "eval_ag_news_n_ngrams_match_1": 14.654, "eval_ag_news_n_ngrams_match_2": 3.346, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 47.044, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.769874725259882, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3629810314376243, "eval_ag_news_runtime": 11.6887, "eval_ag_news_samples_per_second": 42.776, "eval_ag_news_steps_per_second": 0.086, "eval_ag_news_token_set_f1": 0.36353585810266753, "eval_ag_news_token_set_f1_sem": 0.0045619068841447575, "eval_ag_news_token_set_precision": 0.34873619528640054, "eval_ag_news_token_set_recall": 0.39487507795527493, "eval_ag_news_true_num_tokens": 56.09375, "step": 234375 }, { "epoch": 45.0, "eval_anthropic_toxic_prompts_accuracy": 0.11584375, "eval_anthropic_toxic_prompts_bleu_score": 3.1859433383747104, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12730083966736477, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6858948469161987, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008169146498393943, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2058634757995605, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.31, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.928, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75, "eval_anthropic_toxic_prompts_num_pred_words": 47.494, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.676798638145378, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21730135124581162, "eval_anthropic_toxic_prompts_runtime": 11.1916, "eval_anthropic_toxic_prompts_samples_per_second": 44.676, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.3574979096810616, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006470218294475449, "eval_anthropic_toxic_prompts_token_set_precision": 0.4433849828834597, "eval_anthropic_toxic_prompts_token_set_recall": 0.32354975873003256, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 234375 }, { "epoch": 45.0, "eval_arxiv_accuracy": 0.35515625, "eval_arxiv_bleu_score": 4.482566132989462, "eval_arxiv_bleu_score_sem": 0.1315923904064372, "eval_arxiv_emb_cos_sim": 0.7830665111541748, "eval_arxiv_emb_cos_sim_sem": 0.006992334048073903, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3218233585357666, "eval_arxiv_n_ngrams_match_1": 15.624, "eval_arxiv_n_ngrams_match_2": 3.098, "eval_arxiv_n_ngrams_match_3": 0.698, "eval_arxiv_num_pred_words": 40.482, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.710831303072315, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.3748865835741546, "eval_arxiv_runtime": 11.6649, "eval_arxiv_samples_per_second": 42.864, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.366400106839306, "eval_arxiv_token_set_f1_sem": 0.0042245727621743655, "eval_arxiv_token_set_precision": 0.3189128701744615, "eval_arxiv_token_set_recall": 0.4514187899904173, "eval_arxiv_true_num_tokens": 64.0, "step": 234375 }, { "epoch": 45.0, "eval_python_code_alpaca_accuracy": 0.1626875, "eval_python_code_alpaca_bleu_score": 4.646057821441524, "eval_python_code_alpaca_bleu_score_sem": 0.14654806645109208, "eval_python_code_alpaca_emb_cos_sim": 0.7748706936836243, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007797053704860023, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8313002586364746, "eval_python_code_alpaca_n_ngrams_match_1": 10.036, "eval_python_code_alpaca_n_ngrams_match_2": 3.028, "eval_python_code_alpaca_n_ngrams_match_3": 0.974, "eval_python_code_alpaca_num_pred_words": 44.142, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.967508637150246, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3426050111183082, "eval_python_code_alpaca_runtime": 27.0116, "eval_python_code_alpaca_samples_per_second": 18.511, "eval_python_code_alpaca_steps_per_second": 0.037, "eval_python_code_alpaca_token_set_f1": 0.48563724798043634, "eval_python_code_alpaca_token_set_f1_sem": 0.00547676951873612, "eval_python_code_alpaca_token_set_precision": 0.5504174307459256, "eval_python_code_alpaca_token_set_recall": 0.4555004170217813, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 234375 }, { "epoch": 45.0, "eval_wikibio_accuracy": 0.33025, "eval_wikibio_bleu_score": 6.323590258839978, "eval_wikibio_bleu_score_sem": 0.2148147875379222, "eval_wikibio_emb_cos_sim": 0.759397029876709, "eval_wikibio_emb_cos_sim_sem": 0.008260989749655244, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6340675354003906, "eval_wikibio_n_ngrams_match_1": 10.504, "eval_wikibio_n_ngrams_match_2": 3.626, "eval_wikibio_n_ngrams_match_3": 1.354, "eval_wikibio_num_pred_words": 37.208, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.866527233580925, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.37195459576758305, "eval_wikibio_runtime": 24.9962, "eval_wikibio_samples_per_second": 20.003, "eval_wikibio_steps_per_second": 0.04, "eval_wikibio_token_set_f1": 0.3275260511089058, "eval_wikibio_token_set_f1_sem": 0.005317047030055768, "eval_wikibio_token_set_precision": 0.34032125816446246, "eval_wikibio_token_set_recall": 0.3283904764364292, "eval_wikibio_true_num_tokens": 61.1328125, "step": 234375 }, { "epoch": 45.0, "eval_nq_accuracy": 0.53621875, "eval_nq_bleu_score": 12.472245564420644, "eval_nq_bleu_score_sem": 0.509516496709447, "eval_nq_emb_cos_sim": 0.8428362607955933, "eval_nq_emb_cos_sim_sem": 0.006838735785576522, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.113086462020874, "eval_nq_n_ngrams_match_1": 23.714, "eval_nq_n_ngrams_match_2": 8.988, "eval_nq_n_ngrams_match_3": 4.242, "eval_nq_num_pred_words": 48.972, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.27373849593678, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46227421514202416, "eval_nq_runtime": 11.9422, "eval_nq_samples_per_second": 41.868, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.47320994443134756, "eval_nq_token_set_f1_sem": 0.004999096680014144, "eval_nq_token_set_precision": 0.431642811161382, "eval_nq_token_set_recall": 0.5322724941503796, "eval_nq_true_num_tokens": 64.0, "step": 234375 }, { "epoch": 45.0, "learning_rate": 0.001, "loss": 2.4801, "step": 234384 }, { "epoch": 45.01, "learning_rate": 0.001, "loss": 2.477, "step": 234396 }, { "epoch": 45.01, "learning_rate": 0.001, "loss": 2.4764, "step": 234408 }, { "epoch": 45.01, "learning_rate": 0.001, "loss": 2.4792, "step": 234420 }, { "epoch": 45.01, "learning_rate": 0.001, "loss": 2.4838, "step": 234432 }, { "epoch": 45.02, "learning_rate": 0.001, "loss": 2.4829, "step": 234444 }, { "epoch": 45.02, "learning_rate": 0.001, "loss": 2.4887, "step": 234456 }, { "epoch": 45.02, "learning_rate": 0.001, "loss": 2.4723, "step": 234468 }, { "epoch": 45.02, "learning_rate": 0.001, "loss": 2.4801, "step": 234480 }, { "epoch": 45.03, "learning_rate": 0.001, "loss": 2.476, "step": 234492 }, { "epoch": 45.03, "learning_rate": 0.001, "loss": 2.4773, "step": 234504 }, { "epoch": 45.03, "learning_rate": 0.001, "loss": 2.4837, "step": 234516 }, { "epoch": 45.03, "learning_rate": 0.001, "loss": 2.4898, "step": 234528 }, { "epoch": 45.03, "learning_rate": 0.001, "loss": 2.4707, "step": 234540 }, { "epoch": 45.04, "learning_rate": 0.001, "loss": 2.483, "step": 234552 }, { "epoch": 45.04, "learning_rate": 0.001, "loss": 2.481, "step": 234564 }, { "epoch": 45.04, "learning_rate": 0.001, "loss": 2.4787, "step": 234576 }, { "epoch": 45.04, "learning_rate": 0.001, "loss": 2.472, "step": 234588 }, { "epoch": 45.05, "learning_rate": 0.001, "loss": 2.4803, "step": 234600 }, { "epoch": 45.05, "learning_rate": 0.001, "loss": 2.4873, "step": 234612 }, { "epoch": 45.05, "learning_rate": 0.001, "loss": 2.4759, "step": 234624 }, { "epoch": 45.05, "learning_rate": 0.001, "loss": 2.4881, "step": 234636 }, { "epoch": 45.06, "learning_rate": 0.001, "loss": 2.4757, "step": 234648 }, { "epoch": 45.06, "learning_rate": 0.001, "loss": 2.4856, "step": 234660 }, { "epoch": 45.06, "learning_rate": 0.001, "loss": 2.4718, "step": 234672 }, { "epoch": 45.06, "learning_rate": 0.001, "loss": 2.4865, "step": 234684 }, { "epoch": 45.06, "learning_rate": 0.001, "loss": 2.4885, "step": 234696 }, { "epoch": 45.07, "learning_rate": 0.001, "loss": 2.4793, "step": 234708 }, { "epoch": 45.07, "learning_rate": 0.001, "loss": 2.4752, "step": 234720 }, { "epoch": 45.07, "learning_rate": 0.001, "loss": 2.4753, "step": 234732 }, { "epoch": 45.07, "learning_rate": 0.001, "loss": 2.4695, "step": 234744 }, { "epoch": 45.08, "learning_rate": 0.001, "loss": 2.4742, "step": 234756 }, { "epoch": 45.08, "learning_rate": 0.001, "loss": 2.493, "step": 234768 }, { "epoch": 45.08, "learning_rate": 0.001, "loss": 2.4774, "step": 234780 }, { "epoch": 45.08, "learning_rate": 0.001, "loss": 2.4728, "step": 234792 }, { "epoch": 45.09, "learning_rate": 0.001, "loss": 2.481, "step": 234804 }, { "epoch": 45.09, "learning_rate": 0.001, "loss": 2.4787, "step": 234816 }, { "epoch": 45.09, "learning_rate": 0.001, "loss": 2.4888, "step": 234828 }, { "epoch": 45.09, "learning_rate": 0.001, "loss": 2.4741, "step": 234840 }, { "epoch": 45.09, "learning_rate": 0.001, "loss": 2.4903, "step": 234852 }, { "epoch": 45.1, "learning_rate": 0.001, "loss": 2.4823, "step": 234864 }, { "epoch": 45.1, "learning_rate": 0.001, "loss": 2.4775, "step": 234876 }, { "epoch": 45.1, "learning_rate": 0.001, "loss": 2.484, "step": 234888 }, { "epoch": 45.1, "learning_rate": 0.001, "loss": 2.477, "step": 234900 }, { "epoch": 45.11, "learning_rate": 0.001, "loss": 2.4758, "step": 234912 }, { "epoch": 45.11, "learning_rate": 0.001, "loss": 2.4799, "step": 234924 }, { "epoch": 45.11, "learning_rate": 0.001, "loss": 2.4794, "step": 234936 }, { "epoch": 45.11, "learning_rate": 0.001, "loss": 2.4803, "step": 234948 }, { "epoch": 45.12, "learning_rate": 0.001, "loss": 2.4785, "step": 234960 }, { "epoch": 45.12, "learning_rate": 0.001, "loss": 2.491, "step": 234972 }, { "epoch": 45.12, "learning_rate": 0.001, "loss": 2.4804, "step": 234984 }, { "epoch": 45.12, "learning_rate": 0.001, "loss": 2.4834, "step": 234996 }, { "epoch": 45.12, "eval_ag_news_accuracy": 0.330125, "eval_ag_news_bleu_score": 5.080505909630914, "eval_ag_news_bleu_score_sem": 0.15953387709149533, "eval_ag_news_emb_cos_sim": 0.8225025534629822, "eval_ag_news_emb_cos_sim_sem": 0.006679532650771699, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.459632635116577, "eval_ag_news_n_ngrams_match_1": 14.422, "eval_ag_news_n_ngrams_match_2": 3.298, "eval_ag_news_n_ngrams_match_3": 0.964, "eval_ag_news_num_pred_words": 46.58, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.805290221497806, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35965960931803664, "eval_ag_news_runtime": 13.3014, "eval_ag_news_samples_per_second": 37.59, "eval_ag_news_steps_per_second": 0.075, "eval_ag_news_token_set_f1": 0.36084064419133377, "eval_ag_news_token_set_f1_sem": 0.0045625628717824605, "eval_ag_news_token_set_precision": 0.3450969952602633, "eval_ag_news_token_set_recall": 0.3936605413583107, "eval_ag_news_true_num_tokens": 56.09375, "step": 235000 }, { "epoch": 45.12, "eval_anthropic_toxic_prompts_accuracy": 0.11809375, "eval_anthropic_toxic_prompts_bleu_score": 3.249449969364471, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12182234326896128, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6863254308700562, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009197239183931736, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1501572132110596, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.042, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 47.702, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.339733606991103, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2175672514017648, "eval_anthropic_toxic_prompts_runtime": 11.213, "eval_anthropic_toxic_prompts_samples_per_second": 44.591, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.36769985522783755, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006739889351686934, "eval_anthropic_toxic_prompts_token_set_precision": 0.44911581742473733, "eval_anthropic_toxic_prompts_token_set_recall": 0.3395485151405642, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 235000 }, { "epoch": 45.12, "eval_arxiv_accuracy": 0.35675, "eval_arxiv_bleu_score": 4.498174501227942, "eval_arxiv_bleu_score_sem": 0.13092403510269388, "eval_arxiv_emb_cos_sim": 0.7803041934967041, "eval_arxiv_emb_cos_sim_sem": 0.007107721582584603, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.320892095565796, "eval_arxiv_n_ngrams_match_1": 15.354, "eval_arxiv_n_ngrams_match_2": 3.082, "eval_arxiv_n_ngrams_match_3": 0.718, "eval_arxiv_num_pred_words": 40.28, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.68503724440266, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37205666210677635, "eval_arxiv_runtime": 15.5522, "eval_arxiv_samples_per_second": 32.15, "eval_arxiv_steps_per_second": 0.064, "eval_arxiv_token_set_f1": 0.3642742211446164, "eval_arxiv_token_set_f1_sem": 0.0042574172954471445, "eval_arxiv_token_set_precision": 0.31586787043883907, "eval_arxiv_token_set_recall": 0.45116233598688793, "eval_arxiv_true_num_tokens": 64.0, "step": 235000 }, { "epoch": 45.12, "eval_python_code_alpaca_accuracy": 0.1645, "eval_python_code_alpaca_bleu_score": 4.923737753922552, "eval_python_code_alpaca_bleu_score_sem": 0.1553692776982092, "eval_python_code_alpaca_emb_cos_sim": 0.7769338488578796, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007851296040349036, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8087265491485596, "eval_python_code_alpaca_n_ngrams_match_1": 10.12, "eval_python_code_alpaca_n_ngrams_match_2": 3.078, "eval_python_code_alpaca_n_ngrams_match_3": 1.06, "eval_python_code_alpaca_num_pred_words": 43.334, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.588779766535286, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.346360684279525, "eval_python_code_alpaca_runtime": 11.3833, "eval_python_code_alpaca_samples_per_second": 43.924, "eval_python_code_alpaca_steps_per_second": 0.088, "eval_python_code_alpaca_token_set_f1": 0.49147143962269535, "eval_python_code_alpaca_token_set_f1_sem": 0.005240694562867404, "eval_python_code_alpaca_token_set_precision": 0.5548385292059235, "eval_python_code_alpaca_token_set_recall": 0.4614492581547742, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 235000 }, { "epoch": 45.12, "eval_wikibio_accuracy": 0.3329375, "eval_wikibio_bleu_score": 6.1282826144213765, "eval_wikibio_bleu_score_sem": 0.2135608913666631, "eval_wikibio_emb_cos_sim": 0.7597454786300659, "eval_wikibio_emb_cos_sim_sem": 0.008223369736092223, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6424877643585205, "eval_wikibio_n_ngrams_match_1": 10.064, "eval_wikibio_n_ngrams_match_2": 3.428, "eval_wikibio_n_ngrams_match_3": 1.272, "eval_wikibio_num_pred_words": 35.324, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.186718211609985, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36334704713539007, "eval_wikibio_runtime": 11.3461, "eval_wikibio_samples_per_second": 44.068, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.32353777314613724, "eval_wikibio_token_set_f1_sem": 0.005704797195882945, "eval_wikibio_token_set_precision": 0.32894446201880306, "eval_wikibio_token_set_recall": 0.3358082802925566, "eval_wikibio_true_num_tokens": 61.1328125, "step": 235000 }, { "epoch": 45.12, "eval_nq_accuracy": 0.537, "eval_nq_bleu_score": 12.37910922158707, "eval_nq_bleu_score_sem": 0.4889500774094668, "eval_nq_emb_cos_sim": 0.841946005821228, "eval_nq_emb_cos_sim_sem": 0.006509863268101507, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.110452651977539, "eval_nq_n_ngrams_match_1": 23.926, "eval_nq_n_ngrams_match_2": 8.974, "eval_nq_n_ngrams_match_3": 4.146, "eval_nq_num_pred_words": 49.174, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.251975712489681, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4646920390789369, "eval_nq_runtime": 11.9196, "eval_nq_samples_per_second": 41.948, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.47868285087664814, "eval_nq_token_set_f1_sem": 0.0046898240743526816, "eval_nq_token_set_precision": 0.43710833309811864, "eval_nq_token_set_recall": 0.5358058398409047, "eval_nq_true_num_tokens": 64.0, "step": 235000 }, { "epoch": 45.12, "learning_rate": 0.001, "loss": 2.4788, "step": 235008 }, { "epoch": 45.13, "learning_rate": 0.001, "loss": 2.4859, "step": 235020 }, { "epoch": 45.13, "learning_rate": 0.001, "loss": 2.4807, "step": 235032 }, { "epoch": 45.13, "learning_rate": 0.001, "loss": 2.4746, "step": 235044 }, { "epoch": 45.13, "learning_rate": 0.001, "loss": 2.4748, "step": 235056 }, { "epoch": 45.14, "learning_rate": 0.001, "loss": 2.4802, "step": 235068 }, { "epoch": 45.14, "learning_rate": 0.001, "loss": 2.4775, "step": 235080 }, { "epoch": 45.14, "learning_rate": 0.001, "loss": 2.4815, "step": 235092 }, { "epoch": 45.14, "learning_rate": 0.001, "loss": 2.4754, "step": 235104 }, { "epoch": 45.15, "learning_rate": 0.001, "loss": 2.4854, "step": 235116 }, { "epoch": 45.15, "learning_rate": 0.001, "loss": 2.4821, "step": 235128 }, { "epoch": 45.15, "learning_rate": 0.001, "loss": 2.4879, "step": 235140 }, { "epoch": 45.15, "learning_rate": 0.001, "loss": 2.4838, "step": 235152 }, { "epoch": 45.15, "learning_rate": 0.001, "loss": 2.4854, "step": 235164 }, { "epoch": 45.16, "learning_rate": 0.001, "loss": 2.4903, "step": 235176 }, { "epoch": 45.16, "learning_rate": 0.001, "loss": 2.493, "step": 235188 }, { "epoch": 45.16, "learning_rate": 0.001, "loss": 2.4809, "step": 235200 }, { "epoch": 45.16, "learning_rate": 0.001, "loss": 2.488, "step": 235212 }, { "epoch": 45.17, "learning_rate": 0.001, "loss": 2.4888, "step": 235224 }, { "epoch": 45.17, "learning_rate": 0.001, "loss": 2.4977, "step": 235236 }, { "epoch": 45.17, "learning_rate": 0.001, "loss": 2.4877, "step": 235248 }, { "epoch": 45.17, "learning_rate": 0.001, "loss": 2.4819, "step": 235260 }, { "epoch": 45.18, "learning_rate": 0.001, "loss": 2.4871, "step": 235272 }, { "epoch": 45.18, "learning_rate": 0.001, "loss": 2.4834, "step": 235284 }, { "epoch": 45.18, "learning_rate": 0.001, "loss": 2.4889, "step": 235296 }, { "epoch": 45.18, "learning_rate": 0.001, "loss": 2.474, "step": 235308 }, { "epoch": 45.18, "learning_rate": 0.001, "loss": 2.4851, "step": 235320 }, { "epoch": 45.19, "learning_rate": 0.001, "loss": 2.4804, "step": 235332 }, { "epoch": 45.19, "learning_rate": 0.001, "loss": 2.4821, "step": 235344 }, { "epoch": 45.19, "learning_rate": 0.001, "loss": 2.4839, "step": 235356 }, { "epoch": 45.19, "learning_rate": 0.001, "loss": 2.4788, "step": 235368 }, { "epoch": 45.2, "learning_rate": 0.001, "loss": 2.4817, "step": 235380 }, { "epoch": 45.2, "learning_rate": 0.001, "loss": 2.4835, "step": 235392 }, { "epoch": 45.2, "learning_rate": 0.001, "loss": 2.4819, "step": 235404 }, { "epoch": 45.2, "learning_rate": 0.001, "loss": 2.4852, "step": 235416 }, { "epoch": 45.21, "learning_rate": 0.001, "loss": 2.4818, "step": 235428 }, { "epoch": 45.21, "learning_rate": 0.001, "loss": 2.4874, "step": 235440 }, { "epoch": 45.21, "learning_rate": 0.001, "loss": 2.4866, "step": 235452 }, { "epoch": 45.21, "learning_rate": 0.001, "loss": 2.4863, "step": 235464 }, { "epoch": 45.21, "learning_rate": 0.001, "loss": 2.4895, "step": 235476 }, { "epoch": 45.22, "learning_rate": 0.001, "loss": 2.4828, "step": 235488 }, { "epoch": 45.22, "learning_rate": 0.001, "loss": 2.4958, "step": 235500 }, { "epoch": 45.22, "learning_rate": 0.001, "loss": 2.4881, "step": 235512 }, { "epoch": 45.22, "learning_rate": 0.001, "loss": 2.4933, "step": 235524 }, { "epoch": 45.23, "learning_rate": 0.001, "loss": 2.4851, "step": 235536 }, { "epoch": 45.23, "learning_rate": 0.001, "loss": 2.4861, "step": 235548 }, { "epoch": 45.23, "learning_rate": 0.001, "loss": 2.4716, "step": 235560 }, { "epoch": 45.23, "learning_rate": 0.001, "loss": 2.499, "step": 235572 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 2.4909, "step": 235584 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 2.4838, "step": 235596 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 2.4943, "step": 235608 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 2.4925, "step": 235620 }, { "epoch": 45.24, "eval_ag_news_accuracy": 0.3299375, "eval_ag_news_bleu_score": 4.866871206895734, "eval_ag_news_bleu_score_sem": 0.15012628398864336, "eval_ag_news_emb_cos_sim": 0.8280687928199768, "eval_ag_news_emb_cos_sim_sem": 0.006476843846471254, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4598886966705322, "eval_ag_news_n_ngrams_match_1": 14.544, "eval_ag_news_n_ngrams_match_2": 3.278, "eval_ag_news_n_ngrams_match_3": 0.896, "eval_ag_news_num_pred_words": 47.016, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.81343537632191, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3621607706413642, "eval_ag_news_runtime": 15.6836, "eval_ag_news_samples_per_second": 31.88, "eval_ag_news_steps_per_second": 0.064, "eval_ag_news_token_set_f1": 0.3610874182248644, "eval_ag_news_token_set_f1_sem": 0.004483335555714829, "eval_ag_news_token_set_precision": 0.34740491236900384, "eval_ag_news_token_set_recall": 0.39075741696767335, "eval_ag_news_true_num_tokens": 56.09375, "step": 235625 }, { "epoch": 45.24, "eval_anthropic_toxic_prompts_accuracy": 0.1161875, "eval_anthropic_toxic_prompts_bleu_score": 3.215478882411253, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12096969487830865, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.676385760307312, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009648151193992583, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.203469753265381, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.328, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.98, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.744, "eval_anthropic_toxic_prompts_num_pred_words": 47.256, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.61779987068487, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21867117862093788, "eval_anthropic_toxic_prompts_runtime": 25.1762, "eval_anthropic_toxic_prompts_samples_per_second": 19.86, "eval_anthropic_toxic_prompts_steps_per_second": 0.04, "eval_anthropic_toxic_prompts_token_set_f1": 0.36282506843024004, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006601876380974217, "eval_anthropic_toxic_prompts_token_set_precision": 0.4509802352407291, "eval_anthropic_toxic_prompts_token_set_recall": 0.3330317478483652, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 235625 }, { "epoch": 45.24, "eval_arxiv_accuracy": 0.35421875, "eval_arxiv_bleu_score": 4.614085119744084, "eval_arxiv_bleu_score_sem": 0.1369909901083598, "eval_arxiv_emb_cos_sim": 0.7892141938209534, "eval_arxiv_emb_cos_sim_sem": 0.00709441964206786, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3293392658233643, "eval_arxiv_n_ngrams_match_1": 15.962, "eval_arxiv_n_ngrams_match_2": 3.146, "eval_arxiv_n_ngrams_match_3": 0.728, "eval_arxiv_num_pred_words": 41.562, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.919887983207765, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.37875755355305507, "eval_arxiv_runtime": 30.732, "eval_arxiv_samples_per_second": 16.27, "eval_arxiv_steps_per_second": 0.033, "eval_arxiv_token_set_f1": 0.3720818651494092, "eval_arxiv_token_set_f1_sem": 0.004215388835882899, "eval_arxiv_token_set_precision": 0.32606911438785263, "eval_arxiv_token_set_recall": 0.44864556790489124, "eval_arxiv_true_num_tokens": 64.0, "step": 235625 }, { "epoch": 45.24, "eval_python_code_alpaca_accuracy": 0.163, "eval_python_code_alpaca_bleu_score": 4.8233972354286525, "eval_python_code_alpaca_bleu_score_sem": 0.15316598434408954, "eval_python_code_alpaca_emb_cos_sim": 0.7797827124595642, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007125558919127823, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8484835624694824, "eval_python_code_alpaca_n_ngrams_match_1": 10.106, "eval_python_code_alpaca_n_ngrams_match_2": 3.172, "eval_python_code_alpaca_n_ngrams_match_3": 1.114, "eval_python_code_alpaca_num_pred_words": 44.832, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.261585866664173, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34161305540878206, "eval_python_code_alpaca_runtime": 11.5138, "eval_python_code_alpaca_samples_per_second": 43.426, "eval_python_code_alpaca_steps_per_second": 0.087, "eval_python_code_alpaca_token_set_f1": 0.4881126232561804, "eval_python_code_alpaca_token_set_f1_sem": 0.005361239903944323, "eval_python_code_alpaca_token_set_precision": 0.555132361923531, "eval_python_code_alpaca_token_set_recall": 0.4579293609304612, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 235625 }, { "epoch": 45.24, "eval_wikibio_accuracy": 0.33096875, "eval_wikibio_bleu_score": 6.508365084613505, "eval_wikibio_bleu_score_sem": 0.24532999105066583, "eval_wikibio_emb_cos_sim": 0.7584213614463806, "eval_wikibio_emb_cos_sim_sem": 0.007648820604469176, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6209194660186768, "eval_wikibio_n_ngrams_match_1": 10.314, "eval_wikibio_n_ngrams_match_2": 3.584, "eval_wikibio_n_ngrams_match_3": 1.384, "eval_wikibio_num_pred_words": 36.462, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.371914234645935, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36373669882380344, "eval_wikibio_runtime": 11.4701, "eval_wikibio_samples_per_second": 43.592, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.32613851056154686, "eval_wikibio_token_set_f1_sem": 0.005264163300303088, "eval_wikibio_token_set_precision": 0.33286115807209193, "eval_wikibio_token_set_recall": 0.33652744995122025, "eval_wikibio_true_num_tokens": 61.1328125, "step": 235625 }, { "epoch": 45.24, "eval_nq_accuracy": 0.538, "eval_nq_bleu_score": 12.257466558820232, "eval_nq_bleu_score_sem": 0.487990812104523, "eval_nq_emb_cos_sim": 0.8438543081283569, "eval_nq_emb_cos_sim_sem": 0.006167822494644871, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.108394145965576, "eval_nq_n_ngrams_match_1": 23.59, "eval_nq_n_ngrams_match_2": 8.822, "eval_nq_n_ngrams_match_3": 4.148, "eval_nq_num_pred_words": 49.062, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.235006442539254, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4587694171930715, "eval_nq_runtime": 11.6939, "eval_nq_samples_per_second": 42.757, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.47206982833067984, "eval_nq_token_set_f1_sem": 0.004924633875529909, "eval_nq_token_set_precision": 0.43000664181308274, "eval_nq_token_set_recall": 0.5308060014530428, "eval_nq_true_num_tokens": 64.0, "step": 235625 }, { "epoch": 45.24, "learning_rate": 0.001, "loss": 2.4919, "step": 235632 }, { "epoch": 45.25, "learning_rate": 0.001, "loss": 2.4822, "step": 235644 }, { "epoch": 45.25, "learning_rate": 0.001, "loss": 2.4856, "step": 235656 }, { "epoch": 45.25, "learning_rate": 0.001, "loss": 2.4891, "step": 235668 }, { "epoch": 45.25, "learning_rate": 0.001, "loss": 2.4814, "step": 235680 }, { "epoch": 45.26, "learning_rate": 0.001, "loss": 2.4845, "step": 235692 }, { "epoch": 45.26, "learning_rate": 0.001, "loss": 2.4844, "step": 235704 }, { "epoch": 45.26, "learning_rate": 0.001, "loss": 2.4836, "step": 235716 }, { "epoch": 45.26, "learning_rate": 0.001, "loss": 2.4827, "step": 235728 }, { "epoch": 45.26, "learning_rate": 0.001, "loss": 2.4809, "step": 235740 }, { "epoch": 45.27, "learning_rate": 0.001, "loss": 2.4819, "step": 235752 }, { "epoch": 45.27, "learning_rate": 0.001, "loss": 2.4901, "step": 235764 }, { "epoch": 45.27, "learning_rate": 0.001, "loss": 2.4906, "step": 235776 }, { "epoch": 45.27, "learning_rate": 0.001, "loss": 2.4875, "step": 235788 }, { "epoch": 45.28, "learning_rate": 0.001, "loss": 2.4869, "step": 235800 }, { "epoch": 45.28, "learning_rate": 0.001, "loss": 2.4962, "step": 235812 }, { "epoch": 45.28, "learning_rate": 0.001, "loss": 2.4897, "step": 235824 }, { "epoch": 45.28, "learning_rate": 0.001, "loss": 2.4947, "step": 235836 }, { "epoch": 45.29, "learning_rate": 0.001, "loss": 2.4911, "step": 235848 }, { "epoch": 45.29, "learning_rate": 0.001, "loss": 2.482, "step": 235860 }, { "epoch": 45.29, "learning_rate": 0.001, "loss": 2.4825, "step": 235872 }, { "epoch": 45.29, "learning_rate": 0.001, "loss": 2.4876, "step": 235884 }, { "epoch": 45.29, "learning_rate": 0.001, "loss": 2.4855, "step": 235896 }, { "epoch": 45.3, "learning_rate": 0.001, "loss": 2.4812, "step": 235908 }, { "epoch": 45.3, "learning_rate": 0.001, "loss": 2.4899, "step": 235920 }, { "epoch": 45.3, "learning_rate": 0.001, "loss": 2.4896, "step": 235932 }, { "epoch": 45.3, "learning_rate": 0.001, "loss": 2.4845, "step": 235944 }, { "epoch": 45.31, "learning_rate": 0.001, "loss": 2.4791, "step": 235956 }, { "epoch": 45.31, "learning_rate": 0.001, "loss": 2.4905, "step": 235968 }, { "epoch": 45.31, "learning_rate": 0.001, "loss": 2.4811, "step": 235980 }, { "epoch": 45.31, "learning_rate": 0.001, "loss": 2.4925, "step": 235992 }, { "epoch": 45.32, "learning_rate": 0.001, "loss": 2.4882, "step": 236004 }, { "epoch": 45.32, "learning_rate": 0.001, "loss": 2.4871, "step": 236016 }, { "epoch": 45.32, "learning_rate": 0.001, "loss": 2.4867, "step": 236028 }, { "epoch": 45.32, "learning_rate": 0.001, "loss": 2.4819, "step": 236040 }, { "epoch": 45.32, "learning_rate": 0.001, "loss": 2.4837, "step": 236052 }, { "epoch": 45.33, "learning_rate": 0.001, "loss": 2.4751, "step": 236064 }, { "epoch": 45.33, "learning_rate": 0.001, "loss": 2.4787, "step": 236076 }, { "epoch": 45.33, "learning_rate": 0.001, "loss": 2.4804, "step": 236088 }, { "epoch": 45.33, "learning_rate": 0.001, "loss": 2.4863, "step": 236100 }, { "epoch": 45.34, "learning_rate": 0.001, "loss": 2.4786, "step": 236112 }, { "epoch": 45.34, "learning_rate": 0.001, "loss": 2.4955, "step": 236124 }, { "epoch": 45.34, "learning_rate": 0.001, "loss": 2.484, "step": 236136 }, { "epoch": 45.34, "learning_rate": 0.001, "loss": 2.4922, "step": 236148 }, { "epoch": 45.35, "learning_rate": 0.001, "loss": 2.4986, "step": 236160 }, { "epoch": 45.35, "learning_rate": 0.001, "loss": 2.4801, "step": 236172 }, { "epoch": 45.35, "learning_rate": 0.001, "loss": 2.4868, "step": 236184 }, { "epoch": 45.35, "learning_rate": 0.001, "loss": 2.487, "step": 236196 }, { "epoch": 45.35, "learning_rate": 0.001, "loss": 2.4907, "step": 236208 }, { "epoch": 45.36, "learning_rate": 0.001, "loss": 2.4848, "step": 236220 }, { "epoch": 45.36, "learning_rate": 0.001, "loss": 2.4931, "step": 236232 }, { "epoch": 45.36, "learning_rate": 0.001, "loss": 2.4842, "step": 236244 }, { "epoch": 45.36, "eval_ag_news_accuracy": 0.32978125, "eval_ag_news_bleu_score": 5.068924985048963, "eval_ag_news_bleu_score_sem": 0.15804541170942332, "eval_ag_news_emb_cos_sim": 0.8181669116020203, "eval_ag_news_emb_cos_sim_sem": 0.007024785804338275, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4600605964660645, "eval_ag_news_n_ngrams_match_1": 14.576, "eval_ag_news_n_ngrams_match_2": 3.296, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 46.678, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.8189045694214, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3638274695582324, "eval_ag_news_runtime": 31.213, "eval_ag_news_samples_per_second": 16.019, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.36142355006688703, "eval_ag_news_token_set_f1_sem": 0.004617036902792799, "eval_ag_news_token_set_precision": 0.34859251880572, "eval_ag_news_token_set_recall": 0.38927184647075064, "eval_ag_news_true_num_tokens": 56.09375, "step": 236250 }, { "epoch": 45.36, "eval_anthropic_toxic_prompts_accuracy": 0.1159375, "eval_anthropic_toxic_prompts_bleu_score": 3.0716327682616535, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11298120935300243, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6829196214675903, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009644477832727056, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1737353801727295, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.306, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.892, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.696, "eval_anthropic_toxic_prompts_num_pred_words": 47.586, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.896580662145972, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2159726355983121, "eval_anthropic_toxic_prompts_runtime": 18.0997, "eval_anthropic_toxic_prompts_samples_per_second": 27.625, "eval_anthropic_toxic_prompts_steps_per_second": 0.055, "eval_anthropic_toxic_prompts_token_set_f1": 0.36047059793605396, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006363741253291872, "eval_anthropic_toxic_prompts_token_set_precision": 0.44671505151350177, "eval_anthropic_toxic_prompts_token_set_recall": 0.3306825404772978, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 236250 }, { "epoch": 45.36, "eval_arxiv_accuracy": 0.35440625, "eval_arxiv_bleu_score": 4.3774102065342735, "eval_arxiv_bleu_score_sem": 0.12293831713317518, "eval_arxiv_emb_cos_sim": 0.7836014032363892, "eval_arxiv_emb_cos_sim_sem": 0.007510873310748512, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.312004327774048, "eval_arxiv_n_ngrams_match_1": 15.278, "eval_arxiv_n_ngrams_match_2": 3.056, "eval_arxiv_n_ngrams_match_3": 0.686, "eval_arxiv_num_pred_words": 39.608, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.44006928379248, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3714682813779792, "eval_arxiv_runtime": 29.7371, "eval_arxiv_samples_per_second": 16.814, "eval_arxiv_steps_per_second": 0.034, "eval_arxiv_token_set_f1": 0.36171949803136166, "eval_arxiv_token_set_f1_sem": 0.004384357297562797, "eval_arxiv_token_set_precision": 0.3119897852176188, "eval_arxiv_token_set_recall": 0.4516782800865952, "eval_arxiv_true_num_tokens": 64.0, "step": 236250 }, { "epoch": 45.36, "eval_python_code_alpaca_accuracy": 0.16546875, "eval_python_code_alpaca_bleu_score": 4.833091059079826, "eval_python_code_alpaca_bleu_score_sem": 0.1498344748185783, "eval_python_code_alpaca_emb_cos_sim": 0.7737959027290344, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007713317795036496, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8216986656188965, "eval_python_code_alpaca_n_ngrams_match_1": 10.182, "eval_python_code_alpaca_n_ngrams_match_2": 3.098, "eval_python_code_alpaca_n_ngrams_match_3": 1.072, "eval_python_code_alpaca_num_pred_words": 43.948, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.805373149787687, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3482552589865845, "eval_python_code_alpaca_runtime": 16.6762, "eval_python_code_alpaca_samples_per_second": 29.983, "eval_python_code_alpaca_steps_per_second": 0.06, "eval_python_code_alpaca_token_set_f1": 0.4895694294231196, "eval_python_code_alpaca_token_set_f1_sem": 0.005327856918272031, "eval_python_code_alpaca_token_set_precision": 0.5576943403315965, "eval_python_code_alpaca_token_set_recall": 0.45703971465575227, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 236250 }, { "epoch": 45.36, "eval_wikibio_accuracy": 0.33278125, "eval_wikibio_bleu_score": 6.3777153332111896, "eval_wikibio_bleu_score_sem": 0.22310046634993044, "eval_wikibio_emb_cos_sim": 0.7548523545265198, "eval_wikibio_emb_cos_sim_sem": 0.008988898750663783, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6209940910339355, "eval_wikibio_n_ngrams_match_1": 10.256, "eval_wikibio_n_ngrams_match_2": 3.548, "eval_wikibio_n_ngrams_match_3": 1.36, "eval_wikibio_num_pred_words": 35.766, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.37470321837863, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3653736570756626, "eval_wikibio_runtime": 28.4822, "eval_wikibio_samples_per_second": 17.555, "eval_wikibio_steps_per_second": 0.035, "eval_wikibio_token_set_f1": 0.32625843237840907, "eval_wikibio_token_set_f1_sem": 0.005399370667335609, "eval_wikibio_token_set_precision": 0.3352714334272106, "eval_wikibio_token_set_recall": 0.33370784478189963, "eval_wikibio_true_num_tokens": 61.1328125, "step": 236250 }, { "epoch": 45.36, "eval_nq_accuracy": 0.53703125, "eval_nq_bleu_score": 12.334143957771436, "eval_nq_bleu_score_sem": 0.49047282629757294, "eval_nq_emb_cos_sim": 0.8425197005271912, "eval_nq_emb_cos_sim_sem": 0.006781060457567395, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1136088371276855, "eval_nq_n_ngrams_match_1": 23.728, "eval_nq_n_ngrams_match_2": 8.906, "eval_nq_n_ngrams_match_3": 4.138, "eval_nq_num_pred_words": 48.936, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.27806162001522, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4614856671972397, "eval_nq_runtime": 32.675, "eval_nq_samples_per_second": 15.302, "eval_nq_steps_per_second": 0.031, "eval_nq_token_set_f1": 0.4737944349817195, "eval_nq_token_set_f1_sem": 0.004902637958479992, "eval_nq_token_set_precision": 0.4331997859628718, "eval_nq_token_set_recall": 0.5303665878086978, "eval_nq_true_num_tokens": 64.0, "step": 236250 }, { "epoch": 45.36, "learning_rate": 0.001, "loss": 2.4883, "step": 236256 }, { "epoch": 45.37, "learning_rate": 0.001, "loss": 2.4955, "step": 236268 }, { "epoch": 45.37, "learning_rate": 0.001, "loss": 2.4886, "step": 236280 }, { "epoch": 45.37, "learning_rate": 0.001, "loss": 2.4907, "step": 236292 }, { "epoch": 45.37, "learning_rate": 0.001, "loss": 2.4933, "step": 236304 }, { "epoch": 45.38, "learning_rate": 0.001, "loss": 2.4869, "step": 236316 }, { "epoch": 45.38, "learning_rate": 0.001, "loss": 2.4822, "step": 236328 }, { "epoch": 45.38, "learning_rate": 0.001, "loss": 2.4824, "step": 236340 }, { "epoch": 45.38, "learning_rate": 0.001, "loss": 2.4777, "step": 236352 }, { "epoch": 45.38, "learning_rate": 0.001, "loss": 2.4907, "step": 236364 }, { "epoch": 45.39, "learning_rate": 0.001, "loss": 2.4866, "step": 236376 }, { "epoch": 45.39, "learning_rate": 0.001, "loss": 2.4931, "step": 236388 }, { "epoch": 45.39, "learning_rate": 0.001, "loss": 2.4798, "step": 236400 }, { "epoch": 45.39, "learning_rate": 0.001, "loss": 2.4899, "step": 236412 }, { "epoch": 45.4, "learning_rate": 0.001, "loss": 2.4784, "step": 236424 }, { "epoch": 45.4, "learning_rate": 0.001, "loss": 2.4932, "step": 236436 }, { "epoch": 45.4, "learning_rate": 0.001, "loss": 2.4896, "step": 236448 }, { "epoch": 45.4, "learning_rate": 0.001, "loss": 2.4953, "step": 236460 }, { "epoch": 45.41, "learning_rate": 0.001, "loss": 2.495, "step": 236472 }, { "epoch": 45.41, "learning_rate": 0.001, "loss": 2.4835, "step": 236484 }, { "epoch": 45.41, "learning_rate": 0.001, "loss": 2.4899, "step": 236496 }, { "epoch": 45.41, "learning_rate": 0.001, "loss": 2.4799, "step": 236508 }, { "epoch": 45.41, "learning_rate": 0.001, "loss": 2.485, "step": 236520 }, { "epoch": 45.42, "learning_rate": 0.001, "loss": 2.4854, "step": 236532 }, { "epoch": 45.42, "learning_rate": 0.001, "loss": 2.4851, "step": 236544 }, { "epoch": 45.42, "learning_rate": 0.001, "loss": 2.4802, "step": 236556 }, { "epoch": 45.42, "learning_rate": 0.001, "loss": 2.487, "step": 236568 }, { "epoch": 45.43, "learning_rate": 0.001, "loss": 2.4882, "step": 236580 }, { "epoch": 45.43, "learning_rate": 0.001, "loss": 2.485, "step": 236592 }, { "epoch": 45.43, "learning_rate": 0.001, "loss": 2.4892, "step": 236604 }, { "epoch": 45.43, "learning_rate": 0.001, "loss": 2.4808, "step": 236616 }, { "epoch": 45.44, "learning_rate": 0.001, "loss": 2.4784, "step": 236628 }, { "epoch": 45.44, "learning_rate": 0.001, "loss": 2.4956, "step": 236640 }, { "epoch": 45.44, "learning_rate": 0.001, "loss": 2.4813, "step": 236652 }, { "epoch": 45.44, "learning_rate": 0.001, "loss": 2.4867, "step": 236664 }, { "epoch": 45.44, "learning_rate": 0.001, "loss": 2.4829, "step": 236676 }, { "epoch": 45.45, "learning_rate": 0.001, "loss": 2.488, "step": 236688 }, { "epoch": 45.45, "learning_rate": 0.001, "loss": 2.4833, "step": 236700 }, { "epoch": 45.45, "learning_rate": 0.001, "loss": 2.4766, "step": 236712 }, { "epoch": 45.45, "learning_rate": 0.001, "loss": 2.4758, "step": 236724 }, { "epoch": 45.46, "learning_rate": 0.001, "loss": 2.4931, "step": 236736 }, { "epoch": 45.46, "learning_rate": 0.001, "loss": 2.4772, "step": 236748 }, { "epoch": 45.46, "learning_rate": 0.001, "loss": 2.4938, "step": 236760 }, { "epoch": 45.46, "learning_rate": 0.001, "loss": 2.4748, "step": 236772 }, { "epoch": 45.47, "learning_rate": 0.001, "loss": 2.4794, "step": 236784 }, { "epoch": 45.47, "learning_rate": 0.001, "loss": 2.486, "step": 236796 }, { "epoch": 45.47, "learning_rate": 0.001, "loss": 2.4882, "step": 236808 }, { "epoch": 45.47, "learning_rate": 0.001, "loss": 2.4885, "step": 236820 }, { "epoch": 45.47, "learning_rate": 0.001, "loss": 2.4894, "step": 236832 }, { "epoch": 45.48, "learning_rate": 0.001, "loss": 2.4848, "step": 236844 }, { "epoch": 45.48, "learning_rate": 0.001, "loss": 2.4822, "step": 236856 }, { "epoch": 45.48, "learning_rate": 0.001, "loss": 2.4808, "step": 236868 }, { "epoch": 45.48, "eval_ag_news_accuracy": 0.328125, "eval_ag_news_bleu_score": 5.1097284651362935, "eval_ag_news_bleu_score_sem": 0.16805408067037464, "eval_ag_news_emb_cos_sim": 0.8161610960960388, "eval_ag_news_emb_cos_sim_sem": 0.0076715825368430905, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.468269109725952, "eval_ag_news_n_ngrams_match_1": 14.522, "eval_ag_news_n_ngrams_match_2": 3.378, "eval_ag_news_n_ngrams_match_3": 0.988, "eval_ag_news_num_pred_words": 47.396, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.08116538258978, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3563040187253297, "eval_ag_news_runtime": 31.0775, "eval_ag_news_samples_per_second": 16.089, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.3605941653128626, "eval_ag_news_token_set_f1_sem": 0.00455381093278579, "eval_ag_news_token_set_precision": 0.34833227459101823, "eval_ag_news_token_set_recall": 0.3896397253423483, "eval_ag_news_true_num_tokens": 56.09375, "step": 236875 }, { "epoch": 45.48, "eval_anthropic_toxic_prompts_accuracy": 0.11646875, "eval_anthropic_toxic_prompts_bleu_score": 3.2666462887798327, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.16144987480701473, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.682634711265564, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009303319508911117, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.199036121368408, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.432, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.994, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 47.832, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.50889520793385, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21733901350708934, "eval_anthropic_toxic_prompts_runtime": 30.7501, "eval_anthropic_toxic_prompts_samples_per_second": 16.26, "eval_anthropic_toxic_prompts_steps_per_second": 0.033, "eval_anthropic_toxic_prompts_token_set_f1": 0.3660504748382527, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006690572454197111, "eval_anthropic_toxic_prompts_token_set_precision": 0.45446579938371107, "eval_anthropic_toxic_prompts_token_set_recall": 0.3332701357190309, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 236875 }, { "epoch": 45.48, "eval_arxiv_accuracy": 0.35603125, "eval_arxiv_bleu_score": 4.561718540765802, "eval_arxiv_bleu_score_sem": 0.12746920151424626, "eval_arxiv_emb_cos_sim": 0.7865849733352661, "eval_arxiv_emb_cos_sim_sem": 0.006162806361127566, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3262879848480225, "eval_arxiv_n_ngrams_match_1": 15.576, "eval_arxiv_n_ngrams_match_2": 3.254, "eval_arxiv_n_ngrams_match_3": 0.726, "eval_arxiv_num_pred_words": 41.548, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.834826399562722, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.369173905150433, "eval_arxiv_runtime": 34.3059, "eval_arxiv_samples_per_second": 14.575, "eval_arxiv_steps_per_second": 0.029, "eval_arxiv_token_set_f1": 0.3618765912173447, "eval_arxiv_token_set_f1_sem": 0.004027786625053291, "eval_arxiv_token_set_precision": 0.3170028173870082, "eval_arxiv_token_set_recall": 0.4401272965914524, "eval_arxiv_true_num_tokens": 64.0, "step": 236875 }, { "epoch": 45.48, "eval_python_code_alpaca_accuracy": 0.16403125, "eval_python_code_alpaca_bleu_score": 4.625687913046255, "eval_python_code_alpaca_bleu_score_sem": 0.14890315462892045, "eval_python_code_alpaca_emb_cos_sim": 0.7672336101531982, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00910012823515108, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8469605445861816, "eval_python_code_alpaca_n_ngrams_match_1": 10.018, "eval_python_code_alpaca_n_ngrams_match_2": 3.016, "eval_python_code_alpaca_n_ngrams_match_3": 1.036, "eval_python_code_alpaca_num_pred_words": 44.498, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.2353161723801, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3376399817632487, "eval_python_code_alpaca_runtime": 29.577, "eval_python_code_alpaca_samples_per_second": 16.905, "eval_python_code_alpaca_steps_per_second": 0.034, "eval_python_code_alpaca_token_set_f1": 0.48058938152673236, "eval_python_code_alpaca_token_set_f1_sem": 0.005517767124463283, "eval_python_code_alpaca_token_set_precision": 0.5518982416220253, "eval_python_code_alpaca_token_set_recall": 0.450023858065497, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 236875 }, { "epoch": 45.48, "eval_wikibio_accuracy": 0.3306875, "eval_wikibio_bleu_score": 6.324013838507522, "eval_wikibio_bleu_score_sem": 0.22019240291969452, "eval_wikibio_emb_cos_sim": 0.7524803876876831, "eval_wikibio_emb_cos_sim_sem": 0.008564025590448573, "eval_wikibio_emb_top1_equal": 0.25, "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.645080804824829, "eval_wikibio_n_ngrams_match_1": 10.508, "eval_wikibio_n_ngrams_match_2": 3.61, "eval_wikibio_n_ngrams_match_3": 1.368, "eval_wikibio_num_pred_words": 37.264, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.28586640929773, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3723037163215847, "eval_wikibio_runtime": 34.7075, "eval_wikibio_samples_per_second": 14.406, "eval_wikibio_steps_per_second": 0.029, "eval_wikibio_token_set_f1": 0.3284325087793525, "eval_wikibio_token_set_f1_sem": 0.0051377816417530935, "eval_wikibio_token_set_precision": 0.34115068068346904, "eval_wikibio_token_set_recall": 0.3300704913409777, "eval_wikibio_true_num_tokens": 61.1328125, "step": 236875 }, { "epoch": 45.48, "eval_nq_accuracy": 0.535875, "eval_nq_bleu_score": 12.032815460481608, "eval_nq_bleu_score_sem": 0.4761279300902094, "eval_nq_emb_cos_sim": 0.8346362113952637, "eval_nq_emb_cos_sim_sem": 0.006997409450596926, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1153182983398438, "eval_nq_n_ngrams_match_1": 23.618, "eval_nq_n_ngrams_match_2": 8.798, "eval_nq_n_ngrams_match_3": 4.058, "eval_nq_num_pred_words": 49.488, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.292224747475986, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4562308616159445, "eval_nq_runtime": 31.475, "eval_nq_samples_per_second": 15.886, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.47124612309718056, "eval_nq_token_set_f1_sem": 0.00486947120200252, "eval_nq_token_set_precision": 0.4312773553495812, "eval_nq_token_set_recall": 0.527375433761542, "eval_nq_true_num_tokens": 64.0, "step": 236875 }, { "epoch": 45.48, "learning_rate": 0.001, "loss": 2.4787, "step": 236880 }, { "epoch": 45.49, "learning_rate": 0.001, "loss": 2.4916, "step": 236892 }, { "epoch": 45.49, "learning_rate": 0.001, "loss": 2.4811, "step": 236904 }, { "epoch": 45.49, "learning_rate": 0.001, "loss": 2.4842, "step": 236916 }, { "epoch": 45.49, "learning_rate": 0.001, "loss": 2.4793, "step": 236928 }, { "epoch": 45.5, "learning_rate": 0.001, "loss": 2.4846, "step": 236940 }, { "epoch": 45.5, "learning_rate": 0.001, "loss": 2.49, "step": 236952 }, { "epoch": 45.5, "learning_rate": 0.001, "loss": 2.4864, "step": 236964 }, { "epoch": 45.5, "learning_rate": 0.001, "loss": 2.4921, "step": 236976 }, { "epoch": 45.5, "learning_rate": 0.001, "loss": 2.484, "step": 236988 }, { "epoch": 45.51, "learning_rate": 0.001, "loss": 2.4983, "step": 237000 }, { "epoch": 45.51, "learning_rate": 0.001, "loss": 2.4876, "step": 237012 }, { "epoch": 45.51, "learning_rate": 0.001, "loss": 2.491, "step": 237024 }, { "epoch": 45.51, "learning_rate": 0.001, "loss": 2.4896, "step": 237036 }, { "epoch": 45.52, "learning_rate": 0.001, "loss": 2.4811, "step": 237048 }, { "epoch": 45.52, "learning_rate": 0.001, "loss": 2.4859, "step": 237060 }, { "epoch": 45.52, "learning_rate": 0.001, "loss": 2.4926, "step": 237072 }, { "epoch": 45.52, "learning_rate": 0.001, "loss": 2.4826, "step": 237084 }, { "epoch": 45.53, "learning_rate": 0.001, "loss": 2.4941, "step": 237096 }, { "epoch": 45.53, "learning_rate": 0.001, "loss": 2.4891, "step": 237108 }, { "epoch": 45.53, "learning_rate": 0.001, "loss": 2.4905, "step": 237120 }, { "epoch": 45.53, "learning_rate": 0.001, "loss": 2.4751, "step": 237132 }, { "epoch": 45.53, "learning_rate": 0.001, "loss": 2.484, "step": 237144 }, { "epoch": 45.54, "learning_rate": 0.001, "loss": 2.4866, "step": 237156 }, { "epoch": 45.54, "learning_rate": 0.001, "loss": 2.4947, "step": 237168 }, { "epoch": 45.54, "learning_rate": 0.001, "loss": 2.4935, "step": 237180 }, { "epoch": 45.54, "learning_rate": 0.001, "loss": 2.4823, "step": 237192 }, { "epoch": 45.55, "learning_rate": 0.001, "loss": 2.4908, "step": 237204 }, { "epoch": 45.55, "learning_rate": 0.001, "loss": 2.4945, "step": 237216 }, { "epoch": 45.55, "learning_rate": 0.001, "loss": 2.4985, "step": 237228 }, { "epoch": 45.55, "learning_rate": 0.001, "loss": 2.4885, "step": 237240 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 2.4906, "step": 237252 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 2.474, "step": 237264 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 2.4884, "step": 237276 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 2.4914, "step": 237288 }, { "epoch": 45.56, "learning_rate": 0.001, "loss": 2.4933, "step": 237300 }, { "epoch": 45.57, "learning_rate": 0.001, "loss": 2.4891, "step": 237312 }, { "epoch": 45.57, "learning_rate": 0.001, "loss": 2.4779, "step": 237324 }, { "epoch": 45.57, "learning_rate": 0.001, "loss": 2.487, "step": 237336 }, { "epoch": 45.57, "learning_rate": 0.001, "loss": 2.4902, "step": 237348 }, { "epoch": 45.58, "learning_rate": 0.001, "loss": 2.4914, "step": 237360 }, { "epoch": 45.58, "learning_rate": 0.001, "loss": 2.4936, "step": 237372 }, { "epoch": 45.58, "learning_rate": 0.001, "loss": 2.496, "step": 237384 }, { "epoch": 45.58, "learning_rate": 0.001, "loss": 2.4947, "step": 237396 }, { "epoch": 45.59, "learning_rate": 0.001, "loss": 2.4909, "step": 237408 }, { "epoch": 45.59, "learning_rate": 0.001, "loss": 2.492, "step": 237420 }, { "epoch": 45.59, "learning_rate": 0.001, "loss": 2.4932, "step": 237432 }, { "epoch": 45.59, "learning_rate": 0.001, "loss": 2.4794, "step": 237444 }, { "epoch": 45.59, "learning_rate": 0.001, "loss": 2.491, "step": 237456 }, { "epoch": 45.6, "learning_rate": 0.001, "loss": 2.4889, "step": 237468 }, { "epoch": 45.6, "learning_rate": 0.001, "loss": 2.4904, "step": 237480 }, { "epoch": 45.6, "learning_rate": 0.001, "loss": 2.4964, "step": 237492 }, { "epoch": 45.6, "eval_ag_news_accuracy": 0.32959375, "eval_ag_news_bleu_score": 5.024724755706574, "eval_ag_news_bleu_score_sem": 0.16372791908545986, "eval_ag_news_emb_cos_sim": 0.8220397233963013, "eval_ag_news_emb_cos_sim_sem": 0.007606910173436648, "eval_ag_news_emb_top1_equal": 0.3203125, "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4633166790008545, "eval_ag_news_n_ngrams_match_1": 14.574, "eval_ag_news_n_ngrams_match_2": 3.316, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 47.258, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.922678405268474, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3594223922503628, "eval_ag_news_runtime": 32.0224, "eval_ag_news_samples_per_second": 15.614, "eval_ag_news_steps_per_second": 0.031, "eval_ag_news_token_set_f1": 0.36223142711624384, "eval_ag_news_token_set_f1_sem": 0.004453876575610987, "eval_ag_news_token_set_precision": 0.3497371965963449, "eval_ag_news_token_set_recall": 0.3900248051337052, "eval_ag_news_true_num_tokens": 56.09375, "step": 237500 }, { "epoch": 45.6, "eval_anthropic_toxic_prompts_accuracy": 0.11575, "eval_anthropic_toxic_prompts_bleu_score": 3.3184463357934466, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12615424755439342, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6881489753723145, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008676794751995533, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.189636707305908, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.494, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.064, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.802, "eval_anthropic_toxic_prompts_num_pred_words": 47.118, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.279605237471955, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2218234368893499, "eval_anthropic_toxic_prompts_runtime": 34.2193, "eval_anthropic_toxic_prompts_samples_per_second": 14.612, "eval_anthropic_toxic_prompts_steps_per_second": 0.029, "eval_anthropic_toxic_prompts_token_set_f1": 0.36779669154581185, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066226066988801305, "eval_anthropic_toxic_prompts_token_set_precision": 0.4557761434098778, "eval_anthropic_toxic_prompts_token_set_recall": 0.3352528239933192, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 237500 }, { "epoch": 45.6, "eval_arxiv_accuracy": 0.354125, "eval_arxiv_bleu_score": 4.635956601267533, "eval_arxiv_bleu_score_sem": 0.12833729885936038, "eval_arxiv_emb_cos_sim": 0.7911975383758545, "eval_arxiv_emb_cos_sim_sem": 0.006375492529603698, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3218994140625, "eval_arxiv_n_ngrams_match_1": 16.082, "eval_arxiv_n_ngrams_match_2": 3.234, "eval_arxiv_n_ngrams_match_3": 0.72, "eval_arxiv_num_pred_words": 41.972, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.712938945091185, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.38065290013941777, "eval_arxiv_runtime": 29.1833, "eval_arxiv_samples_per_second": 17.133, "eval_arxiv_steps_per_second": 0.034, "eval_arxiv_token_set_f1": 0.37468184081270856, "eval_arxiv_token_set_f1_sem": 0.004001955486649725, "eval_arxiv_token_set_precision": 0.3292246429652243, "eval_arxiv_token_set_recall": 0.4488915799943892, "eval_arxiv_true_num_tokens": 64.0, "step": 237500 }, { "epoch": 45.6, "eval_python_code_alpaca_accuracy": 0.163625, "eval_python_code_alpaca_bleu_score": 4.727852193410257, "eval_python_code_alpaca_bleu_score_sem": 0.14930282466611577, "eval_python_code_alpaca_emb_cos_sim": 0.7710949182510376, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008083269001830456, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8213469982147217, "eval_python_code_alpaca_n_ngrams_match_1": 10.098, "eval_python_code_alpaca_n_ngrams_match_2": 2.952, "eval_python_code_alpaca_n_ngrams_match_3": 1.01, "eval_python_code_alpaca_num_pred_words": 43.544, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.79946428687405, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3452332843968612, "eval_python_code_alpaca_runtime": 31.4166, "eval_python_code_alpaca_samples_per_second": 15.915, "eval_python_code_alpaca_steps_per_second": 0.032, "eval_python_code_alpaca_token_set_f1": 0.48932212231834404, "eval_python_code_alpaca_token_set_f1_sem": 0.005614974685638379, "eval_python_code_alpaca_token_set_precision": 0.5537253776682445, "eval_python_code_alpaca_token_set_recall": 0.4584430132902336, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 237500 }, { "epoch": 45.6, "eval_wikibio_accuracy": 0.3310625, "eval_wikibio_bleu_score": 6.056535468706676, "eval_wikibio_bleu_score_sem": 0.21373125210053123, "eval_wikibio_emb_cos_sim": 0.7506512999534607, "eval_wikibio_emb_cos_sim_sem": 0.00843996765894577, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6376712322235107, "eval_wikibio_n_ngrams_match_1": 10.076, "eval_wikibio_n_ngrams_match_2": 3.466, "eval_wikibio_n_ngrams_match_3": 1.282, "eval_wikibio_num_pred_words": 36.29, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.003232892403915, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3614314674627419, "eval_wikibio_runtime": 31.0212, "eval_wikibio_samples_per_second": 16.118, "eval_wikibio_steps_per_second": 0.032, "eval_wikibio_token_set_f1": 0.3177332711936968, "eval_wikibio_token_set_f1_sem": 0.005371096912022222, "eval_wikibio_token_set_precision": 0.3286910213631987, "eval_wikibio_token_set_recall": 0.32599899004879107, "eval_wikibio_true_num_tokens": 61.1328125, "step": 237500 }, { "epoch": 45.6, "eval_nq_accuracy": 0.53796875, "eval_nq_bleu_score": 12.326095005539692, "eval_nq_bleu_score_sem": 0.49644107675178234, "eval_nq_emb_cos_sim": 0.8375586271286011, "eval_nq_emb_cos_sim_sem": 0.00674963471364646, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.111186981201172, "eval_nq_n_ngrams_match_1": 23.75, "eval_nq_n_ngrams_match_2": 8.904, "eval_nq_n_ngrams_match_3": 4.176, "eval_nq_num_pred_words": 49.452, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.258037604848024, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4590653808449676, "eval_nq_runtime": 31.3621, "eval_nq_samples_per_second": 15.943, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.47191897728494864, "eval_nq_token_set_f1_sem": 0.005061002449395294, "eval_nq_token_set_precision": 0.43308139515979027, "eval_nq_token_set_recall": 0.5260267250353408, "eval_nq_true_num_tokens": 64.0, "step": 237500 }, { "epoch": 45.6, "learning_rate": 0.001, "loss": 2.4929, "step": 237504 }, { "epoch": 45.61, "learning_rate": 0.001, "loss": 2.4824, "step": 237516 }, { "epoch": 45.61, "learning_rate": 0.001, "loss": 2.4932, "step": 237528 }, { "epoch": 45.61, "learning_rate": 0.001, "loss": 2.4915, "step": 237540 }, { "epoch": 45.61, "learning_rate": 0.001, "loss": 2.4851, "step": 237552 }, { "epoch": 45.62, "learning_rate": 0.001, "loss": 2.4905, "step": 237564 }, { "epoch": 45.62, "learning_rate": 0.001, "loss": 2.4821, "step": 237576 }, { "epoch": 45.62, "learning_rate": 0.001, "loss": 2.4899, "step": 237588 }, { "epoch": 45.62, "learning_rate": 0.001, "loss": 2.4991, "step": 237600 }, { "epoch": 45.62, "learning_rate": 0.001, "loss": 2.4905, "step": 237612 }, { "epoch": 45.63, "learning_rate": 0.001, "loss": 2.5029, "step": 237624 }, { "epoch": 45.63, "learning_rate": 0.001, "loss": 2.4858, "step": 237636 }, { "epoch": 45.63, "learning_rate": 0.001, "loss": 2.4913, "step": 237648 }, { "epoch": 45.63, "learning_rate": 0.001, "loss": 2.4869, "step": 237660 }, { "epoch": 45.64, "learning_rate": 0.001, "loss": 2.4956, "step": 237672 }, { "epoch": 45.64, "learning_rate": 0.001, "loss": 2.4817, "step": 237684 }, { "epoch": 45.64, "learning_rate": 0.001, "loss": 2.4889, "step": 237696 }, { "epoch": 45.64, "learning_rate": 0.001, "loss": 2.4832, "step": 237708 }, { "epoch": 45.65, "learning_rate": 0.001, "loss": 2.4845, "step": 237720 }, { "epoch": 45.65, "learning_rate": 0.001, "loss": 2.4892, "step": 237732 }, { "epoch": 45.65, "learning_rate": 0.001, "loss": 2.4899, "step": 237744 }, { "epoch": 45.65, "learning_rate": 0.001, "loss": 2.479, "step": 237756 }, { "epoch": 45.65, "learning_rate": 0.001, "loss": 2.4853, "step": 237768 }, { "epoch": 45.66, "learning_rate": 0.001, "loss": 2.4815, "step": 237780 }, { "epoch": 45.66, "learning_rate": 0.001, "loss": 2.4762, "step": 237792 }, { "epoch": 45.66, "learning_rate": 0.001, "loss": 2.4832, "step": 237804 }, { "epoch": 45.66, "learning_rate": 0.001, "loss": 2.4902, "step": 237816 }, { "epoch": 45.67, "learning_rate": 0.001, "loss": 2.4893, "step": 237828 }, { "epoch": 45.67, "learning_rate": 0.001, "loss": 2.4899, "step": 237840 }, { "epoch": 45.67, "learning_rate": 0.001, "loss": 2.4943, "step": 237852 }, { "epoch": 45.67, "learning_rate": 0.001, "loss": 2.4892, "step": 237864 }, { "epoch": 45.68, "learning_rate": 0.001, "loss": 2.489, "step": 237876 }, { "epoch": 45.68, "learning_rate": 0.001, "loss": 2.4802, "step": 237888 }, { "epoch": 45.68, "learning_rate": 0.001, "loss": 2.49, "step": 237900 }, { "epoch": 45.68, "learning_rate": 0.001, "loss": 2.4895, "step": 237912 }, { "epoch": 45.68, "learning_rate": 0.001, "loss": 2.4958, "step": 237924 }, { "epoch": 45.69, "learning_rate": 0.001, "loss": 2.4946, "step": 237936 }, { "epoch": 45.69, "learning_rate": 0.001, "loss": 2.4976, "step": 237948 }, { "epoch": 45.69, "learning_rate": 0.001, "loss": 2.4858, "step": 237960 }, { "epoch": 45.69, "learning_rate": 0.001, "loss": 2.4816, "step": 237972 }, { "epoch": 45.7, "learning_rate": 0.001, "loss": 2.4798, "step": 237984 }, { "epoch": 45.7, "learning_rate": 0.001, "loss": 2.4954, "step": 237996 }, { "epoch": 45.7, "learning_rate": 0.001, "loss": 2.4966, "step": 238008 }, { "epoch": 45.7, "learning_rate": 0.001, "loss": 2.4916, "step": 238020 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 2.4789, "step": 238032 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 2.4875, "step": 238044 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 2.4875, "step": 238056 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 2.4952, "step": 238068 }, { "epoch": 45.71, "learning_rate": 0.001, "loss": 2.4918, "step": 238080 }, { "epoch": 45.72, "learning_rate": 0.001, "loss": 2.4809, "step": 238092 }, { "epoch": 45.72, "learning_rate": 0.001, "loss": 2.4882, "step": 238104 }, { "epoch": 45.72, "learning_rate": 0.001, "loss": 2.4864, "step": 238116 }, { "epoch": 45.72, "eval_ag_news_accuracy": 0.329125, "eval_ag_news_bleu_score": 5.044727813363567, "eval_ag_news_bleu_score_sem": 0.16370559943851612, "eval_ag_news_emb_cos_sim": 0.8225343823432922, "eval_ag_news_emb_cos_sim_sem": 0.006919853322436309, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4637818336486816, "eval_ag_news_n_ngrams_match_1": 14.444, "eval_ag_news_n_ngrams_match_2": 3.232, "eval_ag_news_n_ngrams_match_3": 0.948, "eval_ag_news_num_pred_words": 46.48, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.937530841571864, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36035109910329255, "eval_ag_news_runtime": 31.7509, "eval_ag_news_samples_per_second": 15.748, "eval_ag_news_steps_per_second": 0.031, "eval_ag_news_token_set_f1": 0.363937083695072, "eval_ag_news_token_set_f1_sem": 0.004495710950073818, "eval_ag_news_token_set_precision": 0.348181004304836, "eval_ag_news_token_set_recall": 0.3962901292141611, "eval_ag_news_true_num_tokens": 56.09375, "step": 238125 }, { "epoch": 45.72, "eval_anthropic_toxic_prompts_accuracy": 0.11725, "eval_anthropic_toxic_prompts_bleu_score": 3.2110720711957734, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12444476877967454, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6830450296401978, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008823951132115827, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.046875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.01875615101164758, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.176992654800415, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.388, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754, "eval_anthropic_toxic_prompts_num_pred_words": 47.36, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.974545295203114, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21839579920788915, "eval_anthropic_toxic_prompts_runtime": 30.2496, "eval_anthropic_toxic_prompts_samples_per_second": 16.529, "eval_anthropic_toxic_prompts_steps_per_second": 0.033, "eval_anthropic_toxic_prompts_token_set_f1": 0.3659661144509881, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006548412992509525, "eval_anthropic_toxic_prompts_token_set_precision": 0.4506092923404888, "eval_anthropic_toxic_prompts_token_set_recall": 0.33653116794084625, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 238125 }, { "epoch": 45.72, "eval_arxiv_accuracy": 0.355875, "eval_arxiv_bleu_score": 4.533323625214181, "eval_arxiv_bleu_score_sem": 0.14178894150305668, "eval_arxiv_emb_cos_sim": 0.7814557552337646, "eval_arxiv_emb_cos_sim_sem": 0.007334771923868853, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3131489753723145, "eval_arxiv_n_ngrams_match_1": 15.604, "eval_arxiv_n_ngrams_match_2": 3.158, "eval_arxiv_n_ngrams_match_3": 0.712, "eval_arxiv_num_pred_words": 40.422, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.471496476293275, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3724556854309238, "eval_arxiv_runtime": 30.2812, "eval_arxiv_samples_per_second": 16.512, "eval_arxiv_steps_per_second": 0.033, "eval_arxiv_token_set_f1": 0.3681954355977995, "eval_arxiv_token_set_f1_sem": 0.004370250898434788, "eval_arxiv_token_set_precision": 0.3201643705610055, "eval_arxiv_token_set_recall": 0.4541018201444844, "eval_arxiv_true_num_tokens": 64.0, "step": 238125 }, { "epoch": 45.72, "eval_python_code_alpaca_accuracy": 0.1631875, "eval_python_code_alpaca_bleu_score": 4.9750378292134005, "eval_python_code_alpaca_bleu_score_sem": 0.16032895638455275, "eval_python_code_alpaca_emb_cos_sim": 0.7726291418075562, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007056011350728784, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.840923309326172, "eval_python_code_alpaca_n_ngrams_match_1": 9.964, "eval_python_code_alpaca_n_ngrams_match_2": 3.076, "eval_python_code_alpaca_n_ngrams_match_3": 1.094, "eval_python_code_alpaca_num_pred_words": 43.224, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.131575980932794, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3452292864192766, "eval_python_code_alpaca_runtime": 30.6634, "eval_python_code_alpaca_samples_per_second": 16.306, "eval_python_code_alpaca_steps_per_second": 0.033, "eval_python_code_alpaca_token_set_f1": 0.4896349754728187, "eval_python_code_alpaca_token_set_f1_sem": 0.005574803660132695, "eval_python_code_alpaca_token_set_precision": 0.5464009977536679, "eval_python_code_alpaca_token_set_recall": 0.46678427966126335, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 238125 }, { "epoch": 45.72, "eval_wikibio_accuracy": 0.33603125, "eval_wikibio_bleu_score": 6.1314882652978175, "eval_wikibio_bleu_score_sem": 0.21713200836686297, "eval_wikibio_emb_cos_sim": 0.7277990579605103, "eval_wikibio_emb_cos_sim_sem": 0.009776202539391791, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6152026653289795, "eval_wikibio_n_ngrams_match_1": 10.128, "eval_wikibio_n_ngrams_match_2": 3.418, "eval_wikibio_n_ngrams_match_3": 1.262, "eval_wikibio_num_pred_words": 36.118, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.15887597840511, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3583513249132447, "eval_wikibio_runtime": 30.1834, "eval_wikibio_samples_per_second": 16.565, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.3216099510643586, "eval_wikibio_token_set_f1_sem": 0.005447061873766101, "eval_wikibio_token_set_precision": 0.32827551551202994, "eval_wikibio_token_set_recall": 0.33427165559753935, "eval_wikibio_true_num_tokens": 61.1328125, "step": 238125 }, { "epoch": 45.72, "eval_nq_accuracy": 0.538875, "eval_nq_bleu_score": 12.250939046918806, "eval_nq_bleu_score_sem": 0.49319309581285903, "eval_nq_emb_cos_sim": 0.8406163454055786, "eval_nq_emb_cos_sim_sem": 0.00763260131178207, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1072921752929688, "eval_nq_n_ngrams_match_1": 23.52, "eval_nq_n_ngrams_match_2": 8.804, "eval_nq_n_ngrams_match_3": 4.114, "eval_nq_num_pred_words": 49.158, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.225936705160954, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4568604821623774, "eval_nq_runtime": 31.2032, "eval_nq_samples_per_second": 16.024, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.47205520438837756, "eval_nq_token_set_f1_sem": 0.005003613810279482, "eval_nq_token_set_precision": 0.4303367268046354, "eval_nq_token_set_recall": 0.5316028579508614, "eval_nq_true_num_tokens": 64.0, "step": 238125 }, { "epoch": 45.72, "learning_rate": 0.001, "loss": 2.4766, "step": 238128 }, { "epoch": 45.73, "learning_rate": 0.001, "loss": 2.4767, "step": 238140 }, { "epoch": 45.73, "learning_rate": 0.001, "loss": 2.4815, "step": 238152 }, { "epoch": 45.73, "learning_rate": 0.001, "loss": 2.4864, "step": 238164 }, { "epoch": 45.73, "learning_rate": 0.001, "loss": 2.4903, "step": 238176 }, { "epoch": 45.74, "learning_rate": 0.001, "loss": 2.4788, "step": 238188 }, { "epoch": 45.74, "learning_rate": 0.001, "loss": 2.4929, "step": 238200 }, { "epoch": 45.74, "learning_rate": 0.001, "loss": 2.4944, "step": 238212 }, { "epoch": 45.74, "learning_rate": 0.001, "loss": 2.4891, "step": 238224 }, { "epoch": 45.74, "learning_rate": 0.001, "loss": 2.491, "step": 238236 }, { "epoch": 45.75, "learning_rate": 0.001, "loss": 2.4825, "step": 238248 }, { "epoch": 45.75, "learning_rate": 0.001, "loss": 2.4758, "step": 238260 }, { "epoch": 45.75, "learning_rate": 0.001, "loss": 2.4924, "step": 238272 }, { "epoch": 45.75, "learning_rate": 0.001, "loss": 2.4942, "step": 238284 }, { "epoch": 45.76, "learning_rate": 0.001, "loss": 2.4873, "step": 238296 }, { "epoch": 45.76, "learning_rate": 0.001, "loss": 2.4919, "step": 238308 }, { "epoch": 45.76, "learning_rate": 0.001, "loss": 2.4863, "step": 238320 }, { "epoch": 45.76, "learning_rate": 0.001, "loss": 2.496, "step": 238332 }, { "epoch": 45.76, "learning_rate": 0.001, "loss": 2.4794, "step": 238344 }, { "epoch": 45.77, "learning_rate": 0.001, "loss": 2.4908, "step": 238356 }, { "epoch": 45.77, "learning_rate": 0.001, "loss": 2.4934, "step": 238368 }, { "epoch": 45.77, "learning_rate": 0.001, "loss": 2.4839, "step": 238380 }, { "epoch": 45.77, "learning_rate": 0.001, "loss": 2.4924, "step": 238392 }, { "epoch": 45.78, "learning_rate": 0.001, "loss": 2.4796, "step": 238404 }, { "epoch": 45.78, "learning_rate": 0.001, "loss": 2.4865, "step": 238416 }, { "epoch": 45.78, "learning_rate": 0.001, "loss": 2.4844, "step": 238428 }, { "epoch": 45.78, "learning_rate": 0.001, "loss": 2.485, "step": 238440 }, { "epoch": 45.79, "learning_rate": 0.001, "loss": 2.4876, "step": 238452 }, { "epoch": 45.79, "learning_rate": 0.001, "loss": 2.49, "step": 238464 }, { "epoch": 45.79, "learning_rate": 0.001, "loss": 2.4941, "step": 238476 }, { "epoch": 45.79, "learning_rate": 0.001, "loss": 2.4863, "step": 238488 }, { "epoch": 45.79, "learning_rate": 0.001, "loss": 2.4763, "step": 238500 }, { "epoch": 45.8, "learning_rate": 0.001, "loss": 2.497, "step": 238512 }, { "epoch": 45.8, "learning_rate": 0.001, "loss": 2.4874, "step": 238524 }, { "epoch": 45.8, "learning_rate": 0.001, "loss": 2.4845, "step": 238536 }, { "epoch": 45.8, "learning_rate": 0.001, "loss": 2.4968, "step": 238548 }, { "epoch": 45.81, "learning_rate": 0.001, "loss": 2.4809, "step": 238560 }, { "epoch": 45.81, "learning_rate": 0.001, "loss": 2.4876, "step": 238572 }, { "epoch": 45.81, "learning_rate": 0.001, "loss": 2.4834, "step": 238584 }, { "epoch": 45.81, "learning_rate": 0.001, "loss": 2.4874, "step": 238596 }, { "epoch": 45.82, "learning_rate": 0.001, "loss": 2.4814, "step": 238608 }, { "epoch": 45.82, "learning_rate": 0.001, "loss": 2.4894, "step": 238620 }, { "epoch": 45.82, "learning_rate": 0.001, "loss": 2.4761, "step": 238632 }, { "epoch": 45.82, "learning_rate": 0.001, "loss": 2.4984, "step": 238644 }, { "epoch": 45.82, "learning_rate": 0.001, "loss": 2.484, "step": 238656 }, { "epoch": 45.83, "learning_rate": 0.001, "loss": 2.4897, "step": 238668 }, { "epoch": 45.83, "learning_rate": 0.001, "loss": 2.4896, "step": 238680 }, { "epoch": 45.83, "learning_rate": 0.001, "loss": 2.4914, "step": 238692 }, { "epoch": 45.83, "learning_rate": 0.001, "loss": 2.5042, "step": 238704 }, { "epoch": 45.84, "learning_rate": 0.001, "loss": 2.4966, "step": 238716 }, { "epoch": 45.84, "learning_rate": 0.001, "loss": 2.498, "step": 238728 }, { "epoch": 45.84, "learning_rate": 0.001, "loss": 2.4987, "step": 238740 }, { "epoch": 45.84, "eval_ag_news_accuracy": 0.32865625, "eval_ag_news_bleu_score": 5.144768233041238, "eval_ag_news_bleu_score_sem": 0.1689205305001006, "eval_ag_news_emb_cos_sim": 0.8266169428825378, "eval_ag_news_emb_cos_sim_sem": 0.006205318701840391, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4586448669433594, "eval_ag_news_n_ngrams_match_1": 14.38, "eval_ag_news_n_ngrams_match_2": 3.27, "eval_ag_news_n_ngrams_match_3": 0.998, "eval_ag_news_num_pred_words": 47.13, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.77388947895721, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35822425798766766, "eval_ag_news_runtime": 31.9965, "eval_ag_news_samples_per_second": 15.627, "eval_ag_news_steps_per_second": 0.031, "eval_ag_news_token_set_f1": 0.35634377796755073, "eval_ag_news_token_set_f1_sem": 0.0042882442053454855, "eval_ag_news_token_set_precision": 0.3434862089996292, "eval_ag_news_token_set_recall": 0.38303931384841483, "eval_ag_news_true_num_tokens": 56.09375, "step": 238750 }, { "epoch": 45.84, "eval_anthropic_toxic_prompts_accuracy": 0.11740625, "eval_anthropic_toxic_prompts_bleu_score": 3.139026559988348, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11305576393985822, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6804046630859375, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009394784755624837, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1747403144836426, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.334, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.972, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.718, "eval_anthropic_toxic_prompts_num_pred_words": 47.528, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.92060722650429, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2172781156802377, "eval_anthropic_toxic_prompts_runtime": 30.1829, "eval_anthropic_toxic_prompts_samples_per_second": 16.566, "eval_anthropic_toxic_prompts_steps_per_second": 0.033, "eval_anthropic_toxic_prompts_token_set_f1": 0.364104661709084, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006646789025763911, "eval_anthropic_toxic_prompts_token_set_precision": 0.4455358200937167, "eval_anthropic_toxic_prompts_token_set_recall": 0.33682954824463635, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 238750 }, { "epoch": 45.84, "eval_arxiv_accuracy": 0.35353125, "eval_arxiv_bleu_score": 4.306459648463017, "eval_arxiv_bleu_score_sem": 0.12515299423561607, "eval_arxiv_emb_cos_sim": 0.7886142730712891, "eval_arxiv_emb_cos_sim_sem": 0.0062686746347753765, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.324554204940796, "eval_arxiv_n_ngrams_match_1": 15.324, "eval_arxiv_n_ngrams_match_2": 2.91, "eval_arxiv_n_ngrams_match_3": 0.644, "eval_arxiv_num_pred_words": 40.634, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.786608748305973, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36529843382010346, "eval_arxiv_runtime": 30.9706, "eval_arxiv_samples_per_second": 16.144, "eval_arxiv_steps_per_second": 0.032, "eval_arxiv_token_set_f1": 0.36171720253871614, "eval_arxiv_token_set_f1_sem": 0.004325681949219151, "eval_arxiv_token_set_precision": 0.31479686368326426, "eval_arxiv_token_set_recall": 0.4458654477597768, "eval_arxiv_true_num_tokens": 64.0, "step": 238750 }, { "epoch": 45.84, "eval_python_code_alpaca_accuracy": 0.163, "eval_python_code_alpaca_bleu_score": 4.416517719170178, "eval_python_code_alpaca_bleu_score_sem": 0.1373566021447754, "eval_python_code_alpaca_emb_cos_sim": 0.7562286257743835, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009416064304311283, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8365767002105713, "eval_python_code_alpaca_n_ngrams_match_1": 9.788, "eval_python_code_alpaca_n_ngrams_match_2": 2.744, "eval_python_code_alpaca_n_ngrams_match_3": 0.878, "eval_python_code_alpaca_num_pred_words": 43.366, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.057273315913395, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3363113499729391, "eval_python_code_alpaca_runtime": 29.6055, "eval_python_code_alpaca_samples_per_second": 16.889, "eval_python_code_alpaca_steps_per_second": 0.034, "eval_python_code_alpaca_token_set_f1": 0.4783706046645508, "eval_python_code_alpaca_token_set_f1_sem": 0.005271790732172327, "eval_python_code_alpaca_token_set_precision": 0.5333043457211538, "eval_python_code_alpaca_token_set_recall": 0.45581275469665694, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 238750 }, { "epoch": 45.84, "eval_wikibio_accuracy": 0.32853125, "eval_wikibio_bleu_score": 6.255304883834685, "eval_wikibio_bleu_score_sem": 0.2265203061591911, "eval_wikibio_emb_cos_sim": 0.746715247631073, "eval_wikibio_emb_cos_sim_sem": 0.009717119243224598, "eval_wikibio_emb_top1_equal": 0.1796875, "eval_wikibio_emb_top1_equal_sem": 0.034068008879424266, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6414806842803955, "eval_wikibio_n_ngrams_match_1": 10.364, "eval_wikibio_n_ngrams_match_2": 3.564, "eval_wikibio_n_ngrams_match_3": 1.336, "eval_wikibio_num_pred_words": 36.364, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.1482804866323, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36748032006631687, "eval_wikibio_runtime": 31.2042, "eval_wikibio_samples_per_second": 16.023, "eval_wikibio_steps_per_second": 0.032, "eval_wikibio_token_set_f1": 0.3276415903672124, "eval_wikibio_token_set_f1_sem": 0.005572812305989724, "eval_wikibio_token_set_precision": 0.3379065389037104, "eval_wikibio_token_set_recall": 0.3324337132562616, "eval_wikibio_true_num_tokens": 61.1328125, "step": 238750 }, { "epoch": 45.84, "eval_nq_accuracy": 0.537, "eval_nq_bleu_score": 12.073843240487765, "eval_nq_bleu_score_sem": 0.48933742638335503, "eval_nq_emb_cos_sim": 0.8400126695632935, "eval_nq_emb_cos_sim_sem": 0.007382467668404009, "eval_nq_emb_top1_equal": 0.3671875, "eval_nq_emb_top1_equal_sem": 0.04277397517748991, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.109062433242798, "eval_nq_n_ngrams_match_1": 23.658, "eval_nq_n_ngrams_match_2": 8.738, "eval_nq_n_ngrams_match_3": 4.064, "eval_nq_num_pred_words": 49.174, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.240511631891755, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4597881343163798, "eval_nq_runtime": 32.0715, "eval_nq_samples_per_second": 15.59, "eval_nq_steps_per_second": 0.031, "eval_nq_token_set_f1": 0.472616888432545, "eval_nq_token_set_f1_sem": 0.004883805672172423, "eval_nq_token_set_precision": 0.4310450081246856, "eval_nq_token_set_recall": 0.5323319177726178, "eval_nq_true_num_tokens": 64.0, "step": 238750 }, { "epoch": 45.84, "learning_rate": 0.001, "loss": 2.4947, "step": 238752 }, { "epoch": 45.85, "learning_rate": 0.001, "loss": 2.4935, "step": 238764 }, { "epoch": 45.85, "learning_rate": 0.001, "loss": 2.4894, "step": 238776 }, { "epoch": 45.85, "learning_rate": 0.001, "loss": 2.488, "step": 238788 }, { "epoch": 45.85, "learning_rate": 0.001, "loss": 2.4948, "step": 238800 }, { "epoch": 45.85, "learning_rate": 0.001, "loss": 2.4979, "step": 238812 }, { "epoch": 45.86, "learning_rate": 0.001, "loss": 2.4905, "step": 238824 }, { "epoch": 45.86, "learning_rate": 0.001, "loss": 2.4892, "step": 238836 }, { "epoch": 45.86, "learning_rate": 0.001, "loss": 2.4985, "step": 238848 }, { "epoch": 45.86, "learning_rate": 0.001, "loss": 2.4908, "step": 238860 }, { "epoch": 45.87, "learning_rate": 0.001, "loss": 2.4916, "step": 238872 }, { "epoch": 45.87, "learning_rate": 0.001, "loss": 2.4783, "step": 238884 }, { "epoch": 45.87, "learning_rate": 0.001, "loss": 2.4862, "step": 238896 }, { "epoch": 45.87, "learning_rate": 0.001, "loss": 2.4807, "step": 238908 }, { "epoch": 45.88, "learning_rate": 0.001, "loss": 2.4913, "step": 238920 }, { "epoch": 45.88, "learning_rate": 0.001, "loss": 2.4861, "step": 238932 }, { "epoch": 45.88, "learning_rate": 0.001, "loss": 2.492, "step": 238944 }, { "epoch": 45.88, "learning_rate": 0.001, "loss": 2.4942, "step": 238956 }, { "epoch": 45.88, "learning_rate": 0.001, "loss": 2.4919, "step": 238968 }, { "epoch": 45.89, "learning_rate": 0.001, "loss": 2.4877, "step": 238980 }, { "epoch": 45.89, "learning_rate": 0.001, "loss": 2.4911, "step": 238992 }, { "epoch": 45.89, "learning_rate": 0.001, "loss": 2.4865, "step": 239004 }, { "epoch": 45.89, "learning_rate": 0.001, "loss": 2.493, "step": 239016 }, { "epoch": 45.9, "learning_rate": 0.001, "loss": 2.487, "step": 239028 }, { "epoch": 45.9, "learning_rate": 0.001, "loss": 2.4902, "step": 239040 }, { "epoch": 45.9, "learning_rate": 0.001, "loss": 2.4907, "step": 239052 }, { "epoch": 45.9, "learning_rate": 0.001, "loss": 2.4936, "step": 239064 }, { "epoch": 45.91, "learning_rate": 0.001, "loss": 2.493, "step": 239076 }, { "epoch": 45.91, "learning_rate": 0.001, "loss": 2.489, "step": 239088 }, { "epoch": 45.91, "learning_rate": 0.001, "loss": 2.5019, "step": 239100 }, { "epoch": 45.91, "learning_rate": 0.001, "loss": 2.4987, "step": 239112 }, { "epoch": 45.91, "learning_rate": 0.001, "loss": 2.5033, "step": 239124 }, { "epoch": 45.92, "learning_rate": 0.001, "loss": 2.5049, "step": 239136 }, { "epoch": 45.92, "learning_rate": 0.001, "loss": 2.4882, "step": 239148 }, { "epoch": 45.92, "learning_rate": 0.001, "loss": 2.4871, "step": 239160 }, { "epoch": 45.92, "learning_rate": 0.001, "loss": 2.492, "step": 239172 }, { "epoch": 45.93, "learning_rate": 0.001, "loss": 2.478, "step": 239184 }, { "epoch": 45.93, "learning_rate": 0.001, "loss": 2.4837, "step": 239196 }, { "epoch": 45.93, "learning_rate": 0.001, "loss": 2.4894, "step": 239208 }, { "epoch": 45.93, "learning_rate": 0.001, "loss": 2.4907, "step": 239220 }, { "epoch": 45.94, "learning_rate": 0.001, "loss": 2.4902, "step": 239232 }, { "epoch": 45.94, "learning_rate": 0.001, "loss": 2.4815, "step": 239244 }, { "epoch": 45.94, "learning_rate": 0.001, "loss": 2.4972, "step": 239256 }, { "epoch": 45.94, "learning_rate": 0.001, "loss": 2.4933, "step": 239268 }, { "epoch": 45.94, "learning_rate": 0.001, "loss": 2.4836, "step": 239280 }, { "epoch": 45.95, "learning_rate": 0.001, "loss": 2.4921, "step": 239292 }, { "epoch": 45.95, "learning_rate": 0.001, "loss": 2.4972, "step": 239304 }, { "epoch": 45.95, "learning_rate": 0.001, "loss": 2.4968, "step": 239316 }, { "epoch": 45.95, "learning_rate": 0.001, "loss": 2.4947, "step": 239328 }, { "epoch": 45.96, "learning_rate": 0.001, "loss": 2.4891, "step": 239340 }, { "epoch": 45.96, "learning_rate": 0.001, "loss": 2.4978, "step": 239352 }, { "epoch": 45.96, "learning_rate": 0.001, "loss": 2.4896, "step": 239364 }, { "epoch": 45.96, "eval_ag_news_accuracy": 0.32871875, "eval_ag_news_bleu_score": 4.9399026382671485, "eval_ag_news_bleu_score_sem": 0.1510686819332423, "eval_ag_news_emb_cos_sim": 0.8162962198257446, "eval_ag_news_emb_cos_sim_sem": 0.007395716266815606, "eval_ag_news_emb_top1_equal": 0.328125, "eval_ag_news_emb_top1_equal_sem": 0.041664103776406315, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4671449661254883, "eval_ag_news_n_ngrams_match_1": 14.348, "eval_ag_news_n_ngrams_match_2": 3.3, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 46.87, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.045121808701666, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3576900450423325, "eval_ag_news_runtime": 31.3477, "eval_ag_news_samples_per_second": 15.95, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.35768096360388313, "eval_ag_news_token_set_f1_sem": 0.004428705720902338, "eval_ag_news_token_set_precision": 0.34337172895039436, "eval_ag_news_token_set_recall": 0.38905105075725327, "eval_ag_news_true_num_tokens": 56.09375, "step": 239375 }, { "epoch": 45.96, "eval_anthropic_toxic_prompts_accuracy": 0.11665625, "eval_anthropic_toxic_prompts_bleu_score": 3.2676324815211477, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11991438332392085, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6874958276748657, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008322838274211456, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.195629358291626, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.976, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.748, "eval_anthropic_toxic_prompts_num_pred_words": 46.32, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.42554127285827, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22266523439444974, "eval_anthropic_toxic_prompts_runtime": 28.1126, "eval_anthropic_toxic_prompts_samples_per_second": 17.786, "eval_anthropic_toxic_prompts_steps_per_second": 0.036, "eval_anthropic_toxic_prompts_token_set_f1": 0.3631631838023226, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006377495720521091, "eval_anthropic_toxic_prompts_token_set_precision": 0.44728210919074557, "eval_anthropic_toxic_prompts_token_set_recall": 0.33138813950909785, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 239375 }, { "epoch": 45.96, "eval_arxiv_accuracy": 0.3506875, "eval_arxiv_bleu_score": 4.445403743904117, "eval_arxiv_bleu_score_sem": 0.1331901707372557, "eval_arxiv_emb_cos_sim": 0.7852582335472107, "eval_arxiv_emb_cos_sim_sem": 0.0062486166861512385, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.329124689102173, "eval_arxiv_n_ngrams_match_1": 15.5, "eval_arxiv_n_ngrams_match_2": 3.076, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 40.63, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.91389766790239, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36933122511739414, "eval_arxiv_runtime": 24.5123, "eval_arxiv_samples_per_second": 20.398, "eval_arxiv_steps_per_second": 0.041, "eval_arxiv_token_set_f1": 0.364780382769738, "eval_arxiv_token_set_f1_sem": 0.004452125811279962, "eval_arxiv_token_set_precision": 0.3159489690695461, "eval_arxiv_token_set_recall": 0.44893671958103903, "eval_arxiv_true_num_tokens": 64.0, "step": 239375 }, { "epoch": 45.96, "eval_python_code_alpaca_accuracy": 0.1626875, "eval_python_code_alpaca_bleu_score": 4.608845837247555, "eval_python_code_alpaca_bleu_score_sem": 0.1422032556489837, "eval_python_code_alpaca_emb_cos_sim": 0.7490547895431519, "eval_python_code_alpaca_emb_cos_sim_sem": 0.011754896978213053, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.83894419670105, "eval_python_code_alpaca_n_ngrams_match_1": 9.772, "eval_python_code_alpaca_n_ngrams_match_2": 2.824, "eval_python_code_alpaca_n_ngrams_match_3": 0.928, "eval_python_code_alpaca_num_pred_words": 42.048, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.097704191719522, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3420207305899004, "eval_python_code_alpaca_runtime": 15.6332, "eval_python_code_alpaca_samples_per_second": 31.983, "eval_python_code_alpaca_steps_per_second": 0.064, "eval_python_code_alpaca_token_set_f1": 0.48195669409699216, "eval_python_code_alpaca_token_set_f1_sem": 0.0056963555204259594, "eval_python_code_alpaca_token_set_precision": 0.5368215408074446, "eval_python_code_alpaca_token_set_recall": 0.4551674891904531, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 239375 }, { "epoch": 45.96, "eval_wikibio_accuracy": 0.3286875, "eval_wikibio_bleu_score": 6.424721395278257, "eval_wikibio_bleu_score_sem": 0.23062238420257883, "eval_wikibio_emb_cos_sim": 0.737343430519104, "eval_wikibio_emb_cos_sim_sem": 0.010036058933885709, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6401028633117676, "eval_wikibio_n_ngrams_match_1": 10.264, "eval_wikibio_n_ngrams_match_2": 3.546, "eval_wikibio_n_ngrams_match_3": 1.348, "eval_wikibio_num_pred_words": 35.836, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.095755179405025, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36338410457838255, "eval_wikibio_runtime": 23.3936, "eval_wikibio_samples_per_second": 21.373, "eval_wikibio_steps_per_second": 0.043, "eval_wikibio_token_set_f1": 0.3260010040556651, "eval_wikibio_token_set_f1_sem": 0.0056159329637420615, "eval_wikibio_token_set_precision": 0.33366631613476777, "eval_wikibio_token_set_recall": 0.33407662882375644, "eval_wikibio_true_num_tokens": 61.1328125, "step": 239375 }, { "epoch": 45.96, "eval_nq_accuracy": 0.53828125, "eval_nq_bleu_score": 12.107406644099097, "eval_nq_bleu_score_sem": 0.4878146134150134, "eval_nq_emb_cos_sim": 0.8405568599700928, "eval_nq_emb_cos_sim_sem": 0.006912360403024481, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1115565299987793, "eval_nq_n_ngrams_match_1": 23.364, "eval_nq_n_ngrams_match_2": 8.69, "eval_nq_n_ngrams_match_3": 4.044, "eval_nq_num_pred_words": 48.828, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.261089916669839, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4556274129056221, "eval_nq_runtime": 29.0195, "eval_nq_samples_per_second": 17.23, "eval_nq_steps_per_second": 0.034, "eval_nq_token_set_f1": 0.4671490124111569, "eval_nq_token_set_f1_sem": 0.00491704582686669, "eval_nq_token_set_precision": 0.42362585087010984, "eval_nq_token_set_recall": 0.5289978632145839, "eval_nq_true_num_tokens": 64.0, "step": 239375 }, { "epoch": 45.96, "learning_rate": 0.001, "loss": 2.4825, "step": 239376 }, { "epoch": 45.97, "learning_rate": 0.001, "loss": 2.4757, "step": 239388 }, { "epoch": 45.97, "learning_rate": 0.001, "loss": 2.492, "step": 239400 }, { "epoch": 45.97, "learning_rate": 0.001, "loss": 2.4927, "step": 239412 }, { "epoch": 45.97, "learning_rate": 0.001, "loss": 2.4928, "step": 239424 }, { "epoch": 45.97, "learning_rate": 0.001, "loss": 2.4825, "step": 239436 }, { "epoch": 45.98, "learning_rate": 0.001, "loss": 2.4862, "step": 239448 }, { "epoch": 45.98, "learning_rate": 0.001, "loss": 2.4944, "step": 239460 }, { "epoch": 45.98, "learning_rate": 0.001, "loss": 2.4825, "step": 239472 }, { "epoch": 45.98, "learning_rate": 0.001, "loss": 2.4918, "step": 239484 }, { "epoch": 45.99, "learning_rate": 0.001, "loss": 2.4906, "step": 239496 }, { "epoch": 45.99, "learning_rate": 0.001, "loss": 2.4974, "step": 239508 }, { "epoch": 45.99, "learning_rate": 0.001, "loss": 2.4966, "step": 239520 }, { "epoch": 45.99, "learning_rate": 0.001, "loss": 2.4885, "step": 239532 }, { "epoch": 46.0, "learning_rate": 0.001, "loss": 2.4956, "step": 239544 }, { "epoch": 46.0, "learning_rate": 0.001, "loss": 2.4781, "step": 239556 }, { "epoch": 46.0, "learning_rate": 0.001, "loss": 2.4887, "step": 239568 }, { "epoch": 46.0, "learning_rate": 0.001, "loss": 2.4649, "step": 239580 }, { "epoch": 46.0, "learning_rate": 0.001, "loss": 2.4821, "step": 239592 }, { "epoch": 46.01, "learning_rate": 0.001, "loss": 2.4719, "step": 239604 }, { "epoch": 46.01, "learning_rate": 0.001, "loss": 2.4766, "step": 239616 }, { "epoch": 46.01, "learning_rate": 0.001, "loss": 2.4793, "step": 239628 }, { "epoch": 46.01, "learning_rate": 0.001, "loss": 2.475, "step": 239640 }, { "epoch": 46.02, "learning_rate": 0.001, "loss": 2.4732, "step": 239652 }, { "epoch": 46.02, "learning_rate": 0.001, "loss": 2.4701, "step": 239664 }, { "epoch": 46.02, "learning_rate": 0.001, "loss": 2.4635, "step": 239676 }, { "epoch": 46.02, "learning_rate": 0.001, "loss": 2.4661, "step": 239688 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 2.4822, "step": 239700 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 2.4774, "step": 239712 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 2.4766, "step": 239724 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 2.4806, "step": 239736 }, { "epoch": 46.03, "learning_rate": 0.001, "loss": 2.4765, "step": 239748 }, { "epoch": 46.04, "learning_rate": 0.001, "loss": 2.4727, "step": 239760 }, { "epoch": 46.04, "learning_rate": 0.001, "loss": 2.4691, "step": 239772 }, { "epoch": 46.04, "learning_rate": 0.001, "loss": 2.4654, "step": 239784 }, { "epoch": 46.04, "learning_rate": 0.001, "loss": 2.4711, "step": 239796 }, { "epoch": 46.05, "learning_rate": 0.001, "loss": 2.4751, "step": 239808 }, { "epoch": 46.05, "learning_rate": 0.001, "loss": 2.4877, "step": 239820 }, { "epoch": 46.05, "learning_rate": 0.001, "loss": 2.4794, "step": 239832 }, { "epoch": 46.05, "learning_rate": 0.001, "loss": 2.4694, "step": 239844 }, { "epoch": 46.06, "learning_rate": 0.001, "loss": 2.4722, "step": 239856 }, { "epoch": 46.06, "learning_rate": 0.001, "loss": 2.4858, "step": 239868 }, { "epoch": 46.06, "learning_rate": 0.001, "loss": 2.4742, "step": 239880 }, { "epoch": 46.06, "learning_rate": 0.001, "loss": 2.4774, "step": 239892 }, { "epoch": 46.06, "learning_rate": 0.001, "loss": 2.4725, "step": 239904 }, { "epoch": 46.07, "learning_rate": 0.001, "loss": 2.4771, "step": 239916 }, { "epoch": 46.07, "learning_rate": 0.001, "loss": 2.4736, "step": 239928 }, { "epoch": 46.07, "learning_rate": 0.001, "loss": 2.4808, "step": 239940 }, { "epoch": 46.07, "learning_rate": 0.001, "loss": 2.4802, "step": 239952 }, { "epoch": 46.08, "learning_rate": 0.001, "loss": 2.4843, "step": 239964 }, { "epoch": 46.08, "learning_rate": 0.001, "loss": 2.4763, "step": 239976 }, { "epoch": 46.08, "learning_rate": 0.001, "loss": 2.4768, "step": 239988 }, { "epoch": 46.08, "learning_rate": 0.001, "loss": 2.4736, "step": 240000 }, { "epoch": 46.08, "eval_ag_news_accuracy": 0.329625, "eval_ag_news_bleu_score": 4.809710955415112, "eval_ag_news_bleu_score_sem": 0.14954303122333842, "eval_ag_news_emb_cos_sim": 0.822744607925415, "eval_ag_news_emb_cos_sim_sem": 0.006417004541208896, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.453580141067505, "eval_ag_news_n_ngrams_match_1": 14.464, "eval_ag_news_n_ngrams_match_2": 3.244, "eval_ag_news_n_ngrams_match_3": 0.858, "eval_ag_news_num_pred_words": 46.574, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.61337027474918, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3610593843406519, "eval_ag_news_runtime": 30.2345, "eval_ag_news_samples_per_second": 16.537, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.36150224937473874, "eval_ag_news_token_set_f1_sem": 0.004347056964809516, "eval_ag_news_token_set_precision": 0.34791071771012694, "eval_ag_news_token_set_recall": 0.3912455918213762, "eval_ag_news_true_num_tokens": 56.09375, "step": 240000 }, { "epoch": 46.08, "eval_anthropic_toxic_prompts_accuracy": 0.11715625, "eval_anthropic_toxic_prompts_bleu_score": 3.19502595225729, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11500448056937196, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6891922950744629, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008879860902299053, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1659159660339355, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.318, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758, "eval_anthropic_toxic_prompts_num_pred_words": 47.53, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.71045205814094, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21694025716983395, "eval_anthropic_toxic_prompts_runtime": 27.7102, "eval_anthropic_toxic_prompts_samples_per_second": 18.044, "eval_anthropic_toxic_prompts_steps_per_second": 0.036, "eval_anthropic_toxic_prompts_token_set_f1": 0.35889453499554247, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006317587143558395, "eval_anthropic_toxic_prompts_token_set_precision": 0.4417790410242109, "eval_anthropic_toxic_prompts_token_set_recall": 0.3290858249843667, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 240000 }, { "epoch": 46.08, "eval_arxiv_accuracy": 0.35415625, "eval_arxiv_bleu_score": 4.537296894632079, "eval_arxiv_bleu_score_sem": 0.12948589511221464, "eval_arxiv_emb_cos_sim": 0.786674976348877, "eval_arxiv_emb_cos_sim_sem": 0.006466060180153037, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.330310821533203, "eval_arxiv_n_ngrams_match_1": 15.936, "eval_arxiv_n_ngrams_match_2": 3.106, "eval_arxiv_n_ngrams_match_3": 0.708, "eval_arxiv_num_pred_words": 41.652, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.947026891141647, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3767400793242511, "eval_arxiv_runtime": 24.7446, "eval_arxiv_samples_per_second": 20.206, "eval_arxiv_steps_per_second": 0.04, "eval_arxiv_token_set_f1": 0.37091683662322, "eval_arxiv_token_set_f1_sem": 0.00419620446335893, "eval_arxiv_token_set_precision": 0.32662819259736514, "eval_arxiv_token_set_recall": 0.4422520115584296, "eval_arxiv_true_num_tokens": 64.0, "step": 240000 }, { "epoch": 46.08, "eval_python_code_alpaca_accuracy": 0.16303125, "eval_python_code_alpaca_bleu_score": 4.624069656542272, "eval_python_code_alpaca_bleu_score_sem": 0.14400423219514763, "eval_python_code_alpaca_emb_cos_sim": 0.7744783163070679, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006863218494749795, "eval_python_code_alpaca_emb_top1_equal": 0.1328125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030114394778901498, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8359525203704834, "eval_python_code_alpaca_n_ngrams_match_1": 9.912, "eval_python_code_alpaca_n_ngrams_match_2": 2.934, "eval_python_code_alpaca_n_ngrams_match_3": 0.992, "eval_python_code_alpaca_num_pred_words": 43.822, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.046629831852382, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3406593161608, "eval_python_code_alpaca_runtime": 24.784, "eval_python_code_alpaca_samples_per_second": 20.174, "eval_python_code_alpaca_steps_per_second": 0.04, "eval_python_code_alpaca_token_set_f1": 0.4862114037808474, "eval_python_code_alpaca_token_set_f1_sem": 0.005301342399029125, "eval_python_code_alpaca_token_set_precision": 0.5399548248272075, "eval_python_code_alpaca_token_set_recall": 0.4665642576531167, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 240000 }, { "epoch": 46.08, "eval_wikibio_accuracy": 0.32915625, "eval_wikibio_bleu_score": 6.3564638604424015, "eval_wikibio_bleu_score_sem": 0.22570622608110016, "eval_wikibio_emb_cos_sim": 0.7566898465156555, "eval_wikibio_emb_cos_sim_sem": 0.00850022843269324, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6335058212280273, "eval_wikibio_n_ngrams_match_1": 10.64, "eval_wikibio_n_ngrams_match_2": 3.644, "eval_wikibio_n_ngrams_match_3": 1.36, "eval_wikibio_num_pred_words": 37.316, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.84526304133383, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3729145717259592, "eval_wikibio_runtime": 27.4853, "eval_wikibio_samples_per_second": 18.192, "eval_wikibio_steps_per_second": 0.036, "eval_wikibio_token_set_f1": 0.33196457282792086, "eval_wikibio_token_set_f1_sem": 0.005130478320420156, "eval_wikibio_token_set_precision": 0.34511803631241433, "eval_wikibio_token_set_recall": 0.3327441539282741, "eval_wikibio_true_num_tokens": 61.1328125, "step": 240000 }, { "epoch": 46.08, "eval_nq_accuracy": 0.53975, "eval_nq_bleu_score": 12.389035135167148, "eval_nq_bleu_score_sem": 0.4980326902113494, "eval_nq_emb_cos_sim": 0.8414790630340576, "eval_nq_emb_cos_sim_sem": 0.007054549381921063, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1096136569976807, "eval_nq_n_ngrams_match_1": 23.654, "eval_nq_n_ngrams_match_2": 8.878, "eval_nq_n_ngrams_match_3": 4.132, "eval_nq_num_pred_words": 49.342, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.24505524981566, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4582628348318293, "eval_nq_runtime": 25.4296, "eval_nq_samples_per_second": 19.662, "eval_nq_steps_per_second": 0.039, "eval_nq_token_set_f1": 0.470483692756976, "eval_nq_token_set_f1_sem": 0.004891242595918317, "eval_nq_token_set_precision": 0.43139297728319953, "eval_nq_token_set_recall": 0.5254170217681848, "eval_nq_true_num_tokens": 64.0, "step": 240000 }, { "epoch": 46.09, "learning_rate": 0.001, "loss": 2.4816, "step": 240012 }, { "epoch": 46.09, "learning_rate": 0.001, "loss": 2.4861, "step": 240024 }, { "epoch": 46.09, "learning_rate": 0.001, "loss": 2.4851, "step": 240036 }, { "epoch": 46.09, "learning_rate": 0.001, "loss": 2.4722, "step": 240048 }, { "epoch": 46.09, "learning_rate": 0.001, "loss": 2.469, "step": 240060 }, { "epoch": 46.1, "learning_rate": 0.001, "loss": 2.4772, "step": 240072 }, { "epoch": 46.1, "learning_rate": 0.001, "loss": 2.4727, "step": 240084 }, { "epoch": 46.1, "learning_rate": 0.001, "loss": 2.4746, "step": 240096 }, { "epoch": 46.1, "learning_rate": 0.001, "loss": 2.4833, "step": 240108 }, { "epoch": 46.11, "learning_rate": 0.001, "loss": 2.4811, "step": 240120 }, { "epoch": 46.11, "learning_rate": 0.001, "loss": 2.4839, "step": 240132 }, { "epoch": 46.11, "learning_rate": 0.001, "loss": 2.4885, "step": 240144 }, { "epoch": 46.11, "learning_rate": 0.001, "loss": 2.4737, "step": 240156 }, { "epoch": 46.12, "learning_rate": 0.001, "loss": 2.4775, "step": 240168 }, { "epoch": 46.12, "learning_rate": 0.001, "loss": 2.4817, "step": 240180 }, { "epoch": 46.12, "learning_rate": 0.001, "loss": 2.4759, "step": 240192 }, { "epoch": 46.12, "learning_rate": 0.001, "loss": 2.482, "step": 240204 }, { "epoch": 46.12, "learning_rate": 0.001, "loss": 2.4738, "step": 240216 }, { "epoch": 46.13, "learning_rate": 0.001, "loss": 2.4774, "step": 240228 }, { "epoch": 46.13, "learning_rate": 0.001, "loss": 2.4822, "step": 240240 }, { "epoch": 46.13, "learning_rate": 0.001, "loss": 2.4777, "step": 240252 }, { "epoch": 46.13, "learning_rate": 0.001, "loss": 2.4784, "step": 240264 }, { "epoch": 46.14, "learning_rate": 0.001, "loss": 2.4832, "step": 240276 }, { "epoch": 46.14, "learning_rate": 0.001, "loss": 2.4835, "step": 240288 }, { "epoch": 46.14, "learning_rate": 0.001, "loss": 2.4782, "step": 240300 }, { "epoch": 46.14, "learning_rate": 0.001, "loss": 2.4794, "step": 240312 }, { "epoch": 46.15, "learning_rate": 0.001, "loss": 2.4813, "step": 240324 }, { "epoch": 46.15, "learning_rate": 0.001, "loss": 2.4772, "step": 240336 }, { "epoch": 46.15, "learning_rate": 0.001, "loss": 2.4749, "step": 240348 }, { "epoch": 46.15, "learning_rate": 0.001, "loss": 2.4915, "step": 240360 }, { "epoch": 46.15, "learning_rate": 0.001, "loss": 2.4798, "step": 240372 }, { "epoch": 46.16, "learning_rate": 0.001, "loss": 2.4772, "step": 240384 }, { "epoch": 46.16, "learning_rate": 0.001, "loss": 2.4969, "step": 240396 }, { "epoch": 46.16, "learning_rate": 0.001, "loss": 2.478, "step": 240408 }, { "epoch": 46.16, "learning_rate": 0.001, "loss": 2.4791, "step": 240420 }, { "epoch": 46.17, "learning_rate": 0.001, "loss": 2.485, "step": 240432 }, { "epoch": 46.17, "learning_rate": 0.001, "loss": 2.4839, "step": 240444 }, { "epoch": 46.17, "learning_rate": 0.001, "loss": 2.4799, "step": 240456 }, { "epoch": 46.17, "learning_rate": 0.001, "loss": 2.4781, "step": 240468 }, { "epoch": 46.18, "learning_rate": 0.001, "loss": 2.4749, "step": 240480 }, { "epoch": 46.18, "learning_rate": 0.001, "loss": 2.4717, "step": 240492 }, { "epoch": 46.18, "learning_rate": 0.001, "loss": 2.4972, "step": 240504 }, { "epoch": 46.18, "learning_rate": 0.001, "loss": 2.4806, "step": 240516 }, { "epoch": 46.18, "learning_rate": 0.001, "loss": 2.4728, "step": 240528 }, { "epoch": 46.19, "learning_rate": 0.001, "loss": 2.4824, "step": 240540 }, { "epoch": 46.19, "learning_rate": 0.001, "loss": 2.4821, "step": 240552 }, { "epoch": 46.19, "learning_rate": 0.001, "loss": 2.4809, "step": 240564 }, { "epoch": 46.19, "learning_rate": 0.001, "loss": 2.4747, "step": 240576 }, { "epoch": 46.2, "learning_rate": 0.001, "loss": 2.4754, "step": 240588 }, { "epoch": 46.2, "learning_rate": 0.001, "loss": 2.489, "step": 240600 }, { "epoch": 46.2, "learning_rate": 0.001, "loss": 2.4813, "step": 240612 }, { "epoch": 46.2, "learning_rate": 0.001, "loss": 2.4796, "step": 240624 }, { "epoch": 46.2, "eval_ag_news_accuracy": 0.33040625, "eval_ag_news_bleu_score": 5.085781487073833, "eval_ag_news_bleu_score_sem": 0.1586855435254607, "eval_ag_news_emb_cos_sim": 0.8263951539993286, "eval_ag_news_emb_cos_sim_sem": 0.006545015056848609, "eval_ag_news_emb_top1_equal": 0.2421875, "eval_ag_news_emb_top1_equal_sem": 0.038014990119662626, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4564740657806396, "eval_ag_news_n_ngrams_match_1": 14.572, "eval_ag_news_n_ngrams_match_2": 3.324, "eval_ag_news_n_ngrams_match_3": 0.962, "eval_ag_news_num_pred_words": 46.846, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.704989493872922, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3627588435215151, "eval_ag_news_runtime": 31.2842, "eval_ag_news_samples_per_second": 15.983, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.36367627125920055, "eval_ag_news_token_set_f1_sem": 0.004495732384210909, "eval_ag_news_token_set_precision": 0.3484682179815899, "eval_ag_news_token_set_recall": 0.39595446271032897, "eval_ag_news_true_num_tokens": 56.09375, "step": 240625 }, { "epoch": 46.2, "eval_anthropic_toxic_prompts_accuracy": 0.11765625, "eval_anthropic_toxic_prompts_bleu_score": 3.2539468859656058, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11598988557949785, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6961995363235474, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008616661735882615, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.185462474822998, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.454, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.036, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.772, "eval_anthropic_toxic_prompts_num_pred_words": 47.32, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.178467753157012, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22132556779911564, "eval_anthropic_toxic_prompts_runtime": 28.6646, "eval_anthropic_toxic_prompts_samples_per_second": 17.443, "eval_anthropic_toxic_prompts_steps_per_second": 0.035, "eval_anthropic_toxic_prompts_token_set_f1": 0.3686151958594369, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006541165470605921, "eval_anthropic_toxic_prompts_token_set_precision": 0.45025060429691766, "eval_anthropic_toxic_prompts_token_set_recall": 0.34104932451098174, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 240625 }, { "epoch": 46.2, "eval_arxiv_accuracy": 0.35609375, "eval_arxiv_bleu_score": 4.506935303199835, "eval_arxiv_bleu_score_sem": 0.128245708450738, "eval_arxiv_emb_cos_sim": 0.7882487773895264, "eval_arxiv_emb_cos_sim_sem": 0.006432708193546252, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3089725971221924, "eval_arxiv_n_ngrams_match_1": 15.716, "eval_arxiv_n_ngrams_match_2": 3.142, "eval_arxiv_n_ngrams_match_3": 0.688, "eval_arxiv_num_pred_words": 41.036, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.357004363511724, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3750472274439439, "eval_arxiv_runtime": 24.8541, "eval_arxiv_samples_per_second": 20.117, "eval_arxiv_steps_per_second": 0.04, "eval_arxiv_token_set_f1": 0.36817400991817184, "eval_arxiv_token_set_f1_sem": 0.004325866274308996, "eval_arxiv_token_set_precision": 0.32086655016617605, "eval_arxiv_token_set_recall": 0.44688507774430586, "eval_arxiv_true_num_tokens": 64.0, "step": 240625 }, { "epoch": 46.2, "eval_python_code_alpaca_accuracy": 0.16146875, "eval_python_code_alpaca_bleu_score": 4.659948408359692, "eval_python_code_alpaca_bleu_score_sem": 0.14826534153121193, "eval_python_code_alpaca_emb_cos_sim": 0.7675008773803711, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007611677377129935, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8577992916107178, "eval_python_code_alpaca_n_ngrams_match_1": 10.086, "eval_python_code_alpaca_n_ngrams_match_2": 2.938, "eval_python_code_alpaca_n_ngrams_match_3": 0.95, "eval_python_code_alpaca_num_pred_words": 43.444, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.4231414608723, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.349076057560282, "eval_python_code_alpaca_runtime": 14.1274, "eval_python_code_alpaca_samples_per_second": 35.392, "eval_python_code_alpaca_steps_per_second": 0.071, "eval_python_code_alpaca_token_set_f1": 0.48991908772605924, "eval_python_code_alpaca_token_set_f1_sem": 0.005347459449312268, "eval_python_code_alpaca_token_set_precision": 0.5498679956345386, "eval_python_code_alpaca_token_set_recall": 0.4621258316501458, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 240625 }, { "epoch": 46.2, "eval_wikibio_accuracy": 0.33275, "eval_wikibio_bleu_score": 6.060103788040271, "eval_wikibio_bleu_score_sem": 0.22060552009294668, "eval_wikibio_emb_cos_sim": 0.7326762676239014, "eval_wikibio_emb_cos_sim_sem": 0.010217672996841691, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.607383966445923, "eval_wikibio_n_ngrams_match_1": 10.012, "eval_wikibio_n_ngrams_match_2": 3.42, "eval_wikibio_n_ngrams_match_3": 1.26, "eval_wikibio_num_pred_words": 35.81, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.86947476109043, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3536812163252703, "eval_wikibio_runtime": 17.8396, "eval_wikibio_samples_per_second": 28.028, "eval_wikibio_steps_per_second": 0.056, "eval_wikibio_token_set_f1": 0.3186639345920932, "eval_wikibio_token_set_f1_sem": 0.005847377785073911, "eval_wikibio_token_set_precision": 0.32633156129899715, "eval_wikibio_token_set_recall": 0.3272993497343292, "eval_wikibio_true_num_tokens": 61.1328125, "step": 240625 }, { "epoch": 46.2, "eval_nq_accuracy": 0.5389375, "eval_nq_bleu_score": 12.255174826241957, "eval_nq_bleu_score_sem": 0.49766489291864496, "eval_nq_emb_cos_sim": 0.8423871994018555, "eval_nq_emb_cos_sim_sem": 0.007154197044146467, "eval_nq_emb_top1_equal": 0.3046875, "eval_nq_emb_top1_equal_sem": 0.04084279867618665, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1075503826141357, "eval_nq_n_ngrams_match_1": 23.664, "eval_nq_n_ngrams_match_2": 8.878, "eval_nq_n_ngrams_match_3": 4.13, "eval_nq_num_pred_words": 49.012, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.228060976481084, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4611815942826812, "eval_nq_runtime": 20.4638, "eval_nq_samples_per_second": 24.433, "eval_nq_steps_per_second": 0.049, "eval_nq_token_set_f1": 0.47574247881999204, "eval_nq_token_set_f1_sem": 0.005002256807773097, "eval_nq_token_set_precision": 0.433061993703655, "eval_nq_token_set_recall": 0.5362434656844651, "eval_nq_true_num_tokens": 64.0, "step": 240625 }, { "epoch": 46.21, "learning_rate": 0.001, "loss": 2.4792, "step": 240636 }, { "epoch": 46.21, "learning_rate": 0.001, "loss": 2.4857, "step": 240648 }, { "epoch": 46.21, "learning_rate": 0.001, "loss": 2.4723, "step": 240660 }, { "epoch": 46.21, "learning_rate": 0.001, "loss": 2.4809, "step": 240672 }, { "epoch": 46.21, "learning_rate": 0.001, "loss": 2.485, "step": 240684 }, { "epoch": 46.22, "learning_rate": 0.001, "loss": 2.4881, "step": 240696 }, { "epoch": 46.22, "learning_rate": 0.001, "loss": 2.4859, "step": 240708 }, { "epoch": 46.22, "learning_rate": 0.001, "loss": 2.4805, "step": 240720 }, { "epoch": 46.22, "learning_rate": 0.001, "loss": 2.4856, "step": 240732 }, { "epoch": 46.23, "learning_rate": 0.001, "loss": 2.4898, "step": 240744 }, { "epoch": 46.23, "learning_rate": 0.001, "loss": 2.4714, "step": 240756 }, { "epoch": 46.23, "learning_rate": 0.001, "loss": 2.478, "step": 240768 }, { "epoch": 46.23, "learning_rate": 0.001, "loss": 2.483, "step": 240780 }, { "epoch": 46.24, "learning_rate": 0.001, "loss": 2.4745, "step": 240792 }, { "epoch": 46.24, "learning_rate": 0.001, "loss": 2.4821, "step": 240804 }, { "epoch": 46.24, "learning_rate": 0.001, "loss": 2.4792, "step": 240816 }, { "epoch": 46.24, "learning_rate": 0.001, "loss": 2.4891, "step": 240828 }, { "epoch": 46.24, "learning_rate": 0.001, "loss": 2.4857, "step": 240840 }, { "epoch": 46.25, "learning_rate": 0.001, "loss": 2.485, "step": 240852 }, { "epoch": 46.25, "learning_rate": 0.001, "loss": 2.475, "step": 240864 }, { "epoch": 46.25, "learning_rate": 0.001, "loss": 2.4888, "step": 240876 }, { "epoch": 46.25, "learning_rate": 0.001, "loss": 2.4757, "step": 240888 }, { "epoch": 46.26, "learning_rate": 0.001, "loss": 2.4821, "step": 240900 }, { "epoch": 46.26, "learning_rate": 0.001, "loss": 2.4887, "step": 240912 }, { "epoch": 46.26, "learning_rate": 0.001, "loss": 2.4755, "step": 240924 }, { "epoch": 46.26, "learning_rate": 0.001, "loss": 2.4799, "step": 240936 }, { "epoch": 46.26, "learning_rate": 0.001, "loss": 2.4949, "step": 240948 }, { "epoch": 46.27, "learning_rate": 0.001, "loss": 2.4895, "step": 240960 }, { "epoch": 46.27, "learning_rate": 0.001, "loss": 2.4812, "step": 240972 }, { "epoch": 46.27, "learning_rate": 0.001, "loss": 2.4761, "step": 240984 }, { "epoch": 46.27, "learning_rate": 0.001, "loss": 2.4805, "step": 240996 }, { "epoch": 46.28, "learning_rate": 0.001, "loss": 2.4819, "step": 241008 }, { "epoch": 46.28, "learning_rate": 0.001, "loss": 2.4885, "step": 241020 }, { "epoch": 46.28, "learning_rate": 0.001, "loss": 2.4805, "step": 241032 }, { "epoch": 46.28, "learning_rate": 0.001, "loss": 2.4764, "step": 241044 }, { "epoch": 46.29, "learning_rate": 0.001, "loss": 2.4801, "step": 241056 }, { "epoch": 46.29, "learning_rate": 0.001, "loss": 2.4761, "step": 241068 }, { "epoch": 46.29, "learning_rate": 0.001, "loss": 2.4943, "step": 241080 }, { "epoch": 46.29, "learning_rate": 0.001, "loss": 2.4812, "step": 241092 }, { "epoch": 46.29, "learning_rate": 0.001, "loss": 2.4956, "step": 241104 }, { "epoch": 46.3, "learning_rate": 0.001, "loss": 2.4805, "step": 241116 }, { "epoch": 46.3, "learning_rate": 0.001, "loss": 2.4837, "step": 241128 }, { "epoch": 46.3, "learning_rate": 0.001, "loss": 2.4968, "step": 241140 }, { "epoch": 46.3, "learning_rate": 0.001, "loss": 2.4824, "step": 241152 }, { "epoch": 46.31, "learning_rate": 0.001, "loss": 2.479, "step": 241164 }, { "epoch": 46.31, "learning_rate": 0.001, "loss": 2.483, "step": 241176 }, { "epoch": 46.31, "learning_rate": 0.001, "loss": 2.4789, "step": 241188 }, { "epoch": 46.31, "learning_rate": 0.001, "loss": 2.4868, "step": 241200 }, { "epoch": 46.32, "learning_rate": 0.001, "loss": 2.4934, "step": 241212 }, { "epoch": 46.32, "learning_rate": 0.001, "loss": 2.4854, "step": 241224 }, { "epoch": 46.32, "learning_rate": 0.001, "loss": 2.4833, "step": 241236 }, { "epoch": 46.32, "learning_rate": 0.001, "loss": 2.489, "step": 241248 }, { "epoch": 46.32, "eval_ag_news_accuracy": 0.32990625, "eval_ag_news_bleu_score": 5.064259546913567, "eval_ag_news_bleu_score_sem": 0.16276135782846404, "eval_ag_news_emb_cos_sim": 0.8233256340026855, "eval_ag_news_emb_cos_sim_sem": 0.00726704720666791, "eval_ag_news_emb_top1_equal": 0.3203125, "eval_ag_news_emb_top1_equal_sem": 0.041403754790620424, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.457628011703491, "eval_ag_news_n_ngrams_match_1": 14.404, "eval_ag_news_n_ngrams_match_2": 3.306, "eval_ag_news_n_ngrams_match_3": 0.972, "eval_ag_news_num_pred_words": 46.488, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.741596454397726, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36044058466982654, "eval_ag_news_runtime": 30.2171, "eval_ag_news_samples_per_second": 16.547, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.3571753756234547, "eval_ag_news_token_set_f1_sem": 0.004507353959461323, "eval_ag_news_token_set_precision": 0.3449015915465036, "eval_ag_news_token_set_recall": 0.3851819490431336, "eval_ag_news_true_num_tokens": 56.09375, "step": 241250 }, { "epoch": 46.32, "eval_anthropic_toxic_prompts_accuracy": 0.11678125, "eval_anthropic_toxic_prompts_bleu_score": 3.2929250900653, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11955546534354838, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6932997703552246, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008553027897165262, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.176013708114624, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.548, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.05, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 47.302, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.95108697763756, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22495408032910458, "eval_anthropic_toxic_prompts_runtime": 11.014, "eval_anthropic_toxic_prompts_samples_per_second": 45.397, "eval_anthropic_toxic_prompts_steps_per_second": 0.091, "eval_anthropic_toxic_prompts_token_set_f1": 0.3652003227967441, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006446262059315402, "eval_anthropic_toxic_prompts_token_set_precision": 0.4611348657393043, "eval_anthropic_toxic_prompts_token_set_recall": 0.3275224839263923, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 241250 }, { "epoch": 46.32, "eval_arxiv_accuracy": 0.355875, "eval_arxiv_bleu_score": 4.420447042734797, "eval_arxiv_bleu_score_sem": 0.1196629610718991, "eval_arxiv_emb_cos_sim": 0.780036211013794, "eval_arxiv_emb_cos_sim_sem": 0.006999685380416514, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3037800788879395, "eval_arxiv_n_ngrams_match_1": 15.67, "eval_arxiv_n_ngrams_match_2": 3.066, "eval_arxiv_n_ngrams_match_3": 0.67, "eval_arxiv_num_pred_words": 40.562, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.21532078514388, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3762368929864656, "eval_arxiv_runtime": 11.562, "eval_arxiv_samples_per_second": 43.245, "eval_arxiv_steps_per_second": 0.086, "eval_arxiv_token_set_f1": 0.36768080061744435, "eval_arxiv_token_set_f1_sem": 0.0041402226800557025, "eval_arxiv_token_set_precision": 0.32126840310119836, "eval_arxiv_token_set_recall": 0.44616519755619827, "eval_arxiv_true_num_tokens": 64.0, "step": 241250 }, { "epoch": 46.32, "eval_python_code_alpaca_accuracy": 0.1625625, "eval_python_code_alpaca_bleu_score": 4.74742940138566, "eval_python_code_alpaca_bleu_score_sem": 0.1499882542581223, "eval_python_code_alpaca_emb_cos_sim": 0.7822372317314148, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006967462139096411, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8513922691345215, "eval_python_code_alpaca_n_ngrams_match_1": 10.074, "eval_python_code_alpaca_n_ngrams_match_2": 2.936, "eval_python_code_alpaca_n_ngrams_match_3": 0.986, "eval_python_code_alpaca_num_pred_words": 43.14, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.31186784884083, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35139482803010574, "eval_python_code_alpaca_runtime": 11.2267, "eval_python_code_alpaca_samples_per_second": 44.537, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.4836683448111049, "eval_python_code_alpaca_token_set_f1_sem": 0.0052867670675411085, "eval_python_code_alpaca_token_set_precision": 0.553907183624986, "eval_python_code_alpaca_token_set_recall": 0.4498540024236263, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 241250 }, { "epoch": 46.32, "eval_wikibio_accuracy": 0.332, "eval_wikibio_bleu_score": 6.361706537993236, "eval_wikibio_bleu_score_sem": 0.22598514869935404, "eval_wikibio_emb_cos_sim": 0.7513222098350525, "eval_wikibio_emb_cos_sim_sem": 0.008775885285567185, "eval_wikibio_emb_top1_equal": 0.1953125, "eval_wikibio_emb_top1_equal_sem": 0.035178457165496856, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.631347417831421, "eval_wikibio_n_ngrams_match_1": 10.454, "eval_wikibio_n_ngrams_match_2": 3.576, "eval_wikibio_n_ngrams_match_3": 1.364, "eval_wikibio_num_pred_words": 36.522, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.76366578861179, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36665510897863574, "eval_wikibio_runtime": 12.4778, "eval_wikibio_samples_per_second": 40.071, "eval_wikibio_steps_per_second": 0.08, "eval_wikibio_token_set_f1": 0.3300539736679318, "eval_wikibio_token_set_f1_sem": 0.005207958393225082, "eval_wikibio_token_set_precision": 0.3416930416172614, "eval_wikibio_token_set_recall": 0.33454915588064005, "eval_wikibio_true_num_tokens": 61.1328125, "step": 241250 }, { "epoch": 46.32, "eval_nq_accuracy": 0.53828125, "eval_nq_bleu_score": 12.240064989747756, "eval_nq_bleu_score_sem": 0.48635335223747905, "eval_nq_emb_cos_sim": 0.8417385816574097, "eval_nq_emb_cos_sim_sem": 0.007237234238247025, "eval_nq_emb_top1_equal": 0.359375, "eval_nq_emb_top1_equal_sem": 0.04257689651385297, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1077377796173096, "eval_nq_n_ngrams_match_1": 23.778, "eval_nq_n_ngrams_match_2": 8.82, "eval_nq_n_ngrams_match_3": 4.116, "eval_nq_num_pred_words": 49.168, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.229603034934062, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46107012518998153, "eval_nq_runtime": 11.9501, "eval_nq_samples_per_second": 41.841, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.47387866590384453, "eval_nq_token_set_f1_sem": 0.004848162849187167, "eval_nq_token_set_precision": 0.4327174058177407, "eval_nq_token_set_recall": 0.5316999989137778, "eval_nq_true_num_tokens": 64.0, "step": 241250 }, { "epoch": 46.32, "learning_rate": 0.001, "loss": 2.477, "step": 241260 }, { "epoch": 46.33, "learning_rate": 0.001, "loss": 2.4926, "step": 241272 }, { "epoch": 46.33, "learning_rate": 0.001, "loss": 2.4701, "step": 241284 }, { "epoch": 46.33, "learning_rate": 0.001, "loss": 2.4839, "step": 241296 }, { "epoch": 46.33, "learning_rate": 0.001, "loss": 2.4764, "step": 241308 }, { "epoch": 46.34, "learning_rate": 0.001, "loss": 2.4872, "step": 241320 }, { "epoch": 46.34, "learning_rate": 0.001, "loss": 2.4888, "step": 241332 }, { "epoch": 46.34, "learning_rate": 0.001, "loss": 2.4745, "step": 241344 }, { "epoch": 46.34, "learning_rate": 0.001, "loss": 2.473, "step": 241356 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 2.485, "step": 241368 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 2.4851, "step": 241380 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 2.4865, "step": 241392 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 2.4848, "step": 241404 }, { "epoch": 46.35, "learning_rate": 0.001, "loss": 2.4911, "step": 241416 }, { "epoch": 46.36, "learning_rate": 0.001, "loss": 2.4852, "step": 241428 }, { "epoch": 46.36, "learning_rate": 0.001, "loss": 2.4908, "step": 241440 }, { "epoch": 46.36, "learning_rate": 0.001, "loss": 2.4807, "step": 241452 }, { "epoch": 46.36, "learning_rate": 0.001, "loss": 2.4885, "step": 241464 }, { "epoch": 46.37, "learning_rate": 0.001, "loss": 2.4839, "step": 241476 }, { "epoch": 46.37, "learning_rate": 0.001, "loss": 2.4878, "step": 241488 }, { "epoch": 46.37, "learning_rate": 0.001, "loss": 2.4706, "step": 241500 }, { "epoch": 46.37, "learning_rate": 0.001, "loss": 2.4814, "step": 241512 }, { "epoch": 46.38, "learning_rate": 0.001, "loss": 2.4883, "step": 241524 }, { "epoch": 46.38, "learning_rate": 0.001, "loss": 2.4772, "step": 241536 }, { "epoch": 46.38, "learning_rate": 0.001, "loss": 2.4806, "step": 241548 }, { "epoch": 46.38, "learning_rate": 0.001, "loss": 2.4855, "step": 241560 }, { "epoch": 46.38, "learning_rate": 0.001, "loss": 2.4809, "step": 241572 }, { "epoch": 46.39, "learning_rate": 0.001, "loss": 2.4861, "step": 241584 }, { "epoch": 46.39, "learning_rate": 0.001, "loss": 2.4793, "step": 241596 }, { "epoch": 46.39, "learning_rate": 0.001, "loss": 2.4727, "step": 241608 }, { "epoch": 46.39, "learning_rate": 0.001, "loss": 2.4844, "step": 241620 }, { "epoch": 46.4, "learning_rate": 0.001, "loss": 2.4898, "step": 241632 }, { "epoch": 46.4, "learning_rate": 0.001, "loss": 2.4802, "step": 241644 }, { "epoch": 46.4, "learning_rate": 0.001, "loss": 2.4825, "step": 241656 }, { "epoch": 46.4, "learning_rate": 0.001, "loss": 2.4816, "step": 241668 }, { "epoch": 46.41, "learning_rate": 0.001, "loss": 2.4842, "step": 241680 }, { "epoch": 46.41, "learning_rate": 0.001, "loss": 2.4838, "step": 241692 }, { "epoch": 46.41, "learning_rate": 0.001, "loss": 2.4842, "step": 241704 }, { "epoch": 46.41, "learning_rate": 0.001, "loss": 2.4668, "step": 241716 }, { "epoch": 46.41, "learning_rate": 0.001, "loss": 2.4827, "step": 241728 }, { "epoch": 46.42, "learning_rate": 0.001, "loss": 2.4768, "step": 241740 }, { "epoch": 46.42, "learning_rate": 0.001, "loss": 2.4882, "step": 241752 }, { "epoch": 46.42, "learning_rate": 0.001, "loss": 2.4875, "step": 241764 }, { "epoch": 46.42, "learning_rate": 0.001, "loss": 2.4724, "step": 241776 }, { "epoch": 46.43, "learning_rate": 0.001, "loss": 2.4958, "step": 241788 }, { "epoch": 46.43, "learning_rate": 0.001, "loss": 2.4832, "step": 241800 }, { "epoch": 46.43, "learning_rate": 0.001, "loss": 2.4827, "step": 241812 }, { "epoch": 46.43, "learning_rate": 0.001, "loss": 2.4975, "step": 241824 }, { "epoch": 46.44, "learning_rate": 0.001, "loss": 2.4817, "step": 241836 }, { "epoch": 46.44, "learning_rate": 0.001, "loss": 2.4834, "step": 241848 }, { "epoch": 46.44, "learning_rate": 0.001, "loss": 2.491, "step": 241860 }, { "epoch": 46.44, "learning_rate": 0.001, "loss": 2.4976, "step": 241872 }, { "epoch": 46.44, "eval_ag_news_accuracy": 0.3296875, "eval_ag_news_bleu_score": 5.084642026614604, "eval_ag_news_bleu_score_sem": 0.16016679319617716, "eval_ag_news_emb_cos_sim": 0.8224168419837952, "eval_ag_news_emb_cos_sim_sem": 0.007245111747651514, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4650425910949707, "eval_ag_news_n_ngrams_match_1": 14.488, "eval_ag_news_n_ngrams_match_2": 3.264, "eval_ag_news_n_ngrams_match_3": 0.952, "eval_ag_news_num_pred_words": 46.772, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.977821714568645, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3618038231855376, "eval_ag_news_runtime": 30.9142, "eval_ag_news_samples_per_second": 16.174, "eval_ag_news_steps_per_second": 0.032, "eval_ag_news_token_set_f1": 0.3587320573823345, "eval_ag_news_token_set_f1_sem": 0.004469019027870232, "eval_ag_news_token_set_precision": 0.3454440933379695, "eval_ag_news_token_set_recall": 0.38878451831262495, "eval_ag_news_true_num_tokens": 56.09375, "step": 241875 }, { "epoch": 46.44, "eval_anthropic_toxic_prompts_accuracy": 0.11746875, "eval_anthropic_toxic_prompts_bleu_score": 3.440615900928326, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.13112100901090346, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6902843713760376, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009069573087069698, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.190671443939209, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.562, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.154, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.852, "eval_anthropic_toxic_prompts_num_pred_words": 47.618, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.30474123678018, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22370955471489626, "eval_anthropic_toxic_prompts_runtime": 29.1316, "eval_anthropic_toxic_prompts_samples_per_second": 17.163, "eval_anthropic_toxic_prompts_steps_per_second": 0.034, "eval_anthropic_toxic_prompts_token_set_f1": 0.3627563001365179, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0065597484170451985, "eval_anthropic_toxic_prompts_token_set_precision": 0.4602601738777415, "eval_anthropic_toxic_prompts_token_set_recall": 0.3241935636421385, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 241875 }, { "epoch": 46.44, "eval_arxiv_accuracy": 0.3535625, "eval_arxiv_bleu_score": 4.587565821380598, "eval_arxiv_bleu_score_sem": 0.1362682273078973, "eval_arxiv_emb_cos_sim": 0.7883133292198181, "eval_arxiv_emb_cos_sim_sem": 0.006566388777399696, "eval_arxiv_emb_top1_equal": 0.3359375, "eval_arxiv_emb_top1_equal_sem": 0.04191137143408563, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.322441339492798, "eval_arxiv_n_ngrams_match_1": 15.65, "eval_arxiv_n_ngrams_match_2": 3.2, "eval_arxiv_n_ngrams_match_3": 0.742, "eval_arxiv_num_pred_words": 40.994, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.727961361600936, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3711320883415423, "eval_arxiv_runtime": 30.6795, "eval_arxiv_samples_per_second": 16.298, "eval_arxiv_steps_per_second": 0.033, "eval_arxiv_token_set_f1": 0.3662087807138172, "eval_arxiv_token_set_f1_sem": 0.004321891433179117, "eval_arxiv_token_set_precision": 0.31922207319930457, "eval_arxiv_token_set_recall": 0.44744889708585134, "eval_arxiv_true_num_tokens": 64.0, "step": 241875 }, { "epoch": 46.44, "eval_python_code_alpaca_accuracy": 0.16246875, "eval_python_code_alpaca_bleu_score": 4.687255340246557, "eval_python_code_alpaca_bleu_score_sem": 0.15019681958371742, "eval_python_code_alpaca_emb_cos_sim": 0.7704471945762634, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009157781831191444, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8582279682159424, "eval_python_code_alpaca_n_ngrams_match_1": 10.046, "eval_python_code_alpaca_n_ngrams_match_2": 3.006, "eval_python_code_alpaca_n_ngrams_match_3": 1.01, "eval_python_code_alpaca_num_pred_words": 43.156, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.43061195510475, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3463634712786531, "eval_python_code_alpaca_runtime": 29.1269, "eval_python_code_alpaca_samples_per_second": 17.166, "eval_python_code_alpaca_steps_per_second": 0.034, "eval_python_code_alpaca_token_set_f1": 0.4735165123242745, "eval_python_code_alpaca_token_set_f1_sem": 0.0056185112210383826, "eval_python_code_alpaca_token_set_precision": 0.5495388137409534, "eval_python_code_alpaca_token_set_recall": 0.44209822638636487, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 241875 }, { "epoch": 46.44, "eval_wikibio_accuracy": 0.32709375, "eval_wikibio_bleu_score": 5.958341045771376, "eval_wikibio_bleu_score_sem": 0.2174500917069693, "eval_wikibio_emb_cos_sim": 0.7273484468460083, "eval_wikibio_emb_cos_sim_sem": 0.010700433536397599, "eval_wikibio_emb_top1_equal": 0.1875, "eval_wikibio_emb_top1_equal_sem": 0.034634623208270626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6463050842285156, "eval_wikibio_n_ngrams_match_1": 10.022, "eval_wikibio_n_ngrams_match_2": 3.414, "eval_wikibio_n_ngrams_match_3": 1.254, "eval_wikibio_num_pred_words": 35.822, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.33276771128574, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3530682279269266, "eval_wikibio_runtime": 11.6722, "eval_wikibio_samples_per_second": 42.837, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.315965979692545, "eval_wikibio_token_set_f1_sem": 0.005772811421930752, "eval_wikibio_token_set_precision": 0.3239042816365887, "eval_wikibio_token_set_recall": 0.3253844288547698, "eval_wikibio_true_num_tokens": 61.1328125, "step": 241875 }, { "epoch": 46.44, "eval_nq_accuracy": 0.53840625, "eval_nq_bleu_score": 12.358487248963987, "eval_nq_bleu_score_sem": 0.4884760344559198, "eval_nq_emb_cos_sim": 0.8379926085472107, "eval_nq_emb_cos_sim_sem": 0.007062561892949688, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.106782913208008, "eval_nq_n_ngrams_match_1": 23.658, "eval_nq_n_ngrams_match_2": 8.88, "eval_nq_n_ngrams_match_3": 4.182, "eval_nq_num_pred_words": 48.95, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.221748613992261, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4587327511095819, "eval_nq_runtime": 16.9333, "eval_nq_samples_per_second": 29.528, "eval_nq_steps_per_second": 0.059, "eval_nq_token_set_f1": 0.4746710993493959, "eval_nq_token_set_f1_sem": 0.004854345292045013, "eval_nq_token_set_precision": 0.4326281716859562, "eval_nq_token_set_recall": 0.5336436845156283, "eval_nq_true_num_tokens": 64.0, "step": 241875 }, { "epoch": 46.44, "learning_rate": 0.001, "loss": 2.4867, "step": 241884 }, { "epoch": 46.45, "learning_rate": 0.001, "loss": 2.4897, "step": 241896 }, { "epoch": 46.45, "learning_rate": 0.001, "loss": 2.49, "step": 241908 }, { "epoch": 46.45, "learning_rate": 0.001, "loss": 2.4809, "step": 241920 }, { "epoch": 46.45, "learning_rate": 0.001, "loss": 2.4878, "step": 241932 }, { "epoch": 46.46, "learning_rate": 0.001, "loss": 2.4774, "step": 241944 }, { "epoch": 46.46, "learning_rate": 0.001, "loss": 2.49, "step": 241956 }, { "epoch": 46.46, "learning_rate": 0.001, "loss": 2.4769, "step": 241968 }, { "epoch": 46.46, "learning_rate": 0.001, "loss": 2.4755, "step": 241980 }, { "epoch": 46.47, "learning_rate": 0.001, "loss": 2.4801, "step": 241992 }, { "epoch": 46.47, "learning_rate": 0.001, "loss": 2.4873, "step": 242004 }, { "epoch": 46.47, "learning_rate": 0.001, "loss": 2.489, "step": 242016 }, { "epoch": 46.47, "learning_rate": 0.001, "loss": 2.4887, "step": 242028 }, { "epoch": 46.47, "learning_rate": 0.001, "loss": 2.4853, "step": 242040 }, { "epoch": 46.48, "learning_rate": 0.001, "loss": 2.479, "step": 242052 }, { "epoch": 46.48, "learning_rate": 0.001, "loss": 2.4902, "step": 242064 }, { "epoch": 46.48, "learning_rate": 0.001, "loss": 2.4854, "step": 242076 }, { "epoch": 46.48, "learning_rate": 0.001, "loss": 2.4807, "step": 242088 }, { "epoch": 46.49, "learning_rate": 0.001, "loss": 2.4915, "step": 242100 }, { "epoch": 46.49, "learning_rate": 0.001, "loss": 2.487, "step": 242112 }, { "epoch": 46.49, "learning_rate": 0.001, "loss": 2.4856, "step": 242124 }, { "epoch": 46.49, "learning_rate": 0.001, "loss": 2.4745, "step": 242136 }, { "epoch": 46.5, "learning_rate": 0.001, "loss": 2.479, "step": 242148 }, { "epoch": 46.5, "learning_rate": 0.001, "loss": 2.4828, "step": 242160 }, { "epoch": 46.5, "learning_rate": 0.001, "loss": 2.4783, "step": 242172 }, { "epoch": 46.5, "learning_rate": 0.001, "loss": 2.4827, "step": 242184 }, { "epoch": 46.5, "learning_rate": 0.001, "loss": 2.48, "step": 242196 }, { "epoch": 46.51, "learning_rate": 0.001, "loss": 2.481, "step": 242208 }, { "epoch": 46.51, "learning_rate": 0.001, "loss": 2.4758, "step": 242220 }, { "epoch": 46.51, "learning_rate": 0.001, "loss": 2.4822, "step": 242232 }, { "epoch": 46.51, "learning_rate": 0.001, "loss": 2.4742, "step": 242244 }, { "epoch": 46.52, "learning_rate": 0.001, "loss": 2.4767, "step": 242256 }, { "epoch": 46.52, "learning_rate": 0.001, "loss": 2.4853, "step": 242268 }, { "epoch": 46.52, "learning_rate": 0.001, "loss": 2.4828, "step": 242280 }, { "epoch": 46.52, "learning_rate": 0.001, "loss": 2.471, "step": 242292 }, { "epoch": 46.53, "learning_rate": 0.001, "loss": 2.4787, "step": 242304 }, { "epoch": 46.53, "learning_rate": 0.001, "loss": 2.4782, "step": 242316 }, { "epoch": 46.53, "learning_rate": 0.001, "loss": 2.4825, "step": 242328 }, { "epoch": 46.53, "learning_rate": 0.001, "loss": 2.4829, "step": 242340 }, { "epoch": 46.53, "learning_rate": 0.001, "loss": 2.4802, "step": 242352 }, { "epoch": 46.54, "learning_rate": 0.001, "loss": 2.4834, "step": 242364 }, { "epoch": 46.54, "learning_rate": 0.001, "loss": 2.4785, "step": 242376 }, { "epoch": 46.54, "learning_rate": 0.001, "loss": 2.4843, "step": 242388 }, { "epoch": 46.54, "learning_rate": 0.001, "loss": 2.4901, "step": 242400 }, { "epoch": 46.55, "learning_rate": 0.001, "loss": 2.4873, "step": 242412 }, { "epoch": 46.55, "learning_rate": 0.001, "loss": 2.4827, "step": 242424 }, { "epoch": 46.55, "learning_rate": 0.001, "loss": 2.4876, "step": 242436 }, { "epoch": 46.55, "learning_rate": 0.001, "loss": 2.4928, "step": 242448 }, { "epoch": 46.56, "learning_rate": 0.001, "loss": 2.4863, "step": 242460 }, { "epoch": 46.56, "learning_rate": 0.001, "loss": 2.4921, "step": 242472 }, { "epoch": 46.56, "learning_rate": 0.001, "loss": 2.4828, "step": 242484 }, { "epoch": 46.56, "learning_rate": 0.001, "loss": 2.486, "step": 242496 }, { "epoch": 46.56, "eval_ag_news_accuracy": 0.3304375, "eval_ag_news_bleu_score": 5.0147895358484815, "eval_ag_news_bleu_score_sem": 0.15958465491068766, "eval_ag_news_emb_cos_sim": 0.8218309283256531, "eval_ag_news_emb_cos_sim_sem": 0.0066238639078783955, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4549052715301514, "eval_ag_news_n_ngrams_match_1": 14.36, "eval_ag_news_n_ngrams_match_2": 3.216, "eval_ag_news_n_ngrams_match_3": 0.944, "eval_ag_news_num_pred_words": 46.47, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.655289883068022, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3590430609878206, "eval_ag_news_runtime": 15.8727, "eval_ag_news_samples_per_second": 31.501, "eval_ag_news_steps_per_second": 0.063, "eval_ag_news_token_set_f1": 0.3579887445455612, "eval_ag_news_token_set_f1_sem": 0.0043650586776531825, "eval_ag_news_token_set_precision": 0.34438800093333527, "eval_ag_news_token_set_recall": 0.38769083022653245, "eval_ag_news_true_num_tokens": 56.09375, "step": 242500 }, { "epoch": 46.56, "eval_anthropic_toxic_prompts_accuracy": 0.11709375, "eval_anthropic_toxic_prompts_bleu_score": 3.3395154646772034, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12719804499071863, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6862552165985107, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009476123563439307, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1812803745269775, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.352, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.048, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.822, "eval_anthropic_toxic_prompts_num_pred_words": 47.482, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.077562121814168, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21649841090894642, "eval_anthropic_toxic_prompts_runtime": 12.4149, "eval_anthropic_toxic_prompts_samples_per_second": 40.274, "eval_anthropic_toxic_prompts_steps_per_second": 0.081, "eval_anthropic_toxic_prompts_token_set_f1": 0.36090650861044277, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006607413680914105, "eval_anthropic_toxic_prompts_token_set_precision": 0.44784435857669164, "eval_anthropic_toxic_prompts_token_set_recall": 0.3339832018508541, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 242500 }, { "epoch": 46.56, "eval_arxiv_accuracy": 0.35321875, "eval_arxiv_bleu_score": 4.709397685389214, "eval_arxiv_bleu_score_sem": 0.13017014297566795, "eval_arxiv_emb_cos_sim": 0.7913519144058228, "eval_arxiv_emb_cos_sim_sem": 0.0068440535324784816, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3240878582000732, "eval_arxiv_n_ngrams_match_1": 15.912, "eval_arxiv_n_ngrams_match_2": 3.294, "eval_arxiv_n_ngrams_match_3": 0.78, "eval_arxiv_num_pred_words": 41.348, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.7736535749167, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37678167135450635, "eval_arxiv_runtime": 16.9108, "eval_arxiv_samples_per_second": 29.567, "eval_arxiv_steps_per_second": 0.059, "eval_arxiv_token_set_f1": 0.3749786536115361, "eval_arxiv_token_set_f1_sem": 0.00423406580137907, "eval_arxiv_token_set_precision": 0.3266201011569917, "eval_arxiv_token_set_recall": 0.4599227426804347, "eval_arxiv_true_num_tokens": 64.0, "step": 242500 }, { "epoch": 46.56, "eval_python_code_alpaca_accuracy": 0.1638125, "eval_python_code_alpaca_bleu_score": 4.753542972403131, "eval_python_code_alpaca_bleu_score_sem": 0.15269941418383537, "eval_python_code_alpaca_emb_cos_sim": 0.7625956535339355, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008567124700903138, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.862983465194702, "eval_python_code_alpaca_n_ngrams_match_1": 10.02, "eval_python_code_alpaca_n_ngrams_match_2": 2.98, "eval_python_code_alpaca_n_ngrams_match_3": 1.028, "eval_python_code_alpaca_num_pred_words": 44.044, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.513700584873433, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3417390203002659, "eval_python_code_alpaca_runtime": 29.3975, "eval_python_code_alpaca_samples_per_second": 17.008, "eval_python_code_alpaca_steps_per_second": 0.034, "eval_python_code_alpaca_token_set_f1": 0.4845210223439105, "eval_python_code_alpaca_token_set_f1_sem": 0.005446935440651199, "eval_python_code_alpaca_token_set_precision": 0.5474563393900826, "eval_python_code_alpaca_token_set_recall": 0.4572541838133401, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 242500 }, { "epoch": 46.56, "eval_wikibio_accuracy": 0.32965625, "eval_wikibio_bleu_score": 6.199104057835193, "eval_wikibio_bleu_score_sem": 0.21852074990903678, "eval_wikibio_emb_cos_sim": 0.7372463941574097, "eval_wikibio_emb_cos_sim_sem": 0.010695194814836596, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6502938270568848, "eval_wikibio_n_ngrams_match_1": 10.102, "eval_wikibio_n_ngrams_match_2": 3.51, "eval_wikibio_n_ngrams_match_3": 1.304, "eval_wikibio_num_pred_words": 35.488, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.48597260792705, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3588946318349425, "eval_wikibio_runtime": 18.0011, "eval_wikibio_samples_per_second": 27.776, "eval_wikibio_steps_per_second": 0.056, "eval_wikibio_token_set_f1": 0.323072826372439, "eval_wikibio_token_set_f1_sem": 0.00563513111265325, "eval_wikibio_token_set_precision": 0.32923979947755805, "eval_wikibio_token_set_recall": 0.3343350847546233, "eval_wikibio_true_num_tokens": 61.1328125, "step": 242500 }, { "epoch": 46.56, "eval_nq_accuracy": 0.5393125, "eval_nq_bleu_score": 12.401884825808072, "eval_nq_bleu_score_sem": 0.4935568006258743, "eval_nq_emb_cos_sim": 0.839489221572876, "eval_nq_emb_cos_sim_sem": 0.007569988217052089, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.104843854904175, "eval_nq_n_ngrams_match_1": 23.732, "eval_nq_n_ngrams_match_2": 8.902, "eval_nq_n_ngrams_match_3": 4.184, "eval_nq_num_pred_words": 49.15, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.205821610754604, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4632106348476336, "eval_nq_runtime": 11.9895, "eval_nq_samples_per_second": 41.703, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.4746018846814656, "eval_nq_token_set_f1_sem": 0.005044325450182254, "eval_nq_token_set_precision": 0.43326331968603143, "eval_nq_token_set_recall": 0.5330526004274339, "eval_nq_true_num_tokens": 64.0, "step": 242500 }, { "epoch": 46.56, "learning_rate": 0.001, "loss": 2.4792, "step": 242508 }, { "epoch": 46.57, "learning_rate": 0.001, "loss": 2.483, "step": 242520 }, { "epoch": 46.57, "learning_rate": 0.001, "loss": 2.482, "step": 242532 }, { "epoch": 46.57, "learning_rate": 0.001, "loss": 2.4873, "step": 242544 }, { "epoch": 46.57, "learning_rate": 0.001, "loss": 2.4888, "step": 242556 }, { "epoch": 46.58, "learning_rate": 0.001, "loss": 2.4807, "step": 242568 }, { "epoch": 46.58, "learning_rate": 0.001, "loss": 2.4866, "step": 242580 }, { "epoch": 46.58, "learning_rate": 0.001, "loss": 2.482, "step": 242592 }, { "epoch": 46.58, "learning_rate": 0.001, "loss": 2.483, "step": 242604 }, { "epoch": 46.59, "learning_rate": 0.001, "loss": 2.4836, "step": 242616 }, { "epoch": 46.59, "learning_rate": 0.001, "loss": 2.4898, "step": 242628 }, { "epoch": 46.59, "learning_rate": 0.001, "loss": 2.4835, "step": 242640 }, { "epoch": 46.59, "learning_rate": 0.001, "loss": 2.4845, "step": 242652 }, { "epoch": 46.59, "learning_rate": 0.001, "loss": 2.4779, "step": 242664 }, { "epoch": 46.6, "learning_rate": 0.001, "loss": 2.4756, "step": 242676 }, { "epoch": 46.6, "learning_rate": 0.001, "loss": 2.4767, "step": 242688 }, { "epoch": 46.6, "learning_rate": 0.001, "loss": 2.4724, "step": 242700 }, { "epoch": 46.6, "learning_rate": 0.001, "loss": 2.4944, "step": 242712 }, { "epoch": 46.61, "learning_rate": 0.001, "loss": 2.4861, "step": 242724 }, { "epoch": 46.61, "learning_rate": 0.001, "loss": 2.4831, "step": 242736 }, { "epoch": 46.61, "learning_rate": 0.001, "loss": 2.477, "step": 242748 }, { "epoch": 46.61, "learning_rate": 0.001, "loss": 2.4792, "step": 242760 }, { "epoch": 46.62, "learning_rate": 0.001, "loss": 2.4751, "step": 242772 }, { "epoch": 46.62, "learning_rate": 0.001, "loss": 2.4835, "step": 242784 }, { "epoch": 46.62, "learning_rate": 0.001, "loss": 2.4791, "step": 242796 }, { "epoch": 46.62, "learning_rate": 0.001, "loss": 2.4745, "step": 242808 }, { "epoch": 46.62, "learning_rate": 0.001, "loss": 2.4788, "step": 242820 }, { "epoch": 46.63, "learning_rate": 0.001, "loss": 2.4752, "step": 242832 }, { "epoch": 46.63, "learning_rate": 0.001, "loss": 2.4912, "step": 242844 }, { "epoch": 46.63, "learning_rate": 0.001, "loss": 2.4835, "step": 242856 }, { "epoch": 46.63, "learning_rate": 0.001, "loss": 2.4878, "step": 242868 }, { "epoch": 46.64, "learning_rate": 0.001, "loss": 2.4795, "step": 242880 }, { "epoch": 46.64, "learning_rate": 0.001, "loss": 2.4875, "step": 242892 }, { "epoch": 46.64, "learning_rate": 0.001, "loss": 2.4819, "step": 242904 }, { "epoch": 46.64, "learning_rate": 0.001, "loss": 2.4888, "step": 242916 }, { "epoch": 46.65, "learning_rate": 0.001, "loss": 2.4872, "step": 242928 }, { "epoch": 46.65, "learning_rate": 0.001, "loss": 2.4805, "step": 242940 }, { "epoch": 46.65, "learning_rate": 0.001, "loss": 2.4854, "step": 242952 }, { "epoch": 46.65, "learning_rate": 0.001, "loss": 2.4898, "step": 242964 }, { "epoch": 46.65, "learning_rate": 0.001, "loss": 2.4859, "step": 242976 }, { "epoch": 46.66, "learning_rate": 0.001, "loss": 2.4861, "step": 242988 }, { "epoch": 46.66, "learning_rate": 0.001, "loss": 2.4896, "step": 243000 }, { "epoch": 46.66, "learning_rate": 0.001, "loss": 2.4892, "step": 243012 }, { "epoch": 46.66, "learning_rate": 0.001, "loss": 2.4827, "step": 243024 }, { "epoch": 46.67, "learning_rate": 0.001, "loss": 2.4789, "step": 243036 }, { "epoch": 46.67, "learning_rate": 0.001, "loss": 2.4899, "step": 243048 }, { "epoch": 46.67, "learning_rate": 0.001, "loss": 2.4946, "step": 243060 }, { "epoch": 46.67, "learning_rate": 0.001, "loss": 2.4839, "step": 243072 }, { "epoch": 46.68, "learning_rate": 0.001, "loss": 2.4858, "step": 243084 }, { "epoch": 46.68, "learning_rate": 0.001, "loss": 2.4767, "step": 243096 }, { "epoch": 46.68, "learning_rate": 0.001, "loss": 2.4899, "step": 243108 }, { "epoch": 46.68, "learning_rate": 0.001, "loss": 2.4827, "step": 243120 }, { "epoch": 46.68, "eval_ag_news_accuracy": 0.330125, "eval_ag_news_bleu_score": 5.154653789908301, "eval_ag_news_bleu_score_sem": 0.17062236167367198, "eval_ag_news_emb_cos_sim": 0.8238040804862976, "eval_ag_news_emb_cos_sim_sem": 0.0062663980464112805, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.455888509750366, "eval_ag_news_n_ngrams_match_1": 14.568, "eval_ag_news_n_ngrams_match_2": 3.362, "eval_ag_news_n_ngrams_match_3": 0.984, "eval_ag_news_num_pred_words": 46.354, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.686429880462107, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3658949290313125, "eval_ag_news_runtime": 12.6369, "eval_ag_news_samples_per_second": 39.567, "eval_ag_news_steps_per_second": 0.079, "eval_ag_news_token_set_f1": 0.3628202448725998, "eval_ag_news_token_set_f1_sem": 0.004526394411412497, "eval_ag_news_token_set_precision": 0.3480184762274712, "eval_ag_news_token_set_recall": 0.393966522650403, "eval_ag_news_true_num_tokens": 56.09375, "step": 243125 }, { "epoch": 46.68, "eval_anthropic_toxic_prompts_accuracy": 0.11575, "eval_anthropic_toxic_prompts_bleu_score": 3.371620502622668, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1302202195352204, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6879521608352661, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008665812863780414, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.19242262840271, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.512, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.088, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, "eval_anthropic_toxic_prompts_num_pred_words": 47.196, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.34734061081799, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22434746598187452, "eval_anthropic_toxic_prompts_runtime": 12.0603, "eval_anthropic_toxic_prompts_samples_per_second": 41.458, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3681940245403478, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006311540758298674, "eval_anthropic_toxic_prompts_token_set_precision": 0.4585773723164939, "eval_anthropic_toxic_prompts_token_set_recall": 0.33606220631117817, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 243125 }, { "epoch": 46.68, "eval_arxiv_accuracy": 0.3543125, "eval_arxiv_bleu_score": 4.510159697244193, "eval_arxiv_bleu_score_sem": 0.13235250084794623, "eval_arxiv_emb_cos_sim": 0.7854233384132385, "eval_arxiv_emb_cos_sim_sem": 0.006893196757842185, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.31634259223938, "eval_arxiv_n_ngrams_match_1": 15.692, "eval_arxiv_n_ngrams_match_2": 3.098, "eval_arxiv_n_ngrams_match_3": 0.696, "eval_arxiv_num_pred_words": 40.282, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.559370153545746, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37622325720937877, "eval_arxiv_runtime": 17.2826, "eval_arxiv_samples_per_second": 28.931, "eval_arxiv_steps_per_second": 0.058, "eval_arxiv_token_set_f1": 0.3681895761925773, "eval_arxiv_token_set_f1_sem": 0.004325737464815855, "eval_arxiv_token_set_precision": 0.3205020006401268, "eval_arxiv_token_set_recall": 0.4510585337220203, "eval_arxiv_true_num_tokens": 64.0, "step": 243125 }, { "epoch": 46.68, "eval_python_code_alpaca_accuracy": 0.16128125, "eval_python_code_alpaca_bleu_score": 4.784843518504221, "eval_python_code_alpaca_bleu_score_sem": 0.15087232854001884, "eval_python_code_alpaca_emb_cos_sim": 0.7689688205718994, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008392794798749954, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.871361255645752, "eval_python_code_alpaca_n_ngrams_match_1": 9.99, "eval_python_code_alpaca_n_ngrams_match_2": 2.97, "eval_python_code_alpaca_n_ngrams_match_3": 1.002, "eval_python_code_alpaca_num_pred_words": 43.066, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.66104303869998, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.345981739899315, "eval_python_code_alpaca_runtime": 11.2119, "eval_python_code_alpaca_samples_per_second": 44.595, "eval_python_code_alpaca_steps_per_second": 0.089, "eval_python_code_alpaca_token_set_f1": 0.48245358037137476, "eval_python_code_alpaca_token_set_f1_sem": 0.005309595032016044, "eval_python_code_alpaca_token_set_precision": 0.5453286615089248, "eval_python_code_alpaca_token_set_recall": 0.45740623444184725, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 243125 }, { "epoch": 46.68, "eval_wikibio_accuracy": 0.33078125, "eval_wikibio_bleu_score": 6.225084517037956, "eval_wikibio_bleu_score_sem": 0.22225109490955056, "eval_wikibio_emb_cos_sim": 0.7402669191360474, "eval_wikibio_emb_cos_sim_sem": 0.009899571633339242, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.596177339553833, "eval_wikibio_n_ngrams_match_1": 10.168, "eval_wikibio_n_ngrams_match_2": 3.43, "eval_wikibio_n_ngrams_match_3": 1.29, "eval_wikibio_num_pred_words": 35.382, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.45859888018138, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36033282229314456, "eval_wikibio_runtime": 13.1564, "eval_wikibio_samples_per_second": 38.004, "eval_wikibio_steps_per_second": 0.076, "eval_wikibio_token_set_f1": 0.32314003112975126, "eval_wikibio_token_set_f1_sem": 0.0055351492160330236, "eval_wikibio_token_set_precision": 0.32881593414703353, "eval_wikibio_token_set_recall": 0.3366387826826832, "eval_wikibio_true_num_tokens": 61.1328125, "step": 243125 }, { "epoch": 46.68, "eval_nq_accuracy": 0.53953125, "eval_nq_bleu_score": 12.41028290681622, "eval_nq_bleu_score_sem": 0.49611651861096845, "eval_nq_emb_cos_sim": 0.8431179523468018, "eval_nq_emb_cos_sim_sem": 0.006613498417322748, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1034491062164307, "eval_nq_n_ngrams_match_1": 23.65, "eval_nq_n_ngrams_match_2": 8.892, "eval_nq_n_ngrams_match_3": 4.186, "eval_nq_num_pred_words": 49.044, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.194384529612169, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45980285574893875, "eval_nq_runtime": 19.4993, "eval_nq_samples_per_second": 25.642, "eval_nq_steps_per_second": 0.051, "eval_nq_token_set_f1": 0.47188570265152296, "eval_nq_token_set_f1_sem": 0.0048763636092038695, "eval_nq_token_set_precision": 0.43007271010702747, "eval_nq_token_set_recall": 0.5305909207414481, "eval_nq_true_num_tokens": 64.0, "step": 243125 }, { "epoch": 46.68, "learning_rate": 0.001, "loss": 2.4939, "step": 243132 }, { "epoch": 46.69, "learning_rate": 0.001, "loss": 2.4927, "step": 243144 }, { "epoch": 46.69, "learning_rate": 0.001, "loss": 2.4889, "step": 243156 }, { "epoch": 46.69, "learning_rate": 0.001, "loss": 2.4787, "step": 243168 }, { "epoch": 46.69, "learning_rate": 0.001, "loss": 2.4834, "step": 243180 }, { "epoch": 46.7, "learning_rate": 0.001, "loss": 2.4825, "step": 243192 }, { "epoch": 46.7, "learning_rate": 0.001, "loss": 2.4922, "step": 243204 }, { "epoch": 46.7, "learning_rate": 0.001, "loss": 2.4847, "step": 243216 }, { "epoch": 46.7, "learning_rate": 0.001, "loss": 2.4896, "step": 243228 }, { "epoch": 46.71, "learning_rate": 0.001, "loss": 2.4918, "step": 243240 }, { "epoch": 46.71, "learning_rate": 0.001, "loss": 2.4748, "step": 243252 }, { "epoch": 46.71, "learning_rate": 0.001, "loss": 2.4825, "step": 243264 }, { "epoch": 46.71, "learning_rate": 0.001, "loss": 2.4925, "step": 243276 }, { "epoch": 46.71, "learning_rate": 0.001, "loss": 2.4791, "step": 243288 }, { "epoch": 46.72, "learning_rate": 0.001, "loss": 2.4813, "step": 243300 }, { "epoch": 46.72, "learning_rate": 0.001, "loss": 2.4867, "step": 243312 }, { "epoch": 46.72, "learning_rate": 0.001, "loss": 2.481, "step": 243324 }, { "epoch": 46.72, "learning_rate": 0.001, "loss": 2.4787, "step": 243336 }, { "epoch": 46.73, "learning_rate": 0.001, "loss": 2.4921, "step": 243348 }, { "epoch": 46.73, "learning_rate": 0.001, "loss": 2.4836, "step": 243360 }, { "epoch": 46.73, "learning_rate": 0.001, "loss": 2.4839, "step": 243372 }, { "epoch": 46.73, "learning_rate": 0.001, "loss": 2.4786, "step": 243384 }, { "epoch": 46.74, "learning_rate": 0.001, "loss": 2.4796, "step": 243396 }, { "epoch": 46.74, "learning_rate": 0.001, "loss": 2.4804, "step": 243408 }, { "epoch": 46.74, "learning_rate": 0.001, "loss": 2.4856, "step": 243420 }, { "epoch": 46.74, "learning_rate": 0.001, "loss": 2.4831, "step": 243432 }, { "epoch": 46.74, "learning_rate": 0.001, "loss": 2.4828, "step": 243444 }, { "epoch": 46.75, "learning_rate": 0.001, "loss": 2.4974, "step": 243456 }, { "epoch": 46.75, "learning_rate": 0.001, "loss": 2.4954, "step": 243468 }, { "epoch": 46.75, "learning_rate": 0.001, "loss": 2.4937, "step": 243480 }, { "epoch": 46.75, "learning_rate": 0.001, "loss": 2.4861, "step": 243492 }, { "epoch": 46.76, "learning_rate": 0.001, "loss": 2.4867, "step": 243504 }, { "epoch": 46.76, "learning_rate": 0.001, "loss": 2.4847, "step": 243516 }, { "epoch": 46.76, "learning_rate": 0.001, "loss": 2.4838, "step": 243528 }, { "epoch": 46.76, "learning_rate": 0.001, "loss": 2.4762, "step": 243540 }, { "epoch": 46.76, "learning_rate": 0.001, "loss": 2.4906, "step": 243552 }, { "epoch": 46.77, "learning_rate": 0.001, "loss": 2.4871, "step": 243564 }, { "epoch": 46.77, "learning_rate": 0.001, "loss": 2.4832, "step": 243576 }, { "epoch": 46.77, "learning_rate": 0.001, "loss": 2.4921, "step": 243588 }, { "epoch": 46.77, "learning_rate": 0.001, "loss": 2.4901, "step": 243600 }, { "epoch": 46.78, "learning_rate": 0.001, "loss": 2.4886, "step": 243612 }, { "epoch": 46.78, "learning_rate": 0.001, "loss": 2.4906, "step": 243624 }, { "epoch": 46.78, "learning_rate": 0.001, "loss": 2.485, "step": 243636 }, { "epoch": 46.78, "learning_rate": 0.001, "loss": 2.4867, "step": 243648 }, { "epoch": 46.79, "learning_rate": 0.001, "loss": 2.4829, "step": 243660 }, { "epoch": 46.79, "learning_rate": 0.001, "loss": 2.4877, "step": 243672 }, { "epoch": 46.79, "learning_rate": 0.001, "loss": 2.4855, "step": 243684 }, { "epoch": 46.79, "learning_rate": 0.001, "loss": 2.495, "step": 243696 }, { "epoch": 46.79, "learning_rate": 0.001, "loss": 2.4922, "step": 243708 }, { "epoch": 46.8, "learning_rate": 0.001, "loss": 2.4904, "step": 243720 }, { "epoch": 46.8, "learning_rate": 0.001, "loss": 2.489, "step": 243732 }, { "epoch": 46.8, "learning_rate": 0.001, "loss": 2.4894, "step": 243744 }, { "epoch": 46.8, "eval_ag_news_accuracy": 0.33053125, "eval_ag_news_bleu_score": 5.039099309980454, "eval_ag_news_bleu_score_sem": 0.16045699538588595, "eval_ag_news_emb_cos_sim": 0.8177430033683777, "eval_ag_news_emb_cos_sim_sem": 0.007100475617363269, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.455599546432495, "eval_ag_news_n_ngrams_match_1": 14.558, "eval_ag_news_n_ngrams_match_2": 3.358, "eval_ag_news_n_ngrams_match_3": 0.954, "eval_ag_news_num_pred_words": 46.75, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.677274987330208, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36112618782361333, "eval_ag_news_runtime": 30.2062, "eval_ag_news_samples_per_second": 16.553, "eval_ag_news_steps_per_second": 0.033, "eval_ag_news_token_set_f1": 0.3601579649836729, "eval_ag_news_token_set_f1_sem": 0.004504752513229392, "eval_ag_news_token_set_precision": 0.34783631246687213, "eval_ag_news_token_set_recall": 0.388737858799268, "eval_ag_news_true_num_tokens": 56.09375, "step": 243750 }, { "epoch": 46.8, "eval_anthropic_toxic_prompts_accuracy": 0.1168125, "eval_anthropic_toxic_prompts_bleu_score": 3.2752443221500096, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12777298422486882, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6866858005523682, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009372066287188448, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1995556354522705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.014, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778, "eval_anthropic_toxic_prompts_num_pred_words": 47.552, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.52163123215981, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2185410184506723, "eval_anthropic_toxic_prompts_runtime": 36.0892, "eval_anthropic_toxic_prompts_samples_per_second": 13.855, "eval_anthropic_toxic_prompts_steps_per_second": 0.028, "eval_anthropic_toxic_prompts_token_set_f1": 0.36072478194335755, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006357141499689125, "eval_anthropic_toxic_prompts_token_set_precision": 0.4523530944084032, "eval_anthropic_toxic_prompts_token_set_recall": 0.3261110783356425, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 243750 }, { "epoch": 46.8, "eval_arxiv_accuracy": 0.35609375, "eval_arxiv_bleu_score": 4.447450755461512, "eval_arxiv_bleu_score_sem": 0.124638000482428, "eval_arxiv_emb_cos_sim": 0.780614972114563, "eval_arxiv_emb_cos_sim_sem": 0.00677338709696038, "eval_arxiv_emb_top1_equal": 0.3515625, "eval_arxiv_emb_top1_equal_sem": 0.04236756101983345, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3115153312683105, "eval_arxiv_n_ngrams_match_1": 15.756, "eval_arxiv_n_ngrams_match_2": 3.092, "eval_arxiv_n_ngrams_match_3": 0.662, "eval_arxiv_num_pred_words": 41.334, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.426654465962347, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3740744791341803, "eval_arxiv_runtime": 30.6156, "eval_arxiv_samples_per_second": 16.332, "eval_arxiv_steps_per_second": 0.033, "eval_arxiv_token_set_f1": 0.36621881777434234, "eval_arxiv_token_set_f1_sem": 0.004211189609961602, "eval_arxiv_token_set_precision": 0.3213277691125244, "eval_arxiv_token_set_recall": 0.4422843764241028, "eval_arxiv_true_num_tokens": 64.0, "step": 243750 }, { "epoch": 46.8, "eval_python_code_alpaca_accuracy": 0.16175, "eval_python_code_alpaca_bleu_score": 4.742927892019021, "eval_python_code_alpaca_bleu_score_sem": 0.14746088091806286, "eval_python_code_alpaca_emb_cos_sim": 0.7770613431930542, "eval_python_code_alpaca_emb_cos_sim_sem": 0.0074594903755131605, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8479361534118652, "eval_python_code_alpaca_n_ngrams_match_1": 9.962, "eval_python_code_alpaca_n_ngrams_match_2": 2.916, "eval_python_code_alpaca_n_ngrams_match_3": 0.974, "eval_python_code_alpaca_num_pred_words": 42.832, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.252139304014797, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3460431630138763, "eval_python_code_alpaca_runtime": 30.1762, "eval_python_code_alpaca_samples_per_second": 16.569, "eval_python_code_alpaca_steps_per_second": 0.033, "eval_python_code_alpaca_token_set_f1": 0.48362512000929325, "eval_python_code_alpaca_token_set_f1_sem": 0.005489204647118722, "eval_python_code_alpaca_token_set_precision": 0.5414038521108293, "eval_python_code_alpaca_token_set_recall": 0.46068559773746326, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 243750 }, { "epoch": 46.8, "eval_wikibio_accuracy": 0.33084375, "eval_wikibio_bleu_score": 6.234119576658955, "eval_wikibio_bleu_score_sem": 0.2183858152906804, "eval_wikibio_emb_cos_sim": 0.7522262334823608, "eval_wikibio_emb_cos_sim_sem": 0.008664405554166855, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.605022668838501, "eval_wikibio_n_ngrams_match_1": 10.374, "eval_wikibio_n_ngrams_match_2": 3.548, "eval_wikibio_n_ngrams_match_3": 1.338, "eval_wikibio_num_pred_words": 36.764, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.78251766474602, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3669585286582726, "eval_wikibio_runtime": 29.993, "eval_wikibio_samples_per_second": 16.671, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.32738013850868886, "eval_wikibio_token_set_f1_sem": 0.005082182857883322, "eval_wikibio_token_set_precision": 0.3353376015714099, "eval_wikibio_token_set_recall": 0.3359335564814072, "eval_wikibio_true_num_tokens": 61.1328125, "step": 243750 }, { "epoch": 46.8, "eval_nq_accuracy": 0.537875, "eval_nq_bleu_score": 12.316738043884445, "eval_nq_bleu_score_sem": 0.49831910958334596, "eval_nq_emb_cos_sim": 0.8426940441131592, "eval_nq_emb_cos_sim_sem": 0.0066900482894751674, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.108543634414673, "eval_nq_n_ngrams_match_1": 23.73, "eval_nq_n_ngrams_match_2": 8.872, "eval_nq_n_ngrams_match_3": 4.14, "eval_nq_num_pred_words": 49.178, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.236237572898242, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4599833752389065, "eval_nq_runtime": 31.5153, "eval_nq_samples_per_second": 15.865, "eval_nq_steps_per_second": 0.032, "eval_nq_token_set_f1": 0.4748004010561954, "eval_nq_token_set_f1_sem": 0.004913855745807401, "eval_nq_token_set_precision": 0.43466219557873326, "eval_nq_token_set_recall": 0.5296665214234823, "eval_nq_true_num_tokens": 64.0, "step": 243750 }, { "epoch": 46.8, "learning_rate": 0.001, "loss": 2.4834, "step": 243756 }, { "epoch": 46.81, "learning_rate": 0.001, "loss": 2.4832, "step": 243768 }, { "epoch": 46.81, "learning_rate": 0.001, "loss": 2.4876, "step": 243780 }, { "epoch": 46.81, "learning_rate": 0.001, "loss": 2.4906, "step": 243792 }, { "epoch": 46.81, "learning_rate": 0.001, "loss": 2.4883, "step": 243804 }, { "epoch": 46.82, "learning_rate": 0.001, "loss": 2.4798, "step": 243816 }, { "epoch": 46.82, "learning_rate": 0.001, "loss": 2.4865, "step": 243828 }, { "epoch": 46.82, "learning_rate": 0.001, "loss": 2.4895, "step": 243840 }, { "epoch": 46.82, "learning_rate": 0.001, "loss": 2.4926, "step": 243852 }, { "epoch": 46.82, "learning_rate": 0.001, "loss": 2.4904, "step": 243864 }, { "epoch": 46.83, "learning_rate": 0.001, "loss": 2.4766, "step": 243876 }, { "epoch": 46.83, "learning_rate": 0.001, "loss": 2.4922, "step": 243888 }, { "epoch": 46.83, "learning_rate": 0.001, "loss": 2.4868, "step": 243900 }, { "epoch": 46.83, "learning_rate": 0.001, "loss": 2.4857, "step": 243912 }, { "epoch": 46.84, "learning_rate": 0.001, "loss": 2.4863, "step": 243924 }, { "epoch": 46.84, "learning_rate": 0.001, "loss": 2.4835, "step": 243936 }, { "epoch": 46.84, "learning_rate": 0.001, "loss": 2.4939, "step": 243948 }, { "epoch": 46.84, "learning_rate": 0.001, "loss": 2.4842, "step": 243960 }, { "epoch": 46.85, "learning_rate": 0.001, "loss": 2.4925, "step": 243972 }, { "epoch": 46.85, "learning_rate": 0.001, "loss": 2.4912, "step": 243984 }, { "epoch": 46.85, "learning_rate": 0.001, "loss": 2.4826, "step": 243996 }, { "epoch": 46.85, "learning_rate": 0.001, "loss": 2.4827, "step": 244008 }, { "epoch": 46.85, "learning_rate": 0.001, "loss": 2.488, "step": 244020 }, { "epoch": 46.86, "learning_rate": 0.001, "loss": 2.4954, "step": 244032 }, { "epoch": 46.86, "learning_rate": 0.001, "loss": 2.4892, "step": 244044 }, { "epoch": 46.86, "learning_rate": 0.001, "loss": 2.4888, "step": 244056 }, { "epoch": 46.86, "learning_rate": 0.001, "loss": 2.4952, "step": 244068 }, { "epoch": 46.87, "learning_rate": 0.001, "loss": 2.486, "step": 244080 }, { "epoch": 46.87, "learning_rate": 0.001, "loss": 2.4956, "step": 244092 }, { "epoch": 46.87, "learning_rate": 0.001, "loss": 2.4867, "step": 244104 }, { "epoch": 46.87, "learning_rate": 0.001, "loss": 2.4893, "step": 244116 }, { "epoch": 46.88, "learning_rate": 0.001, "loss": 2.4954, "step": 244128 }, { "epoch": 46.88, "learning_rate": 0.001, "loss": 2.4833, "step": 244140 }, { "epoch": 46.88, "learning_rate": 0.001, "loss": 2.4867, "step": 244152 }, { "epoch": 46.88, "learning_rate": 0.001, "loss": 2.4971, "step": 244164 }, { "epoch": 46.88, "learning_rate": 0.001, "loss": 2.4939, "step": 244176 }, { "epoch": 46.89, "learning_rate": 0.001, "loss": 2.4841, "step": 244188 }, { "epoch": 46.89, "learning_rate": 0.001, "loss": 2.4903, "step": 244200 }, { "epoch": 46.89, "learning_rate": 0.001, "loss": 2.484, "step": 244212 }, { "epoch": 46.89, "learning_rate": 0.001, "loss": 2.4953, "step": 244224 }, { "epoch": 46.9, "learning_rate": 0.001, "loss": 2.4888, "step": 244236 }, { "epoch": 46.9, "learning_rate": 0.001, "loss": 2.4931, "step": 244248 }, { "epoch": 46.9, "learning_rate": 0.001, "loss": 2.4889, "step": 244260 }, { "epoch": 46.9, "learning_rate": 0.001, "loss": 2.4815, "step": 244272 }, { "epoch": 46.91, "learning_rate": 0.001, "loss": 2.4951, "step": 244284 }, { "epoch": 46.91, "learning_rate": 0.001, "loss": 2.4974, "step": 244296 }, { "epoch": 46.91, "learning_rate": 0.001, "loss": 2.4949, "step": 244308 }, { "epoch": 46.91, "learning_rate": 0.001, "loss": 2.4917, "step": 244320 }, { "epoch": 46.91, "learning_rate": 0.001, "loss": 2.4793, "step": 244332 }, { "epoch": 46.92, "learning_rate": 0.001, "loss": 2.4944, "step": 244344 }, { "epoch": 46.92, "learning_rate": 0.001, "loss": 2.4885, "step": 244356 }, { "epoch": 46.92, "learning_rate": 0.001, "loss": 2.4919, "step": 244368 }, { "epoch": 46.92, "eval_ag_news_accuracy": 0.3298125, "eval_ag_news_bleu_score": 4.962390928750171, "eval_ag_news_bleu_score_sem": 0.1577795556939404, "eval_ag_news_emb_cos_sim": 0.8230937719345093, "eval_ag_news_emb_cos_sim_sem": 0.006215335822351316, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.45223331451416, "eval_ag_news_n_ngrams_match_1": 14.468, "eval_ag_news_n_ngrams_match_2": 3.23, "eval_ag_news_n_ngrams_match_3": 0.89, "eval_ag_news_num_pred_words": 46.366, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.570821207760844, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.363137736754119, "eval_ag_news_runtime": 37.4267, "eval_ag_news_samples_per_second": 13.359, "eval_ag_news_steps_per_second": 0.027, "eval_ag_news_token_set_f1": 0.3607285461914485, "eval_ag_news_token_set_f1_sem": 0.004437250615944355, "eval_ag_news_token_set_precision": 0.3469920166092059, "eval_ag_news_token_set_recall": 0.3902508826310556, "eval_ag_news_true_num_tokens": 56.09375, "step": 244375 }, { "epoch": 46.92, "eval_anthropic_toxic_prompts_accuracy": 0.11771875, "eval_anthropic_toxic_prompts_bleu_score": 3.2925499486339223, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1278967785829679, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6840674877166748, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009150309326659363, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.173318386077881, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.468, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.788, "eval_anthropic_toxic_prompts_num_pred_words": 47.686, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.886618006451442, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2183830024605264, "eval_anthropic_toxic_prompts_runtime": 30.6044, "eval_anthropic_toxic_prompts_samples_per_second": 16.338, "eval_anthropic_toxic_prompts_steps_per_second": 0.033, "eval_anthropic_toxic_prompts_token_set_f1": 0.3618598421796209, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006626758408281386, "eval_anthropic_toxic_prompts_token_set_precision": 0.45160102785618533, "eval_anthropic_toxic_prompts_token_set_recall": 0.32704402421459855, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 244375 }, { "epoch": 46.92, "eval_arxiv_accuracy": 0.35521875, "eval_arxiv_bleu_score": 4.485724999979893, "eval_arxiv_bleu_score_sem": 0.131572420176096, "eval_arxiv_emb_cos_sim": 0.7817816734313965, "eval_arxiv_emb_cos_sim_sem": 0.007995185381165228, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.310394287109375, "eval_arxiv_n_ngrams_match_1": 15.606, "eval_arxiv_n_ngrams_match_2": 3.094, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 40.19, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.395925202822838, "eval_arxiv_pred_num_tokens": 62.9140625, "eval_arxiv_rouge_score": 0.3742979304523928, "eval_arxiv_runtime": 29.2873, "eval_arxiv_samples_per_second": 17.072, "eval_arxiv_steps_per_second": 0.034, "eval_arxiv_token_set_f1": 0.36926164088446767, "eval_arxiv_token_set_f1_sem": 0.004198561910363197, "eval_arxiv_token_set_precision": 0.3211493159963727, "eval_arxiv_token_set_recall": 0.4571534011221035, "eval_arxiv_true_num_tokens": 64.0, "step": 244375 }, { "epoch": 46.92, "eval_python_code_alpaca_accuracy": 0.16421875, "eval_python_code_alpaca_bleu_score": 4.869395686517465, "eval_python_code_alpaca_bleu_score_sem": 0.16228264601249945, "eval_python_code_alpaca_emb_cos_sim": 0.7633101940155029, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00776597633098381, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.843024492263794, "eval_python_code_alpaca_n_ngrams_match_1": 10.09, "eval_python_code_alpaca_n_ngrams_match_2": 3.028, "eval_python_code_alpaca_n_ngrams_match_3": 1.034, "eval_python_code_alpaca_num_pred_words": 43.308, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.16761040027448, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3443562091412039, "eval_python_code_alpaca_runtime": 30.5049, "eval_python_code_alpaca_samples_per_second": 16.391, "eval_python_code_alpaca_steps_per_second": 0.033, "eval_python_code_alpaca_token_set_f1": 0.48226221929118135, "eval_python_code_alpaca_token_set_f1_sem": 0.005672199341244246, "eval_python_code_alpaca_token_set_precision": 0.5531533212175661, "eval_python_code_alpaca_token_set_recall": 0.45301052435937644, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 244375 }, { "epoch": 46.92, "eval_wikibio_accuracy": 0.33259375, "eval_wikibio_bleu_score": 6.365955373794853, "eval_wikibio_bleu_score_sem": 0.22052584561828015, "eval_wikibio_emb_cos_sim": 0.7567028999328613, "eval_wikibio_emb_cos_sim_sem": 0.008532059181486583, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.593489408493042, "eval_wikibio_n_ngrams_match_1": 10.31, "eval_wikibio_n_ngrams_match_2": 3.572, "eval_wikibio_n_ngrams_match_3": 1.39, "eval_wikibio_num_pred_words": 36.002, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.36073226809527, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36272023720448, "eval_wikibio_runtime": 21.9374, "eval_wikibio_samples_per_second": 22.792, "eval_wikibio_steps_per_second": 0.046, "eval_wikibio_token_set_f1": 0.328821071063146, "eval_wikibio_token_set_f1_sem": 0.005372802226609237, "eval_wikibio_token_set_precision": 0.3361162671775071, "eval_wikibio_token_set_recall": 0.3371563485334888, "eval_wikibio_true_num_tokens": 61.1328125, "step": 244375 }, { "epoch": 46.92, "eval_nq_accuracy": 0.53909375, "eval_nq_bleu_score": 12.432785528827589, "eval_nq_bleu_score_sem": 0.5016220057976496, "eval_nq_emb_cos_sim": 0.8387429714202881, "eval_nq_emb_cos_sim_sem": 0.0078774692332743, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1078877449035645, "eval_nq_n_ngrams_match_1": 23.53, "eval_nq_n_ngrams_match_2": 8.954, "eval_nq_n_ngrams_match_3": 4.226, "eval_nq_num_pred_words": 48.91, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.230837282253772, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4560999104233717, "eval_nq_runtime": 30.6294, "eval_nq_samples_per_second": 16.324, "eval_nq_steps_per_second": 0.033, "eval_nq_token_set_f1": 0.47177922781546106, "eval_nq_token_set_f1_sem": 0.005179308555629063, "eval_nq_token_set_precision": 0.4288001810334077, "eval_nq_token_set_recall": 0.5334257556443286, "eval_nq_true_num_tokens": 64.0, "step": 244375 }, { "epoch": 46.92, "learning_rate": 0.001, "loss": 2.4909, "step": 244380 }, { "epoch": 46.93, "learning_rate": 0.001, "loss": 2.4854, "step": 244392 }, { "epoch": 46.93, "learning_rate": 0.001, "loss": 2.4934, "step": 244404 }, { "epoch": 46.93, "learning_rate": 0.001, "loss": 2.4883, "step": 244416 }, { "epoch": 46.93, "learning_rate": 0.001, "loss": 2.498, "step": 244428 }, { "epoch": 46.94, "learning_rate": 0.001, "loss": 2.4932, "step": 244440 }, { "epoch": 46.94, "learning_rate": 0.001, "loss": 2.4969, "step": 244452 }, { "epoch": 46.94, "learning_rate": 0.001, "loss": 2.4804, "step": 244464 }, { "epoch": 46.94, "learning_rate": 0.001, "loss": 2.4866, "step": 244476 }, { "epoch": 46.94, "learning_rate": 0.001, "loss": 2.5013, "step": 244488 }, { "epoch": 46.95, "learning_rate": 0.001, "loss": 2.4904, "step": 244500 }, { "epoch": 46.95, "learning_rate": 0.001, "loss": 2.4934, "step": 244512 }, { "epoch": 46.95, "learning_rate": 0.001, "loss": 2.4943, "step": 244524 }, { "epoch": 46.95, "learning_rate": 0.001, "loss": 2.4896, "step": 244536 }, { "epoch": 46.96, "learning_rate": 0.001, "loss": 2.4854, "step": 244548 }, { "epoch": 46.96, "learning_rate": 0.001, "loss": 2.4825, "step": 244560 }, { "epoch": 46.96, "learning_rate": 0.001, "loss": 2.4863, "step": 244572 }, { "epoch": 46.96, "learning_rate": 0.001, "loss": 2.4822, "step": 244584 }, { "epoch": 46.97, "learning_rate": 0.001, "loss": 2.4876, "step": 244596 }, { "epoch": 46.97, "learning_rate": 0.001, "loss": 2.4962, "step": 244608 }, { "epoch": 46.97, "learning_rate": 0.001, "loss": 2.4872, "step": 244620 }, { "epoch": 46.97, "learning_rate": 0.001, "loss": 2.4914, "step": 244632 }, { "epoch": 46.97, "learning_rate": 0.001, "loss": 2.4895, "step": 244644 }, { "epoch": 46.98, "learning_rate": 0.001, "loss": 2.4863, "step": 244656 }, { "epoch": 46.98, "learning_rate": 0.001, "loss": 2.4862, "step": 244668 }, { "epoch": 46.98, "learning_rate": 0.001, "loss": 2.4899, "step": 244680 }, { "epoch": 46.98, "learning_rate": 0.001, "loss": 2.486, "step": 244692 }, { "epoch": 46.99, "learning_rate": 0.001, "loss": 2.487, "step": 244704 }, { "epoch": 46.99, "learning_rate": 0.001, "loss": 2.4824, "step": 244716 }, { "epoch": 46.99, "learning_rate": 0.001, "loss": 2.4943, "step": 244728 }, { "epoch": 46.99, "learning_rate": 0.001, "loss": 2.4822, "step": 244740 }, { "epoch": 47.0, "learning_rate": 0.001, "loss": 2.4839, "step": 244752 }, { "epoch": 47.0, "learning_rate": 0.001, "loss": 2.4874, "step": 244764 }, { "epoch": 47.0, "learning_rate": 0.001, "loss": 2.4821, "step": 244776 }, { "epoch": 47.0, "learning_rate": 0.001, "loss": 2.4794, "step": 244788 }, { "epoch": 47.0, "learning_rate": 0.001, "loss": 2.4753, "step": 244800 }, { "epoch": 47.01, "learning_rate": 0.001, "loss": 2.4778, "step": 244812 }, { "epoch": 47.01, "learning_rate": 0.001, "loss": 2.4731, "step": 244824 }, { "epoch": 47.01, "learning_rate": 0.001, "loss": 2.4642, "step": 244836 }, { "epoch": 47.01, "learning_rate": 0.001, "loss": 2.4765, "step": 244848 }, { "epoch": 47.02, "learning_rate": 0.001, "loss": 2.4793, "step": 244860 }, { "epoch": 47.02, "learning_rate": 0.001, "loss": 2.4792, "step": 244872 }, { "epoch": 47.02, "learning_rate": 0.001, "loss": 2.4795, "step": 244884 }, { "epoch": 47.02, "learning_rate": 0.001, "loss": 2.4733, "step": 244896 }, { "epoch": 47.03, "learning_rate": 0.001, "loss": 2.4778, "step": 244908 }, { "epoch": 47.03, "learning_rate": 0.001, "loss": 2.4744, "step": 244920 }, { "epoch": 47.03, "learning_rate": 0.001, "loss": 2.4844, "step": 244932 }, { "epoch": 47.03, "learning_rate": 0.001, "loss": 2.4685, "step": 244944 }, { "epoch": 47.03, "learning_rate": 0.001, "loss": 2.4761, "step": 244956 }, { "epoch": 47.04, "learning_rate": 0.001, "loss": 2.4747, "step": 244968 }, { "epoch": 47.04, "learning_rate": 0.001, "loss": 2.4768, "step": 244980 }, { "epoch": 47.04, "learning_rate": 0.001, "loss": 2.4701, "step": 244992 }, { "epoch": 47.04, "eval_ag_news_accuracy": 0.33209375, "eval_ag_news_bleu_score": 4.925295757937099, "eval_ag_news_bleu_score_sem": 0.15157324942359807, "eval_ag_news_emb_cos_sim": 0.8138347864151001, "eval_ag_news_emb_cos_sim_sem": 0.00782541985099687, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.44590425491333, "eval_ag_news_n_ngrams_match_1": 14.496, "eval_ag_news_n_ngrams_match_2": 3.304, "eval_ag_news_n_ngrams_match_3": 0.908, "eval_ag_news_num_pred_words": 46.722, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.37163858292946, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.35854532559491736, "eval_ag_news_runtime": 12.0986, "eval_ag_news_samples_per_second": 41.327, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.35947735744225884, "eval_ag_news_token_set_f1_sem": 0.004478944389067876, "eval_ag_news_token_set_precision": 0.3464737878193696, "eval_ag_news_token_set_recall": 0.38848512375453, "eval_ag_news_true_num_tokens": 56.09375, "step": 245000 }, { "epoch": 47.04, "eval_anthropic_toxic_prompts_accuracy": 0.11784375, "eval_anthropic_toxic_prompts_bleu_score": 3.435449649711825, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12565953244350866, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6939759850502014, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008376048011911953, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1880574226379395, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.574, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.122, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.816, "eval_anthropic_toxic_prompts_num_pred_words": 47.346, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.24129109161105, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.225962911332243, "eval_anthropic_toxic_prompts_runtime": 12.0806, "eval_anthropic_toxic_prompts_samples_per_second": 41.389, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3658123598298576, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006262961764184194, "eval_anthropic_toxic_prompts_token_set_precision": 0.4649568963644189, "eval_anthropic_toxic_prompts_token_set_recall": 0.32727427145892146, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 245000 }, { "epoch": 47.04, "eval_arxiv_accuracy": 0.35521875, "eval_arxiv_bleu_score": 4.5693387377181995, "eval_arxiv_bleu_score_sem": 0.13515266858895042, "eval_arxiv_emb_cos_sim": 0.7914153933525085, "eval_arxiv_emb_cos_sim_sem": 0.006605425320199388, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.315833806991577, "eval_arxiv_n_ngrams_match_1": 15.798, "eval_arxiv_n_ngrams_match_2": 3.154, "eval_arxiv_n_ngrams_match_3": 0.706, "eval_arxiv_num_pred_words": 41.268, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.545351919010752, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3768625888130388, "eval_arxiv_runtime": 11.8295, "eval_arxiv_samples_per_second": 42.267, "eval_arxiv_steps_per_second": 0.085, "eval_arxiv_token_set_f1": 0.37089241852354093, "eval_arxiv_token_set_f1_sem": 0.003965028715257392, "eval_arxiv_token_set_precision": 0.3237756862481294, "eval_arxiv_token_set_recall": 0.4496472154634021, "eval_arxiv_true_num_tokens": 64.0, "step": 245000 }, { "epoch": 47.04, "eval_python_code_alpaca_accuracy": 0.16203125, "eval_python_code_alpaca_bleu_score": 4.6709153834448, "eval_python_code_alpaca_bleu_score_sem": 0.1489269565286898, "eval_python_code_alpaca_emb_cos_sim": 0.7627075910568237, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009097241834572592, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.865194797515869, "eval_python_code_alpaca_n_ngrams_match_1": 9.93, "eval_python_code_alpaca_n_ngrams_match_2": 2.962, "eval_python_code_alpaca_n_ngrams_match_3": 0.996, "eval_python_code_alpaca_num_pred_words": 43.102, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.55247204953714, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34162463797619924, "eval_python_code_alpaca_runtime": 11.6621, "eval_python_code_alpaca_samples_per_second": 42.874, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.478837769366088, "eval_python_code_alpaca_token_set_f1_sem": 0.0056724362378326055, "eval_python_code_alpaca_token_set_precision": 0.5454503489781254, "eval_python_code_alpaca_token_set_recall": 0.4472350972555811, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 245000 }, { "epoch": 47.04, "eval_wikibio_accuracy": 0.33003125, "eval_wikibio_bleu_score": 6.144214884836507, "eval_wikibio_bleu_score_sem": 0.21037107462738655, "eval_wikibio_emb_cos_sim": 0.7528545260429382, "eval_wikibio_emb_cos_sim_sem": 0.008942246140629271, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6246695518493652, "eval_wikibio_n_ngrams_match_1": 10.168, "eval_wikibio_n_ngrams_match_2": 3.412, "eval_wikibio_n_ngrams_match_3": 1.272, "eval_wikibio_num_pred_words": 36.168, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.512325232778636, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36010652868234394, "eval_wikibio_runtime": 14.9713, "eval_wikibio_samples_per_second": 33.397, "eval_wikibio_steps_per_second": 0.067, "eval_wikibio_token_set_f1": 0.3227942991028087, "eval_wikibio_token_set_f1_sem": 0.005350308801267859, "eval_wikibio_token_set_precision": 0.3317673437737749, "eval_wikibio_token_set_recall": 0.3311929048937341, "eval_wikibio_true_num_tokens": 61.1328125, "step": 245000 }, { "epoch": 47.04, "eval_nq_accuracy": 0.538875, "eval_nq_bleu_score": 12.48542827698002, "eval_nq_bleu_score_sem": 0.48766691134829954, "eval_nq_emb_cos_sim": 0.8438307046890259, "eval_nq_emb_cos_sim_sem": 0.0068573436191940815, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.105804681777954, "eval_nq_n_ngrams_match_1": 23.758, "eval_nq_n_ngrams_match_2": 8.998, "eval_nq_n_ngrams_match_3": 4.22, "eval_nq_num_pred_words": 49.148, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.21370977365225, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46308293267395306, "eval_nq_runtime": 33.8755, "eval_nq_samples_per_second": 14.76, "eval_nq_steps_per_second": 0.03, "eval_nq_token_set_f1": 0.47685870947935743, "eval_nq_token_set_f1_sem": 0.004842791199554861, "eval_nq_token_set_precision": 0.4353981588254932, "eval_nq_token_set_recall": 0.5363340850169116, "eval_nq_true_num_tokens": 64.0, "step": 245000 }, { "epoch": 47.04, "learning_rate": 0.001, "loss": 2.4765, "step": 245004 }, { "epoch": 47.05, "learning_rate": 0.001, "loss": 2.4742, "step": 245016 }, { "epoch": 47.05, "learning_rate": 0.001, "loss": 2.4928, "step": 245028 }, { "epoch": 47.05, "learning_rate": 0.001, "loss": 2.4864, "step": 245040 }, { "epoch": 47.05, "learning_rate": 0.001, "loss": 2.4788, "step": 245052 }, { "epoch": 47.06, "learning_rate": 0.001, "loss": 2.484, "step": 245064 }, { "epoch": 47.06, "learning_rate": 0.001, "loss": 2.472, "step": 245076 }, { "epoch": 47.06, "learning_rate": 0.001, "loss": 2.4775, "step": 245088 }, { "epoch": 47.06, "learning_rate": 0.001, "loss": 2.4782, "step": 245100 }, { "epoch": 47.06, "learning_rate": 0.001, "loss": 2.4788, "step": 245112 }, { "epoch": 47.07, "learning_rate": 0.001, "loss": 2.492, "step": 245124 }, { "epoch": 47.07, "learning_rate": 0.001, "loss": 2.4827, "step": 245136 }, { "epoch": 47.07, "learning_rate": 0.001, "loss": 2.491, "step": 245148 }, { "epoch": 47.07, "learning_rate": 0.001, "loss": 2.4885, "step": 245160 }, { "epoch": 47.08, "learning_rate": 0.001, "loss": 2.4849, "step": 245172 }, { "epoch": 47.08, "learning_rate": 0.001, "loss": 2.4836, "step": 245184 }, { "epoch": 47.08, "learning_rate": 0.001, "loss": 2.4819, "step": 245196 }, { "epoch": 47.08, "learning_rate": 0.001, "loss": 2.4778, "step": 245208 }, { "epoch": 47.09, "learning_rate": 0.001, "loss": 2.472, "step": 245220 }, { "epoch": 47.09, "learning_rate": 0.001, "loss": 2.4831, "step": 245232 }, { "epoch": 47.09, "learning_rate": 0.001, "loss": 2.4742, "step": 245244 }, { "epoch": 47.09, "learning_rate": 0.001, "loss": 2.4772, "step": 245256 }, { "epoch": 47.09, "learning_rate": 0.001, "loss": 2.4828, "step": 245268 }, { "epoch": 47.1, "learning_rate": 0.001, "loss": 2.4829, "step": 245280 }, { "epoch": 47.1, "learning_rate": 0.001, "loss": 2.484, "step": 245292 }, { "epoch": 47.1, "learning_rate": 0.001, "loss": 2.4862, "step": 245304 }, { "epoch": 47.1, "learning_rate": 0.001, "loss": 2.4848, "step": 245316 }, { "epoch": 47.11, "learning_rate": 0.001, "loss": 2.4852, "step": 245328 }, { "epoch": 47.11, "learning_rate": 0.001, "loss": 2.4808, "step": 245340 }, { "epoch": 47.11, "learning_rate": 0.001, "loss": 2.4773, "step": 245352 }, { "epoch": 47.11, "learning_rate": 0.001, "loss": 2.4742, "step": 245364 }, { "epoch": 47.12, "learning_rate": 0.001, "loss": 2.4823, "step": 245376 }, { "epoch": 47.12, "learning_rate": 0.001, "loss": 2.4869, "step": 245388 }, { "epoch": 47.12, "learning_rate": 0.001, "loss": 2.4801, "step": 245400 }, { "epoch": 47.12, "learning_rate": 0.001, "loss": 2.4886, "step": 245412 }, { "epoch": 47.12, "learning_rate": 0.001, "loss": 2.4841, "step": 245424 }, { "epoch": 47.13, "learning_rate": 0.001, "loss": 2.4803, "step": 245436 }, { "epoch": 47.13, "learning_rate": 0.001, "loss": 2.4855, "step": 245448 }, { "epoch": 47.13, "learning_rate": 0.001, "loss": 2.4774, "step": 245460 }, { "epoch": 47.13, "learning_rate": 0.001, "loss": 2.4741, "step": 245472 }, { "epoch": 47.14, "learning_rate": 0.001, "loss": 2.4874, "step": 245484 }, { "epoch": 47.14, "learning_rate": 0.001, "loss": 2.4789, "step": 245496 }, { "epoch": 47.14, "learning_rate": 0.001, "loss": 2.4755, "step": 245508 }, { "epoch": 47.14, "learning_rate": 0.001, "loss": 2.4747, "step": 245520 }, { "epoch": 47.15, "learning_rate": 0.001, "loss": 2.4892, "step": 245532 }, { "epoch": 47.15, "learning_rate": 0.001, "loss": 2.482, "step": 245544 }, { "epoch": 47.15, "learning_rate": 0.001, "loss": 2.494, "step": 245556 }, { "epoch": 47.15, "learning_rate": 0.001, "loss": 2.4795, "step": 245568 }, { "epoch": 47.15, "learning_rate": 0.001, "loss": 2.481, "step": 245580 }, { "epoch": 47.16, "learning_rate": 0.001, "loss": 2.4733, "step": 245592 }, { "epoch": 47.16, "learning_rate": 0.001, "loss": 2.4829, "step": 245604 }, { "epoch": 47.16, "learning_rate": 0.001, "loss": 2.4818, "step": 245616 }, { "epoch": 47.16, "eval_ag_news_accuracy": 0.32975, "eval_ag_news_bleu_score": 5.095891868333192, "eval_ag_news_bleu_score_sem": 0.15943742532720626, "eval_ag_news_emb_cos_sim": 0.8234723210334778, "eval_ag_news_emb_cos_sim_sem": 0.006856294557792865, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4530296325683594, "eval_ag_news_n_ngrams_match_1": 14.62, "eval_ag_news_n_ngrams_match_2": 3.358, "eval_ag_news_n_ngrams_match_3": 0.938, "eval_ag_news_num_pred_words": 46.804, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.595971635215168, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3638075014427663, "eval_ag_news_runtime": 34.6683, "eval_ag_news_samples_per_second": 14.422, "eval_ag_news_steps_per_second": 0.029, "eval_ag_news_token_set_f1": 0.361439056316256, "eval_ag_news_token_set_f1_sem": 0.00458129653332981, "eval_ag_news_token_set_precision": 0.3496636224836855, "eval_ag_news_token_set_recall": 0.38659686179655955, "eval_ag_news_true_num_tokens": 56.09375, "step": 245625 }, { "epoch": 47.16, "eval_anthropic_toxic_prompts_accuracy": 0.1173125, "eval_anthropic_toxic_prompts_bleu_score": 3.4667467996448904, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1355228854603887, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.693714439868927, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00836533480985609, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0546875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.020175758285348722, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2145566940307617, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.556, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.124, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.88, "eval_anthropic_toxic_prompts_num_pred_words": 46.834, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.89225457984562, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22428777102998337, "eval_anthropic_toxic_prompts_runtime": 34.9172, "eval_anthropic_toxic_prompts_samples_per_second": 14.32, "eval_anthropic_toxic_prompts_steps_per_second": 0.029, "eval_anthropic_toxic_prompts_token_set_f1": 0.36637338820197096, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006580240863976744, "eval_anthropic_toxic_prompts_token_set_precision": 0.46019843590764575, "eval_anthropic_toxic_prompts_token_set_recall": 0.32943045333637133, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 245625 }, { "epoch": 47.16, "eval_arxiv_accuracy": 0.35496875, "eval_arxiv_bleu_score": 4.670369731043923, "eval_arxiv_bleu_score_sem": 0.13696372933979614, "eval_arxiv_emb_cos_sim": 0.7951237559318542, "eval_arxiv_emb_cos_sim_sem": 0.005358955983091984, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3004679679870605, "eval_arxiv_n_ngrams_match_1": 15.94, "eval_arxiv_n_ngrams_match_2": 3.212, "eval_arxiv_n_ngrams_match_3": 0.73, "eval_arxiv_num_pred_words": 41.522, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.12532973693376, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3792798416906389, "eval_arxiv_runtime": 32.6239, "eval_arxiv_samples_per_second": 15.326, "eval_arxiv_steps_per_second": 0.031, "eval_arxiv_token_set_f1": 0.36966386096588283, "eval_arxiv_token_set_f1_sem": 0.004125212815473106, "eval_arxiv_token_set_precision": 0.32719840939950184, "eval_arxiv_token_set_recall": 0.44019621005456366, "eval_arxiv_true_num_tokens": 64.0, "step": 245625 }, { "epoch": 47.16, "eval_python_code_alpaca_accuracy": 0.16253125, "eval_python_code_alpaca_bleu_score": 4.976036605527539, "eval_python_code_alpaca_bleu_score_sem": 0.157347021349905, "eval_python_code_alpaca_emb_cos_sim": 0.7751104831695557, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007062985337068321, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8574025630950928, "eval_python_code_alpaca_n_ngrams_match_1": 10.192, "eval_python_code_alpaca_n_ngrams_match_2": 3.09, "eval_python_code_alpaca_n_ngrams_match_3": 1.078, "eval_python_code_alpaca_num_pred_words": 42.92, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.41623057478644, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35352815037520247, "eval_python_code_alpaca_runtime": 26.9993, "eval_python_code_alpaca_samples_per_second": 18.519, "eval_python_code_alpaca_steps_per_second": 0.037, "eval_python_code_alpaca_token_set_f1": 0.48542807527786097, "eval_python_code_alpaca_token_set_f1_sem": 0.00523008801301433, "eval_python_code_alpaca_token_set_precision": 0.5576286797307609, "eval_python_code_alpaca_token_set_recall": 0.4498511173790471, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 245625 }, { "epoch": 47.16, "eval_wikibio_accuracy": 0.333125, "eval_wikibio_bleu_score": 6.44085541546715, "eval_wikibio_bleu_score_sem": 0.23410197951135908, "eval_wikibio_emb_cos_sim": 0.7461103796958923, "eval_wikibio_emb_cos_sim_sem": 0.008843071971905106, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6005210876464844, "eval_wikibio_n_ngrams_match_1": 10.222, "eval_wikibio_n_ngrams_match_2": 3.568, "eval_wikibio_n_ngrams_match_3": 1.416, "eval_wikibio_num_pred_words": 35.742, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.617310301194934, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36266366691392105, "eval_wikibio_runtime": 32.5894, "eval_wikibio_samples_per_second": 15.342, "eval_wikibio_steps_per_second": 0.031, "eval_wikibio_token_set_f1": 0.32505223914894016, "eval_wikibio_token_set_f1_sem": 0.005395906597591434, "eval_wikibio_token_set_precision": 0.3342663134899181, "eval_wikibio_token_set_recall": 0.33298039057251944, "eval_wikibio_true_num_tokens": 61.1328125, "step": 245625 }, { "epoch": 47.16, "eval_nq_accuracy": 0.53975, "eval_nq_bleu_score": 12.431233616869063, "eval_nq_bleu_score_sem": 0.5039949780717249, "eval_nq_emb_cos_sim": 0.8445185422897339, "eval_nq_emb_cos_sim_sem": 0.006614425647989987, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.106003522872925, "eval_nq_n_ngrams_match_1": 23.414, "eval_nq_n_ngrams_match_2": 8.87, "eval_nq_n_ngrams_match_3": 4.204, "eval_nq_num_pred_words": 48.94, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.215343159084107, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4548226045607402, "eval_nq_runtime": 23.735, "eval_nq_samples_per_second": 21.066, "eval_nq_steps_per_second": 0.042, "eval_nq_token_set_f1": 0.47117816099443643, "eval_nq_token_set_f1_sem": 0.004860512411022548, "eval_nq_token_set_precision": 0.42950036872631, "eval_nq_token_set_recall": 0.5296503290431468, "eval_nq_true_num_tokens": 64.0, "step": 245625 }, { "epoch": 47.16, "learning_rate": 0.001, "loss": 2.4759, "step": 245628 }, { "epoch": 47.17, "learning_rate": 0.001, "loss": 2.4771, "step": 245640 }, { "epoch": 47.17, "learning_rate": 0.001, "loss": 2.4786, "step": 245652 }, { "epoch": 47.17, "learning_rate": 0.001, "loss": 2.4818, "step": 245664 }, { "epoch": 47.17, "learning_rate": 0.001, "loss": 2.4725, "step": 245676 }, { "epoch": 47.18, "learning_rate": 0.001, "loss": 2.4807, "step": 245688 }, { "epoch": 47.18, "learning_rate": 0.001, "loss": 2.4836, "step": 245700 }, { "epoch": 47.18, "learning_rate": 0.001, "loss": 2.4758, "step": 245712 }, { "epoch": 47.18, "learning_rate": 0.001, "loss": 2.4701, "step": 245724 }, { "epoch": 47.18, "learning_rate": 0.001, "loss": 2.4818, "step": 245736 }, { "epoch": 47.19, "learning_rate": 0.001, "loss": 2.4816, "step": 245748 }, { "epoch": 47.19, "learning_rate": 0.001, "loss": 2.4876, "step": 245760 }, { "epoch": 47.19, "learning_rate": 0.001, "loss": 2.4817, "step": 245772 }, { "epoch": 47.19, "learning_rate": 0.001, "loss": 2.4837, "step": 245784 }, { "epoch": 47.2, "learning_rate": 0.001, "loss": 2.4746, "step": 245796 }, { "epoch": 47.2, "learning_rate": 0.001, "loss": 2.4868, "step": 245808 }, { "epoch": 47.2, "learning_rate": 0.001, "loss": 2.4803, "step": 245820 }, { "epoch": 47.2, "learning_rate": 0.001, "loss": 2.4848, "step": 245832 }, { "epoch": 47.21, "learning_rate": 0.001, "loss": 2.4929, "step": 245844 }, { "epoch": 47.21, "learning_rate": 0.001, "loss": 2.4844, "step": 245856 }, { "epoch": 47.21, "learning_rate": 0.001, "loss": 2.4768, "step": 245868 }, { "epoch": 47.21, "learning_rate": 0.001, "loss": 2.4871, "step": 245880 }, { "epoch": 47.21, "learning_rate": 0.001, "loss": 2.4775, "step": 245892 }, { "epoch": 47.22, "learning_rate": 0.001, "loss": 2.4827, "step": 245904 }, { "epoch": 47.22, "learning_rate": 0.001, "loss": 2.4789, "step": 245916 }, { "epoch": 47.22, "learning_rate": 0.001, "loss": 2.4797, "step": 245928 }, { "epoch": 47.22, "learning_rate": 0.001, "loss": 2.4784, "step": 245940 }, { "epoch": 47.23, "learning_rate": 0.001, "loss": 2.4873, "step": 245952 }, { "epoch": 47.23, "learning_rate": 0.001, "loss": 2.478, "step": 245964 }, { "epoch": 47.23, "learning_rate": 0.001, "loss": 2.4851, "step": 245976 }, { "epoch": 47.23, "learning_rate": 0.001, "loss": 2.4847, "step": 245988 }, { "epoch": 47.24, "learning_rate": 0.001, "loss": 2.4698, "step": 246000 }, { "epoch": 47.24, "learning_rate": 0.001, "loss": 2.4706, "step": 246012 }, { "epoch": 47.24, "learning_rate": 0.001, "loss": 2.4829, "step": 246024 }, { "epoch": 47.24, "learning_rate": 0.001, "loss": 2.49, "step": 246036 }, { "epoch": 47.24, "learning_rate": 0.001, "loss": 2.482, "step": 246048 }, { "epoch": 47.25, "learning_rate": 0.001, "loss": 2.4956, "step": 246060 }, { "epoch": 47.25, "learning_rate": 0.001, "loss": 2.4856, "step": 246072 }, { "epoch": 47.25, "learning_rate": 0.001, "loss": 2.4841, "step": 246084 }, { "epoch": 47.25, "learning_rate": 0.001, "loss": 2.4864, "step": 246096 }, { "epoch": 47.26, "learning_rate": 0.001, "loss": 2.4747, "step": 246108 }, { "epoch": 47.26, "learning_rate": 0.001, "loss": 2.4857, "step": 246120 }, { "epoch": 47.26, "learning_rate": 0.001, "loss": 2.4857, "step": 246132 }, { "epoch": 47.26, "learning_rate": 0.001, "loss": 2.4834, "step": 246144 }, { "epoch": 47.26, "learning_rate": 0.001, "loss": 2.4809, "step": 246156 }, { "epoch": 47.27, "learning_rate": 0.001, "loss": 2.4759, "step": 246168 }, { "epoch": 47.27, "learning_rate": 0.001, "loss": 2.4736, "step": 246180 }, { "epoch": 47.27, "learning_rate": 0.001, "loss": 2.4781, "step": 246192 }, { "epoch": 47.27, "learning_rate": 0.001, "loss": 2.4769, "step": 246204 }, { "epoch": 47.28, "learning_rate": 0.001, "loss": 2.4845, "step": 246216 }, { "epoch": 47.28, "learning_rate": 0.001, "loss": 2.4804, "step": 246228 }, { "epoch": 47.28, "learning_rate": 0.001, "loss": 2.4837, "step": 246240 }, { "epoch": 47.28, "eval_ag_news_accuracy": 0.330875, "eval_ag_news_bleu_score": 5.069283979206854, "eval_ag_news_bleu_score_sem": 0.15037329134454597, "eval_ag_news_emb_cos_sim": 0.8174196481704712, "eval_ag_news_emb_cos_sim_sem": 0.006828056827835074, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4608304500579834, "eval_ag_news_n_ngrams_match_1": 14.536, "eval_ag_news_n_ngrams_match_2": 3.332, "eval_ag_news_n_ngrams_match_3": 0.96, "eval_ag_news_num_pred_words": 46.82, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.843409898942763, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36355621736268795, "eval_ag_news_runtime": 12.6654, "eval_ag_news_samples_per_second": 39.478, "eval_ag_news_steps_per_second": 0.079, "eval_ag_news_token_set_f1": 0.36138147065303444, "eval_ag_news_token_set_f1_sem": 0.004455285735451715, "eval_ag_news_token_set_precision": 0.34958395829939504, "eval_ag_news_token_set_recall": 0.3883823749164066, "eval_ag_news_true_num_tokens": 56.09375, "step": 246250 }, { "epoch": 47.28, "eval_anthropic_toxic_prompts_accuracy": 0.11778125, "eval_anthropic_toxic_prompts_bleu_score": 3.3545482265217457, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1358527323889524, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6918302774429321, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009046826959763976, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.177814245223999, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.478, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.04, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.78, "eval_anthropic_toxic_prompts_num_pred_words": 46.738, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.994250645782653, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.221042587001745, "eval_anthropic_toxic_prompts_runtime": 11.2064, "eval_anthropic_toxic_prompts_samples_per_second": 44.617, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.3645269389199886, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006546077958475243, "eval_anthropic_toxic_prompts_token_set_precision": 0.453796950926804, "eval_anthropic_toxic_prompts_token_set_recall": 0.3304425966327437, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 246250 }, { "epoch": 47.28, "eval_arxiv_accuracy": 0.354125, "eval_arxiv_bleu_score": 4.585063696401905, "eval_arxiv_bleu_score_sem": 0.13507661118758754, "eval_arxiv_emb_cos_sim": 0.7911890149116516, "eval_arxiv_emb_cos_sim_sem": 0.006057483362332366, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3176190853118896, "eval_arxiv_n_ngrams_match_1": 15.744, "eval_arxiv_n_ngrams_match_2": 3.086, "eval_arxiv_n_ngrams_match_3": 0.712, "eval_arxiv_num_pred_words": 40.916, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.594571961281417, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3746047285374328, "eval_arxiv_runtime": 11.5567, "eval_arxiv_samples_per_second": 43.265, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.3682811102187006, "eval_arxiv_token_set_f1_sem": 0.0041616267102067925, "eval_arxiv_token_set_precision": 0.3207730655900764, "eval_arxiv_token_set_recall": 0.4463019354585246, "eval_arxiv_true_num_tokens": 64.0, "step": 246250 }, { "epoch": 47.28, "eval_python_code_alpaca_accuracy": 0.1626875, "eval_python_code_alpaca_bleu_score": 4.826769904015877, "eval_python_code_alpaca_bleu_score_sem": 0.15539924185609735, "eval_python_code_alpaca_emb_cos_sim": 0.7731510400772095, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007862736275541711, "eval_python_code_alpaca_emb_top1_equal": 0.1953125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.035178457165496856, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.843956232070923, "eval_python_code_alpaca_n_ngrams_match_1": 10.076, "eval_python_code_alpaca_n_ngrams_match_2": 3.054, "eval_python_code_alpaca_n_ngrams_match_3": 1.048, "eval_python_code_alpaca_num_pred_words": 43.328, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.183613600529306, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34707660506223303, "eval_python_code_alpaca_runtime": 11.1597, "eval_python_code_alpaca_samples_per_second": 44.804, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.48806449492791165, "eval_python_code_alpaca_token_set_f1_sem": 0.00546412908273593, "eval_python_code_alpaca_token_set_precision": 0.5511415035134234, "eval_python_code_alpaca_token_set_recall": 0.46355931959320923, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 246250 }, { "epoch": 47.28, "eval_wikibio_accuracy": 0.332625, "eval_wikibio_bleu_score": 6.085197534410365, "eval_wikibio_bleu_score_sem": 0.21429132046116717, "eval_wikibio_emb_cos_sim": 0.7510096430778503, "eval_wikibio_emb_cos_sim_sem": 0.007621918402773574, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.580207586288452, "eval_wikibio_n_ngrams_match_1": 9.964, "eval_wikibio_n_ngrams_match_2": 3.378, "eval_wikibio_n_ngrams_match_3": 1.244, "eval_wikibio_num_pred_words": 35.164, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 35.88098847524684, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3560414074782632, "eval_wikibio_runtime": 11.6049, "eval_wikibio_samples_per_second": 43.085, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.3175300190040947, "eval_wikibio_token_set_f1_sem": 0.0057529849220731896, "eval_wikibio_token_set_precision": 0.32446516253160845, "eval_wikibio_token_set_recall": 0.3277066342158966, "eval_wikibio_true_num_tokens": 61.1328125, "step": 246250 }, { "epoch": 47.28, "eval_nq_accuracy": 0.53934375, "eval_nq_bleu_score": 12.24617707581566, "eval_nq_bleu_score_sem": 0.48214842429722665, "eval_nq_emb_cos_sim": 0.8433565497398376, "eval_nq_emb_cos_sim_sem": 0.007000108165990639, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.104877471923828, "eval_nq_n_ngrams_match_1": 23.542, "eval_nq_n_ngrams_match_2": 8.894, "eval_nq_n_ngrams_match_3": 4.116, "eval_nq_num_pred_words": 48.858, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.206097470657733, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4590039839001292, "eval_nq_runtime": 36.7928, "eval_nq_samples_per_second": 13.59, "eval_nq_steps_per_second": 0.027, "eval_nq_token_set_f1": 0.47020593382796755, "eval_nq_token_set_f1_sem": 0.004965353417429101, "eval_nq_token_set_precision": 0.4283757020782135, "eval_nq_token_set_recall": 0.5287112408349433, "eval_nq_true_num_tokens": 64.0, "step": 246250 }, { "epoch": 47.28, "learning_rate": 0.001, "loss": 2.4755, "step": 246252 }, { "epoch": 47.29, "learning_rate": 0.001, "loss": 2.4812, "step": 246264 }, { "epoch": 47.29, "learning_rate": 0.001, "loss": 2.4796, "step": 246276 }, { "epoch": 47.29, "learning_rate": 0.001, "loss": 2.4771, "step": 246288 }, { "epoch": 47.29, "learning_rate": 0.001, "loss": 2.4894, "step": 246300 }, { "epoch": 47.29, "learning_rate": 0.001, "loss": 2.4703, "step": 246312 }, { "epoch": 47.3, "learning_rate": 0.001, "loss": 2.4815, "step": 246324 }, { "epoch": 47.3, "learning_rate": 0.001, "loss": 2.4827, "step": 246336 }, { "epoch": 47.3, "learning_rate": 0.001, "loss": 2.4795, "step": 246348 }, { "epoch": 47.3, "learning_rate": 0.001, "loss": 2.4779, "step": 246360 }, { "epoch": 47.31, "learning_rate": 0.001, "loss": 2.4848, "step": 246372 }, { "epoch": 47.31, "learning_rate": 0.001, "loss": 2.4775, "step": 246384 }, { "epoch": 47.31, "learning_rate": 0.001, "loss": 2.4823, "step": 246396 }, { "epoch": 47.31, "learning_rate": 0.001, "loss": 2.4887, "step": 246408 }, { "epoch": 47.32, "learning_rate": 0.001, "loss": 2.4854, "step": 246420 }, { "epoch": 47.32, "learning_rate": 0.001, "loss": 2.4818, "step": 246432 }, { "epoch": 47.32, "learning_rate": 0.001, "loss": 2.4779, "step": 246444 }, { "epoch": 47.32, "learning_rate": 0.001, "loss": 2.4798, "step": 246456 }, { "epoch": 47.32, "learning_rate": 0.001, "loss": 2.4799, "step": 246468 }, { "epoch": 47.33, "learning_rate": 0.001, "loss": 2.4891, "step": 246480 }, { "epoch": 47.33, "learning_rate": 0.001, "loss": 2.4733, "step": 246492 }, { "epoch": 47.33, "learning_rate": 0.001, "loss": 2.472, "step": 246504 }, { "epoch": 47.33, "learning_rate": 0.001, "loss": 2.4761, "step": 246516 }, { "epoch": 47.34, "learning_rate": 0.001, "loss": 2.4868, "step": 246528 }, { "epoch": 47.34, "learning_rate": 0.001, "loss": 2.4714, "step": 246540 }, { "epoch": 47.34, "learning_rate": 0.001, "loss": 2.4769, "step": 246552 }, { "epoch": 47.34, "learning_rate": 0.001, "loss": 2.4857, "step": 246564 }, { "epoch": 47.35, "learning_rate": 0.001, "loss": 2.4822, "step": 246576 }, { "epoch": 47.35, "learning_rate": 0.001, "loss": 2.4808, "step": 246588 }, { "epoch": 47.35, "learning_rate": 0.001, "loss": 2.4905, "step": 246600 }, { "epoch": 47.35, "learning_rate": 0.001, "loss": 2.4792, "step": 246612 }, { "epoch": 47.35, "learning_rate": 0.001, "loss": 2.4827, "step": 246624 }, { "epoch": 47.36, "learning_rate": 0.001, "loss": 2.4864, "step": 246636 }, { "epoch": 47.36, "learning_rate": 0.001, "loss": 2.4805, "step": 246648 }, { "epoch": 47.36, "learning_rate": 0.001, "loss": 2.4888, "step": 246660 }, { "epoch": 47.36, "learning_rate": 0.001, "loss": 2.4746, "step": 246672 }, { "epoch": 47.37, "learning_rate": 0.001, "loss": 2.4913, "step": 246684 }, { "epoch": 47.37, "learning_rate": 0.001, "loss": 2.4847, "step": 246696 }, { "epoch": 47.37, "learning_rate": 0.001, "loss": 2.4673, "step": 246708 }, { "epoch": 47.37, "learning_rate": 0.001, "loss": 2.4821, "step": 246720 }, { "epoch": 47.38, "learning_rate": 0.001, "loss": 2.4818, "step": 246732 }, { "epoch": 47.38, "learning_rate": 0.001, "loss": 2.479, "step": 246744 }, { "epoch": 47.38, "learning_rate": 0.001, "loss": 2.4766, "step": 246756 }, { "epoch": 47.38, "learning_rate": 0.001, "loss": 2.4832, "step": 246768 }, { "epoch": 47.38, "learning_rate": 0.001, "loss": 2.4891, "step": 246780 }, { "epoch": 47.39, "learning_rate": 0.001, "loss": 2.4813, "step": 246792 }, { "epoch": 47.39, "learning_rate": 0.001, "loss": 2.4965, "step": 246804 }, { "epoch": 47.39, "learning_rate": 0.001, "loss": 2.4798, "step": 246816 }, { "epoch": 47.39, "learning_rate": 0.001, "loss": 2.477, "step": 246828 }, { "epoch": 47.4, "learning_rate": 0.001, "loss": 2.4846, "step": 246840 }, { "epoch": 47.4, "learning_rate": 0.001, "loss": 2.4808, "step": 246852 }, { "epoch": 47.4, "learning_rate": 0.001, "loss": 2.4806, "step": 246864 }, { "epoch": 47.4, "eval_ag_news_accuracy": 0.33125, "eval_ag_news_bleu_score": 4.95748454221818, "eval_ag_news_bleu_score_sem": 0.15717453753524005, "eval_ag_news_emb_cos_sim": 0.8210819363594055, "eval_ag_news_emb_cos_sim_sem": 0.006851312010045467, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4686532020568848, "eval_ag_news_n_ngrams_match_1": 14.724, "eval_ag_news_n_ngrams_match_2": 3.256, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 47.106, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 32.09348987890135, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3636885512137098, "eval_ag_news_runtime": 16.3839, "eval_ag_news_samples_per_second": 30.518, "eval_ag_news_steps_per_second": 0.061, "eval_ag_news_token_set_f1": 0.36612103356267095, "eval_ag_news_token_set_f1_sem": 0.004392574909400384, "eval_ag_news_token_set_precision": 0.35428537255538034, "eval_ag_news_token_set_recall": 0.39319283618688394, "eval_ag_news_true_num_tokens": 56.09375, "step": 246875 }, { "epoch": 47.4, "eval_anthropic_toxic_prompts_accuracy": 0.1178125, "eval_anthropic_toxic_prompts_bleu_score": 3.2521426563901223, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12029724585942032, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6833691000938416, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008713470412734107, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.09375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.025864720141013958, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1902172565460205, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.324, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 46.728, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.293704836202455, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21781817418923197, "eval_anthropic_toxic_prompts_runtime": 32.4497, "eval_anthropic_toxic_prompts_samples_per_second": 15.408, "eval_anthropic_toxic_prompts_steps_per_second": 0.031, "eval_anthropic_toxic_prompts_token_set_f1": 0.35963204052864417, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006591739026702393, "eval_anthropic_toxic_prompts_token_set_precision": 0.4453172756458448, "eval_anthropic_toxic_prompts_token_set_recall": 0.3290254931467475, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 246875 }, { "epoch": 47.4, "eval_arxiv_accuracy": 0.3550625, "eval_arxiv_bleu_score": 4.568349374104083, "eval_arxiv_bleu_score_sem": 0.13337471206948545, "eval_arxiv_emb_cos_sim": 0.7892099618911743, "eval_arxiv_emb_cos_sim_sem": 0.005877521587546912, "eval_arxiv_emb_top1_equal": 0.2421875, "eval_arxiv_emb_top1_equal_sem": 0.038014990119662626, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.315105438232422, "eval_arxiv_n_ngrams_match_1": 15.916, "eval_arxiv_n_ngrams_match_2": 3.196, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 41.246, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.525296050133843, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37910221318098236, "eval_arxiv_runtime": 15.0963, "eval_arxiv_samples_per_second": 33.121, "eval_arxiv_steps_per_second": 0.066, "eval_arxiv_token_set_f1": 0.3736075627778935, "eval_arxiv_token_set_f1_sem": 0.004002237537676729, "eval_arxiv_token_set_precision": 0.32596646971140175, "eval_arxiv_token_set_recall": 0.45409043922187126, "eval_arxiv_true_num_tokens": 64.0, "step": 246875 }, { "epoch": 47.4, "eval_python_code_alpaca_accuracy": 0.16396875, "eval_python_code_alpaca_bleu_score": 4.7093845402385535, "eval_python_code_alpaca_bleu_score_sem": 0.15228736223768255, "eval_python_code_alpaca_emb_cos_sim": 0.764851450920105, "eval_python_code_alpaca_emb_cos_sim_sem": 0.009325005379558987, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8560683727264404, "eval_python_code_alpaca_n_ngrams_match_1": 9.982, "eval_python_code_alpaca_n_ngrams_match_2": 2.978, "eval_python_code_alpaca_n_ngrams_match_3": 0.998, "eval_python_code_alpaca_num_pred_words": 43.442, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.393009501805874, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34056783123403533, "eval_python_code_alpaca_runtime": 11.1072, "eval_python_code_alpaca_samples_per_second": 45.016, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4801650704960793, "eval_python_code_alpaca_token_set_f1_sem": 0.005775790764804429, "eval_python_code_alpaca_token_set_precision": 0.5470332982090649, "eval_python_code_alpaca_token_set_recall": 0.45306159639444776, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 246875 }, { "epoch": 47.4, "eval_wikibio_accuracy": 0.330625, "eval_wikibio_bleu_score": 6.1075811307460315, "eval_wikibio_bleu_score_sem": 0.209743417085095, "eval_wikibio_emb_cos_sim": 0.7490042448043823, "eval_wikibio_emb_cos_sim_sem": 0.00874728930739705, "eval_wikibio_emb_top1_equal": 0.25, "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5973165035247803, "eval_wikibio_n_ngrams_match_1": 10.404, "eval_wikibio_n_ngrams_match_2": 3.508, "eval_wikibio_n_ngrams_match_3": 1.292, "eval_wikibio_num_pred_words": 36.562, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.500154867504754, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3624132932822166, "eval_wikibio_runtime": 11.2427, "eval_wikibio_samples_per_second": 44.473, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.32757770210092213, "eval_wikibio_token_set_f1_sem": 0.005386957918527208, "eval_wikibio_token_set_precision": 0.33755201264840085, "eval_wikibio_token_set_recall": 0.33488611119247536, "eval_wikibio_true_num_tokens": 61.1328125, "step": 246875 }, { "epoch": 47.4, "eval_nq_accuracy": 0.5394375, "eval_nq_bleu_score": 12.437865973147138, "eval_nq_bleu_score_sem": 0.5012949543237201, "eval_nq_emb_cos_sim": 0.837842583656311, "eval_nq_emb_cos_sim_sem": 0.00682203838957662, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1043810844421387, "eval_nq_n_ngrams_match_1": 23.618, "eval_nq_n_ngrams_match_2": 8.926, "eval_nq_n_ngrams_match_3": 4.202, "eval_nq_num_pred_words": 49.09, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.202025077425905, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4611810921748841, "eval_nq_runtime": 11.9935, "eval_nq_samples_per_second": 41.689, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.472476325106537, "eval_nq_token_set_f1_sem": 0.005051576596544739, "eval_nq_token_set_precision": 0.43133373470389247, "eval_nq_token_set_recall": 0.5310151216208895, "eval_nq_true_num_tokens": 64.0, "step": 246875 }, { "epoch": 47.4, "learning_rate": 0.001, "loss": 2.4819, "step": 246876 }, { "epoch": 47.41, "learning_rate": 0.001, "loss": 2.4839, "step": 246888 }, { "epoch": 47.41, "learning_rate": 0.001, "loss": 2.4762, "step": 246900 }, { "epoch": 47.41, "learning_rate": 0.001, "loss": 2.4828, "step": 246912 }, { "epoch": 47.41, "learning_rate": 0.001, "loss": 2.4712, "step": 246924 }, { "epoch": 47.41, "learning_rate": 0.001, "loss": 2.4764, "step": 246936 }, { "epoch": 47.42, "learning_rate": 0.001, "loss": 2.4904, "step": 246948 }, { "epoch": 47.42, "learning_rate": 0.001, "loss": 2.4725, "step": 246960 }, { "epoch": 47.42, "learning_rate": 0.001, "loss": 2.4867, "step": 246972 }, { "epoch": 47.42, "learning_rate": 0.001, "loss": 2.4714, "step": 246984 }, { "epoch": 47.43, "learning_rate": 0.001, "loss": 2.473, "step": 246996 }, { "epoch": 47.43, "learning_rate": 0.001, "loss": 2.4778, "step": 247008 }, { "epoch": 47.43, "learning_rate": 0.001, "loss": 2.4914, "step": 247020 }, { "epoch": 47.43, "learning_rate": 0.001, "loss": 2.4722, "step": 247032 }, { "epoch": 47.44, "learning_rate": 0.001, "loss": 2.4867, "step": 247044 }, { "epoch": 47.44, "learning_rate": 0.001, "loss": 2.4774, "step": 247056 }, { "epoch": 47.44, "learning_rate": 0.001, "loss": 2.4834, "step": 247068 }, { "epoch": 47.44, "learning_rate": 0.001, "loss": 2.4805, "step": 247080 }, { "epoch": 47.44, "learning_rate": 0.001, "loss": 2.4815, "step": 247092 }, { "epoch": 47.45, "learning_rate": 0.001, "loss": 2.4946, "step": 247104 }, { "epoch": 47.45, "learning_rate": 0.001, "loss": 2.4899, "step": 247116 }, { "epoch": 47.45, "learning_rate": 0.001, "loss": 2.4881, "step": 247128 }, { "epoch": 47.45, "learning_rate": 0.001, "loss": 2.4779, "step": 247140 }, { "epoch": 47.46, "learning_rate": 0.001, "loss": 2.4854, "step": 247152 }, { "epoch": 47.46, "learning_rate": 0.001, "loss": 2.4838, "step": 247164 }, { "epoch": 47.46, "learning_rate": 0.001, "loss": 2.4776, "step": 247176 }, { "epoch": 47.46, "learning_rate": 0.001, "loss": 2.475, "step": 247188 }, { "epoch": 47.47, "learning_rate": 0.001, "loss": 2.4847, "step": 247200 }, { "epoch": 47.47, "learning_rate": 0.001, "loss": 2.483, "step": 247212 }, { "epoch": 47.47, "learning_rate": 0.001, "loss": 2.4801, "step": 247224 }, { "epoch": 47.47, "learning_rate": 0.001, "loss": 2.4935, "step": 247236 }, { "epoch": 47.47, "learning_rate": 0.001, "loss": 2.4793, "step": 247248 }, { "epoch": 47.48, "learning_rate": 0.001, "loss": 2.4876, "step": 247260 }, { "epoch": 47.48, "learning_rate": 0.001, "loss": 2.4937, "step": 247272 }, { "epoch": 47.48, "learning_rate": 0.001, "loss": 2.4826, "step": 247284 }, { "epoch": 47.48, "learning_rate": 0.001, "loss": 2.4772, "step": 247296 }, { "epoch": 47.49, "learning_rate": 0.001, "loss": 2.474, "step": 247308 }, { "epoch": 47.49, "learning_rate": 0.001, "loss": 2.4781, "step": 247320 }, { "epoch": 47.49, "learning_rate": 0.001, "loss": 2.4865, "step": 247332 }, { "epoch": 47.49, "learning_rate": 0.001, "loss": 2.4802, "step": 247344 }, { "epoch": 47.5, "learning_rate": 0.001, "loss": 2.4853, "step": 247356 }, { "epoch": 47.5, "learning_rate": 0.001, "loss": 2.4808, "step": 247368 }, { "epoch": 47.5, "learning_rate": 0.001, "loss": 2.4746, "step": 247380 }, { "epoch": 47.5, "learning_rate": 0.001, "loss": 2.4784, "step": 247392 }, { "epoch": 47.5, "learning_rate": 0.001, "loss": 2.4812, "step": 247404 }, { "epoch": 47.51, "learning_rate": 0.001, "loss": 2.4945, "step": 247416 }, { "epoch": 47.51, "learning_rate": 0.001, "loss": 2.4845, "step": 247428 }, { "epoch": 47.51, "learning_rate": 0.001, "loss": 2.4745, "step": 247440 }, { "epoch": 47.51, "learning_rate": 0.001, "loss": 2.4785, "step": 247452 }, { "epoch": 47.52, "learning_rate": 0.001, "loss": 2.478, "step": 247464 }, { "epoch": 47.52, "learning_rate": 0.001, "loss": 2.4712, "step": 247476 }, { "epoch": 47.52, "learning_rate": 0.001, "loss": 2.4739, "step": 247488 }, { "epoch": 47.52, "learning_rate": 0.001, "loss": 2.4857, "step": 247500 }, { "epoch": 47.52, "eval_ag_news_accuracy": 0.33175, "eval_ag_news_bleu_score": 5.029422770897393, "eval_ag_news_bleu_score_sem": 0.16380101756335028, "eval_ag_news_emb_cos_sim": 0.8288478851318359, "eval_ag_news_emb_cos_sim_sem": 0.006212303883436563, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4597585201263428, "eval_ag_news_n_ngrams_match_1": 14.672, "eval_ag_news_n_ngrams_match_2": 3.298, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 47.014, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.809294282788297, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3642090152251275, "eval_ag_news_runtime": 12.0264, "eval_ag_news_samples_per_second": 41.575, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.3624695242294932, "eval_ag_news_token_set_f1_sem": 0.004342850011884466, "eval_ag_news_token_set_precision": 0.35081021556791264, "eval_ag_news_token_set_recall": 0.39002082699221696, "eval_ag_news_true_num_tokens": 56.09375, "step": 247500 }, { "epoch": 47.52, "eval_anthropic_toxic_prompts_accuracy": 0.1168125, "eval_anthropic_toxic_prompts_bleu_score": 3.3159917800262, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1199578645385769, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6887571811676025, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009510374463299488, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1992640495300293, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.498, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.1, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.81, "eval_anthropic_toxic_prompts_num_pred_words": 47.916, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.514482112044163, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21999013113870575, "eval_anthropic_toxic_prompts_runtime": 11.4006, "eval_anthropic_toxic_prompts_samples_per_second": 43.857, "eval_anthropic_toxic_prompts_steps_per_second": 0.088, "eval_anthropic_toxic_prompts_token_set_f1": 0.36671549708315293, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006487966368412724, "eval_anthropic_toxic_prompts_token_set_precision": 0.4545445209656313, "eval_anthropic_toxic_prompts_token_set_recall": 0.33112234474259844, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 247500 }, { "epoch": 47.52, "eval_arxiv_accuracy": 0.35709375, "eval_arxiv_bleu_score": 4.55082629701745, "eval_arxiv_bleu_score_sem": 0.12964846411783915, "eval_arxiv_emb_cos_sim": 0.7826601266860962, "eval_arxiv_emb_cos_sim_sem": 0.006554796418425676, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.308187961578369, "eval_arxiv_n_ngrams_match_1": 15.87, "eval_arxiv_n_ngrams_match_2": 3.118, "eval_arxiv_n_ngrams_match_3": 0.718, "eval_arxiv_num_pred_words": 41.796, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.33554750452353, "eval_arxiv_pred_num_tokens": 62.9765625, "eval_arxiv_rouge_score": 0.3761210040313659, "eval_arxiv_runtime": 11.4119, "eval_arxiv_samples_per_second": 43.814, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.3695147895228119, "eval_arxiv_token_set_f1_sem": 0.004085653139148685, "eval_arxiv_token_set_precision": 0.3232452985980358, "eval_arxiv_token_set_recall": 0.44458742916011473, "eval_arxiv_true_num_tokens": 64.0, "step": 247500 }, { "epoch": 47.52, "eval_python_code_alpaca_accuracy": 0.16459375, "eval_python_code_alpaca_bleu_score": 4.853859947409812, "eval_python_code_alpaca_bleu_score_sem": 0.15523373529222304, "eval_python_code_alpaca_emb_cos_sim": 0.7744649052619934, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008070760607445831, "eval_python_code_alpaca_emb_top1_equal": 0.1171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02854125312152025, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.820350408554077, "eval_python_code_alpaca_n_ngrams_match_1": 9.984, "eval_python_code_alpaca_n_ngrams_match_2": 3.058, "eval_python_code_alpaca_n_ngrams_match_3": 1.066, "eval_python_code_alpaca_num_pred_words": 44.282, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.782730454228673, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.33825892757214127, "eval_python_code_alpaca_runtime": 11.173, "eval_python_code_alpaca_samples_per_second": 44.751, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4827229738097629, "eval_python_code_alpaca_token_set_f1_sem": 0.005685745307924299, "eval_python_code_alpaca_token_set_precision": 0.5498215139490301, "eval_python_code_alpaca_token_set_recall": 0.4569048945331764, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 247500 }, { "epoch": 47.52, "eval_wikibio_accuracy": 0.33390625, "eval_wikibio_bleu_score": 6.296963039401893, "eval_wikibio_bleu_score_sem": 0.21705758597565802, "eval_wikibio_emb_cos_sim": 0.7550753951072693, "eval_wikibio_emb_cos_sim_sem": 0.008168087558825105, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6192288398742676, "eval_wikibio_n_ngrams_match_1": 10.388, "eval_wikibio_n_ngrams_match_2": 3.556, "eval_wikibio_n_ngrams_match_3": 1.346, "eval_wikibio_num_pred_words": 36.542, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.30878567780481, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36517735316905797, "eval_wikibio_runtime": 11.0875, "eval_wikibio_samples_per_second": 45.096, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.3285122624127821, "eval_wikibio_token_set_f1_sem": 0.0053412733078049245, "eval_wikibio_token_set_precision": 0.33708939106803176, "eval_wikibio_token_set_recall": 0.3357096609863899, "eval_wikibio_true_num_tokens": 61.1328125, "step": 247500 }, { "epoch": 47.52, "eval_nq_accuracy": 0.54084375, "eval_nq_bleu_score": 12.38619399887228, "eval_nq_bleu_score_sem": 0.504320471283675, "eval_nq_emb_cos_sim": 0.8339306712150574, "eval_nq_emb_cos_sim_sem": 0.007504061326357938, "eval_nq_emb_top1_equal": 0.34375, "eval_nq_emb_top1_equal_sem": 0.04214578430296913, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1038002967834473, "eval_nq_n_ngrams_match_1": 23.684, "eval_nq_n_ngrams_match_2": 8.902, "eval_nq_n_ngrams_match_3": 4.222, "eval_nq_num_pred_words": 49.35, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.197262825547082, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45790492008081407, "eval_nq_runtime": 12.7857, "eval_nq_samples_per_second": 39.106, "eval_nq_steps_per_second": 0.078, "eval_nq_token_set_f1": 0.4723201313880448, "eval_nq_token_set_f1_sem": 0.005060248364909892, "eval_nq_token_set_precision": 0.4317872366237719, "eval_nq_token_set_recall": 0.5298413138759623, "eval_nq_true_num_tokens": 64.0, "step": 247500 }, { "epoch": 47.53, "learning_rate": 0.001, "loss": 2.4815, "step": 247512 }, { "epoch": 47.53, "learning_rate": 0.001, "loss": 2.4827, "step": 247524 }, { "epoch": 47.53, "learning_rate": 0.001, "loss": 2.4861, "step": 247536 }, { "epoch": 47.53, "learning_rate": 0.001, "loss": 2.4859, "step": 247548 }, { "epoch": 47.53, "learning_rate": 0.001, "loss": 2.4864, "step": 247560 }, { "epoch": 47.54, "learning_rate": 0.001, "loss": 2.4809, "step": 247572 }, { "epoch": 47.54, "learning_rate": 0.001, "loss": 2.4852, "step": 247584 }, { "epoch": 47.54, "learning_rate": 0.001, "loss": 2.4839, "step": 247596 }, { "epoch": 47.54, "learning_rate": 0.001, "loss": 2.4755, "step": 247608 }, { "epoch": 47.55, "learning_rate": 0.001, "loss": 2.4766, "step": 247620 }, { "epoch": 47.55, "learning_rate": 0.001, "loss": 2.4822, "step": 247632 }, { "epoch": 47.55, "learning_rate": 0.001, "loss": 2.4787, "step": 247644 }, { "epoch": 47.55, "learning_rate": 0.001, "loss": 2.4762, "step": 247656 }, { "epoch": 47.56, "learning_rate": 0.001, "loss": 2.4833, "step": 247668 }, { "epoch": 47.56, "learning_rate": 0.001, "loss": 2.478, "step": 247680 }, { "epoch": 47.56, "learning_rate": 0.001, "loss": 2.491, "step": 247692 }, { "epoch": 47.56, "learning_rate": 0.001, "loss": 2.4805, "step": 247704 }, { "epoch": 47.56, "learning_rate": 0.001, "loss": 2.479, "step": 247716 }, { "epoch": 47.57, "learning_rate": 0.001, "loss": 2.4813, "step": 247728 }, { "epoch": 47.57, "learning_rate": 0.001, "loss": 2.4787, "step": 247740 }, { "epoch": 47.57, "learning_rate": 0.001, "loss": 2.4919, "step": 247752 }, { "epoch": 47.57, "learning_rate": 0.001, "loss": 2.4705, "step": 247764 }, { "epoch": 47.58, "learning_rate": 0.001, "loss": 2.4765, "step": 247776 }, { "epoch": 47.58, "learning_rate": 0.001, "loss": 2.4793, "step": 247788 }, { "epoch": 47.58, "learning_rate": 0.001, "loss": 2.4982, "step": 247800 }, { "epoch": 47.58, "learning_rate": 0.001, "loss": 2.4877, "step": 247812 }, { "epoch": 47.59, "learning_rate": 0.001, "loss": 2.4852, "step": 247824 }, { "epoch": 47.59, "learning_rate": 0.001, "loss": 2.4766, "step": 247836 }, { "epoch": 47.59, "learning_rate": 0.001, "loss": 2.4843, "step": 247848 }, { "epoch": 47.59, "learning_rate": 0.001, "loss": 2.4887, "step": 247860 }, { "epoch": 47.59, "learning_rate": 0.001, "loss": 2.4871, "step": 247872 }, { "epoch": 47.6, "learning_rate": 0.001, "loss": 2.4727, "step": 247884 }, { "epoch": 47.6, "learning_rate": 0.001, "loss": 2.4774, "step": 247896 }, { "epoch": 47.6, "learning_rate": 0.001, "loss": 2.4849, "step": 247908 }, { "epoch": 47.6, "learning_rate": 0.001, "loss": 2.4818, "step": 247920 }, { "epoch": 47.61, "learning_rate": 0.001, "loss": 2.47, "step": 247932 }, { "epoch": 47.61, "learning_rate": 0.001, "loss": 2.4845, "step": 247944 }, { "epoch": 47.61, "learning_rate": 0.001, "loss": 2.4705, "step": 247956 }, { "epoch": 47.61, "learning_rate": 0.001, "loss": 2.4761, "step": 247968 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 2.484, "step": 247980 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 2.4843, "step": 247992 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 2.475, "step": 248004 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 2.4837, "step": 248016 }, { "epoch": 47.62, "learning_rate": 0.001, "loss": 2.4839, "step": 248028 }, { "epoch": 47.63, "learning_rate": 0.001, "loss": 2.4808, "step": 248040 }, { "epoch": 47.63, "learning_rate": 0.001, "loss": 2.4874, "step": 248052 }, { "epoch": 47.63, "learning_rate": 0.001, "loss": 2.4925, "step": 248064 }, { "epoch": 47.63, "learning_rate": 0.001, "loss": 2.4883, "step": 248076 }, { "epoch": 47.64, "learning_rate": 0.001, "loss": 2.4914, "step": 248088 }, { "epoch": 47.64, "learning_rate": 0.001, "loss": 2.479, "step": 248100 }, { "epoch": 47.64, "learning_rate": 0.001, "loss": 2.4816, "step": 248112 }, { "epoch": 47.64, "learning_rate": 0.001, "loss": 2.4875, "step": 248124 }, { "epoch": 47.64, "eval_ag_news_accuracy": 0.3326875, "eval_ag_news_bleu_score": 4.991878825321206, "eval_ag_news_bleu_score_sem": 0.1594841729142032, "eval_ag_news_emb_cos_sim": 0.8210830688476562, "eval_ag_news_emb_cos_sim_sem": 0.007206565820510269, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4462733268737793, "eval_ag_news_n_ngrams_match_1": 14.49, "eval_ag_news_n_ngrams_match_2": 3.268, "eval_ag_news_n_ngrams_match_3": 0.902, "eval_ag_news_num_pred_words": 46.324, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.38321911197659, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3623592694074649, "eval_ag_news_runtime": 12.0906, "eval_ag_news_samples_per_second": 41.354, "eval_ag_news_steps_per_second": 0.083, "eval_ag_news_token_set_f1": 0.36091635249547704, "eval_ag_news_token_set_f1_sem": 0.004384558194972008, "eval_ag_news_token_set_precision": 0.3475602129415628, "eval_ag_news_token_set_recall": 0.39079082250721275, "eval_ag_news_true_num_tokens": 56.09375, "step": 248125 }, { "epoch": 47.64, "eval_anthropic_toxic_prompts_accuracy": 0.1169375, "eval_anthropic_toxic_prompts_bleu_score": 3.293148509397367, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1185256629904491, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6882367730140686, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009334609592663407, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1941885948181152, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.454, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.044, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.81, "eval_anthropic_toxic_prompts_num_pred_words": 47.574, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.39037518426273, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22141263498086622, "eval_anthropic_toxic_prompts_runtime": 13.082, "eval_anthropic_toxic_prompts_samples_per_second": 38.22, "eval_anthropic_toxic_prompts_steps_per_second": 0.076, "eval_anthropic_toxic_prompts_token_set_f1": 0.36222316663025583, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.00650730959606252, "eval_anthropic_toxic_prompts_token_set_precision": 0.45246064724649093, "eval_anthropic_toxic_prompts_token_set_recall": 0.32619813806069026, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 248125 }, { "epoch": 47.64, "eval_arxiv_accuracy": 0.35634375, "eval_arxiv_bleu_score": 4.447612161166064, "eval_arxiv_bleu_score_sem": 0.1323879350373058, "eval_arxiv_emb_cos_sim": 0.7812366485595703, "eval_arxiv_emb_cos_sim_sem": 0.007202708066782506, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.310638666152954, "eval_arxiv_n_ngrams_match_1": 15.67, "eval_arxiv_n_ngrams_match_2": 3.11, "eval_arxiv_n_ngrams_match_3": 0.676, "eval_arxiv_num_pred_words": 40.206, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.402621010946135, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3756236847446874, "eval_arxiv_runtime": 11.9695, "eval_arxiv_samples_per_second": 41.773, "eval_arxiv_steps_per_second": 0.084, "eval_arxiv_token_set_f1": 0.3666380583630598, "eval_arxiv_token_set_f1_sem": 0.004279919035351922, "eval_arxiv_token_set_precision": 0.32057749850464246, "eval_arxiv_token_set_recall": 0.44920929263093656, "eval_arxiv_true_num_tokens": 64.0, "step": 248125 }, { "epoch": 47.64, "eval_python_code_alpaca_accuracy": 0.163875, "eval_python_code_alpaca_bleu_score": 4.757590874097533, "eval_python_code_alpaca_bleu_score_sem": 0.14959500405046608, "eval_python_code_alpaca_emb_cos_sim": 0.7674453854560852, "eval_python_code_alpaca_emb_cos_sim_sem": 0.008086995046656623, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8286101818084717, "eval_python_code_alpaca_n_ngrams_match_1": 10.186, "eval_python_code_alpaca_n_ngrams_match_2": 3.002, "eval_python_code_alpaca_n_ngrams_match_3": 0.984, "eval_python_code_alpaca_num_pred_words": 43.604, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.921926073124364, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3477354298339224, "eval_python_code_alpaca_runtime": 17.237, "eval_python_code_alpaca_samples_per_second": 29.007, "eval_python_code_alpaca_steps_per_second": 0.058, "eval_python_code_alpaca_token_set_f1": 0.48834096784614445, "eval_python_code_alpaca_token_set_f1_sem": 0.005232869025755022, "eval_python_code_alpaca_token_set_precision": 0.5592495199402681, "eval_python_code_alpaca_token_set_recall": 0.4567530569456588, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 248125 }, { "epoch": 47.64, "eval_wikibio_accuracy": 0.33115625, "eval_wikibio_bleu_score": 6.173020548637241, "eval_wikibio_bleu_score_sem": 0.2162235697229472, "eval_wikibio_emb_cos_sim": 0.7419592142105103, "eval_wikibio_emb_cos_sim_sem": 0.008961096977170277, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6299922466278076, "eval_wikibio_n_ngrams_match_1": 10.252, "eval_wikibio_n_ngrams_match_2": 3.46, "eval_wikibio_n_ngrams_match_3": 1.29, "eval_wikibio_num_pred_words": 36.578, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.71252421681164, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36247058554973255, "eval_wikibio_runtime": 12.4009, "eval_wikibio_samples_per_second": 40.32, "eval_wikibio_steps_per_second": 0.081, "eval_wikibio_token_set_f1": 0.32500288959311974, "eval_wikibio_token_set_f1_sem": 0.0052746729137608415, "eval_wikibio_token_set_precision": 0.33421849558845984, "eval_wikibio_token_set_recall": 0.330506573524447, "eval_wikibio_true_num_tokens": 61.1328125, "step": 248125 }, { "epoch": 47.64, "eval_nq_accuracy": 0.53865625, "eval_nq_bleu_score": 12.164957660255928, "eval_nq_bleu_score_sem": 0.49219298409302564, "eval_nq_emb_cos_sim": 0.8435643911361694, "eval_nq_emb_cos_sim_sem": 0.0065411731081890375, "eval_nq_emb_top1_equal": 0.359375, "eval_nq_emb_top1_equal_sem": 0.04257689651385297, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1049978733062744, "eval_nq_n_ngrams_match_1": 23.524, "eval_nq_n_ngrams_match_2": 8.772, "eval_nq_n_ngrams_match_3": 4.046, "eval_nq_num_pred_words": 49.052, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.207085555619893, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4580813632384835, "eval_nq_runtime": 21.1875, "eval_nq_samples_per_second": 23.599, "eval_nq_steps_per_second": 0.047, "eval_nq_token_set_f1": 0.4672014668803913, "eval_nq_token_set_f1_sem": 0.004979673036775632, "eval_nq_token_set_precision": 0.4267685199957736, "eval_nq_token_set_recall": 0.5234748105371331, "eval_nq_true_num_tokens": 64.0, "step": 248125 }, { "epoch": 47.65, "learning_rate": 0.001, "loss": 2.4817, "step": 248136 }, { "epoch": 47.65, "learning_rate": 0.001, "loss": 2.4858, "step": 248148 }, { "epoch": 47.65, "learning_rate": 0.001, "loss": 2.4745, "step": 248160 }, { "epoch": 47.65, "learning_rate": 0.001, "loss": 2.4942, "step": 248172 }, { "epoch": 47.65, "learning_rate": 0.001, "loss": 2.4843, "step": 248184 }, { "epoch": 47.66, "learning_rate": 0.001, "loss": 2.4902, "step": 248196 }, { "epoch": 47.66, "learning_rate": 0.001, "loss": 2.4777, "step": 248208 }, { "epoch": 47.66, "learning_rate": 0.001, "loss": 2.4845, "step": 248220 }, { "epoch": 47.66, "learning_rate": 0.001, "loss": 2.477, "step": 248232 }, { "epoch": 47.67, "learning_rate": 0.001, "loss": 2.4823, "step": 248244 }, { "epoch": 47.67, "learning_rate": 0.001, "loss": 2.4853, "step": 248256 }, { "epoch": 47.67, "learning_rate": 0.001, "loss": 2.4887, "step": 248268 }, { "epoch": 47.67, "learning_rate": 0.001, "loss": 2.4888, "step": 248280 }, { "epoch": 47.68, "learning_rate": 0.001, "loss": 2.4854, "step": 248292 }, { "epoch": 47.68, "learning_rate": 0.001, "loss": 2.4742, "step": 248304 }, { "epoch": 47.68, "learning_rate": 0.001, "loss": 2.4896, "step": 248316 }, { "epoch": 47.68, "learning_rate": 0.001, "loss": 2.4811, "step": 248328 }, { "epoch": 47.68, "learning_rate": 0.001, "loss": 2.4766, "step": 248340 }, { "epoch": 47.69, "learning_rate": 0.001, "loss": 2.4873, "step": 248352 }, { "epoch": 47.69, "learning_rate": 0.001, "loss": 2.4827, "step": 248364 }, { "epoch": 47.69, "learning_rate": 0.001, "loss": 2.49, "step": 248376 }, { "epoch": 47.69, "learning_rate": 0.001, "loss": 2.4791, "step": 248388 }, { "epoch": 47.7, "learning_rate": 0.001, "loss": 2.4775, "step": 248400 }, { "epoch": 47.7, "learning_rate": 0.001, "loss": 2.488, "step": 248412 }, { "epoch": 47.7, "learning_rate": 0.001, "loss": 2.4841, "step": 248424 }, { "epoch": 47.7, "learning_rate": 0.001, "loss": 2.494, "step": 248436 }, { "epoch": 47.71, "learning_rate": 0.001, "loss": 2.4935, "step": 248448 }, { "epoch": 47.71, "learning_rate": 0.001, "loss": 2.4938, "step": 248460 }, { "epoch": 47.71, "learning_rate": 0.001, "loss": 2.4897, "step": 248472 }, { "epoch": 47.71, "learning_rate": 0.001, "loss": 2.4979, "step": 248484 }, { "epoch": 47.71, "learning_rate": 0.001, "loss": 2.4933, "step": 248496 }, { "epoch": 47.72, "learning_rate": 0.001, "loss": 2.481, "step": 248508 }, { "epoch": 47.72, "learning_rate": 0.001, "loss": 2.494, "step": 248520 }, { "epoch": 47.72, "learning_rate": 0.001, "loss": 2.4901, "step": 248532 }, { "epoch": 47.72, "learning_rate": 0.001, "loss": 2.4845, "step": 248544 }, { "epoch": 47.73, "learning_rate": 0.001, "loss": 2.4925, "step": 248556 }, { "epoch": 47.73, "learning_rate": 0.001, "loss": 2.4872, "step": 248568 }, { "epoch": 47.73, "learning_rate": 0.001, "loss": 2.4876, "step": 248580 }, { "epoch": 47.73, "learning_rate": 0.001, "loss": 2.4715, "step": 248592 }, { "epoch": 47.74, "learning_rate": 0.001, "loss": 2.4873, "step": 248604 }, { "epoch": 47.74, "learning_rate": 0.001, "loss": 2.4829, "step": 248616 }, { "epoch": 47.74, "learning_rate": 0.001, "loss": 2.4952, "step": 248628 }, { "epoch": 47.74, "learning_rate": 0.001, "loss": 2.4807, "step": 248640 }, { "epoch": 47.74, "learning_rate": 0.001, "loss": 2.4971, "step": 248652 }, { "epoch": 47.75, "learning_rate": 0.001, "loss": 2.4779, "step": 248664 }, { "epoch": 47.75, "learning_rate": 0.001, "loss": 2.4958, "step": 248676 }, { "epoch": 47.75, "learning_rate": 0.001, "loss": 2.4811, "step": 248688 }, { "epoch": 47.75, "learning_rate": 0.001, "loss": 2.4801, "step": 248700 }, { "epoch": 47.76, "learning_rate": 0.001, "loss": 2.4953, "step": 248712 }, { "epoch": 47.76, "learning_rate": 0.001, "loss": 2.4857, "step": 248724 }, { "epoch": 47.76, "learning_rate": 0.001, "loss": 2.4883, "step": 248736 }, { "epoch": 47.76, "learning_rate": 0.001, "loss": 2.4865, "step": 248748 }, { "epoch": 47.76, "eval_ag_news_accuracy": 0.33178125, "eval_ag_news_bleu_score": 5.1813409721901404, "eval_ag_news_bleu_score_sem": 0.16947469492284958, "eval_ag_news_emb_cos_sim": 0.8197761178016663, "eval_ag_news_emb_cos_sim_sem": 0.0067856189026516515, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4435057640075684, "eval_ag_news_n_ngrams_match_1": 14.658, "eval_ag_news_n_ngrams_match_2": 3.388, "eval_ag_news_n_ngrams_match_3": 0.99, "eval_ag_news_num_pred_words": 46.894, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.296484157720922, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3650632896430201, "eval_ag_news_runtime": 12.5634, "eval_ag_news_samples_per_second": 39.798, "eval_ag_news_steps_per_second": 0.08, "eval_ag_news_token_set_f1": 0.36405006717945027, "eval_ag_news_token_set_f1_sem": 0.0045575006687500145, "eval_ag_news_token_set_precision": 0.35088080169794217, "eval_ag_news_token_set_recall": 0.39225027686730346, "eval_ag_news_true_num_tokens": 56.09375, "step": 248750 }, { "epoch": 47.76, "eval_anthropic_toxic_prompts_accuracy": 0.1171875, "eval_anthropic_toxic_prompts_bleu_score": 3.338124336445745, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.126176027487603, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6882451772689819, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009843266077906719, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.183750629425049, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.456, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.068, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.816, "eval_anthropic_toxic_prompts_num_pred_words": 47.178, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.137113360666334, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21981441704678467, "eval_anthropic_toxic_prompts_runtime": 13.3897, "eval_anthropic_toxic_prompts_samples_per_second": 37.342, "eval_anthropic_toxic_prompts_steps_per_second": 0.075, "eval_anthropic_toxic_prompts_token_set_f1": 0.35698655829917686, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006383206273064008, "eval_anthropic_toxic_prompts_token_set_precision": 0.4492739199412488, "eval_anthropic_toxic_prompts_token_set_recall": 0.3209038862451919, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 248750 }, { "epoch": 47.76, "eval_arxiv_accuracy": 0.35425, "eval_arxiv_bleu_score": 4.546258497820296, "eval_arxiv_bleu_score_sem": 0.13208640610322758, "eval_arxiv_emb_cos_sim": 0.779262900352478, "eval_arxiv_emb_cos_sim_sem": 0.008558350912423286, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3064181804656982, "eval_arxiv_n_ngrams_match_1": 15.654, "eval_arxiv_n_ngrams_match_2": 3.148, "eval_arxiv_n_ngrams_match_3": 0.722, "eval_arxiv_num_pred_words": 40.408, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.2872123527808, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3744534864106186, "eval_arxiv_runtime": 12.3596, "eval_arxiv_samples_per_second": 40.455, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.3672515792691267, "eval_arxiv_token_set_f1_sem": 0.004318107325377595, "eval_arxiv_token_set_precision": 0.31965560457117304, "eval_arxiv_token_set_recall": 0.44986833436728046, "eval_arxiv_true_num_tokens": 64.0, "step": 248750 }, { "epoch": 47.76, "eval_python_code_alpaca_accuracy": 0.1634375, "eval_python_code_alpaca_bleu_score": 4.840313444083068, "eval_python_code_alpaca_bleu_score_sem": 0.15701355395283076, "eval_python_code_alpaca_emb_cos_sim": 0.7639352083206177, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00903370546044242, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8244967460632324, "eval_python_code_alpaca_n_ngrams_match_1": 10.076, "eval_python_code_alpaca_n_ngrams_match_2": 3.062, "eval_python_code_alpaca_n_ngrams_match_3": 1.052, "eval_python_code_alpaca_num_pred_words": 42.918, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.852461783928643, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34581750969477737, "eval_python_code_alpaca_runtime": 18.9461, "eval_python_code_alpaca_samples_per_second": 26.391, "eval_python_code_alpaca_steps_per_second": 0.053, "eval_python_code_alpaca_token_set_f1": 0.47879834858823295, "eval_python_code_alpaca_token_set_f1_sem": 0.005547133602944839, "eval_python_code_alpaca_token_set_precision": 0.5533665450521097, "eval_python_code_alpaca_token_set_recall": 0.4478643519443287, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 248750 }, { "epoch": 47.76, "eval_wikibio_accuracy": 0.33171875, "eval_wikibio_bleu_score": 6.291519913828881, "eval_wikibio_bleu_score_sem": 0.21300405143483134, "eval_wikibio_emb_cos_sim": 0.756807804107666, "eval_wikibio_emb_cos_sim_sem": 0.007285925043533743, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6334481239318848, "eval_wikibio_n_ngrams_match_1": 10.43, "eval_wikibio_n_ngrams_match_2": 3.524, "eval_wikibio_n_ngrams_match_3": 1.322, "eval_wikibio_num_pred_words": 36.418, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.843079534976354, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36813694957930015, "eval_wikibio_runtime": 13.3914, "eval_wikibio_samples_per_second": 37.337, "eval_wikibio_steps_per_second": 0.075, "eval_wikibio_token_set_f1": 0.33083042184134814, "eval_wikibio_token_set_f1_sem": 0.004997707837593411, "eval_wikibio_token_set_precision": 0.3401205010532537, "eval_wikibio_token_set_recall": 0.33678032830033583, "eval_wikibio_true_num_tokens": 61.1328125, "step": 248750 }, { "epoch": 47.76, "eval_nq_accuracy": 0.5383125, "eval_nq_bleu_score": 12.350032568777765, "eval_nq_bleu_score_sem": 0.4891152951987281, "eval_nq_emb_cos_sim": 0.8401965498924255, "eval_nq_emb_cos_sim_sem": 0.006955711070895968, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1046643257141113, "eval_nq_n_ngrams_match_1": 23.66, "eval_nq_n_ngrams_match_2": 8.838, "eval_nq_n_ngrams_match_3": 4.124, "eval_nq_num_pred_words": 49.03, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.204348558478918, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4593533050827407, "eval_nq_runtime": 12.5346, "eval_nq_samples_per_second": 39.89, "eval_nq_steps_per_second": 0.08, "eval_nq_token_set_f1": 0.4725723252747118, "eval_nq_token_set_f1_sem": 0.004943369866499317, "eval_nq_token_set_precision": 0.43251952547362293, "eval_nq_token_set_recall": 0.5284720460052428, "eval_nq_true_num_tokens": 64.0, "step": 248750 }, { "epoch": 47.76, "learning_rate": 0.001, "loss": 2.4817, "step": 248760 }, { "epoch": 47.77, "learning_rate": 0.001, "loss": 2.4898, "step": 248772 }, { "epoch": 47.77, "learning_rate": 0.001, "loss": 2.4987, "step": 248784 }, { "epoch": 47.77, "learning_rate": 0.001, "loss": 2.4872, "step": 248796 }, { "epoch": 47.77, "learning_rate": 0.001, "loss": 2.4897, "step": 248808 }, { "epoch": 47.78, "learning_rate": 0.001, "loss": 2.4786, "step": 248820 }, { "epoch": 47.78, "learning_rate": 0.001, "loss": 2.477, "step": 248832 }, { "epoch": 47.78, "learning_rate": 0.001, "loss": 2.482, "step": 248844 }, { "epoch": 47.78, "learning_rate": 0.001, "loss": 2.4833, "step": 248856 }, { "epoch": 47.79, "learning_rate": 0.001, "loss": 2.4883, "step": 248868 }, { "epoch": 47.79, "learning_rate": 0.001, "loss": 2.4899, "step": 248880 }, { "epoch": 47.79, "learning_rate": 0.001, "loss": 2.4948, "step": 248892 }, { "epoch": 47.79, "learning_rate": 0.001, "loss": 2.4871, "step": 248904 }, { "epoch": 47.79, "learning_rate": 0.001, "loss": 2.4915, "step": 248916 }, { "epoch": 47.8, "learning_rate": 0.001, "loss": 2.4857, "step": 248928 }, { "epoch": 47.8, "learning_rate": 0.001, "loss": 2.491, "step": 248940 }, { "epoch": 47.8, "learning_rate": 0.001, "loss": 2.4935, "step": 248952 }, { "epoch": 47.8, "learning_rate": 0.001, "loss": 2.4784, "step": 248964 }, { "epoch": 47.81, "learning_rate": 0.001, "loss": 2.4817, "step": 248976 }, { "epoch": 47.81, "learning_rate": 0.001, "loss": 2.4856, "step": 248988 }, { "epoch": 47.81, "learning_rate": 0.001, "loss": 2.4872, "step": 249000 }, { "epoch": 47.81, "learning_rate": 0.001, "loss": 2.4829, "step": 249012 }, { "epoch": 47.82, "learning_rate": 0.001, "loss": 2.4787, "step": 249024 }, { "epoch": 47.82, "learning_rate": 0.001, "loss": 2.4835, "step": 249036 }, { "epoch": 47.82, "learning_rate": 0.001, "loss": 2.4887, "step": 249048 }, { "epoch": 47.82, "learning_rate": 0.001, "loss": 2.4878, "step": 249060 }, { "epoch": 47.82, "learning_rate": 0.001, "loss": 2.4921, "step": 249072 }, { "epoch": 47.83, "learning_rate": 0.001, "loss": 2.4895, "step": 249084 }, { "epoch": 47.83, "learning_rate": 0.001, "loss": 2.4799, "step": 249096 }, { "epoch": 47.83, "learning_rate": 0.001, "loss": 2.4861, "step": 249108 }, { "epoch": 47.83, "learning_rate": 0.001, "loss": 2.4817, "step": 249120 }, { "epoch": 47.84, "learning_rate": 0.001, "loss": 2.4826, "step": 249132 }, { "epoch": 47.84, "learning_rate": 0.001, "loss": 2.4783, "step": 249144 }, { "epoch": 47.84, "learning_rate": 0.001, "loss": 2.4779, "step": 249156 }, { "epoch": 47.84, "learning_rate": 0.001, "loss": 2.4892, "step": 249168 }, { "epoch": 47.85, "learning_rate": 0.001, "loss": 2.4802, "step": 249180 }, { "epoch": 47.85, "learning_rate": 0.001, "loss": 2.4819, "step": 249192 }, { "epoch": 47.85, "learning_rate": 0.001, "loss": 2.4866, "step": 249204 }, { "epoch": 47.85, "learning_rate": 0.001, "loss": 2.4819, "step": 249216 }, { "epoch": 47.85, "learning_rate": 0.001, "loss": 2.4974, "step": 249228 }, { "epoch": 47.86, "learning_rate": 0.001, "loss": 2.4832, "step": 249240 }, { "epoch": 47.86, "learning_rate": 0.001, "loss": 2.4809, "step": 249252 }, { "epoch": 47.86, "learning_rate": 0.001, "loss": 2.4834, "step": 249264 }, { "epoch": 47.86, "learning_rate": 0.001, "loss": 2.5022, "step": 249276 }, { "epoch": 47.87, "learning_rate": 0.001, "loss": 2.489, "step": 249288 }, { "epoch": 47.87, "learning_rate": 0.001, "loss": 2.4845, "step": 249300 }, { "epoch": 47.87, "learning_rate": 0.001, "loss": 2.49, "step": 249312 }, { "epoch": 47.87, "learning_rate": 0.001, "loss": 2.4892, "step": 249324 }, { "epoch": 47.88, "learning_rate": 0.001, "loss": 2.4948, "step": 249336 }, { "epoch": 47.88, "learning_rate": 0.001, "loss": 2.4893, "step": 249348 }, { "epoch": 47.88, "learning_rate": 0.001, "loss": 2.4996, "step": 249360 }, { "epoch": 47.88, "learning_rate": 0.001, "loss": 2.4928, "step": 249372 }, { "epoch": 47.88, "eval_ag_news_accuracy": 0.3326875, "eval_ag_news_bleu_score": 5.094311246219722, "eval_ag_news_bleu_score_sem": 0.16178231435843113, "eval_ag_news_emb_cos_sim": 0.8272091746330261, "eval_ag_news_emb_cos_sim_sem": 0.0058888149673143085, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4528400897979736, "eval_ag_news_n_ngrams_match_1": 14.64, "eval_ag_news_n_ngrams_match_2": 3.36, "eval_ag_news_n_ngrams_match_3": 0.962, "eval_ag_news_num_pred_words": 47.15, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.589983414748275, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3650431527465434, "eval_ag_news_runtime": 16.4939, "eval_ag_news_samples_per_second": 30.314, "eval_ag_news_steps_per_second": 0.061, "eval_ag_news_token_set_f1": 0.36372649564581144, "eval_ag_news_token_set_f1_sem": 0.004362924790996712, "eval_ag_news_token_set_precision": 0.35209632210321207, "eval_ag_news_token_set_recall": 0.39088022647759174, "eval_ag_news_true_num_tokens": 56.09375, "step": 249375 }, { "epoch": 47.88, "eval_anthropic_toxic_prompts_accuracy": 0.116, "eval_anthropic_toxic_prompts_bleu_score": 3.3111473594140794, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12225925352009366, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.691220760345459, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009095238542179309, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.181739330291748, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.524, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.09, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.822, "eval_anthropic_toxic_prompts_num_pred_words": 48.108, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.088615193993174, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21980840822097467, "eval_anthropic_toxic_prompts_runtime": 11.4759, "eval_anthropic_toxic_prompts_samples_per_second": 43.57, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.36636641612646337, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006508005136516142, "eval_anthropic_toxic_prompts_token_set_precision": 0.45786588999368416, "eval_anthropic_toxic_prompts_token_set_recall": 0.3323222626343548, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 249375 }, { "epoch": 47.88, "eval_arxiv_accuracy": 0.3566875, "eval_arxiv_bleu_score": 4.627996455625923, "eval_arxiv_bleu_score_sem": 0.14153317852089609, "eval_arxiv_emb_cos_sim": 0.790118932723999, "eval_arxiv_emb_cos_sim_sem": 0.006327755955309046, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.311711311340332, "eval_arxiv_n_ngrams_match_1": 15.966, "eval_arxiv_n_ngrams_match_2": 3.174, "eval_arxiv_n_ngrams_match_3": 0.73, "eval_arxiv_num_pred_words": 41.262, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.432030070418364, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3781419794927081, "eval_arxiv_runtime": 12.1669, "eval_arxiv_samples_per_second": 41.095, "eval_arxiv_steps_per_second": 0.082, "eval_arxiv_token_set_f1": 0.37344940864008863, "eval_arxiv_token_set_f1_sem": 0.004335465312531154, "eval_arxiv_token_set_precision": 0.3273930618648491, "eval_arxiv_token_set_recall": 0.45024986199642736, "eval_arxiv_true_num_tokens": 64.0, "step": 249375 }, { "epoch": 47.88, "eval_python_code_alpaca_accuracy": 0.16425, "eval_python_code_alpaca_bleu_score": 4.739159174673058, "eval_python_code_alpaca_bleu_score_sem": 0.1465690038225698, "eval_python_code_alpaca_emb_cos_sim": 0.7764459848403931, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007967120189868528, "eval_python_code_alpaca_emb_top1_equal": 0.125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.02934655822437397, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.845170736312866, "eval_python_code_alpaca_n_ngrams_match_1": 10.1, "eval_python_code_alpaca_n_ngrams_match_2": 3.054, "eval_python_code_alpaca_n_ngrams_match_3": 1.016, "eval_python_code_alpaca_num_pred_words": 43.646, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.204495850362754, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34362570934376124, "eval_python_code_alpaca_runtime": 11.1557, "eval_python_code_alpaca_samples_per_second": 44.82, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4833799893595455, "eval_python_code_alpaca_token_set_f1_sem": 0.0055056761700500775, "eval_python_code_alpaca_token_set_precision": 0.5531893356831864, "eval_python_code_alpaca_token_set_recall": 0.45541811555591716, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 249375 }, { "epoch": 47.88, "eval_wikibio_accuracy": 0.33440625, "eval_wikibio_bleu_score": 6.316261810104175, "eval_wikibio_bleu_score_sem": 0.21310147077984917, "eval_wikibio_emb_cos_sim": 0.7608048915863037, "eval_wikibio_emb_cos_sim_sem": 0.008395486928698587, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6134183406829834, "eval_wikibio_n_ngrams_match_1": 10.36, "eval_wikibio_n_ngrams_match_2": 3.574, "eval_wikibio_n_ngrams_match_3": 1.35, "eval_wikibio_num_pred_words": 36.58, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.09263159849502, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3675050112083451, "eval_wikibio_runtime": 11.2377, "eval_wikibio_samples_per_second": 44.493, "eval_wikibio_steps_per_second": 0.089, "eval_wikibio_token_set_f1": 0.32762593209439095, "eval_wikibio_token_set_f1_sem": 0.005229883661863675, "eval_wikibio_token_set_precision": 0.33849174256731424, "eval_wikibio_token_set_recall": 0.3318670215393933, "eval_wikibio_true_num_tokens": 61.1328125, "step": 249375 }, { "epoch": 47.88, "eval_nq_accuracy": 0.53959375, "eval_nq_bleu_score": 12.404047305223402, "eval_nq_bleu_score_sem": 0.4993819882216008, "eval_nq_emb_cos_sim": 0.8405977487564087, "eval_nq_emb_cos_sim_sem": 0.007364287888716638, "eval_nq_emb_top1_equal": 0.28125, "eval_nq_emb_top1_equal_sem": 0.039896367485272234, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1072959899902344, "eval_nq_n_ngrams_match_1": 23.684, "eval_nq_n_ngrams_match_2": 8.95, "eval_nq_n_ngrams_match_3": 4.208, "eval_nq_num_pred_words": 49.034, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.225968084679062, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4603848110094526, "eval_nq_runtime": 12.1299, "eval_nq_samples_per_second": 41.22, "eval_nq_steps_per_second": 0.082, "eval_nq_token_set_f1": 0.47281263431806797, "eval_nq_token_set_f1_sem": 0.004971267093609227, "eval_nq_token_set_precision": 0.43155680507352223, "eval_nq_token_set_recall": 0.530710810333764, "eval_nq_true_num_tokens": 64.0, "step": 249375 }, { "epoch": 47.88, "learning_rate": 0.001, "loss": 2.4966, "step": 249384 }, { "epoch": 47.89, "learning_rate": 0.001, "loss": 2.4847, "step": 249396 }, { "epoch": 47.89, "learning_rate": 0.001, "loss": 2.4937, "step": 249408 }, { "epoch": 47.89, "learning_rate": 0.001, "loss": 2.4839, "step": 249420 }, { "epoch": 47.89, "learning_rate": 0.001, "loss": 2.498, "step": 249432 }, { "epoch": 47.9, "learning_rate": 0.001, "loss": 2.4863, "step": 249444 }, { "epoch": 47.9, "learning_rate": 0.001, "loss": 2.4839, "step": 249456 }, { "epoch": 47.9, "learning_rate": 0.001, "loss": 2.4898, "step": 249468 }, { "epoch": 47.9, "learning_rate": 0.001, "loss": 2.4818, "step": 249480 }, { "epoch": 47.91, "learning_rate": 0.001, "loss": 2.4859, "step": 249492 }, { "epoch": 47.91, "learning_rate": 0.001, "loss": 2.4892, "step": 249504 }, { "epoch": 47.91, "learning_rate": 0.001, "loss": 2.4951, "step": 249516 }, { "epoch": 47.91, "learning_rate": 0.001, "loss": 2.4849, "step": 249528 }, { "epoch": 47.91, "learning_rate": 0.001, "loss": 2.4946, "step": 249540 }, { "epoch": 47.92, "learning_rate": 0.001, "loss": 2.4867, "step": 249552 }, { "epoch": 47.92, "learning_rate": 0.001, "loss": 2.4928, "step": 249564 }, { "epoch": 47.92, "learning_rate": 0.001, "loss": 2.4903, "step": 249576 }, { "epoch": 47.92, "learning_rate": 0.001, "loss": 2.4918, "step": 249588 }, { "epoch": 47.93, "learning_rate": 0.001, "loss": 2.489, "step": 249600 }, { "epoch": 47.93, "learning_rate": 0.001, "loss": 2.4779, "step": 249612 }, { "epoch": 47.93, "learning_rate": 0.001, "loss": 2.4896, "step": 249624 }, { "epoch": 47.93, "learning_rate": 0.001, "loss": 2.4885, "step": 249636 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 2.48, "step": 249648 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 2.4772, "step": 249660 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 2.4834, "step": 249672 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 2.4836, "step": 249684 }, { "epoch": 47.94, "learning_rate": 0.001, "loss": 2.4821, "step": 249696 }, { "epoch": 47.95, "learning_rate": 0.001, "loss": 2.4818, "step": 249708 }, { "epoch": 47.95, "learning_rate": 0.001, "loss": 2.484, "step": 249720 }, { "epoch": 47.95, "learning_rate": 0.001, "loss": 2.4875, "step": 249732 }, { "epoch": 47.95, "learning_rate": 0.001, "loss": 2.4954, "step": 249744 }, { "epoch": 47.96, "learning_rate": 0.001, "loss": 2.4974, "step": 249756 }, { "epoch": 47.96, "learning_rate": 0.001, "loss": 2.4867, "step": 249768 }, { "epoch": 47.96, "learning_rate": 0.001, "loss": 2.4809, "step": 249780 }, { "epoch": 47.96, "learning_rate": 0.001, "loss": 2.4928, "step": 249792 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 2.4869, "step": 249804 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 2.4839, "step": 249816 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 2.4883, "step": 249828 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 2.5035, "step": 249840 }, { "epoch": 47.97, "learning_rate": 0.001, "loss": 2.4964, "step": 249852 }, { "epoch": 47.98, "learning_rate": 0.001, "loss": 2.492, "step": 249864 }, { "epoch": 47.98, "learning_rate": 0.001, "loss": 2.4935, "step": 249876 }, { "epoch": 47.98, "learning_rate": 0.001, "loss": 2.4997, "step": 249888 }, { "epoch": 47.98, "learning_rate": 0.001, "loss": 2.4895, "step": 249900 }, { "epoch": 47.99, "learning_rate": 0.001, "loss": 2.4954, "step": 249912 }, { "epoch": 47.99, "learning_rate": 0.001, "loss": 2.481, "step": 249924 }, { "epoch": 47.99, "learning_rate": 0.001, "loss": 2.4858, "step": 249936 }, { "epoch": 47.99, "learning_rate": 0.001, "loss": 2.4918, "step": 249948 }, { "epoch": 48.0, "learning_rate": 0.001, "loss": 2.4877, "step": 249960 }, { "epoch": 48.0, "learning_rate": 0.001, "loss": 2.4965, "step": 249972 }, { "epoch": 48.0, "learning_rate": 0.001, "loss": 2.4847, "step": 249984 }, { "epoch": 48.0, "learning_rate": 0.001, "loss": 2.4746, "step": 249996 }, { "epoch": 48.0, "eval_ag_news_accuracy": 0.333, "eval_ag_news_bleu_score": 4.976351304144446, "eval_ag_news_bleu_score_sem": 0.15219663898429564, "eval_ag_news_emb_cos_sim": 0.8216410279273987, "eval_ag_news_emb_cos_sim_sem": 0.006388932764467116, "eval_ag_news_emb_top1_equal": 0.265625, "eval_ag_news_emb_top1_equal_sem": 0.03919146934646163, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4505717754364014, "eval_ag_news_n_ngrams_match_1": 14.562, "eval_ag_news_n_ngrams_match_2": 3.272, "eval_ag_news_n_ngrams_match_3": 0.94, "eval_ag_news_num_pred_words": 46.52, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.518408609455346, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36547325964035304, "eval_ag_news_runtime": 16.7875, "eval_ag_news_samples_per_second": 29.784, "eval_ag_news_steps_per_second": 0.06, "eval_ag_news_token_set_f1": 0.3611641671533246, "eval_ag_news_token_set_f1_sem": 0.004320317545460231, "eval_ag_news_token_set_precision": 0.3479161112839795, "eval_ag_news_token_set_recall": 0.38902752215403175, "eval_ag_news_true_num_tokens": 56.09375, "step": 250000 }, { "epoch": 48.0, "eval_anthropic_toxic_prompts_accuracy": 0.11678125, "eval_anthropic_toxic_prompts_bleu_score": 3.4926227989688905, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1273502353359989, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6953889727592468, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008645840525942314, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.20438814163208, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.418, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.174, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.87, "eval_anthropic_toxic_prompts_num_pred_words": 47.434, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.640418956662735, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21922156664019604, "eval_anthropic_toxic_prompts_runtime": 13.6896, "eval_anthropic_toxic_prompts_samples_per_second": 36.524, "eval_anthropic_toxic_prompts_steps_per_second": 0.073, "eval_anthropic_toxic_prompts_token_set_f1": 0.3664467303634207, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066477240187314095, "eval_anthropic_toxic_prompts_token_set_precision": 0.4538589120389553, "eval_anthropic_toxic_prompts_token_set_recall": 0.3349302629762433, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 250000 }, { "epoch": 48.0, "eval_arxiv_accuracy": 0.35684375, "eval_arxiv_bleu_score": 4.573399632545602, "eval_arxiv_bleu_score_sem": 0.14076170848554803, "eval_arxiv_emb_cos_sim": 0.7903249263763428, "eval_arxiv_emb_cos_sim_sem": 0.006268415826783739, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.309251546859741, "eval_arxiv_n_ngrams_match_1": 15.69, "eval_arxiv_n_ngrams_match_2": 3.082, "eval_arxiv_n_ngrams_match_3": 0.724, "eval_arxiv_num_pred_words": 40.532, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.364636657162713, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37800332397791225, "eval_arxiv_runtime": 13.4098, "eval_arxiv_samples_per_second": 37.286, "eval_arxiv_steps_per_second": 0.075, "eval_arxiv_token_set_f1": 0.36725584720743554, "eval_arxiv_token_set_f1_sem": 0.004144421319963543, "eval_arxiv_token_set_precision": 0.3217808379845878, "eval_arxiv_token_set_recall": 0.44403633856085034, "eval_arxiv_true_num_tokens": 64.0, "step": 250000 }, { "epoch": 48.0, "eval_python_code_alpaca_accuracy": 0.1654375, "eval_python_code_alpaca_bleu_score": 5.1548312159158, "eval_python_code_alpaca_bleu_score_sem": 0.16573807504942967, "eval_python_code_alpaca_emb_cos_sim": 0.77726149559021, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007279054448681959, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.822619915008545, "eval_python_code_alpaca_n_ngrams_match_1": 10.27, "eval_python_code_alpaca_n_ngrams_match_2": 3.174, "eval_python_code_alpaca_n_ngrams_match_3": 1.154, "eval_python_code_alpaca_num_pred_words": 43.632, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.820862223098946, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35120400874866275, "eval_python_code_alpaca_runtime": 14.2054, "eval_python_code_alpaca_samples_per_second": 35.198, "eval_python_code_alpaca_steps_per_second": 0.07, "eval_python_code_alpaca_token_set_f1": 0.49310856020986243, "eval_python_code_alpaca_token_set_f1_sem": 0.005361499865049245, "eval_python_code_alpaca_token_set_precision": 0.5608457442122253, "eval_python_code_alpaca_token_set_recall": 0.46043463793300393, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 250000 }, { "epoch": 48.0, "eval_wikibio_accuracy": 0.33415625, "eval_wikibio_bleu_score": 6.292058713973434, "eval_wikibio_bleu_score_sem": 0.21369389203475148, "eval_wikibio_emb_cos_sim": 0.7567078471183777, "eval_wikibio_emb_cos_sim_sem": 0.008591763485124804, "eval_wikibio_emb_top1_equal": 0.1171875, "eval_wikibio_emb_top1_equal_sem": 0.02854125312152025, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6028592586517334, "eval_wikibio_n_ngrams_match_1": 10.288, "eval_wikibio_n_ngrams_match_2": 3.542, "eval_wikibio_n_ngrams_match_3": 1.33, "eval_wikibio_num_pred_words": 36.186, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.70302800670636, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36515826051373507, "eval_wikibio_runtime": 17.6021, "eval_wikibio_samples_per_second": 28.406, "eval_wikibio_steps_per_second": 0.057, "eval_wikibio_token_set_f1": 0.3302672128499839, "eval_wikibio_token_set_f1_sem": 0.00524216625780752, "eval_wikibio_token_set_precision": 0.3359925807969038, "eval_wikibio_token_set_recall": 0.3395744931915767, "eval_wikibio_true_num_tokens": 61.1328125, "step": 250000 }, { "epoch": 48.0, "eval_nq_accuracy": 0.5396875, "eval_nq_bleu_score": 12.45586321621732, "eval_nq_bleu_score_sem": 0.48988617764236103, "eval_nq_emb_cos_sim": 0.836689829826355, "eval_nq_emb_cos_sim_sem": 0.006868378190969839, "eval_nq_emb_top1_equal": 0.3203125, "eval_nq_emb_top1_equal_sem": 0.041403754790620424, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.101829767227173, "eval_nq_n_ngrams_match_1": 23.646, "eval_nq_n_ngrams_match_2": 8.936, "eval_nq_n_ngrams_match_3": 4.186, "eval_nq_num_pred_words": 49.212, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.18112578135172, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4586611125271711, "eval_nq_runtime": 18.3333, "eval_nq_samples_per_second": 27.273, "eval_nq_steps_per_second": 0.055, "eval_nq_token_set_f1": 0.4707373578555305, "eval_nq_token_set_f1_sem": 0.005047285913235144, "eval_nq_token_set_precision": 0.4278669815829763, "eval_nq_token_set_recall": 0.5322160792486194, "eval_nq_true_num_tokens": 64.0, "step": 250000 }, { "epoch": 48.0, "learning_rate": 0.001, "loss": 2.4747, "step": 250008 }, { "epoch": 48.01, "learning_rate": 0.001, "loss": 2.4757, "step": 250020 }, { "epoch": 48.01, "learning_rate": 0.001, "loss": 2.473, "step": 250032 }, { "epoch": 48.01, "learning_rate": 0.001, "loss": 2.4653, "step": 250044 }, { "epoch": 48.01, "learning_rate": 0.001, "loss": 2.4691, "step": 250056 }, { "epoch": 48.02, "learning_rate": 0.001, "loss": 2.4628, "step": 250068 }, { "epoch": 48.02, "learning_rate": 0.001, "loss": 2.4701, "step": 250080 }, { "epoch": 48.02, "learning_rate": 0.001, "loss": 2.4738, "step": 250092 }, { "epoch": 48.02, "learning_rate": 0.001, "loss": 2.4725, "step": 250104 }, { "epoch": 48.03, "learning_rate": 0.001, "loss": 2.4732, "step": 250116 }, { "epoch": 48.03, "learning_rate": 0.001, "loss": 2.475, "step": 250128 }, { "epoch": 48.03, "learning_rate": 0.001, "loss": 2.4787, "step": 250140 }, { "epoch": 48.03, "learning_rate": 0.001, "loss": 2.4751, "step": 250152 }, { "epoch": 48.03, "learning_rate": 0.001, "loss": 2.462, "step": 250164 }, { "epoch": 48.04, "learning_rate": 0.001, "loss": 2.4628, "step": 250176 }, { "epoch": 48.04, "learning_rate": 0.001, "loss": 2.4812, "step": 250188 }, { "epoch": 48.04, "learning_rate": 0.001, "loss": 2.4767, "step": 250200 }, { "epoch": 48.04, "learning_rate": 0.001, "loss": 2.4719, "step": 250212 }, { "epoch": 48.05, "learning_rate": 0.001, "loss": 2.4751, "step": 250224 }, { "epoch": 48.05, "learning_rate": 0.001, "loss": 2.4727, "step": 250236 }, { "epoch": 48.05, "learning_rate": 0.001, "loss": 2.4767, "step": 250248 }, { "epoch": 48.05, "learning_rate": 0.001, "loss": 2.4605, "step": 250260 }, { "epoch": 48.06, "learning_rate": 0.001, "loss": 2.4747, "step": 250272 }, { "epoch": 48.06, "learning_rate": 0.001, "loss": 2.475, "step": 250284 }, { "epoch": 48.06, "learning_rate": 0.001, "loss": 2.4713, "step": 250296 }, { "epoch": 48.06, "learning_rate": 0.001, "loss": 2.4784, "step": 250308 }, { "epoch": 48.06, "learning_rate": 0.001, "loss": 2.4781, "step": 250320 }, { "epoch": 48.07, "learning_rate": 0.001, "loss": 2.4704, "step": 250332 }, { "epoch": 48.07, "learning_rate": 0.001, "loss": 2.4697, "step": 250344 }, { "epoch": 48.07, "learning_rate": 0.001, "loss": 2.4677, "step": 250356 }, { "epoch": 48.07, "learning_rate": 0.001, "loss": 2.482, "step": 250368 }, { "epoch": 48.08, "learning_rate": 0.001, "loss": 2.4845, "step": 250380 }, { "epoch": 48.08, "learning_rate": 0.001, "loss": 2.4765, "step": 250392 }, { "epoch": 48.08, "learning_rate": 0.001, "loss": 2.4755, "step": 250404 }, { "epoch": 48.08, "learning_rate": 0.001, "loss": 2.4744, "step": 250416 }, { "epoch": 48.09, "learning_rate": 0.001, "loss": 2.4873, "step": 250428 }, { "epoch": 48.09, "learning_rate": 0.001, "loss": 2.4829, "step": 250440 }, { "epoch": 48.09, "learning_rate": 0.001, "loss": 2.479, "step": 250452 }, { "epoch": 48.09, "learning_rate": 0.001, "loss": 2.4718, "step": 250464 }, { "epoch": 48.09, "learning_rate": 0.001, "loss": 2.4817, "step": 250476 }, { "epoch": 48.1, "learning_rate": 0.001, "loss": 2.4831, "step": 250488 }, { "epoch": 48.1, "learning_rate": 0.001, "loss": 2.4743, "step": 250500 }, { "epoch": 48.1, "learning_rate": 0.001, "loss": 2.4766, "step": 250512 }, { "epoch": 48.1, "learning_rate": 0.001, "loss": 2.4705, "step": 250524 }, { "epoch": 48.11, "learning_rate": 0.001, "loss": 2.4792, "step": 250536 }, { "epoch": 48.11, "learning_rate": 0.001, "loss": 2.4727, "step": 250548 }, { "epoch": 48.11, "learning_rate": 0.001, "loss": 2.4728, "step": 250560 }, { "epoch": 48.11, "learning_rate": 0.001, "loss": 2.4811, "step": 250572 }, { "epoch": 48.12, "learning_rate": 0.001, "loss": 2.473, "step": 250584 }, { "epoch": 48.12, "learning_rate": 0.001, "loss": 2.4801, "step": 250596 }, { "epoch": 48.12, "learning_rate": 0.001, "loss": 2.4865, "step": 250608 }, { "epoch": 48.12, "learning_rate": 0.001, "loss": 2.474, "step": 250620 }, { "epoch": 48.12, "eval_ag_news_accuracy": 0.3311875, "eval_ag_news_bleu_score": 5.139758274458105, "eval_ag_news_bleu_score_sem": 0.1594382743298427, "eval_ag_news_emb_cos_sim": 0.8248339891433716, "eval_ag_news_emb_cos_sim_sem": 0.006527133597823354, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.460014581680298, "eval_ag_news_n_ngrams_match_1": 14.676, "eval_ag_news_n_ngrams_match_2": 3.392, "eval_ag_news_n_ngrams_match_3": 0.958, "eval_ag_news_num_pred_words": 46.874, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.817440463029836, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36488812800624415, "eval_ag_news_runtime": 11.5577, "eval_ag_news_samples_per_second": 43.261, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.36204404038841675, "eval_ag_news_token_set_f1_sem": 0.00449969604583042, "eval_ag_news_token_set_precision": 0.34946652317310706, "eval_ag_news_token_set_recall": 0.3889837193239418, "eval_ag_news_true_num_tokens": 56.09375, "step": 250625 }, { "epoch": 48.12, "eval_anthropic_toxic_prompts_accuracy": 0.11665625, "eval_anthropic_toxic_prompts_bleu_score": 3.361079890503321, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1280400925058103, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6960893869400024, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008995374218810777, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.190563440322876, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.534, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.11, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.808, "eval_anthropic_toxic_prompts_num_pred_words": 47.324, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.302116378582213, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22300698806984248, "eval_anthropic_toxic_prompts_runtime": 11.2219, "eval_anthropic_toxic_prompts_samples_per_second": 44.556, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.36094798571661507, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.0066335762361913785, "eval_anthropic_toxic_prompts_token_set_precision": 0.4567114963136891, "eval_anthropic_toxic_prompts_token_set_recall": 0.32255889335372684, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 250625 }, { "epoch": 48.12, "eval_arxiv_accuracy": 0.3540625, "eval_arxiv_bleu_score": 4.731141276595753, "eval_arxiv_bleu_score_sem": 0.135710392393234, "eval_arxiv_emb_cos_sim": 0.7890267372131348, "eval_arxiv_emb_cos_sim_sem": 0.006840106216697633, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.321216344833374, "eval_arxiv_n_ngrams_match_1": 15.808, "eval_arxiv_n_ngrams_match_2": 3.262, "eval_arxiv_n_ngrams_match_3": 0.774, "eval_arxiv_num_pred_words": 41.228, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.694015552978357, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37571313045028343, "eval_arxiv_runtime": 11.3363, "eval_arxiv_samples_per_second": 44.106, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.36838095837752993, "eval_arxiv_token_set_f1_sem": 0.004210803563776229, "eval_arxiv_token_set_precision": 0.3233462041974542, "eval_arxiv_token_set_recall": 0.442847770455988, "eval_arxiv_true_num_tokens": 64.0, "step": 250625 }, { "epoch": 48.12, "eval_python_code_alpaca_accuracy": 0.1648125, "eval_python_code_alpaca_bleu_score": 5.200160592115089, "eval_python_code_alpaca_bleu_score_sem": 0.16877561616190814, "eval_python_code_alpaca_emb_cos_sim": 0.7871952056884766, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007216122618410114, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8048338890075684, "eval_python_code_alpaca_n_ngrams_match_1": 10.36, "eval_python_code_alpaca_n_ngrams_match_2": 3.334, "eval_python_code_alpaca_n_ngrams_match_3": 1.252, "eval_python_code_alpaca_num_pred_words": 44.176, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.52433080508394, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3483535281781177, "eval_python_code_alpaca_runtime": 11.1175, "eval_python_code_alpaca_samples_per_second": 44.974, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4857537839483617, "eval_python_code_alpaca_token_set_f1_sem": 0.0051787267579279555, "eval_python_code_alpaca_token_set_precision": 0.5689601697740975, "eval_python_code_alpaca_token_set_recall": 0.444310862304604, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 250625 }, { "epoch": 48.12, "eval_wikibio_accuracy": 0.33190625, "eval_wikibio_bleu_score": 6.039174249369688, "eval_wikibio_bleu_score_sem": 0.21558128963785914, "eval_wikibio_emb_cos_sim": 0.7473443746566772, "eval_wikibio_emb_cos_sim_sem": 0.00937715486060163, "eval_wikibio_emb_top1_equal": 0.2734375, "eval_wikibio_emb_top1_equal_sem": 0.03955156411760461, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6011593341827393, "eval_wikibio_n_ngrams_match_1": 9.924, "eval_wikibio_n_ngrams_match_2": 3.37, "eval_wikibio_n_ngrams_match_3": 1.244, "eval_wikibio_num_pred_words": 35.85, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.640688632437495, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3570580379457571, "eval_wikibio_runtime": 11.9395, "eval_wikibio_samples_per_second": 41.878, "eval_wikibio_steps_per_second": 0.084, "eval_wikibio_token_set_f1": 0.31896550870984725, "eval_wikibio_token_set_f1_sem": 0.005661441125922278, "eval_wikibio_token_set_precision": 0.3269826723831596, "eval_wikibio_token_set_recall": 0.32874119211446584, "eval_wikibio_true_num_tokens": 61.1328125, "step": 250625 }, { "epoch": 48.12, "eval_nq_accuracy": 0.53815625, "eval_nq_bleu_score": 12.594178108198438, "eval_nq_bleu_score_sem": 0.5045598870209774, "eval_nq_emb_cos_sim": 0.8444705009460449, "eval_nq_emb_cos_sim_sem": 0.006603551102529669, "eval_nq_emb_top1_equal": 0.3671875, "eval_nq_emb_top1_equal_sem": 0.04277397517748991, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.103944778442383, "eval_nq_n_ngrams_match_1": 23.724, "eval_nq_n_ngrams_match_2": 9.018, "eval_nq_n_ngrams_match_3": 4.25, "eval_nq_num_pred_words": 49.122, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.198447265241693, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46023629689377865, "eval_nq_runtime": 12.0333, "eval_nq_samples_per_second": 41.551, "eval_nq_steps_per_second": 0.083, "eval_nq_token_set_f1": 0.47348552007413786, "eval_nq_token_set_f1_sem": 0.005088084412755815, "eval_nq_token_set_precision": 0.4324209050827016, "eval_nq_token_set_recall": 0.5312504527279666, "eval_nq_true_num_tokens": 64.0, "step": 250625 }, { "epoch": 48.12, "learning_rate": 0.001, "loss": 2.4834, "step": 250632 }, { "epoch": 48.13, "learning_rate": 0.001, "loss": 2.4775, "step": 250644 }, { "epoch": 48.13, "learning_rate": 0.001, "loss": 2.4822, "step": 250656 }, { "epoch": 48.13, "learning_rate": 0.001, "loss": 2.4769, "step": 250668 }, { "epoch": 48.13, "learning_rate": 0.001, "loss": 2.471, "step": 250680 }, { "epoch": 48.14, "learning_rate": 0.001, "loss": 2.4706, "step": 250692 }, { "epoch": 48.14, "learning_rate": 0.001, "loss": 2.4738, "step": 250704 }, { "epoch": 48.14, "learning_rate": 0.001, "loss": 2.4788, "step": 250716 }, { "epoch": 48.14, "learning_rate": 0.001, "loss": 2.4833, "step": 250728 }, { "epoch": 48.15, "learning_rate": 0.001, "loss": 2.4742, "step": 250740 }, { "epoch": 48.15, "learning_rate": 0.001, "loss": 2.4895, "step": 250752 }, { "epoch": 48.15, "learning_rate": 0.001, "loss": 2.4741, "step": 250764 }, { "epoch": 48.15, "learning_rate": 0.001, "loss": 2.4703, "step": 250776 }, { "epoch": 48.15, "learning_rate": 0.001, "loss": 2.4748, "step": 250788 }, { "epoch": 48.16, "learning_rate": 0.001, "loss": 2.479, "step": 250800 }, { "epoch": 48.16, "learning_rate": 0.001, "loss": 2.4805, "step": 250812 }, { "epoch": 48.16, "learning_rate": 0.001, "loss": 2.4742, "step": 250824 }, { "epoch": 48.16, "learning_rate": 0.001, "loss": 2.4839, "step": 250836 }, { "epoch": 48.17, "learning_rate": 0.001, "loss": 2.4743, "step": 250848 }, { "epoch": 48.17, "learning_rate": 0.001, "loss": 2.483, "step": 250860 }, { "epoch": 48.17, "learning_rate": 0.001, "loss": 2.4767, "step": 250872 }, { "epoch": 48.17, "learning_rate": 0.001, "loss": 2.4821, "step": 250884 }, { "epoch": 48.18, "learning_rate": 0.001, "loss": 2.4749, "step": 250896 }, { "epoch": 48.18, "learning_rate": 0.001, "loss": 2.4769, "step": 250908 }, { "epoch": 48.18, "learning_rate": 0.001, "loss": 2.4842, "step": 250920 }, { "epoch": 48.18, "learning_rate": 0.001, "loss": 2.482, "step": 250932 }, { "epoch": 48.18, "learning_rate": 0.001, "loss": 2.4779, "step": 250944 }, { "epoch": 48.19, "learning_rate": 0.001, "loss": 2.4803, "step": 250956 }, { "epoch": 48.19, "learning_rate": 0.001, "loss": 2.4759, "step": 250968 }, { "epoch": 48.19, "learning_rate": 0.001, "loss": 2.4807, "step": 250980 }, { "epoch": 48.19, "learning_rate": 0.001, "loss": 2.4812, "step": 250992 }, { "epoch": 48.2, "learning_rate": 0.001, "loss": 2.4768, "step": 251004 }, { "epoch": 48.2, "learning_rate": 0.001, "loss": 2.4838, "step": 251016 }, { "epoch": 48.2, "learning_rate": 0.001, "loss": 2.4819, "step": 251028 }, { "epoch": 48.2, "learning_rate": 0.001, "loss": 2.4795, "step": 251040 }, { "epoch": 48.21, "learning_rate": 0.001, "loss": 2.4801, "step": 251052 }, { "epoch": 48.21, "learning_rate": 0.001, "loss": 2.491, "step": 251064 }, { "epoch": 48.21, "learning_rate": 0.001, "loss": 2.4737, "step": 251076 }, { "epoch": 48.21, "learning_rate": 0.001, "loss": 2.4685, "step": 251088 }, { "epoch": 48.21, "learning_rate": 0.001, "loss": 2.4764, "step": 251100 }, { "epoch": 48.22, "learning_rate": 0.001, "loss": 2.4709, "step": 251112 }, { "epoch": 48.22, "learning_rate": 0.001, "loss": 2.4863, "step": 251124 }, { "epoch": 48.22, "learning_rate": 0.001, "loss": 2.4893, "step": 251136 }, { "epoch": 48.22, "learning_rate": 0.001, "loss": 2.4738, "step": 251148 }, { "epoch": 48.23, "learning_rate": 0.001, "loss": 2.4803, "step": 251160 }, { "epoch": 48.23, "learning_rate": 0.001, "loss": 2.4718, "step": 251172 }, { "epoch": 48.23, "learning_rate": 0.001, "loss": 2.4835, "step": 251184 }, { "epoch": 48.23, "learning_rate": 0.001, "loss": 2.4798, "step": 251196 }, { "epoch": 48.24, "learning_rate": 0.001, "loss": 2.4788, "step": 251208 }, { "epoch": 48.24, "learning_rate": 0.001, "loss": 2.4777, "step": 251220 }, { "epoch": 48.24, "learning_rate": 0.001, "loss": 2.4738, "step": 251232 }, { "epoch": 48.24, "learning_rate": 0.001, "loss": 2.4731, "step": 251244 }, { "epoch": 48.24, "eval_ag_news_accuracy": 0.33078125, "eval_ag_news_bleu_score": 5.09959671108279, "eval_ag_news_bleu_score_sem": 0.16014220244775615, "eval_ag_news_emb_cos_sim": 0.8223634958267212, "eval_ag_news_emb_cos_sim_sem": 0.0062494326227966294, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4594814777374268, "eval_ag_news_n_ngrams_match_1": 14.536, "eval_ag_news_n_ngrams_match_2": 3.328, "eval_ag_news_n_ngrams_match_3": 0.966, "eval_ag_news_num_pred_words": 46.438, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.800482980518932, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3640067607748212, "eval_ag_news_runtime": 11.5479, "eval_ag_news_samples_per_second": 43.298, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.36098790997584823, "eval_ag_news_token_set_f1_sem": 0.0044507064389004685, "eval_ag_news_token_set_precision": 0.3476410126022377, "eval_ag_news_token_set_recall": 0.39049919153744506, "eval_ag_news_true_num_tokens": 56.09375, "step": 251250 }, { "epoch": 48.24, "eval_anthropic_toxic_prompts_accuracy": 0.1165, "eval_anthropic_toxic_prompts_bleu_score": 3.4080332414745, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1266418285440086, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6937938928604126, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008794806586370545, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.140625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030847557647994725, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2096006870269775, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.392, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.082, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.836, "eval_anthropic_toxic_prompts_num_pred_words": 47.152, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.769193589258837, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22015547013252357, "eval_anthropic_toxic_prompts_runtime": 12.573, "eval_anthropic_toxic_prompts_samples_per_second": 39.768, "eval_anthropic_toxic_prompts_steps_per_second": 0.08, "eval_anthropic_toxic_prompts_token_set_f1": 0.3607077909796986, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006354294148384737, "eval_anthropic_toxic_prompts_token_set_precision": 0.45063219653479836, "eval_anthropic_toxic_prompts_token_set_recall": 0.3283815092734517, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 251250 }, { "epoch": 48.24, "eval_arxiv_accuracy": 0.35534375, "eval_arxiv_bleu_score": 4.497678610095454, "eval_arxiv_bleu_score_sem": 0.13852984143801117, "eval_arxiv_emb_cos_sim": 0.7842373847961426, "eval_arxiv_emb_cos_sim_sem": 0.007200587553466802, "eval_arxiv_emb_top1_equal": 0.3203125, "eval_arxiv_emb_top1_equal_sem": 0.041403754790620424, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3105201721191406, "eval_arxiv_n_ngrams_match_1": 15.584, "eval_arxiv_n_ngrams_match_2": 3.082, "eval_arxiv_n_ngrams_match_3": 0.684, "eval_arxiv_num_pred_words": 40.456, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.399374156215742, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3733434561234425, "eval_arxiv_runtime": 11.4525, "eval_arxiv_samples_per_second": 43.659, "eval_arxiv_steps_per_second": 0.087, "eval_arxiv_token_set_f1": 0.36444946024990826, "eval_arxiv_token_set_f1_sem": 0.004467964127470384, "eval_arxiv_token_set_precision": 0.31892920578939843, "eval_arxiv_token_set_recall": 0.44565416650040224, "eval_arxiv_true_num_tokens": 64.0, "step": 251250 }, { "epoch": 48.24, "eval_python_code_alpaca_accuracy": 0.16434375, "eval_python_code_alpaca_bleu_score": 4.965559602453602, "eval_python_code_alpaca_bleu_score_sem": 0.16300538877905096, "eval_python_code_alpaca_emb_cos_sim": 0.7760825157165527, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00773737245027958, "eval_python_code_alpaca_emb_top1_equal": 0.1796875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034068008879424266, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.819366216659546, "eval_python_code_alpaca_n_ngrams_match_1": 10.06, "eval_python_code_alpaca_n_ngrams_match_2": 3.078, "eval_python_code_alpaca_n_ngrams_match_3": 1.09, "eval_python_code_alpaca_num_pred_words": 43.126, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.766221152440643, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3472794596335977, "eval_python_code_alpaca_runtime": 11.164, "eval_python_code_alpaca_samples_per_second": 44.787, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.48566086343735, "eval_python_code_alpaca_token_set_f1_sem": 0.00542900190715754, "eval_python_code_alpaca_token_set_precision": 0.553812915165647, "eval_python_code_alpaca_token_set_recall": 0.45463936680057865, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 251250 }, { "epoch": 48.24, "eval_wikibio_accuracy": 0.33140625, "eval_wikibio_bleu_score": 5.866905590101334, "eval_wikibio_bleu_score_sem": 0.21141034914752507, "eval_wikibio_emb_cos_sim": 0.7367610931396484, "eval_wikibio_emb_cos_sim_sem": 0.010105310815800664, "eval_wikibio_emb_top1_equal": 0.2421875, "eval_wikibio_emb_top1_equal_sem": 0.038014990119662626, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6136386394500732, "eval_wikibio_n_ngrams_match_1": 9.726, "eval_wikibio_n_ngrams_match_2": 3.224, "eval_wikibio_n_ngrams_match_3": 1.172, "eval_wikibio_num_pred_words": 34.774, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.10080395965178, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.34991224266827015, "eval_wikibio_runtime": 11.995, "eval_wikibio_samples_per_second": 41.684, "eval_wikibio_steps_per_second": 0.083, "eval_wikibio_token_set_f1": 0.31528805844395424, "eval_wikibio_token_set_f1_sem": 0.0056001249024216744, "eval_wikibio_token_set_precision": 0.3176935238206678, "eval_wikibio_token_set_recall": 0.3338562315888643, "eval_wikibio_true_num_tokens": 61.1328125, "step": 251250 }, { "epoch": 48.24, "eval_nq_accuracy": 0.5401875, "eval_nq_bleu_score": 12.269426629372164, "eval_nq_bleu_score_sem": 0.49251996946522614, "eval_nq_emb_cos_sim": 0.836445689201355, "eval_nq_emb_cos_sim_sem": 0.00690769922499698, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1021807193756104, "eval_nq_n_ngrams_match_1": 23.596, "eval_nq_n_ngrams_match_2": 8.796, "eval_nq_n_ngrams_match_3": 4.134, "eval_nq_num_pred_words": 49.008, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.183997468904307, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45823407640517116, "eval_nq_runtime": 11.6865, "eval_nq_samples_per_second": 42.784, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4709152746179991, "eval_nq_token_set_f1_sem": 0.004818976117621279, "eval_nq_token_set_precision": 0.43048724253270526, "eval_nq_token_set_recall": 0.5275287415318805, "eval_nq_true_num_tokens": 64.0, "step": 251250 }, { "epoch": 48.24, "learning_rate": 0.001, "loss": 2.4868, "step": 251256 }, { "epoch": 48.25, "learning_rate": 0.001, "loss": 2.4766, "step": 251268 }, { "epoch": 48.25, "learning_rate": 0.001, "loss": 2.4829, "step": 251280 }, { "epoch": 48.25, "learning_rate": 0.001, "loss": 2.4823, "step": 251292 }, { "epoch": 48.25, "learning_rate": 0.001, "loss": 2.4837, "step": 251304 }, { "epoch": 48.26, "learning_rate": 0.001, "loss": 2.4787, "step": 251316 }, { "epoch": 48.26, "learning_rate": 0.001, "loss": 2.474, "step": 251328 }, { "epoch": 48.26, "learning_rate": 0.001, "loss": 2.4757, "step": 251340 }, { "epoch": 48.26, "learning_rate": 0.001, "loss": 2.4882, "step": 251352 }, { "epoch": 48.26, "learning_rate": 0.001, "loss": 2.4782, "step": 251364 }, { "epoch": 48.27, "learning_rate": 0.001, "loss": 2.4825, "step": 251376 }, { "epoch": 48.27, "learning_rate": 0.001, "loss": 2.4858, "step": 251388 }, { "epoch": 48.27, "learning_rate": 0.001, "loss": 2.4826, "step": 251400 }, { "epoch": 48.27, "learning_rate": 0.001, "loss": 2.4846, "step": 251412 }, { "epoch": 48.28, "learning_rate": 0.001, "loss": 2.4762, "step": 251424 }, { "epoch": 48.28, "learning_rate": 0.001, "loss": 2.4778, "step": 251436 }, { "epoch": 48.28, "learning_rate": 0.001, "loss": 2.4702, "step": 251448 }, { "epoch": 48.28, "learning_rate": 0.001, "loss": 2.4791, "step": 251460 }, { "epoch": 48.29, "learning_rate": 0.001, "loss": 2.4871, "step": 251472 }, { "epoch": 48.29, "learning_rate": 0.001, "loss": 2.4761, "step": 251484 }, { "epoch": 48.29, "learning_rate": 0.001, "loss": 2.4868, "step": 251496 }, { "epoch": 48.29, "learning_rate": 0.001, "loss": 2.4759, "step": 251508 }, { "epoch": 48.29, "learning_rate": 0.001, "loss": 2.4871, "step": 251520 }, { "epoch": 48.3, "learning_rate": 0.001, "loss": 2.4773, "step": 251532 }, { "epoch": 48.3, "learning_rate": 0.001, "loss": 2.475, "step": 251544 }, { "epoch": 48.3, "learning_rate": 0.001, "loss": 2.4857, "step": 251556 }, { "epoch": 48.3, "learning_rate": 0.001, "loss": 2.4831, "step": 251568 }, { "epoch": 48.31, "learning_rate": 0.001, "loss": 2.4881, "step": 251580 }, { "epoch": 48.31, "learning_rate": 0.001, "loss": 2.476, "step": 251592 }, { "epoch": 48.31, "learning_rate": 0.001, "loss": 2.4761, "step": 251604 }, { "epoch": 48.31, "learning_rate": 0.001, "loss": 2.4804, "step": 251616 }, { "epoch": 48.32, "learning_rate": 0.001, "loss": 2.4767, "step": 251628 }, { "epoch": 48.32, "learning_rate": 0.001, "loss": 2.4832, "step": 251640 }, { "epoch": 48.32, "learning_rate": 0.001, "loss": 2.4724, "step": 251652 }, { "epoch": 48.32, "learning_rate": 0.001, "loss": 2.4834, "step": 251664 }, { "epoch": 48.32, "learning_rate": 0.001, "loss": 2.4847, "step": 251676 }, { "epoch": 48.33, "learning_rate": 0.001, "loss": 2.4779, "step": 251688 }, { "epoch": 48.33, "learning_rate": 0.001, "loss": 2.48, "step": 251700 }, { "epoch": 48.33, "learning_rate": 0.001, "loss": 2.4782, "step": 251712 }, { "epoch": 48.33, "learning_rate": 0.001, "loss": 2.4813, "step": 251724 }, { "epoch": 48.34, "learning_rate": 0.001, "loss": 2.4745, "step": 251736 }, { "epoch": 48.34, "learning_rate": 0.001, "loss": 2.4777, "step": 251748 }, { "epoch": 48.34, "learning_rate": 0.001, "loss": 2.4784, "step": 251760 }, { "epoch": 48.34, "learning_rate": 0.001, "loss": 2.4763, "step": 251772 }, { "epoch": 48.35, "learning_rate": 0.001, "loss": 2.4841, "step": 251784 }, { "epoch": 48.35, "learning_rate": 0.001, "loss": 2.4699, "step": 251796 }, { "epoch": 48.35, "learning_rate": 0.001, "loss": 2.4776, "step": 251808 }, { "epoch": 48.35, "learning_rate": 0.001, "loss": 2.4719, "step": 251820 }, { "epoch": 48.35, "learning_rate": 0.001, "loss": 2.4845, "step": 251832 }, { "epoch": 48.36, "learning_rate": 0.001, "loss": 2.4736, "step": 251844 }, { "epoch": 48.36, "learning_rate": 0.001, "loss": 2.4792, "step": 251856 }, { "epoch": 48.36, "learning_rate": 0.001, "loss": 2.4814, "step": 251868 }, { "epoch": 48.36, "eval_ag_news_accuracy": 0.33090625, "eval_ag_news_bleu_score": 5.167693823396464, "eval_ag_news_bleu_score_sem": 0.1643706627612869, "eval_ag_news_emb_cos_sim": 0.8261070847511292, "eval_ag_news_emb_cos_sim_sem": 0.006726918220843595, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.456059455871582, "eval_ag_news_n_ngrams_match_1": 14.776, "eval_ag_news_n_ngrams_match_2": 3.5, "eval_ag_news_n_ngrams_match_3": 1.004, "eval_ag_news_num_pred_words": 46.92, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.69184701575129, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3673643766483693, "eval_ag_news_runtime": 11.9375, "eval_ag_news_samples_per_second": 41.885, "eval_ag_news_steps_per_second": 0.084, "eval_ag_news_token_set_f1": 0.3647391261588922, "eval_ag_news_token_set_f1_sem": 0.0046172232975958, "eval_ag_news_token_set_precision": 0.35199514681047417, "eval_ag_news_token_set_recall": 0.39216877659465427, "eval_ag_news_true_num_tokens": 56.09375, "step": 251875 }, { "epoch": 48.36, "eval_anthropic_toxic_prompts_accuracy": 0.11690625, "eval_anthropic_toxic_prompts_bleu_score": 3.2433695670770337, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12217276880675339, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6950536966323853, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008528013742574045, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2095835208892822, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.384, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.02, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.778, "eval_anthropic_toxic_prompts_num_pred_words": 47.836, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.768768401520507, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21722775682923107, "eval_anthropic_toxic_prompts_runtime": 11.1324, "eval_anthropic_toxic_prompts_samples_per_second": 44.914, "eval_anthropic_toxic_prompts_steps_per_second": 0.09, "eval_anthropic_toxic_prompts_token_set_f1": 0.36048937684694327, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006526683291917304, "eval_anthropic_toxic_prompts_token_set_precision": 0.4514346911599886, "eval_anthropic_toxic_prompts_token_set_recall": 0.3265819297445124, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 251875 }, { "epoch": 48.36, "eval_arxiv_accuracy": 0.35553125, "eval_arxiv_bleu_score": 4.431887569962282, "eval_arxiv_bleu_score_sem": 0.12944506366506656, "eval_arxiv_emb_cos_sim": 0.7818551063537598, "eval_arxiv_emb_cos_sim_sem": 0.007109343577707764, "eval_arxiv_emb_top1_equal": 0.25, "eval_arxiv_emb_top1_equal_sem": 0.03842366440207048, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3221285343170166, "eval_arxiv_n_ngrams_match_1": 15.592, "eval_arxiv_n_ngrams_match_2": 3.074, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 40.842, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.71928926818173, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3727703280568793, "eval_arxiv_runtime": 12.6899, "eval_arxiv_samples_per_second": 39.401, "eval_arxiv_steps_per_second": 0.079, "eval_arxiv_token_set_f1": 0.365927603584925, "eval_arxiv_token_set_f1_sem": 0.004416122318664511, "eval_arxiv_token_set_precision": 0.31787081954965296, "eval_arxiv_token_set_recall": 0.4491077653743621, "eval_arxiv_true_num_tokens": 64.0, "step": 251875 }, { "epoch": 48.36, "eval_python_code_alpaca_accuracy": 0.162875, "eval_python_code_alpaca_bleu_score": 4.868752082978907, "eval_python_code_alpaca_bleu_score_sem": 0.1440595166960755, "eval_python_code_alpaca_emb_cos_sim": 0.7802065014839172, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007584125192004953, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8391098976135254, "eval_python_code_alpaca_n_ngrams_match_1": 10.272, "eval_python_code_alpaca_n_ngrams_match_2": 3.138, "eval_python_code_alpaca_n_ngrams_match_3": 1.102, "eval_python_code_alpaca_num_pred_words": 44.376, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.100537531642352, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3472426663093965, "eval_python_code_alpaca_runtime": 33.8483, "eval_python_code_alpaca_samples_per_second": 14.772, "eval_python_code_alpaca_steps_per_second": 0.03, "eval_python_code_alpaca_token_set_f1": 0.4900903739555282, "eval_python_code_alpaca_token_set_f1_sem": 0.00512187924133402, "eval_python_code_alpaca_token_set_precision": 0.565810531328908, "eval_python_code_alpaca_token_set_recall": 0.4511917160087212, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 251875 }, { "epoch": 48.36, "eval_wikibio_accuracy": 0.32878125, "eval_wikibio_bleu_score": 6.279184295600841, "eval_wikibio_bleu_score_sem": 0.2193481791221487, "eval_wikibio_emb_cos_sim": 0.7526933550834656, "eval_wikibio_emb_cos_sim_sem": 0.009144129544996453, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.632040500640869, "eval_wikibio_n_ngrams_match_1": 10.266, "eval_wikibio_n_ngrams_match_2": 3.538, "eval_wikibio_n_ngrams_match_3": 1.366, "eval_wikibio_num_pred_words": 36.494, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.789848208436084, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36355306803507004, "eval_wikibio_runtime": 29.2206, "eval_wikibio_samples_per_second": 17.111, "eval_wikibio_steps_per_second": 0.034, "eval_wikibio_token_set_f1": 0.32665042392141497, "eval_wikibio_token_set_f1_sem": 0.005293649857900133, "eval_wikibio_token_set_precision": 0.3353163458613141, "eval_wikibio_token_set_recall": 0.3362724738346447, "eval_wikibio_true_num_tokens": 61.1328125, "step": 251875 }, { "epoch": 48.36, "eval_nq_accuracy": 0.54034375, "eval_nq_bleu_score": 12.420310795175435, "eval_nq_bleu_score_sem": 0.5042513808363712, "eval_nq_emb_cos_sim": 0.8430113196372986, "eval_nq_emb_cos_sim_sem": 0.007010841782926249, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.104539632797241, "eval_nq_n_ngrams_match_1": 23.758, "eval_nq_n_ngrams_match_2": 8.79, "eval_nq_n_ngrams_match_3": 4.178, "eval_nq_num_pred_words": 49.278, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.203325598105424, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45959830713726046, "eval_nq_runtime": 23.7373, "eval_nq_samples_per_second": 21.064, "eval_nq_steps_per_second": 0.042, "eval_nq_token_set_f1": 0.47252964948873705, "eval_nq_token_set_f1_sem": 0.004844632569834099, "eval_nq_token_set_precision": 0.4319054093187382, "eval_nq_token_set_recall": 0.5295300013559052, "eval_nq_true_num_tokens": 64.0, "step": 251875 }, { "epoch": 48.36, "learning_rate": 0.001, "loss": 2.4827, "step": 251880 }, { "epoch": 48.37, "learning_rate": 0.001, "loss": 2.472, "step": 251892 }, { "epoch": 48.37, "learning_rate": 0.001, "loss": 2.4765, "step": 251904 }, { "epoch": 48.37, "learning_rate": 0.001, "loss": 2.4779, "step": 251916 }, { "epoch": 48.37, "learning_rate": 0.001, "loss": 2.4759, "step": 251928 }, { "epoch": 48.38, "learning_rate": 0.001, "loss": 2.478, "step": 251940 }, { "epoch": 48.38, "learning_rate": 0.001, "loss": 2.4829, "step": 251952 }, { "epoch": 48.38, "learning_rate": 0.001, "loss": 2.4752, "step": 251964 }, { "epoch": 48.38, "learning_rate": 0.001, "loss": 2.4821, "step": 251976 }, { "epoch": 48.38, "learning_rate": 0.001, "loss": 2.4785, "step": 251988 }, { "epoch": 48.39, "learning_rate": 0.001, "loss": 2.4756, "step": 252000 }, { "epoch": 48.39, "learning_rate": 0.001, "loss": 2.4763, "step": 252012 }, { "epoch": 48.39, "learning_rate": 0.001, "loss": 2.4813, "step": 252024 }, { "epoch": 48.39, "learning_rate": 0.001, "loss": 2.4866, "step": 252036 }, { "epoch": 48.4, "learning_rate": 0.001, "loss": 2.4734, "step": 252048 }, { "epoch": 48.4, "learning_rate": 0.001, "loss": 2.4786, "step": 252060 }, { "epoch": 48.4, "learning_rate": 0.001, "loss": 2.4739, "step": 252072 }, { "epoch": 48.4, "learning_rate": 0.001, "loss": 2.4765, "step": 252084 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 2.4686, "step": 252096 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 2.4835, "step": 252108 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 2.4749, "step": 252120 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 2.4767, "step": 252132 }, { "epoch": 48.41, "learning_rate": 0.001, "loss": 2.4696, "step": 252144 }, { "epoch": 48.42, "learning_rate": 0.001, "loss": 2.4748, "step": 252156 }, { "epoch": 48.42, "learning_rate": 0.001, "loss": 2.4795, "step": 252168 }, { "epoch": 48.42, "learning_rate": 0.001, "loss": 2.4862, "step": 252180 }, { "epoch": 48.42, "learning_rate": 0.001, "loss": 2.4745, "step": 252192 }, { "epoch": 48.43, "learning_rate": 0.001, "loss": 2.4783, "step": 252204 }, { "epoch": 48.43, "learning_rate": 0.001, "loss": 2.4711, "step": 252216 }, { "epoch": 48.43, "learning_rate": 0.001, "loss": 2.4754, "step": 252228 }, { "epoch": 48.43, "learning_rate": 0.001, "loss": 2.4799, "step": 252240 }, { "epoch": 48.44, "learning_rate": 0.001, "loss": 2.4768, "step": 252252 }, { "epoch": 48.44, "learning_rate": 0.001, "loss": 2.4689, "step": 252264 }, { "epoch": 48.44, "learning_rate": 0.001, "loss": 2.4686, "step": 252276 }, { "epoch": 48.44, "learning_rate": 0.001, "loss": 2.4749, "step": 252288 }, { "epoch": 48.44, "learning_rate": 0.001, "loss": 2.4799, "step": 252300 }, { "epoch": 48.45, "learning_rate": 0.001, "loss": 2.4805, "step": 252312 }, { "epoch": 48.45, "learning_rate": 0.001, "loss": 2.4792, "step": 252324 }, { "epoch": 48.45, "learning_rate": 0.001, "loss": 2.4688, "step": 252336 }, { "epoch": 48.45, "learning_rate": 0.001, "loss": 2.485, "step": 252348 }, { "epoch": 48.46, "learning_rate": 0.001, "loss": 2.4771, "step": 252360 }, { "epoch": 48.46, "learning_rate": 0.001, "loss": 2.4874, "step": 252372 }, { "epoch": 48.46, "learning_rate": 0.001, "loss": 2.4844, "step": 252384 }, { "epoch": 48.46, "learning_rate": 0.001, "loss": 2.4767, "step": 252396 }, { "epoch": 48.47, "learning_rate": 0.001, "loss": 2.4855, "step": 252408 }, { "epoch": 48.47, "learning_rate": 0.001, "loss": 2.4789, "step": 252420 }, { "epoch": 48.47, "learning_rate": 0.001, "loss": 2.4819, "step": 252432 }, { "epoch": 48.47, "learning_rate": 0.001, "loss": 2.4837, "step": 252444 }, { "epoch": 48.47, "learning_rate": 0.001, "loss": 2.4882, "step": 252456 }, { "epoch": 48.48, "learning_rate": 0.001, "loss": 2.484, "step": 252468 }, { "epoch": 48.48, "learning_rate": 0.001, "loss": 2.477, "step": 252480 }, { "epoch": 48.48, "learning_rate": 0.001, "loss": 2.4863, "step": 252492 }, { "epoch": 48.48, "eval_ag_news_accuracy": 0.332375, "eval_ag_news_bleu_score": 5.138620462052554, "eval_ag_news_bleu_score_sem": 0.1663385760860313, "eval_ag_news_emb_cos_sim": 0.8251314163208008, "eval_ag_news_emb_cos_sim_sem": 0.0064455965681119816, "eval_ag_news_emb_top1_equal": 0.203125, "eval_ag_news_emb_top1_equal_sem": 0.03570055125142555, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.445012092590332, "eval_ag_news_n_ngrams_match_1": 14.558, "eval_ag_news_n_ngrams_match_2": 3.372, "eval_ag_news_n_ngrams_match_3": 0.916, "eval_ag_news_num_pred_words": 46.234, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.34366247044745, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36562157530401873, "eval_ag_news_runtime": 11.5372, "eval_ag_news_samples_per_second": 43.338, "eval_ag_news_steps_per_second": 0.087, "eval_ag_news_token_set_f1": 0.36167660406611235, "eval_ag_news_token_set_f1_sem": 0.004403739658015286, "eval_ag_news_token_set_precision": 0.3477815205037834, "eval_ag_news_token_set_recall": 0.3913349886249815, "eval_ag_news_true_num_tokens": 56.09375, "step": 252500 }, { "epoch": 48.48, "eval_anthropic_toxic_prompts_accuracy": 0.11615625, "eval_anthropic_toxic_prompts_bleu_score": 3.3158997955492695, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1309423280689943, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6888794302940369, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008748511566003868, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.192425012588501, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.362, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.012, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, "eval_anthropic_toxic_prompts_num_pred_words": 46.686, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.347398659470723, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2204264884245279, "eval_anthropic_toxic_prompts_runtime": 11.2014, "eval_anthropic_toxic_prompts_samples_per_second": 44.637, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.35525893389393404, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006543885851610156, "eval_anthropic_toxic_prompts_token_set_precision": 0.4463478376988283, "eval_anthropic_toxic_prompts_token_set_recall": 0.32304658677546483, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 252500 }, { "epoch": 48.48, "eval_arxiv_accuracy": 0.35434375, "eval_arxiv_bleu_score": 4.55302470016922, "eval_arxiv_bleu_score_sem": 0.14336370871717433, "eval_arxiv_emb_cos_sim": 0.7756081819534302, "eval_arxiv_emb_cos_sim_sem": 0.0082969792070165, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3071327209472656, "eval_arxiv_n_ngrams_match_1": 15.46, "eval_arxiv_n_ngrams_match_2": 3.122, "eval_arxiv_n_ngrams_match_3": 0.75, "eval_arxiv_num_pred_words": 39.726, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.30671713828552, "eval_arxiv_pred_num_tokens": 62.9921875, "eval_arxiv_rouge_score": 0.37337024335822466, "eval_arxiv_runtime": 15.7946, "eval_arxiv_samples_per_second": 31.656, "eval_arxiv_steps_per_second": 0.063, "eval_arxiv_token_set_f1": 0.36420785366806485, "eval_arxiv_token_set_f1_sem": 0.0044603440265640225, "eval_arxiv_token_set_precision": 0.31483497585554, "eval_arxiv_token_set_recall": 0.45624563278599656, "eval_arxiv_true_num_tokens": 64.0, "step": 252500 }, { "epoch": 48.48, "eval_python_code_alpaca_accuracy": 0.16315625, "eval_python_code_alpaca_bleu_score": 4.80265066334131, "eval_python_code_alpaca_bleu_score_sem": 0.15329768739829774, "eval_python_code_alpaca_emb_cos_sim": 0.7780901193618774, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007707415260611447, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8433306217193604, "eval_python_code_alpaca_n_ngrams_match_1": 10.146, "eval_python_code_alpaca_n_ngrams_match_2": 3.126, "eval_python_code_alpaca_n_ngrams_match_3": 1.08, "eval_python_code_alpaca_num_pred_words": 43.854, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.17286671601518, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3441052162234529, "eval_python_code_alpaca_runtime": 11.0685, "eval_python_code_alpaca_samples_per_second": 45.173, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4788373856917405, "eval_python_code_alpaca_token_set_f1_sem": 0.005535633109350059, "eval_python_code_alpaca_token_set_precision": 0.5559542974393263, "eval_python_code_alpaca_token_set_recall": 0.4426920524993888, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 252500 }, { "epoch": 48.48, "eval_wikibio_accuracy": 0.333, "eval_wikibio_bleu_score": 6.275643764195638, "eval_wikibio_bleu_score_sem": 0.22007016783032315, "eval_wikibio_emb_cos_sim": 0.7408552169799805, "eval_wikibio_emb_cos_sim_sem": 0.009536931587672907, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5935592651367188, "eval_wikibio_n_ngrams_match_1": 10.122, "eval_wikibio_n_ngrams_match_2": 3.506, "eval_wikibio_n_ngrams_match_3": 1.344, "eval_wikibio_num_pred_words": 35.854, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.36327239553451, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3579663194467216, "eval_wikibio_runtime": 11.1135, "eval_wikibio_samples_per_second": 44.99, "eval_wikibio_steps_per_second": 0.09, "eval_wikibio_token_set_f1": 0.32318602179276845, "eval_wikibio_token_set_f1_sem": 0.0056420714798284, "eval_wikibio_token_set_precision": 0.3318421320096968, "eval_wikibio_token_set_recall": 0.3329391706574089, "eval_wikibio_true_num_tokens": 61.1328125, "step": 252500 }, { "epoch": 48.48, "eval_nq_accuracy": 0.5394375, "eval_nq_bleu_score": 12.532029776140453, "eval_nq_bleu_score_sem": 0.48641532656614345, "eval_nq_emb_cos_sim": 0.8417807221412659, "eval_nq_emb_cos_sim_sem": 0.006886138477805626, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1042640209198, "eval_nq_n_ngrams_match_1": 23.658, "eval_nq_n_ngrams_match_2": 9.02, "eval_nq_n_ngrams_match_3": 4.224, "eval_nq_num_pred_words": 48.74, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.201064975677573, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4632701590166841, "eval_nq_runtime": 11.792, "eval_nq_samples_per_second": 42.402, "eval_nq_steps_per_second": 0.085, "eval_nq_token_set_f1": 0.4745589005744199, "eval_nq_token_set_f1_sem": 0.004988652119626734, "eval_nq_token_set_precision": 0.43380942006969636, "eval_nq_token_set_recall": 0.530726400324404, "eval_nq_true_num_tokens": 64.0, "step": 252500 }, { "epoch": 48.48, "learning_rate": 0.001, "loss": 2.4778, "step": 252504 }, { "epoch": 48.49, "learning_rate": 0.001, "loss": 2.483, "step": 252516 }, { "epoch": 48.49, "learning_rate": 0.001, "loss": 2.4839, "step": 252528 }, { "epoch": 48.49, "learning_rate": 0.001, "loss": 2.4856, "step": 252540 }, { "epoch": 48.49, "learning_rate": 0.001, "loss": 2.477, "step": 252552 }, { "epoch": 48.5, "learning_rate": 0.001, "loss": 2.4793, "step": 252564 }, { "epoch": 48.5, "learning_rate": 0.001, "loss": 2.4785, "step": 252576 }, { "epoch": 48.5, "learning_rate": 0.001, "loss": 2.467, "step": 252588 }, { "epoch": 48.5, "learning_rate": 0.001, "loss": 2.4788, "step": 252600 }, { "epoch": 48.5, "learning_rate": 0.001, "loss": 2.4875, "step": 252612 }, { "epoch": 48.51, "learning_rate": 0.001, "loss": 2.4801, "step": 252624 }, { "epoch": 48.51, "learning_rate": 0.001, "loss": 2.4824, "step": 252636 }, { "epoch": 48.51, "learning_rate": 0.001, "loss": 2.4845, "step": 252648 }, { "epoch": 48.51, "learning_rate": 0.001, "loss": 2.4722, "step": 252660 }, { "epoch": 48.52, "learning_rate": 0.001, "loss": 2.4729, "step": 252672 }, { "epoch": 48.52, "learning_rate": 0.001, "loss": 2.491, "step": 252684 }, { "epoch": 48.52, "learning_rate": 0.001, "loss": 2.4904, "step": 252696 }, { "epoch": 48.52, "learning_rate": 0.001, "loss": 2.4783, "step": 252708 }, { "epoch": 48.53, "learning_rate": 0.001, "loss": 2.488, "step": 252720 }, { "epoch": 48.53, "learning_rate": 0.001, "loss": 2.4817, "step": 252732 }, { "epoch": 48.53, "learning_rate": 0.001, "loss": 2.4793, "step": 252744 }, { "epoch": 48.53, "learning_rate": 0.001, "loss": 2.485, "step": 252756 }, { "epoch": 48.53, "learning_rate": 0.001, "loss": 2.4822, "step": 252768 }, { "epoch": 48.54, "learning_rate": 0.001, "loss": 2.4844, "step": 252780 }, { "epoch": 48.54, "learning_rate": 0.001, "loss": 2.4786, "step": 252792 }, { "epoch": 48.54, "learning_rate": 0.001, "loss": 2.4823, "step": 252804 }, { "epoch": 48.54, "learning_rate": 0.001, "loss": 2.4807, "step": 252816 }, { "epoch": 48.55, "learning_rate": 0.001, "loss": 2.4814, "step": 252828 }, { "epoch": 48.55, "learning_rate": 0.001, "loss": 2.485, "step": 252840 }, { "epoch": 48.55, "learning_rate": 0.001, "loss": 2.4849, "step": 252852 }, { "epoch": 48.55, "learning_rate": 0.001, "loss": 2.4838, "step": 252864 }, { "epoch": 48.56, "learning_rate": 0.001, "loss": 2.4847, "step": 252876 }, { "epoch": 48.56, "learning_rate": 0.001, "loss": 2.4763, "step": 252888 }, { "epoch": 48.56, "learning_rate": 0.001, "loss": 2.474, "step": 252900 }, { "epoch": 48.56, "learning_rate": 0.001, "loss": 2.4885, "step": 252912 }, { "epoch": 48.56, "learning_rate": 0.001, "loss": 2.4783, "step": 252924 }, { "epoch": 48.57, "learning_rate": 0.001, "loss": 2.487, "step": 252936 }, { "epoch": 48.57, "learning_rate": 0.001, "loss": 2.4963, "step": 252948 }, { "epoch": 48.57, "learning_rate": 0.001, "loss": 2.4779, "step": 252960 }, { "epoch": 48.57, "learning_rate": 0.001, "loss": 2.482, "step": 252972 }, { "epoch": 48.58, "learning_rate": 0.001, "loss": 2.4854, "step": 252984 }, { "epoch": 48.58, "learning_rate": 0.001, "loss": 2.4817, "step": 252996 }, { "epoch": 48.58, "learning_rate": 0.001, "loss": 2.4782, "step": 253008 }, { "epoch": 48.58, "learning_rate": 0.001, "loss": 2.4742, "step": 253020 }, { "epoch": 48.59, "learning_rate": 0.001, "loss": 2.4752, "step": 253032 }, { "epoch": 48.59, "learning_rate": 0.001, "loss": 2.4775, "step": 253044 }, { "epoch": 48.59, "learning_rate": 0.001, "loss": 2.4795, "step": 253056 }, { "epoch": 48.59, "learning_rate": 0.001, "loss": 2.477, "step": 253068 }, { "epoch": 48.59, "learning_rate": 0.001, "loss": 2.4816, "step": 253080 }, { "epoch": 48.6, "learning_rate": 0.001, "loss": 2.476, "step": 253092 }, { "epoch": 48.6, "learning_rate": 0.001, "loss": 2.4896, "step": 253104 }, { "epoch": 48.6, "learning_rate": 0.001, "loss": 2.4679, "step": 253116 }, { "epoch": 48.6, "eval_ag_news_accuracy": 0.33071875, "eval_ag_news_bleu_score": 5.029018791774612, "eval_ag_news_bleu_score_sem": 0.16025323196700453, "eval_ag_news_emb_cos_sim": 0.8215942978858948, "eval_ag_news_emb_cos_sim_sem": 0.006803333749916387, "eval_ag_news_emb_top1_equal": 0.28125, "eval_ag_news_emb_top1_equal_sem": 0.039896367485272234, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.456071138381958, "eval_ag_news_n_ngrams_match_1": 14.636, "eval_ag_news_n_ngrams_match_2": 3.3, "eval_ag_news_n_ngrams_match_3": 0.926, "eval_ag_news_num_pred_words": 46.904, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.692217258245563, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3637170365856054, "eval_ag_news_runtime": 15.4867, "eval_ag_news_samples_per_second": 32.286, "eval_ag_news_steps_per_second": 0.065, "eval_ag_news_token_set_f1": 0.3642033948761011, "eval_ag_news_token_set_f1_sem": 0.0044956921091983135, "eval_ag_news_token_set_precision": 0.3522736066559957, "eval_ag_news_token_set_recall": 0.39160754827718897, "eval_ag_news_true_num_tokens": 56.09375, "step": 253125 }, { "epoch": 48.6, "eval_anthropic_toxic_prompts_accuracy": 0.1160625, "eval_anthropic_toxic_prompts_bleu_score": 3.2493823455764965, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12203842006298647, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6819679737091064, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010042318399434084, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2215282917022705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.304, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.978, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.758, "eval_anthropic_toxic_prompts_num_pred_words": 46.51, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.066399693414002, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.9921875, "eval_anthropic_toxic_prompts_rouge_score": 0.2174435310219331, "eval_anthropic_toxic_prompts_runtime": 13.5509, "eval_anthropic_toxic_prompts_samples_per_second": 36.898, "eval_anthropic_toxic_prompts_steps_per_second": 0.074, "eval_anthropic_toxic_prompts_token_set_f1": 0.3573758256908113, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006767660534730145, "eval_anthropic_toxic_prompts_token_set_precision": 0.4446148497948846, "eval_anthropic_toxic_prompts_token_set_recall": 0.3240845534948466, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 253125 }, { "epoch": 48.6, "eval_arxiv_accuracy": 0.3534375, "eval_arxiv_bleu_score": 4.550651971693835, "eval_arxiv_bleu_score_sem": 0.13280610259382022, "eval_arxiv_emb_cos_sim": 0.786454439163208, "eval_arxiv_emb_cos_sim_sem": 0.007206302402707076, "eval_arxiv_emb_top1_equal": 0.296875, "eval_arxiv_emb_top1_equal_sem": 0.04054163310179599, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3219754695892334, "eval_arxiv_n_ngrams_match_1": 15.536, "eval_arxiv_n_ngrams_match_2": 3.094, "eval_arxiv_n_ngrams_match_3": 0.686, "eval_arxiv_num_pred_words": 40.352, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.71504674741398, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37486797961873203, "eval_arxiv_runtime": 16.6703, "eval_arxiv_samples_per_second": 29.993, "eval_arxiv_steps_per_second": 0.06, "eval_arxiv_token_set_f1": 0.36440504973071813, "eval_arxiv_token_set_f1_sem": 0.004194924982756976, "eval_arxiv_token_set_precision": 0.31769162676906054, "eval_arxiv_token_set_recall": 0.44514689265712953, "eval_arxiv_true_num_tokens": 64.0, "step": 253125 }, { "epoch": 48.6, "eval_python_code_alpaca_accuracy": 0.1634375, "eval_python_code_alpaca_bleu_score": 4.654156693089146, "eval_python_code_alpaca_bleu_score_sem": 0.13700916934822985, "eval_python_code_alpaca_emb_cos_sim": 0.7764959335327148, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007400505202477656, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8254692554473877, "eval_python_code_alpaca_n_ngrams_match_1": 10.194, "eval_python_code_alpaca_n_ngrams_match_2": 3.044, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 44.298, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.868858933058, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34306904025356433, "eval_python_code_alpaca_runtime": 50.4345, "eval_python_code_alpaca_samples_per_second": 9.914, "eval_python_code_alpaca_steps_per_second": 0.02, "eval_python_code_alpaca_token_set_f1": 0.48697602189721617, "eval_python_code_alpaca_token_set_f1_sem": 0.005473215237270407, "eval_python_code_alpaca_token_set_precision": 0.5592901350802559, "eval_python_code_alpaca_token_set_recall": 0.4544569818030442, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 253125 }, { "epoch": 48.6, "eval_wikibio_accuracy": 0.33134375, "eval_wikibio_bleu_score": 6.2896605784173705, "eval_wikibio_bleu_score_sem": 0.22623901816344533, "eval_wikibio_emb_cos_sim": 0.7492214441299438, "eval_wikibio_emb_cos_sim_sem": 0.009473250333750977, "eval_wikibio_emb_top1_equal": 0.2109375, "eval_wikibio_emb_top1_equal_sem": 0.03620184850179216, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6325645446777344, "eval_wikibio_n_ngrams_match_1": 10.512, "eval_wikibio_n_ngrams_match_2": 3.582, "eval_wikibio_n_ngrams_match_3": 1.37, "eval_wikibio_num_pred_words": 36.928, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.80965694291502, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3676459807421379, "eval_wikibio_runtime": 40.8156, "eval_wikibio_samples_per_second": 12.25, "eval_wikibio_steps_per_second": 0.025, "eval_wikibio_token_set_f1": 0.3288924977014137, "eval_wikibio_token_set_f1_sem": 0.005279235262117307, "eval_wikibio_token_set_precision": 0.33927496964631826, "eval_wikibio_token_set_recall": 0.33376851739270497, "eval_wikibio_true_num_tokens": 61.1328125, "step": 253125 }, { "epoch": 48.6, "eval_nq_accuracy": 0.53753125, "eval_nq_bleu_score": 12.373496132029944, "eval_nq_bleu_score_sem": 0.49336341376410425, "eval_nq_emb_cos_sim": 0.8392571210861206, "eval_nq_emb_cos_sim_sem": 0.007294463731624247, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1036689281463623, "eval_nq_n_ngrams_match_1": 23.858, "eval_nq_n_ngrams_match_2": 8.906, "eval_nq_n_ngrams_match_3": 4.15, "eval_nq_num_pred_words": 49.086, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.196186033031795, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.46387491758727206, "eval_nq_runtime": 13.0131, "eval_nq_samples_per_second": 38.423, "eval_nq_steps_per_second": 0.077, "eval_nq_token_set_f1": 0.4749037839251745, "eval_nq_token_set_f1_sem": 0.004927637796246293, "eval_nq_token_set_precision": 0.43430281628408124, "eval_nq_token_set_recall": 0.5315837021105687, "eval_nq_true_num_tokens": 64.0, "step": 253125 }, { "epoch": 48.6, "learning_rate": 0.001, "loss": 2.4964, "step": 253128 }, { "epoch": 48.61, "learning_rate": 0.001, "loss": 2.4808, "step": 253140 }, { "epoch": 48.61, "learning_rate": 0.001, "loss": 2.4695, "step": 253152 }, { "epoch": 48.61, "learning_rate": 0.001, "loss": 2.4874, "step": 253164 }, { "epoch": 48.61, "learning_rate": 0.001, "loss": 2.4859, "step": 253176 }, { "epoch": 48.62, "learning_rate": 0.001, "loss": 2.4806, "step": 253188 }, { "epoch": 48.62, "learning_rate": 0.001, "loss": 2.4868, "step": 253200 }, { "epoch": 48.62, "learning_rate": 0.001, "loss": 2.4841, "step": 253212 }, { "epoch": 48.62, "learning_rate": 0.001, "loss": 2.4859, "step": 253224 }, { "epoch": 48.62, "learning_rate": 0.001, "loss": 2.4879, "step": 253236 }, { "epoch": 48.63, "learning_rate": 0.001, "loss": 2.4843, "step": 253248 }, { "epoch": 48.63, "learning_rate": 0.001, "loss": 2.4762, "step": 253260 }, { "epoch": 48.63, "learning_rate": 0.001, "loss": 2.48, "step": 253272 }, { "epoch": 48.63, "learning_rate": 0.001, "loss": 2.4763, "step": 253284 }, { "epoch": 48.64, "learning_rate": 0.001, "loss": 2.4833, "step": 253296 }, { "epoch": 48.64, "learning_rate": 0.001, "loss": 2.4818, "step": 253308 }, { "epoch": 48.64, "learning_rate": 0.001, "loss": 2.4819, "step": 253320 }, { "epoch": 48.64, "learning_rate": 0.001, "loss": 2.4723, "step": 253332 }, { "epoch": 48.65, "learning_rate": 0.001, "loss": 2.4805, "step": 253344 }, { "epoch": 48.65, "learning_rate": 0.001, "loss": 2.4844, "step": 253356 }, { "epoch": 48.65, "learning_rate": 0.001, "loss": 2.4864, "step": 253368 }, { "epoch": 48.65, "learning_rate": 0.001, "loss": 2.4826, "step": 253380 }, { "epoch": 48.65, "learning_rate": 0.001, "loss": 2.4775, "step": 253392 }, { "epoch": 48.66, "learning_rate": 0.001, "loss": 2.484, "step": 253404 }, { "epoch": 48.66, "learning_rate": 0.001, "loss": 2.4843, "step": 253416 }, { "epoch": 48.66, "learning_rate": 0.001, "loss": 2.48, "step": 253428 }, { "epoch": 48.66, "learning_rate": 0.001, "loss": 2.4908, "step": 253440 }, { "epoch": 48.67, "learning_rate": 0.001, "loss": 2.4884, "step": 253452 }, { "epoch": 48.67, "learning_rate": 0.001, "loss": 2.4782, "step": 253464 }, { "epoch": 48.67, "learning_rate": 0.001, "loss": 2.4821, "step": 253476 }, { "epoch": 48.67, "learning_rate": 0.001, "loss": 2.4858, "step": 253488 }, { "epoch": 48.68, "learning_rate": 0.001, "loss": 2.481, "step": 253500 }, { "epoch": 48.68, "learning_rate": 0.001, "loss": 2.4805, "step": 253512 }, { "epoch": 48.68, "learning_rate": 0.001, "loss": 2.4915, "step": 253524 }, { "epoch": 48.68, "learning_rate": 0.001, "loss": 2.4801, "step": 253536 }, { "epoch": 48.68, "learning_rate": 0.001, "loss": 2.4744, "step": 253548 }, { "epoch": 48.69, "learning_rate": 0.001, "loss": 2.4844, "step": 253560 }, { "epoch": 48.69, "learning_rate": 0.001, "loss": 2.4847, "step": 253572 }, { "epoch": 48.69, "learning_rate": 0.001, "loss": 2.4916, "step": 253584 }, { "epoch": 48.69, "learning_rate": 0.001, "loss": 2.4872, "step": 253596 }, { "epoch": 48.7, "learning_rate": 0.001, "loss": 2.4943, "step": 253608 }, { "epoch": 48.7, "learning_rate": 0.001, "loss": 2.4802, "step": 253620 }, { "epoch": 48.7, "learning_rate": 0.001, "loss": 2.4829, "step": 253632 }, { "epoch": 48.7, "learning_rate": 0.001, "loss": 2.4907, "step": 253644 }, { "epoch": 48.71, "learning_rate": 0.001, "loss": 2.4802, "step": 253656 }, { "epoch": 48.71, "learning_rate": 0.001, "loss": 2.493, "step": 253668 }, { "epoch": 48.71, "learning_rate": 0.001, "loss": 2.4806, "step": 253680 }, { "epoch": 48.71, "learning_rate": 0.001, "loss": 2.4866, "step": 253692 }, { "epoch": 48.71, "learning_rate": 0.001, "loss": 2.4829, "step": 253704 }, { "epoch": 48.72, "learning_rate": 0.001, "loss": 2.478, "step": 253716 }, { "epoch": 48.72, "learning_rate": 0.001, "loss": 2.4891, "step": 253728 }, { "epoch": 48.72, "learning_rate": 0.001, "loss": 2.4871, "step": 253740 }, { "epoch": 48.72, "eval_ag_news_accuracy": 0.3313125, "eval_ag_news_bleu_score": 5.128968358415727, "eval_ag_news_bleu_score_sem": 0.16366944669656422, "eval_ag_news_emb_cos_sim": 0.8214734792709351, "eval_ag_news_emb_cos_sim_sem": 0.006868129261145822, "eval_ag_news_emb_top1_equal": 0.2265625, "eval_ag_news_emb_top1_equal_sem": 0.03714537682851538, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.45040225982666, "eval_ag_news_n_ngrams_match_1": 14.59, "eval_ag_news_n_ngrams_match_2": 3.376, "eval_ag_news_n_ngrams_match_3": 0.95, "eval_ag_news_num_pred_words": 46.888, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.51306620002553, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3623553388552514, "eval_ag_news_runtime": 12.2011, "eval_ag_news_samples_per_second": 40.98, "eval_ag_news_steps_per_second": 0.082, "eval_ag_news_token_set_f1": 0.36379130812307414, "eval_ag_news_token_set_f1_sem": 0.004417966401312443, "eval_ag_news_token_set_precision": 0.34925352804666904, "eval_ag_news_token_set_recall": 0.3930812574418512, "eval_ag_news_true_num_tokens": 56.09375, "step": 253750 }, { "epoch": 48.72, "eval_anthropic_toxic_prompts_accuracy": 0.1166875, "eval_anthropic_toxic_prompts_bleu_score": 3.4002281031583914, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12658116029746078, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6897055506706238, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008663798376130493, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1328125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.030114394778901498, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1953988075256348, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.374, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.106, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.836, "eval_anthropic_toxic_prompts_num_pred_words": 47.184, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.419910594711585, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21717328956466744, "eval_anthropic_toxic_prompts_runtime": 11.4752, "eval_anthropic_toxic_prompts_samples_per_second": 43.572, "eval_anthropic_toxic_prompts_steps_per_second": 0.087, "eval_anthropic_toxic_prompts_token_set_f1": 0.36254640208013383, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006471987774614313, "eval_anthropic_toxic_prompts_token_set_precision": 0.4531194103882615, "eval_anthropic_toxic_prompts_token_set_recall": 0.32826765874453734, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 253750 }, { "epoch": 48.72, "eval_arxiv_accuracy": 0.3555625, "eval_arxiv_bleu_score": 4.508033467346459, "eval_arxiv_bleu_score_sem": 0.13802875036011597, "eval_arxiv_emb_cos_sim": 0.7842447757720947, "eval_arxiv_emb_cos_sim_sem": 0.006670468444163825, "eval_arxiv_emb_top1_equal": 0.2734375, "eval_arxiv_emb_top1_equal_sem": 0.03955156411760461, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.308542013168335, "eval_arxiv_n_ngrams_match_1": 15.66, "eval_arxiv_n_ngrams_match_2": 3.112, "eval_arxiv_n_ngrams_match_3": 0.696, "eval_arxiv_num_pred_words": 40.804, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.3452274120723, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37419834359180304, "eval_arxiv_runtime": 11.3403, "eval_arxiv_samples_per_second": 44.091, "eval_arxiv_steps_per_second": 0.088, "eval_arxiv_token_set_f1": 0.3675089206463676, "eval_arxiv_token_set_f1_sem": 0.004260756206432361, "eval_arxiv_token_set_precision": 0.320717927771569, "eval_arxiv_token_set_recall": 0.4491744764609272, "eval_arxiv_true_num_tokens": 64.0, "step": 253750 }, { "epoch": 48.72, "eval_python_code_alpaca_accuracy": 0.16365625, "eval_python_code_alpaca_bleu_score": 4.753768023089755, "eval_python_code_alpaca_bleu_score_sem": 0.14112334567919935, "eval_python_code_alpaca_emb_cos_sim": 0.778369665145874, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007066850334785672, "eval_python_code_alpaca_emb_top1_equal": 0.1484375, "eval_python_code_alpaca_emb_top1_equal_sem": 0.031548465007086954, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.837441921234131, "eval_python_code_alpaca_n_ngrams_match_1": 10.174, "eval_python_code_alpaca_n_ngrams_match_2": 3.074, "eval_python_code_alpaca_n_ngrams_match_3": 1.004, "eval_python_code_alpaca_num_pred_words": 43.51, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.07203801383337, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3494550184851903, "eval_python_code_alpaca_runtime": 10.8989, "eval_python_code_alpaca_samples_per_second": 45.876, "eval_python_code_alpaca_steps_per_second": 0.092, "eval_python_code_alpaca_token_set_f1": 0.4892069168098178, "eval_python_code_alpaca_token_set_f1_sem": 0.00532658545556864, "eval_python_code_alpaca_token_set_precision": 0.5602916041459619, "eval_python_code_alpaca_token_set_recall": 0.45590776024631197, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 253750 }, { "epoch": 48.72, "eval_wikibio_accuracy": 0.33384375, "eval_wikibio_bleu_score": 6.01514441990093, "eval_wikibio_bleu_score_sem": 0.2116492784977639, "eval_wikibio_emb_cos_sim": 0.7581247091293335, "eval_wikibio_emb_cos_sim_sem": 0.00824277506710895, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.57375431060791, "eval_wikibio_n_ngrams_match_1": 10.182, "eval_wikibio_n_ngrams_match_2": 3.41, "eval_wikibio_n_ngrams_match_3": 1.256, "eval_wikibio_num_pred_words": 36.986, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 35.65018408807742, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3586691305485913, "eval_wikibio_runtime": 11.3856, "eval_wikibio_samples_per_second": 43.915, "eval_wikibio_steps_per_second": 0.088, "eval_wikibio_token_set_f1": 0.3239858067130597, "eval_wikibio_token_set_f1_sem": 0.005147906260231054, "eval_wikibio_token_set_precision": 0.33242034267173876, "eval_wikibio_token_set_recall": 0.3335561853714311, "eval_wikibio_true_num_tokens": 61.1328125, "step": 253750 }, { "epoch": 48.72, "eval_nq_accuracy": 0.54, "eval_nq_bleu_score": 12.526305871999059, "eval_nq_bleu_score_sem": 0.49175494701577427, "eval_nq_emb_cos_sim": 0.8404859304428101, "eval_nq_emb_cos_sim_sem": 0.0073053751555770124, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1025004386901855, "eval_nq_n_ngrams_match_1": 23.64, "eval_nq_n_ngrams_match_2": 8.892, "eval_nq_n_ngrams_match_3": 4.208, "eval_nq_num_pred_words": 49.04, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.186614469296044, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.457992451046238, "eval_nq_runtime": 11.6494, "eval_nq_samples_per_second": 42.921, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.47238334226022505, "eval_nq_token_set_f1_sem": 0.005001456740180658, "eval_nq_token_set_precision": 0.431945326249031, "eval_nq_token_set_recall": 0.5287395962647848, "eval_nq_true_num_tokens": 64.0, "step": 253750 }, { "epoch": 48.72, "learning_rate": 0.001, "loss": 2.4851, "step": 253752 }, { "epoch": 48.73, "learning_rate": 0.001, "loss": 2.4765, "step": 253764 }, { "epoch": 48.73, "learning_rate": 0.001, "loss": 2.4831, "step": 253776 }, { "epoch": 48.73, "learning_rate": 0.001, "loss": 2.4767, "step": 253788 }, { "epoch": 48.73, "learning_rate": 0.001, "loss": 2.4794, "step": 253800 }, { "epoch": 48.74, "learning_rate": 0.001, "loss": 2.4867, "step": 253812 }, { "epoch": 48.74, "learning_rate": 0.001, "loss": 2.4955, "step": 253824 }, { "epoch": 48.74, "learning_rate": 0.001, "loss": 2.4799, "step": 253836 }, { "epoch": 48.74, "learning_rate": 0.001, "loss": 2.4698, "step": 253848 }, { "epoch": 48.74, "learning_rate": 0.001, "loss": 2.486, "step": 253860 }, { "epoch": 48.75, "learning_rate": 0.001, "loss": 2.4838, "step": 253872 }, { "epoch": 48.75, "learning_rate": 0.001, "loss": 2.4899, "step": 253884 }, { "epoch": 48.75, "learning_rate": 0.001, "loss": 2.4834, "step": 253896 }, { "epoch": 48.75, "learning_rate": 0.001, "loss": 2.4821, "step": 253908 }, { "epoch": 48.76, "learning_rate": 0.001, "loss": 2.486, "step": 253920 }, { "epoch": 48.76, "learning_rate": 0.001, "loss": 2.4874, "step": 253932 }, { "epoch": 48.76, "learning_rate": 0.001, "loss": 2.4777, "step": 253944 }, { "epoch": 48.76, "learning_rate": 0.001, "loss": 2.4949, "step": 253956 }, { "epoch": 48.76, "learning_rate": 0.001, "loss": 2.4937, "step": 253968 }, { "epoch": 48.77, "learning_rate": 0.001, "loss": 2.4861, "step": 253980 }, { "epoch": 48.77, "learning_rate": 0.001, "loss": 2.4799, "step": 253992 }, { "epoch": 48.77, "learning_rate": 0.001, "loss": 2.4831, "step": 254004 }, { "epoch": 48.77, "learning_rate": 0.001, "loss": 2.4849, "step": 254016 }, { "epoch": 48.78, "learning_rate": 0.001, "loss": 2.4958, "step": 254028 }, { "epoch": 48.78, "learning_rate": 0.001, "loss": 2.4836, "step": 254040 }, { "epoch": 48.78, "learning_rate": 0.001, "loss": 2.488, "step": 254052 }, { "epoch": 48.78, "learning_rate": 0.001, "loss": 2.4834, "step": 254064 }, { "epoch": 48.79, "learning_rate": 0.001, "loss": 2.4918, "step": 254076 }, { "epoch": 48.79, "learning_rate": 0.001, "loss": 2.4813, "step": 254088 }, { "epoch": 48.79, "learning_rate": 0.001, "loss": 2.4834, "step": 254100 }, { "epoch": 48.79, "learning_rate": 0.001, "loss": 2.488, "step": 254112 }, { "epoch": 48.79, "learning_rate": 0.001, "loss": 2.4931, "step": 254124 }, { "epoch": 48.8, "learning_rate": 0.001, "loss": 2.4921, "step": 254136 }, { "epoch": 48.8, "learning_rate": 0.001, "loss": 2.4871, "step": 254148 }, { "epoch": 48.8, "learning_rate": 0.001, "loss": 2.4825, "step": 254160 }, { "epoch": 48.8, "learning_rate": 0.001, "loss": 2.4896, "step": 254172 }, { "epoch": 48.81, "learning_rate": 0.001, "loss": 2.4878, "step": 254184 }, { "epoch": 48.81, "learning_rate": 0.001, "loss": 2.4907, "step": 254196 }, { "epoch": 48.81, "learning_rate": 0.001, "loss": 2.4877, "step": 254208 }, { "epoch": 48.81, "learning_rate": 0.001, "loss": 2.4837, "step": 254220 }, { "epoch": 48.82, "learning_rate": 0.001, "loss": 2.4861, "step": 254232 }, { "epoch": 48.82, "learning_rate": 0.001, "loss": 2.4957, "step": 254244 }, { "epoch": 48.82, "learning_rate": 0.001, "loss": 2.4841, "step": 254256 }, { "epoch": 48.82, "learning_rate": 0.001, "loss": 2.489, "step": 254268 }, { "epoch": 48.82, "learning_rate": 0.001, "loss": 2.4925, "step": 254280 }, { "epoch": 48.83, "learning_rate": 0.001, "loss": 2.4883, "step": 254292 }, { "epoch": 48.83, "learning_rate": 0.001, "loss": 2.4845, "step": 254304 }, { "epoch": 48.83, "learning_rate": 0.001, "loss": 2.4852, "step": 254316 }, { "epoch": 48.83, "learning_rate": 0.001, "loss": 2.4882, "step": 254328 }, { "epoch": 48.84, "learning_rate": 0.001, "loss": 2.4945, "step": 254340 }, { "epoch": 48.84, "learning_rate": 0.001, "loss": 2.4886, "step": 254352 }, { "epoch": 48.84, "learning_rate": 0.001, "loss": 2.4807, "step": 254364 }, { "epoch": 48.84, "eval_ag_news_accuracy": 0.33259375, "eval_ag_news_bleu_score": 5.116390845398218, "eval_ag_news_bleu_score_sem": 0.16607483030992765, "eval_ag_news_emb_cos_sim": 0.8242052793502808, "eval_ag_news_emb_cos_sim_sem": 0.006755547126239128, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4539828300476074, "eval_ag_news_n_ngrams_match_1": 14.506, "eval_ag_news_n_ngrams_match_2": 3.362, "eval_ag_news_n_ngrams_match_3": 0.998, "eval_ag_news_num_pred_words": 46.708, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.626103194113817, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3621275434508225, "eval_ag_news_runtime": 21.7164, "eval_ag_news_samples_per_second": 23.024, "eval_ag_news_steps_per_second": 0.046, "eval_ag_news_token_set_f1": 0.3609943472548123, "eval_ag_news_token_set_f1_sem": 0.004383250753844021, "eval_ag_news_token_set_precision": 0.3462977448534223, "eval_ag_news_token_set_recall": 0.39312052325829416, "eval_ag_news_true_num_tokens": 56.09375, "step": 254375 }, { "epoch": 48.84, "eval_anthropic_toxic_prompts_accuracy": 0.11665625, "eval_anthropic_toxic_prompts_bleu_score": 3.2670446914870928, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12414597670877574, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6821401715278625, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009851487347544376, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.2194252014160156, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.376, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.764, "eval_anthropic_toxic_prompts_num_pred_words": 46.948, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 25.013738187068732, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21795243524092545, "eval_anthropic_toxic_prompts_runtime": 11.1921, "eval_anthropic_toxic_prompts_samples_per_second": 44.674, "eval_anthropic_toxic_prompts_steps_per_second": 0.089, "eval_anthropic_toxic_prompts_token_set_f1": 0.3576999860582102, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006463571248950621, "eval_anthropic_toxic_prompts_token_set_precision": 0.4497737653146231, "eval_anthropic_toxic_prompts_token_set_recall": 0.3212514680656723, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 254375 }, { "epoch": 48.84, "eval_arxiv_accuracy": 0.3536875, "eval_arxiv_bleu_score": 4.522572548241414, "eval_arxiv_bleu_score_sem": 0.1386361293207542, "eval_arxiv_emb_cos_sim": 0.7847319841384888, "eval_arxiv_emb_cos_sim_sem": 0.007399415969861453, "eval_arxiv_emb_top1_equal": 0.2578125, "eval_arxiv_emb_top1_equal_sem": 0.038815656435002115, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3165972232818604, "eval_arxiv_n_ngrams_match_1": 15.6, "eval_arxiv_n_ngrams_match_2": 3.14, "eval_arxiv_n_ngrams_match_3": 0.75, "eval_arxiv_num_pred_words": 40.546, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.56638851820688, "eval_arxiv_pred_num_tokens": 62.96875, "eval_arxiv_rouge_score": 0.37230089354554563, "eval_arxiv_runtime": 16.4873, "eval_arxiv_samples_per_second": 30.326, "eval_arxiv_steps_per_second": 0.061, "eval_arxiv_token_set_f1": 0.36531037581461423, "eval_arxiv_token_set_f1_sem": 0.004353938343634288, "eval_arxiv_token_set_precision": 0.3176507963641678, "eval_arxiv_token_set_recall": 0.45086158988561276, "eval_arxiv_true_num_tokens": 64.0, "step": 254375 }, { "epoch": 48.84, "eval_python_code_alpaca_accuracy": 0.16309375, "eval_python_code_alpaca_bleu_score": 4.8138126980068465, "eval_python_code_alpaca_bleu_score_sem": 0.1538142144557821, "eval_python_code_alpaca_emb_cos_sim": 0.77247154712677, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007147478573076028, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.809692144393921, "eval_python_code_alpaca_n_ngrams_match_1": 9.982, "eval_python_code_alpaca_n_ngrams_match_2": 3.006, "eval_python_code_alpaca_n_ngrams_match_3": 1.052, "eval_python_code_alpaca_num_pred_words": 43.218, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.60480554936888, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3493759692055528, "eval_python_code_alpaca_runtime": 11.6555, "eval_python_code_alpaca_samples_per_second": 42.898, "eval_python_code_alpaca_steps_per_second": 0.086, "eval_python_code_alpaca_token_set_f1": 0.4826573502048842, "eval_python_code_alpaca_token_set_f1_sem": 0.005559391738856014, "eval_python_code_alpaca_token_set_precision": 0.5483371128134638, "eval_python_code_alpaca_token_set_recall": 0.45333285934503614, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 254375 }, { "epoch": 48.84, "eval_wikibio_accuracy": 0.33096875, "eval_wikibio_bleu_score": 6.045653172373738, "eval_wikibio_bleu_score_sem": 0.21311411770047786, "eval_wikibio_emb_cos_sim": 0.7322331070899963, "eval_wikibio_emb_cos_sim_sem": 0.009280145986219712, "eval_wikibio_emb_top1_equal": 0.21875, "eval_wikibio_emb_top1_equal_sem": 0.03668319712192295, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5873866081237793, "eval_wikibio_n_ngrams_match_1": 9.956, "eval_wikibio_n_ngrams_match_2": 3.374, "eval_wikibio_n_ngrams_match_3": 1.212, "eval_wikibio_num_pred_words": 35.502, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.13950571514363, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35297284421981323, "eval_wikibio_runtime": 11.8409, "eval_wikibio_samples_per_second": 42.226, "eval_wikibio_steps_per_second": 0.084, "eval_wikibio_token_set_f1": 0.3200043641537511, "eval_wikibio_token_set_f1_sem": 0.005632981453349366, "eval_wikibio_token_set_precision": 0.32535151519833405, "eval_wikibio_token_set_recall": 0.3329054724100076, "eval_wikibio_true_num_tokens": 61.1328125, "step": 254375 }, { "epoch": 48.84, "eval_nq_accuracy": 0.53865625, "eval_nq_bleu_score": 12.199795292352317, "eval_nq_bleu_score_sem": 0.46594130850674187, "eval_nq_emb_cos_sim": 0.8400046825408936, "eval_nq_emb_cos_sim_sem": 0.007104173344775592, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1046581268310547, "eval_nq_n_ngrams_match_1": 23.63, "eval_nq_n_ngrams_match_2": 8.756, "eval_nq_n_ngrams_match_3": 4.078, "eval_nq_num_pred_words": 49.026, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.204297700839279, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4593181281504545, "eval_nq_runtime": 11.844, "eval_nq_samples_per_second": 42.215, "eval_nq_steps_per_second": 0.084, "eval_nq_token_set_f1": 0.470378723385126, "eval_nq_token_set_f1_sem": 0.004948314345017564, "eval_nq_token_set_precision": 0.43025516421183024, "eval_nq_token_set_recall": 0.526909176280272, "eval_nq_true_num_tokens": 64.0, "step": 254375 }, { "epoch": 48.84, "learning_rate": 0.001, "loss": 2.4857, "step": 254376 }, { "epoch": 48.85, "learning_rate": 0.001, "loss": 2.491, "step": 254388 }, { "epoch": 48.85, "learning_rate": 0.001, "loss": 2.4946, "step": 254400 }, { "epoch": 48.85, "learning_rate": 0.001, "loss": 2.4773, "step": 254412 }, { "epoch": 48.85, "learning_rate": 0.001, "loss": 2.4957, "step": 254424 }, { "epoch": 48.85, "learning_rate": 0.001, "loss": 2.4964, "step": 254436 }, { "epoch": 48.86, "learning_rate": 0.001, "loss": 2.4998, "step": 254448 }, { "epoch": 48.86, "learning_rate": 0.001, "loss": 2.5007, "step": 254460 }, { "epoch": 48.86, "learning_rate": 0.001, "loss": 2.4778, "step": 254472 }, { "epoch": 48.86, "learning_rate": 0.001, "loss": 2.4986, "step": 254484 }, { "epoch": 48.87, "learning_rate": 0.001, "loss": 2.4862, "step": 254496 }, { "epoch": 48.87, "learning_rate": 0.001, "loss": 2.4827, "step": 254508 }, { "epoch": 48.87, "learning_rate": 0.001, "loss": 2.4893, "step": 254520 }, { "epoch": 48.87, "learning_rate": 0.001, "loss": 2.4823, "step": 254532 }, { "epoch": 48.88, "learning_rate": 0.001, "loss": 2.4741, "step": 254544 }, { "epoch": 48.88, "learning_rate": 0.001, "loss": 2.487, "step": 254556 }, { "epoch": 48.88, "learning_rate": 0.001, "loss": 2.4851, "step": 254568 }, { "epoch": 48.88, "learning_rate": 0.001, "loss": 2.4778, "step": 254580 }, { "epoch": 48.88, "learning_rate": 0.001, "loss": 2.492, "step": 254592 }, { "epoch": 48.89, "learning_rate": 0.001, "loss": 2.481, "step": 254604 }, { "epoch": 48.89, "learning_rate": 0.001, "loss": 2.4827, "step": 254616 }, { "epoch": 48.89, "learning_rate": 0.001, "loss": 2.4939, "step": 254628 }, { "epoch": 48.89, "learning_rate": 0.001, "loss": 2.4848, "step": 254640 }, { "epoch": 48.9, "learning_rate": 0.001, "loss": 2.4815, "step": 254652 }, { "epoch": 48.9, "learning_rate": 0.001, "loss": 2.4942, "step": 254664 }, { "epoch": 48.9, "learning_rate": 0.001, "loss": 2.4928, "step": 254676 }, { "epoch": 48.9, "learning_rate": 0.001, "loss": 2.4828, "step": 254688 }, { "epoch": 48.91, "learning_rate": 0.001, "loss": 2.4822, "step": 254700 }, { "epoch": 48.91, "learning_rate": 0.001, "loss": 2.5007, "step": 254712 }, { "epoch": 48.91, "learning_rate": 0.001, "loss": 2.4829, "step": 254724 }, { "epoch": 48.91, "learning_rate": 0.001, "loss": 2.4853, "step": 254736 }, { "epoch": 48.91, "learning_rate": 0.001, "loss": 2.4911, "step": 254748 }, { "epoch": 48.92, "learning_rate": 0.001, "loss": 2.4868, "step": 254760 }, { "epoch": 48.92, "learning_rate": 0.001, "loss": 2.4848, "step": 254772 }, { "epoch": 48.92, "learning_rate": 0.001, "loss": 2.4805, "step": 254784 }, { "epoch": 48.92, "learning_rate": 0.001, "loss": 2.4868, "step": 254796 }, { "epoch": 48.93, "learning_rate": 0.001, "loss": 2.4826, "step": 254808 }, { "epoch": 48.93, "learning_rate": 0.001, "loss": 2.491, "step": 254820 }, { "epoch": 48.93, "learning_rate": 0.001, "loss": 2.4825, "step": 254832 }, { "epoch": 48.93, "learning_rate": 0.001, "loss": 2.4859, "step": 254844 }, { "epoch": 48.94, "learning_rate": 0.001, "loss": 2.4842, "step": 254856 }, { "epoch": 48.94, "learning_rate": 0.001, "loss": 2.4881, "step": 254868 }, { "epoch": 48.94, "learning_rate": 0.001, "loss": 2.4952, "step": 254880 }, { "epoch": 48.94, "learning_rate": 0.001, "loss": 2.4833, "step": 254892 }, { "epoch": 48.94, "learning_rate": 0.001, "loss": 2.4788, "step": 254904 }, { "epoch": 48.95, "learning_rate": 0.001, "loss": 2.4846, "step": 254916 }, { "epoch": 48.95, "learning_rate": 0.001, "loss": 2.4865, "step": 254928 }, { "epoch": 48.95, "learning_rate": 0.001, "loss": 2.4859, "step": 254940 }, { "epoch": 48.95, "learning_rate": 0.001, "loss": 2.4846, "step": 254952 }, { "epoch": 48.96, "learning_rate": 0.001, "loss": 2.484, "step": 254964 }, { "epoch": 48.96, "learning_rate": 0.001, "loss": 2.4837, "step": 254976 }, { "epoch": 48.96, "learning_rate": 0.001, "loss": 2.4886, "step": 254988 }, { "epoch": 48.96, "learning_rate": 0.001, "loss": 2.4834, "step": 255000 }, { "epoch": 48.96, "eval_ag_news_accuracy": 0.33328125, "eval_ag_news_bleu_score": 5.085752596456417, "eval_ag_news_bleu_score_sem": 0.16053073564002043, "eval_ag_news_emb_cos_sim": 0.823728621006012, "eval_ag_news_emb_cos_sim_sem": 0.0070037736247220705, "eval_ag_news_emb_top1_equal": 0.296875, "eval_ag_news_emb_top1_equal_sem": 0.04054163310179599, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.454833507537842, "eval_ag_news_n_ngrams_match_1": 14.626, "eval_ag_news_n_ngrams_match_2": 3.326, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.364, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.653018254599836, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3645419732953368, "eval_ag_news_runtime": 12.7888, "eval_ag_news_samples_per_second": 39.097, "eval_ag_news_steps_per_second": 0.078, "eval_ag_news_token_set_f1": 0.3638430978439053, "eval_ag_news_token_set_f1_sem": 0.004393774169987551, "eval_ag_news_token_set_precision": 0.3491185340997753, "eval_ag_news_token_set_recall": 0.3948944478224903, "eval_ag_news_true_num_tokens": 56.09375, "step": 255000 }, { "epoch": 48.96, "eval_anthropic_toxic_prompts_accuracy": 0.11721875, "eval_anthropic_toxic_prompts_bleu_score": 3.2375065412788535, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.1186138332351007, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6916943788528442, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008727303798668791, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1852755546569824, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.314, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.046, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.77, "eval_anthropic_toxic_prompts_num_pred_words": 46.896, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.173948732311807, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21522095019795162, "eval_anthropic_toxic_prompts_runtime": 10.923, "eval_anthropic_toxic_prompts_samples_per_second": 45.775, "eval_anthropic_toxic_prompts_steps_per_second": 0.092, "eval_anthropic_toxic_prompts_token_set_f1": 0.36031047607084526, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006670312847901974, "eval_anthropic_toxic_prompts_token_set_precision": 0.4415682902984979, "eval_anthropic_toxic_prompts_token_set_recall": 0.3310342872590095, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 255000 }, { "epoch": 48.96, "eval_arxiv_accuracy": 0.35359375, "eval_arxiv_bleu_score": 4.6744568097240755, "eval_arxiv_bleu_score_sem": 0.13816459704929035, "eval_arxiv_emb_cos_sim": 0.7932637929916382, "eval_arxiv_emb_cos_sim_sem": 0.005929044134217957, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3140530586242676, "eval_arxiv_n_ngrams_match_1": 15.706, "eval_arxiv_n_ngrams_match_2": 3.182, "eval_arxiv_n_ngrams_match_3": 0.752, "eval_arxiv_num_pred_words": 40.334, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.496344226688596, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3779389021059821, "eval_arxiv_runtime": 12.3162, "eval_arxiv_samples_per_second": 40.597, "eval_arxiv_steps_per_second": 0.081, "eval_arxiv_token_set_f1": 0.36883365446540173, "eval_arxiv_token_set_f1_sem": 0.004326562442193468, "eval_arxiv_token_set_precision": 0.3222304183839358, "eval_arxiv_token_set_recall": 0.4513081623526788, "eval_arxiv_true_num_tokens": 64.0, "step": 255000 }, { "epoch": 48.96, "eval_python_code_alpaca_accuracy": 0.16196875, "eval_python_code_alpaca_bleu_score": 4.733534700409421, "eval_python_code_alpaca_bleu_score_sem": 0.14786379444792305, "eval_python_code_alpaca_emb_cos_sim": 0.7603433132171631, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007492076474857161, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.855957508087158, "eval_python_code_alpaca_n_ngrams_match_1": 9.898, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 1.006, "eval_python_code_alpaca_num_pred_words": 42.93, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.391081338965936, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3439203879910345, "eval_python_code_alpaca_runtime": 13.0871, "eval_python_code_alpaca_samples_per_second": 38.206, "eval_python_code_alpaca_steps_per_second": 0.076, "eval_python_code_alpaca_token_set_f1": 0.4798061564333848, "eval_python_code_alpaca_token_set_f1_sem": 0.005485128761968899, "eval_python_code_alpaca_token_set_precision": 0.5429240085197976, "eval_python_code_alpaca_token_set_recall": 0.4544728813680131, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 255000 }, { "epoch": 48.96, "eval_wikibio_accuracy": 0.33103125, "eval_wikibio_bleu_score": 6.146821760889383, "eval_wikibio_bleu_score_sem": 0.21852813109526867, "eval_wikibio_emb_cos_sim": 0.7664906978607178, "eval_wikibio_emb_cos_sim_sem": 0.0083355132803566, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.584001064300537, "eval_wikibio_n_ngrams_match_1": 10.072, "eval_wikibio_n_ngrams_match_2": 3.35, "eval_wikibio_n_ngrams_match_3": 1.264, "eval_wikibio_num_pred_words": 35.88, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.017360715087804, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3561500289438334, "eval_wikibio_runtime": 11.5179, "eval_wikibio_samples_per_second": 43.411, "eval_wikibio_steps_per_second": 0.087, "eval_wikibio_token_set_f1": 0.3210639942421022, "eval_wikibio_token_set_f1_sem": 0.005447391853149522, "eval_wikibio_token_set_precision": 0.32915487126781606, "eval_wikibio_token_set_recall": 0.3292660788137592, "eval_wikibio_true_num_tokens": 61.1328125, "step": 255000 }, { "epoch": 48.96, "eval_nq_accuracy": 0.5411875, "eval_nq_bleu_score": 12.414931991591992, "eval_nq_bleu_score_sem": 0.497635814101261, "eval_nq_emb_cos_sim": 0.8404495716094971, "eval_nq_emb_cos_sim_sem": 0.006607508296478137, "eval_nq_emb_top1_equal": 0.2890625, "eval_nq_emb_top1_equal_sem": 0.04022626667363519, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1004555225372314, "eval_nq_n_ngrams_match_1": 23.684, "eval_nq_n_ngrams_match_2": 8.868, "eval_nq_n_ngrams_match_3": 4.182, "eval_nq_num_pred_words": 49.044, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.169890634377667, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45974699431206145, "eval_nq_runtime": 15.948, "eval_nq_samples_per_second": 31.352, "eval_nq_steps_per_second": 0.063, "eval_nq_token_set_f1": 0.47324117221861284, "eval_nq_token_set_f1_sem": 0.005170891797397959, "eval_nq_token_set_precision": 0.431889991612914, "eval_nq_token_set_recall": 0.5309646132809431, "eval_nq_true_num_tokens": 64.0, "step": 255000 }, { "epoch": 48.97, "learning_rate": 0.001, "loss": 2.4895, "step": 255012 }, { "epoch": 48.97, "learning_rate": 0.001, "loss": 2.4865, "step": 255024 }, { "epoch": 48.97, "learning_rate": 0.001, "loss": 2.4895, "step": 255036 }, { "epoch": 48.97, "learning_rate": 0.001, "loss": 2.4857, "step": 255048 }, { "epoch": 48.97, "learning_rate": 0.001, "loss": 2.4807, "step": 255060 }, { "epoch": 48.98, "learning_rate": 0.001, "loss": 2.4985, "step": 255072 }, { "epoch": 48.98, "learning_rate": 0.001, "loss": 2.4841, "step": 255084 }, { "epoch": 48.98, "learning_rate": 0.001, "loss": 2.5027, "step": 255096 }, { "epoch": 48.98, "learning_rate": 0.001, "loss": 2.4875, "step": 255108 }, { "epoch": 48.99, "learning_rate": 0.001, "loss": 2.4901, "step": 255120 }, { "epoch": 48.99, "learning_rate": 0.001, "loss": 2.4716, "step": 255132 }, { "epoch": 48.99, "learning_rate": 0.001, "loss": 2.4967, "step": 255144 }, { "epoch": 48.99, "learning_rate": 0.001, "loss": 2.4942, "step": 255156 }, { "epoch": 49.0, "learning_rate": 0.001, "loss": 2.4861, "step": 255168 }, { "epoch": 49.0, "learning_rate": 0.001, "loss": 2.4828, "step": 255180 }, { "epoch": 49.0, "learning_rate": 0.001, "loss": 2.4896, "step": 255192 }, { "epoch": 49.0, "learning_rate": 0.001, "loss": 2.4663, "step": 255204 }, { "epoch": 49.0, "learning_rate": 0.001, "loss": 2.4718, "step": 255216 }, { "epoch": 49.01, "learning_rate": 0.001, "loss": 2.4728, "step": 255228 }, { "epoch": 49.01, "learning_rate": 0.001, "loss": 2.4671, "step": 255240 }, { "epoch": 49.01, "learning_rate": 0.001, "loss": 2.4731, "step": 255252 }, { "epoch": 49.01, "learning_rate": 0.001, "loss": 2.4735, "step": 255264 }, { "epoch": 49.02, "learning_rate": 0.001, "loss": 2.4709, "step": 255276 }, { "epoch": 49.02, "learning_rate": 0.001, "loss": 2.4797, "step": 255288 }, { "epoch": 49.02, "learning_rate": 0.001, "loss": 2.4698, "step": 255300 }, { "epoch": 49.02, "learning_rate": 0.001, "loss": 2.4625, "step": 255312 }, { "epoch": 49.03, "learning_rate": 0.001, "loss": 2.4648, "step": 255324 }, { "epoch": 49.03, "learning_rate": 0.001, "loss": 2.474, "step": 255336 }, { "epoch": 49.03, "learning_rate": 0.001, "loss": 2.4684, "step": 255348 }, { "epoch": 49.03, "learning_rate": 0.001, "loss": 2.4749, "step": 255360 }, { "epoch": 49.03, "learning_rate": 0.001, "loss": 2.4724, "step": 255372 }, { "epoch": 49.04, "learning_rate": 0.001, "loss": 2.4723, "step": 255384 }, { "epoch": 49.04, "learning_rate": 0.001, "loss": 2.476, "step": 255396 }, { "epoch": 49.04, "learning_rate": 0.001, "loss": 2.4792, "step": 255408 }, { "epoch": 49.04, "learning_rate": 0.001, "loss": 2.4766, "step": 255420 }, { "epoch": 49.05, "learning_rate": 0.001, "loss": 2.4744, "step": 255432 }, { "epoch": 49.05, "learning_rate": 0.001, "loss": 2.4734, "step": 255444 }, { "epoch": 49.05, "learning_rate": 0.001, "loss": 2.4715, "step": 255456 }, { "epoch": 49.05, "learning_rate": 0.001, "loss": 2.4721, "step": 255468 }, { "epoch": 49.06, "learning_rate": 0.001, "loss": 2.4661, "step": 255480 }, { "epoch": 49.06, "learning_rate": 0.001, "loss": 2.485, "step": 255492 }, { "epoch": 49.06, "learning_rate": 0.001, "loss": 2.4758, "step": 255504 }, { "epoch": 49.06, "learning_rate": 0.001, "loss": 2.4684, "step": 255516 }, { "epoch": 49.06, "learning_rate": 0.001, "loss": 2.4705, "step": 255528 }, { "epoch": 49.07, "learning_rate": 0.001, "loss": 2.4683, "step": 255540 }, { "epoch": 49.07, "learning_rate": 0.001, "loss": 2.4738, "step": 255552 }, { "epoch": 49.07, "learning_rate": 0.001, "loss": 2.4778, "step": 255564 }, { "epoch": 49.07, "learning_rate": 0.001, "loss": 2.4771, "step": 255576 }, { "epoch": 49.08, "learning_rate": 0.001, "loss": 2.4737, "step": 255588 }, { "epoch": 49.08, "learning_rate": 0.001, "loss": 2.4719, "step": 255600 }, { "epoch": 49.08, "learning_rate": 0.001, "loss": 2.4778, "step": 255612 }, { "epoch": 49.08, "learning_rate": 0.001, "loss": 2.4811, "step": 255624 }, { "epoch": 49.08, "eval_ag_news_accuracy": 0.3303125, "eval_ag_news_bleu_score": 5.100494217804903, "eval_ag_news_bleu_score_sem": 0.1571130840546052, "eval_ag_news_emb_cos_sim": 0.8211169838905334, "eval_ag_news_emb_cos_sim_sem": 0.006613904080739665, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.455061674118042, "eval_ag_news_n_ngrams_match_1": 14.588, "eval_ag_news_n_ngrams_match_2": 3.402, "eval_ag_news_n_ngrams_match_3": 0.996, "eval_ag_news_num_pred_words": 46.588, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.66024123951855, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36327022297672823, "eval_ag_news_runtime": 11.8886, "eval_ag_news_samples_per_second": 42.057, "eval_ag_news_steps_per_second": 0.084, "eval_ag_news_token_set_f1": 0.36385899409076766, "eval_ag_news_token_set_f1_sem": 0.004306569554235365, "eval_ag_news_token_set_precision": 0.3495863481706578, "eval_ag_news_token_set_recall": 0.39296495283245336, "eval_ag_news_true_num_tokens": 56.09375, "step": 255625 }, { "epoch": 49.08, "eval_anthropic_toxic_prompts_accuracy": 0.11721875, "eval_anthropic_toxic_prompts_bleu_score": 3.275562210930755, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12351720417569237, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6904870271682739, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008971926741604052, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1015625, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.026804565886848545, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.17292857170105, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.488, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.024, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.754, "eval_anthropic_toxic_prompts_num_pred_words": 47.21, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.87730847395135, "eval_anthropic_toxic_prompts_pred_num_tokens": 62.953125, "eval_anthropic_toxic_prompts_rouge_score": 0.21946768835170108, "eval_anthropic_toxic_prompts_runtime": 12.004, "eval_anthropic_toxic_prompts_samples_per_second": 41.653, "eval_anthropic_toxic_prompts_steps_per_second": 0.083, "eval_anthropic_toxic_prompts_token_set_f1": 0.3602042240177149, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006441531191770978, "eval_anthropic_toxic_prompts_token_set_precision": 0.455426196114472, "eval_anthropic_toxic_prompts_token_set_recall": 0.3248023174474306, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 255625 }, { "epoch": 49.08, "eval_arxiv_accuracy": 0.35478125, "eval_arxiv_bleu_score": 4.629254039295774, "eval_arxiv_bleu_score_sem": 0.13831173033533106, "eval_arxiv_emb_cos_sim": 0.7929853200912476, "eval_arxiv_emb_cos_sim_sem": 0.0063699120234430515, "eval_arxiv_emb_top1_equal": 0.265625, "eval_arxiv_emb_top1_equal_sem": 0.03919146934646163, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3138437271118164, "eval_arxiv_n_ngrams_match_1": 16.01, "eval_arxiv_n_ngrams_match_2": 3.21, "eval_arxiv_n_ngrams_match_3": 0.732, "eval_arxiv_num_pred_words": 41.314, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.49058897776324, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.38063996866515315, "eval_arxiv_runtime": 12.0681, "eval_arxiv_samples_per_second": 41.431, "eval_arxiv_steps_per_second": 0.083, "eval_arxiv_token_set_f1": 0.37258514545758065, "eval_arxiv_token_set_f1_sem": 0.004185401543605356, "eval_arxiv_token_set_precision": 0.32784967190899983, "eval_arxiv_token_set_recall": 0.44341501940445516, "eval_arxiv_true_num_tokens": 64.0, "step": 255625 }, { "epoch": 49.08, "eval_python_code_alpaca_accuracy": 0.16346875, "eval_python_code_alpaca_bleu_score": 4.906324059278739, "eval_python_code_alpaca_bleu_score_sem": 0.16284075585793573, "eval_python_code_alpaca_emb_cos_sim": 0.7793626189231873, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00683757740578698, "eval_python_code_alpaca_emb_top1_equal": 0.15625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03221922156442571, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8239521980285645, "eval_python_code_alpaca_n_ngrams_match_1": 10.236, "eval_python_code_alpaca_n_ngrams_match_2": 3.068, "eval_python_code_alpaca_n_ngrams_match_3": 1.114, "eval_python_code_alpaca_num_pred_words": 44.132, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.84328730718324, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.35001021614155914, "eval_python_code_alpaca_runtime": 11.173, "eval_python_code_alpaca_samples_per_second": 44.751, "eval_python_code_alpaca_steps_per_second": 0.09, "eval_python_code_alpaca_token_set_f1": 0.4887993173008514, "eval_python_code_alpaca_token_set_f1_sem": 0.005267455232967129, "eval_python_code_alpaca_token_set_precision": 0.5630063929275726, "eval_python_code_alpaca_token_set_recall": 0.44813212032920374, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 255625 }, { "epoch": 49.08, "eval_wikibio_accuracy": 0.334, "eval_wikibio_bleu_score": 6.109185382063926, "eval_wikibio_bleu_score_sem": 0.20418145808930296, "eval_wikibio_emb_cos_sim": 0.7559391260147095, "eval_wikibio_emb_cos_sim_sem": 0.008285049673254392, "eval_wikibio_emb_top1_equal": 0.171875, "eval_wikibio_emb_top1_equal_sem": 0.03347745514062371, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6052134037017822, "eval_wikibio_n_ngrams_match_1": 10.216, "eval_wikibio_n_ngrams_match_2": 3.42, "eval_wikibio_n_ngrams_match_3": 1.262, "eval_wikibio_num_pred_words": 35.988, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.789534042336584, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3625460417161077, "eval_wikibio_runtime": 11.6569, "eval_wikibio_samples_per_second": 42.893, "eval_wikibio_steps_per_second": 0.086, "eval_wikibio_token_set_f1": 0.3287405463006294, "eval_wikibio_token_set_f1_sem": 0.005024926415774864, "eval_wikibio_token_set_precision": 0.3349647816156045, "eval_wikibio_token_set_recall": 0.3411954359495211, "eval_wikibio_true_num_tokens": 61.1328125, "step": 255625 }, { "epoch": 49.08, "eval_nq_accuracy": 0.539625, "eval_nq_bleu_score": 12.596793891130252, "eval_nq_bleu_score_sem": 0.5054312577362579, "eval_nq_emb_cos_sim": 0.8442134857177734, "eval_nq_emb_cos_sim_sem": 0.006530488882091526, "eval_nq_emb_top1_equal": 0.2734375, "eval_nq_emb_top1_equal_sem": 0.03955156411760461, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1003105640411377, "eval_nq_n_ngrams_match_1": 23.644, "eval_nq_n_ngrams_match_2": 8.9, "eval_nq_n_ngrams_match_3": 4.272, "eval_nq_num_pred_words": 48.948, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.168706425150726, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4600487435592311, "eval_nq_runtime": 11.601, "eval_nq_samples_per_second": 43.1, "eval_nq_steps_per_second": 0.086, "eval_nq_token_set_f1": 0.4725121750221839, "eval_nq_token_set_f1_sem": 0.0051769751972101105, "eval_nq_token_set_precision": 0.43366092015912555, "eval_nq_token_set_recall": 0.5262507692564834, "eval_nq_true_num_tokens": 64.0, "step": 255625 }, { "epoch": 49.09, "learning_rate": 0.001, "loss": 2.4712, "step": 255636 }, { "epoch": 49.09, "learning_rate": 0.001, "loss": 2.4722, "step": 255648 }, { "epoch": 49.09, "learning_rate": 0.001, "loss": 2.479, "step": 255660 }, { "epoch": 49.09, "learning_rate": 0.001, "loss": 2.4771, "step": 255672 }, { "epoch": 49.09, "learning_rate": 0.001, "loss": 2.4808, "step": 255684 }, { "epoch": 49.1, "learning_rate": 0.001, "loss": 2.487, "step": 255696 }, { "epoch": 49.1, "learning_rate": 0.001, "loss": 2.4834, "step": 255708 }, { "epoch": 49.1, "learning_rate": 0.001, "loss": 2.4775, "step": 255720 }, { "epoch": 49.1, "learning_rate": 0.001, "loss": 2.4839, "step": 255732 }, { "epoch": 49.11, "learning_rate": 0.001, "loss": 2.4811, "step": 255744 }, { "epoch": 49.11, "learning_rate": 0.001, "loss": 2.4792, "step": 255756 }, { "epoch": 49.11, "learning_rate": 0.001, "loss": 2.4757, "step": 255768 }, { "epoch": 49.11, "learning_rate": 0.001, "loss": 2.4749, "step": 255780 }, { "epoch": 49.12, "learning_rate": 0.001, "loss": 2.4806, "step": 255792 }, { "epoch": 49.12, "learning_rate": 0.001, "loss": 2.4791, "step": 255804 }, { "epoch": 49.12, "learning_rate": 0.001, "loss": 2.4719, "step": 255816 }, { "epoch": 49.12, "learning_rate": 0.001, "loss": 2.4788, "step": 255828 }, { "epoch": 49.12, "learning_rate": 0.001, "loss": 2.4759, "step": 255840 }, { "epoch": 49.13, "learning_rate": 0.001, "loss": 2.4758, "step": 255852 }, { "epoch": 49.13, "learning_rate": 0.001, "loss": 2.4688, "step": 255864 }, { "epoch": 49.13, "learning_rate": 0.001, "loss": 2.4851, "step": 255876 }, { "epoch": 49.13, "learning_rate": 0.001, "loss": 2.4794, "step": 255888 }, { "epoch": 49.14, "learning_rate": 0.001, "loss": 2.4724, "step": 255900 }, { "epoch": 49.14, "learning_rate": 0.001, "loss": 2.4823, "step": 255912 }, { "epoch": 49.14, "learning_rate": 0.001, "loss": 2.4724, "step": 255924 }, { "epoch": 49.14, "learning_rate": 0.001, "loss": 2.4847, "step": 255936 }, { "epoch": 49.15, "learning_rate": 0.001, "loss": 2.4808, "step": 255948 }, { "epoch": 49.15, "learning_rate": 0.001, "loss": 2.4783, "step": 255960 }, { "epoch": 49.15, "learning_rate": 0.001, "loss": 2.4749, "step": 255972 }, { "epoch": 49.15, "learning_rate": 0.001, "loss": 2.475, "step": 255984 }, { "epoch": 49.15, "learning_rate": 0.001, "loss": 2.4678, "step": 255996 }, { "epoch": 49.16, "learning_rate": 0.001, "loss": 2.4643, "step": 256008 }, { "epoch": 49.16, "learning_rate": 0.001, "loss": 2.4641, "step": 256020 }, { "epoch": 49.16, "learning_rate": 0.001, "loss": 2.4667, "step": 256032 }, { "epoch": 49.16, "learning_rate": 0.001, "loss": 2.474, "step": 256044 }, { "epoch": 49.17, "learning_rate": 0.001, "loss": 2.4766, "step": 256056 }, { "epoch": 49.17, "learning_rate": 0.001, "loss": 2.4708, "step": 256068 }, { "epoch": 49.17, "learning_rate": 0.001, "loss": 2.4842, "step": 256080 }, { "epoch": 49.17, "learning_rate": 0.001, "loss": 2.4834, "step": 256092 }, { "epoch": 49.18, "learning_rate": 0.001, "loss": 2.4721, "step": 256104 }, { "epoch": 49.18, "learning_rate": 0.001, "loss": 2.4862, "step": 256116 }, { "epoch": 49.18, "learning_rate": 0.001, "loss": 2.4772, "step": 256128 }, { "epoch": 49.18, "learning_rate": 0.001, "loss": 2.4766, "step": 256140 }, { "epoch": 49.18, "learning_rate": 0.001, "loss": 2.4758, "step": 256152 }, { "epoch": 49.19, "learning_rate": 0.001, "loss": 2.4791, "step": 256164 }, { "epoch": 49.19, "learning_rate": 0.001, "loss": 2.486, "step": 256176 }, { "epoch": 49.19, "learning_rate": 0.001, "loss": 2.4713, "step": 256188 }, { "epoch": 49.19, "learning_rate": 0.001, "loss": 2.4827, "step": 256200 }, { "epoch": 49.2, "learning_rate": 0.001, "loss": 2.4864, "step": 256212 }, { "epoch": 49.2, "learning_rate": 0.001, "loss": 2.4741, "step": 256224 }, { "epoch": 49.2, "learning_rate": 0.001, "loss": 2.4691, "step": 256236 }, { "epoch": 49.2, "learning_rate": 0.001, "loss": 2.4699, "step": 256248 }, { "epoch": 49.2, "eval_ag_news_accuracy": 0.33046875, "eval_ag_news_bleu_score": 5.043217042776179, "eval_ag_news_bleu_score_sem": 0.16174832329162295, "eval_ag_news_emb_cos_sim": 0.8152117133140564, "eval_ag_news_emb_cos_sim_sem": 0.008054775756603568, "eval_ag_news_emb_top1_equal": 0.2734375, "eval_ag_news_emb_top1_equal_sem": 0.03955156411760461, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.45247745513916, "eval_ag_news_n_ngrams_match_1": 14.53, "eval_ag_news_n_ngrams_match_2": 3.302, "eval_ag_news_n_ngrams_match_3": 0.922, "eval_ag_news_num_pred_words": 46.728, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.578529868742635, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3640949688473055, "eval_ag_news_runtime": 14.2536, "eval_ag_news_samples_per_second": 35.079, "eval_ag_news_steps_per_second": 0.07, "eval_ag_news_token_set_f1": 0.3626868877968545, "eval_ag_news_token_set_f1_sem": 0.004380139390985463, "eval_ag_news_token_set_precision": 0.3481912033393453, "eval_ag_news_token_set_recall": 0.3915673255987398, "eval_ag_news_true_num_tokens": 56.09375, "step": 256250 }, { "epoch": 49.2, "eval_anthropic_toxic_prompts_accuracy": 0.1165625, "eval_anthropic_toxic_prompts_bleu_score": 3.2949912416774136, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12462337117179827, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.686430811882019, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.009695480129236794, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.109375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.027695207821224692, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.185387134552002, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.462, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.07, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.808, "eval_anthropic_toxic_prompts_num_pred_words": 47.364, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.176646209462877, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.22147314383432543, "eval_anthropic_toxic_prompts_runtime": 17.6559, "eval_anthropic_toxic_prompts_samples_per_second": 28.319, "eval_anthropic_toxic_prompts_steps_per_second": 0.057, "eval_anthropic_toxic_prompts_token_set_f1": 0.3610745672460277, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006413178292314354, "eval_anthropic_toxic_prompts_token_set_precision": 0.45573239268142074, "eval_anthropic_toxic_prompts_token_set_recall": 0.32517712727149634, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 256250 }, { "epoch": 49.2, "eval_arxiv_accuracy": 0.3531875, "eval_arxiv_bleu_score": 4.495392834026501, "eval_arxiv_bleu_score_sem": 0.12826398561178315, "eval_arxiv_emb_cos_sim": 0.778884768486023, "eval_arxiv_emb_cos_sim_sem": 0.007722691517563122, "eval_arxiv_emb_top1_equal": 0.2109375, "eval_arxiv_emb_top1_equal_sem": 0.03620184850179216, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3233225345611572, "eval_arxiv_n_ngrams_match_1": 15.682, "eval_arxiv_n_ngrams_match_2": 3.118, "eval_arxiv_n_ngrams_match_3": 0.692, "eval_arxiv_num_pred_words": 41.268, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.75240587301812, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37126008651175524, "eval_arxiv_runtime": 14.5949, "eval_arxiv_samples_per_second": 34.259, "eval_arxiv_steps_per_second": 0.069, "eval_arxiv_token_set_f1": 0.3653475091302374, "eval_arxiv_token_set_f1_sem": 0.004507638298671149, "eval_arxiv_token_set_precision": 0.31921645517267927, "eval_arxiv_token_set_recall": 0.44357360213561625, "eval_arxiv_true_num_tokens": 64.0, "step": 256250 }, { "epoch": 49.2, "eval_python_code_alpaca_accuracy": 0.16465625, "eval_python_code_alpaca_bleu_score": 4.872121200975159, "eval_python_code_alpaca_bleu_score_sem": 0.15571547758094378, "eval_python_code_alpaca_emb_cos_sim": 0.7787461280822754, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006888881974225882, "eval_python_code_alpaca_emb_top1_equal": 0.140625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.030847557647994725, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8354156017303467, "eval_python_code_alpaca_n_ngrams_match_1": 10.244, "eval_python_code_alpaca_n_ngrams_match_2": 3.092, "eval_python_code_alpaca_n_ngrams_match_3": 1.064, "eval_python_code_alpaca_num_pred_words": 43.936, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.037479635219537, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3501267549648855, "eval_python_code_alpaca_runtime": 14.0163, "eval_python_code_alpaca_samples_per_second": 35.673, "eval_python_code_alpaca_steps_per_second": 0.071, "eval_python_code_alpaca_token_set_f1": 0.49198952511198535, "eval_python_code_alpaca_token_set_f1_sem": 0.00534944324439524, "eval_python_code_alpaca_token_set_precision": 0.565114902902067, "eval_python_code_alpaca_token_set_recall": 0.45752985917392913, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 256250 }, { "epoch": 49.2, "eval_wikibio_accuracy": 0.33040625, "eval_wikibio_bleu_score": 6.092429866180376, "eval_wikibio_bleu_score_sem": 0.21055146410116587, "eval_wikibio_emb_cos_sim": 0.7502623796463013, "eval_wikibio_emb_cos_sim_sem": 0.009124294842960522, "eval_wikibio_emb_top1_equal": 0.25, "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.584160089492798, "eval_wikibio_n_ngrams_match_1": 10.184, "eval_wikibio_n_ngrams_match_2": 3.438, "eval_wikibio_n_ngrams_match_3": 1.234, "eval_wikibio_num_pred_words": 36.03, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.02308883824612, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.36075220562791344, "eval_wikibio_runtime": 13.6901, "eval_wikibio_samples_per_second": 36.523, "eval_wikibio_steps_per_second": 0.073, "eval_wikibio_token_set_f1": 0.32352363354992836, "eval_wikibio_token_set_f1_sem": 0.005260468138923508, "eval_wikibio_token_set_precision": 0.329763042866111, "eval_wikibio_token_set_recall": 0.33496254819562765, "eval_wikibio_true_num_tokens": 61.1328125, "step": 256250 }, { "epoch": 49.2, "eval_nq_accuracy": 0.53921875, "eval_nq_bleu_score": 12.427637148300843, "eval_nq_bleu_score_sem": 0.5037686049432977, "eval_nq_emb_cos_sim": 0.8388968706130981, "eval_nq_emb_cos_sim_sem": 0.007309439692280282, "eval_nq_emb_top1_equal": 0.25, "eval_nq_emb_top1_equal_sem": 0.03842366440207048, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1017231941223145, "eval_nq_n_ngrams_match_1": 23.72, "eval_nq_n_ngrams_match_2": 8.876, "eval_nq_n_ngrams_match_3": 4.19, "eval_nq_num_pred_words": 49.212, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.18025393983422, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4613722903285064, "eval_nq_runtime": 19.4728, "eval_nq_samples_per_second": 25.677, "eval_nq_steps_per_second": 0.051, "eval_nq_token_set_f1": 0.47590787459483735, "eval_nq_token_set_f1_sem": 0.00493083699606327, "eval_nq_token_set_precision": 0.4348087147899886, "eval_nq_token_set_recall": 0.5331275817310658, "eval_nq_true_num_tokens": 64.0, "step": 256250 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 2.4738, "step": 256260 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 2.4732, "step": 256272 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 2.4752, "step": 256284 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 2.478, "step": 256296 }, { "epoch": 49.21, "learning_rate": 0.001, "loss": 2.4857, "step": 256308 }, { "epoch": 49.22, "learning_rate": 0.001, "loss": 2.4839, "step": 256320 }, { "epoch": 49.22, "learning_rate": 0.001, "loss": 2.4786, "step": 256332 }, { "epoch": 49.22, "learning_rate": 0.001, "loss": 2.481, "step": 256344 }, { "epoch": 49.22, "learning_rate": 0.001, "loss": 2.4704, "step": 256356 }, { "epoch": 49.23, "learning_rate": 0.001, "loss": 2.4768, "step": 256368 }, { "epoch": 49.23, "learning_rate": 0.001, "loss": 2.4795, "step": 256380 }, { "epoch": 49.23, "learning_rate": 0.001, "loss": 2.4787, "step": 256392 }, { "epoch": 49.23, "learning_rate": 0.001, "loss": 2.4707, "step": 256404 }, { "epoch": 49.24, "learning_rate": 0.001, "loss": 2.4768, "step": 256416 }, { "epoch": 49.24, "learning_rate": 0.001, "loss": 2.4696, "step": 256428 }, { "epoch": 49.24, "learning_rate": 0.001, "loss": 2.4713, "step": 256440 }, { "epoch": 49.24, "learning_rate": 0.001, "loss": 2.4768, "step": 256452 }, { "epoch": 49.24, "learning_rate": 0.001, "loss": 2.4846, "step": 256464 }, { "epoch": 49.25, "learning_rate": 0.001, "loss": 2.4789, "step": 256476 }, { "epoch": 49.25, "learning_rate": 0.001, "loss": 2.4816, "step": 256488 }, { "epoch": 49.25, "learning_rate": 0.001, "loss": 2.4784, "step": 256500 }, { "epoch": 49.25, "learning_rate": 0.001, "loss": 2.4687, "step": 256512 }, { "epoch": 49.26, "learning_rate": 0.001, "loss": 2.4684, "step": 256524 }, { "epoch": 49.26, "learning_rate": 0.001, "loss": 2.4767, "step": 256536 }, { "epoch": 49.26, "learning_rate": 0.001, "loss": 2.478, "step": 256548 }, { "epoch": 49.26, "learning_rate": 0.001, "loss": 2.4889, "step": 256560 }, { "epoch": 49.26, "learning_rate": 0.001, "loss": 2.4771, "step": 256572 }, { "epoch": 49.27, "learning_rate": 0.001, "loss": 2.4809, "step": 256584 }, { "epoch": 49.27, "learning_rate": 0.001, "loss": 2.4767, "step": 256596 }, { "epoch": 49.27, "learning_rate": 0.001, "loss": 2.4795, "step": 256608 }, { "epoch": 49.27, "learning_rate": 0.001, "loss": 2.4795, "step": 256620 }, { "epoch": 49.28, "learning_rate": 0.001, "loss": 2.4853, "step": 256632 }, { "epoch": 49.28, "learning_rate": 0.001, "loss": 2.475, "step": 256644 }, { "epoch": 49.28, "learning_rate": 0.001, "loss": 2.4809, "step": 256656 }, { "epoch": 49.28, "learning_rate": 0.001, "loss": 2.4763, "step": 256668 }, { "epoch": 49.29, "learning_rate": 0.001, "loss": 2.476, "step": 256680 }, { "epoch": 49.29, "learning_rate": 0.001, "loss": 2.4855, "step": 256692 }, { "epoch": 49.29, "learning_rate": 0.001, "loss": 2.4859, "step": 256704 }, { "epoch": 49.29, "learning_rate": 0.001, "loss": 2.4785, "step": 256716 }, { "epoch": 49.29, "learning_rate": 0.001, "loss": 2.4794, "step": 256728 }, { "epoch": 49.3, "learning_rate": 0.001, "loss": 2.4879, "step": 256740 }, { "epoch": 49.3, "learning_rate": 0.001, "loss": 2.4718, "step": 256752 }, { "epoch": 49.3, "learning_rate": 0.001, "loss": 2.4833, "step": 256764 }, { "epoch": 49.3, "learning_rate": 0.001, "loss": 2.4875, "step": 256776 }, { "epoch": 49.31, "learning_rate": 0.001, "loss": 2.4747, "step": 256788 }, { "epoch": 49.31, "learning_rate": 0.001, "loss": 2.4863, "step": 256800 }, { "epoch": 49.31, "learning_rate": 0.001, "loss": 2.4823, "step": 256812 }, { "epoch": 49.31, "learning_rate": 0.001, "loss": 2.4817, "step": 256824 }, { "epoch": 49.32, "learning_rate": 0.001, "loss": 2.4792, "step": 256836 }, { "epoch": 49.32, "learning_rate": 0.001, "loss": 2.4881, "step": 256848 }, { "epoch": 49.32, "learning_rate": 0.001, "loss": 2.4825, "step": 256860 }, { "epoch": 49.32, "learning_rate": 0.001, "loss": 2.4781, "step": 256872 }, { "epoch": 49.32, "eval_ag_news_accuracy": 0.3318125, "eval_ag_news_bleu_score": 5.081396200302893, "eval_ag_news_bleu_score_sem": 0.16451155846949, "eval_ag_news_emb_cos_sim": 0.8254844546318054, "eval_ag_news_emb_cos_sim_sem": 0.007121095962997224, "eval_ag_news_emb_top1_equal": 0.2578125, "eval_ag_news_emb_top1_equal_sem": 0.038815656435002115, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4433860778808594, "eval_ag_news_n_ngrams_match_1": 14.642, "eval_ag_news_n_ngrams_match_2": 3.322, "eval_ag_news_n_ngrams_match_3": 0.932, "eval_ag_news_num_pred_words": 46.838, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.292738626900984, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3649736249923148, "eval_ag_news_runtime": 14.5676, "eval_ag_news_samples_per_second": 34.323, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36144229924221694, "eval_ag_news_token_set_f1_sem": 0.004469218352070889, "eval_ag_news_token_set_precision": 0.35119472441975647, "eval_ag_news_token_set_recall": 0.3872363475860789, "eval_ag_news_true_num_tokens": 56.09375, "step": 256875 }, { "epoch": 49.32, "eval_anthropic_toxic_prompts_accuracy": 0.1185625, "eval_anthropic_toxic_prompts_bleu_score": 3.30944831317759, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12618017653442176, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6850172877311707, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.010458037132443828, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.078125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.023813825516515504, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.175732135772705, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.408, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.056, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.79, "eval_anthropic_toxic_prompts_num_pred_words": 46.322, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.944343963353475, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.2200586302775161, "eval_anthropic_toxic_prompts_runtime": 13.8099, "eval_anthropic_toxic_prompts_samples_per_second": 36.206, "eval_anthropic_toxic_prompts_steps_per_second": 0.072, "eval_anthropic_toxic_prompts_token_set_f1": 0.35854321758104724, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006503358291102491, "eval_anthropic_toxic_prompts_token_set_precision": 0.44649057659363245, "eval_anthropic_toxic_prompts_token_set_recall": 0.32697149018921445, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 256875 }, { "epoch": 49.32, "eval_arxiv_accuracy": 0.35403125, "eval_arxiv_bleu_score": 4.461321904384291, "eval_arxiv_bleu_score_sem": 0.12213479557784862, "eval_arxiv_emb_cos_sim": 0.7896615266799927, "eval_arxiv_emb_cos_sim_sem": 0.007116109463982779, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3053104877471924, "eval_arxiv_n_ngrams_match_1": 15.728, "eval_arxiv_n_ngrams_match_2": 3.056, "eval_arxiv_n_ngrams_match_3": 0.642, "eval_arxiv_num_pred_words": 40.716, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.25700324064491, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.37917725386830603, "eval_arxiv_runtime": 14.577, "eval_arxiv_samples_per_second": 34.301, "eval_arxiv_steps_per_second": 0.069, "eval_arxiv_token_set_f1": 0.3675406337866314, "eval_arxiv_token_set_f1_sem": 0.0041793149078806395, "eval_arxiv_token_set_precision": 0.3228675605668348, "eval_arxiv_token_set_recall": 0.4407260843846051, "eval_arxiv_true_num_tokens": 64.0, "step": 256875 }, { "epoch": 49.32, "eval_python_code_alpaca_accuracy": 0.162625, "eval_python_code_alpaca_bleu_score": 5.005547956255304, "eval_python_code_alpaca_bleu_score_sem": 0.16610661730934456, "eval_python_code_alpaca_emb_cos_sim": 0.7768363952636719, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007135297475311871, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8392891883850098, "eval_python_code_alpaca_n_ngrams_match_1": 9.842, "eval_python_code_alpaca_n_ngrams_match_2": 3.058, "eval_python_code_alpaca_n_ngrams_match_3": 1.09, "eval_python_code_alpaca_num_pred_words": 42.426, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.10360377507556, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3457762888173629, "eval_python_code_alpaca_runtime": 13.6367, "eval_python_code_alpaca_samples_per_second": 36.666, "eval_python_code_alpaca_steps_per_second": 0.073, "eval_python_code_alpaca_token_set_f1": 0.4778991604897977, "eval_python_code_alpaca_token_set_f1_sem": 0.005398598328638675, "eval_python_code_alpaca_token_set_precision": 0.5438141225217484, "eval_python_code_alpaca_token_set_recall": 0.4527064567341806, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 256875 }, { "epoch": 49.32, "eval_wikibio_accuracy": 0.330125, "eval_wikibio_bleu_score": 6.222894586384821, "eval_wikibio_bleu_score_sem": 0.211048056856142, "eval_wikibio_emb_cos_sim": 0.7671869993209839, "eval_wikibio_emb_cos_sim_sem": 0.008162788251169369, "eval_wikibio_emb_top1_equal": 0.234375, "eval_wikibio_emb_top1_equal_sem": 0.03758909358128201, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.607555389404297, "eval_wikibio_n_ngrams_match_1": 10.55, "eval_wikibio_n_ngrams_match_2": 3.602, "eval_wikibio_n_ngrams_match_3": 1.32, "eval_wikibio_num_pred_words": 36.912, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.8757955772787, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3698757084550667, "eval_wikibio_runtime": 13.7057, "eval_wikibio_samples_per_second": 36.481, "eval_wikibio_steps_per_second": 0.073, "eval_wikibio_token_set_f1": 0.3324199898823237, "eval_wikibio_token_set_f1_sem": 0.005267986338693477, "eval_wikibio_token_set_precision": 0.3445027595237636, "eval_wikibio_token_set_recall": 0.3352381215160292, "eval_wikibio_true_num_tokens": 61.1328125, "step": 256875 }, { "epoch": 49.32, "eval_nq_accuracy": 0.54053125, "eval_nq_bleu_score": 12.30907177544018, "eval_nq_bleu_score_sem": 0.4960400321429855, "eval_nq_emb_cos_sim": 0.8414657115936279, "eval_nq_emb_cos_sim_sem": 0.007097317896447492, "eval_nq_emb_top1_equal": 0.3359375, "eval_nq_emb_top1_equal_sem": 0.04191137143408563, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.09922456741333, "eval_nq_n_ngrams_match_1": 23.626, "eval_nq_n_ngrams_match_2": 8.816, "eval_nq_n_ngrams_match_3": 4.188, "eval_nq_num_pred_words": 48.93, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.159840052816099, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4596799378507059, "eval_nq_runtime": 14.1124, "eval_nq_samples_per_second": 35.43, "eval_nq_steps_per_second": 0.071, "eval_nq_token_set_f1": 0.47371893101131696, "eval_nq_token_set_f1_sem": 0.005024055977908796, "eval_nq_token_set_precision": 0.43442789850436325, "eval_nq_token_set_recall": 0.5293189250752252, "eval_nq_true_num_tokens": 64.0, "step": 256875 }, { "epoch": 49.32, "learning_rate": 0.001, "loss": 2.4675, "step": 256884 }, { "epoch": 49.33, "learning_rate": 0.001, "loss": 2.4829, "step": 256896 }, { "epoch": 49.33, "learning_rate": 0.001, "loss": 2.4781, "step": 256908 }, { "epoch": 49.33, "learning_rate": 0.001, "loss": 2.4792, "step": 256920 }, { "epoch": 49.33, "learning_rate": 0.001, "loss": 2.4773, "step": 256932 }, { "epoch": 49.34, "learning_rate": 0.001, "loss": 2.4815, "step": 256944 }, { "epoch": 49.34, "learning_rate": 0.001, "loss": 2.4896, "step": 256956 }, { "epoch": 49.34, "learning_rate": 0.001, "loss": 2.4732, "step": 256968 }, { "epoch": 49.34, "learning_rate": 0.001, "loss": 2.4762, "step": 256980 }, { "epoch": 49.35, "learning_rate": 0.001, "loss": 2.4756, "step": 256992 }, { "epoch": 49.35, "learning_rate": 0.001, "loss": 2.4712, "step": 257004 }, { "epoch": 49.35, "learning_rate": 0.001, "loss": 2.4754, "step": 257016 }, { "epoch": 49.35, "learning_rate": 0.001, "loss": 2.4709, "step": 257028 }, { "epoch": 49.35, "learning_rate": 0.001, "loss": 2.4749, "step": 257040 }, { "epoch": 49.36, "learning_rate": 0.001, "loss": 2.4712, "step": 257052 }, { "epoch": 49.36, "learning_rate": 0.001, "loss": 2.4751, "step": 257064 }, { "epoch": 49.36, "learning_rate": 0.001, "loss": 2.4763, "step": 257076 }, { "epoch": 49.36, "learning_rate": 0.001, "loss": 2.4805, "step": 257088 }, { "epoch": 49.37, "learning_rate": 0.001, "loss": 2.4824, "step": 257100 }, { "epoch": 49.37, "learning_rate": 0.001, "loss": 2.4828, "step": 257112 }, { "epoch": 49.37, "learning_rate": 0.001, "loss": 2.4821, "step": 257124 }, { "epoch": 49.37, "learning_rate": 0.001, "loss": 2.4831, "step": 257136 }, { "epoch": 49.38, "learning_rate": 0.001, "loss": 2.4755, "step": 257148 }, { "epoch": 49.38, "learning_rate": 0.001, "loss": 2.4783, "step": 257160 }, { "epoch": 49.38, "learning_rate": 0.001, "loss": 2.4839, "step": 257172 }, { "epoch": 49.38, "learning_rate": 0.001, "loss": 2.4757, "step": 257184 }, { "epoch": 49.38, "learning_rate": 0.001, "loss": 2.4735, "step": 257196 }, { "epoch": 49.39, "learning_rate": 0.001, "loss": 2.4815, "step": 257208 }, { "epoch": 49.39, "learning_rate": 0.001, "loss": 2.4839, "step": 257220 }, { "epoch": 49.39, "learning_rate": 0.001, "loss": 2.4844, "step": 257232 }, { "epoch": 49.39, "learning_rate": 0.001, "loss": 2.4943, "step": 257244 }, { "epoch": 49.4, "learning_rate": 0.001, "loss": 2.4818, "step": 257256 }, { "epoch": 49.4, "learning_rate": 0.001, "loss": 2.4738, "step": 257268 }, { "epoch": 49.4, "learning_rate": 0.001, "loss": 2.4893, "step": 257280 }, { "epoch": 49.4, "learning_rate": 0.001, "loss": 2.4751, "step": 257292 }, { "epoch": 49.41, "learning_rate": 0.001, "loss": 2.4813, "step": 257304 }, { "epoch": 49.41, "learning_rate": 0.001, "loss": 2.4754, "step": 257316 }, { "epoch": 49.41, "learning_rate": 0.001, "loss": 2.4848, "step": 257328 }, { "epoch": 49.41, "learning_rate": 0.001, "loss": 2.4747, "step": 257340 }, { "epoch": 49.41, "learning_rate": 0.001, "loss": 2.4793, "step": 257352 }, { "epoch": 49.42, "learning_rate": 0.001, "loss": 2.4735, "step": 257364 }, { "epoch": 49.42, "learning_rate": 0.001, "loss": 2.485, "step": 257376 }, { "epoch": 49.42, "learning_rate": 0.001, "loss": 2.4753, "step": 257388 }, { "epoch": 49.42, "learning_rate": 0.001, "loss": 2.4765, "step": 257400 }, { "epoch": 49.43, "learning_rate": 0.001, "loss": 2.4747, "step": 257412 }, { "epoch": 49.43, "learning_rate": 0.001, "loss": 2.4804, "step": 257424 }, { "epoch": 49.43, "learning_rate": 0.001, "loss": 2.4724, "step": 257436 }, { "epoch": 49.43, "learning_rate": 0.001, "loss": 2.4747, "step": 257448 }, { "epoch": 49.44, "learning_rate": 0.001, "loss": 2.4794, "step": 257460 }, { "epoch": 49.44, "learning_rate": 0.001, "loss": 2.4787, "step": 257472 }, { "epoch": 49.44, "learning_rate": 0.001, "loss": 2.4795, "step": 257484 }, { "epoch": 49.44, "learning_rate": 0.001, "loss": 2.4862, "step": 257496 }, { "epoch": 49.44, "eval_ag_news_accuracy": 0.33103125, "eval_ag_news_bleu_score": 5.012001864411936, "eval_ag_news_bleu_score_sem": 0.16031987311286103, "eval_ag_news_emb_cos_sim": 0.8189452886581421, "eval_ag_news_emb_cos_sim_sem": 0.007686430739864577, "eval_ag_news_emb_top1_equal": 0.25, "eval_ag_news_emb_top1_equal_sem": 0.03842366440207048, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4618349075317383, "eval_ag_news_n_ngrams_match_1": 14.518, "eval_ag_news_n_ngrams_match_2": 3.324, "eval_ag_news_n_ngrams_match_3": 0.948, "eval_ag_news_num_pred_words": 46.752, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.875411319347933, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3610674022987158, "eval_ag_news_runtime": 14.4649, "eval_ag_news_samples_per_second": 34.566, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36011668877367287, "eval_ag_news_token_set_f1_sem": 0.004720647887973495, "eval_ag_news_token_set_precision": 0.3480136119382942, "eval_ag_news_token_set_recall": 0.38732049186993767, "eval_ag_news_true_num_tokens": 56.09375, "step": 257500 }, { "epoch": 49.44, "eval_anthropic_toxic_prompts_accuracy": 0.11740625, "eval_anthropic_toxic_prompts_bleu_score": 3.2862728924611435, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12005320503349222, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6906065940856934, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008941596157199893, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0859375, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02487009666300537, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.1902427673339844, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.438, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.026, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.768, "eval_anthropic_toxic_prompts_num_pred_words": 47.354, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.29432459566063, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21911415891886715, "eval_anthropic_toxic_prompts_runtime": 13.3755, "eval_anthropic_toxic_prompts_samples_per_second": 37.382, "eval_anthropic_toxic_prompts_steps_per_second": 0.075, "eval_anthropic_toxic_prompts_token_set_f1": 0.3646844160771978, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006502895282041204, "eval_anthropic_toxic_prompts_token_set_precision": 0.4555083787849245, "eval_anthropic_toxic_prompts_token_set_recall": 0.3308919202481369, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 257500 }, { "epoch": 49.44, "eval_arxiv_accuracy": 0.35446875, "eval_arxiv_bleu_score": 4.492802702590087, "eval_arxiv_bleu_score_sem": 0.1295615135734006, "eval_arxiv_emb_cos_sim": 0.7812449932098389, "eval_arxiv_emb_cos_sim_sem": 0.0065135906300166895, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3121416568756104, "eval_arxiv_n_ngrams_match_1": 15.396, "eval_arxiv_n_ngrams_match_2": 3.108, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 40.67, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.443837862615883, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3653685230464675, "eval_arxiv_runtime": 14.6278, "eval_arxiv_samples_per_second": 34.181, "eval_arxiv_steps_per_second": 0.068, "eval_arxiv_token_set_f1": 0.3605950477836636, "eval_arxiv_token_set_f1_sem": 0.004239692448617283, "eval_arxiv_token_set_precision": 0.3126675176317515, "eval_arxiv_token_set_recall": 0.4469840507072583, "eval_arxiv_true_num_tokens": 64.0, "step": 257500 }, { "epoch": 49.44, "eval_python_code_alpaca_accuracy": 0.1636875, "eval_python_code_alpaca_bleu_score": 4.626496892207398, "eval_python_code_alpaca_bleu_score_sem": 0.14452358955931308, "eval_python_code_alpaca_emb_cos_sim": 0.7660123109817505, "eval_python_code_alpaca_emb_cos_sim_sem": 0.006986079850881591, "eval_python_code_alpaca_emb_top1_equal": 0.1875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.034634623208270626, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.842632293701172, "eval_python_code_alpaca_n_ngrams_match_1": 10.07, "eval_python_code_alpaca_n_ngrams_match_2": 2.9, "eval_python_code_alpaca_n_ngrams_match_3": 0.95, "eval_python_code_alpaca_num_pred_words": 43.02, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 17.160878608337686, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34704052699175136, "eval_python_code_alpaca_runtime": 14.2226, "eval_python_code_alpaca_samples_per_second": 35.155, "eval_python_code_alpaca_steps_per_second": 0.07, "eval_python_code_alpaca_token_set_f1": 0.48346910977841, "eval_python_code_alpaca_token_set_f1_sem": 0.005342612114974879, "eval_python_code_alpaca_token_set_precision": 0.5536433886337334, "eval_python_code_alpaca_token_set_recall": 0.4544745018469489, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 257500 }, { "epoch": 49.44, "eval_wikibio_accuracy": 0.3296875, "eval_wikibio_bleu_score": 6.220462834287385, "eval_wikibio_bleu_score_sem": 0.2210614598981559, "eval_wikibio_emb_cos_sim": 0.7541302442550659, "eval_wikibio_emb_cos_sim_sem": 0.008750735470807324, "eval_wikibio_emb_top1_equal": 0.1640625, "eval_wikibio_emb_top1_equal_sem": 0.03286167651298939, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.635528326034546, "eval_wikibio_n_ngrams_match_1": 10.426, "eval_wikibio_n_ngrams_match_2": 3.488, "eval_wikibio_n_ngrams_match_3": 1.32, "eval_wikibio_num_pred_words": 36.842, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 37.92188272345868, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3709975040511946, "eval_wikibio_runtime": 13.815, "eval_wikibio_samples_per_second": 36.192, "eval_wikibio_steps_per_second": 0.072, "eval_wikibio_token_set_f1": 0.32911650091476735, "eval_wikibio_token_set_f1_sem": 0.0053852770162088155, "eval_wikibio_token_set_precision": 0.3389797088123273, "eval_wikibio_token_set_recall": 0.33577738209664915, "eval_wikibio_true_num_tokens": 61.1328125, "step": 257500 }, { "epoch": 49.44, "eval_nq_accuracy": 0.53878125, "eval_nq_bleu_score": 12.172312054505728, "eval_nq_bleu_score_sem": 0.4938659788837319, "eval_nq_emb_cos_sim": 0.83785080909729, "eval_nq_emb_cos_sim_sem": 0.007079568804868342, "eval_nq_emb_top1_equal": 0.296875, "eval_nq_emb_top1_equal_sem": 0.04054163310179599, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1025428771972656, "eval_nq_n_ngrams_match_1": 23.614, "eval_nq_n_ngrams_match_2": 8.724, "eval_nq_n_ngrams_match_3": 4.104, "eval_nq_num_pred_words": 48.942, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.186961904364422, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4597328167668949, "eval_nq_runtime": 14.1255, "eval_nq_samples_per_second": 35.397, "eval_nq_steps_per_second": 0.071, "eval_nq_token_set_f1": 0.4718487581369448, "eval_nq_token_set_f1_sem": 0.0050176425751479094, "eval_nq_token_set_precision": 0.43057553402009285, "eval_nq_token_set_recall": 0.5304307727196864, "eval_nq_true_num_tokens": 64.0, "step": 257500 }, { "epoch": 49.44, "learning_rate": 0.001, "loss": 2.4736, "step": 257508 }, { "epoch": 49.45, "learning_rate": 0.001, "loss": 2.4751, "step": 257520 }, { "epoch": 49.45, "learning_rate": 0.001, "loss": 2.4764, "step": 257532 }, { "epoch": 49.45, "learning_rate": 0.001, "loss": 2.4837, "step": 257544 }, { "epoch": 49.45, "learning_rate": 0.001, "loss": 2.4849, "step": 257556 }, { "epoch": 49.46, "learning_rate": 0.001, "loss": 2.4792, "step": 257568 }, { "epoch": 49.46, "learning_rate": 0.001, "loss": 2.4751, "step": 257580 }, { "epoch": 49.46, "learning_rate": 0.001, "loss": 2.4884, "step": 257592 }, { "epoch": 49.46, "learning_rate": 0.001, "loss": 2.4783, "step": 257604 }, { "epoch": 49.47, "learning_rate": 0.001, "loss": 2.4853, "step": 257616 }, { "epoch": 49.47, "learning_rate": 0.001, "loss": 2.475, "step": 257628 }, { "epoch": 49.47, "learning_rate": 0.001, "loss": 2.4766, "step": 257640 }, { "epoch": 49.47, "learning_rate": 0.001, "loss": 2.4839, "step": 257652 }, { "epoch": 49.47, "learning_rate": 0.001, "loss": 2.4791, "step": 257664 }, { "epoch": 49.48, "learning_rate": 0.001, "loss": 2.4771, "step": 257676 }, { "epoch": 49.48, "learning_rate": 0.001, "loss": 2.4871, "step": 257688 }, { "epoch": 49.48, "learning_rate": 0.001, "loss": 2.4859, "step": 257700 }, { "epoch": 49.48, "learning_rate": 0.001, "loss": 2.4857, "step": 257712 }, { "epoch": 49.49, "learning_rate": 0.001, "loss": 2.486, "step": 257724 }, { "epoch": 49.49, "learning_rate": 0.001, "loss": 2.4759, "step": 257736 }, { "epoch": 49.49, "learning_rate": 0.001, "loss": 2.4765, "step": 257748 }, { "epoch": 49.49, "learning_rate": 0.001, "loss": 2.4808, "step": 257760 }, { "epoch": 49.5, "learning_rate": 0.001, "loss": 2.4809, "step": 257772 }, { "epoch": 49.5, "learning_rate": 0.001, "loss": 2.4882, "step": 257784 }, { "epoch": 49.5, "learning_rate": 0.001, "loss": 2.489, "step": 257796 }, { "epoch": 49.5, "learning_rate": 0.001, "loss": 2.4774, "step": 257808 }, { "epoch": 49.5, "learning_rate": 0.001, "loss": 2.4856, "step": 257820 }, { "epoch": 49.51, "learning_rate": 0.001, "loss": 2.4882, "step": 257832 }, { "epoch": 49.51, "learning_rate": 0.001, "loss": 2.4732, "step": 257844 }, { "epoch": 49.51, "learning_rate": 0.001, "loss": 2.4754, "step": 257856 }, { "epoch": 49.51, "learning_rate": 0.001, "loss": 2.4673, "step": 257868 }, { "epoch": 49.52, "learning_rate": 0.001, "loss": 2.4715, "step": 257880 }, { "epoch": 49.52, "learning_rate": 0.001, "loss": 2.487, "step": 257892 }, { "epoch": 49.52, "learning_rate": 0.001, "loss": 2.4833, "step": 257904 }, { "epoch": 49.52, "learning_rate": 0.001, "loss": 2.4784, "step": 257916 }, { "epoch": 49.53, "learning_rate": 0.001, "loss": 2.4795, "step": 257928 }, { "epoch": 49.53, "learning_rate": 0.001, "loss": 2.4837, "step": 257940 }, { "epoch": 49.53, "learning_rate": 0.001, "loss": 2.4854, "step": 257952 }, { "epoch": 49.53, "learning_rate": 0.001, "loss": 2.4762, "step": 257964 }, { "epoch": 49.53, "learning_rate": 0.001, "loss": 2.4646, "step": 257976 }, { "epoch": 49.54, "learning_rate": 0.001, "loss": 2.485, "step": 257988 }, { "epoch": 49.54, "learning_rate": 0.001, "loss": 2.4802, "step": 258000 }, { "epoch": 49.54, "learning_rate": 0.001, "loss": 2.4811, "step": 258012 }, { "epoch": 49.54, "learning_rate": 0.001, "loss": 2.4755, "step": 258024 }, { "epoch": 49.55, "learning_rate": 0.001, "loss": 2.4818, "step": 258036 }, { "epoch": 49.55, "learning_rate": 0.001, "loss": 2.4826, "step": 258048 }, { "epoch": 49.55, "learning_rate": 0.001, "loss": 2.4782, "step": 258060 }, { "epoch": 49.55, "learning_rate": 0.001, "loss": 2.4872, "step": 258072 }, { "epoch": 49.56, "learning_rate": 0.001, "loss": 2.4815, "step": 258084 }, { "epoch": 49.56, "learning_rate": 0.001, "loss": 2.487, "step": 258096 }, { "epoch": 49.56, "learning_rate": 0.001, "loss": 2.4803, "step": 258108 }, { "epoch": 49.56, "learning_rate": 0.001, "loss": 2.4834, "step": 258120 }, { "epoch": 49.56, "eval_ag_news_accuracy": 0.33271875, "eval_ag_news_bleu_score": 5.045128992550928, "eval_ag_news_bleu_score_sem": 0.1529287698263182, "eval_ag_news_emb_cos_sim": 0.8254880905151367, "eval_ag_news_emb_cos_sim_sem": 0.00673620106422812, "eval_ag_news_emb_top1_equal": 0.2890625, "eval_ag_news_emb_top1_equal_sem": 0.04022626667363519, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.440908670425415, "eval_ag_news_n_ngrams_match_1": 14.736, "eval_ag_news_n_ngrams_match_2": 3.296, "eval_ag_news_n_ngrams_match_3": 0.944, "eval_ag_news_num_pred_words": 47.0, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.215309714010427, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.3667432944430328, "eval_ag_news_runtime": 17.6279, "eval_ag_news_samples_per_second": 28.364, "eval_ag_news_steps_per_second": 0.057, "eval_ag_news_token_set_f1": 0.3644557274858799, "eval_ag_news_token_set_f1_sem": 0.004389778163579907, "eval_ag_news_token_set_precision": 0.3523713704177665, "eval_ag_news_token_set_recall": 0.39104628547716985, "eval_ag_news_true_num_tokens": 56.09375, "step": 258125 }, { "epoch": 49.56, "eval_anthropic_toxic_prompts_accuracy": 0.1185625, "eval_anthropic_toxic_prompts_bleu_score": 3.197570500488512, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12216970018893475, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6847611665725708, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.01041867263447916, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.0703125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.022687306110270106, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.142594814300537, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.008, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.75, "eval_anthropic_toxic_prompts_num_pred_words": 47.774, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.163894950038728, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.216561625281712, "eval_anthropic_toxic_prompts_runtime": 19.4697, "eval_anthropic_toxic_prompts_samples_per_second": 25.681, "eval_anthropic_toxic_prompts_steps_per_second": 0.051, "eval_anthropic_toxic_prompts_token_set_f1": 0.36554749152382593, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006791511000486857, "eval_anthropic_toxic_prompts_token_set_precision": 0.4515452745725378, "eval_anthropic_toxic_prompts_token_set_recall": 0.3347656964259751, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 258125 }, { "epoch": 49.56, "eval_arxiv_accuracy": 0.35440625, "eval_arxiv_bleu_score": 4.444683078472756, "eval_arxiv_bleu_score_sem": 0.13296104559803884, "eval_arxiv_emb_cos_sim": 0.7827038764953613, "eval_arxiv_emb_cos_sim_sem": 0.006676158927257304, "eval_arxiv_emb_top1_equal": 0.28125, "eval_arxiv_emb_top1_equal_sem": 0.039896367485272234, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.307297945022583, "eval_arxiv_n_ngrams_match_1": 15.374, "eval_arxiv_n_ngrams_match_2": 3.066, "eval_arxiv_n_ngrams_match_3": 0.7, "eval_arxiv_num_pred_words": 40.528, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.311229238118145, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.36843238097775527, "eval_arxiv_runtime": 21.8423, "eval_arxiv_samples_per_second": 22.891, "eval_arxiv_steps_per_second": 0.046, "eval_arxiv_token_set_f1": 0.3612567192170928, "eval_arxiv_token_set_f1_sem": 0.00428581343570969, "eval_arxiv_token_set_precision": 0.3151330291346013, "eval_arxiv_token_set_recall": 0.43962445118036747, "eval_arxiv_true_num_tokens": 64.0, "step": 258125 }, { "epoch": 49.56, "eval_python_code_alpaca_accuracy": 0.1643125, "eval_python_code_alpaca_bleu_score": 4.831610690207038, "eval_python_code_alpaca_bleu_score_sem": 0.1472396786805451, "eval_python_code_alpaca_emb_cos_sim": 0.7790913581848145, "eval_python_code_alpaca_emb_cos_sim_sem": 0.00708565770738915, "eval_python_code_alpaca_emb_top1_equal": 0.203125, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03570055125142555, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8174359798431396, "eval_python_code_alpaca_n_ngrams_match_1": 10.196, "eval_python_code_alpaca_n_ngrams_match_2": 3.06, "eval_python_code_alpaca_n_ngrams_match_3": 1.036, "eval_python_code_alpaca_num_pred_words": 43.82, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.733889588925756, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3475238271830434, "eval_python_code_alpaca_runtime": 15.5486, "eval_python_code_alpaca_samples_per_second": 32.157, "eval_python_code_alpaca_steps_per_second": 0.064, "eval_python_code_alpaca_token_set_f1": 0.4887089110297281, "eval_python_code_alpaca_token_set_f1_sem": 0.005312195430781894, "eval_python_code_alpaca_token_set_precision": 0.5612977019349719, "eval_python_code_alpaca_token_set_recall": 0.4552000565625454, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 258125 }, { "epoch": 49.56, "eval_wikibio_accuracy": 0.3281875, "eval_wikibio_bleu_score": 5.823480451513563, "eval_wikibio_bleu_score_sem": 0.20514314672935266, "eval_wikibio_emb_cos_sim": 0.7469199895858765, "eval_wikibio_emb_cos_sim_sem": 0.00945570604951381, "eval_wikibio_emb_top1_equal": 0.203125, "eval_wikibio_emb_top1_equal_sem": 0.03570055125142555, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6378750801086426, "eval_wikibio_n_ngrams_match_1": 9.896, "eval_wikibio_n_ngrams_match_2": 3.324, "eval_wikibio_n_ngrams_match_3": 1.204, "eval_wikibio_num_pred_words": 35.426, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 38.010980560703274, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.35008114938468204, "eval_wikibio_runtime": 30.5772, "eval_wikibio_samples_per_second": 16.352, "eval_wikibio_steps_per_second": 0.033, "eval_wikibio_token_set_f1": 0.3117331201833853, "eval_wikibio_token_set_f1_sem": 0.0058107660718428435, "eval_wikibio_token_set_precision": 0.32022907286668956, "eval_wikibio_token_set_recall": 0.32288410213510443, "eval_wikibio_true_num_tokens": 61.1328125, "step": 258125 }, { "epoch": 49.56, "eval_nq_accuracy": 0.54128125, "eval_nq_bleu_score": 12.260514324446934, "eval_nq_bleu_score_sem": 0.49662803297191177, "eval_nq_emb_cos_sim": 0.8366309404373169, "eval_nq_emb_cos_sim_sem": 0.007653960544353982, "eval_nq_emb_top1_equal": 0.2265625, "eval_nq_emb_top1_equal_sem": 0.03714537682851538, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.09952974319458, "eval_nq_n_ngrams_match_1": 23.684, "eval_nq_n_ngrams_match_2": 8.744, "eval_nq_n_ngrams_match_3": 4.088, "eval_nq_num_pred_words": 49.208, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.162330618389909, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.45968197880089, "eval_nq_runtime": 18.5772, "eval_nq_samples_per_second": 26.915, "eval_nq_steps_per_second": 0.054, "eval_nq_token_set_f1": 0.4712345158338733, "eval_nq_token_set_f1_sem": 0.005061503177211867, "eval_nq_token_set_precision": 0.43193471309365156, "eval_nq_token_set_recall": 0.5267336983653503, "eval_nq_true_num_tokens": 64.0, "step": 258125 }, { "epoch": 49.56, "learning_rate": 0.001, "loss": 2.4889, "step": 258132 }, { "epoch": 49.57, "learning_rate": 0.001, "loss": 2.4841, "step": 258144 }, { "epoch": 49.57, "learning_rate": 0.001, "loss": 2.4868, "step": 258156 }, { "epoch": 49.57, "learning_rate": 0.001, "loss": 2.4856, "step": 258168 }, { "epoch": 49.57, "learning_rate": 0.001, "loss": 2.4803, "step": 258180 }, { "epoch": 49.58, "learning_rate": 0.001, "loss": 2.4734, "step": 258192 }, { "epoch": 49.58, "learning_rate": 0.001, "loss": 2.4789, "step": 258204 }, { "epoch": 49.58, "learning_rate": 0.001, "loss": 2.4872, "step": 258216 }, { "epoch": 49.58, "learning_rate": 0.001, "loss": 2.4856, "step": 258228 }, { "epoch": 49.59, "learning_rate": 0.001, "loss": 2.4905, "step": 258240 }, { "epoch": 49.59, "learning_rate": 0.001, "loss": 2.4752, "step": 258252 }, { "epoch": 49.59, "learning_rate": 0.001, "loss": 2.4804, "step": 258264 }, { "epoch": 49.59, "learning_rate": 0.001, "loss": 2.4729, "step": 258276 }, { "epoch": 49.59, "learning_rate": 0.001, "loss": 2.4853, "step": 258288 }, { "epoch": 49.6, "learning_rate": 0.001, "loss": 2.4837, "step": 258300 }, { "epoch": 49.6, "learning_rate": 0.001, "loss": 2.4838, "step": 258312 }, { "epoch": 49.6, "learning_rate": 0.001, "loss": 2.4857, "step": 258324 }, { "epoch": 49.6, "learning_rate": 0.001, "loss": 2.4845, "step": 258336 }, { "epoch": 49.61, "learning_rate": 0.001, "loss": 2.4777, "step": 258348 }, { "epoch": 49.61, "learning_rate": 0.001, "loss": 2.4761, "step": 258360 }, { "epoch": 49.61, "learning_rate": 0.001, "loss": 2.4885, "step": 258372 }, { "epoch": 49.61, "learning_rate": 0.001, "loss": 2.4848, "step": 258384 }, { "epoch": 49.62, "learning_rate": 0.001, "loss": 2.4886, "step": 258396 }, { "epoch": 49.62, "learning_rate": 0.001, "loss": 2.497, "step": 258408 }, { "epoch": 49.62, "learning_rate": 0.001, "loss": 2.4857, "step": 258420 }, { "epoch": 49.62, "learning_rate": 0.001, "loss": 2.4783, "step": 258432 }, { "epoch": 49.62, "learning_rate": 0.001, "loss": 2.4813, "step": 258444 }, { "epoch": 49.63, "learning_rate": 0.001, "loss": 2.4888, "step": 258456 }, { "epoch": 49.63, "learning_rate": 0.001, "loss": 2.4778, "step": 258468 }, { "epoch": 49.63, "learning_rate": 0.001, "loss": 2.4707, "step": 258480 }, { "epoch": 49.63, "learning_rate": 0.001, "loss": 2.4848, "step": 258492 }, { "epoch": 49.64, "learning_rate": 0.001, "loss": 2.4858, "step": 258504 }, { "epoch": 49.64, "learning_rate": 0.001, "loss": 2.4857, "step": 258516 }, { "epoch": 49.64, "learning_rate": 0.001, "loss": 2.4893, "step": 258528 }, { "epoch": 49.64, "learning_rate": 0.001, "loss": 2.4764, "step": 258540 }, { "epoch": 49.65, "learning_rate": 0.001, "loss": 2.4822, "step": 258552 }, { "epoch": 49.65, "learning_rate": 0.001, "loss": 2.4883, "step": 258564 }, { "epoch": 49.65, "learning_rate": 0.001, "loss": 2.4755, "step": 258576 }, { "epoch": 49.65, "learning_rate": 0.001, "loss": 2.4718, "step": 258588 }, { "epoch": 49.65, "learning_rate": 0.001, "loss": 2.4801, "step": 258600 }, { "epoch": 49.66, "learning_rate": 0.001, "loss": 2.4772, "step": 258612 }, { "epoch": 49.66, "learning_rate": 0.001, "loss": 2.4808, "step": 258624 }, { "epoch": 49.66, "learning_rate": 0.001, "loss": 2.491, "step": 258636 }, { "epoch": 49.66, "learning_rate": 0.001, "loss": 2.4972, "step": 258648 }, { "epoch": 49.67, "learning_rate": 0.001, "loss": 2.4771, "step": 258660 }, { "epoch": 49.67, "learning_rate": 0.001, "loss": 2.4763, "step": 258672 }, { "epoch": 49.67, "learning_rate": 0.001, "loss": 2.4794, "step": 258684 }, { "epoch": 49.67, "learning_rate": 0.001, "loss": 2.4825, "step": 258696 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 2.4757, "step": 258708 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 2.4932, "step": 258720 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 2.4871, "step": 258732 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 2.489, "step": 258744 }, { "epoch": 49.68, "eval_ag_news_accuracy": 0.3313125, "eval_ag_news_bleu_score": 5.026388187917658, "eval_ag_news_bleu_score_sem": 0.15984461876813788, "eval_ag_news_emb_cos_sim": 0.8151041269302368, "eval_ag_news_emb_cos_sim_sem": 0.007203880934551224, "eval_ag_news_emb_top1_equal": 0.2109375, "eval_ag_news_emb_top1_equal_sem": 0.03620184850179216, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.4551503658294678, "eval_ag_news_n_ngrams_match_1": 14.622, "eval_ag_news_n_ngrams_match_2": 3.35, "eval_ag_news_n_ngrams_match_3": 0.968, "eval_ag_news_num_pred_words": 47.042, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.663049365025124, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36154966371497654, "eval_ag_news_runtime": 18.308, "eval_ag_news_samples_per_second": 27.31, "eval_ag_news_steps_per_second": 0.055, "eval_ag_news_token_set_f1": 0.3623605357325836, "eval_ag_news_token_set_f1_sem": 0.004426005081012672, "eval_ag_news_token_set_precision": 0.34952832898902786, "eval_ag_news_token_set_recall": 0.39070160009853905, "eval_ag_news_true_num_tokens": 56.09375, "step": 258750 }, { "epoch": 49.68, "eval_anthropic_toxic_prompts_accuracy": 0.118625, "eval_anthropic_toxic_prompts_bleu_score": 3.3133452280034077, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.12967709966693836, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6886178851127625, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.008527596883900493, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.1171875, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02854125312152025, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.141610860824585, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.398, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 2.04, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.792, "eval_anthropic_toxic_prompts_num_pred_words": 47.876, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 23.141113964638908, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21812681908562342, "eval_anthropic_toxic_prompts_runtime": 18.9633, "eval_anthropic_toxic_prompts_samples_per_second": 26.367, "eval_anthropic_toxic_prompts_steps_per_second": 0.053, "eval_anthropic_toxic_prompts_token_set_f1": 0.3671589512071575, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006774948452226139, "eval_anthropic_toxic_prompts_token_set_precision": 0.45554466526800835, "eval_anthropic_toxic_prompts_token_set_recall": 0.33826798163114935, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 258750 }, { "epoch": 49.68, "eval_arxiv_accuracy": 0.35453125, "eval_arxiv_bleu_score": 4.425632613346473, "eval_arxiv_bleu_score_sem": 0.1313574698536943, "eval_arxiv_emb_cos_sim": 0.7889786958694458, "eval_arxiv_emb_cos_sim_sem": 0.006259094129273244, "eval_arxiv_emb_top1_equal": 0.2890625, "eval_arxiv_emb_top1_equal_sem": 0.04022626667363519, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.316903829574585, "eval_arxiv_n_ngrams_match_1": 15.586, "eval_arxiv_n_ngrams_match_2": 3.072, "eval_arxiv_n_ngrams_match_3": 0.668, "eval_arxiv_num_pred_words": 40.736, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.574841842249207, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.372673177400283, "eval_arxiv_runtime": 15.0144, "eval_arxiv_samples_per_second": 33.301, "eval_arxiv_steps_per_second": 0.067, "eval_arxiv_token_set_f1": 0.3663227317283526, "eval_arxiv_token_set_f1_sem": 0.004295528750382863, "eval_arxiv_token_set_precision": 0.3185767354422295, "eval_arxiv_token_set_recall": 0.4488085183419965, "eval_arxiv_true_num_tokens": 64.0, "step": 258750 }, { "epoch": 49.68, "eval_python_code_alpaca_accuracy": 0.1651875, "eval_python_code_alpaca_bleu_score": 4.7027181483117, "eval_python_code_alpaca_bleu_score_sem": 0.15003966165738836, "eval_python_code_alpaca_emb_cos_sim": 0.7729647159576416, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007068172692157701, "eval_python_code_alpaca_emb_top1_equal": 0.1640625, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03286167651298939, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.80320143699646, "eval_python_code_alpaca_n_ngrams_match_1": 10.058, "eval_python_code_alpaca_n_ngrams_match_2": 2.944, "eval_python_code_alpaca_n_ngrams_match_3": 1.004, "eval_python_code_alpaca_num_pred_words": 43.774, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.497377633893844, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.3432791431731794, "eval_python_code_alpaca_runtime": 19.2727, "eval_python_code_alpaca_samples_per_second": 25.943, "eval_python_code_alpaca_steps_per_second": 0.052, "eval_python_code_alpaca_token_set_f1": 0.4815326733307023, "eval_python_code_alpaca_token_set_f1_sem": 0.005268090701621846, "eval_python_code_alpaca_token_set_precision": 0.5475949658199415, "eval_python_code_alpaca_token_set_recall": 0.4494336779185127, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 258750 }, { "epoch": 49.68, "eval_wikibio_accuracy": 0.3321875, "eval_wikibio_bleu_score": 6.285614792164227, "eval_wikibio_bleu_score_sem": 0.21793175034967222, "eval_wikibio_emb_cos_sim": 0.7413852214813232, "eval_wikibio_emb_cos_sim_sem": 0.010085725702133258, "eval_wikibio_emb_top1_equal": 0.25, "eval_wikibio_emb_top1_equal_sem": 0.03842366440207048, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.5919065475463867, "eval_wikibio_n_ngrams_match_1": 10.02, "eval_wikibio_n_ngrams_match_2": 3.498, "eval_wikibio_n_ngrams_match_3": 1.346, "eval_wikibio_num_pred_words": 35.98, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.303223810948815, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3532924181443103, "eval_wikibio_runtime": 14.1132, "eval_wikibio_samples_per_second": 35.428, "eval_wikibio_steps_per_second": 0.071, "eval_wikibio_token_set_f1": 0.320521546391255, "eval_wikibio_token_set_f1_sem": 0.005959177613992733, "eval_wikibio_token_set_precision": 0.32700298192726723, "eval_wikibio_token_set_recall": 0.3307985375555874, "eval_wikibio_true_num_tokens": 61.1328125, "step": 258750 }, { "epoch": 49.68, "eval_nq_accuracy": 0.5409375, "eval_nq_bleu_score": 12.177007682206469, "eval_nq_bleu_score_sem": 0.4982037183398605, "eval_nq_emb_cos_sim": 0.8359046578407288, "eval_nq_emb_cos_sim_sem": 0.007212094301654784, "eval_nq_emb_top1_equal": 0.3125, "eval_nq_emb_top1_equal_sem": 0.041130074229814934, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1012213230133057, "eval_nq_n_ngrams_match_1": 23.598, "eval_nq_n_ngrams_match_2": 8.808, "eval_nq_n_ngrams_match_3": 4.068, "eval_nq_num_pred_words": 49.248, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.176149536744276, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.456941885633945, "eval_nq_runtime": 14.5098, "eval_nq_samples_per_second": 34.459, "eval_nq_steps_per_second": 0.069, "eval_nq_token_set_f1": 0.4701322877336389, "eval_nq_token_set_f1_sem": 0.004956829214956096, "eval_nq_token_set_precision": 0.4292128193509934, "eval_nq_token_set_recall": 0.5278939867536141, "eval_nq_true_num_tokens": 64.0, "step": 258750 }, { "epoch": 49.68, "learning_rate": 0.001, "loss": 2.483, "step": 258756 }, { "epoch": 49.69, "learning_rate": 0.001, "loss": 2.4855, "step": 258768 }, { "epoch": 49.69, "learning_rate": 0.001, "loss": 2.4879, "step": 258780 }, { "epoch": 49.69, "learning_rate": 0.001, "loss": 2.4785, "step": 258792 }, { "epoch": 49.69, "learning_rate": 0.001, "loss": 2.4774, "step": 258804 }, { "epoch": 49.7, "learning_rate": 0.001, "loss": 2.4881, "step": 258816 }, { "epoch": 49.7, "learning_rate": 0.001, "loss": 2.4975, "step": 258828 }, { "epoch": 49.7, "learning_rate": 0.001, "loss": 2.4766, "step": 258840 }, { "epoch": 49.7, "learning_rate": 0.001, "loss": 2.4799, "step": 258852 }, { "epoch": 49.71, "learning_rate": 0.001, "loss": 2.4936, "step": 258864 }, { "epoch": 49.71, "learning_rate": 0.001, "loss": 2.4829, "step": 258876 }, { "epoch": 49.71, "learning_rate": 0.001, "loss": 2.4837, "step": 258888 }, { "epoch": 49.71, "learning_rate": 0.001, "loss": 2.4774, "step": 258900 }, { "epoch": 49.71, "learning_rate": 0.001, "loss": 2.4945, "step": 258912 }, { "epoch": 49.72, "learning_rate": 0.001, "loss": 2.4826, "step": 258924 }, { "epoch": 49.72, "learning_rate": 0.001, "loss": 2.4836, "step": 258936 }, { "epoch": 49.72, "learning_rate": 0.001, "loss": 2.4822, "step": 258948 }, { "epoch": 49.72, "learning_rate": 0.001, "loss": 2.4884, "step": 258960 }, { "epoch": 49.73, "learning_rate": 0.001, "loss": 2.4926, "step": 258972 }, { "epoch": 49.73, "learning_rate": 0.001, "loss": 2.4903, "step": 258984 }, { "epoch": 49.73, "learning_rate": 0.001, "loss": 2.4931, "step": 258996 }, { "epoch": 49.73, "learning_rate": 0.001, "loss": 2.4871, "step": 259008 }, { "epoch": 49.74, "learning_rate": 0.001, "loss": 2.4821, "step": 259020 }, { "epoch": 49.74, "learning_rate": 0.001, "loss": 2.4979, "step": 259032 }, { "epoch": 49.74, "learning_rate": 0.001, "loss": 2.4824, "step": 259044 }, { "epoch": 49.74, "learning_rate": 0.001, "loss": 2.49, "step": 259056 }, { "epoch": 49.74, "learning_rate": 0.001, "loss": 2.4857, "step": 259068 }, { "epoch": 49.75, "learning_rate": 0.001, "loss": 2.4773, "step": 259080 }, { "epoch": 49.75, "learning_rate": 0.001, "loss": 2.4712, "step": 259092 }, { "epoch": 49.75, "learning_rate": 0.001, "loss": 2.4902, "step": 259104 }, { "epoch": 49.75, "learning_rate": 0.001, "loss": 2.4849, "step": 259116 }, { "epoch": 49.76, "learning_rate": 0.001, "loss": 2.4862, "step": 259128 }, { "epoch": 49.76, "learning_rate": 0.001, "loss": 2.4856, "step": 259140 }, { "epoch": 49.76, "learning_rate": 0.001, "loss": 2.4901, "step": 259152 }, { "epoch": 49.76, "learning_rate": 0.001, "loss": 2.4776, "step": 259164 }, { "epoch": 49.76, "learning_rate": 0.001, "loss": 2.4766, "step": 259176 }, { "epoch": 49.77, "learning_rate": 0.001, "loss": 2.4835, "step": 259188 }, { "epoch": 49.77, "learning_rate": 0.001, "loss": 2.4888, "step": 259200 }, { "epoch": 49.77, "learning_rate": 0.001, "loss": 2.4813, "step": 259212 }, { "epoch": 49.77, "learning_rate": 0.001, "loss": 2.4942, "step": 259224 }, { "epoch": 49.78, "learning_rate": 0.001, "loss": 2.4833, "step": 259236 }, { "epoch": 49.78, "learning_rate": 0.001, "loss": 2.488, "step": 259248 }, { "epoch": 49.78, "learning_rate": 0.001, "loss": 2.4824, "step": 259260 }, { "epoch": 49.78, "learning_rate": 0.001, "loss": 2.4831, "step": 259272 }, { "epoch": 49.79, "learning_rate": 0.001, "loss": 2.4793, "step": 259284 }, { "epoch": 49.79, "learning_rate": 0.001, "loss": 2.4885, "step": 259296 }, { "epoch": 49.79, "learning_rate": 0.001, "loss": 2.4951, "step": 259308 }, { "epoch": 49.79, "learning_rate": 0.001, "loss": 2.481, "step": 259320 }, { "epoch": 49.79, "learning_rate": 0.001, "loss": 2.4881, "step": 259332 }, { "epoch": 49.8, "learning_rate": 0.001, "loss": 2.4745, "step": 259344 }, { "epoch": 49.8, "learning_rate": 0.001, "loss": 2.4892, "step": 259356 }, { "epoch": 49.8, "learning_rate": 0.001, "loss": 2.4896, "step": 259368 }, { "epoch": 49.8, "eval_ag_news_accuracy": 0.33125, "eval_ag_news_bleu_score": 4.9935354565193615, "eval_ag_news_bleu_score_sem": 0.15510571057256645, "eval_ag_news_emb_cos_sim": 0.8198114633560181, "eval_ag_news_emb_cos_sim_sem": 0.006765341658706355, "eval_ag_news_emb_top1_equal": 0.234375, "eval_ag_news_emb_top1_equal_sem": 0.03758909358128201, "eval_ag_news_exact_match": 0.0, "eval_ag_news_exact_match_sem": 0.0, "eval_ag_news_loss": 3.45509934425354, "eval_ag_news_n_ngrams_match_1": 14.532, "eval_ag_news_n_ngrams_match_2": 3.326, "eval_ag_news_n_ngrams_match_3": 0.92, "eval_ag_news_num_pred_words": 47.01, "eval_ag_news_num_true_words": 39.994, "eval_ag_news_perplexity": 31.661433907559786, "eval_ag_news_pred_num_tokens": 63.0, "eval_ag_news_rouge_score": 0.36059860284351575, "eval_ag_news_runtime": 14.4972, "eval_ag_news_samples_per_second": 34.489, "eval_ag_news_steps_per_second": 0.069, "eval_ag_news_token_set_f1": 0.36093227554643503, "eval_ag_news_token_set_f1_sem": 0.004427950073237712, "eval_ag_news_token_set_precision": 0.34603615110360586, "eval_ag_news_token_set_recall": 0.3929845565245582, "eval_ag_news_true_num_tokens": 56.09375, "step": 259375 }, { "epoch": 49.8, "eval_anthropic_toxic_prompts_accuracy": 0.1176875, "eval_anthropic_toxic_prompts_bleu_score": 3.253135907178858, "eval_anthropic_toxic_prompts_bleu_score_sem": 0.11851330226531899, "eval_anthropic_toxic_prompts_emb_cos_sim": 0.6837367415428162, "eval_anthropic_toxic_prompts_emb_cos_sim_sem": 0.00865816518640921, "eval_anthropic_toxic_prompts_emb_top1_equal": 0.125, "eval_anthropic_toxic_prompts_emb_top1_equal_sem": 0.02934655822437397, "eval_anthropic_toxic_prompts_exact_match": 0.0, "eval_anthropic_toxic_prompts_exact_match_sem": 0.0, "eval_anthropic_toxic_prompts_loss": 3.197890520095825, "eval_anthropic_toxic_prompts_n_ngrams_match_1": 6.346, "eval_anthropic_toxic_prompts_n_ngrams_match_2": 1.998, "eval_anthropic_toxic_prompts_n_ngrams_match_3": 0.782, "eval_anthropic_toxic_prompts_num_pred_words": 47.42, "eval_anthropic_toxic_prompts_num_true_words": 14.584, "eval_anthropic_toxic_prompts_perplexity": 24.480833863019242, "eval_anthropic_toxic_prompts_pred_num_tokens": 63.0, "eval_anthropic_toxic_prompts_rouge_score": 0.21850487522570472, "eval_anthropic_toxic_prompts_runtime": 14.4457, "eval_anthropic_toxic_prompts_samples_per_second": 34.612, "eval_anthropic_toxic_prompts_steps_per_second": 0.069, "eval_anthropic_toxic_prompts_token_set_f1": 0.3598482503018601, "eval_anthropic_toxic_prompts_token_set_f1_sem": 0.006412825510789877, "eval_anthropic_toxic_prompts_token_set_precision": 0.4436025870010036, "eval_anthropic_toxic_prompts_token_set_recall": 0.32932732711768936, "eval_anthropic_toxic_prompts_true_num_tokens": 17.8515625, "step": 259375 }, { "epoch": 49.8, "eval_arxiv_accuracy": 0.354375, "eval_arxiv_bleu_score": 4.40920830892549, "eval_arxiv_bleu_score_sem": 0.13379961042053165, "eval_arxiv_emb_cos_sim": 0.7760727405548096, "eval_arxiv_emb_cos_sim_sem": 0.006924069324376414, "eval_arxiv_emb_top1_equal": 0.3046875, "eval_arxiv_emb_top1_equal_sem": 0.04084279867618665, "eval_arxiv_exact_match": 0.0, "eval_arxiv_exact_match_sem": 0.0, "eval_arxiv_loss": 3.3212099075317383, "eval_arxiv_n_ngrams_match_1": 15.464, "eval_arxiv_n_ngrams_match_2": 3.02, "eval_arxiv_n_ngrams_match_3": 0.674, "eval_arxiv_num_pred_words": 40.57, "eval_arxiv_num_true_words": 43.592, "eval_arxiv_perplexity": 27.693837278820542, "eval_arxiv_pred_num_tokens": 63.0, "eval_arxiv_rouge_score": 0.3711305269417794, "eval_arxiv_runtime": 14.6293, "eval_arxiv_samples_per_second": 34.178, "eval_arxiv_steps_per_second": 0.068, "eval_arxiv_token_set_f1": 0.3646692453807495, "eval_arxiv_token_set_f1_sem": 0.004317602221869201, "eval_arxiv_token_set_precision": 0.3161505585548265, "eval_arxiv_token_set_recall": 0.44766296422486834, "eval_arxiv_true_num_tokens": 64.0, "step": 259375 }, { "epoch": 49.8, "eval_python_code_alpaca_accuracy": 0.164, "eval_python_code_alpaca_bleu_score": 4.690671898848345, "eval_python_code_alpaca_bleu_score_sem": 0.14029483247587066, "eval_python_code_alpaca_emb_cos_sim": 0.7661554217338562, "eval_python_code_alpaca_emb_cos_sim_sem": 0.007425235524385931, "eval_python_code_alpaca_emb_top1_equal": 0.171875, "eval_python_code_alpaca_emb_top1_equal_sem": 0.03347745514062371, "eval_python_code_alpaca_exact_match": 0.0, "eval_python_code_alpaca_exact_match_sem": 0.0, "eval_python_code_alpaca_loss": 2.8167998790740967, "eval_python_code_alpaca_n_ngrams_match_1": 9.954, "eval_python_code_alpaca_n_ngrams_match_2": 2.926, "eval_python_code_alpaca_n_ngrams_match_3": 1.0, "eval_python_code_alpaca_num_pred_words": 43.148, "eval_python_code_alpaca_num_true_words": 18.128, "eval_python_code_alpaca_perplexity": 16.723248533639687, "eval_python_code_alpaca_pred_num_tokens": 63.0, "eval_python_code_alpaca_rouge_score": 0.34367616277343627, "eval_python_code_alpaca_runtime": 13.8569, "eval_python_code_alpaca_samples_per_second": 36.083, "eval_python_code_alpaca_steps_per_second": 0.072, "eval_python_code_alpaca_token_set_f1": 0.47920856608859286, "eval_python_code_alpaca_token_set_f1_sem": 0.00520049240427338, "eval_python_code_alpaca_token_set_precision": 0.545327103607184, "eval_python_code_alpaca_token_set_recall": 0.44997447735357915, "eval_python_code_alpaca_true_num_tokens": 23.359375, "step": 259375 }, { "epoch": 49.8, "eval_wikibio_accuracy": 0.33165625, "eval_wikibio_bleu_score": 6.246138423663437, "eval_wikibio_bleu_score_sem": 0.22274419811649707, "eval_wikibio_emb_cos_sim": 0.7574476599693298, "eval_wikibio_emb_cos_sim_sem": 0.00904951184572302, "eval_wikibio_emb_top1_equal": 0.2265625, "eval_wikibio_emb_top1_equal_sem": 0.03714537682851538, "eval_wikibio_exact_match": 0.0, "eval_wikibio_exact_match_sem": 0.0, "eval_wikibio_loss": 3.6061134338378906, "eval_wikibio_n_ngrams_match_1": 10.386, "eval_wikibio_n_ngrams_match_2": 3.528, "eval_wikibio_n_ngrams_match_3": 1.322, "eval_wikibio_num_pred_words": 36.602, "eval_wikibio_num_true_words": 29.41, "eval_wikibio_perplexity": 36.82266063689858, "eval_wikibio_pred_num_tokens": 63.0, "eval_wikibio_rouge_score": 0.3647958944360802, "eval_wikibio_runtime": 13.4607, "eval_wikibio_samples_per_second": 37.145, "eval_wikibio_steps_per_second": 0.074, "eval_wikibio_token_set_f1": 0.3278060448010174, "eval_wikibio_token_set_f1_sem": 0.005511555343504312, "eval_wikibio_token_set_precision": 0.33910962220973623, "eval_wikibio_token_set_recall": 0.3326095488671017, "eval_wikibio_true_num_tokens": 61.1328125, "step": 259375 }, { "epoch": 49.8, "eval_nq_accuracy": 0.54090625, "eval_nq_bleu_score": 12.494611504861831, "eval_nq_bleu_score_sem": 0.5066426279855258, "eval_nq_emb_cos_sim": 0.8411279916763306, "eval_nq_emb_cos_sim_sem": 0.006997197399265355, "eval_nq_emb_top1_equal": 0.328125, "eval_nq_emb_top1_equal_sem": 0.041664103776406315, "eval_nq_exact_match": 0.0, "eval_nq_exact_match_sem": 0.0, "eval_nq_loss": 2.1031692028045654, "eval_nq_n_ngrams_match_1": 23.658, "eval_nq_n_ngrams_match_2": 8.892, "eval_nq_n_ngrams_match_3": 4.2, "eval_nq_num_pred_words": 49.114, "eval_nq_num_true_words": 49.824, "eval_nq_perplexity": 8.192091214392544, "eval_nq_pred_num_tokens": 63.0, "eval_nq_rouge_score": 0.4595867352122004, "eval_nq_runtime": 14.3062, "eval_nq_samples_per_second": 34.95, "eval_nq_steps_per_second": 0.07, "eval_nq_token_set_f1": 0.47280429920137734, "eval_nq_token_set_f1_sem": 0.004972447546981815, "eval_nq_token_set_precision": 0.431889686886597, "eval_nq_token_set_recall": 0.5302696249153123, "eval_nq_true_num_tokens": 64.0, "step": 259375 }, { "epoch": 49.8, "learning_rate": 0.001, "loss": 2.4856, "step": 259380 }, { "epoch": 49.81, "learning_rate": 0.001, "loss": 2.4897, "step": 259392 }, { "epoch": 49.81, "learning_rate": 0.001, "loss": 2.4851, "step": 259404 }, { "epoch": 49.81, "learning_rate": 0.001, "loss": 2.4956, "step": 259416 }, { "epoch": 49.81, "learning_rate": 0.001, "loss": 2.4822, "step": 259428 }, { "epoch": 49.82, "learning_rate": 0.001, "loss": 2.4881, "step": 259440 }, { "epoch": 49.82, "learning_rate": 0.001, "loss": 2.4893, "step": 259452 }, { "epoch": 49.82, "learning_rate": 0.001, "loss": 2.4687, "step": 259464 }, { "epoch": 49.82, "learning_rate": 0.001, "loss": 2.4813, "step": 259476 }, { "epoch": 49.82, "learning_rate": 0.001, "loss": 2.4905, "step": 259488 }, { "epoch": 49.83, "learning_rate": 0.001, "loss": 2.484, "step": 259500 }, { "epoch": 49.83, "learning_rate": 0.001, "loss": 2.4895, "step": 259512 }, { "epoch": 49.83, "learning_rate": 0.001, "loss": 2.4789, "step": 259524 }, { "epoch": 49.83, "learning_rate": 0.001, "loss": 2.4804, "step": 259536 }, { "epoch": 49.84, "learning_rate": 0.001, "loss": 2.4929, "step": 259548 }, { "epoch": 49.84, "learning_rate": 0.001, "loss": 2.4837, "step": 259560 }, { "epoch": 49.84, "learning_rate": 0.001, "loss": 2.4848, "step": 259572 }, { "epoch": 49.84, "learning_rate": 0.001, "loss": 2.4914, "step": 259584 }, { "epoch": 49.85, "learning_rate": 0.001, "loss": 2.4845, "step": 259596 }, { "epoch": 49.85, "learning_rate": 0.001, "loss": 2.4892, "step": 259608 }, { "epoch": 49.85, "learning_rate": 0.001, "loss": 2.4799, "step": 259620 }, { "epoch": 49.85, "learning_rate": 0.001, "loss": 2.486, "step": 259632 }, { "epoch": 49.85, "learning_rate": 0.001, "loss": 2.4845, "step": 259644 }, { "epoch": 49.86, "learning_rate": 0.001, "loss": 2.4896, "step": 259656 }, { "epoch": 49.86, "learning_rate": 0.001, "loss": 2.4804, "step": 259668 }, { "epoch": 49.86, "learning_rate": 0.001, "loss": 2.4968, "step": 259680 }, { "epoch": 49.86, "learning_rate": 0.001, "loss": 2.4859, "step": 259692 }, { "epoch": 49.87, "learning_rate": 0.001, "loss": 2.4805, "step": 259704 }, { "epoch": 49.87, "learning_rate": 0.001, "loss": 2.4899, "step": 259716 }, { "epoch": 49.87, "learning_rate": 0.001, "loss": 2.4795, "step": 259728 }, { "epoch": 49.87, "learning_rate": 0.001, "loss": 2.4885, "step": 259740 }, { "epoch": 49.88, "learning_rate": 0.001, "loss": 2.4909, "step": 259752 }, { "epoch": 49.88, "learning_rate": 0.001, "loss": 2.4825, "step": 259764 }, { "epoch": 49.88, "learning_rate": 0.001, "loss": 2.482, "step": 259776 }, { "epoch": 49.88, "learning_rate": 0.001, "loss": 2.4869, "step": 259788 }, { "epoch": 49.88, "learning_rate": 0.001, "loss": 2.4828, "step": 259800 }, { "epoch": 49.89, "learning_rate": 0.001, "loss": 2.4728, "step": 259812 }, { "epoch": 49.89, "learning_rate": 0.001, "loss": 2.4868, "step": 259824 }, { "epoch": 49.89, "learning_rate": 0.001, "loss": 2.4823, "step": 259836 }, { "epoch": 49.89, "learning_rate": 0.001, "loss": 2.4937, "step": 259848 }, { "epoch": 49.9, "learning_rate": 0.001, "loss": 2.4793, "step": 259860 }, { "epoch": 49.9, "learning_rate": 0.001, "loss": 2.4939, "step": 259872 }, { "epoch": 49.9, "learning_rate": 0.001, "loss": 2.4843, "step": 259884 }, { "epoch": 49.9, "learning_rate": 0.001, "loss": 2.4732, "step": 259896 }, { "epoch": 49.91, "learning_rate": 0.001, "loss": 2.4932, "step": 259908 }, { "epoch": 49.91, "learning_rate": 0.001, "loss": 2.4767, "step": 259920 }, { "epoch": 49.91, "learning_rate": 0.001, "loss": 2.4781, "step": 259932 }, { "epoch": 49.91, "learning_rate": 0.001, "loss": 2.4872, "step": 259944 }, { "epoch": 49.91, "learning_rate": 0.001, "loss": 2.4894, "step": 259956 } ], "logging_steps": 12, "max_steps": 494760, "num_train_epochs": 95, "save_steps": 62, "total_flos": 1.3889668107139744e+19, "trial_name": null, "trial_params": null }