diff --git a/.gitattributes b/.gitattributes index a6344aac8c09253b3b630fb776ae94478aa0275b..52373fe24473b1aa44333d318f578ae6bf04b49b 100644 --- a/.gitattributes +++ b/.gitattributes @@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text *.zip filter=lfs diff=lfs merge=lfs -text *.zst filter=lfs diff=lfs merge=lfs -text *tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index 7b95401dc46245ac339fc25059d4a56d90b4cde5..459a5ef61b4c331d849ab94cb47d9951e94c35ad 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,43 @@ ---- -license: apache-2.0 ---- +--- +base_model: +- cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese +library_name: transformers +tags: +- mergekit +- merge + +--- +# SKYCAVE-R1-32B-v0.1 + +This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit). + +## Merge Details +### Merge Method + +This model was merged using the [Model Stock](https://arxiv.org/abs/2403.19522) merge method using [cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese](https://huggingface.co/cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese) as a base. + +### Models Merged + +The following models were included in the merge: +* SKYCAVE_element_QR_jp +* SKYCAVE_element_Sky_jp +* SKYCAVE_element_R1_jp_02 +* SKYCAVE_element_R1_jp_03 +* SKYCAVE_element_R1_jp_01 + +### Configuration + +The following YAML configuration was used to produce this model: + +```yaml +merge_method: model_stock +base_model: cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese +models: + - model: SKYCAVE_element_QR_jp + - model: SKYCAVE_element_R1_jp_01 + - model: SKYCAVE_element_R1_jp_02 + - model: SKYCAVE_element_R1_jp_03 + - model: SKYCAVE_element_Sky_jp +dtype: bfloat16 +name: SKYCAVE-R1-32B-v0.1 +``` diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..918c0efa58624129068e5aadb4f8636bcbb0fe8c --- /dev/null +++ b/config.json @@ -0,0 +1,29 @@ +{ + "_name_or_path": "cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese", + "architectures": [ + "Qwen2ForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 151643, + "eos_token_id": 151643, + "hidden_act": "silu", + "hidden_size": 5120, + "initializer_range": 0.02, + "intermediate_size": 27648, + "max_position_embeddings": 131072, + "max_window_layers": 64, + "model_type": "qwen2", + "num_attention_heads": 40, + "num_hidden_layers": 64, + "num_key_value_heads": 8, + "rms_norm_eps": 1e-05, + "rope_scaling": null, + "rope_theta": 1000000.0, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.48.1", + "use_cache": true, + "use_sliding_window": false, + "vocab_size": 152064 +} diff --git a/mergekit_config.yml b/mergekit_config.yml new file mode 100644 index 0000000000000000000000000000000000000000..ae07ac859029262ea605c4016cabbada16dcce57 --- /dev/null +++ b/mergekit_config.yml @@ -0,0 +1,10 @@ +merge_method: model_stock +base_model: cyberagent/DeepSeek-R1-Distill-Qwen-32B-Japanese +models: + - model: SKYCAVE_element_QR_jp + - model: SKYCAVE_element_R1_jp_01 + - model: SKYCAVE_element_R1_jp_02 + - model: SKYCAVE_element_R1_jp_03 + - model: SKYCAVE_element_Sky_jp +dtype: bfloat16 +name: SKYCAVE-R1-32B-v0.1 \ No newline at end of file diff --git a/model-00001-of-00066.safetensors b/model-00001-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..761f44f40c44cbfbb6fe942dec6a83c8359c441a --- /dev/null +++ b/model-00001-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60cb17732bf5453c578aec8a85234a3e9792e9560c25ff58cca7eeccb17025bb +size 1557135488 diff --git a/model-00002-of-00066.safetensors b/model-00002-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..668d6fba08cf4ad8290f232a07e08aaae9be4bbf --- /dev/null +++ b/model-00002-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c42d8e131511a390d6ed5393d50160d1970c49d94b185fd67c466c54513cb25 +size 1557135504 diff --git a/model-00003-of-00066.safetensors b/model-00003-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4b592742c11c9943bf772de7e5b0f7ea919e83c1 --- /dev/null +++ b/model-00003-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d607166f155ca8059fe9bb266721a1dbf7bd11d591c37996bd96d8c3d5a5b874 +size 975222224 diff --git a/model-00004-of-00066.safetensors b/model-00004-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1a6264e1bd8c8bb836699a82e2c458d2608372ad --- /dev/null +++ b/model-00004-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02a4f225052f0c0f373686522ec03bf4f929fc56ba27f6eca1fe5e712edb61ee +size 975211880 diff --git a/model-00005-of-00066.safetensors b/model-00005-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d06ed5ff541430f16294c2d981b48ba6dab28781 --- /dev/null +++ b/model-00005-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd000cd3642d8477715cdbf7835624fe8df9df77895719b3615bc42b4ff2e9f5 +size 975211888 diff --git a/model-00006-of-00066.safetensors b/model-00006-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e40a5f0934f002f2e2fe2334141e562763012959 --- /dev/null +++ b/model-00006-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64068918fa83a39c6d1657ccf07f5f5d6958b4d5fedca9f689db218b28b0ef8b +size 975211888 diff --git a/model-00007-of-00066.safetensors b/model-00007-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b34f1a98b65129339151333280527872c97da0fe --- /dev/null +++ b/model-00007-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:371342f86d49699744d64ba97a3e6f972de5a39f70cdc450b2f4a433df38fcfa +size 975211888 diff --git a/model-00008-of-00066.safetensors b/model-00008-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2790696b36c33c29044bb954381281af5dc79e17 --- /dev/null +++ b/model-00008-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f576b5e1c24d47925262e7f9b215733af7ea19a81d528ae9d815be25e8543ce +size 975211888 diff --git a/model-00009-of-00066.safetensors b/model-00009-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbe9e3a73f312db746ed63f3374d9763905dc73f --- /dev/null +++ b/model-00009-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a74bfa466ca46938a0b02145cc043384c0f4fd5de0dad30c699011a414b25ec +size 975211888 diff --git a/model-00010-of-00066.safetensors b/model-00010-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c0a546c6330275100c68538b7c4af2521c697b42 --- /dev/null +++ b/model-00010-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fb508223b0aa30dbc38e1333a470804d952a949424523987e32d626532d8d1dd +size 975211888 diff --git a/model-00011-of-00066.safetensors b/model-00011-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..12a0429bbc0a9beff0ac71eef8d068867f9f7638 --- /dev/null +++ b/model-00011-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:deb618daa0bede903575fc90e4d612338b90d69335e2035edf0df176cd7da952 +size 975211888 diff --git a/model-00012-of-00066.safetensors b/model-00012-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cd7a15b1a74c5057eb0743babb97920e86e269b2 --- /dev/null +++ b/model-00012-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ae793d23bab5ab64f1c73df0e3bcaa4840f94454396972cd9807fded4d1d387c +size 975211888 diff --git a/model-00013-of-00066.safetensors b/model-00013-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b7ef0a8ddca0f9da23d722acecd13418e241befd --- /dev/null +++ b/model-00013-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5116eaa67924fae32ce172d1035027bee904134ecfda09403fa143223372a4be +size 975211888 diff --git a/model-00014-of-00066.safetensors b/model-00014-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a38da374d432f5515e5c73f7ad769fd3620fc9e4 --- /dev/null +++ b/model-00014-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55aff65ae7d2b5f1004f1be321f0bb70c59dae34caa8f2236d994791698daea9 +size 975211888 diff --git a/model-00015-of-00066.safetensors b/model-00015-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2118cef45a001b18e14c9ec25834f483e5e22fa --- /dev/null +++ b/model-00015-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad640788ad498fefb7bfb03b636466508a7af714b1397362751a159f2cbb7db +size 975211880 diff --git a/model-00016-of-00066.safetensors b/model-00016-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b31b1fe0bfebc825c53bce15fb9b6bfc157c7a53 --- /dev/null +++ b/model-00016-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed1f4223b7a160aef96a12393d1303c7afb431bee223941be5d26ff14470b410 +size 975211888 diff --git a/model-00017-of-00066.safetensors b/model-00017-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4afd748f6a281a1aa67a351578067e9014dc8af5 --- /dev/null +++ b/model-00017-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f9db5fe8f8eeb013592a08aea334866170ae322237b144cb4dbe5462497a5f4 +size 975211888 diff --git a/model-00018-of-00066.safetensors b/model-00018-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6d5af51450a3b7b3d12b6ffe1244c88abf96b1d5 --- /dev/null +++ b/model-00018-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f905ac2cf3b4423e89f1655462499444d9e85e3e65e8553140c0f429c734fb93 +size 975211888 diff --git a/model-00019-of-00066.safetensors b/model-00019-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0441a7cbce894fae77b9ba2e6c4a61aead1ae54 --- /dev/null +++ b/model-00019-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aeacb399243e4e9650b48e357948296be56e824d31e2dcd464511b8a40e33790 +size 975211888 diff --git a/model-00020-of-00066.safetensors b/model-00020-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8c3778d8705333e3f1c3ca21d96c7d0e1347f95c --- /dev/null +++ b/model-00020-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6196d9c600073d5ceaa5d4ea56ae156cb0ccb2d590db3d4667a5cfa53131abff +size 975211888 diff --git a/model-00021-of-00066.safetensors b/model-00021-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e06223ec6d1c82702e66c71613a369416dabdb94 --- /dev/null +++ b/model-00021-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3792456741e6a9f631669ed0d2057d737aec5fe4238493f01e5a18d4957ff43d +size 975211888 diff --git a/model-00022-of-00066.safetensors b/model-00022-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..49a718e65ced40563ef8e44918dba2930255f080 --- /dev/null +++ b/model-00022-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cd68039d60c5f585c197bebacea127383b2b8e60cd0c09ee09631eddeb1ec60 +size 975211888 diff --git a/model-00023-of-00066.safetensors b/model-00023-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d1745c21761e9a25caf4bceadb39368b2dcd7b22 --- /dev/null +++ b/model-00023-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4b50b7b23450e9e500c3b6d37908f1ebffda2e37f660787427afe8da635665b +size 975211888 diff --git a/model-00024-of-00066.safetensors b/model-00024-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..503374ec5e261453fbcd3e7514d74771e493b781 --- /dev/null +++ b/model-00024-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fdfca6aca27311a8b7eab972fe6a3eed522435790092a16e43c08da6488478fa +size 975211888 diff --git a/model-00025-of-00066.safetensors b/model-00025-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..039e0e0112bf620f37bd99dc7937c44a5734036a --- /dev/null +++ b/model-00025-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a7e9601436ebbe376b461be61f239186955796051a3d6afe7d499ea1f93dead +size 975211888 diff --git a/model-00026-of-00066.safetensors b/model-00026-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dd853bbf02f701daebb2b14bb15a254c9c0d429a --- /dev/null +++ b/model-00026-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4d25f9fbe3fb9e2a10672cbcb13fa28002ec17a9c29e8a21454bc7490b75ce8 +size 975211880 diff --git a/model-00027-of-00066.safetensors b/model-00027-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c24c5672213313856eb9deb60ce11f466346c3c0 --- /dev/null +++ b/model-00027-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:91f96db731cd85b4d78257456812b5fe4f61eccb103069c1c7730e35f1df9874 +size 975211888 diff --git a/model-00028-of-00066.safetensors b/model-00028-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1ff943a4a4f9956832480faf36f635b925c38b4e --- /dev/null +++ b/model-00028-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42c9225f541002f0b071b2c0648bc5a74fc097997dbab3ae3c9c5f60f95d4497 +size 975211888 diff --git a/model-00029-of-00066.safetensors b/model-00029-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a12ee1b42d773a12a5a21bf91fd05be803317249 --- /dev/null +++ b/model-00029-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dffa18aa0277c9896d29e90843d7a8e7c9cf5e1f026cc0ab55ff288d623cb50a +size 975211888 diff --git a/model-00030-of-00066.safetensors b/model-00030-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c7228d58407a02b1031e049a91ecae3c231c7be --- /dev/null +++ b/model-00030-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ac451677ae976f316c196c70316b7bc9f4d08c5ab47615870757b31b7d7e7dc +size 975211888 diff --git a/model-00031-of-00066.safetensors b/model-00031-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66d7daa18b87c4e2466ff033d22f7c4490f410ef --- /dev/null +++ b/model-00031-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfaa3b5efe16028ab460eb00676fdc533a35ea39fb549a1279d5703a69d272fc +size 975211888 diff --git a/model-00032-of-00066.safetensors b/model-00032-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..60b53fb6983aaa2f73d7f906fdca180f212f028b --- /dev/null +++ b/model-00032-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26e00ea8f24e4a2a295b579dcbf02ee3d36f11f92b02375a164b7a4de6c7c42a +size 975211888 diff --git a/model-00033-of-00066.safetensors b/model-00033-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f4e6bc6e45f025bb071579033616e913537121d0 --- /dev/null +++ b/model-00033-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bbe4f5e350ef3a18d6692a92fa628e833bf98794d4002ee68a2f174ae277e289 +size 975211888 diff --git a/model-00034-of-00066.safetensors b/model-00034-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..68daa2e548ca41d1dff541bdf7af65f80d9a0ae0 --- /dev/null +++ b/model-00034-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f12d1080ce57aa00c64d5be536f2fc10a6346c05c3ec12cfac15ed12a062a18 +size 975211888 diff --git a/model-00035-of-00066.safetensors b/model-00035-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d5165b964fdae97367483523e0b35504df626400 --- /dev/null +++ b/model-00035-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a9f4da6f3c27dfea1cf504ad3fb1b80855733960cf33dafa405e78746c96e3 +size 975211888 diff --git a/model-00036-of-00066.safetensors b/model-00036-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..460a1f96cbd0dee58c44f8eea4633819384328ce --- /dev/null +++ b/model-00036-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b169a0c1e9048080b5b3828741221c25c88faab5eb85c5344621bfbd87799fff +size 975211888 diff --git a/model-00037-of-00066.safetensors b/model-00037-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c842dc133de54d500d90f55568fa430254041a5c --- /dev/null +++ b/model-00037-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad0b17f190c90498801094e1e1784fc689d50058c3eb66ec4726781baee51e93 +size 975211880 diff --git a/model-00038-of-00066.safetensors b/model-00038-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8fc2804ec782fd6f8a9e0b819728bfc3b4ba3d4a --- /dev/null +++ b/model-00038-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84ab4f4d27fd05ac7dd0b1f5a85c87fd9becb915ccdba870606ea0b50763a978 +size 975211888 diff --git a/model-00039-of-00066.safetensors b/model-00039-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5e5aaa91f5080a615499762996a179eaae7e30c --- /dev/null +++ b/model-00039-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee047b0a5e6a41446dcd8cbe06db31f78d23bb9d89987a38b823fd8ff050e812 +size 975211888 diff --git a/model-00040-of-00066.safetensors b/model-00040-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6bdd8fd8016f7960a10444bb33944ffeced96098 --- /dev/null +++ b/model-00040-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48fbccc9aa8141639efaf8ec91f66bfd678342822fc5de7a5492ca7eece98518 +size 975211888 diff --git a/model-00041-of-00066.safetensors b/model-00041-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..253b749d977c11a0e0e9c54f45b6bd158a887b2d --- /dev/null +++ b/model-00041-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a411d375432de6b1afdf410cac57c9d202effab5ee02c25b3514f60536e24405 +size 975211888 diff --git a/model-00042-of-00066.safetensors b/model-00042-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..932b649f29bdf9d3ad5fbaa4bec9841c4495aa97 --- /dev/null +++ b/model-00042-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:672ca5893815ce121c04354320ce5ffe7c9bf82bf71dabd94979b360f3b87b06 +size 975211888 diff --git a/model-00043-of-00066.safetensors b/model-00043-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..974eaaf3013bdecab59e34e663177b40cc2f59e7 --- /dev/null +++ b/model-00043-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fe1f4ba6706b903028332aa1328fdde01577a80f133dc0304578957837f3db7 +size 975211888 diff --git a/model-00044-of-00066.safetensors b/model-00044-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a47cacadff954a2fc33ca12f454552c5c4a48d17 --- /dev/null +++ b/model-00044-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22642172a7ecf2be6078cd59188ebed27a0764d93e71533282c81676f3b51207 +size 975211888 diff --git a/model-00045-of-00066.safetensors b/model-00045-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d093cfe84985a49aaed6b51464d8dae6c45231f6 --- /dev/null +++ b/model-00045-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3415776d7c5d6985e5833f3b55be51f3dfb1cd7d4e3a152f6ad8b5990f57ce46 +size 975211888 diff --git a/model-00046-of-00066.safetensors b/model-00046-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..730c2a9e662b4ba9df888054b1fef798d44aaf1e --- /dev/null +++ b/model-00046-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3138f3a2b100bf7d3575fce4f1db9255569709b2f85771d9bbe04beaf308217 +size 975211888 diff --git a/model-00047-of-00066.safetensors b/model-00047-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5382c86f6371b7501d65e947b802d1a3bdc2f4e8 --- /dev/null +++ b/model-00047-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0822949cce787318f5c008dad0ffd1b201e7cd1c2d88dbafa4349b3d6dea39c8 +size 975211888 diff --git a/model-00048-of-00066.safetensors b/model-00048-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f238b1dc260bfc549781d49b5cd8d48777ab5ace --- /dev/null +++ b/model-00048-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a830f83b6558941e4a8e6d5de1b4eb66a43ba3bc0375eabc1fc9239d82da08d +size 975211880 diff --git a/model-00049-of-00066.safetensors b/model-00049-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9cf78b6c95eb44a0a3d0fb0cf660089d3c1e5a38 --- /dev/null +++ b/model-00049-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9251731090b4f4de2e86ac6940a156bc64ff77ea58f6e657be50086865900de +size 975211888 diff --git a/model-00050-of-00066.safetensors b/model-00050-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ddd032b91b2b0672266a303cf1bc55b36073fd12 --- /dev/null +++ b/model-00050-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9005dd2d7341c01f17c336d7e17e79814801955e3dee821a8374cbbfe93030aa +size 975211888 diff --git a/model-00051-of-00066.safetensors b/model-00051-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d9fb6f7491e46b26134a0267fc6dfcb8644f57f --- /dev/null +++ b/model-00051-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:504f03c3cf334009c354eecdf2f3c307937842c6e31959af95e44069ccfda9ab +size 975211888 diff --git a/model-00052-of-00066.safetensors b/model-00052-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9527193f79335450def1aa87ed2e5d469f2e1fe --- /dev/null +++ b/model-00052-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f8330b3492216d0f5ac2c0c1bbf3a13ebf7232292b0a82608f10208aa33574 +size 975211888 diff --git a/model-00053-of-00066.safetensors b/model-00053-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..762fa96547fc59f8366633167e1fdcc2e2d08b2e --- /dev/null +++ b/model-00053-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e2aa51a27e2dcbc66eccd557abf0e075e579e0d8261c4626909dfb4fc4a39ef +size 975211888 diff --git a/model-00054-of-00066.safetensors b/model-00054-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2a8ebfe4c2816f45463884ed211e9d377f4c86b4 --- /dev/null +++ b/model-00054-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e50912c7141c59f741725207b487c22d119a0348784ded31eef0db7df16f12fb +size 975211888 diff --git a/model-00055-of-00066.safetensors b/model-00055-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..309ecb80b9ed99f32ea60a38855c2354ad7dbc34 --- /dev/null +++ b/model-00055-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:64d350edccf159243a2a2d57ab902a573772fb20a8faff5e0335cbc31ca56ac4 +size 975211888 diff --git a/model-00056-of-00066.safetensors b/model-00056-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e6c9d4c32b1680a7286501b16608a7d76facd3ae --- /dev/null +++ b/model-00056-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ee589df7d1756bd3fd8d62c691a4de257f89066b4f67cc4b86a89cfd0e0a4d3 +size 975211888 diff --git a/model-00057-of-00066.safetensors b/model-00057-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69ea0efd8c59bf5f5b8c6bacb75b955cf3ab13e2 --- /dev/null +++ b/model-00057-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:23e405ebafb8096872d046edf669648a2db939c639d3e32524dd1b88b175ed06 +size 975211888 diff --git a/model-00058-of-00066.safetensors b/model-00058-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f16ffa7cbb6025c9933a8faadbc9de64a9e43a8 --- /dev/null +++ b/model-00058-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62338e5ebe4eca135c089afdf0087d7e004accd9d1d3037ece188e0e844ae6a2 +size 975211888 diff --git a/model-00059-of-00066.safetensors b/model-00059-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..45a33587748277019d5ac23f59c6f9d3340b3da4 --- /dev/null +++ b/model-00059-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59f3a143c49848d67e12f2dd61967efea93a3e776bbb9e16130c5db8a51dcf15 +size 975211880 diff --git a/model-00060-of-00066.safetensors b/model-00060-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..06d88dcba7e5cd0ef8d136bd7f3e107ffdf47dbf --- /dev/null +++ b/model-00060-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cfbf2f7eb16b8f12a5b587fca5fd3bf00908ad83ad3a2b6e8596a9cdf3a7b3b +size 975211888 diff --git a/model-00061-of-00066.safetensors b/model-00061-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..072557cab972de8bc1c2ae9344e19f2d1276c8af --- /dev/null +++ b/model-00061-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ece2b323df80197bb8dc3195e64d4f086c0362251d4152eb1ee7b17106f3e44 +size 975211888 diff --git a/model-00062-of-00066.safetensors b/model-00062-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8eb67211b041443edc62fffe91bbc6b612259097 --- /dev/null +++ b/model-00062-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ee52e3b728c01403b67d3c513a8a4eca18c172e8ce3332c41f60ce39c4d6e0cb +size 975211888 diff --git a/model-00063-of-00066.safetensors b/model-00063-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..eafdbf878f48e16e14eddd58645e14c3ab2f8083 --- /dev/null +++ b/model-00063-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a1205939f0c49c704963fbe340876a25adf87f158303c16f90ae6af093e4594 +size 975211888 diff --git a/model-00064-of-00066.safetensors b/model-00064-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..67b09571d87d028e6627dfce4dea89c89e76d050 --- /dev/null +++ b/model-00064-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:58c744cff0525585504958f02bc9fb4f690263a13d21a912da0f0fddbf472707 +size 975211880 diff --git a/model-00065-of-00066.safetensors b/model-00065-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1fd013098d0b91e5efb5ff8868ee4882af6fcd7f --- /dev/null +++ b/model-00065-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f048116a043c741f1a6722038900a5dcfdee711fe9272768fad7ec2a9324332 +size 975211880 diff --git a/model-00066-of-00066.safetensors b/model-00066-of-00066.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3cebc1934a84f74d4fd8da29df35a8aefd38f7da --- /dev/null +++ b/model-00066-of-00066.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8aad1d9426f998c0f635cff82464d18599fbdc78eb8e400243e8d6b926024e9 +size 975211856 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..fa8b60fa7d015f1145b9f161f0489ec14b9fbb13 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1 @@ +{"metadata": {"mergekit_version": "0.0.6", "total_size": 65527752704}, "weight_map": {"lm_head.weight": "model-00001-of-00066.safetensors", "model.embed_tokens.weight": "model-00002-of-00066.safetensors", "model.layers.0.input_layernorm.weight": "model-00003-of-00066.safetensors", "model.layers.0.mlp.down_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.mlp.gate_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.mlp.up_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.post_attention_layernorm.weight": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.k_proj.bias": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.k_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.o_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.q_proj.bias": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.q_proj.weight": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.v_proj.bias": "model-00003-of-00066.safetensors", "model.layers.0.self_attn.v_proj.weight": "model-00003-of-00066.safetensors", "model.layers.1.input_layernorm.weight": "model-00003-of-00066.safetensors", "model.layers.1.mlp.down_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.mlp.gate_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.mlp.up_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.k_proj.bias": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.k_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.o_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.q_proj.bias": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.q_proj.weight": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.v_proj.bias": "model-00004-of-00066.safetensors", "model.layers.1.self_attn.v_proj.weight": "model-00004-of-00066.safetensors", "model.layers.10.input_layernorm.weight": "model-00004-of-00066.safetensors", "model.layers.10.mlp.down_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.mlp.gate_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.mlp.up_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.post_attention_layernorm.weight": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.k_proj.bias": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.k_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.o_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.q_proj.bias": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.q_proj.weight": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.v_proj.bias": "model-00005-of-00066.safetensors", "model.layers.10.self_attn.v_proj.weight": "model-00005-of-00066.safetensors", "model.layers.11.input_layernorm.weight": "model-00005-of-00066.safetensors", "model.layers.11.mlp.down_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.mlp.gate_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.mlp.up_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.post_attention_layernorm.weight": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.k_proj.bias": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.k_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.o_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.q_proj.bias": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.q_proj.weight": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.v_proj.bias": "model-00006-of-00066.safetensors", "model.layers.11.self_attn.v_proj.weight": "model-00006-of-00066.safetensors", "model.layers.12.input_layernorm.weight": "model-00006-of-00066.safetensors", "model.layers.12.mlp.down_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.mlp.gate_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.mlp.up_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.post_attention_layernorm.weight": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.k_proj.bias": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.k_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.o_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.q_proj.bias": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.q_proj.weight": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.v_proj.bias": "model-00007-of-00066.safetensors", "model.layers.12.self_attn.v_proj.weight": "model-00007-of-00066.safetensors", "model.layers.13.input_layernorm.weight": "model-00007-of-00066.safetensors", "model.layers.13.mlp.down_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.mlp.gate_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.mlp.up_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.post_attention_layernorm.weight": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.k_proj.bias": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.k_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.o_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.q_proj.bias": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.q_proj.weight": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.v_proj.bias": "model-00008-of-00066.safetensors", "model.layers.13.self_attn.v_proj.weight": "model-00008-of-00066.safetensors", "model.layers.14.input_layernorm.weight": "model-00008-of-00066.safetensors", "model.layers.14.mlp.down_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.mlp.gate_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.mlp.up_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.post_attention_layernorm.weight": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.k_proj.bias": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.k_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.o_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.q_proj.bias": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.q_proj.weight": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.v_proj.bias": "model-00009-of-00066.safetensors", "model.layers.14.self_attn.v_proj.weight": "model-00009-of-00066.safetensors", "model.layers.15.input_layernorm.weight": "model-00009-of-00066.safetensors", "model.layers.15.mlp.down_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.mlp.gate_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.mlp.up_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.post_attention_layernorm.weight": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.k_proj.bias": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.k_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.o_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.q_proj.bias": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.q_proj.weight": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.v_proj.bias": "model-00010-of-00066.safetensors", "model.layers.15.self_attn.v_proj.weight": "model-00010-of-00066.safetensors", "model.layers.16.input_layernorm.weight": "model-00010-of-00066.safetensors", "model.layers.16.mlp.down_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.mlp.gate_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.mlp.up_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.post_attention_layernorm.weight": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.k_proj.bias": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.k_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.o_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.q_proj.bias": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.q_proj.weight": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.v_proj.bias": "model-00011-of-00066.safetensors", "model.layers.16.self_attn.v_proj.weight": "model-00011-of-00066.safetensors", "model.layers.17.input_layernorm.weight": "model-00011-of-00066.safetensors", "model.layers.17.mlp.down_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.mlp.gate_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.mlp.up_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.post_attention_layernorm.weight": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.k_proj.bias": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.k_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.o_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.q_proj.bias": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.q_proj.weight": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.v_proj.bias": "model-00012-of-00066.safetensors", "model.layers.17.self_attn.v_proj.weight": "model-00012-of-00066.safetensors", "model.layers.18.input_layernorm.weight": "model-00012-of-00066.safetensors", "model.layers.18.mlp.down_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.mlp.gate_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.mlp.up_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.post_attention_layernorm.weight": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.k_proj.bias": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.k_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.o_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.q_proj.bias": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.q_proj.weight": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.v_proj.bias": "model-00013-of-00066.safetensors", "model.layers.18.self_attn.v_proj.weight": "model-00013-of-00066.safetensors", "model.layers.19.input_layernorm.weight": "model-00013-of-00066.safetensors", "model.layers.19.mlp.down_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.mlp.gate_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.mlp.up_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.post_attention_layernorm.weight": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.k_proj.bias": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.k_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.o_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.q_proj.bias": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.q_proj.weight": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.v_proj.bias": "model-00014-of-00066.safetensors", "model.layers.19.self_attn.v_proj.weight": "model-00014-of-00066.safetensors", "model.layers.2.input_layernorm.weight": "model-00014-of-00066.safetensors", "model.layers.2.mlp.down_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.mlp.gate_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.mlp.up_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.post_attention_layernorm.weight": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.k_proj.bias": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.k_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.o_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.q_proj.bias": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.q_proj.weight": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.v_proj.bias": "model-00015-of-00066.safetensors", "model.layers.2.self_attn.v_proj.weight": "model-00015-of-00066.safetensors", "model.layers.20.input_layernorm.weight": "model-00015-of-00066.safetensors", "model.layers.20.mlp.down_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.mlp.gate_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.mlp.up_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.post_attention_layernorm.weight": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.k_proj.bias": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.k_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.o_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.q_proj.bias": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.q_proj.weight": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.v_proj.bias": "model-00016-of-00066.safetensors", "model.layers.20.self_attn.v_proj.weight": "model-00016-of-00066.safetensors", "model.layers.21.input_layernorm.weight": "model-00016-of-00066.safetensors", "model.layers.21.mlp.down_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.mlp.gate_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.mlp.up_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.post_attention_layernorm.weight": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.k_proj.bias": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.k_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.o_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.q_proj.bias": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.q_proj.weight": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.v_proj.bias": "model-00017-of-00066.safetensors", "model.layers.21.self_attn.v_proj.weight": "model-00017-of-00066.safetensors", "model.layers.22.input_layernorm.weight": "model-00017-of-00066.safetensors", "model.layers.22.mlp.down_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.mlp.gate_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.mlp.up_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.post_attention_layernorm.weight": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.k_proj.bias": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.k_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.o_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.q_proj.bias": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.q_proj.weight": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.v_proj.bias": "model-00018-of-00066.safetensors", "model.layers.22.self_attn.v_proj.weight": "model-00018-of-00066.safetensors", "model.layers.23.input_layernorm.weight": "model-00018-of-00066.safetensors", "model.layers.23.mlp.down_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.mlp.gate_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.mlp.up_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.post_attention_layernorm.weight": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.k_proj.bias": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.k_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.o_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.q_proj.bias": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.q_proj.weight": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.v_proj.bias": "model-00019-of-00066.safetensors", "model.layers.23.self_attn.v_proj.weight": "model-00019-of-00066.safetensors", "model.layers.24.input_layernorm.weight": "model-00019-of-00066.safetensors", "model.layers.24.mlp.down_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.mlp.gate_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.mlp.up_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.post_attention_layernorm.weight": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.k_proj.bias": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.k_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.o_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.q_proj.bias": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.q_proj.weight": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.v_proj.bias": "model-00020-of-00066.safetensors", "model.layers.24.self_attn.v_proj.weight": "model-00020-of-00066.safetensors", "model.layers.25.input_layernorm.weight": "model-00020-of-00066.safetensors", "model.layers.25.mlp.down_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.mlp.gate_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.mlp.up_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.post_attention_layernorm.weight": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.k_proj.bias": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.k_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.o_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.q_proj.bias": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.q_proj.weight": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.v_proj.bias": "model-00021-of-00066.safetensors", "model.layers.25.self_attn.v_proj.weight": "model-00021-of-00066.safetensors", "model.layers.26.input_layernorm.weight": "model-00021-of-00066.safetensors", "model.layers.26.mlp.down_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.mlp.gate_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.mlp.up_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.post_attention_layernorm.weight": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.k_proj.bias": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.k_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.o_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.q_proj.bias": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.q_proj.weight": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.v_proj.bias": "model-00022-of-00066.safetensors", "model.layers.26.self_attn.v_proj.weight": "model-00022-of-00066.safetensors", "model.layers.27.input_layernorm.weight": "model-00022-of-00066.safetensors", "model.layers.27.mlp.down_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.mlp.gate_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.mlp.up_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.post_attention_layernorm.weight": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.k_proj.bias": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.k_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.o_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.q_proj.bias": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.q_proj.weight": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.v_proj.bias": "model-00023-of-00066.safetensors", "model.layers.27.self_attn.v_proj.weight": "model-00023-of-00066.safetensors", "model.layers.28.input_layernorm.weight": "model-00023-of-00066.safetensors", "model.layers.28.mlp.down_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.mlp.gate_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.mlp.up_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.post_attention_layernorm.weight": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.k_proj.bias": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.k_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.o_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.q_proj.bias": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.q_proj.weight": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.v_proj.bias": "model-00024-of-00066.safetensors", "model.layers.28.self_attn.v_proj.weight": "model-00024-of-00066.safetensors", "model.layers.29.input_layernorm.weight": "model-00024-of-00066.safetensors", "model.layers.29.mlp.down_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.mlp.gate_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.mlp.up_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.post_attention_layernorm.weight": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.k_proj.bias": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.k_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.o_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.q_proj.bias": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.q_proj.weight": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.v_proj.bias": "model-00025-of-00066.safetensors", "model.layers.29.self_attn.v_proj.weight": "model-00025-of-00066.safetensors", "model.layers.3.input_layernorm.weight": "model-00025-of-00066.safetensors", "model.layers.3.mlp.down_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.mlp.gate_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.mlp.up_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.post_attention_layernorm.weight": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.k_proj.bias": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.k_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.o_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.q_proj.bias": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.q_proj.weight": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.v_proj.bias": "model-00026-of-00066.safetensors", "model.layers.3.self_attn.v_proj.weight": "model-00026-of-00066.safetensors", "model.layers.30.input_layernorm.weight": "model-00026-of-00066.safetensors", "model.layers.30.mlp.down_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.mlp.gate_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.mlp.up_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.post_attention_layernorm.weight": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.k_proj.bias": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.k_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.o_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.q_proj.bias": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.q_proj.weight": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.v_proj.bias": "model-00027-of-00066.safetensors", "model.layers.30.self_attn.v_proj.weight": "model-00027-of-00066.safetensors", "model.layers.31.input_layernorm.weight": "model-00027-of-00066.safetensors", "model.layers.31.mlp.down_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.mlp.gate_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.mlp.up_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.post_attention_layernorm.weight": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.k_proj.bias": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.k_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.o_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.q_proj.bias": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.q_proj.weight": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.v_proj.bias": "model-00028-of-00066.safetensors", "model.layers.31.self_attn.v_proj.weight": "model-00028-of-00066.safetensors", "model.layers.32.input_layernorm.weight": "model-00028-of-00066.safetensors", "model.layers.32.mlp.down_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.mlp.gate_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.mlp.up_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.post_attention_layernorm.weight": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.k_proj.bias": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.k_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.o_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.q_proj.bias": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.q_proj.weight": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.v_proj.bias": "model-00029-of-00066.safetensors", "model.layers.32.self_attn.v_proj.weight": "model-00029-of-00066.safetensors", "model.layers.33.input_layernorm.weight": "model-00029-of-00066.safetensors", "model.layers.33.mlp.down_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.mlp.gate_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.mlp.up_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.post_attention_layernorm.weight": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.k_proj.bias": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.k_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.o_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.q_proj.bias": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.q_proj.weight": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.v_proj.bias": "model-00030-of-00066.safetensors", "model.layers.33.self_attn.v_proj.weight": "model-00030-of-00066.safetensors", "model.layers.34.input_layernorm.weight": "model-00030-of-00066.safetensors", "model.layers.34.mlp.down_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.mlp.gate_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.mlp.up_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.post_attention_layernorm.weight": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.k_proj.bias": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.k_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.o_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.q_proj.bias": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.q_proj.weight": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.v_proj.bias": "model-00031-of-00066.safetensors", "model.layers.34.self_attn.v_proj.weight": "model-00031-of-00066.safetensors", "model.layers.35.input_layernorm.weight": "model-00031-of-00066.safetensors", "model.layers.35.mlp.down_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.mlp.gate_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.mlp.up_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.post_attention_layernorm.weight": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.k_proj.bias": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.k_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.o_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.q_proj.bias": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.q_proj.weight": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.v_proj.bias": "model-00032-of-00066.safetensors", "model.layers.35.self_attn.v_proj.weight": "model-00032-of-00066.safetensors", "model.layers.36.input_layernorm.weight": "model-00032-of-00066.safetensors", "model.layers.36.mlp.down_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.mlp.gate_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.mlp.up_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.post_attention_layernorm.weight": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.k_proj.bias": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.k_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.o_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.q_proj.bias": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.q_proj.weight": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.v_proj.bias": "model-00033-of-00066.safetensors", "model.layers.36.self_attn.v_proj.weight": "model-00033-of-00066.safetensors", "model.layers.37.input_layernorm.weight": "model-00033-of-00066.safetensors", "model.layers.37.mlp.down_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.mlp.gate_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.mlp.up_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.post_attention_layernorm.weight": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.k_proj.bias": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.k_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.o_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.q_proj.bias": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.q_proj.weight": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.v_proj.bias": "model-00034-of-00066.safetensors", "model.layers.37.self_attn.v_proj.weight": "model-00034-of-00066.safetensors", "model.layers.38.input_layernorm.weight": "model-00034-of-00066.safetensors", "model.layers.38.mlp.down_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.mlp.gate_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.mlp.up_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.post_attention_layernorm.weight": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.k_proj.bias": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.k_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.o_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.q_proj.bias": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.q_proj.weight": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.v_proj.bias": "model-00035-of-00066.safetensors", "model.layers.38.self_attn.v_proj.weight": "model-00035-of-00066.safetensors", "model.layers.39.input_layernorm.weight": "model-00035-of-00066.safetensors", "model.layers.39.mlp.down_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.mlp.gate_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.mlp.up_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.post_attention_layernorm.weight": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.k_proj.bias": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.k_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.o_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.q_proj.bias": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.q_proj.weight": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.v_proj.bias": "model-00036-of-00066.safetensors", "model.layers.39.self_attn.v_proj.weight": "model-00036-of-00066.safetensors", "model.layers.4.input_layernorm.weight": "model-00036-of-00066.safetensors", "model.layers.4.mlp.down_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.mlp.gate_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.mlp.up_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.post_attention_layernorm.weight": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.k_proj.bias": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.k_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.o_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.q_proj.bias": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.q_proj.weight": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.v_proj.bias": "model-00037-of-00066.safetensors", "model.layers.4.self_attn.v_proj.weight": "model-00037-of-00066.safetensors", "model.layers.40.input_layernorm.weight": "model-00037-of-00066.safetensors", "model.layers.40.mlp.down_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.mlp.gate_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.mlp.up_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.post_attention_layernorm.weight": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.k_proj.bias": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.k_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.o_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.q_proj.bias": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.q_proj.weight": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.v_proj.bias": "model-00038-of-00066.safetensors", "model.layers.40.self_attn.v_proj.weight": "model-00038-of-00066.safetensors", "model.layers.41.input_layernorm.weight": "model-00038-of-00066.safetensors", "model.layers.41.mlp.down_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.mlp.gate_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.mlp.up_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.post_attention_layernorm.weight": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.k_proj.bias": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.k_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.o_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.q_proj.bias": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.q_proj.weight": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.v_proj.bias": "model-00039-of-00066.safetensors", "model.layers.41.self_attn.v_proj.weight": "model-00039-of-00066.safetensors", "model.layers.42.input_layernorm.weight": "model-00039-of-00066.safetensors", "model.layers.42.mlp.down_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.mlp.gate_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.mlp.up_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.post_attention_layernorm.weight": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.k_proj.bias": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.k_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.o_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.q_proj.bias": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.q_proj.weight": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.v_proj.bias": "model-00040-of-00066.safetensors", "model.layers.42.self_attn.v_proj.weight": "model-00040-of-00066.safetensors", "model.layers.43.input_layernorm.weight": "model-00040-of-00066.safetensors", "model.layers.43.mlp.down_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.mlp.gate_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.mlp.up_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.post_attention_layernorm.weight": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.k_proj.bias": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.k_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.o_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.q_proj.bias": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.q_proj.weight": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.v_proj.bias": "model-00041-of-00066.safetensors", "model.layers.43.self_attn.v_proj.weight": "model-00041-of-00066.safetensors", "model.layers.44.input_layernorm.weight": "model-00041-of-00066.safetensors", "model.layers.44.mlp.down_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.mlp.gate_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.mlp.up_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.post_attention_layernorm.weight": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.k_proj.bias": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.k_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.o_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.q_proj.bias": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.q_proj.weight": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.v_proj.bias": "model-00042-of-00066.safetensors", "model.layers.44.self_attn.v_proj.weight": "model-00042-of-00066.safetensors", "model.layers.45.input_layernorm.weight": "model-00042-of-00066.safetensors", "model.layers.45.mlp.down_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.mlp.gate_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.mlp.up_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.post_attention_layernorm.weight": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.k_proj.bias": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.k_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.o_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.q_proj.bias": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.q_proj.weight": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.v_proj.bias": "model-00043-of-00066.safetensors", "model.layers.45.self_attn.v_proj.weight": "model-00043-of-00066.safetensors", "model.layers.46.input_layernorm.weight": "model-00043-of-00066.safetensors", "model.layers.46.mlp.down_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.mlp.gate_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.mlp.up_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.post_attention_layernorm.weight": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.k_proj.bias": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.k_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.o_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.q_proj.bias": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.q_proj.weight": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.v_proj.bias": "model-00044-of-00066.safetensors", "model.layers.46.self_attn.v_proj.weight": "model-00044-of-00066.safetensors", "model.layers.47.input_layernorm.weight": "model-00044-of-00066.safetensors", "model.layers.47.mlp.down_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.mlp.gate_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.mlp.up_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.post_attention_layernorm.weight": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.k_proj.bias": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.k_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.o_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.q_proj.bias": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.q_proj.weight": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.v_proj.bias": "model-00045-of-00066.safetensors", "model.layers.47.self_attn.v_proj.weight": "model-00045-of-00066.safetensors", "model.layers.48.input_layernorm.weight": "model-00045-of-00066.safetensors", "model.layers.48.mlp.down_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.mlp.gate_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.mlp.up_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.post_attention_layernorm.weight": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.k_proj.bias": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.k_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.o_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.q_proj.bias": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.q_proj.weight": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.v_proj.bias": "model-00046-of-00066.safetensors", "model.layers.48.self_attn.v_proj.weight": "model-00046-of-00066.safetensors", "model.layers.49.input_layernorm.weight": "model-00046-of-00066.safetensors", "model.layers.49.mlp.down_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.mlp.gate_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.mlp.up_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.post_attention_layernorm.weight": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.k_proj.bias": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.k_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.o_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.q_proj.bias": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.q_proj.weight": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.v_proj.bias": "model-00047-of-00066.safetensors", "model.layers.49.self_attn.v_proj.weight": "model-00047-of-00066.safetensors", "model.layers.5.input_layernorm.weight": "model-00047-of-00066.safetensors", "model.layers.5.mlp.down_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.mlp.gate_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.mlp.up_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.post_attention_layernorm.weight": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.k_proj.bias": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.k_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.o_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.q_proj.bias": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.q_proj.weight": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.v_proj.bias": "model-00048-of-00066.safetensors", "model.layers.5.self_attn.v_proj.weight": "model-00048-of-00066.safetensors", "model.layers.50.input_layernorm.weight": "model-00048-of-00066.safetensors", "model.layers.50.mlp.down_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.mlp.gate_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.mlp.up_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.post_attention_layernorm.weight": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.k_proj.bias": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.k_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.o_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.q_proj.bias": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.q_proj.weight": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.v_proj.bias": "model-00049-of-00066.safetensors", "model.layers.50.self_attn.v_proj.weight": "model-00049-of-00066.safetensors", "model.layers.51.input_layernorm.weight": "model-00049-of-00066.safetensors", "model.layers.51.mlp.down_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.mlp.gate_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.mlp.up_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.post_attention_layernorm.weight": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.k_proj.bias": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.k_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.o_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.q_proj.bias": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.q_proj.weight": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.v_proj.bias": "model-00050-of-00066.safetensors", "model.layers.51.self_attn.v_proj.weight": "model-00050-of-00066.safetensors", "model.layers.52.input_layernorm.weight": "model-00050-of-00066.safetensors", "model.layers.52.mlp.down_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.mlp.gate_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.mlp.up_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.post_attention_layernorm.weight": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.k_proj.bias": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.k_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.o_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.q_proj.bias": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.q_proj.weight": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.v_proj.bias": "model-00051-of-00066.safetensors", "model.layers.52.self_attn.v_proj.weight": "model-00051-of-00066.safetensors", "model.layers.53.input_layernorm.weight": "model-00051-of-00066.safetensors", "model.layers.53.mlp.down_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.mlp.gate_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.mlp.up_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.post_attention_layernorm.weight": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.k_proj.bias": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.k_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.o_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.q_proj.bias": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.q_proj.weight": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.v_proj.bias": "model-00052-of-00066.safetensors", "model.layers.53.self_attn.v_proj.weight": "model-00052-of-00066.safetensors", "model.layers.54.input_layernorm.weight": "model-00052-of-00066.safetensors", "model.layers.54.mlp.down_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.mlp.gate_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.mlp.up_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.post_attention_layernorm.weight": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.k_proj.bias": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.k_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.o_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.q_proj.bias": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.q_proj.weight": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.v_proj.bias": "model-00053-of-00066.safetensors", "model.layers.54.self_attn.v_proj.weight": "model-00053-of-00066.safetensors", "model.layers.55.input_layernorm.weight": "model-00053-of-00066.safetensors", "model.layers.55.mlp.down_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.mlp.gate_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.mlp.up_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.post_attention_layernorm.weight": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.k_proj.bias": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.k_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.o_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.q_proj.bias": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.q_proj.weight": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.v_proj.bias": "model-00054-of-00066.safetensors", "model.layers.55.self_attn.v_proj.weight": "model-00054-of-00066.safetensors", "model.layers.56.input_layernorm.weight": "model-00054-of-00066.safetensors", "model.layers.56.mlp.down_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.mlp.gate_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.mlp.up_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.post_attention_layernorm.weight": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.k_proj.bias": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.k_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.o_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.q_proj.bias": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.q_proj.weight": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.v_proj.bias": "model-00055-of-00066.safetensors", "model.layers.56.self_attn.v_proj.weight": "model-00055-of-00066.safetensors", "model.layers.57.input_layernorm.weight": "model-00055-of-00066.safetensors", "model.layers.57.mlp.down_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.mlp.gate_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.mlp.up_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.post_attention_layernorm.weight": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.k_proj.bias": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.k_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.o_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.q_proj.bias": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.q_proj.weight": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.v_proj.bias": "model-00056-of-00066.safetensors", "model.layers.57.self_attn.v_proj.weight": "model-00056-of-00066.safetensors", "model.layers.58.input_layernorm.weight": "model-00056-of-00066.safetensors", "model.layers.58.mlp.down_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.mlp.gate_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.mlp.up_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.post_attention_layernorm.weight": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.k_proj.bias": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.k_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.o_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.q_proj.bias": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.q_proj.weight": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.v_proj.bias": "model-00057-of-00066.safetensors", "model.layers.58.self_attn.v_proj.weight": "model-00057-of-00066.safetensors", "model.layers.59.input_layernorm.weight": "model-00057-of-00066.safetensors", "model.layers.59.mlp.down_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.mlp.gate_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.mlp.up_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.post_attention_layernorm.weight": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.k_proj.bias": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.k_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.o_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.q_proj.bias": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.q_proj.weight": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.v_proj.bias": "model-00058-of-00066.safetensors", "model.layers.59.self_attn.v_proj.weight": "model-00058-of-00066.safetensors", "model.layers.6.input_layernorm.weight": "model-00058-of-00066.safetensors", "model.layers.6.mlp.down_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.mlp.gate_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.mlp.up_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.post_attention_layernorm.weight": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.k_proj.bias": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.k_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.o_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.q_proj.bias": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.q_proj.weight": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.v_proj.bias": "model-00059-of-00066.safetensors", "model.layers.6.self_attn.v_proj.weight": "model-00059-of-00066.safetensors", "model.layers.60.input_layernorm.weight": "model-00059-of-00066.safetensors", "model.layers.60.mlp.down_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.mlp.gate_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.mlp.up_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.post_attention_layernorm.weight": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.k_proj.bias": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.k_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.o_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.q_proj.bias": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.q_proj.weight": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.v_proj.bias": "model-00060-of-00066.safetensors", "model.layers.60.self_attn.v_proj.weight": "model-00060-of-00066.safetensors", "model.layers.61.input_layernorm.weight": "model-00060-of-00066.safetensors", "model.layers.61.mlp.down_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.mlp.gate_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.mlp.up_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.post_attention_layernorm.weight": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.k_proj.bias": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.k_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.o_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.q_proj.bias": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.q_proj.weight": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.v_proj.bias": "model-00061-of-00066.safetensors", "model.layers.61.self_attn.v_proj.weight": "model-00061-of-00066.safetensors", "model.layers.62.input_layernorm.weight": "model-00061-of-00066.safetensors", "model.layers.62.mlp.down_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.mlp.gate_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.mlp.up_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.post_attention_layernorm.weight": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.k_proj.bias": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.k_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.o_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.q_proj.bias": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.q_proj.weight": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.v_proj.bias": "model-00062-of-00066.safetensors", "model.layers.62.self_attn.v_proj.weight": "model-00062-of-00066.safetensors", "model.layers.63.input_layernorm.weight": "model-00062-of-00066.safetensors", "model.layers.63.mlp.down_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.mlp.gate_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.mlp.up_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.post_attention_layernorm.weight": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.k_proj.bias": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.k_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.o_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.q_proj.bias": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.q_proj.weight": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.v_proj.bias": "model-00063-of-00066.safetensors", "model.layers.63.self_attn.v_proj.weight": "model-00063-of-00066.safetensors", "model.layers.7.input_layernorm.weight": "model-00063-of-00066.safetensors", "model.layers.7.mlp.down_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.mlp.gate_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.mlp.up_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.post_attention_layernorm.weight": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.k_proj.bias": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.k_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.o_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.q_proj.bias": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.q_proj.weight": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.v_proj.bias": "model-00064-of-00066.safetensors", "model.layers.7.self_attn.v_proj.weight": "model-00064-of-00066.safetensors", "model.layers.8.input_layernorm.weight": "model-00064-of-00066.safetensors", "model.layers.8.mlp.down_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.mlp.gate_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.mlp.up_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.post_attention_layernorm.weight": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.k_proj.bias": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.k_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.o_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.q_proj.bias": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.q_proj.weight": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.v_proj.bias": "model-00065-of-00066.safetensors", "model.layers.8.self_attn.v_proj.weight": "model-00065-of-00066.safetensors", "model.layers.9.input_layernorm.weight": "model-00065-of-00066.safetensors", "model.layers.9.mlp.down_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.mlp.gate_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.mlp.up_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.post_attention_layernorm.weight": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.k_proj.bias": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.k_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.o_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.q_proj.bias": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.q_proj.weight": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.v_proj.bias": "model-00066-of-00066.safetensors", "model.layers.9.self_attn.v_proj.weight": "model-00066-of-00066.safetensors", "model.norm.weight": "model-00066-of-00066.safetensors"}} \ No newline at end of file diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..1d385d62cf08bca35254547902b792c243656ec1 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000000000000000000000000000000000000..1a2db243e47cbc113f6b2ddcc388aeeb8fe1a94c --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e20ddafc659ba90242154b55275402edeca0715e5dbb30f56815a4ce081f4893 +size 11422778 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b068ffca3220a746ba50cc69f850e544217e3a86 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,195 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "add_prefix_space": null, + "added_tokens_decoder": { + "151643": { + "content": "<|end▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151644": { + "content": "<|User|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151645": { + "content": "<|Assistant|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151646": { + "content": "<|begin▁of▁sentence|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151647": { + "content": "<|EOT|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151648": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151649": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151650": { + "content": "<|quad_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151651": { + "content": "<|quad_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151652": { + "content": "<|vision_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151653": { + "content": "<|vision_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151654": { + "content": "<|vision_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151655": { + "content": "<|image_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151656": { + "content": "<|video_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "151657": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151658": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151659": { + "content": "<|fim_prefix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151660": { + "content": "<|fim_middle|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151661": { + "content": "<|fim_suffix|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151662": { + "content": "<|fim_pad|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151663": { + "content": "<|repo_name|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + }, + "151664": { + "content": "<|file_sep|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "bos_token": "<|begin▁of▁sentence|>", + "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='') %}{%- for message in messages %}{%- if message['role'] == 'system' %}{% set ns.system_prompt = message['content'] %}{%- endif %}{%- endfor %}{{bos_token}}{{ns.system_prompt}}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is none %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls']%}{%- if not ns.is_first %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- endfor %}{%- endif %}{%- if message['role'] == 'assistant' and message['content'] is not none %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'\\n<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|end▁of▁sentence|>", + "extra_special_tokens": {}, + "legacy": true, + "model_max_length": 16384, + "pad_token": "<|end▁of▁sentence|>", + "sp_model_kwargs": {}, + "tokenizer_class": "LlamaTokenizer", + "unk_token": null, + "use_default_system_prompt": false +}