LeoChiuu committed on
Commit
f2976aa
·
verified ·
1 Parent(s): a58ee6b

Add new SentenceTransformer model.

Browse files
Files changed (47) hide show
  1. 1_Pooling/config.json +10 -0
  2. README.md +547 -0
  3. added_tokens.json +3 -0
  4. checkpoint-54/1_Pooling/config.json +10 -0
  5. checkpoint-54/README.md +546 -0
  6. checkpoint-54/added_tokens.json +3 -0
  7. checkpoint-54/config.json +33 -0
  8. checkpoint-54/config_sentence_transformers.json +10 -0
  9. checkpoint-54/model.safetensors +3 -0
  10. checkpoint-54/modules.json +14 -0
  11. checkpoint-54/optimizer.pt +3 -0
  12. checkpoint-54/rng_state.pth +3 -0
  13. checkpoint-54/scheduler.pt +3 -0
  14. checkpoint-54/sentence_bert_config.json +4 -0
  15. checkpoint-54/special_tokens_map.json +15 -0
  16. checkpoint-54/spm.model +3 -0
  17. checkpoint-54/tokenizer.json +0 -0
  18. checkpoint-54/tokenizer_config.json +65 -0
  19. checkpoint-54/trainer_state.json +483 -0
  20. checkpoint-54/training_args.bin +3 -0
  21. checkpoint-60/1_Pooling/config.json +10 -0
  22. checkpoint-60/README.md +547 -0
  23. checkpoint-60/added_tokens.json +3 -0
  24. checkpoint-60/config.json +33 -0
  25. checkpoint-60/config_sentence_transformers.json +10 -0
  26. checkpoint-60/model.safetensors +3 -0
  27. checkpoint-60/modules.json +14 -0
  28. checkpoint-60/optimizer.pt +3 -0
  29. checkpoint-60/rng_state.pth +3 -0
  30. checkpoint-60/scheduler.pt +3 -0
  31. checkpoint-60/sentence_bert_config.json +4 -0
  32. checkpoint-60/special_tokens_map.json +15 -0
  33. checkpoint-60/spm.model +3 -0
  34. checkpoint-60/tokenizer.json +0 -0
  35. checkpoint-60/tokenizer_config.json +65 -0
  36. checkpoint-60/trainer_state.json +533 -0
  37. checkpoint-60/training_args.bin +3 -0
  38. config.json +33 -0
  39. config_sentence_transformers.json +10 -0
  40. model.safetensors +3 -0
  41. modules.json +14 -0
  42. runs/Sep17_22-48-14_default/events.out.tfevents.1726613296.default.7605.0 +3 -0
  43. sentence_bert_config.json +4 -0
  44. special_tokens_map.json +15 -0
  45. tokenizer.json +0 -0
  46. tokenizer.model +3 -0
  47. tokenizer_config.json +15 -0
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
README.md ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: colorfulscoop/sbert-base-ja
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy
6
+ - cosine_accuracy_threshold
7
+ - cosine_f1
8
+ - cosine_f1_threshold
9
+ - cosine_precision
10
+ - cosine_recall
11
+ - cosine_ap
12
+ - dot_accuracy
13
+ - dot_accuracy_threshold
14
+ - dot_f1
15
+ - dot_f1_threshold
16
+ - dot_precision
17
+ - dot_recall
18
+ - dot_ap
19
+ - manhattan_accuracy
20
+ - manhattan_accuracy_threshold
21
+ - manhattan_f1
22
+ - manhattan_f1_threshold
23
+ - manhattan_precision
24
+ - manhattan_recall
25
+ - manhattan_ap
26
+ - euclidean_accuracy
27
+ - euclidean_accuracy_threshold
28
+ - euclidean_f1
29
+ - euclidean_f1_threshold
30
+ - euclidean_precision
31
+ - euclidean_recall
32
+ - euclidean_ap
33
+ - max_accuracy
34
+ - max_accuracy_threshold
35
+ - max_f1
36
+ - max_f1_threshold
37
+ - max_precision
38
+ - max_recall
39
+ - max_ap
40
+ pipeline_tag: sentence-similarity
41
+ tags:
42
+ - sentence-transformers
43
+ - sentence-similarity
44
+ - feature-extraction
45
+ - generated_from_trainer
46
+ - dataset_size:53
47
+ - loss:CosineSimilarityLoss
48
+ model-index:
49
+ - name: SentenceTransformer based on colorfulscoop/sbert-base-ja
50
+ results:
51
+ - task:
52
+ type: binary-classification
53
+ name: Binary Classification
54
+ dataset:
55
+ name: custom arc semantics data jp
56
+ type: custom-arc-semantics-data-jp
57
+ metrics:
58
+ - type: cosine_accuracy
59
+ value: 0.6666666666666666
60
+ name: Cosine Accuracy
61
+ - type: cosine_accuracy_threshold
62
+ value: 0.4631122350692749
63
+ name: Cosine Accuracy Threshold
64
+ - type: cosine_f1
65
+ value: 0.8000000000000002
66
+ name: Cosine F1
67
+ - type: cosine_f1_threshold
68
+ value: 0.4631122350692749
69
+ name: Cosine F1 Threshold
70
+ - type: cosine_precision
71
+ value: 0.8
72
+ name: Cosine Precision
73
+ - type: cosine_recall
74
+ value: 0.8
75
+ name: Cosine Recall
76
+ - type: cosine_ap
77
+ value: 0.8766666666666667
78
+ name: Cosine Ap
79
+ - type: dot_accuracy
80
+ value: 0.6666666666666666
81
+ name: Dot Accuracy
82
+ - type: dot_accuracy_threshold
83
+ value: 248.13394165039062
84
+ name: Dot Accuracy Threshold
85
+ - type: dot_f1
86
+ value: 0.8000000000000002
87
+ name: Dot F1
88
+ - type: dot_f1_threshold
89
+ value: 248.13394165039062
90
+ name: Dot F1 Threshold
91
+ - type: dot_precision
92
+ value: 0.8
93
+ name: Dot Precision
94
+ - type: dot_recall
95
+ value: 0.8
96
+ name: Dot Recall
97
+ - type: dot_ap
98
+ value: 0.8766666666666667
99
+ name: Dot Ap
100
+ - type: manhattan_accuracy
101
+ value: 0.6666666666666666
102
+ name: Manhattan Accuracy
103
+ - type: manhattan_accuracy_threshold
104
+ value: 524.65185546875
105
+ name: Manhattan Accuracy Threshold
106
+ - type: manhattan_f1
107
+ value: 0.8000000000000002
108
+ name: Manhattan F1
109
+ - type: manhattan_f1_threshold
110
+ value: 524.65185546875
111
+ name: Manhattan F1 Threshold
112
+ - type: manhattan_precision
113
+ value: 0.8
114
+ name: Manhattan Precision
115
+ - type: manhattan_recall
116
+ value: 0.8
117
+ name: Manhattan Recall
118
+ - type: manhattan_ap
119
+ value: 0.8766666666666667
120
+ name: Manhattan Ap
121
+ - type: euclidean_accuracy
122
+ value: 0.6666666666666666
123
+ name: Euclidean Accuracy
124
+ - type: euclidean_accuracy_threshold
125
+ value: 23.945947647094727
126
+ name: Euclidean Accuracy Threshold
127
+ - type: euclidean_f1
128
+ value: 0.8000000000000002
129
+ name: Euclidean F1
130
+ - type: euclidean_f1_threshold
131
+ value: 23.945947647094727
132
+ name: Euclidean F1 Threshold
133
+ - type: euclidean_precision
134
+ value: 0.8
135
+ name: Euclidean Precision
136
+ - type: euclidean_recall
137
+ value: 0.8
138
+ name: Euclidean Recall
139
+ - type: euclidean_ap
140
+ value: 0.8766666666666667
141
+ name: Euclidean Ap
142
+ - type: max_accuracy
143
+ value: 0.6666666666666666
144
+ name: Max Accuracy
145
+ - type: max_accuracy_threshold
146
+ value: 524.65185546875
147
+ name: Max Accuracy Threshold
148
+ - type: max_f1
149
+ value: 0.8000000000000002
150
+ name: Max F1
151
+ - type: max_f1_threshold
152
+ value: 524.65185546875
153
+ name: Max F1 Threshold
154
+ - type: max_precision
155
+ value: 0.8
156
+ name: Max Precision
157
+ - type: max_recall
158
+ value: 0.8
159
+ name: Max Recall
160
+ - type: max_ap
161
+ value: 0.8766666666666667
162
+ name: Max Ap
163
+ ---
164
+
165
+ # SentenceTransformer based on colorfulscoop/sbert-base-ja
166
+
167
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
168
+
169
+ ## Model Details
170
+
171
+ ### Model Description
172
+ - **Model Type:** Sentence Transformer
173
+ - **Base model:** [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) <!-- at revision ecb8a98cd5176719ff7ab0d770a27420118732cf -->
174
+ - **Maximum Sequence Length:** 512 tokens
175
+ - **Output Dimensionality:** 768 dimensions
176
+ - **Similarity Function:** Cosine Similarity
177
+ - **Training Dataset:**
178
+ - csv
179
+ <!-- - **Language:** Unknown -->
180
+ <!-- - **License:** Unknown -->
181
+
182
+ ### Model Sources
183
+
184
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
185
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
186
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
187
+
188
+ ### Full Model Architecture
189
+
190
+ ```
191
+ SentenceTransformer(
192
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
193
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
194
+ )
195
+ ```
196
+
197
+ ## Usage
198
+
199
+ ### Direct Usage (Sentence Transformers)
200
+
201
+ First install the Sentence Transformers library:
202
+
203
+ ```bash
204
+ pip install -U sentence-transformers
205
+ ```
206
+
207
+ Then you can load this model and run inference.
208
+ ```python
209
+ from sentence_transformers import SentenceTransformer
210
+
211
+ # Download from the 🤗 Hub
212
+ model = SentenceTransformer("sentence_transformers_model_id")
213
+ # Run inference
214
+ sentences = [
215
+ 'The weather is lovely today.',
216
+ "It's so sunny outside!",
217
+ 'He drove to the stadium.',
218
+ ]
219
+ embeddings = model.encode(sentences)
220
+ print(embeddings.shape)
221
+ # [3, 768]
222
+
223
+ # Get the similarity scores for the embeddings
224
+ similarities = model.similarity(embeddings, embeddings)
225
+ print(similarities.shape)
226
+ # [3, 3]
227
+ ```
228
+
229
+ <!--
230
+ ### Direct Usage (Transformers)
231
+
232
+ <details><summary>Click to see the direct usage in Transformers</summary>
233
+
234
+ </details>
235
+ -->
236
+
237
+ <!--
238
+ ### Downstream Usage (Sentence Transformers)
239
+
240
+ You can finetune this model on your own dataset.
241
+
242
+ <details><summary>Click to expand</summary>
243
+
244
+ </details>
245
+ -->
246
+
247
+ <!--
248
+ ### Out-of-Scope Use
249
+
250
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
251
+ -->
252
+
253
+ ## Evaluation
254
+
255
+ ### Metrics
256
+
257
+ #### Binary Classification
258
+ * Dataset: `custom-arc-semantics-data-jp`
259
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
260
+
261
+ | Metric | Value |
262
+ |:-----------------------------|:-----------|
263
+ | cosine_accuracy | 0.6667 |
264
+ | cosine_accuracy_threshold | 0.4631 |
265
+ | cosine_f1 | 0.8 |
266
+ | cosine_f1_threshold | 0.4631 |
267
+ | cosine_precision | 0.8 |
268
+ | cosine_recall | 0.8 |
269
+ | cosine_ap | 0.8767 |
270
+ | dot_accuracy | 0.6667 |
271
+ | dot_accuracy_threshold | 248.1339 |
272
+ | dot_f1 | 0.8 |
273
+ | dot_f1_threshold | 248.1339 |
274
+ | dot_precision | 0.8 |
275
+ | dot_recall | 0.8 |
276
+ | dot_ap | 0.8767 |
277
+ | manhattan_accuracy | 0.6667 |
278
+ | manhattan_accuracy_threshold | 524.6519 |
279
+ | manhattan_f1 | 0.8 |
280
+ | manhattan_f1_threshold | 524.6519 |
281
+ | manhattan_precision | 0.8 |
282
+ | manhattan_recall | 0.8 |
283
+ | manhattan_ap | 0.8767 |
284
+ | euclidean_accuracy | 0.6667 |
285
+ | euclidean_accuracy_threshold | 23.9459 |
286
+ | euclidean_f1 | 0.8 |
287
+ | euclidean_f1_threshold | 23.9459 |
288
+ | euclidean_precision | 0.8 |
289
+ | euclidean_recall | 0.8 |
290
+ | euclidean_ap | 0.8767 |
291
+ | max_accuracy | 0.6667 |
292
+ | max_accuracy_threshold | 524.6519 |
293
+ | max_f1 | 0.8 |
294
+ | max_f1_threshold | 524.6519 |
295
+ | max_precision | 0.8 |
296
+ | max_recall | 0.8 |
297
+ | **max_ap** | **0.8767** |
298
+
299
+ <!--
300
+ ## Bias, Risks and Limitations
301
+
302
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
303
+ -->
304
+
305
+ <!--
306
+ ### Recommendations
307
+
308
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
309
+ -->
310
+
311
+ ## Training Details
312
+
313
+ ### Training Dataset
314
+
315
+ #### csv
316
+
317
+ * Dataset: csv
318
+ * Size: 53 training samples
319
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
320
+ * Approximate statistics based on the first 53 samples:
321
+ | | text1 | text2 | label |
322
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------|
323
+ | type | string | string | int |
324
+ | details | <ul><li>min: 14 tokens</li><li>mean: 35.94 tokens</li><li>max: 84 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 21.72 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~38.30%</li><li>1: ~61.70%</li></ul> |
325
+ * Samples:
326
+ | text1 | text2 | label |
327
+ |:-----------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------|:---------------|
328
+ | <code>茶色 の ドレス を 着た 若い 女の子 と サンダル が 黒い 帽子 、 タンクトップ 、 青い カーゴ ショーツ を 着た 若い 男の子 を 、 同じ ボール に 向かって 銀 の ボール を 投げ つける ように 笑い ます 。</code> | <code>人々 は ハンバーガー を 待って い ます 。</code> | <code>1</code> |
329
+ | <code>水 の 近く の ドック に 2 人 が 座って い ます 。</code> | <code>岩 の 上 に 座って いる 二 人</code> | <code>0</code> |
330
+ | <code>小さな 女の子 が 草 を 横切って 木 に 向かって 走り ます 。</code> | <code>女の子 は 、 かつて 木 が 立って いた 裏庭 を 見 ながら 中 に い ました 。</code> | <code>1</code> |
331
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
332
+ ```json
333
+ {
334
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
335
+ }
336
+ ```
337
+
338
+ ### Evaluation Dataset
339
+
340
+ #### csv
341
+
342
+ * Dataset: csv
343
+ * Size: 53 evaluation samples
344
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
345
+ * Approximate statistics based on the first 53 samples:
346
+ | | text1 | text2 | label |
347
+ |:--------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
348
+ | type | string | string | int |
349
+ | details | <ul><li>min: 19 tokens</li><li>mean: 38.67 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 25.5 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>0: ~16.67%</li><li>1: ~83.33%</li></ul> |
350
+ * Samples:
351
+ | text1 | text2 | label |
352
+ |:----------------------------------------------------------------------------------------------------------|:------------------------------------------------|:---------------|
353
+ | <code>岩 の 多い 景色 を 見て 二 人</code> | <code>何 か を 見て いる 二 人 が い ます 。</code> | <code>0</code> |
354
+ | <code>白い ヘルメット と オレンジ色 の シャツ 、 ジーンズ 、 白い トラック と オレンジ色 の パイロン の 前 に 反射 ジャケット を 着た 金髪 の ストリート ワーカー 。</code> | <code>ストリート ワーカー は 保護 具 を 着用 して い ませ ん 。</code> | <code>1</code> |
355
+ | <code>白い 帽子 を かぶった 女性 が 、 鮮やかな 色 の 岩 の 風景 を 描いて い ます 。 岩 層 自体 が 背景 に 見え ます 。</code> | <code>誰 か が 肖像 画 を 描いて い ます 。</code> | <code>1</code> |
356
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
357
+ ```json
358
+ {
359
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
360
+ }
361
+ ```
362
+
363
+ ### Training Hyperparameters
364
+ #### Non-Default Hyperparameters
365
+
366
+ - `eval_strategy`: epoch
367
+ - `learning_rate`: 2e-05
368
+ - `num_train_epochs`: 10
369
+ - `warmup_ratio`: 0.4
370
+ - `fp16`: True
371
+ - `batch_sampler`: no_duplicates
372
+
373
+ #### All Hyperparameters
374
+ <details><summary>Click to expand</summary>
375
+
376
+ - `overwrite_output_dir`: False
377
+ - `do_predict`: False
378
+ - `eval_strategy`: epoch
379
+ - `prediction_loss_only`: True
380
+ - `per_device_train_batch_size`: 8
381
+ - `per_device_eval_batch_size`: 8
382
+ - `per_gpu_train_batch_size`: None
383
+ - `per_gpu_eval_batch_size`: None
384
+ - `gradient_accumulation_steps`: 1
385
+ - `eval_accumulation_steps`: None
386
+ - `torch_empty_cache_steps`: None
387
+ - `learning_rate`: 2e-05
388
+ - `weight_decay`: 0.0
389
+ - `adam_beta1`: 0.9
390
+ - `adam_beta2`: 0.999
391
+ - `adam_epsilon`: 1e-08
392
+ - `max_grad_norm`: 1.0
393
+ - `num_train_epochs`: 10
394
+ - `max_steps`: -1
395
+ - `lr_scheduler_type`: linear
396
+ - `lr_scheduler_kwargs`: {}
397
+ - `warmup_ratio`: 0.4
398
+ - `warmup_steps`: 0
399
+ - `log_level`: passive
400
+ - `log_level_replica`: warning
401
+ - `log_on_each_node`: True
402
+ - `logging_nan_inf_filter`: True
403
+ - `save_safetensors`: True
404
+ - `save_on_each_node`: False
405
+ - `save_only_model`: False
406
+ - `restore_callback_states_from_checkpoint`: False
407
+ - `no_cuda`: False
408
+ - `use_cpu`: False
409
+ - `use_mps_device`: False
410
+ - `seed`: 42
411
+ - `data_seed`: None
412
+ - `jit_mode_eval`: False
413
+ - `use_ipex`: False
414
+ - `bf16`: False
415
+ - `fp16`: True
416
+ - `fp16_opt_level`: O1
417
+ - `half_precision_backend`: auto
418
+ - `bf16_full_eval`: False
419
+ - `fp16_full_eval`: False
420
+ - `tf32`: None
421
+ - `local_rank`: 0
422
+ - `ddp_backend`: None
423
+ - `tpu_num_cores`: None
424
+ - `tpu_metrics_debug`: False
425
+ - `debug`: []
426
+ - `dataloader_drop_last`: False
427
+ - `dataloader_num_workers`: 0
428
+ - `dataloader_prefetch_factor`: None
429
+ - `past_index`: -1
430
+ - `disable_tqdm`: False
431
+ - `remove_unused_columns`: True
432
+ - `label_names`: None
433
+ - `load_best_model_at_end`: False
434
+ - `ignore_data_skip`: False
435
+ - `fsdp`: []
436
+ - `fsdp_min_num_params`: 0
437
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
438
+ - `fsdp_transformer_layer_cls_to_wrap`: None
439
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
440
+ - `deepspeed`: None
441
+ - `label_smoothing_factor`: 0.0
442
+ - `optim`: adamw_torch
443
+ - `optim_args`: None
444
+ - `adafactor`: False
445
+ - `group_by_length`: False
446
+ - `length_column_name`: length
447
+ - `ddp_find_unused_parameters`: None
448
+ - `ddp_bucket_cap_mb`: None
449
+ - `ddp_broadcast_buffers`: False
450
+ - `dataloader_pin_memory`: True
451
+ - `dataloader_persistent_workers`: False
452
+ - `skip_memory_metrics`: True
453
+ - `use_legacy_prediction_loop`: False
454
+ - `push_to_hub`: False
455
+ - `resume_from_checkpoint`: None
456
+ - `hub_model_id`: None
457
+ - `hub_strategy`: every_save
458
+ - `hub_private_repo`: False
459
+ - `hub_always_push`: False
460
+ - `gradient_checkpointing`: False
461
+ - `gradient_checkpointing_kwargs`: None
462
+ - `include_inputs_for_metrics`: False
463
+ - `eval_do_concat_batches`: True
464
+ - `fp16_backend`: auto
465
+ - `push_to_hub_model_id`: None
466
+ - `push_to_hub_organization`: None
467
+ - `mp_parameters`:
468
+ - `auto_find_batch_size`: False
469
+ - `full_determinism`: False
470
+ - `torchdynamo`: None
471
+ - `ray_scope`: last
472
+ - `ddp_timeout`: 1800
473
+ - `torch_compile`: False
474
+ - `torch_compile_backend`: None
475
+ - `torch_compile_mode`: None
476
+ - `dispatch_batches`: None
477
+ - `split_batches`: None
478
+ - `include_tokens_per_second`: False
479
+ - `include_num_input_tokens_seen`: False
480
+ - `neftune_noise_alpha`: None
481
+ - `optim_target_modules`: None
482
+ - `batch_eval_metrics`: False
483
+ - `eval_on_start`: False
484
+ - `eval_use_gather_object`: False
485
+ - `batch_sampler`: no_duplicates
486
+ - `multi_dataset_batch_sampler`: proportional
487
+
488
+ </details>
489
+
490
+ ### Training Logs
491
+ | Epoch | Step | Training Loss | loss | custom-arc-semantics-data-jp_max_ap |
492
+ |:-----:|:----:|:-------------:|:------:|:-----------------------------------:|
493
+ | 1.0 | 6 | 0.3183 | 0.1717 | 0.8767 |
494
+ | 2.0 | 12 | 0.3026 | 0.1703 | 0.8767 |
495
+ | 3.0 | 18 | 0.2667 | 0.1662 | 0.8767 |
496
+ | 4.0 | 24 | 0.2164 | 0.1595 | 0.9267 |
497
+ | 5.0 | 30 | 0.1779 | 0.1680 | 0.9267 |
498
+ | 6.0 | 36 | 0.1271 | 0.1939 | 0.8767 |
499
+ | 7.0 | 42 | 0.1018 | 0.2169 | 0.8767 |
500
+ | 8.0 | 48 | 0.0824 | 0.2246 | 0.8767 |
501
+ | 9.0 | 54 | 0.0732 | 0.2209 | 0.8767 |
502
+ | 10.0 | 60 | 0.0672 | 0.2187 | 0.8767 |
503
+
504
+
505
+ ### Framework Versions
506
+ - Python: 3.10.14
507
+ - Sentence Transformers: 3.1.0
508
+ - Transformers: 4.44.2
509
+ - PyTorch: 2.4.1+cu121
510
+ - Accelerate: 0.34.2
511
+ - Datasets: 2.20.0
512
+ - Tokenizers: 0.19.1
513
+
514
+ ## Citation
515
+
516
+ ### BibTeX
517
+
518
+ #### Sentence Transformers
519
+ ```bibtex
520
+ @inproceedings{reimers-2019-sentence-bert,
521
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
522
+ author = "Reimers, Nils and Gurevych, Iryna",
523
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
524
+ month = "11",
525
+ year = "2019",
526
+ publisher = "Association for Computational Linguistics",
527
+ url = "https://arxiv.org/abs/1908.10084",
528
+ }
529
+ ```
530
+
531
+ <!--
532
+ ## Glossary
533
+
534
+ *Clearly define terms in order to be accessible across audiences.*
535
+ -->
536
+
537
+ <!--
538
+ ## Model Card Authors
539
+
540
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
541
+ -->
542
+
543
+ <!--
544
+ ## Model Card Contact
545
+
546
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
547
+ -->
added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 32000
3
+ }
checkpoint-54/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-54/README.md ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: colorfulscoop/sbert-base-ja
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy
6
+ - cosine_accuracy_threshold
7
+ - cosine_f1
8
+ - cosine_f1_threshold
9
+ - cosine_precision
10
+ - cosine_recall
11
+ - cosine_ap
12
+ - dot_accuracy
13
+ - dot_accuracy_threshold
14
+ - dot_f1
15
+ - dot_f1_threshold
16
+ - dot_precision
17
+ - dot_recall
18
+ - dot_ap
19
+ - manhattan_accuracy
20
+ - manhattan_accuracy_threshold
21
+ - manhattan_f1
22
+ - manhattan_f1_threshold
23
+ - manhattan_precision
24
+ - manhattan_recall
25
+ - manhattan_ap
26
+ - euclidean_accuracy
27
+ - euclidean_accuracy_threshold
28
+ - euclidean_f1
29
+ - euclidean_f1_threshold
30
+ - euclidean_precision
31
+ - euclidean_recall
32
+ - euclidean_ap
33
+ - max_accuracy
34
+ - max_accuracy_threshold
35
+ - max_f1
36
+ - max_f1_threshold
37
+ - max_precision
38
+ - max_recall
39
+ - max_ap
40
+ pipeline_tag: sentence-similarity
41
+ tags:
42
+ - sentence-transformers
43
+ - sentence-similarity
44
+ - feature-extraction
45
+ - generated_from_trainer
46
+ - dataset_size:53
47
+ - loss:CosineSimilarityLoss
48
+ model-index:
49
+ - name: SentenceTransformer based on colorfulscoop/sbert-base-ja
50
+ results:
51
+ - task:
52
+ type: binary-classification
53
+ name: Binary Classification
54
+ dataset:
55
+ name: custom arc semantics data jp
56
+ type: custom-arc-semantics-data-jp
57
+ metrics:
58
+ - type: cosine_accuracy
59
+ value: 0.6666666666666666
60
+ name: Cosine Accuracy
61
+ - type: cosine_accuracy_threshold
62
+ value: 0.45798632502555847
63
+ name: Cosine Accuracy Threshold
64
+ - type: cosine_f1
65
+ value: 0.8000000000000002
66
+ name: Cosine F1
67
+ - type: cosine_f1_threshold
68
+ value: 0.45798632502555847
69
+ name: Cosine F1 Threshold
70
+ - type: cosine_precision
71
+ value: 0.8
72
+ name: Cosine Precision
73
+ - type: cosine_recall
74
+ value: 0.8
75
+ name: Cosine Recall
76
+ - type: cosine_ap
77
+ value: 0.8766666666666667
78
+ name: Cosine Ap
79
+ - type: dot_accuracy
80
+ value: 0.6666666666666666
81
+ name: Dot Accuracy
82
+ - type: dot_accuracy_threshold
83
+ value: 245.57119750976562
84
+ name: Dot Accuracy Threshold
85
+ - type: dot_f1
86
+ value: 0.8000000000000002
87
+ name: Dot F1
88
+ - type: dot_f1_threshold
89
+ value: 245.57119750976562
90
+ name: Dot F1 Threshold
91
+ - type: dot_precision
92
+ value: 0.8
93
+ name: Dot Precision
94
+ - type: dot_recall
95
+ value: 0.8
96
+ name: Dot Recall
97
+ - type: dot_ap
98
+ value: 0.8766666666666667
99
+ name: Dot Ap
100
+ - type: manhattan_accuracy
101
+ value: 0.6666666666666666
102
+ name: Manhattan Accuracy
103
+ - type: manhattan_accuracy_threshold
104
+ value: 527.4176025390625
105
+ name: Manhattan Accuracy Threshold
106
+ - type: manhattan_f1
107
+ value: 0.8000000000000002
108
+ name: Manhattan F1
109
+ - type: manhattan_f1_threshold
110
+ value: 527.4176025390625
111
+ name: Manhattan F1 Threshold
112
+ - type: manhattan_precision
113
+ value: 0.8
114
+ name: Manhattan Precision
115
+ - type: manhattan_recall
116
+ value: 0.8
117
+ name: Manhattan Recall
118
+ - type: manhattan_ap
119
+ value: 0.8766666666666667
120
+ name: Manhattan Ap
121
+ - type: euclidean_accuracy
122
+ value: 0.6666666666666666
123
+ name: Euclidean Accuracy
124
+ - type: euclidean_accuracy_threshold
125
+ value: 24.071979522705078
126
+ name: Euclidean Accuracy Threshold
127
+ - type: euclidean_f1
128
+ value: 0.8000000000000002
129
+ name: Euclidean F1
130
+ - type: euclidean_f1_threshold
131
+ value: 24.071979522705078
132
+ name: Euclidean F1 Threshold
133
+ - type: euclidean_precision
134
+ value: 0.8
135
+ name: Euclidean Precision
136
+ - type: euclidean_recall
137
+ value: 0.8
138
+ name: Euclidean Recall
139
+ - type: euclidean_ap
140
+ value: 0.8766666666666667
141
+ name: Euclidean Ap
142
+ - type: max_accuracy
143
+ value: 0.6666666666666666
144
+ name: Max Accuracy
145
+ - type: max_accuracy_threshold
146
+ value: 527.4176025390625
147
+ name: Max Accuracy Threshold
148
+ - type: max_f1
149
+ value: 0.8000000000000002
150
+ name: Max F1
151
+ - type: max_f1_threshold
152
+ value: 527.4176025390625
153
+ name: Max F1 Threshold
154
+ - type: max_precision
155
+ value: 0.8
156
+ name: Max Precision
157
+ - type: max_recall
158
+ value: 0.8
159
+ name: Max Recall
160
+ - type: max_ap
161
+ value: 0.8766666666666667
162
+ name: Max Ap
163
+ ---
164
+
165
+ # SentenceTransformer based on colorfulscoop/sbert-base-ja
166
+
167
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
168
+
169
+ ## Model Details
170
+
171
+ ### Model Description
172
+ - **Model Type:** Sentence Transformer
173
+ - **Base model:** [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) <!-- at revision ecb8a98cd5176719ff7ab0d770a27420118732cf -->
174
+ - **Maximum Sequence Length:** 512 tokens
175
+ - **Output Dimensionality:** 768 dimensions
176
+ - **Similarity Function:** Cosine Similarity
177
+ - **Training Dataset:**
178
+ - csv
179
+ <!-- - **Language:** Unknown -->
180
+ <!-- - **License:** Unknown -->
181
+
182
+ ### Model Sources
183
+
184
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
185
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
186
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
187
+
188
+ ### Full Model Architecture
189
+
190
+ ```
191
+ SentenceTransformer(
192
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
193
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
194
+ )
195
+ ```
196
+
197
+ ## Usage
198
+
199
+ ### Direct Usage (Sentence Transformers)
200
+
201
+ First install the Sentence Transformers library:
202
+
203
+ ```bash
204
+ pip install -U sentence-transformers
205
+ ```
206
+
207
+ Then you can load this model and run inference.
208
+ ```python
209
+ from sentence_transformers import SentenceTransformer
210
+
211
+ # Download from the 🤗 Hub
212
+ model = SentenceTransformer("sentence_transformers_model_id")
213
+ # Run inference
214
+ sentences = [
215
+ 'The weather is lovely today.',
216
+ "It's so sunny outside!",
217
+ 'He drove to the stadium.',
218
+ ]
219
+ embeddings = model.encode(sentences)
220
+ print(embeddings.shape)
221
+ # [3, 768]
222
+
223
+ # Get the similarity scores for the embeddings
224
+ similarities = model.similarity(embeddings, embeddings)
225
+ print(similarities.shape)
226
+ # [3, 3]
227
+ ```
228
+
229
+ <!--
230
+ ### Direct Usage (Transformers)
231
+
232
+ <details><summary>Click to see the direct usage in Transformers</summary>
233
+
234
+ </details>
235
+ -->
236
+
237
+ <!--
238
+ ### Downstream Usage (Sentence Transformers)
239
+
240
+ You can finetune this model on your own dataset.
241
+
242
+ <details><summary>Click to expand</summary>
243
+
244
+ </details>
245
+ -->
246
+
247
+ <!--
248
+ ### Out-of-Scope Use
249
+
250
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
251
+ -->
252
+
253
+ ## Evaluation
254
+
255
+ ### Metrics
256
+
257
+ #### Binary Classification
258
+ * Dataset: `custom-arc-semantics-data-jp`
259
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
260
+
261
+ | Metric | Value |
262
+ |:-----------------------------|:-----------|
263
+ | cosine_accuracy | 0.6667 |
264
+ | cosine_accuracy_threshold | 0.458 |
265
+ | cosine_f1 | 0.8 |
266
+ | cosine_f1_threshold | 0.458 |
267
+ | cosine_precision | 0.8 |
268
+ | cosine_recall | 0.8 |
269
+ | cosine_ap | 0.8767 |
270
+ | dot_accuracy | 0.6667 |
271
+ | dot_accuracy_threshold | 245.5712 |
272
+ | dot_f1 | 0.8 |
273
+ | dot_f1_threshold | 245.5712 |
274
+ | dot_precision | 0.8 |
275
+ | dot_recall | 0.8 |
276
+ | dot_ap | 0.8767 |
277
+ | manhattan_accuracy | 0.6667 |
278
+ | manhattan_accuracy_threshold | 527.4176 |
279
+ | manhattan_f1 | 0.8 |
280
+ | manhattan_f1_threshold | 527.4176 |
281
+ | manhattan_precision | 0.8 |
282
+ | manhattan_recall | 0.8 |
283
+ | manhattan_ap | 0.8767 |
284
+ | euclidean_accuracy | 0.6667 |
285
+ | euclidean_accuracy_threshold | 24.072 |
286
+ | euclidean_f1 | 0.8 |
287
+ | euclidean_f1_threshold | 24.072 |
288
+ | euclidean_precision | 0.8 |
289
+ | euclidean_recall | 0.8 |
290
+ | euclidean_ap | 0.8767 |
291
+ | max_accuracy | 0.6667 |
292
+ | max_accuracy_threshold | 527.4176 |
293
+ | max_f1 | 0.8 |
294
+ | max_f1_threshold | 527.4176 |
295
+ | max_precision | 0.8 |
296
+ | max_recall | 0.8 |
297
+ | **max_ap** | **0.8767** |
298
+
299
+ <!--
300
+ ## Bias, Risks and Limitations
301
+
302
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
303
+ -->
304
+
305
+ <!--
306
+ ### Recommendations
307
+
308
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
309
+ -->
310
+
311
+ ## Training Details
312
+
313
+ ### Training Dataset
314
+
315
+ #### csv
316
+
317
+ * Dataset: csv
318
+ * Size: 47 training samples
319
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
320
+ * Approximate statistics based on the first 47 samples:
321
+ | | text1 | text2 | label |
322
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------|
323
+ | type | string | string | int |
324
+ | details | <ul><li>min: 14 tokens</li><li>mean: 35.94 tokens</li><li>max: 84 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 21.72 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~38.30%</li><li>1: ~61.70%</li></ul> |
325
+ * Samples:
326
+ | text1 | text2 | label |
327
+ |:-----------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------|:---------------|
328
+ | <code>茶色 の ドレス を 着た 若い 女の子 と サンダル が 黒い 帽子 、 タンクトップ 、 青い カーゴ ショーツ を 着た 若い 男の子 を 、 同じ ボール に 向かって 銀 の ボール を 投げ つける ように 笑い ます 。</code> | <code>人々 は ハンバーガー を 待って い ます 。</code> | <code>1</code> |
329
+ | <code>水 の 近く の ドック に 2 人 が 座って い ます 。</code> | <code>岩 の 上 に 座って いる 二 人</code> | <code>0</code> |
330
+ | <code>小さな 女の子 が 草 を 横切って 木 に 向かって 走り ます 。</code> | <code>女の子 は 、 かつて 木 が 立って いた 裏庭 を 見 ながら 中 に い ました 。</code> | <code>1</code> |
331
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
332
+ ```json
333
+ {
334
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
335
+ }
336
+ ```
337
+
338
+ ### Evaluation Dataset
339
+
340
+ #### csv
341
+
342
+ * Dataset: csv
343
+ * Size: 6 evaluation samples
343
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
344
+ * Approximate statistics based on the first 6 samples:
346
+ | | text1 | text2 | label |
347
+ |:--------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
348
+ | type | string | string | int |
349
+ | details | <ul><li>min: 19 tokens</li><li>mean: 38.67 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 25.5 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>0: ~16.67%</li><li>1: ~83.33%</li></ul> |
350
+ * Samples:
351
+ | text1 | text2 | label |
352
+ |:----------------------------------------------------------------------------------------------------------|:------------------------------------------------|:---------------|
353
+ | <code>岩 の 多い 景色 を 見て 二 人</code> | <code>何 か を 見て いる 二 人 が い ます 。</code> | <code>0</code> |
354
+ | <code>白い ヘルメット と オレンジ色 の シャツ 、 ジーンズ 、 白い トラック と オレンジ色 の パイロン の 前 に 反射 ジャケット を 着た 金髪 の ストリート ワーカー 。</code> | <code>ストリート ワーカー は 保護 具 を 着用 して い ませ ん 。</code> | <code>1</code> |
355
+ | <code>白い 帽子 を かぶった 女性 が 、 鮮やかな 色 の 岩 の 風景 を 描いて い ます 。 岩 層 自体 が 背景 に 見え ます 。</code> | <code>誰 か が 肖像 画 を 描いて い ます 。</code> | <code>1</code> |
356
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
357
+ ```json
358
+ {
359
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
360
+ }
361
+ ```
362
+
363
+ ### Training Hyperparameters
364
+ #### Non-Default Hyperparameters
365
+
366
+ - `eval_strategy`: epoch
367
+ - `learning_rate`: 2e-05
368
+ - `num_train_epochs`: 10
369
+ - `warmup_ratio`: 0.4
370
+ - `fp16`: True
371
+ - `batch_sampler`: no_duplicates
372
+
373
+ #### All Hyperparameters
374
+ <details><summary>Click to expand</summary>
375
+
376
+ - `overwrite_output_dir`: False
377
+ - `do_predict`: False
378
+ - `eval_strategy`: epoch
379
+ - `prediction_loss_only`: True
380
+ - `per_device_train_batch_size`: 8
381
+ - `per_device_eval_batch_size`: 8
382
+ - `per_gpu_train_batch_size`: None
383
+ - `per_gpu_eval_batch_size`: None
384
+ - `gradient_accumulation_steps`: 1
385
+ - `eval_accumulation_steps`: None
386
+ - `torch_empty_cache_steps`: None
387
+ - `learning_rate`: 2e-05
388
+ - `weight_decay`: 0.0
389
+ - `adam_beta1`: 0.9
390
+ - `adam_beta2`: 0.999
391
+ - `adam_epsilon`: 1e-08
392
+ - `max_grad_norm`: 1.0
393
+ - `num_train_epochs`: 10
394
+ - `max_steps`: -1
395
+ - `lr_scheduler_type`: linear
396
+ - `lr_scheduler_kwargs`: {}
397
+ - `warmup_ratio`: 0.4
398
+ - `warmup_steps`: 0
399
+ - `log_level`: passive
400
+ - `log_level_replica`: warning
401
+ - `log_on_each_node`: True
402
+ - `logging_nan_inf_filter`: True
403
+ - `save_safetensors`: True
404
+ - `save_on_each_node`: False
405
+ - `save_only_model`: False
406
+ - `restore_callback_states_from_checkpoint`: False
407
+ - `no_cuda`: False
408
+ - `use_cpu`: False
409
+ - `use_mps_device`: False
410
+ - `seed`: 42
411
+ - `data_seed`: None
412
+ - `jit_mode_eval`: False
413
+ - `use_ipex`: False
414
+ - `bf16`: False
415
+ - `fp16`: True
416
+ - `fp16_opt_level`: O1
417
+ - `half_precision_backend`: auto
418
+ - `bf16_full_eval`: False
419
+ - `fp16_full_eval`: False
420
+ - `tf32`: None
421
+ - `local_rank`: 0
422
+ - `ddp_backend`: None
423
+ - `tpu_num_cores`: None
424
+ - `tpu_metrics_debug`: False
425
+ - `debug`: []
426
+ - `dataloader_drop_last`: False
427
+ - `dataloader_num_workers`: 0
428
+ - `dataloader_prefetch_factor`: None
429
+ - `past_index`: -1
430
+ - `disable_tqdm`: False
431
+ - `remove_unused_columns`: True
432
+ - `label_names`: None
433
+ - `load_best_model_at_end`: False
434
+ - `ignore_data_skip`: False
435
+ - `fsdp`: []
436
+ - `fsdp_min_num_params`: 0
437
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
438
+ - `fsdp_transformer_layer_cls_to_wrap`: None
439
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
440
+ - `deepspeed`: None
441
+ - `label_smoothing_factor`: 0.0
442
+ - `optim`: adamw_torch
443
+ - `optim_args`: None
444
+ - `adafactor`: False
445
+ - `group_by_length`: False
446
+ - `length_column_name`: length
447
+ - `ddp_find_unused_parameters`: None
448
+ - `ddp_bucket_cap_mb`: None
449
+ - `ddp_broadcast_buffers`: False
450
+ - `dataloader_pin_memory`: True
451
+ - `dataloader_persistent_workers`: False
452
+ - `skip_memory_metrics`: True
453
+ - `use_legacy_prediction_loop`: False
454
+ - `push_to_hub`: False
455
+ - `resume_from_checkpoint`: None
456
+ - `hub_model_id`: None
457
+ - `hub_strategy`: every_save
458
+ - `hub_private_repo`: False
459
+ - `hub_always_push`: False
460
+ - `gradient_checkpointing`: False
461
+ - `gradient_checkpointing_kwargs`: None
462
+ - `include_inputs_for_metrics`: False
463
+ - `eval_do_concat_batches`: True
464
+ - `fp16_backend`: auto
465
+ - `push_to_hub_model_id`: None
466
+ - `push_to_hub_organization`: None
467
+ - `mp_parameters`:
468
+ - `auto_find_batch_size`: False
469
+ - `full_determinism`: False
470
+ - `torchdynamo`: None
471
+ - `ray_scope`: last
472
+ - `ddp_timeout`: 1800
473
+ - `torch_compile`: False
474
+ - `torch_compile_backend`: None
475
+ - `torch_compile_mode`: None
476
+ - `dispatch_batches`: None
477
+ - `split_batches`: None
478
+ - `include_tokens_per_second`: False
479
+ - `include_num_input_tokens_seen`: False
480
+ - `neftune_noise_alpha`: None
481
+ - `optim_target_modules`: None
482
+ - `batch_eval_metrics`: False
483
+ - `eval_on_start`: False
484
+ - `eval_use_gather_object`: False
485
+ - `batch_sampler`: no_duplicates
486
+ - `multi_dataset_batch_sampler`: proportional
487
+
488
+ </details>
489
+
490
+ ### Training Logs
491
+ | Epoch | Step | Training Loss | loss | custom-arc-semantics-data-jp_max_ap |
492
+ |:-----:|:----:|:-------------:|:------:|:-----------------------------------:|
493
+ | 1.0 | 6 | 0.3183 | 0.1717 | 0.8767 |
494
+ | 2.0 | 12 | 0.3026 | 0.1703 | 0.8767 |
495
+ | 3.0 | 18 | 0.2667 | 0.1662 | 0.8767 |
496
+ | 4.0 | 24 | 0.2164 | 0.1595 | 0.9267 |
497
+ | 5.0 | 30 | 0.1779 | 0.1680 | 0.9267 |
498
+ | 6.0 | 36 | 0.1271 | 0.1939 | 0.8767 |
499
+ | 7.0 | 42 | 0.1018 | 0.2169 | 0.8767 |
500
+ | 8.0 | 48 | 0.0824 | 0.2246 | 0.8767 |
501
+ | 9.0 | 54 | 0.0732 | 0.2209 | 0.8767 |
502
+
503
+
504
+ ### Framework Versions
505
+ - Python: 3.10.14
506
+ - Sentence Transformers: 3.1.0
507
+ - Transformers: 4.44.2
508
+ - PyTorch: 2.4.1+cu121
509
+ - Accelerate: 0.34.2
510
+ - Datasets: 2.20.0
511
+ - Tokenizers: 0.19.1
512
+
513
+ ## Citation
514
+
515
+ ### BibTeX
516
+
517
+ #### Sentence Transformers
518
+ ```bibtex
519
+ @inproceedings{reimers-2019-sentence-bert,
520
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
521
+ author = "Reimers, Nils and Gurevych, Iryna",
522
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
523
+ month = "11",
524
+ year = "2019",
525
+ publisher = "Association for Computational Linguistics",
526
+ url = "https://arxiv.org/abs/1908.10084",
527
+ }
528
+ ```
529
+
530
+ <!--
531
+ ## Glossary
532
+
533
+ *Clearly define terms in order to be accessible across audiences.*
534
+ -->
535
+
536
+ <!--
537
+ ## Model Card Authors
538
+
539
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
540
+ -->
541
+
542
+ <!--
543
+ ## Model Card Contact
544
+
545
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
546
+ -->
checkpoint-54/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 32000
3
+ }
checkpoint-54/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "colorfulscoop/sbert-base-ja",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout": null,
9
+ "cls_token_id": 2,
10
+ "eos_token_id": 3,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "layer_norm_eps": 1e-12,
18
+ "mask_token_id": 4,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "sep_token_id": 3,
26
+ "tokenizer_class": "DebertaV2Tokenizer",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.44.2",
29
+ "type_vocab_size": 2,
30
+ "unk_token_id": 1,
31
+ "use_cache": true,
32
+ "vocab_size": 32000
33
+ }
checkpoint-54/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.0",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-54/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2efb2eafec7d89cb75d471bd7d6d04235104281db51419a56c16ddb3a938e73
3
+ size 442491744
checkpoint-54/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-54/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b343583e9b53d05b8d2cc8fc41d27b82bc364c2e3c58870c96e1a02f3c6e4393
3
+ size 880373306
checkpoint-54/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c4ddf165d5585042b16b6d15630a9fc34d0c3423b33ab22b29d733b9012159d
3
+ size 13990
checkpoint-54/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61ada5f40a98c3af164b390bb152f712e602d6fda9df30f1d4ba1c98960643d0
3
+ size 1064
checkpoint-54/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-54/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "<unk>",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-54/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6467857b4b0c77ded9bac7ad2fb5c16eb64e17e417ce46624dacac2bbb404fc
3
+ size 802713
checkpoint-54/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-54/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "32000": {
44
+ "content": "[PAD]",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ }
51
+ },
52
+ "bos_token": "[CLS]",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "[CLS]",
55
+ "do_lower_case": false,
56
+ "eos_token": "[SEP]",
57
+ "mask_token": "[MASK]",
58
+ "model_max_length": 512,
59
+ "pad_token": "<pad>",
60
+ "sep_token": "[SEP]",
61
+ "sp_model_kwargs": {},
62
+ "split_by_punct": false,
63
+ "tokenizer_class": "DebertaV2Tokenizer",
64
+ "unk_token": "<unk>"
65
+ }
checkpoint-54/trainer_state.json ADDED
@@ -0,0 +1,483 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 9.0,
5
+ "eval_steps": 50,
6
+ "global_step": 54,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.4512845277786255,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.3183,
16
+ "step": 6
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
21
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6471868753433228,
22
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
23
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
24
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6471868753433228,
25
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
26
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
27
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
28
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 345.4730529785156,
29
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
30
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
31
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 345.4730529785156,
32
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
33
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
34
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
35
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.563411712646484,
36
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.81,
37
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
38
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.563411712646484,
39
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
40
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
41
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
42
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 429.613037109375,
43
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.81,
44
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
45
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 429.613037109375,
46
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
47
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
48
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
49
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 429.613037109375,
50
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
51
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
52
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 429.613037109375,
53
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
54
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
55
+ "eval_loss": 0.17167754471302032,
56
+ "eval_runtime": 2.9189,
57
+ "eval_samples_per_second": 2.056,
58
+ "eval_steps_per_second": 0.343,
59
+ "step": 6
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "grad_norm": 1.304966688156128,
64
+ "learning_rate": 1e-05,
65
+ "loss": 0.3026,
66
+ "step": 12
67
+ },
68
+ {
69
+ "epoch": 2.0,
70
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
71
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6410852670669556,
72
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
73
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
74
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6410852670669556,
75
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
76
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
77
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
78
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 341.5276184082031,
79
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
80
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
81
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 341.5276184082031,
82
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
83
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
84
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
85
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.734420776367188,
86
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
87
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
88
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.734420776367188,
89
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
90
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
91
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
92
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 434.0592346191406,
93
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
94
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
95
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 434.0592346191406,
96
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
97
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
98
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
99
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 434.0592346191406,
100
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
101
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
102
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 434.0592346191406,
103
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
104
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
105
+ "eval_loss": 0.17033492028713226,
106
+ "eval_runtime": 2.9121,
107
+ "eval_samples_per_second": 2.06,
108
+ "eval_steps_per_second": 0.343,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 3.0,
113
+ "grad_norm": 1.0188632011413574,
114
+ "learning_rate": 1.5000000000000002e-05,
115
+ "loss": 0.2667,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 3.0,
120
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
121
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6364033818244934,
122
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
123
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
124
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6364033818244934,
125
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
126
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
127
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
128
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 338.5497131347656,
129
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
130
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
131
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 338.5497131347656,
132
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
133
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
134
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
135
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.83578109741211,
136
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
137
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
138
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.83578109741211,
139
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
140
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
141
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
142
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 437.1195068359375,
143
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
144
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
145
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 437.1195068359375,
146
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
147
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
148
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
149
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 437.1195068359375,
150
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
151
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
152
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 437.1195068359375,
153
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
154
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
155
+ "eval_loss": 0.16623182594776154,
156
+ "eval_runtime": 3.4915,
157
+ "eval_samples_per_second": 1.718,
158
+ "eval_steps_per_second": 0.286,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 4.0,
163
+ "grad_norm": 0.5784842371940613,
164
+ "learning_rate": 2e-05,
165
+ "loss": 0.2164,
166
+ "step": 24
167
+ },
168
+ {
169
+ "epoch": 4.0,
170
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
171
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6302204132080078,
172
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
173
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
174
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6302204132080078,
175
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
176
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
177
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
178
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 385.5712585449219,
179
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.9266666666666665,
180
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
181
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 339.04254150390625,
182
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
183
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
184
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
185
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.902908325195312,
186
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
187
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
188
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.902908325195312,
189
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
190
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
191
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
192
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 438.79205322265625,
193
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
194
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
195
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 438.79205322265625,
196
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
197
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
198
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
199
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 438.79205322265625,
200
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.9266666666666665,
201
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
202
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 438.79205322265625,
203
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
204
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
205
+ "eval_loss": 0.15949387848377228,
206
+ "eval_runtime": 2.6844,
207
+ "eval_samples_per_second": 2.235,
208
+ "eval_steps_per_second": 0.373,
209
+ "step": 24
210
+ },
211
+ {
212
+ "epoch": 5.0,
213
+ "grad_norm": 0.4092578589916229,
214
+ "learning_rate": 1.6666666666666667e-05,
215
+ "loss": 0.1779,
216
+ "step": 30
217
+ },
218
+ {
219
+ "epoch": 5.0,
220
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
221
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.587942361831665,
222
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
223
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
224
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.587942361831665,
225
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
226
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
227
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
228
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 369.192626953125,
229
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.9266666666666665,
230
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
231
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 318.3497619628906,
232
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
233
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
234
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
235
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 21.073081970214844,
236
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
237
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
238
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 21.073081970214844,
239
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
240
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
241
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
242
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 462.51629638671875,
243
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
244
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
245
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 462.51629638671875,
246
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
247
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
248
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
249
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 462.51629638671875,
250
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.9266666666666665,
251
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
252
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 462.51629638671875,
253
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
254
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
255
+ "eval_loss": 0.16796135902404785,
256
+ "eval_runtime": 3.0306,
257
+ "eval_samples_per_second": 1.98,
258
+ "eval_steps_per_second": 0.33,
259
+ "step": 30
260
+ },
261
+ {
262
+ "epoch": 6.0,
263
+ "grad_norm": 0.45830854773521423,
264
+ "learning_rate": 1.3333333333333333e-05,
265
+ "loss": 0.1271,
266
+ "step": 36
267
+ },
268
+ {
269
+ "epoch": 6.0,
270
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
271
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5134342908859253,
272
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
273
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
274
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5134342908859253,
275
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
276
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
277
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
278
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 278.04107666015625,
279
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
280
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
281
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 278.04107666015625,
282
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
283
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
284
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
285
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 22.917387008666992,
286
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
287
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
288
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 22.917387008666992,
289
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
290
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
291
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
292
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 502.63287353515625,
293
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
294
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
295
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 502.63287353515625,
296
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
297
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
298
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
299
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 502.63287353515625,
300
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
301
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
302
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 502.63287353515625,
303
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
304
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
305
+ "eval_loss": 0.19386596977710724,
306
+ "eval_runtime": 2.8354,
307
+ "eval_samples_per_second": 2.116,
308
+ "eval_steps_per_second": 0.353,
309
+ "step": 36
310
+ },
311
+ {
312
+ "epoch": 7.0,
313
+ "grad_norm": 0.3822881579399109,
314
+ "learning_rate": 1e-05,
315
+ "loss": 0.1018,
316
+ "step": 42
317
+ },
318
+ {
319
+ "epoch": 7.0,
320
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
321
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.46284571290016174,
322
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
323
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
324
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.46284571290016174,
325
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
326
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
327
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
328
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 249.8519287109375,
329
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
330
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
331
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 249.8519287109375,
332
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
333
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
334
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
335
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.051647186279297,
336
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
337
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
338
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.051647186279297,
339
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
340
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
341
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
342
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 527.3822021484375,
343
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
344
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
345
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 527.3822021484375,
346
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
347
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
348
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
349
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 527.3822021484375,
350
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
351
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
352
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 527.3822021484375,
353
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
354
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
355
+ "eval_loss": 0.2168869525194168,
356
+ "eval_runtime": 2.7296,
357
+ "eval_samples_per_second": 2.198,
358
+ "eval_steps_per_second": 0.366,
359
+ "step": 42
360
+ },
361
+ {
362
+ "epoch": 8.0,
363
+ "grad_norm": 0.3190430998802185,
364
+ "learning_rate": 6.666666666666667e-06,
365
+ "loss": 0.0824,
366
+ "step": 48
367
+ },
368
+ {
369
+ "epoch": 8.0,
370
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
371
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.45021578669548035,
372
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
373
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
374
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.45021578669548035,
375
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
376
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
377
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
378
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 241.99093627929688,
379
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
380
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
381
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 241.99093627929688,
382
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
383
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
384
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
385
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.27983283996582,
386
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
387
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
388
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.27983283996582,
389
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
390
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
391
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
392
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 532.0448608398438,
393
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
394
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
395
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 532.0448608398438,
396
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
397
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
398
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
399
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 532.0448608398438,
400
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
401
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
402
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 532.0448608398438,
403
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
404
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
405
+ "eval_loss": 0.2245994359254837,
406
+ "eval_runtime": 2.6403,
407
+ "eval_samples_per_second": 2.273,
408
+ "eval_steps_per_second": 0.379,
409
+ "step": 48
410
+ },
411
+ {
412
+ "epoch": 9.0,
413
+ "grad_norm": 0.2815457880496979,
414
+ "learning_rate": 3.3333333333333333e-06,
415
+ "loss": 0.0732,
416
+ "step": 54
417
+ },
418
+ {
419
+ "epoch": 9.0,
420
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
421
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.45798632502555847,
422
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
423
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
424
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.45798632502555847,
425
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
426
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
427
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
428
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 245.57119750976562,
429
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
430
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
431
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 245.57119750976562,
432
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
433
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
434
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
435
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.071979522705078,
436
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
437
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
438
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.071979522705078,
439
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
440
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
441
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
442
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 527.4176025390625,
443
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
444
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
445
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 527.4176025390625,
446
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
447
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
448
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
449
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 527.4176025390625,
450
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
451
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
452
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 527.4176025390625,
453
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
454
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
455
+ "eval_loss": 0.22094659507274628,
456
+ "eval_runtime": 2.8393,
457
+ "eval_samples_per_second": 2.113,
458
+ "eval_steps_per_second": 0.352,
459
+ "step": 54
460
+ }
461
+ ],
462
+ "logging_steps": 500,
463
+ "max_steps": 60,
464
+ "num_input_tokens_seen": 0,
465
+ "num_train_epochs": 10,
466
+ "save_steps": 100,
467
+ "stateful_callbacks": {
468
+ "TrainerControl": {
469
+ "args": {
470
+ "should_epoch_stop": false,
471
+ "should_evaluate": false,
472
+ "should_log": false,
473
+ "should_save": true,
474
+ "should_training_stop": false
475
+ },
476
+ "attributes": {}
477
+ }
478
+ },
479
+ "total_flos": 0.0,
480
+ "train_batch_size": 8,
481
+ "trial_name": null,
482
+ "trial_params": null
483
+ }
checkpoint-54/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357936512702101c3d6fcb1fbc6019e2e1a0c6628f613da90d340ef26a75e926
3
+ size 5432
checkpoint-60/1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "word_embedding_dimension": 768,
3
+ "pooling_mode_cls_token": false,
4
+ "pooling_mode_mean_tokens": true,
5
+ "pooling_mode_max_tokens": false,
6
+ "pooling_mode_mean_sqrt_len_tokens": false,
7
+ "pooling_mode_weightedmean_tokens": false,
8
+ "pooling_mode_lasttoken": false,
9
+ "include_prompt": true
10
+ }
checkpoint-60/README.md ADDED
@@ -0,0 +1,547 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: colorfulscoop/sbert-base-ja
3
+ library_name: sentence-transformers
4
+ metrics:
5
+ - cosine_accuracy
6
+ - cosine_accuracy_threshold
7
+ - cosine_f1
8
+ - cosine_f1_threshold
9
+ - cosine_precision
10
+ - cosine_recall
11
+ - cosine_ap
12
+ - dot_accuracy
13
+ - dot_accuracy_threshold
14
+ - dot_f1
15
+ - dot_f1_threshold
16
+ - dot_precision
17
+ - dot_recall
18
+ - dot_ap
19
+ - manhattan_accuracy
20
+ - manhattan_accuracy_threshold
21
+ - manhattan_f1
22
+ - manhattan_f1_threshold
23
+ - manhattan_precision
24
+ - manhattan_recall
25
+ - manhattan_ap
26
+ - euclidean_accuracy
27
+ - euclidean_accuracy_threshold
28
+ - euclidean_f1
29
+ - euclidean_f1_threshold
30
+ - euclidean_precision
31
+ - euclidean_recall
32
+ - euclidean_ap
33
+ - max_accuracy
34
+ - max_accuracy_threshold
35
+ - max_f1
36
+ - max_f1_threshold
37
+ - max_precision
38
+ - max_recall
39
+ - max_ap
40
+ pipeline_tag: sentence-similarity
41
+ tags:
42
+ - sentence-transformers
43
+ - sentence-similarity
44
+ - feature-extraction
45
+ - generated_from_trainer
46
+ - dataset_size:53
47
+ - loss:CosineSimilarityLoss
48
+ model-index:
49
+ - name: SentenceTransformer based on colorfulscoop/sbert-base-ja
50
+ results:
51
+ - task:
52
+ type: binary-classification
53
+ name: Binary Classification
54
+ dataset:
55
+ name: custom arc semantics data jp
56
+ type: custom-arc-semantics-data-jp
57
+ metrics:
58
+ - type: cosine_accuracy
59
+ value: 0.6666666666666666
60
+ name: Cosine Accuracy
61
+ - type: cosine_accuracy_threshold
62
+ value: 0.4631122350692749
63
+ name: Cosine Accuracy Threshold
64
+ - type: cosine_f1
65
+ value: 0.8000000000000002
66
+ name: Cosine F1
67
+ - type: cosine_f1_threshold
68
+ value: 0.4631122350692749
69
+ name: Cosine F1 Threshold
70
+ - type: cosine_precision
71
+ value: 0.8
72
+ name: Cosine Precision
73
+ - type: cosine_recall
74
+ value: 0.8
75
+ name: Cosine Recall
76
+ - type: cosine_ap
77
+ value: 0.8766666666666667
78
+ name: Cosine Ap
79
+ - type: dot_accuracy
80
+ value: 0.6666666666666666
81
+ name: Dot Accuracy
82
+ - type: dot_accuracy_threshold
83
+ value: 248.13394165039062
84
+ name: Dot Accuracy Threshold
85
+ - type: dot_f1
86
+ value: 0.8000000000000002
87
+ name: Dot F1
88
+ - type: dot_f1_threshold
89
+ value: 248.13394165039062
90
+ name: Dot F1 Threshold
91
+ - type: dot_precision
92
+ value: 0.8
93
+ name: Dot Precision
94
+ - type: dot_recall
95
+ value: 0.8
96
+ name: Dot Recall
97
+ - type: dot_ap
98
+ value: 0.8766666666666667
99
+ name: Dot Ap
100
+ - type: manhattan_accuracy
101
+ value: 0.6666666666666666
102
+ name: Manhattan Accuracy
103
+ - type: manhattan_accuracy_threshold
104
+ value: 524.65185546875
105
+ name: Manhattan Accuracy Threshold
106
+ - type: manhattan_f1
107
+ value: 0.8000000000000002
108
+ name: Manhattan F1
109
+ - type: manhattan_f1_threshold
110
+ value: 524.65185546875
111
+ name: Manhattan F1 Threshold
112
+ - type: manhattan_precision
113
+ value: 0.8
114
+ name: Manhattan Precision
115
+ - type: manhattan_recall
116
+ value: 0.8
117
+ name: Manhattan Recall
118
+ - type: manhattan_ap
119
+ value: 0.8766666666666667
120
+ name: Manhattan Ap
121
+ - type: euclidean_accuracy
122
+ value: 0.6666666666666666
123
+ name: Euclidean Accuracy
124
+ - type: euclidean_accuracy_threshold
125
+ value: 23.945947647094727
126
+ name: Euclidean Accuracy Threshold
127
+ - type: euclidean_f1
128
+ value: 0.8000000000000002
129
+ name: Euclidean F1
130
+ - type: euclidean_f1_threshold
131
+ value: 23.945947647094727
132
+ name: Euclidean F1 Threshold
133
+ - type: euclidean_precision
134
+ value: 0.8
135
+ name: Euclidean Precision
136
+ - type: euclidean_recall
137
+ value: 0.8
138
+ name: Euclidean Recall
139
+ - type: euclidean_ap
140
+ value: 0.8766666666666667
141
+ name: Euclidean Ap
142
+ - type: max_accuracy
143
+ value: 0.6666666666666666
144
+ name: Max Accuracy
145
+ - type: max_accuracy_threshold
146
+ value: 524.65185546875
147
+ name: Max Accuracy Threshold
148
+ - type: max_f1
149
+ value: 0.8000000000000002
150
+ name: Max F1
151
+ - type: max_f1_threshold
152
+ value: 524.65185546875
153
+ name: Max F1 Threshold
154
+ - type: max_precision
155
+ value: 0.8
156
+ name: Max Precision
157
+ - type: max_recall
158
+ value: 0.8
159
+ name: Max Recall
160
+ - type: max_ap
161
+ value: 0.8766666666666667
162
+ name: Max Ap
163
+ ---
164
+
165
+ # SentenceTransformer based on colorfulscoop/sbert-base-ja
166
+
167
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) on the csv dataset. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
168
+
169
+ ## Model Details
170
+
171
+ ### Model Description
172
+ - **Model Type:** Sentence Transformer
173
+ - **Base model:** [colorfulscoop/sbert-base-ja](https://huggingface.co/colorfulscoop/sbert-base-ja) <!-- at revision ecb8a98cd5176719ff7ab0d770a27420118732cf -->
174
+ - **Maximum Sequence Length:** 512 tokens
175
+ - **Output Dimensionality:** 768 dimensions
176
+ - **Similarity Function:** Cosine Similarity
177
+ - **Training Dataset:**
178
+ - csv
179
+ <!-- - **Language:** Unknown -->
180
+ <!-- - **License:** Unknown -->
181
+
182
+ ### Model Sources
183
+
184
+ - **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
185
+ - **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
186
+ - **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
187
+
188
+ ### Full Model Architecture
189
+
190
+ ```
191
+ SentenceTransformer(
192
+ (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
193
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
194
+ )
195
+ ```
196
+
197
+ ## Usage
198
+
199
+ ### Direct Usage (Sentence Transformers)
200
+
201
+ First install the Sentence Transformers library:
202
+
203
+ ```bash
204
+ pip install -U sentence-transformers
205
+ ```
206
+
207
+ Then you can load this model and run inference.
208
+ ```python
209
+ from sentence_transformers import SentenceTransformer
210
+
211
+ # Download from the 🤗 Hub
212
+ model = SentenceTransformer("sentence_transformers_model_id")
213
+ # Run inference
214
+ sentences = [
215
+ 'The weather is lovely today.',
216
+ "It's so sunny outside!",
217
+ 'He drove to the stadium.',
218
+ ]
219
+ embeddings = model.encode(sentences)
220
+ print(embeddings.shape)
221
+ # [3, 768]
222
+
223
+ # Get the similarity scores for the embeddings
224
+ similarities = model.similarity(embeddings, embeddings)
225
+ print(similarities.shape)
226
+ # [3, 3]
227
+ ```
228
+
229
+ <!--
230
+ ### Direct Usage (Transformers)
231
+
232
+ <details><summary>Click to see the direct usage in Transformers</summary>
233
+
234
+ </details>
235
+ -->
236
+
237
+ <!--
238
+ ### Downstream Usage (Sentence Transformers)
239
+
240
+ You can finetune this model on your own dataset.
241
+
242
+ <details><summary>Click to expand</summary>
243
+
244
+ </details>
245
+ -->
246
+
247
+ <!--
248
+ ### Out-of-Scope Use
249
+
250
+ *List how the model may foreseeably be misused and address what users ought not to do with the model.*
251
+ -->
252
+
253
+ ## Evaluation
254
+
255
+ ### Metrics
256
+
257
+ #### Binary Classification
258
+ * Dataset: `custom-arc-semantics-data-jp`
259
+ * Evaluated with [<code>BinaryClassificationEvaluator</code>](https://sbert.net/docs/package_reference/sentence_transformer/evaluation.html#sentence_transformers.evaluation.BinaryClassificationEvaluator)
260
+
261
+ | Metric | Value |
262
+ |:-----------------------------|:-----------|
263
+ | cosine_accuracy | 0.6667 |
264
+ | cosine_accuracy_threshold | 0.4631 |
265
+ | cosine_f1 | 0.8 |
266
+ | cosine_f1_threshold | 0.4631 |
267
+ | cosine_precision | 0.8 |
268
+ | cosine_recall | 0.8 |
269
+ | cosine_ap | 0.8767 |
270
+ | dot_accuracy | 0.6667 |
271
+ | dot_accuracy_threshold | 248.1339 |
272
+ | dot_f1 | 0.8 |
273
+ | dot_f1_threshold | 248.1339 |
274
+ | dot_precision | 0.8 |
275
+ | dot_recall | 0.8 |
276
+ | dot_ap | 0.8767 |
277
+ | manhattan_accuracy | 0.6667 |
278
+ | manhattan_accuracy_threshold | 524.6519 |
279
+ | manhattan_f1 | 0.8 |
280
+ | manhattan_f1_threshold | 524.6519 |
281
+ | manhattan_precision | 0.8 |
282
+ | manhattan_recall | 0.8 |
283
+ | manhattan_ap | 0.8767 |
284
+ | euclidean_accuracy | 0.6667 |
285
+ | euclidean_accuracy_threshold | 23.9459 |
286
+ | euclidean_f1 | 0.8 |
287
+ | euclidean_f1_threshold | 23.9459 |
288
+ | euclidean_precision | 0.8 |
289
+ | euclidean_recall | 0.8 |
290
+ | euclidean_ap | 0.8767 |
291
+ | max_accuracy | 0.6667 |
292
+ | max_accuracy_threshold | 524.6519 |
293
+ | max_f1 | 0.8 |
294
+ | max_f1_threshold | 524.6519 |
295
+ | max_precision | 0.8 |
296
+ | max_recall | 0.8 |
297
+ | **max_ap** | **0.8767** |
298
+
299
+ <!--
300
+ ## Bias, Risks and Limitations
301
+
302
+ *What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
303
+ -->
304
+
305
+ <!--
306
+ ### Recommendations
307
+
308
+ *What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
309
+ -->
310
+
311
+ ## Training Details
312
+
313
+ ### Training Dataset
314
+
315
+ #### csv
316
+
317
+ * Dataset: csv
318
+ * Size: 53 training samples
319
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
320
+ * Approximate statistics based on the first 53 samples:
321
+ | | text1 | text2 | label |
322
+ |:--------|:-----------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|:------------------------------------------------|
323
+ | type | string | string | int |
324
+ | details | <ul><li>min: 14 tokens</li><li>mean: 35.94 tokens</li><li>max: 84 tokens</li></ul> | <ul><li>min: 11 tokens</li><li>mean: 21.72 tokens</li><li>max: 38 tokens</li></ul> | <ul><li>0: ~38.30%</li><li>1: ~61.70%</li></ul> |
325
+ * Samples:
326
+ | text1 | text2 | label |
327
+ |:-----------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------|:---------------|
328
+ | <code>茶色 の ドレス を 着た 若い 女の子 と サンダル が 黒い 帽子 、 タンクトップ 、 青い カーゴ ショーツ を 着た 若い 男の子 を 、 同じ ボール に 向かって 銀 の ボール を 投げ つける ように 笑い ます 。</code> | <code>人々 は ハンバーガー を 待って い ます 。</code> | <code>1</code> |
329
+ | <code>水 の 近く の ドック に 2 人 が 座って い ます 。</code> | <code>岩 の 上 に 座って いる 二 人</code> | <code>0</code> |
330
+ | <code>小さな 女の子 が 草 を 横切って 木 に 向かって 走り ます 。</code> | <code>女の子 は 、 かつて 木 が 立って いた 裏庭 を 見 ながら 中 に い ました 。</code> | <code>1</code> |
331
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
332
+ ```json
333
+ {
334
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
335
+ }
336
+ ```
337
+
338
+ ### Evaluation Dataset
339
+
340
+ #### csv
341
+
342
+ * Dataset: csv
343
+ * Size: 6 evaluation samples
343
+ * Columns: <code>text1</code>, <code>text2</code>, and <code>label</code>
344
+ * Approximate statistics based on the first 6 samples:
346
+ | | text1 | text2 | label |
347
+ |:--------|:-----------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|:------------------------------------------------|
348
+ | type | string | string | int |
349
+ | details | <ul><li>min: 19 tokens</li><li>mean: 38.67 tokens</li><li>max: 61 tokens</li></ul> | <ul><li>min: 20 tokens</li><li>mean: 25.5 tokens</li><li>max: 33 tokens</li></ul> | <ul><li>0: ~16.67%</li><li>1: ~83.33%</li></ul> |
350
+ * Samples:
351
+ | text1 | text2 | label |
352
+ |:----------------------------------------------------------------------------------------------------------|:------------------------------------------------|:---------------|
353
+ | <code>岩 の 多い 景色 を 見て 二 人</code> | <code>何 か を 見て いる 二 人 が い ます 。</code> | <code>0</code> |
354
+ | <code>白い ヘルメット と オレンジ色 の シャツ 、 ジーンズ 、 白い トラック と オレンジ色 の パイロン の 前 に 反射 ジャケット を 着た 金髪 の ストリート ワーカー 。</code> | <code>ストリート ワーカー は 保護 具 を 着用 して い ませ ん 。</code> | <code>1</code> |
355
+ | <code>白い 帽子 を かぶった 女性 が 、 鮮やかな 色 の 岩 の 風景 を 描いて い ます 。 岩 層 自体 が 背景 に 見え ます 。</code> | <code>誰 か が 肖像 画 を 描いて い ます 。</code> | <code>1</code> |
356
+ * Loss: [<code>CosineSimilarityLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#cosinesimilarityloss) with these parameters:
357
+ ```json
358
+ {
359
+ "loss_fct": "torch.nn.modules.loss.MSELoss"
360
+ }
361
+ ```
362
+
363
+ ### Training Hyperparameters
364
+ #### Non-Default Hyperparameters
365
+
366
+ - `eval_strategy`: epoch
367
+ - `learning_rate`: 2e-05
368
+ - `num_train_epochs`: 10
369
+ - `warmup_ratio`: 0.4
370
+ - `fp16`: True
371
+ - `batch_sampler`: no_duplicates
372
+
373
+ #### All Hyperparameters
374
+ <details><summary>Click to expand</summary>
375
+
376
+ - `overwrite_output_dir`: False
377
+ - `do_predict`: False
378
+ - `eval_strategy`: epoch
379
+ - `prediction_loss_only`: True
380
+ - `per_device_train_batch_size`: 8
381
+ - `per_device_eval_batch_size`: 8
382
+ - `per_gpu_train_batch_size`: None
383
+ - `per_gpu_eval_batch_size`: None
384
+ - `gradient_accumulation_steps`: 1
385
+ - `eval_accumulation_steps`: None
386
+ - `torch_empty_cache_steps`: None
387
+ - `learning_rate`: 2e-05
388
+ - `weight_decay`: 0.0
389
+ - `adam_beta1`: 0.9
390
+ - `adam_beta2`: 0.999
391
+ - `adam_epsilon`: 1e-08
392
+ - `max_grad_norm`: 1.0
393
+ - `num_train_epochs`: 10
394
+ - `max_steps`: -1
395
+ - `lr_scheduler_type`: linear
396
+ - `lr_scheduler_kwargs`: {}
397
+ - `warmup_ratio`: 0.4
398
+ - `warmup_steps`: 0
399
+ - `log_level`: passive
400
+ - `log_level_replica`: warning
401
+ - `log_on_each_node`: True
402
+ - `logging_nan_inf_filter`: True
403
+ - `save_safetensors`: True
404
+ - `save_on_each_node`: False
405
+ - `save_only_model`: False
406
+ - `restore_callback_states_from_checkpoint`: False
407
+ - `no_cuda`: False
408
+ - `use_cpu`: False
409
+ - `use_mps_device`: False
410
+ - `seed`: 42
411
+ - `data_seed`: None
412
+ - `jit_mode_eval`: False
413
+ - `use_ipex`: False
414
+ - `bf16`: False
415
+ - `fp16`: True
416
+ - `fp16_opt_level`: O1
417
+ - `half_precision_backend`: auto
418
+ - `bf16_full_eval`: False
419
+ - `fp16_full_eval`: False
420
+ - `tf32`: None
421
+ - `local_rank`: 0
422
+ - `ddp_backend`: None
423
+ - `tpu_num_cores`: None
424
+ - `tpu_metrics_debug`: False
425
+ - `debug`: []
426
+ - `dataloader_drop_last`: False
427
+ - `dataloader_num_workers`: 0
428
+ - `dataloader_prefetch_factor`: None
429
+ - `past_index`: -1
430
+ - `disable_tqdm`: False
431
+ - `remove_unused_columns`: True
432
+ - `label_names`: None
433
+ - `load_best_model_at_end`: False
434
+ - `ignore_data_skip`: False
435
+ - `fsdp`: []
436
+ - `fsdp_min_num_params`: 0
437
+ - `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
438
+ - `fsdp_transformer_layer_cls_to_wrap`: None
439
+ - `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
440
+ - `deepspeed`: None
441
+ - `label_smoothing_factor`: 0.0
442
+ - `optim`: adamw_torch
443
+ - `optim_args`: None
444
+ - `adafactor`: False
445
+ - `group_by_length`: False
446
+ - `length_column_name`: length
447
+ - `ddp_find_unused_parameters`: None
448
+ - `ddp_bucket_cap_mb`: None
449
+ - `ddp_broadcast_buffers`: False
450
+ - `dataloader_pin_memory`: True
451
+ - `dataloader_persistent_workers`: False
452
+ - `skip_memory_metrics`: True
453
+ - `use_legacy_prediction_loop`: False
454
+ - `push_to_hub`: False
455
+ - `resume_from_checkpoint`: None
456
+ - `hub_model_id`: None
457
+ - `hub_strategy`: every_save
458
+ - `hub_private_repo`: False
459
+ - `hub_always_push`: False
460
+ - `gradient_checkpointing`: False
461
+ - `gradient_checkpointing_kwargs`: None
462
+ - `include_inputs_for_metrics`: False
463
+ - `eval_do_concat_batches`: True
464
+ - `fp16_backend`: auto
465
+ - `push_to_hub_model_id`: None
466
+ - `push_to_hub_organization`: None
467
+ - `mp_parameters`:
468
+ - `auto_find_batch_size`: False
469
+ - `full_determinism`: False
470
+ - `torchdynamo`: None
471
+ - `ray_scope`: last
472
+ - `ddp_timeout`: 1800
473
+ - `torch_compile`: False
474
+ - `torch_compile_backend`: None
475
+ - `torch_compile_mode`: None
476
+ - `dispatch_batches`: None
477
+ - `split_batches`: None
478
+ - `include_tokens_per_second`: False
479
+ - `include_num_input_tokens_seen`: False
480
+ - `neftune_noise_alpha`: None
481
+ - `optim_target_modules`: None
482
+ - `batch_eval_metrics`: False
483
+ - `eval_on_start`: False
484
+ - `eval_use_gather_object`: False
485
+ - `batch_sampler`: no_duplicates
486
+ - `multi_dataset_batch_sampler`: proportional
487
+
488
+ </details>
489
+
490
+ ### Training Logs
491
+ | Epoch | Step | Training Loss | Validation Loss | custom-arc-semantics-data-jp_max_ap |
492
+ |:-----:|:----:|:-------------:|:------:|:-----------------------------------:|
493
+ | 1.0 | 6 | 0.3183 | 0.1717 | 0.8767 |
494
+ | 2.0 | 12 | 0.3026 | 0.1703 | 0.8767 |
495
+ | 3.0 | 18 | 0.2667 | 0.1662 | 0.8767 |
496
+ | 4.0 | 24 | 0.2164 | 0.1595 | 0.9267 |
497
+ | 5.0 | 30 | 0.1779 | 0.1680 | 0.9267 |
498
+ | 6.0 | 36 | 0.1271 | 0.1939 | 0.8767 |
499
+ | 7.0 | 42 | 0.1018 | 0.2169 | 0.8767 |
500
+ | 8.0 | 48 | 0.0824 | 0.2246 | 0.8767 |
501
+ | 9.0 | 54 | 0.0732 | 0.2209 | 0.8767 |
502
+ | 10.0 | 60 | 0.0672 | 0.2187 | 0.8767 |
503
+
504
+
505
+ ### Framework Versions
506
+ - Python: 3.10.14
507
+ - Sentence Transformers: 3.1.0
508
+ - Transformers: 4.44.2
509
+ - PyTorch: 2.4.1+cu121
510
+ - Accelerate: 0.34.2
511
+ - Datasets: 2.20.0
512
+ - Tokenizers: 0.19.1
513
+
514
+ ## Citation
515
+
516
+ ### BibTeX
517
+
518
+ #### Sentence Transformers
519
+ ```bibtex
520
+ @inproceedings{reimers-2019-sentence-bert,
521
+ title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
522
+ author = "Reimers, Nils and Gurevych, Iryna",
523
+ booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
524
+ month = "11",
525
+ year = "2019",
526
+ publisher = "Association for Computational Linguistics",
527
+ url = "https://arxiv.org/abs/1908.10084",
528
+ }
529
+ ```
530
+
531
+ <!--
532
+ ## Glossary
533
+
534
+ *Clearly define terms in order to be accessible across audiences.*
535
+ -->
536
+
537
+ <!--
538
+ ## Model Card Authors
539
+
540
+ *Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
541
+ -->
542
+
543
+ <!--
544
+ ## Model Card Contact
545
+
546
+ *Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
547
+ -->
checkpoint-60/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[PAD]": 32000
3
+ }
checkpoint-60/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "colorfulscoop/sbert-base-ja",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout": null,
9
+ "cls_token_id": 2,
10
+ "eos_token_id": 3,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "layer_norm_eps": 1e-12,
18
+ "mask_token_id": 4,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "sep_token_id": 3,
26
+ "tokenizer_class": "DebertaV2Tokenizer",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.44.2",
29
+ "type_vocab_size": 2,
30
+ "unk_token_id": 1,
31
+ "use_cache": true,
32
+ "vocab_size": 32000
33
+ }
checkpoint-60/config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.0",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
checkpoint-60/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0158dcd40aad79e19f1987892a9aa626466edcac34128ea69556407560fad1e5
3
+ size 442491744
checkpoint-60/modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
checkpoint-60/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:397a8c127f329e75b0772747b593b6f21a20c6b2c9bb829b7cc6d193062c2148
3
+ size 880373306
checkpoint-60/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad9ad5f8b582fdf935b76487b4ce3a50440db403519c318e76d5212cb54ef38b
3
+ size 13990
checkpoint-60/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e89c67ef1a6d101b02bb26c7df91ea55b73bf474e2d9389ae8dfa1e065ee8883
3
+ size 1064
checkpoint-60/sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
checkpoint-60/special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "<unk>",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
checkpoint-60/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6467857b4b0c77ded9bac7ad2fb5c16eb64e17e417ce46624dacac2bbb404fc
3
+ size 802713
checkpoint-60/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-60/tokenizer_config.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<pad>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": true,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[CLS]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": false
26
+ },
27
+ "3": {
28
+ "content": "[SEP]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": false
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": false
42
+ },
43
+ "32000": {
44
+ "content": "[PAD]",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ }
51
+ },
52
+ "bos_token": "[CLS]",
53
+ "clean_up_tokenization_spaces": true,
54
+ "cls_token": "[CLS]",
55
+ "do_lower_case": false,
56
+ "eos_token": "[SEP]",
57
+ "mask_token": "[MASK]",
58
+ "model_max_length": 512,
59
+ "pad_token": "<pad>",
60
+ "sep_token": "[SEP]",
61
+ "sp_model_kwargs": {},
62
+ "split_by_punct": false,
63
+ "tokenizer_class": "DebertaV2Tokenizer",
64
+ "unk_token": "<unk>"
65
+ }
checkpoint-60/trainer_state.json ADDED
@@ -0,0 +1,533 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "eval_steps": 50,
6
+ "global_step": 60,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 1.4512845277786255,
14
+ "learning_rate": 5e-06,
15
+ "loss": 0.3183,
16
+ "step": 6
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
21
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6471868753433228,
22
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
23
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
24
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6471868753433228,
25
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
26
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
27
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
28
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 345.4730529785156,
29
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
30
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
31
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 345.4730529785156,
32
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
33
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
34
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
35
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.563411712646484,
36
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.81,
37
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
38
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.563411712646484,
39
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
40
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
41
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
42
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 429.613037109375,
43
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.81,
44
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
45
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 429.613037109375,
46
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
47
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
48
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
49
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 429.613037109375,
50
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
51
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
52
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 429.613037109375,
53
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
54
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
55
+ "eval_loss": 0.17167754471302032,
56
+ "eval_runtime": 2.9189,
57
+ "eval_samples_per_second": 2.056,
58
+ "eval_steps_per_second": 0.343,
59
+ "step": 6
60
+ },
61
+ {
62
+ "epoch": 2.0,
63
+ "grad_norm": 1.304966688156128,
64
+ "learning_rate": 1e-05,
65
+ "loss": 0.3026,
66
+ "step": 12
67
+ },
68
+ {
69
+ "epoch": 2.0,
70
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
71
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6410852670669556,
72
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
73
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
74
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6410852670669556,
75
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
76
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
77
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
78
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 341.5276184082031,
79
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
80
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
81
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 341.5276184082031,
82
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
83
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
84
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
85
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.734420776367188,
86
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
87
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
88
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.734420776367188,
89
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
90
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
91
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
92
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 434.0592346191406,
93
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
94
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
95
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 434.0592346191406,
96
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
97
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
98
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
99
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 434.0592346191406,
100
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
101
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
102
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 434.0592346191406,
103
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
104
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
105
+ "eval_loss": 0.17033492028713226,
106
+ "eval_runtime": 2.9121,
107
+ "eval_samples_per_second": 2.06,
108
+ "eval_steps_per_second": 0.343,
109
+ "step": 12
110
+ },
111
+ {
112
+ "epoch": 3.0,
113
+ "grad_norm": 1.0188632011413574,
114
+ "learning_rate": 1.5000000000000002e-05,
115
+ "loss": 0.2667,
116
+ "step": 18
117
+ },
118
+ {
119
+ "epoch": 3.0,
120
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
121
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6364033818244934,
122
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
123
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
124
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6364033818244934,
125
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
126
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
127
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
128
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 338.5497131347656,
129
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
130
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
131
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 338.5497131347656,
132
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
133
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
134
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
135
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.83578109741211,
136
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
137
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
138
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.83578109741211,
139
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
140
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
141
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
142
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 437.1195068359375,
143
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
144
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
145
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 437.1195068359375,
146
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
147
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
148
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
149
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 437.1195068359375,
150
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
151
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
152
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 437.1195068359375,
153
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
154
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
155
+ "eval_loss": 0.16623182594776154,
156
+ "eval_runtime": 3.4915,
157
+ "eval_samples_per_second": 1.718,
158
+ "eval_steps_per_second": 0.286,
159
+ "step": 18
160
+ },
161
+ {
162
+ "epoch": 4.0,
163
+ "grad_norm": 0.5784842371940613,
164
+ "learning_rate": 2e-05,
165
+ "loss": 0.2164,
166
+ "step": 24
167
+ },
168
+ {
169
+ "epoch": 4.0,
170
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
171
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.6302204132080078,
172
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
173
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
174
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.6302204132080078,
175
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
176
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
177
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
178
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 385.5712585449219,
179
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.9266666666666665,
180
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
181
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 339.04254150390625,
182
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
183
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
184
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
185
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 19.902908325195312,
186
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
187
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
188
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 19.902908325195312,
189
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
190
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
191
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
192
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 438.79205322265625,
193
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
194
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
195
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 438.79205322265625,
196
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
197
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
198
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
199
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 438.79205322265625,
200
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.9266666666666665,
201
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
202
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 438.79205322265625,
203
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
204
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
205
+ "eval_loss": 0.15949387848377228,
206
+ "eval_runtime": 2.6844,
207
+ "eval_samples_per_second": 2.235,
208
+ "eval_steps_per_second": 0.373,
209
+ "step": 24
210
+ },
211
+ {
212
+ "epoch": 5.0,
213
+ "grad_norm": 0.4092578589916229,
214
+ "learning_rate": 1.6666666666666667e-05,
215
+ "loss": 0.1779,
216
+ "step": 30
217
+ },
218
+ {
219
+ "epoch": 5.0,
220
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
221
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.587942361831665,
222
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
223
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
224
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.587942361831665,
225
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
226
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
227
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
228
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 369.192626953125,
229
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.9266666666666665,
230
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
231
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 318.3497619628906,
232
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
233
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
234
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
235
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 21.073081970214844,
236
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
237
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
238
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 21.073081970214844,
239
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
240
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
241
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
242
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 462.51629638671875,
243
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
244
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
245
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 462.51629638671875,
246
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
247
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
248
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
249
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 462.51629638671875,
250
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.9266666666666665,
251
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
252
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 462.51629638671875,
253
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
254
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
255
+ "eval_loss": 0.16796135902404785,
256
+ "eval_runtime": 3.0306,
257
+ "eval_samples_per_second": 1.98,
258
+ "eval_steps_per_second": 0.33,
259
+ "step": 30
260
+ },
261
+ {
262
+ "epoch": 6.0,
263
+ "grad_norm": 0.45830854773521423,
264
+ "learning_rate": 1.3333333333333333e-05,
265
+ "loss": 0.1271,
266
+ "step": 36
267
+ },
268
+ {
269
+ "epoch": 6.0,
270
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
271
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.5134342908859253,
272
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
273
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
274
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.5134342908859253,
275
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
276
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
277
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
278
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 278.04107666015625,
279
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
280
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
281
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 278.04107666015625,
282
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
283
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
284
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
285
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 22.917387008666992,
286
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
287
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
288
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 22.917387008666992,
289
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
290
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
291
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
292
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 502.63287353515625,
293
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
294
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
295
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 502.63287353515625,
296
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
297
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
298
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
299
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 502.63287353515625,
300
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
301
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
302
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 502.63287353515625,
303
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
304
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
305
+ "eval_loss": 0.19386596977710724,
306
+ "eval_runtime": 2.8354,
307
+ "eval_samples_per_second": 2.116,
308
+ "eval_steps_per_second": 0.353,
309
+ "step": 36
310
+ },
311
+ {
312
+ "epoch": 7.0,
313
+ "grad_norm": 0.3822881579399109,
314
+ "learning_rate": 1e-05,
315
+ "loss": 0.1018,
316
+ "step": 42
317
+ },
318
+ {
319
+ "epoch": 7.0,
320
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
321
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.46284571290016174,
322
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
323
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
324
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.46284571290016174,
325
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
326
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
327
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
328
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 249.8519287109375,
329
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
330
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
331
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 249.8519287109375,
332
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
333
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
334
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
335
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.051647186279297,
336
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
337
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
338
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.051647186279297,
339
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
340
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
341
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
342
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 527.3822021484375,
343
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
344
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
345
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 527.3822021484375,
346
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
347
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
348
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
349
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 527.3822021484375,
350
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
351
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
352
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 527.3822021484375,
353
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
354
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
355
+ "eval_loss": 0.2168869525194168,
356
+ "eval_runtime": 2.7296,
357
+ "eval_samples_per_second": 2.198,
358
+ "eval_steps_per_second": 0.366,
359
+ "step": 42
360
+ },
361
+ {
362
+ "epoch": 8.0,
363
+ "grad_norm": 0.3190430998802185,
364
+ "learning_rate": 6.666666666666667e-06,
365
+ "loss": 0.0824,
366
+ "step": 48
367
+ },
368
+ {
369
+ "epoch": 8.0,
370
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
371
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.45021578669548035,
372
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
373
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
374
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.45021578669548035,
375
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
376
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
377
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
378
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 241.99093627929688,
379
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
380
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
381
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 241.99093627929688,
382
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
383
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
384
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
385
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.27983283996582,
386
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
387
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
388
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.27983283996582,
389
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
390
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
391
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
392
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 532.0448608398438,
393
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
394
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
395
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 532.0448608398438,
396
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
397
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
398
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
399
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 532.0448608398438,
400
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
401
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
402
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 532.0448608398438,
403
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
404
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
405
+ "eval_loss": 0.2245994359254837,
406
+ "eval_runtime": 2.6403,
407
+ "eval_samples_per_second": 2.273,
408
+ "eval_steps_per_second": 0.379,
409
+ "step": 48
410
+ },
411
+ {
412
+ "epoch": 9.0,
413
+ "grad_norm": 0.2815457880496979,
414
+ "learning_rate": 3.3333333333333333e-06,
415
+ "loss": 0.0732,
416
+ "step": 54
417
+ },
418
+ {
419
+ "epoch": 9.0,
420
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
421
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.45798632502555847,
422
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
423
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
424
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.45798632502555847,
425
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
426
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
427
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
428
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 245.57119750976562,
429
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
430
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
431
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 245.57119750976562,
432
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
433
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
434
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
435
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 24.071979522705078,
436
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
437
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
438
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 24.071979522705078,
439
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
440
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
441
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
442
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 527.4176025390625,
443
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
444
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
445
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 527.4176025390625,
446
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
447
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
448
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
449
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 527.4176025390625,
450
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
451
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
452
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 527.4176025390625,
453
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
454
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
455
+ "eval_loss": 0.22094659507274628,
456
+ "eval_runtime": 2.8393,
457
+ "eval_samples_per_second": 2.113,
458
+ "eval_steps_per_second": 0.352,
459
+ "step": 54
460
+ },
461
+ {
462
+ "epoch": 10.0,
463
+ "grad_norm": 0.32951635122299194,
464
+ "learning_rate": 0.0,
465
+ "loss": 0.0672,
466
+ "step": 60
467
+ },
468
+ {
469
+ "epoch": 10.0,
470
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy": 0.6666666666666666,
471
+ "eval_custom-arc-semantics-data-jp_cosine_accuracy_threshold": 0.4631122350692749,
472
+ "eval_custom-arc-semantics-data-jp_cosine_ap": 0.8766666666666667,
473
+ "eval_custom-arc-semantics-data-jp_cosine_f1": 0.8000000000000002,
474
+ "eval_custom-arc-semantics-data-jp_cosine_f1_threshold": 0.4631122350692749,
475
+ "eval_custom-arc-semantics-data-jp_cosine_precision": 0.8,
476
+ "eval_custom-arc-semantics-data-jp_cosine_recall": 0.8,
477
+ "eval_custom-arc-semantics-data-jp_dot_accuracy": 0.6666666666666666,
478
+ "eval_custom-arc-semantics-data-jp_dot_accuracy_threshold": 248.13394165039062,
479
+ "eval_custom-arc-semantics-data-jp_dot_ap": 0.8766666666666667,
480
+ "eval_custom-arc-semantics-data-jp_dot_f1": 0.8000000000000002,
481
+ "eval_custom-arc-semantics-data-jp_dot_f1_threshold": 248.13394165039062,
482
+ "eval_custom-arc-semantics-data-jp_dot_precision": 0.8,
483
+ "eval_custom-arc-semantics-data-jp_dot_recall": 0.8,
484
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy": 0.6666666666666666,
485
+ "eval_custom-arc-semantics-data-jp_euclidean_accuracy_threshold": 23.945947647094727,
486
+ "eval_custom-arc-semantics-data-jp_euclidean_ap": 0.8766666666666667,
487
+ "eval_custom-arc-semantics-data-jp_euclidean_f1": 0.8000000000000002,
488
+ "eval_custom-arc-semantics-data-jp_euclidean_f1_threshold": 23.945947647094727,
489
+ "eval_custom-arc-semantics-data-jp_euclidean_precision": 0.8,
490
+ "eval_custom-arc-semantics-data-jp_euclidean_recall": 0.8,
491
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy": 0.6666666666666666,
492
+ "eval_custom-arc-semantics-data-jp_manhattan_accuracy_threshold": 524.65185546875,
493
+ "eval_custom-arc-semantics-data-jp_manhattan_ap": 0.8766666666666667,
494
+ "eval_custom-arc-semantics-data-jp_manhattan_f1": 0.8000000000000002,
495
+ "eval_custom-arc-semantics-data-jp_manhattan_f1_threshold": 524.65185546875,
496
+ "eval_custom-arc-semantics-data-jp_manhattan_precision": 0.8,
497
+ "eval_custom-arc-semantics-data-jp_manhattan_recall": 0.8,
498
+ "eval_custom-arc-semantics-data-jp_max_accuracy": 0.6666666666666666,
499
+ "eval_custom-arc-semantics-data-jp_max_accuracy_threshold": 524.65185546875,
500
+ "eval_custom-arc-semantics-data-jp_max_ap": 0.8766666666666667,
501
+ "eval_custom-arc-semantics-data-jp_max_f1": 0.8000000000000002,
502
+ "eval_custom-arc-semantics-data-jp_max_f1_threshold": 524.65185546875,
503
+ "eval_custom-arc-semantics-data-jp_max_precision": 0.8,
504
+ "eval_custom-arc-semantics-data-jp_max_recall": 0.8,
505
+ "eval_loss": 0.21867813169956207,
506
+ "eval_runtime": 2.9145,
507
+ "eval_samples_per_second": 2.059,
508
+ "eval_steps_per_second": 0.343,
509
+ "step": 60
510
+ }
511
+ ],
512
+ "logging_steps": 500,
513
+ "max_steps": 60,
514
+ "num_input_tokens_seen": 0,
515
+ "num_train_epochs": 10,
516
+ "save_steps": 100,
517
+ "stateful_callbacks": {
518
+ "TrainerControl": {
519
+ "args": {
520
+ "should_epoch_stop": false,
521
+ "should_evaluate": false,
522
+ "should_log": false,
523
+ "should_save": true,
524
+ "should_training_stop": true
525
+ },
526
+ "attributes": {}
527
+ }
528
+ },
529
+ "total_flos": 0.0,
530
+ "train_batch_size": 8,
531
+ "trial_name": null,
532
+ "trial_params": null
533
+ }
checkpoint-60/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:357936512702101c3d6fcb1fbc6019e2e1a0c6628f613da90d340ef26a75e926
3
+ size 5432
config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "colorfulscoop/sbert-base-ja",
3
+ "architectures": [
4
+ "BertModel"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 2,
8
+ "classifier_dropout": null,
9
+ "cls_token_id": 2,
10
+ "eos_token_id": 3,
11
+ "gradient_checkpointing": false,
12
+ "hidden_act": "gelu",
13
+ "hidden_dropout_prob": 0.1,
14
+ "hidden_size": 768,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 3072,
17
+ "layer_norm_eps": 1e-12,
18
+ "mask_token_id": 4,
19
+ "max_position_embeddings": 512,
20
+ "model_type": "bert",
21
+ "num_attention_heads": 12,
22
+ "num_hidden_layers": 12,
23
+ "pad_token_id": 0,
24
+ "position_embedding_type": "absolute",
25
+ "sep_token_id": 3,
26
+ "tokenizer_class": "DebertaV2Tokenizer",
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.44.2",
29
+ "type_vocab_size": 2,
30
+ "unk_token_id": 1,
31
+ "use_cache": true,
32
+ "vocab_size": 32000
33
+ }
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "__version__": {
3
+ "sentence_transformers": "3.1.0",
4
+ "transformers": "4.44.2",
5
+ "pytorch": "2.4.1+cu121"
6
+ },
7
+ "prompts": {},
8
+ "default_prompt_name": null,
9
+ "similarity_fn_name": null
10
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0158dcd40aad79e19f1987892a9aa626466edcac34128ea69556407560fad1e5
3
+ size 442491744
modules.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "idx": 0,
4
+ "name": "0",
5
+ "path": "",
6
+ "type": "sentence_transformers.models.Transformer"
7
+ },
8
+ {
9
+ "idx": 1,
10
+ "name": "1",
11
+ "path": "1_Pooling",
12
+ "type": "sentence_transformers.models.Pooling"
13
+ }
14
+ ]
runs/Sep17_22-48-14_default/events.out.tfevents.1726613296.default.7605.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:af880816a76b412a78fcd9121f913fed065b2240d4e6bf0aebcfdb6db59de6c8
3
+ size 39878
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "max_seq_length": 512,
3
+ "do_lower_case": false
4
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "<pad>",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": {
9
+ "content": "<unk>",
10
+ "lstrip": false,
11
+ "normalized": true,
12
+ "rstrip": false,
13
+ "single_word": false
14
+ }
15
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6467857b4b0c77ded9bac7ad2fb5c16eb64e17e417ce46624dacac2bbb404fc
3
+ size 802713
tokenizer_config.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 512,
9
+ "pad_token": "<pad>",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "split_by_punct": false,
13
+ "tokenizer_class": "DebertaV2Tokenizer",
14
+ "unk_token": "<unk>"
15
+ }