karsar's picture
Update README.md
68dd128 verified
|
raw
history blame
93 kB

model-index:

  • name: karsar/paraphrase-multilingual-MiniLM-L12-hu_v1 results:
    • dataset: config: hun_Latn-hun_Latn name: MTEB BelebeleRetrieval (hun_Latn-hun_Latn) revision: 75b399394a9803252cfec289d103de462763db7c split: test type: facebook/belebele metrics:
      • type: main_score value: 77.865
      • type: map_at_1 value: 67.333
      • type: map_at_10 value: 74.404
      • type: map_at_100 value: 74.802
      • type: map_at_1000 value: 74.809
      • type: map_at_20 value: 74.63
      • type: map_at_3 value: 72.796
      • type: map_at_5 value: 73.67399999999999
      • type: mrr_at_1 value: 67.33333333333333
      • type: mrr_at_10 value: 74.40396825396829
      • type: mrr_at_100 value: 74.80177264047548
      • type: mrr_at_1000 value: 74.80937346439818
      • type: mrr_at_20 value: 74.62979204843244
      • type: mrr_at_3 value: 72.7962962962963
      • type: mrr_at_5 value: 73.6740740740741
      • type: nauc_map_at_1000_diff1 value: 76.08133094195743
      • type: nauc_map_at_1000_max value: 61.727834175182736
      • type: nauc_map_at_1000_std value: -2.3231732437794568
      • type: nauc_map_at_100_diff1 value: 76.07916259051902
      • type: nauc_map_at_100_max value: 61.72703450852774
      • type: nauc_map_at_100_std value: -2.3175338063349575
      • type: nauc_map_at_10_diff1 value: 75.97996147738112
      • type: nauc_map_at_10_max value: 61.860784493617224
      • type: nauc_map_at_10_std value: -2.4887315051072356
      • type: nauc_map_at_1_diff1 value: 78.13561632940586
      • type: nauc_map_at_1_max value: 59.243520843511746
      • type: nauc_map_at_1_std value: -2.6689239089679515
      • type: nauc_map_at_20_diff1 value: 76.06883452011327
      • type: nauc_map_at_20_max value: 61.775589074510826
      • type: nauc_map_at_20_std value: -2.3905575770447585
      • type: nauc_map_at_3_diff1 value: 75.85937006372846
      • type: nauc_map_at_3_max value: 61.819093557650895
      • type: nauc_map_at_3_std value: -2.5207238945764647
      • type: nauc_map_at_5_diff1 value: 76.06929563357589
      • type: nauc_map_at_5_max value: 61.93563829360039
      • type: nauc_map_at_5_std value: -1.9424637593671918
      • type: nauc_mrr_at_1000_diff1 value: 76.08133094195743
      • type: nauc_mrr_at_1000_max value: 61.727834175182736
      • type: nauc_mrr_at_1000_std value: -2.3231732437794568
      • type: nauc_mrr_at_100_diff1 value: 76.07916259051902
      • type: nauc_mrr_at_100_max value: 61.72703450852774
      • type: nauc_mrr_at_100_std value: -2.3175338063349575
      • type: nauc_mrr_at_10_diff1 value: 75.97996147738112
      • type: nauc_mrr_at_10_max value: 61.860784493617224
      • type: nauc_mrr_at_10_std value: -2.4887315051072356
      • type: nauc_mrr_at_1_diff1 value: 78.13561632940586
      • type: nauc_mrr_at_1_max value: 59.243520843511746
      • type: nauc_mrr_at_1_std value: -2.6689239089679515
      • type: nauc_mrr_at_20_diff1 value: 76.06883452011327
      • type: nauc_mrr_at_20_max value: 61.775589074510826
      • type: nauc_mrr_at_20_std value: -2.3905575770447585
      • type: nauc_mrr_at_3_diff1 value: 75.85937006372846
      • type: nauc_mrr_at_3_max value: 61.819093557650895
      • type: nauc_mrr_at_3_std value: -2.5207238945764647
      • type: nauc_mrr_at_5_diff1 value: 76.06929563357589
      • type: nauc_mrr_at_5_max value: 61.93563829360039
      • type: nauc_mrr_at_5_std value: -1.9424637593671918
      • type: nauc_ndcg_at_1000_diff1 value: 75.7057240434196
      • type: nauc_ndcg_at_1000_max value: 62.021717989510385
      • type: nauc_ndcg_at_1000_std value: -2.2522490330905103
      • type: nauc_ndcg_at_100_diff1 value: 75.62156032414751
      • type: nauc_ndcg_at_100_max value: 61.97932968109654
      • type: nauc_ndcg_at_100_std value: -2.0118635701265375
      • type: nauc_ndcg_at_10_diff1 value: 75.09836101324169
      • type: nauc_ndcg_at_10_max value: 62.703427209156736
      • type: nauc_ndcg_at_10_std value: -2.9287738405282395
      • type: nauc_ndcg_at_1_diff1 value: 78.13561632940586
      • type: nauc_ndcg_at_1_max value: 59.243520843511746
      • type: nauc_ndcg_at_1_std value: -2.6689239089679515
      • type: nauc_ndcg_at_20_diff1 value: 75.46348763248093
      • type: nauc_ndcg_at_20_max value: 62.35498579351012
      • type: nauc_ndcg_at_20_std value: -2.577338920595739
      • type: nauc_ndcg_at_3_diff1 value: 74.92773626606146
      • type: nauc_ndcg_at_3_max value: 62.55812080913172
      • type: nauc_ndcg_at_3_std value: -2.5630879822636476
      • type: nauc_ndcg_at_5_diff1 value: 75.3100398038724
      • type: nauc_ndcg_at_5_max value: 62.81733471459409
      • type: nauc_ndcg_at_5_std value: -1.501748019065971
      • type: nauc_precision_at_1000_diff1 value: .nan
      • type: nauc_precision_at_1000_max value: .nan
      • type: nauc_precision_at_1000_std value: .nan
      • type: nauc_precision_at_100_diff1 value: 66.63165266106552
      • type: nauc_precision_at_100_max value: 57.60582010582053
      • type: nauc_precision_at_100_std value: 23.844537815126937
      • type: nauc_precision_at_10_diff1 value: 70.08984254109942
      • type: nauc_precision_at_10_max value: 67.45880653843606
      • type: nauc_precision_at_10_std value: -6.3555626412584
      • type: nauc_precision_at_1_diff1 value: 78.13561632940586
      • type: nauc_precision_at_1_max value: 59.243520843511746
      • type: nauc_precision_at_1_std value: -2.6689239089679515
      • type: nauc_precision_at_20_diff1 value: 71.63306637208878
      • type: nauc_precision_at_20_max value: 65.99137307505141
      • type: nauc_precision_at_20_std value: -4.675767020423249
      • type: nauc_precision_at_3_diff1 value: 71.57608769475272
      • type: nauc_precision_at_3_max value: 65.10683383365713
      • type: nauc_precision_at_3_std value: -2.7514636167292985
      • type: nauc_precision_at_5_diff1 value: 72.21412151067312
      • type: nauc_precision_at_5_max value: 66.43448275862069
      • type: nauc_precision_at_5_std value: 0.4555008210180189
      • type: nauc_recall_at_1000_diff1 value: .nan
      • type: nauc_recall_at_1000_max value: .nan
      • type: nauc_recall_at_1000_std value: .nan
      • type: nauc_recall_at_100_diff1 value: 66.63165266106327
      • type: nauc_recall_at_100_max value: 57.60582010581922
      • type: nauc_recall_at_100_std value: 23.844537815125907
      • type: nauc_recall_at_10_diff1 value: 70.08984254109967
      • type: nauc_recall_at_10_max value: 67.45880653843632
      • type: nauc_recall_at_10_std value: -6.355562641258283
      • type: nauc_recall_at_1_diff1 value: 78.13561632940586
      • type: nauc_recall_at_1_max value: 59.243520843511746
      • type: nauc_recall_at_1_std value: -2.6689239089679515
      • type: nauc_recall_at_20_diff1 value: 71.6330663720887
      • type: nauc_recall_at_20_max value: 65.9913730750516
      • type: nauc_recall_at_20_std value: -4.675767020422999
      • type: nauc_recall_at_3_diff1 value: 71.57608769475274
      • type: nauc_recall_at_3_max value: 65.106833833657
      • type: nauc_recall_at_3_std value: -2.7514636167294
      • type: nauc_recall_at_5_diff1 value: 72.21412151067315
      • type: nauc_recall_at_5_max value: 66.43448275862077
      • type: nauc_recall_at_5_std value: 0.4555008210180812
      • type: ndcg_at_1 value: 67.333
      • type: ndcg_at_10 value: 77.865
      • type: ndcg_at_100 value: 79.927
      • type: ndcg_at_1000 value: 80.104
      • type: ndcg_at_20 value: 78.701
      • type: ndcg_at_3 value: 74.509
      • type: ndcg_at_5 value: 76.101
      • type: precision_at_1 value: 67.333
      • type: precision_at_10 value: 8.878
      • type: precision_at_100 value: 0.987
      • type: precision_at_1000 value: 0.1
      • type: precision_at_20 value: 4.606
      • type: precision_at_3 value: 26.480999999999998
      • type: precision_at_5 value: 16.667
      • type: recall_at_1 value: 67.333
      • type: recall_at_10 value: 88.778
      • type: recall_at_100 value: 98.667
      • type: recall_at_1000 value: 100.0
      • type: recall_at_20 value: 92.111
      • type: recall_at_3 value: 79.444
      • type: recall_at_5 value: 83.333 task: type: Retrieval
    • dataset: config: hun_Latn-eng_Latn name: MTEB BelebeleRetrieval (hun_Latn-eng_Latn) revision: 75b399394a9803252cfec289d103de462763db7c split: test type: facebook/belebele metrics:
      • type: main_score value: 71.307
      • type: map_at_1 value: 57.778
      • type: map_at_10 value: 66.843
      • type: map_at_100 value: 67.368
      • type: map_at_1000 value: 67.38300000000001
      • type: map_at_20 value: 67.162
      • type: map_at_3 value: 64.704
      • type: map_at_5 value: 65.97
      • type: mrr_at_1 value: 57.77777777777777
      • type: mrr_at_10 value: 66.8428130511464
      • type: mrr_at_100 value: 67.36803803097415
      • type: mrr_at_1000 value: 67.38317813286176
      • type: mrr_at_20 value: 67.16164827986293
      • type: mrr_at_3 value: 64.7037037037037
      • type: mrr_at_5 value: 65.97037037037038
      • type: nauc_map_at_1000_diff1 value: 69.02219987684592
      • type: nauc_map_at_1000_max value: 60.114123597785785
      • type: nauc_map_at_1000_std value: 4.880216382742553
      • type: nauc_map_at_100_diff1 value: 69.01116363727591
      • type: nauc_map_at_100_max value: 60.11716622079215
      • type: nauc_map_at_100_std value: 4.890393343425179
      • type: nauc_map_at_10_diff1 value: 68.95240309900163
      • type: nauc_map_at_10_max value: 60.124170478386105
      • type: nauc_map_at_10_std value: 4.819161459028938
      • type: nauc_map_at_1_diff1 value: 72.45335820895522
      • type: nauc_map_at_1_max value: 59.127316006176
      • type: nauc_map_at_1_std value: 6.580191713844538
      • type: nauc_map_at_20_diff1 value: 68.87249492072671
      • type: nauc_map_at_20_max value: 60.04834608184139
      • type: nauc_map_at_20_std value: 4.807958211395879
      • type: nauc_map_at_3_diff1 value: 69.38092756897547
      • type: nauc_map_at_3_max value: 60.30271451423346
      • type: nauc_map_at_3_std value: 3.9374045068220322
      • type: nauc_map_at_5_diff1 value: 69.10875854889262
      • type: nauc_map_at_5_max value: 60.24557626138646
      • type: nauc_map_at_5_std value: 4.271289591515184
      • type: nauc_mrr_at_1000_diff1 value: 69.02219987684592
      • type: nauc_mrr_at_1000_max value: 60.114123597785785
      • type: nauc_mrr_at_1000_std value: 4.880216382742553
      • type: nauc_mrr_at_100_diff1 value: 69.01116363727591
      • type: nauc_mrr_at_100_max value: 60.11716622079215
      • type: nauc_mrr_at_100_std value: 4.890393343425179
      • type: nauc_mrr_at_10_diff1 value: 68.95240309900163
      • type: nauc_mrr_at_10_max value: 60.124170478386105
      • type: nauc_mrr_at_10_std value: 4.819161459028938
      • type: nauc_mrr_at_1_diff1 value: 72.45335820895522
      • type: nauc_mrr_at_1_max value: 59.127316006176
      • type: nauc_mrr_at_1_std value: 6.580191713844538
      • type: nauc_mrr_at_20_diff1 value: 68.87249492072671
      • type: nauc_mrr_at_20_max value: 60.04834608184139
      • type: nauc_mrr_at_20_std value: 4.807958211395879
      • type: nauc_mrr_at_3_diff1 value: 69.38092756897547
      • type: nauc_mrr_at_3_max value: 60.30271451423346
      • type: nauc_mrr_at_3_std value: 3.9374045068220322
      • type: nauc_mrr_at_5_diff1 value: 69.10875854889262
      • type: nauc_mrr_at_5_max value: 60.24557626138646
      • type: nauc_mrr_at_5_std value: 4.271289591515184
      • type: nauc_ndcg_at_1000_diff1 value: 68.36151731152576
      • type: nauc_ndcg_at_1000_max value: 60.21499073164881
      • type: nauc_ndcg_at_1000_std value: 5.019374170320369
      • type: nauc_ndcg_at_100_diff1 value: 68.12777182930174
      • type: nauc_ndcg_at_100_max value: 60.293069076013296
      • type: nauc_ndcg_at_100_std value: 5.375522795479381
      • type: nauc_ndcg_at_10_diff1 value: 67.46914440211127
      • type: nauc_ndcg_at_10_max value: 60.210209508170976
      • type: nauc_ndcg_at_10_std value: 4.921793458534013
      • type: nauc_ndcg_at_1_diff1 value: 72.45335820895522
      • type: nauc_ndcg_at_1_max value: 59.127316006176
      • type: nauc_ndcg_at_1_std value: 6.580191713844538
      • type: nauc_ndcg_at_20_diff1 value: 67.09692054164125
      • type: nauc_ndcg_at_20_max value: 59.89689460185056
      • type: nauc_ndcg_at_20_std value: 4.977631579372532
      • type: nauc_ndcg_at_3_diff1 value: 68.54468748113734
      • type: nauc_ndcg_at_3_max value: 60.66886257099051
      • type: nauc_ndcg_at_3_std value: 3.073807310026356
      • type: nauc_ndcg_at_5_diff1 value: 67.94441056262235
      • type: nauc_ndcg_at_5_max value: 60.47774252804478
      • type: nauc_ndcg_at_5_std value: 3.572034464519458
      • type: nauc_precision_at_1000_diff1 value: .nan
      • type: nauc_precision_at_1000_max value: .nan
      • type: nauc_precision_at_1000_std value: .nan
      • type: nauc_precision_at_100_diff1 value: 52.808123249299676
      • type: nauc_precision_at_100_max value: 65.81699346405254
      • type: nauc_precision_at_100_std value: 31.809056956116383
      • type: nauc_precision_at_10_diff1 value: 59.02820830750145
      • type: nauc_precision_at_10_max value: 60.33787972721626
      • type: nauc_precision_at_10_std value: 6.405175213296739
      • type: nauc_precision_at_1_diff1 value: 72.45335820895522
      • type: nauc_precision_at_1_max value: 59.127316006176
      • type: nauc_precision_at_1_std value: 6.580191713844538
      • type: nauc_precision_at_20_diff1 value: 52.242994576107485
      • type: nauc_precision_at_20_max value: 57.56617253643015
      • type: nauc_precision_at_20_std value: 7.9884388212213455
      • type: nauc_precision_at_3_diff1 value: 65.73191064426206
      • type: nauc_precision_at_3_max value: 61.92373010829596
      • type: nauc_precision_at_3_std value: 0.096317142458587
      • type: nauc_precision_at_5_diff1 value: 63.20464039592358
      • type: nauc_precision_at_5_max value: 61.25721735891223
      • type: nauc_precision_at_5_std value: 0.7937099220392029
      • type: nauc_recall_at_1000_diff1 value: .nan
      • type: nauc_recall_at_1000_max value: .nan
      • type: nauc_recall_at_1000_std value: .nan
      • type: nauc_recall_at_100_diff1 value: 52.80812324929921
      • type: nauc_recall_at_100_max value: 65.81699346405242
      • type: nauc_recall_at_100_std value: 31.809056956115235
      • type: nauc_recall_at_10_diff1 value: 59.02820830750159
      • type: nauc_recall_at_10_max value: 60.337879727216446
      • type: nauc_recall_at_10_std value: 6.405175213296646
      • type: nauc_recall_at_1_diff1 value: 72.45335820895522
      • type: nauc_recall_at_1_max value: 59.127316006176
      • type: nauc_recall_at_1_std value: 6.580191713844538
      • type: nauc_recall_at_20_diff1 value: 52.242994576107534
      • type: nauc_recall_at_20_max value: 57.56617253643034
      • type: nauc_recall_at_20_std value: 7.988438821221468
      • type: nauc_recall_at_3_diff1 value: 65.73191064426209
      • type: nauc_recall_at_3_max value: 61.923730108295906
      • type: nauc_recall_at_3_std value: 0.09631714245861488
      • type: nauc_recall_at_5_diff1 value: 63.204640395923626
      • type: nauc_recall_at_5_max value: 61.25721735891235
      • type: nauc_recall_at_5_std value: 0.7937099220392697
      • type: ndcg_at_1 value: 57.778
      • type: ndcg_at_10 value: 71.307
      • type: ndcg_at_100 value: 73.942
      • type: ndcg_at_1000 value: 74.248
      • type: ndcg_at_20 value: 72.499
      • type: ndcg_at_3 value: 66.95
      • type: ndcg_at_5 value: 69.21199999999999
      • type: precision_at_1 value: 57.778
      • type: precision_at_10 value: 8.533
      • type: precision_at_100 value: 0.9780000000000001
      • type: precision_at_1000 value: 0.1
      • type: precision_at_20 value: 4.506
      • type: precision_at_3 value: 24.481
      • type: precision_at_5 value: 15.778
      • type: recall_at_1 value: 57.778
      • type: recall_at_10 value: 85.333
      • type: recall_at_100 value: 97.77799999999999
      • type: recall_at_1000 value: 100.0
      • type: recall_at_20 value: 90.11099999999999
      • type: recall_at_3 value: 73.444
      • type: recall_at_5 value: 78.889 task: type: Retrieval
    • dataset: config: eng_Latn-hun_Latn name: MTEB BelebeleRetrieval (eng_Latn-hun_Latn) revision: 75b399394a9803252cfec289d103de462763db7c split: test type: facebook/belebele metrics:
      • type: main_score value: 73.668
      • type: map_at_1 value: 60.778
      • type: map_at_10 value: 69.571
      • type: map_at_100 value: 70.114
      • type: map_at_1000 value: 70.124
      • type: map_at_20 value: 69.93700000000001
      • type: map_at_3 value: 67.778
      • type: map_at_5 value: 68.872
      • type: mrr_at_1 value: 60.77777777777777
      • type: mrr_at_10 value: 69.57142857142857
      • type: mrr_at_100 value: 70.1136336675579
      • type: mrr_at_1000 value: 70.12432347462514
      • type: mrr_at_20 value: 69.93690215204663
      • type: mrr_at_3 value: 67.77777777777779
      • type: mrr_at_5 value: 68.87222222222223
      • type: nauc_map_at_1000_diff1 value: 70.84789011327231
      • type: nauc_map_at_1000_max value: 60.852088181225824
      • type: nauc_map_at_1000_std value: 6.549993568212846
      • type: nauc_map_at_100_diff1 value: 70.84603146007751
      • type: nauc_map_at_100_max value: 60.859417397516125
      • type: nauc_map_at_100_std value: 6.577244018939677
      • type: nauc_map_at_10_diff1 value: 70.71490936568583
      • type: nauc_map_at_10_max value: 60.94472236517367
      • type: nauc_map_at_10_std value: 6.53657697773106
      • type: nauc_map_at_1_diff1 value: 74.59301032751448
      • type: nauc_map_at_1_max value: 59.251209223705935
      • type: nauc_map_at_1_std value: 6.536579330592454
      • type: nauc_map_at_20_diff1 value: 70.69902333418673
      • type: nauc_map_at_20_max value: 60.84819592450007
      • type: nauc_map_at_20_std value: 6.487171209675751
      • type: nauc_map_at_3_diff1 value: 70.94073456299253
      • type: nauc_map_at_3_max value: 61.117845574972286
      • type: nauc_map_at_3_std value: 5.824524654602759
      • type: nauc_map_at_5_diff1 value: 70.64337838638826
      • type: nauc_map_at_5_max value: 60.69375707294804
      • type: nauc_map_at_5_std value: 6.1403804587682025
      • type: nauc_mrr_at_1000_diff1 value: 70.84789011327231
      • type: nauc_mrr_at_1000_max value: 60.852088181225824
      • type: nauc_mrr_at_1000_std value: 6.549993568212846
      • type: nauc_mrr_at_100_diff1 value: 70.84603146007751
      • type: nauc_mrr_at_100_max value: 60.859417397516125
      • type: nauc_mrr_at_100_std value: 6.577244018939677
      • type: nauc_mrr_at_10_diff1 value: 70.71490936568583
      • type: nauc_mrr_at_10_max value: 60.94472236517367
      • type: nauc_mrr_at_10_std value: 6.53657697773106
      • type: nauc_mrr_at_1_diff1 value: 74.59301032751448
      • type: nauc_mrr_at_1_max value: 59.251209223705935
      • type: nauc_mrr_at_1_std value: 6.536579330592454
      • type: nauc_mrr_at_20_diff1 value: 70.69902333418673
      • type: nauc_mrr_at_20_max value: 60.84819592450007
      • type: nauc_mrr_at_20_std value: 6.487171209675751
      • type: nauc_mrr_at_3_diff1 value: 70.94073456299253
      • type: nauc_mrr_at_3_max value: 61.117845574972286
      • type: nauc_mrr_at_3_std value: 5.824524654602759
      • type: nauc_mrr_at_5_diff1 value: 70.64337838638826
      • type: nauc_mrr_at_5_max value: 60.69375707294804
      • type: nauc_mrr_at_5_std value: 6.1403804587682025
      • type: nauc_ndcg_at_1000_diff1 value: 70.2568421673153
      • type: nauc_ndcg_at_1000_max value: 61.154155762479746
      • type: nauc_ndcg_at_1000_std value: 6.987492117976732
      • type: nauc_ndcg_at_100_diff1 value: 70.23106290886678
      • type: nauc_ndcg_at_100_max value: 61.387176821366296
      • type: nauc_ndcg_at_100_std value: 7.782749694416603
      • type: nauc_ndcg_at_10_diff1 value: 69.26227190907855
      • type: nauc_ndcg_at_10_max value: 61.634434826859874
      • type: nauc_ndcg_at_10_std value: 7.185316156791736
      • type: nauc_ndcg_at_1_diff1 value: 74.59301032751448
      • type: nauc_ndcg_at_1_max value: 59.251209223705935
      • type: nauc_ndcg_at_1_std value: 6.536579330592454
      • type: nauc_ndcg_at_20_diff1 value: 69.1954116973286
      • type: nauc_ndcg_at_20_max value: 61.38887961478062
      • type: nauc_ndcg_at_20_std value: 7.1318041010309585
      • type: nauc_ndcg_at_3_diff1 value: 69.75775816678905
      • type: nauc_ndcg_at_3_max value: 61.67436817540673
      • type: nauc_ndcg_at_3_std value: 5.650531149732009
      • type: nauc_ndcg_at_5_diff1 value: 69.1651947412561
      • type: nauc_ndcg_at_5_max value: 60.97882565960433
      • type: nauc_ndcg_at_5_std value: 6.203128058155249
      • type: nauc_precision_at_1000_diff1 value: .nan
      • type: nauc_precision_at_1000_max value: .nan
      • type: nauc_precision_at_1000_std value: .nan
      • type: nauc_precision_at_100_diff1 value: 68.65491294557121
      • type: nauc_precision_at_100_max value: 80.36744109408565
      • type: nauc_precision_at_100_std value: 70.92327126929257
      • type: nauc_precision_at_10_diff1 value: 61.29162638094176
      • type: nauc_precision_at_10_max value: 65.7264903076506
      • type: nauc_precision_at_10_std value: 11.47548778748128
      • type: nauc_precision_at_1_diff1 value: 74.59301032751448
      • type: nauc_precision_at_1_max value: 59.251209223705935
      • type: nauc_precision_at_1_std value: 6.536579330592454
      • type: nauc_precision_at_20_diff1 value: 56.51478369125409
      • type: nauc_precision_at_20_max value: 66.28882664176771
      • type: nauc_precision_at_20_std value: 14.05415499533146
      • type: nauc_precision_at_3_diff1 value: 65.55150000975934
      • type: nauc_precision_at_3_max value: 63.631594870493636
      • type: nauc_precision_at_3_std value: 5.057287295297996
      • type: nauc_precision_at_5_diff1 value: 62.93787770906014
      • type: nauc_precision_at_5_max value: 62.06285784899278
      • type: nauc_precision_at_5_std value: 6.577948558011871
      • type: nauc_recall_at_1000_diff1 value: .nan
      • type: nauc_recall_at_1000_max value: .nan
      • type: nauc_recall_at_1000_std value: .nan
      • type: nauc_recall_at_100_diff1 value: 68.6549129455701
      • type: nauc_recall_at_100_max value: 80.36744109408454
      • type: nauc_recall_at_100_std value: 70.92327126929207
      • type: nauc_recall_at_10_diff1 value: 61.29162638094184
      • type: nauc_recall_at_10_max value: 65.72649030765079
      • type: nauc_recall_at_10_std value: 11.475487787481537
      • type: nauc_recall_at_1_diff1 value: 74.59301032751448
      • type: nauc_recall_at_1_max value: 59.251209223705935
      • type: nauc_recall_at_1_std value: 6.536579330592454
      • type: nauc_recall_at_20_diff1 value: 56.514783691254266
      • type: nauc_recall_at_20_max value: 66.28882664176774
      • type: nauc_recall_at_20_std value: 14.054154995331741
      • type: nauc_recall_at_3_diff1 value: 65.55150000975928
      • type: nauc_recall_at_3_max value: 63.63159487049364
      • type: nauc_recall_at_3_std value: 5.05728729529798
      • type: nauc_recall_at_5_diff1 value: 62.937877709060295
      • type: nauc_recall_at_5_max value: 62.06285784899285
      • type: nauc_recall_at_5_std value: 6.577948558011953
      • type: ndcg_at_1 value: 60.778
      • type: ndcg_at_10 value: 73.668
      • type: ndcg_at_100 value: 76.21
      • type: ndcg_at_1000 value: 76.459
      • type: ndcg_at_20 value: 74.993
      • type: ndcg_at_3 value: 70.00800000000001
      • type: ndcg_at_5 value: 71.978
      • type: precision_at_1 value: 60.778
      • type: precision_at_10 value: 8.644
      • type: precision_at_100 value: 0.9809999999999999
      • type: precision_at_1000 value: 0.1
      • type: precision_at_20 value: 4.583
      • type: precision_at_3 value: 25.480999999999998
      • type: precision_at_5 value: 16.244
      • type: recall_at_1 value: 60.778
      • type: recall_at_10 value: 86.444
      • type: recall_at_100 value: 98.111
      • type: recall_at_1000 value: 100.0
      • type: recall_at_20 value: 91.667
      • type: recall_at_3 value: 76.444
      • type: recall_at_5 value: 81.22200000000001 task: type: Retrieval
    • dataset: config: eng_Latn-hun_Latn name: MTEB BibleNLPBitextMining (eng_Latn-hun_Latn) revision: 264a18480c529d9e922483839b4b9758e690b762 split: train type: davidstap/biblenlp-corpus-mmteb metrics:
      • type: accuracy value: 88.671875
      • type: f1 value: 85.859375
      • type: main_score value: 85.859375
      • type: precision value: 84.71354166666667
      • type: recall value: 88.671875 task: type: BitextMining
    • dataset: config: hun_Latn-eng_Latn name: MTEB BibleNLPBitextMining (hun_Latn-eng_Latn) revision: 264a18480c529d9e922483839b4b9758e690b762 split: train type: davidstap/biblenlp-corpus-mmteb metrics:
      • type: accuracy value: 91.796875
      • type: f1 value: 89.41406249999999
      • type: main_score value: 89.41406249999999
      • type: precision value: 88.31380208333334
      • type: recall value: 91.796875 task: type: BitextMining
    • dataset: config: default name: MTEB HunSum2AbstractiveRetrieval (default) revision: 24e1445c8180d937f0a16f8ae8a62e77cc952e56 split: test type: SZTAKI-HLT/HunSum-2-abstractive metrics:
      • type: main_score value: 63.263000000000005
      • type: map_at_1 value: 63.263000000000005
      • type: map_at_10 value: 69.717
      • type: map_at_100 value: 70.19999999999999
      • type: map_at_1000 value: 70.223
      • type: map_at_20 value: 69.987
      • type: map_at_3 value: 68.126
      • type: map_at_5 value: 69.11500000000001
      • type: mrr_at_1 value: 63.263263263263255
      • type: mrr_at_10 value: 69.71656179989505
      • type: mrr_at_100 value: 70.20005091433352
      • type: mrr_at_1000 value: 70.22300238535382
      • type: mrr_at_20 value: 69.98650484718584
      • type: mrr_at_3 value: 68.12645979312641
      • type: mrr_at_5 value: 69.11494828161491
      • type: nauc_map_at_1000_diff1 value: 78.57062147162597
      • type: nauc_map_at_1000_max value: 67.50701502337495
      • type: nauc_map_at_1000_std value: -0.5617129044803558
      • type: nauc_map_at_100_diff1 value: 78.55994402867587
      • type: nauc_map_at_100_max value: 67.50751346612932
      • type: nauc_map_at_100_std value: -0.5527533150571393
      • type: nauc_map_at_10_diff1 value: 78.40366721771652
      • type: nauc_map_at_10_max value: 67.49241622659412
      • type: nauc_map_at_10_std value: -0.48552097268197614
      • type: nauc_map_at_1_diff1 value: 82.01486923813978
      • type: nauc_map_at_1_max value: 65.96265600324601
      • type: nauc_map_at_1_std value: -3.3920974069100702
      • type: nauc_map_at_20_diff1 value: 78.47160921094391
      • type: nauc_map_at_20_max value: 67.53010937556571
      • type: nauc_map_at_20_std value: -0.5304810036230149
      • type: nauc_map_at_3_diff1 value: 78.82728109994231
      • type: nauc_map_at_3_max value: 67.67886259360823
      • type: nauc_map_at_3_std value: -0.8390404611287001
      • type: nauc_map_at_5_diff1 value: 78.64851152021848
      • type: nauc_map_at_5_max value: 67.56443643847581
      • type: nauc_map_at_5_std value: -0.5438994708241538
      • type: nauc_mrr_at_1000_diff1 value: 78.57062147162597
      • type: nauc_mrr_at_1000_max value: 67.50701502337495
      • type: nauc_mrr_at_1000_std value: -0.5617129044803558
      • type: nauc_mrr_at_100_diff1 value: 78.55994402867587
      • type: nauc_mrr_at_100_max value: 67.50751346612932
      • type: nauc_mrr_at_100_std value: -0.5527533150571393
      • type: nauc_mrr_at_10_diff1 value: 78.40366721771652
      • type: nauc_mrr_at_10_max value: 67.49241622659412
      • type: nauc_mrr_at_10_std value: -0.48552097268197614
      • type: nauc_mrr_at_1_diff1 value: 82.01486923813978
      • type: nauc_mrr_at_1_max value: 65.96265600324601
      • type: nauc_mrr_at_1_std value: -3.3920974069100702
      • type: nauc_mrr_at_20_diff1 value: 78.47160921094391
      • type: nauc_mrr_at_20_max value: 67.53010937556571
      • type: nauc_mrr_at_20_std value: -0.5304810036230149
      • type: nauc_mrr_at_3_diff1 value: 78.82728109994231
      • type: nauc_mrr_at_3_max value: 67.67886259360823
      • type: nauc_mrr_at_3_std value: -0.8390404611287001
      • type: nauc_mrr_at_5_diff1 value: 78.64851152021848
      • type: nauc_mrr_at_5_max value: 67.56443643847581
      • type: nauc_mrr_at_5_std value: -0.5438994708241538
      • type: nauc_ndcg_at_1000_diff1 value: 77.85313935589254
      • type: nauc_ndcg_at_1000_max value: 67.79745016701565
      • type: nauc_ndcg_at_1000_std value: 0.3743893992928968
      • type: nauc_ndcg_at_100_diff1 value: 77.54895730138853
      • type: nauc_ndcg_at_100_max value: 67.90017248869928
      • type: nauc_ndcg_at_100_std value: 0.859162358234398
      • type: nauc_ndcg_at_10_diff1 value: 76.71113405671676
      • type: nauc_ndcg_at_10_max value: 67.96034182778398
      • type: nauc_ndcg_at_10_std value: 1.1822837192182254
      • type: nauc_ndcg_at_1_diff1 value: 82.01486923813978
      • type: nauc_ndcg_at_1_max value: 65.96265600324601
      • type: nauc_ndcg_at_1_std value: -3.3920974069100702
      • type: nauc_ndcg_at_20_diff1 value: 76.93959621702203
      • type: nauc_ndcg_at_20_max value: 68.11195662698223
      • type: nauc_ndcg_at_20_std value: 1.04309687394849
      • type: nauc_ndcg_at_3_diff1 value: 77.79565059957739
      • type: nauc_ndcg_at_3_max value: 68.28729385816999
      • type: nauc_ndcg_at_3_std value: 0.2325515867720005
      • type: nauc_ndcg_at_5_diff1 value: 77.37740780039985
      • type: nauc_ndcg_at_5_max value: 68.0591693716456
      • type: nauc_ndcg_at_5_std value: 0.8419316054801026
      • type: nauc_precision_at_1000_diff1 value: 70.06119288295852
      • type: nauc_precision_at_1000_max value: 56.300969751588504
      • type: nauc_precision_at_1000_std value: 42.8131104675957
      • type: nauc_precision_at_100_diff1 value: 67.53252742986358
      • type: nauc_precision_at_100_max value: 71.63984328411749
      • type: nauc_precision_at_100_std value: 20.467710864542678
      • type: nauc_precision_at_10_diff1 value: 68.62375685620702
      • type: nauc_precision_at_10_max value: 70.02532507228068
      • type: nauc_precision_at_10_std value: 9.35439782317633
      • type: nauc_precision_at_1_diff1 value: 82.01486923813978
      • type: nauc_precision_at_1_max value: 65.96265600324601
      • type: nauc_precision_at_1_std value: -3.3920974069100702
      • type: nauc_precision_at_20_diff1 value: 67.96187481073133
      • type: nauc_precision_at_20_max value: 71.59854027319963
      • type: nauc_precision_at_20_std value: 10.641909874113086
      • type: nauc_precision_at_3_diff1 value: 74.38802810704372
      • type: nauc_precision_at_3_max value: 70.31804260818862
      • type: nauc_precision_at_3_std value: 3.8694413447531946
      • type: nauc_precision_at_5_diff1 value: 72.53680275396366
      • type: nauc_precision_at_5_max value: 69.84127154759457
      • type: nauc_precision_at_5_std value: 6.232801743816592
      • type: nauc_recall_at_1000_diff1 value: 70.06119288296337
      • type: nauc_recall_at_1000_max value: 56.30096975158339
      • type: nauc_recall_at_1000_std value: 42.81311046760523
      • type: nauc_recall_at_100_diff1 value: 67.53252742986345
      • type: nauc_recall_at_100_max value: 71.63984328411706
      • type: nauc_recall_at_100_std value: 20.46771086454334
      • type: nauc_recall_at_10_diff1 value: 68.62375685620707
      • type: nauc_recall_at_10_max value: 70.02532507228068
      • type: nauc_recall_at_10_std value: 9.354397823176459
      • type: nauc_recall_at_1_diff1 value: 82.01486923813978
      • type: nauc_recall_at_1_max value: 65.96265600324601
      • type: nauc_recall_at_1_std value: -3.3920974069100702
      • type: nauc_recall_at_20_diff1 value: 67.96187481073152
      • type: nauc_recall_at_20_max value: 71.59854027319979
      • type: nauc_recall_at_20_std value: 10.641909874113258
      • type: nauc_recall_at_3_diff1 value: 74.3880281070437
      • type: nauc_recall_at_3_max value: 70.31804260818865
      • type: nauc_recall_at_3_std value: 3.8694413447530995
      • type: nauc_recall_at_5_diff1 value: 72.53680275396374
      • type: nauc_recall_at_5_max value: 69.84127154759464
      • type: nauc_recall_at_5_std value: 6.232801743816686
      • type: ndcg_at_1 value: 63.263000000000005
      • type: ndcg_at_10 value: 72.89099999999999
      • type: ndcg_at_100 value: 75.421
      • type: ndcg_at_1000 value: 76.027
      • type: ndcg_at_20 value: 73.919
      • type: ndcg_at_3 value: 69.646
      • type: ndcg_at_5 value: 71.434
      • type: precision_at_1 value: 63.263000000000005
      • type: precision_at_10 value: 8.288
      • type: precision_at_100 value: 0.95
      • type: precision_at_1000 value: 0.1
      • type: precision_at_20 value: 4.352
      • type: precision_at_3 value: 24.675
      • type: precision_at_5 value: 15.676000000000002
      • type: recall_at_1 value: 63.263000000000005
      • type: recall_at_10 value: 82.883
      • type: recall_at_100 value: 95.045
      • type: recall_at_1000 value: 99.8
      • type: recall_at_20 value: 87.03699999999999
      • type: recall_at_3 value: 74.024
      • type: recall_at_5 value: 78.378 task: type: Retrieval
    • dataset: config: hu name: MTEB MassiveIntentClassification (hu) revision: 4672e20407010da34463acc759c162ca9734bca6 split: test type: mteb/amazon_massive_intent metrics:
      • type: accuracy value: 60.08406186953599
      • type: f1 value: 56.958742875652455
      • type: f1_weighted value: 60.57068245324919
      • type: main_score value: 60.08406186953599 task: type: Classification
    • dataset: config: hu name: MTEB MassiveIntentClassification (hu) revision: 4672e20407010da34463acc759c162ca9734bca6 split: validation type: mteb/amazon_massive_intent metrics:
      • type: accuracy value: 60.201672405312344
      • type: f1 value: 57.03816512332761
      • type: f1_weighted value: 60.53109947438201
      • type: main_score value: 60.201672405312344 task: type: Classification
    • dataset: config: hu name: MTEB MassiveScenarioClassification (hu) revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 split: test type: mteb/amazon_massive_scenario metrics:
      • type: accuracy value: 66.61398789509079
      • type: f1 value: 65.88647044935249
      • type: f1_weighted value: 66.80145146976484
      • type: main_score value: 66.61398789509079 task: type: Classification
    • dataset: config: hu name: MTEB MassiveScenarioClassification (hu) revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 split: validation type: mteb/amazon_massive_scenario metrics:
      • type: accuracy value: 66.11411706837187
      • type: f1 value: 65.76717397996951
      • type: f1_weighted value: 66.29902597756885
      • type: main_score value: 66.11411706837187 task: type: Classification
    • dataset: config: hu name: MTEB MultiEURLEXMultilabelClassification (hu) revision: 2aea5a6dc8fdcfeca41d0fb963c0a338930bde5c split: test type: mteb/eurlex-multilingual metrics:
      • type: accuracy value: 3.0839999999999996
      • type: f1 value: 27.860225486785566
      • type: lrap value: 43.02579150793552
      • type: main_score value: 3.0839999999999996 task: type: MultilabelClassification
    • dataset: config: arb_Arab-hun_Latn name: MTEB NTREXBitextMining (arb_Arab-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 85.678517776665
      • type: f1 value: 81.92049979731502
      • type: main_score value: 81.92049979731502
      • type: precision value: 80.21115005842097
      • type: recall value: 85.678517776665 task: type: BitextMining
    • dataset: config: ben_Beng-hun_Latn name: MTEB NTREXBitextMining (ben_Beng-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 44.566850275413124
      • type: f1 value: 39.07033025889276
      • type: main_score value: 39.07033025889276
      • type: precision value: 37.07348327291399
      • type: recall value: 44.566850275413124 task: type: BitextMining
    • dataset: config: deu_Latn-hun_Latn name: MTEB NTREXBitextMining (deu_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.44016024036054
      • type: f1 value: 91.61909530963112
      • type: main_score value: 91.61909530963112
      • type: precision value: 90.75279586045735
      • type: recall value: 93.44016024036054 task: type: BitextMining
    • dataset: config: ell_Grek-hun_Latn name: MTEB NTREXBitextMining (ell_Grek-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.4371557336004
      • type: f1 value: 89.0261582850466
      • type: main_score value: 89.0261582850466
      • type: precision value: 87.9043565348022
      • type: recall value: 91.4371557336004 task: type: BitextMining
    • dataset: config: eng_Latn-hun_Latn name: MTEB NTREXBitextMining (eng_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 94.44166249374061
      • type: f1 value: 92.8092138207311
      • type: main_score value: 92.8092138207311
      • type: precision value: 92.0422300116842
      • type: recall value: 94.44166249374061 task: type: BitextMining
    • dataset: config: fas_Arab-hun_Latn name: MTEB NTREXBitextMining (fas_Arab-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 89.53430145217827
      • type: f1 value: 86.72270310227245
      • type: main_score value: 86.72270310227245
      • type: precision value: 85.42814221331997
      • type: recall value: 89.53430145217827 task: type: BitextMining
    • dataset: config: fin_Latn-hun_Latn name: MTEB NTREXBitextMining (fin_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.98647971957938
      • type: f1 value: 88.44600233683859
      • type: main_score value: 88.44600233683859
      • type: precision value: 87.2575529961609
      • type: recall value: 90.98647971957938 task: type: BitextMining
    • dataset: config: fra_Latn-hun_Latn name: MTEB NTREXBitextMining (fra_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 92.28843264897347
      • type: f1 value: 90.12518778167251
      • type: main_score value: 90.12518778167251
      • type: precision value: 89.12535469871473
      • type: recall value: 92.28843264897347 task: type: BitextMining
    • dataset: config: heb_Hebr-hun_Latn name: MTEB NTREXBitextMining (heb_Hebr-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 87.33099649474211
      • type: f1 value: 83.88582874311467
      • type: main_score value: 83.88582874311467
      • type: precision value: 82.31263562009681
      • type: recall value: 87.33099649474211 task: type: BitextMining
    • dataset: config: hin_Deva-hun_Latn name: MTEB NTREXBitextMining (hin_Deva-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 86.52979469203805
      • type: f1 value: 83.08240137984755
      • type: main_score value: 83.08240137984755
      • type: precision value: 81.51352028042064
      • type: recall value: 86.52979469203805 task: type: BitextMining
    • dataset: config: hun_Latn-arb_Arab name: MTEB NTREXBitextMining (hun_Latn-arb_Arab) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 86.73009514271406
      • type: f1 value: 83.12397167179341
      • type: main_score value: 83.12397167179341
      • type: precision value: 81.47805040894676
      • type: recall value: 86.73009514271406 task: type: BitextMining
    • dataset: config: hun_Latn-ben_Beng name: MTEB NTREXBitextMining (hun_Latn-ben_Beng) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 41.16174261392088
      • type: f1 value: 32.73025519520262
      • type: main_score value: 32.73025519520262
      • type: precision value: 29.859172986363774
      • type: recall value: 41.16174261392088 task: type: BitextMining
    • dataset: config: hun_Latn-deu_Latn name: MTEB NTREXBitextMining (hun_Latn-deu_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.39008512769153
      • type: f1 value: 91.5456518110499
      • type: main_score value: 91.5456518110499
      • type: precision value: 90.66099148723085
      • type: recall value: 93.39008512769153 task: type: BitextMining
    • dataset: config: hun_Latn-ell_Grek name: MTEB NTREXBitextMining (hun_Latn-ell_Grek) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 92.03805708562844
      • type: f1 value: 89.81305291270239
      • type: main_score value: 89.81305291270239
      • type: precision value: 88.78317476214322
      • type: recall value: 92.03805708562844 task: type: BitextMining
    • dataset: config: hun_Latn-eng_Latn name: MTEB NTREXBitextMining (hun_Latn-eng_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 94.74211316975463
      • type: f1 value: 93.23985978968453
      • type: main_score value: 93.23985978968453
      • type: precision value: 92.51377065598398
      • type: recall value: 94.74211316975463 task: type: BitextMining
    • dataset: config: hun_Latn-fas_Arab name: MTEB NTREXBitextMining (hun_Latn-fas_Arab) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 88.5327991987982
      • type: f1 value: 85.49240527457853
      • type: main_score value: 85.49240527457853
      • type: precision value: 84.10413238905979
      • type: recall value: 88.5327991987982 task: type: BitextMining
    • dataset: config: hun_Latn-fin_Latn name: MTEB NTREXBitextMining (hun_Latn-fin_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.23535302954431
      • type: f1 value: 87.53296611584042
      • type: main_score value: 87.53296611584042
      • type: precision value: 86.26690035052579
      • type: recall value: 90.23535302954431 task: type: BitextMining
    • dataset: config: hun_Latn-fra_Latn name: MTEB NTREXBitextMining (hun_Latn-fra_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 92.63895843765648
      • type: f1 value: 90.47070605908863
      • type: main_score value: 90.47070605908863
      • type: precision value: 89.42163244867301
      • type: recall value: 92.63895843765648 task: type: BitextMining
    • dataset: config: hun_Latn-heb_Hebr name: MTEB NTREXBitextMining (hun_Latn-heb_Hebr) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 86.62994491737606
      • type: f1 value: 83.19388173168845
      • type: main_score value: 83.19388173168845
      • type: precision value: 81.65832081455517
      • type: recall value: 86.62994491737606 task: type: BitextMining
    • dataset: config: hun_Latn-hin_Deva name: MTEB NTREXBitextMining (hun_Latn-hin_Deva) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 83.97596394591888
      • type: f1 value: 79.85502062617736
      • type: main_score value: 79.85502062617736
      • type: precision value: 78.01758192844824
      • type: recall value: 83.97596394591888 task: type: BitextMining
    • dataset: config: hun_Latn-ind_Latn name: MTEB NTREXBitextMining (hun_Latn-ind_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 92.68903355032549
      • type: f1 value: 90.64596895343014
      • type: main_score value: 90.64596895343014
      • type: precision value: 89.68869971624103
      • type: recall value: 92.68903355032549 task: type: BitextMining
    • dataset: config: hun_Latn-jpn_Jpan name: MTEB NTREXBitextMining (hun_Latn-jpn_Jpan) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 85.778668002003
      • type: f1 value: 82.19829744616925
      • type: main_score value: 82.19829744616925
      • type: precision value: 80.62426973794025
      • type: recall value: 85.778668002003 task: type: BitextMining
    • dataset: config: hun_Latn-kor_Hang name: MTEB NTREXBitextMining (hun_Latn-kor_Hang) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 84.17626439659489
      • type: f1 value: 80.26746468909714
      • type: main_score value: 80.26746468909714
      • type: precision value: 78.5646097351155
      • type: recall value: 84.17626439659489 task: type: BitextMining
    • dataset: config: hun_Latn-lav_Latn name: MTEB NTREXBitextMining (hun_Latn-lav_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.1352028042063
      • type: f1 value: 87.30262059756302
      • type: main_score value: 87.30262059756302
      • type: precision value: 85.98731430479052
      • type: recall value: 90.1352028042063 task: type: BitextMining
    • dataset: config: hun_Latn-lit_Latn name: MTEB NTREXBitextMining (hun_Latn-lit_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 89.58437656484726
      • type: f1 value: 86.8252378567852
      • type: main_score value: 86.8252378567852
      • type: precision value: 85.54581872809214
      • type: recall value: 89.58437656484726 task: type: BitextMining
    • dataset: config: hun_Latn-nld_Latn name: MTEB NTREXBitextMining (hun_Latn-nld_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.03955933900852
      • type: f1 value: 91.03989317309296
      • type: main_score value: 91.03989317309296
      • type: precision value: 90.08930061759305
      • type: recall value: 93.03955933900852 task: type: BitextMining
    • dataset: config: hun_Latn-pol_Latn name: MTEB NTREXBitextMining (hun_Latn-pol_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.58738107160741
      • type: f1 value: 89.28225671841095
      • type: main_score value: 89.28225671841095
      • type: precision value: 88.18227341011517
      • type: recall value: 91.58738107160741 task: type: BitextMining
    • dataset: config: hun_Latn-por_Latn name: MTEB NTREXBitextMining (hun_Latn-por_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.59038557836755
      • type: f1 value: 91.71256885327992
      • type: main_score value: 91.71256885327992
      • type: precision value: 90.80287097312635
      • type: recall value: 93.59038557836755 task: type: BitextMining
    • dataset: config: hun_Latn-rus_Cyrl name: MTEB NTREXBitextMining (hun_Latn-rus_Cyrl) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.3370055082624
      • type: f1 value: 88.88916708395926
      • type: main_score value: 88.88916708395926
      • type: precision value: 87.75961561389704
      • type: recall value: 91.3370055082624 task: type: BitextMining
    • dataset: config: hun_Latn-spa_Latn name: MTEB NTREXBitextMining (hun_Latn-spa_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.69053580370556
      • type: f1 value: 91.94959105324652
      • type: main_score value: 91.94959105324652
      • type: precision value: 91.12418627941913
      • type: recall value: 93.69053580370556 task: type: BitextMining
    • dataset: config: hun_Latn-swa_Latn name: MTEB NTREXBitextMining (hun_Latn-swa_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 35.803705558337505
      • type: f1 value: 27.79832969518814
      • type: main_score value: 27.79832969518814
      • type: precision value: 25.370895920971037
      • type: recall value: 35.803705558337505 task: type: BitextMining
    • dataset: config: hun_Latn-swe_Latn name: MTEB NTREXBitextMining (hun_Latn-swe_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.59038557836755
      • type: f1 value: 91.66249374061091
      • type: main_score value: 91.66249374061091
      • type: precision value: 90.74445000834585
      • type: recall value: 93.59038557836755 task: type: BitextMining
    • dataset: config: hun_Latn-tam_Taml name: MTEB NTREXBitextMining (hun_Latn-tam_Taml) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 27.391086629944915
      • type: f1 value: 19.094552675413095
      • type: main_score value: 19.094552675413095
      • type: precision value: 16.88288208814635
      • type: recall value: 27.391086629944915 task: type: BitextMining
    • dataset: config: hun_Latn-tur_Latn name: MTEB NTREXBitextMining (hun_Latn-tur_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.48723084626941
      • type: f1 value: 89.11700884660323
      • type: main_score value: 89.11700884660323
      • type: precision value: 87.99031881155067
      • type: recall value: 91.48723084626941 task: type: BitextMining
    • dataset: config: hun_Latn-vie_Latn name: MTEB NTREXBitextMining (hun_Latn-vie_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.13670505758637
      • type: f1 value: 88.6696711734268
      • type: main_score value: 88.6696711734268
      • type: precision value: 87.49374061091638
      • type: recall value: 91.13670505758637 task: type: BitextMining
    • dataset: config: hun_Latn-zho_Hant name: MTEB NTREXBitextMining (hun_Latn-zho_Hant) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 89.33400100150224
      • type: f1 value: 86.55745523046474
      • type: main_score value: 86.55745523046474
      • type: precision value: 85.29794692038057
      • type: recall value: 89.33400100150224 task: type: BitextMining
    • dataset: config: hun_Latn-zul_Latn name: MTEB NTREXBitextMining (hun_Latn-zul_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 16.675012518778168
      • type: f1 value: 11.21636405139599
      • type: main_score value: 11.21636405139599
      • type: precision value: 9.903070059112947
      • type: recall value: 16.675012518778168 task: type: BitextMining
    • dataset: config: ind_Latn-hun_Latn name: MTEB NTREXBitextMining (ind_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 92.93940911367051
      • type: f1 value: 90.96478050408946
      • type: main_score value: 90.96478050408946
      • type: precision value: 90.03922550492406
      • type: recall value: 92.93940911367051 task: type: BitextMining
    • dataset: config: jpn_Jpan-hun_Latn name: MTEB NTREXBitextMining (jpn_Jpan-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 88.28242363545317
      • type: f1 value: 85.11433817392756
      • type: main_score value: 85.11433817392756
      • type: precision value: 83.67551326990485
      • type: recall value: 88.28242363545317 task: type: BitextMining
    • dataset: config: kor_Hang-hun_Latn name: MTEB NTREXBitextMining (kor_Hang-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 85.778668002003
      • type: f1 value: 81.83608746453012
      • type: main_score value: 81.83608746453012
      • type: precision value: 80.0233683859122
      • type: recall value: 85.778668002003 task: type: BitextMining
    • dataset: config: lav_Latn-hun_Latn name: MTEB NTREXBitextMining (lav_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.73760640961443
      • type: f1 value: 89.42914371557336
      • type: main_score value: 89.42914371557336
      • type: precision value: 88.32832582206642
      • type: recall value: 91.73760640961443 task: type: BitextMining
    • dataset: config: lit_Latn-hun_Latn name: MTEB NTREXBitextMining (lit_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.78768152228342
      • type: f1 value: 89.50926389584376
      • type: main_score value: 89.50926389584376
      • type: precision value: 88.39926556501419
      • type: recall value: 91.78768152228342 task: type: BitextMining
    • dataset: config: nld_Latn-hun_Latn name: MTEB NTREXBitextMining (nld_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.49023535302955
      • type: f1 value: 91.6190953096311
      • type: main_score value: 91.6190953096311
      • type: precision value: 90.72775830412286
      • type: recall value: 93.49023535302955 task: type: BitextMining
    • dataset: config: pol_Latn-hun_Latn name: MTEB NTREXBitextMining (pol_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 91.28693039559339
      • type: f1 value: 88.99515940577533
      • type: main_score value: 88.99515940577533
      • type: precision value: 87.9293940911367
      • type: recall value: 91.28693039559339 task: type: BitextMining
    • dataset: config: por_Latn-hun_Latn name: MTEB NTREXBitextMining (por_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.03955933900852
      • type: f1 value: 91.08496077449509
      • type: main_score value: 91.08496077449509
      • type: precision value: 90.17860123518612
      • type: recall value: 93.03955933900852 task: type: BitextMining
    • dataset: config: rus_Cyrl-hun_Latn name: MTEB NTREXBitextMining (rus_Cyrl-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.98647971957938
      • type: f1 value: 88.43932565514937
      • type: main_score value: 88.43932565514937
      • type: precision value: 87.2475379736271
      • type: recall value: 90.98647971957938 task: type: BitextMining
    • dataset: config: spa_Latn-hun_Latn name: MTEB NTREXBitextMining (spa_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.23985978968453
      • type: f1 value: 91.3386746786847
      • type: main_score value: 91.3386746786847
      • type: precision value: 90.43148055416457
      • type: recall value: 93.23985978968453 task: type: BitextMining
    • dataset: config: swa_Latn-hun_Latn name: MTEB NTREXBitextMining (swa_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 35.95393089634452
      • type: f1 value: 30.612257939034187
      • type: main_score value: 30.612257939034187
      • type: precision value: 28.995078568906944
      • type: recall value: 35.95393089634452 task: type: BitextMining
    • dataset: config: swe_Latn-hun_Latn name: MTEB NTREXBitextMining (swe_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 93.64046069103655
      • type: f1 value: 91.86613253213153
      • type: main_score value: 91.86613253213153
      • type: precision value: 91.04072775830413
      • type: recall value: 93.64046069103655 task: type: BitextMining
    • dataset: config: tam_Taml-hun_Latn name: MTEB NTREXBitextMining (tam_Taml-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 29.04356534802203
      • type: f1 value: 25.164093122029808
      • type: main_score value: 25.164093122029808
      • type: precision value: 23.849573878565543
      • type: recall value: 29.04356534802203 task: type: BitextMining
    • dataset: config: tur_Latn-hun_Latn name: MTEB NTREXBitextMining (tur_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.83625438157236
      • type: f1 value: 88.36087464530128
      • type: main_score value: 88.36087464530128
      • type: precision value: 87.19829744616925
      • type: recall value: 90.83625438157236 task: type: BitextMining
    • dataset: config: vie_Latn-hun_Latn name: MTEB NTREXBitextMining (vie_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.68602904356536
      • type: f1 value: 88.10882991153397
      • type: main_score value: 88.10882991153397
      • type: precision value: 86.90118511099983
      • type: recall value: 90.68602904356536 task: type: BitextMining
    • dataset: config: zho_Hant-hun_Latn name: MTEB NTREXBitextMining (zho_Hant-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 90.1352028042063
      • type: f1 value: 87.46035720247039
      • type: main_score value: 87.46035720247039
      • type: precision value: 86.19810668383528
      • type: recall value: 90.1352028042063 task: type: BitextMining
    • dataset: config: zul_Latn-hun_Latn name: MTEB NTREXBitextMining (zul_Latn-hun_Latn) revision: ed9a4403ed4adbfaf4aab56d5b2709e9f6c3ba33 split: test type: mteb/NTREX metrics:
      • type: accuracy value: 17.1256885327992
      • type: f1 value: 13.692538409811572
      • type: main_score value: 13.692538409811572
      • type: precision value: 12.811084017018844
      • type: recall value: 17.1256885327992 task: type: BitextMining
    • dataset: config: rom-hun name: MTEB RomaTalesBitextMining (rom-hun) revision: f4394dbca6845743cd33eba77431767b232ef489 split: test type: kardosdrur/roma-tales metrics:
      • type: accuracy value: 6.046511627906977
      • type: f1 value: 2.950830564784053
      • type: main_score value: 2.950830564784053
      • type: precision value: 2.295127353266888
      • type: recall value: 6.046511627906977 task: type: BitextMining
    • dataset: config: hun_Latn name: MTEB SIB200Classification (hun_Latn) revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b split: test type: mteb/sib200 metrics:
      • type: accuracy value: 72.74509803921569
      • type: f1 value: 71.6748881571977
      • type: f1_weighted value: 72.7699432186266
      • type: main_score value: 72.74509803921569 task: type: Classification
    • dataset: config: hun_Latn name: MTEB SIB200Classification (hun_Latn) revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b split: train type: mteb/sib200 metrics:
      • type: accuracy value: 71.92582025677605
      • type: f1 value: 70.9175403606058
      • type: f1_weighted value: 71.9988920000764
      • type: main_score value: 71.92582025677605 task: type: Classification
    • dataset: config: hun_Latn name: MTEB SIB200Classification (hun_Latn) revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b split: validation type: mteb/sib200 metrics:
      • type: accuracy value: 66.76767676767676
      • type: f1 value: 66.07599012119566
      • type: f1_weighted value: 67.15823510190054
      • type: main_score value: 66.76767676767676 task: type: Classification
    • dataset: config: hun_Latn name: MTEB SIB200ClusteringS2S (hun_Latn) revision: a74d7350ea12af010cfb1c21e34f1f81fd2e615b split: test type: mteb/sib200 metrics:
      • type: main_score value: 39.24288169703154
      • type: v_measure value: 39.24288169703154
      • type: v_measure_std value: 2.214708184335194 task: type: Clustering
    • dataset: config: hun-eng name: MTEB Tatoeba (hun-eng) revision: 69e8f12da6e31d59addadda9a9c8a2e601a0e282 split: test type: mteb/tatoeba-bitext-mining metrics:
      • type: accuracy value: 91.0
      • type: f1 value: 88.47999999999999
      • type: main_score value: 88.47999999999999
      • type: precision value: 87.3
      • type: recall value: 91.0 task: type: BitextMining

tags:

  • mteb

base_model: sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 language:

  • hu library_name: sentence-transformers license: apache-2.0 metrics:
  • cosine_accuracy
  • dot_accuracy
  • manhattan_accuracy
  • euclidean_accuracy
  • max_accuracy pipeline_tag: sentence-similarity tags:
  • sentence-transformers
  • sentence-similarity
  • feature-extraction
  • generated_from_trainer
  • dataset_size:857856
  • loss:MultipleNegativesRankingLoss widget:
  • source_sentence: Emberek várnak a lámpánál kerékpárral. sentences:
    • Az emberek piros lámpánál haladnak.
    • Az emberek a kerékpárjukon vannak.
    • Egy fekete kutya úszik a vízben egy teniszlabdával a szájában
  • source_sentence: A kutya a vízben van. sentences:
    • Két férfi takarítja a havat a tetőről, az egyik egy emelőben ül, a másik pedig a tetőn.
    • A macska a vízben van, és dühös.
    • Egy kutya van a vízben, a szájában egy faág.
  • source_sentence: A nő feketét visel. sentences:
    • Egy barna kutya fröcsköl, ahogy úszik a vízben.
    • Egy tetoválással rendelkező nő, aki fekete tank tetején néz a földre.
    • 'Egy kékbe öltözött nő intenzív arckifejezéssel üti a teniszlabdát. A képen:'
  • source_sentence: Az emberek alszanak. sentences:
    • Három ember beszélget egy városi utcán.
    • A nő fehéret visel.
    • Egy apa és a fia ölelgeti alvás közben.
  • source_sentence: Az emberek alszanak. sentences:
    • Egy feketébe öltözött nő cigarettát és bevásárlótáskát tart a kezében, miközben egy idősebb nő átmegy az utcán.
    • Egy csoport ember ül egy nyitott, térszerű területen, mögötte nagy bokrok és egy sor viktoriánus stílusú épület, melyek közül sokat a kép jobb oldalán lévő erős elmosódás tesz kivehetetlenné.
    • Egy apa és a fia ölelgeti alvás közben. model-index:
  • name: paraphrase-multilingual-MiniLM-L12-hu-v1 results:
    • task: type: triplet name: Triplet dataset: name: all nli dev type: all-nli-dev metrics:
      • type: cosine_accuracy value: 0.992 name: Cosine Accuracy
      • type: dot_accuracy value: 0.0108 name: Dot Accuracy
      • type: manhattan_accuracy value: 0.9908 name: Manhattan Accuracy
      • type: euclidean_accuracy value: 0.9908 name: Euclidean Accuracy
      • type: max_accuracy value: 0.992 name: Max Accuracy
    • task: type: triplet name: Triplet dataset: name: all nli test type: all-nli-test metrics:
      • type: cosine_accuracy value: 0.9913636363636363 name: Cosine Accuracy
      • type: dot_accuracy value: 0.013939393939393939 name: Dot Accuracy
      • type: manhattan_accuracy value: 0.990909090909091 name: Manhattan Accuracy
      • type: euclidean_accuracy value: 0.9910606060606061 name: Euclidean Accuracy
      • type: max_accuracy value: 0.9913636363636363 name: Max Accuracy

paraphrase-multilingual-MiniLM-L12-hu-v1

This is a sentence-transformers model fine-tuned from sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2 on the train dataset (857,856 Hungarian anchor/positive/negative triplets). It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.

Model Details

Model Description

Model Sources

Full Model Architecture

SentenceTransformer(
  (0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: BertModel 
  (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)

Usage

Direct Usage (Sentence Transformers)

First install the Sentence Transformers library:

pip install -U sentence-transformers

Then you can load this model and run inference.

from sentence_transformers import SentenceTransformer

# Load the fine-tuned model by its Hugging Face Hub repository ID
# (downloaded on first use).
model = SentenceTransformer("karsar/paraphrase-multilingual-MiniLM-L12-hu_v1")
# Encode a few Hungarian example sentences (approximate English glosses:
# "The people are sleeping.", "A father and his son cuddle while sleeping.",
# and a long description of a group of people sitting in an open square).
sentences = [
    'Az emberek alszanak.',
    'Egy apa és a fia ölelgeti alvás közben.',
    'Egy csoport ember ül egy nyitott, térszerű területen, mögötte nagy bokrok és egy sor viktoriánus stílusú épület, melyek közül sokat a kép jobb oldalán lévő erős elmosódás tesz kivehetetlenné.',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# Expected: [3, 384] -- one 384-dimensional vector per input sentence.

# Compare every embedding with every other one, yielding one score per pair.
# NOTE(review): the similarity function is presumably cosine (the library default) -- confirm against the model's config.
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# Expected: [3, 3] -- a square matrix of pairwise scores.

Evaluation

Metrics

Triplet (dataset: all-nli-dev)

Metric Value
cosine_accuracy 0.992
dot_accuracy 0.0108
manhattan_accuracy 0.9908
euclidean_accuracy 0.9908
max_accuracy 0.992

Triplet (dataset: all-nli-test)

Metric Value
cosine_accuracy 0.9914
dot_accuracy 0.0139
manhattan_accuracy 0.9909
euclidean_accuracy 0.9911
max_accuracy 0.9914

Training Details

Training Dataset

train

  • Dataset: train
  • Size: 857,856 training samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    anchor positive negative
    type string string string
    details
    • min: 7 tokens
    • mean: 11.73 tokens
    • max: 56 tokens
    • min: 6 tokens
    • mean: 15.24 tokens
    • max: 47 tokens
    • min: 7 tokens
    • mean: 16.07 tokens
    • max: 53 tokens
  • Samples:
    anchor positive negative
    Egy lóháton ülő ember átugrik egy lerombolt repülőgép felett. Egy ember a szabadban, lóháton. Egy ember egy étteremben van, és omlettet rendel.
    Gyerekek mosolyogva és integetett a kamera Gyermekek vannak jelen A gyerekek homlokot rántanak
    Egy fiú ugrál a gördeszkát a közepén egy piros híd. A fiú gördeszkás trükköt csinál. A fiú korcsolyázik a járdán.
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim"
    }
    

Evaluation Dataset

train

  • Dataset: train
  • Size: 5,000 evaluation samples
  • Columns: anchor, positive, and negative
  • Approximate statistics based on the first 1000 samples:
    anchor positive negative
    type string string string
    details
    • min: 7 tokens
    • mean: 11.73 tokens
    • max: 56 tokens
    • min: 6 tokens
    • mean: 15.24 tokens
    • max: 47 tokens
    • min: 7 tokens
    • mean: 16.07 tokens
    • max: 53 tokens
  • Samples:
    anchor positive negative
    Egy lóháton ülő ember átugrik egy lerombolt repülőgép felett. Egy ember a szabadban, lóháton. Egy ember egy étteremben van, és omlettet rendel.
    Gyerekek mosolyogva és integetett a kamera Gyermekek vannak jelen A gyerekek homlokot rántanak
    Egy fiú ugrál a gördeszkát a közepén egy piros híd. A fiú gördeszkás trükköt csinál. A fiú korcsolyázik a járdán.
  • Loss: MultipleNegativesRankingLoss with these parameters:
    {
        "scale": 20.0,
        "similarity_fct": "cos_sim"
    }
    

Training Hyperparameters

Non-Default Hyperparameters

  • eval_strategy: steps
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • num_train_epochs: 1
  • warmup_ratio: 0.1
  • bf16: True
  • batch_sampler: no_duplicates

All Hyperparameters

Click to expand
  • overwrite_output_dir: False
  • do_predict: False
  • eval_strategy: steps
  • prediction_loss_only: True
  • per_device_train_batch_size: 128
  • per_device_eval_batch_size: 128
  • per_gpu_train_batch_size: None
  • per_gpu_eval_batch_size: None
  • gradient_accumulation_steps: 1
  • eval_accumulation_steps: None
  • torch_empty_cache_steps: None
  • learning_rate: 5e-05
  • weight_decay: 0.0
  • adam_beta1: 0.9
  • adam_beta2: 0.999
  • adam_epsilon: 1e-08
  • max_grad_norm: 1.0
  • num_train_epochs: 1
  • max_steps: -1
  • lr_scheduler_type: linear
  • lr_scheduler_kwargs: {}
  • warmup_ratio: 0.1
  • warmup_steps: 0
  • log_level: passive
  • log_level_replica: warning
  • log_on_each_node: True
  • logging_nan_inf_filter: True
  • save_safetensors: True
  • save_on_each_node: False
  • save_only_model: False
  • restore_callback_states_from_checkpoint: False
  • no_cuda: False
  • use_cpu: False
  • use_mps_device: False
  • seed: 42
  • data_seed: None
  • jit_mode_eval: False
  • use_ipex: False
  • bf16: True
  • fp16: False
  • fp16_opt_level: O1
  • half_precision_backend: auto
  • bf16_full_eval: False
  • fp16_full_eval: False
  • tf32: None
  • local_rank: 0
  • ddp_backend: None
  • tpu_num_cores: None
  • tpu_metrics_debug: False
  • debug: []
  • dataloader_drop_last: False
  • dataloader_num_workers: 0
  • dataloader_prefetch_factor: None
  • past_index: -1
  • disable_tqdm: False
  • remove_unused_columns: True
  • label_names: None
  • load_best_model_at_end: False
  • ignore_data_skip: False
  • fsdp: []
  • fsdp_min_num_params: 0
  • fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
  • fsdp_transformer_layer_cls_to_wrap: None
  • accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
  • deepspeed: None
  • label_smoothing_factor: 0.0
  • optim: adamw_torch
  • optim_args: None
  • adafactor: False
  • group_by_length: False
  • length_column_name: length
  • ddp_find_unused_parameters: None
  • ddp_bucket_cap_mb: None
  • ddp_broadcast_buffers: False
  • dataloader_pin_memory: True
  • dataloader_persistent_workers: False
  • skip_memory_metrics: True
  • use_legacy_prediction_loop: False
  • push_to_hub: False
  • resume_from_checkpoint: None
  • hub_model_id: None
  • hub_strategy: every_save
  • hub_private_repo: False
  • hub_always_push: False
  • gradient_checkpointing: False
  • gradient_checkpointing_kwargs: None
  • include_inputs_for_metrics: False
  • eval_do_concat_batches: True
  • fp16_backend: auto
  • push_to_hub_model_id: None
  • push_to_hub_organization: None
  • mp_parameters:
  • auto_find_batch_size: False
  • full_determinism: False
  • torchdynamo: None
  • ray_scope: last
  • ddp_timeout: 1800
  • torch_compile: False
  • torch_compile_backend: None
  • torch_compile_mode: None
  • dispatch_batches: None
  • split_batches: None
  • include_tokens_per_second: False
  • include_num_input_tokens_seen: False
  • neftune_noise_alpha: None
  • optim_target_modules: None
  • batch_eval_metrics: False
  • eval_on_start: False
  • eval_use_gather_object: False
  • batch_sampler: no_duplicates
  • multi_dataset_batch_sampler: proportional

Training Logs

Epoch Step Training Loss train loss all-nli-dev_max_accuracy all-nli-test_max_accuracy
0 0 - - 0.7574 -
0.0149 100 2.5002 - - -
0.0298 200 1.9984 - - -
0.0448 300 1.8094 - - -
0.0597 400 1.6704 - - -
0.0746 500 1.5518 - - -
0.0895 600 1.449 - - -
0.1044 700 1.5998 - - -
0.1194 800 1.5725 - - -
0.1343 900 1.5341 - - -
0.1492 1000 1.3423 - - -
0.1641 1100 1.2485 - - -
0.1791 1200 1.1527 - - -
0.1940 1300 1.1672 - - -
0.2089 1400 1.2426 - - -
0.2238 1500 1.0948 - - -
0.2387 1600 1.0069 - - -
0.2537 1700 0.976 - - -
0.2686 1800 0.897 - - -
0.2835 1900 0.7825 - - -
0.2984 2000 0.9421 0.1899 0.9568 -
0.3133 2100 0.8651 - - -
0.3283 2200 0.8184 - - -
0.3432 2300 0.699 - - -
0.3581 2400 0.6704 - - -
0.3730 2500 0.6477 - - -
0.3879 2600 0.7077 - - -
0.4029 2700 0.7364 - - -
0.4178 2800 0.665 - - -
0.4327 2900 1.2512 - - -
0.4476 3000 1.3693 - - -
0.4625 3100 1.3959 - - -
0.4775 3200 1.4175 - - -
0.4924 3300 1.402 - - -
0.5073 3400 1.3832 - - -
0.5222 3500 1.3671 - - -
0.5372 3600 1.3666 - - -
0.5521 3700 1.3479 - - -
0.5670 3800 1.3272 - - -
0.5819 3900 1.3353 - - -
0.5968 4000 1.3177 0.0639 0.9902 -
0.6118 4100 1.3068 - - -
0.6267 4200 1.3054 - - -
0.6416 4300 1.3098 - - -
0.6565 4400 1.2839 - - -
0.6714 4500 1.2976 - - -
0.6864 4600 1.2669 - - -
0.7013 4700 1.208 - - -
0.7162 4800 1.194 - - -
0.7311 4900 1.1974 - - -
0.7460 5000 1.1834 - - -
0.7610 5100 1.1876 - - -
0.7759 5200 1.1743 - - -
0.7908 5300 1.1839 - - -
0.8057 5400 1.1778 - - -
0.8207 5500 1.1711 - - -
0.8356 5600 1.1809 - - -
0.8505 5700 1.1825 - - -
0.8654 5800 1.1795 - - -
0.8803 5900 1.1788 - - -
0.8953 6000 1.1819 0.0371 0.992 -
0.9102 6100 1.1741 - - -
0.9251 6200 1.1871 - - -
0.9400 6300 0.498 - - -
0.9549 6400 0.093 - - -
0.9699 6500 0.1597 - - -
0.9848 6600 0.2033 - - -
0.9997 6700 0.16 - - -
1.0 6702 - - - 0.9914

Framework Versions

  • Python: 3.11.8
  • Sentence Transformers: 3.1.1
  • Transformers: 4.44.0
  • PyTorch: 2.3.0.post101
  • Accelerate: 0.33.0
  • Datasets: 2.18.0
  • Tokenizers: 0.19.0

Citation

BibTeX

Sentence Transformers

@inproceedings{reimers-2019-sentence-bert,
    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
    author = "Reimers, Nils and Gurevych, Iryna",
    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
    month = "11",
    year = "2019",
    publisher = "Association for Computational Linguistics",
    url = "https://arxiv.org/abs/1908.10084",
}

MultipleNegativesRankingLoss

@misc{henderson2017efficient,
    title={Efficient Natural Language Response Suggestion for Smart Reply},
    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
    year={2017},
    eprint={1705.00652},
    archivePrefix={arXiv},
    primaryClass={cs.CL}
}