{ "cells": [ { "cell_type": "code", "execution_count": 2, "id": "0f9f666f", "metadata": {}, "outputs": [], "source": [ "from transformers import DistilBertTokenizerFast, DistilBertForSequenceClassification, Trainer, TrainingArguments\n", "from datasets import load_dataset\n", "import torch\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import accuracy_score, precision_recall_fscore_support" ] }, { "cell_type": "code", "execution_count": 3, "id": "2f35116b", "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "a3bdffef37cd4d5aaa090640d5384825", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Map: 0%| | 0/25000 [00:00\n", " \n", " \n", " [1875/1875 14:06:36, Epoch 3/3]\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
StepTraining LossValidation LossAccuracyF1PrecisionRecall
500.6888000.6809380.6610000.5437420.7921570.413934
1000.6290000.4652590.8410000.8191130.9207160.737705
1500.3712000.3234070.8680000.8674700.8503940.885246
2000.3363000.3741500.8570000.8361970.9480520.747951
2500.3367000.3127630.8650000.8717950.8123890.940574
3000.3118000.2965060.8890000.8825400.9124730.854508
3500.3098000.2863190.8860000.8862280.8638130.909836
4000.2723000.2927730.8900000.8846960.9055790.864754
4500.3151000.4198560.8540000.8310190.9547870.735656
5000.3509000.2983030.8620000.8695650.8070180.942623
5500.3552000.3330940.8700000.8526080.9543150.770492
6000.2799000.2820810.8870000.8799150.9139070.848361
6500.2792000.2883120.8920000.8836210.9318180.840164
7000.1986000.3383010.8760000.8637360.9312800.805328
7500.1956000.2929160.8970000.8975120.8723400.924180
8000.2434000.2893070.8990000.9008830.8644070.940574
8500.1930000.3044640.8970000.8943590.8952770.893443
9000.2145000.2576090.8990000.8953370.9056600.885246
9500.2280000.2794650.8870000.8916590.8378380.952869
10000.2081000.2303800.9100000.9085370.9012100.915984
10500.2006000.3077650.9010000.9020770.8718930.934426
11000.2106000.2787250.9010000.9014930.8762090.928279
11500.2082000.2830950.9120000.9098360.9098360.909836
12000.2010000.2563530.9010000.8952380.9256020.866803
12500.1862000.2492050.9090000.9062820.9109730.901639
13000.0804000.3673440.9020000.9006090.8915660.909836
13500.1527000.3233760.9050000.9003150.9225810.879098
14000.1004000.4169150.8880000.8918920.8430660.946721
14500.1088000.3248850.9080000.9072580.8928570.922131
15000.0667000.3788260.9020000.9012100.8869050.915984
15500.0785000.3689800.9060000.9016740.9209400.883197
16000.0815000.3649180.9090000.9070480.9042770.909836
16500.0626000.3868550.9050000.9039430.8922160.915984
17000.0670000.3922430.9060000.9050510.8924300.918033
17500.0474000.4098930.9100000.9083500.9028340.913934
18000.1082000.4019620.9090000.9078010.8977960.918033
18500.1054000.3905890.9120000.9100200.9081630.911885

" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stderr", "output_type": "stream", "text": [ "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-50\n", "Configuration saved in ./results\\checkpoint-50\\config.json\n", "Model weights saved in ./results\\checkpoint-50\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-100\n", "Configuration saved in ./results\\checkpoint-100\\config.json\n", "Model weights saved in ./results\\checkpoint-100\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-150\n", "Configuration saved in ./results\\checkpoint-150\\config.json\n", "Model weights saved in ./results\\checkpoint-150\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-200\n", "Configuration saved in ./results\\checkpoint-200\\config.json\n", "Model weights saved in ./results\\checkpoint-200\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-250\n", "Configuration saved in ./results\\checkpoint-250\\config.json\n", "Model weights saved in ./results\\checkpoint-250\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-300\n", "Configuration saved in ./results\\checkpoint-300\\config.json\n", "Model weights saved in ./results\\checkpoint-300\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-350\n", "Configuration saved in ./results\\checkpoint-350\\config.json\n", "Model weights saved in ./results\\checkpoint-350\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-400\n", "Configuration saved in ./results\\checkpoint-400\\config.json\n", "Model weights saved in ./results\\checkpoint-400\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-450\n", "Configuration saved in ./results\\checkpoint-450\\config.json\n", "Model weights saved in ./results\\checkpoint-450\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-500\n", "Configuration saved in ./results\\checkpoint-500\\config.json\n", "Model weights saved in ./results\\checkpoint-500\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-550\n", "Configuration saved in ./results\\checkpoint-550\\config.json\n", "Model weights saved in ./results\\checkpoint-550\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-600\n", "Configuration saved in ./results\\checkpoint-600\\config.json\n", "Model weights saved in ./results\\checkpoint-600\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-650\n", "Configuration saved in ./results\\checkpoint-650\\config.json\n", "Model weights saved in ./results\\checkpoint-650\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-700\n", "Configuration saved in ./results\\checkpoint-700\\config.json\n", "Model weights saved in ./results\\checkpoint-700\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-750\n", "Configuration saved in ./results\\checkpoint-750\\config.json\n", "Model weights saved in ./results\\checkpoint-750\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-800\n", "Configuration saved in ./results\\checkpoint-800\\config.json\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Model weights saved in ./results\\checkpoint-800\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-850\n", "Configuration saved in ./results\\checkpoint-850\\config.json\n", "Model weights saved in ./results\\checkpoint-850\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-900\n", "Configuration saved in ./results\\checkpoint-900\\config.json\n", "Model weights saved in ./results\\checkpoint-900\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-950\n", "Configuration saved in ./results\\checkpoint-950\\config.json\n", "Model weights saved in ./results\\checkpoint-950\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1000\n", "Configuration saved in ./results\\checkpoint-1000\\config.json\n", "Model weights saved in ./results\\checkpoint-1000\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1050\n", "Configuration saved in ./results\\checkpoint-1050\\config.json\n", "Model weights saved in ./results\\checkpoint-1050\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1100\n", "Configuration saved in ./results\\checkpoint-1100\\config.json\n", "Model weights saved in ./results\\checkpoint-1100\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1150\n", "Configuration saved in ./results\\checkpoint-1150\\config.json\n", "Model weights saved in ./results\\checkpoint-1150\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1200\n", "Configuration saved in ./results\\checkpoint-1200\\config.json\n", "Model weights saved in ./results\\checkpoint-1200\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1250\n", "Configuration saved in ./results\\checkpoint-1250\\config.json\n", "Model weights saved in ./results\\checkpoint-1250\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1300\n", "Configuration saved in ./results\\checkpoint-1300\\config.json\n", "Model weights saved in ./results\\checkpoint-1300\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1350\n", "Configuration saved in ./results\\checkpoint-1350\\config.json\n", "Model weights saved in ./results\\checkpoint-1350\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1400\n", "Configuration saved in ./results\\checkpoint-1400\\config.json\n", "Model weights saved in ./results\\checkpoint-1400\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1450\n", "Configuration saved in ./results\\checkpoint-1450\\config.json\n", "Model weights saved in ./results\\checkpoint-1450\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1500\n", "Configuration saved in ./results\\checkpoint-1500\\config.json\n", "Model weights saved in ./results\\checkpoint-1500\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1550\n", "Configuration saved in ./results\\checkpoint-1550\\config.json\n", "Model weights saved in ./results\\checkpoint-1550\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1600\n", "Configuration saved in ./results\\checkpoint-1600\\config.json\n", "Model weights saved in ./results\\checkpoint-1600\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1650\n", "Configuration saved in ./results\\checkpoint-1650\\config.json\n", "Model weights saved in ./results\\checkpoint-1650\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1700\n", "Configuration saved in ./results\\checkpoint-1700\\config.json\n", "Model weights saved in ./results\\checkpoint-1700\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1750\n", "Configuration saved in ./results\\checkpoint-1750\\config.json\n", "Model weights saved in ./results\\checkpoint-1750\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1800\n", "Configuration saved in ./results\\checkpoint-1800\\config.json\n", "Model weights saved in ./results\\checkpoint-1800\\pytorch_model.bin\n", "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. 
If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n", "Saving model checkpoint to ./results\\checkpoint-1850\n", "Configuration saved in ./results\\checkpoint-1850\\config.json\n", "Model weights saved in ./results\\checkpoint-1850\\pytorch_model.bin\n", "\n", "\n", "Training completed. Do not forget to share your model on huggingface.co/models =)\n", "\n", "\n", "Loading best model from ./results\\checkpoint-1000 (score: 0.23037973046302795).\n" ] }, { "data": { "text/plain": [ "TrainOutput(global_step=1875, training_loss=0.22492422332763673, metrics={'train_runtime': 50814.837, 'train_samples_per_second': 0.59, 'train_steps_per_second': 0.037, 'total_flos': 3974021959680000.0, 'train_loss': 0.22492422332763673, 'epoch': 3.0})" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Hand the model, the training arguments, both datasets and the metric\n", "# function to a Trainer.\n", "# NOTE(review): `test_dataset` is the periodic evaluation set and, per the run\n", "# log, also decides which checkpoint is reloaded as the best model, so metrics\n", "# reported on it afterwards are likely optimistic - consider a separate\n", "# validation split.\n", "trainer = Trainer(\n", "    model=model,\n", "    args=training_args,\n", "    compute_metrics=compute_metrics,\n", "    train_dataset=train_dataset,\n", "    eval_dataset=test_dataset,\n", ")\n", "\n", "# Last expression of the cell: the returned TrainOutput is displayed as the result.\n", "trainer.train()\n" ] }, { "cell_type": "code", "execution_count": 8, "id": "e2b3a88e", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "The following columns in the evaluation set don't have a corresponding argument in `DistilBertForSequenceClassification.forward` and have been ignored: text. If text are not expected by `DistilBertForSequenceClassification.forward`, you can safely ignore this message.\n", "***** Running Evaluation *****\n", " Num examples = 1000\n", " Batch size = 64\n" ] }, { "data": { "text/html": [ "\n", "

\n", " \n", " \n", " [16/16 07:01]\n", "
\n", " " ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "{'eval_loss': 0.23037973046302795,\n", " 'eval_accuracy': 0.91,\n", " 'eval_f1': 0.9085365853658537,\n", " 'eval_precision': 0.9012096774193549,\n", " 'eval_recall': 0.9159836065573771,\n", " 'eval_runtime': 450.0402,\n", " 'eval_samples_per_second': 2.222,\n", " 'eval_steps_per_second': 0.036,\n", " 'epoch': 3.0}" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "trainer.evaluate()" ] }, { "cell_type": "code", "execution_count": 9, "id": "a15f4208", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Configuration saved in ./saved_model\\config.json\n", "Model weights saved in ./saved_model\\pytorch_model.bin\n", "tokenizer config file saved in ./saved_model\\tokenizer_config.json\n", "Special tokens file saved in ./saved_model\\special_tokens_map.json\n" ] }, { "data": { "text/plain": [ "('./saved_model\\\\tokenizer_config.json',\n", " './saved_model\\\\special_tokens_map.json',\n", " './saved_model\\\\vocab.txt',\n", " './saved_model\\\\added_tokens.json',\n", " './saved_model\\\\tokenizer.json')" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model.save_pretrained('./saved_model')\n", "tokenizer.save_pretrained('./saved_model')" ] }, { "cell_type": "code", "execution_count": 10, "id": "eb978982", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "positive\n" ] } ], "source": [ "def predict_sentiment(text):\n", "    \"\"\"Return 'positive' or 'negative' for a piece of review text.\n", "\n", "    Uses the notebook-level `tokenizer` and `model`. The text is truncated to\n", "    512 tokens; predicted class index 1 is reported as 'positive' and any\n", "    other index as 'negative'.\n", "\n", "    Side effect: switches `model` to evaluation mode.\n", "    \"\"\"\n", "    inputs = tokenizer(text, return_tensors=\"pt\", padding=True, truncation=True, max_length=512)\n", "    # Move every encoded tensor to the device the model lives on.\n", "    inputs = {k: v.to(model.device) for k, v in inputs.items()}\n", "    # Disable dropout so the same text always yields the same prediction; the\n", "    # model may still be in training mode if this runs right after training.\n", "    model.eval()\n", "    with torch.no_grad():\n", "        logits = model(**inputs).logits\n", "    prediction = logits.argmax(-1).item()\n", "    return 'positive' if prediction == 1 else 'negative'\n", "\n", "# Test with a new sentence\n", 
"print(predict_sentiment(\"This movie was great! I loved it.\"))\n" ] }, { "cell_type": "code", "execution_count": null, "id": "30dac866", "metadata": {}, "outputs": [], "source": [ "# Second export of the fine-tuned model and tokenizer, to a differently named folder.\n", "# Needs `model` and `tokenizer` from the cells above: run the notebook top to\n", "# bottom, this cell raises NameError on a fresh kernel.\n", "model.save_pretrained('./Sentimental_Analysis')\n", "tokenizer.save_pretrained('./Sentimental_Analysis')\n" ] }, { "cell_type": "code", "execution_count": null, "id": "f3b53c73", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 5 }