{ "cells": [ { "cell_type": "code", "execution_count": 1, "id": "601b739b", "metadata": { "_cell_guid": "f3fb334d-43c6-4fae-93a0-4b6783397ad9", "_uuid": "da9ddfdf-832b-4d7d-bb6a-942f879f6447", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:06:50.055739Z", "iopub.status.busy": "2024-11-11T01:06:50.055346Z", "iopub.status.idle": "2024-11-11T01:06:50.059890Z", "shell.execute_reply": "2024-11-11T01:06:50.059090Z" }, "id": "gYpPYG55PDy0", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.02954, "end_time": "2024-11-11T01:06:50.061890", "exception": false, "start_time": "2024-11-11T01:06:50.032350", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "## Try reverse input string (will increase the performance)\n", "## adopt beam search" ] }, { "cell_type": "code", "execution_count": 2, "id": "ee5d0131", "metadata": { "_cell_guid": "53f3ea36-80e9-4eed-9b09-11fa0490ae70", "_uuid": "2075d24a-41ad-4336-b551-a0909866836c", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:06:50.103209Z", "iopub.status.busy": "2024-11-11T01:06:50.102887Z", "iopub.status.idle": "2024-11-11T01:07:30.253102Z", "shell.execute_reply": "2024-11-11T01:07:30.251825Z" }, "executionInfo": { "elapsed": 12078, "status": "ok", "timestamp": 1731233836806, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "sRPtnzMJ5DEN", "jupyter": { "outputs_hidden": false }, "outputId": "7a2d1eaf-ddb8-4082-b260-dd52ed452c9b", "papermill": { "duration": 40.174693, "end_time": "2024-11-11T01:07:30.256024", "exception": false, "start_time": "2024-11-11T01:06:50.081331", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "## Turn around camel-kenlm wheel error\n", "!pip install -q future six docopt cachetools numpy scipy pandas scikit-learn torch transformers editdistance requests emoji pyrsistent muddler\n", "!pip install -q camel-tools --no-deps\n", "!pip install -q contractions datasets\n", "\n", "!pip install -q kaggle" ] }, { "cell_type": "code", "execution_count": 3, "id": "d453e83f", "metadata": { "_cell_guid": "8aa266b9-fd1e-4344-8d1a-5a7fab478b63", "_uuid": "426a4f4a-d536-4eb3-a9d7-15e0b265fb63", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:30.296516Z", "iopub.status.busy": "2024-11-11T01:07:30.296121Z", "iopub.status.idle": "2024-11-11T01:07:43.286183Z", "shell.execute_reply": "2024-11-11T01:07:43.284925Z" }, "executionInfo": { "elapsed": 264, "status": "ok", "timestamp": 1731240458903, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "oKlf2c2DFqoS", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 13.013489, "end_time": "2024-11-11T01:07:43.288838", "exception": false, "start_time": "2024-11-11T01:07:30.275349", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "from tqdm import tqdm\n", "import time\n", "from camel_tools.tokenizers.word import simple_word_tokenize\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split\n", "from zipfile import ZipFile\n", "import torch\n", "from torch import nn\n", "from torch.nn import functional as F\n", "from torch.utils.data import DataLoader, Dataset\n", "import spacy\n", "from collections import Counter\n", "import random\n", "import unicodedata\n", "import pyarabic.araby as araby\n", "import contractions\n", "import nltk\n", "from datasets import load_dataset\n", "import re" ] }, { "cell_type": "code", "execution_count": 4, "id": "83ec4bc2", "metadata": { "_cell_guid": "9b098a3a-f196-4d86-85e8-4918c3edbab4", "_uuid": "eec15a85-e283-4d85-97f5-cb6d80ab078a", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:43.328761Z", "iopub.status.busy": "2024-11-11T01:07:43.328171Z", "iopub.status.idle": "2024-11-11T01:07:43.333426Z", "shell.execute_reply": "2024-11-11T01:07:43.332598Z" }, "executionInfo": { "elapsed": 272, "status": "ok", "timestamp": 1731234205580, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "DlvMCmdIWudl", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.027077, "end_time": "2024-11-11T01:07:43.335287", "exception": false, "start_time": "2024-11-11T01:07:43.308210", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "lr = 1e-3\n", "epochs = 50\n", "valid_test_size = 0.3\n", "# maxlen = 100 # length of one training sample by words\n", "embd_features = 128 # length of embedding vectors for each word (input_size) (=1000 in paper)\n", "batch_size = 64\n", "max_freq = 2 # to add all words to the vocabulary that seen more than one time\n", "lstm_hidden_size = 128 # The number of features in the hidden state (=1000 in paper)\n", "lstm_layers = 4 # Number of stacked recurrent layers\n", "dropout_p = 0.5" ] }, { "cell_type": "code", "execution_count": 5, "id": "fc2b096a", "metadata": { "_cell_guid": "a421e16e-78a1-45f1-a185-5ede26629408", "_uuid": "3eb4f197-592d-4d39-8a09-79cc7b939b10", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:43.375090Z", "iopub.status.busy": "2024-11-11T01:07:43.374478Z", "iopub.status.idle": "2024-11-11T01:07:43.379619Z", "shell.execute_reply": "2024-11-11T01:07:43.378747Z" }, "executionInfo": { "elapsed": 4, "status": "ok", "timestamp": 1731233855441, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "SPMCS8ajW1jK", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.027399, "end_time": "2024-11-11T01:07:43.381624", "exception": false, "start_time": "2024-11-11T01:07:43.354225", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "seed = 42\n", "g = torch.Generator().manual_seed(seed)\n", "\n", "device = 'cuda' if torch.cuda.is_available() else 'cpu'\n", "\n", "isKaggle = True\n", "base_dir = '/kaggle/working' if isKaggle else '/content'" ] }, { "cell_type": "markdown", "id": "510bed3e", "metadata": { "_cell_guid": "2823160c-9473-4388-b059-a8f97ecebc30", "_uuid": "fee9820e-a6f6-414c-b042-b320f067a2e8", "collapsed": false, "id": "jKW1NUp9XA9S", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.018429, "end_time": "2024-11-11T01:07:43.418685", "exception": false, "start_time": "2024-11-11T01:07:43.400256", "status": "completed" }, "tags": [] }, "source": [ "## Data" ] }, { "cell_type": "markdown", "id": "02c1cc46", "metadata": { "_cell_guid": "304718a6-ce41-4489-870f-cd7241f2ebdf", "_uuid": "2872f338-182c-484e-9be4-c3d7322a3e40", "collapsed": false, "id": "TFfKvsudXDMI", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.018401, "end_time": "2024-11-11T01:07:43.455689", "exception": false, "start_time": "2024-11-11T01:07:43.437288", "status": "completed" }, "tags": [] }, "source": [ "### Downloading" ] }, { "cell_type": "code", "execution_count": 6, "id": "4d155cf0", "metadata": { "_cell_guid": "eb367fb7-e526-4d7d-8b1e-e576bdaa58a2", "_uuid": "cd81eef9-90f5-466c-94dd-56c123e5ffda", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:43.494855Z", "iopub.status.busy": "2024-11-11T01:07:43.494145Z", "iopub.status.idle": "2024-11-11T01:07:57.356235Z", "shell.execute_reply": "2024-11-11T01:07:57.355391Z" }, "executionInfo": { "elapsed": 6154, "status": "ok", "timestamp": 1731233861591, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "y7jys3D2W2rf", "jupyter": { "outputs_hidden": false }, "outputId": "7eaed1e1-0941-4575-8482-e7f8d328c531", "papermill": { "duration": 13.883923, "end_time": "2024-11-11T01:07:57.358298", "exception": false, "start_time": "2024-11-11T01:07:43.474375", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Dataset URL: https://www.kaggle.com/datasets/samirmoustafa/arabic-to-english-translation-sentences\r\n", "License(s): copyright-authors\r\n", "Archive: /kaggle/working/arabic-to-english-translation-sentences.zip\r\n", " inflating: ara_eng.txt \r\n" ] }, { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "b5839b51ee1d4222b48ff200fa2aac6f", "version_major": 2, "version_minor": 0 }, "text/plain": [ "tatoeba_mt.py: 0%| | 0.00/15.5k [00:00\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EN_sentenceAR_sentence
0Hi.مرحبًا.
1Run!اركض!
2Help!النجدة!
\n", "" ], "text/plain": [ " EN_sentence AR_sentence\n", "0 Hi. مرحبًا.\n", "1 Run! اركض!\n", "2 Help! النجدة!" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_data.head(3)" ] }, { "cell_type": "code", "execution_count": 9, "id": "f9c198b5", "metadata": { "_cell_guid": "f4aa1236-9a0d-4404-a567-99b74ce5cd62", "_uuid": "2bdd8195-553f-420f-bacd-34f14fc7a122", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:59.485375Z", "iopub.status.busy": "2024-11-11T01:07:59.485061Z", "iopub.status.idle": "2024-11-11T01:07:59.493835Z", "shell.execute_reply": "2024-11-11T01:07:59.492990Z" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1731233862004, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "Q6_5CpqeR1WZ", "jupyter": { "outputs_hidden": false }, "outputId": "dafe43f3-f344-4d77-b589-e3dde59226e9", "papermill": { "duration": 0.031971, "end_time": "2024-11-11T01:07:59.495776", "exception": false, "start_time": "2024-11-11T01:07:59.463805", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EN_sentenceAR_sentence
101406You'd better go.يستحسن انك تروح.
101407You should not talk here.يستحسن إنك ما تتكلمش هنا.
101408Make your choice.يلا اختار.
\n", "
" ], "text/plain": [ " EN_sentence AR_sentence\n", "101406 You'd better go. يستحسن انك تروح.\n", "101407 You should not talk here. يستحسن إنك ما تتكلمش هنا.\n", "101408 Make your choice. يلا اختار." ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_data.tail(3)" ] }, { "cell_type": "code", "execution_count": 10, "id": "ddd88e87", "metadata": { "_cell_guid": "82791955-ac05-4454-a04d-79ff1445e6b5", "_uuid": "eb3335c0-e8c8-458d-97a9-cb5521fd4594", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:59.539311Z", "iopub.status.busy": "2024-11-11T01:07:59.538539Z", "iopub.status.idle": "2024-11-11T01:07:59.783825Z", "shell.execute_reply": "2024-11-11T01:07:59.782983Z" }, "executionInfo": { "elapsed": 312, "status": "ok", "timestamp": 1731233862309, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "lkehEZmETpS3", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.269515, "end_time": "2024-11-11T01:07:59.786005", "exception": false, "start_time": "2024-11-11T01:07:59.516490", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df_data['EN_sentence_length'] = df_data['EN_sentence'].apply(lambda x: len(x.split(' ')))\n", "df_data['AR_sentence_length'] = df_data['AR_sentence'].apply(lambda x: len(x.split(' ')))" ] }, { "cell_type": "code", "execution_count": 11, "id": "c2512e94", "metadata": { "_cell_guid": "2ce3e90d-84aa-4528-b3b8-ebb69e124bee", "_uuid": "c0db62ea-a6e2-4884-8cb9-459b90c5d335", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:07:59.829149Z", "iopub.status.busy": "2024-11-11T01:07:59.828349Z", "iopub.status.idle": "2024-11-11T01:08:00.536692Z", "shell.execute_reply": "2024-11-11T01:08:00.535734Z" }, "executionInfo": { "elapsed": 922, "status": "ok", "timestamp": 1731233863229, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "yYMZjHJsaUBK", "jupyter": { "outputs_hidden": false }, "outputId": "0ba341d8-f865-496b-9920-fb489d215d61", "papermill": { "duration": 0.732373, "end_time": "2024-11-11T01:08:00.539150", "exception": false, "start_time": "2024-11-11T01:07:59.806777", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create a figure and two subplots\n", "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", "\n", "# Define custom tick range based on your data range\n", "EN_x_ticks = np.arange(0, df_data['EN_sentence_length'].max()+1, 25)\n", "AR_x_ticks = np.arange(0, df_data['AR_sentence_length'].max()+1, 25)\n", "\n", "# Plot histogram for EN_sentence_length\n", "axes[0].hist(df_data['EN_sentence_length'], bins=50, color='skyblue', edgecolor='black')\n", "axes[0].set_title('EN Sentence Length')\n", "axes[0].set_xlabel('Length')\n", "axes[0].set_ylabel('Frequency')\n", "axes[0].set_xticks(EN_x_ticks) # Add more x-axis ticks\n", "\n", "# Plot histogram for AR_sentence_length\n", "axes[1].hist(df_data['AR_sentence_length'], bins=50, color='salmon', edgecolor='black')\n", "axes[1].set_title('AR Sentence Length')\n", "axes[1].set_xlabel('Length')\n", "axes[1].set_ylabel('Frequency')\n", "axes[1].set_xticks(AR_x_ticks) # Add more x-axis ticks\n", "\n", "# Display the plots\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "d9a20ed6", "metadata": { "_cell_guid": "93840f23-72fc-4a14-afab-a7d0bb3cbdec", "_uuid": "e1d2150a-bbdd-4a97-987f-3c5c348cf946", "collapsed": false, "id": "JnYOEPnJSK1I", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.02095, "end_time": "2024-11-11T01:08:00.581884", "exception": false, "start_time": "2024-11-11T01:08:00.560934", "status": "completed" }, "tags": [] }, "source": [ "As we see there are long and too short sentences.\n", "\n", "Short sentences will suffer from vanishing Gradients, As we will do post-padding (right-padding), so we will dorp short sentences." ] }, { "cell_type": "markdown", "id": "016a024c", "metadata": { "_cell_guid": "5401299b-ecd8-4433-8818-2dde942b8f1c", "_uuid": "7993eb62-7c1c-4400-a2cc-488985cda970", "collapsed": false, "id": "yk9hgqksU9_W", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.020897, "end_time": "2024-11-11T01:08:00.623676", "exception": false, "start_time": "2024-11-11T01:08:00.602779", "status": "completed" }, "tags": [] }, "source": [ "### Cleaning" ] }, { "cell_type": "code", "execution_count": 12, "id": "32c51041", "metadata": { "_cell_guid": "de16e1a4-e20e-4ec4-b24f-b05db3f151dd", "_uuid": "c68e2832-2b45-4638-96d4-9bbd6c10231c", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:00.667588Z", "iopub.status.busy": "2024-11-11T01:08:00.666834Z", "iopub.status.idle": "2024-11-11T01:08:00.724816Z", "shell.execute_reply": "2024-11-11T01:08:00.723725Z" }, "executionInfo": { "elapsed": 9, "status": "ok", "timestamp": 1731233863229, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "D7-HJg_PVBWV", "jupyter": { "outputs_hidden": false }, "outputId": "882a3d1e-ea75-4afb-a6ff-e976d037e084", "papermill": { "duration": 0.08227, "end_time": "2024-11-11T01:08:00.726872", "exception": false, "start_time": "2024-11-11T01:08:00.644602", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 101409 entries, 0 to 101408\n", "Data columns (total 4 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 EN_sentence 101409 non-null object\n", " 1 AR_sentence 101409 non-null object\n", " 2 EN_sentence_length 101409 non-null int64 \n", " 3 AR_sentence_length 101409 non-null int64 \n", "dtypes: int64(2), object(2)\n", "memory usage: 3.1+ MB\n", "None\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EN_sentence_lengthAR_sentence_length
count101409.000000101409.000000
mean8.8230537.350225
std11.48724210.119463
min1.0000001.000000
25%4.0000003.000000
50%6.0000005.000000
75%8.0000007.000000
max225.000000225.000000
\n", "
" ], "text/plain": [ " EN_sentence_length AR_sentence_length\n", "count 101409.000000 101409.000000\n", "mean 8.823053 7.350225\n", "std 11.487242 10.119463\n", "min 1.000000 1.000000\n", "25% 4.000000 3.000000\n", "50% 6.000000 5.000000\n", "75% 8.000000 7.000000\n", "max 225.000000 225.000000" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "print(df_data.info())\n", "df_data.describe()\n", "## so Q3 at 8" ] }, { "cell_type": "code", "execution_count": 13, "id": "dbf84937", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:08:00.771935Z", "iopub.status.busy": "2024-11-11T01:08:00.771264Z", "iopub.status.idle": "2024-11-11T01:08:00.779175Z", "shell.execute_reply": "2024-11-11T01:08:00.778295Z" }, "papermill": { "duration": 0.032529, "end_time": "2024-11-11T01:08:00.781173", "exception": false, "start_time": "2024-11-11T01:08:00.748644", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# https://stackoverflow.com/a/518232/2809427\n", "def unicodeToAscii(s):\n", " return ''.join(c for c in unicodedata.normalize('NFD', s) if unicodedata.category(c) != 'Mn')\n", "\n", "def preprocess_ar(text):\n", " text = araby.strip_diacritics(text).strip() # Remove diacritics \"التشكيل\"\n", " text = re.sub(r'[a-zA-Z]', '', text) # Remove English letters\n", " text = re.sub(r'\\s+', ' ', text).strip() # Trim multiple whitespaces to one\n", " text = re.sub(r'[_|\\d+|\\\\|\\-|؛|،|,|\\[|\\]|\\(|\\)|\\\"|/|%|!|,|.|:|♪|«|»|}|{|*|#]+', '', text) # Remove special characters and digits\n", " text = unicodeToAscii(text)\n", " return text\n", "\n", "def preprocess_en(text):\n", " text = text.lower()\n", " text = contractions.fix(text) # Fix contractions \"it's\" -> \"it is\"\n", " text = re.sub(r'[\\u0600-\\u06FF]', '', text) # Remove Arabic letters\n", " text = re.sub(r'\\s+', ' ', text).strip() # Trim multiple whitespaces to one\n", " text = re.sub(r'[_|\\d+|\\\\|\\-|؛|،|,|\\[|\\]|\\(|\\)|\\\"|/|%|!|,|.|:|♪|«|»|}|{|*|#]+', '', text) # Remove special characters and digits\n", " text = unicodeToAscii(text)\n", " return text" ] }, { "cell_type": "code", "execution_count": 14, "id": "65970704", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:08:00.826388Z", "iopub.status.busy": "2024-11-11T01:08:00.825773Z", "iopub.status.idle": "2024-11-11T01:08:07.840714Z", "shell.execute_reply": "2024-11-11T01:08:07.839840Z" }, "papermill": { "duration": 7.040131, "end_time": "2024-11-11T01:08:07.842951", "exception": false, "start_time": "2024-11-11T01:08:00.802820", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "df_data['EN_sentence'] = df_data['EN_sentence'].apply(preprocess_en)\n", "df_data['AR_sentence'] = df_data['AR_sentence'].apply(preprocess_ar)" ] }, { "cell_type": "code", "execution_count": 15, "id": "975eb354", "metadata": { "_cell_guid": "504b87d4-4ae2-4546-b20d-bd8a5a11a003", "_uuid": "048c8e25-6fc4-4724-ba6d-0ec33961d904", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:07.888010Z", "iopub.status.busy": "2024-11-11T01:08:07.887209Z", "iopub.status.idle": "2024-11-11T01:08:07.896071Z", "shell.execute_reply": "2024-11-11T01:08:07.895194Z" }, "executionInfo": { "elapsed": 8, "status": "ok", "timestamp": 1731233863230, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "E8ApzqfrVHw9", "jupyter": { "outputs_hidden": false }, "outputId": "9e86fcd0-9677-4d6f-ef58-5d347f82f4be", "papermill": { "duration": 0.033156, "end_time": "2024-11-11T01:08:07.897900", "exception": false, "start_time": "2024-11-11T01:08:07.864744", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(101409, 7383, 3334)" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "arr_len = df_data['EN_sentence_length']\n", "len(arr_len), len(arr_len[arr_len>20]), len(arr_len[arr_len<3])" ] }, { "cell_type": "markdown", "id": "141b4830", "metadata": { "_cell_guid": "dbce33c5-d4b9-4569-bc38-057f5714efdc", "_uuid": "19ce86d4-32d0-43ba-bac0-d5cc75013a9b", "collapsed": false, "id": "qHIb_lTkXmd_", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022359, "end_time": "2024-11-11T01:08:07.941762", "exception": false, "start_time": "2024-11-11T01:08:07.919403", "status": "completed" }, "tags": [] }, "source": [ "We will drop sentences that > 20 words or < 3 words for source language." ] }, { "cell_type": "code", "execution_count": 16, "id": "77fa809d", "metadata": { "_cell_guid": "6b37401f-f687-4f7c-aca0-c9acd2bf4664", "_uuid": "4ac3524d-109b-4e79-a6b2-8f63d3643f9f", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:07.987137Z", "iopub.status.busy": "2024-11-11T01:08:07.986287Z", "iopub.status.idle": "2024-11-11T01:08:08.112139Z", "shell.execute_reply": "2024-11-11T01:08:08.111095Z" }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1731233863230, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "1XTCKT3nXxOD", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.150953, "end_time": "2024-11-11T01:08:08.114259", "exception": false, "start_time": "2024-11-11T01:08:07.963306", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "EN_sentence 0\n", "AR_sentence 0\n", "EN_sentence_length 0\n", "AR_sentence_length 0\n", "dtype: int64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_data = df_data[(df_data['EN_sentence_length'] <= 20)]\n", "df_data = df_data[(df_data['EN_sentence_length'] >= 3)]\n", "\n", "df_data = df_data[(df_data['AR_sentence_length'] <= 20)]\n", "df_data = df_data[(df_data['AR_sentence_length'] >= 3)]\n", "\n", "df_data = df_data.drop_duplicates(keep='first', subset='AR_sentence')\n", "df_data = df_data.drop_duplicates(keep='first', subset='EN_sentence')\n", "\n", "df_data = df_data.replace('', pd.NA).dropna()\n", "df_data = df_data.replace(' ', pd.NA).dropna()\n", "df_data.isna().sum()" ] }, { "cell_type": "code", "execution_count": 17, "id": "8bcf0f96", "metadata": { "_cell_guid": "e6d0a9ee-0f33-4fd3-9d10-3b26815ec3fe", "_uuid": "06c813f4-6881-4566-a2df-485bdfac9520", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:08.159726Z", "iopub.status.busy": "2024-11-11T01:08:08.159108Z", "iopub.status.idle": "2024-11-11T01:08:08.841666Z", "shell.execute_reply": "2024-11-11T01:08:08.840716Z" }, "executionInfo": { "elapsed": 759, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "JYQ1JaoEfw9F", "jupyter": { "outputs_hidden": false }, "outputId": "4c7856f2-a45b-4a0b-b5a2-ccdbc377cb25", "papermill": { "duration": 0.707615, "end_time": "2024-11-11T01:08:08.844017", "exception": false, "start_time": "2024-11-11T01:08:08.136402", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Create a figure and two subplots\n", "fig, axes = plt.subplots(1, 2, figsize=(12, 5))\n", "\n", "# Define custom tick range based on your data range\n", "EN_x_ticks = np.arange(0, df_data['EN_sentence_length'].max()+1, 2)\n", "AR_x_ticks = np.arange(0, df_data['AR_sentence_length'].max()+1, 2)\n", "\n", "# Plot histogram for EN_sentence_length\n", "axes[0].hist(df_data['EN_sentence_length'], bins=50, color='skyblue', edgecolor='black')\n", "axes[0].set_title('EN Sentence Length')\n", "axes[0].set_xlabel('Length')\n", "axes[0].set_ylabel('Frequency')\n", "axes[0].set_xticks(EN_x_ticks) # Add more x-axis ticks\n", "\n", "# Plot histogram for AR_sentence_length\n", "axes[1].hist(df_data['AR_sentence_length'], bins=50, color='salmon', edgecolor='black')\n", "axes[1].set_title('AR Sentence Length')\n", "axes[1].set_xlabel('Length')\n", "axes[1].set_ylabel('Frequency')\n", "axes[1].set_xticks(AR_x_ticks) # Add more x-axis ticks\n", "\n", "# Display the plots\n", "plt.tight_layout()\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "d3e4a1da", "metadata": { "_cell_guid": "2ded56c1-f0e8-4a28-aa06-266e70ba1746", "_uuid": "724377a6-b9a8-4e18-ad30-8907bbc42b5d", "collapsed": false, "id": "0qY-VQnsX6Uz", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022022, "end_time": "2024-11-11T01:08:08.888808", "exception": false, "start_time": "2024-11-11T01:08:08.866786", "status": "completed" }, "tags": [] }, "source": [ "### Spliting" ] }, { "cell_type": "code", "execution_count": 18, "id": "d47f79cd", "metadata": { "_cell_guid": "4827dfe0-5cac-4a5f-b16e-f4a0f12275f3", "_uuid": "e27f0b2f-b122-4bf6-97cb-9fee19db02ca", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:08.934766Z", "iopub.status.busy": "2024-11-11T01:08:08.934343Z", "iopub.status.idle": "2024-11-11T01:08:08.953662Z", "shell.execute_reply": "2024-11-11T01:08:08.952610Z" }, "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "YVmnp4vwW1e5", "jupyter": { "outputs_hidden": false }, "outputId": "7eaa09bf-7ac0-4dea-9ea4-3596f29d47c9", "papermill": { "duration": 0.044679, "end_time": "2024-11-11T01:08:08.955719", "exception": false, "start_time": "2024-11-11T01:08:08.911040", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(30129, 4), (6457, 4), (6456, 4)\n" ] } ], "source": [ "df_train, df_test = train_test_split(df_data, test_size=valid_test_size, shuffle=True, random_state=seed)\n", "df_valid, df_test = train_test_split(df_test, test_size=0.5, shuffle=True, random_state=seed)\n", "\n", "print(df_train.shape, df_test.shape, df_valid.shape, sep=', ')" ] }, { "cell_type": "markdown", "id": "4d6cd36a", "metadata": { "_cell_guid": "d2b96602-f2d3-42d4-9354-5d03cd34567f", "_uuid": "52c29e11-bff0-4ac1-9277-988f3a67f908", "collapsed": false, "id": "x_tK_7w7X9k7", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022027, "end_time": "2024-11-11T01:08:09.000136", "exception": false, "start_time": "2024-11-11T01:08:08.978109", "status": "completed" }, "tags": [] }, "source": [ "### Loading and preprocessing (DataPipeline)" ] }, { "cell_type": "code", "execution_count": 19, "id": "d1d417fb", "metadata": { "_cell_guid": "304de1ba-2dde-45d3-abf2-66f83bb0395e", "_uuid": "64b75afc-3ad0-4d82-8c0a-08f1b12a8d5e", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.046341Z", "iopub.status.busy": "2024-11-11T01:08:09.045959Z", "iopub.status.idle": "2024-11-11T01:08:09.050816Z", "shell.execute_reply": "2024-11-11T01:08:09.049774Z" }, "executionInfo": { "elapsed": 4, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "l71HUAXhW1c2", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.030357, "end_time": "2024-11-11T01:08:09.052761", "exception": false, "start_time": "2024-11-11T01:08:09.022404", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# Arabic Tokenizer\n", "class camel_tokenizer():\n", " def __call__(self, text):\n", " return simple_word_tokenize(text.lower())\n", " # return [tok.text.lower() for tok in simple_word_tokenize(text)]" ] }, { "cell_type": "code", "execution_count": 20, "id": "00fc9c7b", "metadata": { "_cell_guid": "51223be9-dc37-4e76-b718-64a53cff204e", "_uuid": "5efa6e4e-b322-4cbc-9012-593d1981505e", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.099090Z", "iopub.status.busy": "2024-11-11T01:08:09.098713Z", "iopub.status.idle": "2024-11-11T01:08:09.104140Z", "shell.execute_reply": "2024-11-11T01:08:09.103258Z" }, "executionInfo": { "elapsed": 3, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "WcmKh1fRakye", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.031054, "end_time": "2024-11-11T01:08:09.106114", "exception": false, "start_time": "2024-11-11T01:08:09.075060", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# English Tokenizer\n", "class spacy_tokenizer():\n", " def __init__(self):\n", " self.spacy_eng = spacy.load(\"en_core_web_sm\")\n", "\n", " def __call__(self, text):\n", " return [tok.text.lower() for tok in self.spacy_eng.tokenizer(text)]" ] }, { "cell_type": "code", "execution_count": 21, "id": "859880fc", "metadata": { "_cell_guid": "68d52d75-d185-413e-ac51-d783a1674441", "_uuid": "9c54045e-9bee-4006-a808-96c86b15256b", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.152951Z", "iopub.status.busy": "2024-11-11T01:08:09.152130Z", "iopub.status.idle": "2024-11-11T01:08:09.166297Z", "shell.execute_reply": "2024-11-11T01:08:09.165398Z" }, "executionInfo": { "elapsed": 3, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "46MfvfOhYA5E", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.039344, "end_time": "2024-11-11T01:08:09.168072", "exception": false, "start_time": "2024-11-11T01:08:09.128728", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class Vocabulary():\n", " def __init__(self, callable_tokenizer, max_freq=3, unk=True, sos=False, eos=False):\n", "\n", " self.sos = sos\n", " self.eos = eos\n", " self.unk = unk\n", " self.callable_tokenizer = callable_tokenizer\n", " self.max_freq = max_freq\n", "\n", " self.stoi = {\"\": 0}\n", " if self.unk:\n", " self.stoi[''] = len(self.stoi)\n", " if self.sos:\n", " self.stoi[''] = len(self.stoi)\n", " if self.eos:\n", " self.stoi[''] = len(self.stoi)\n", "\n", " def __len__(self):\n", " return len(self.stoi)\n", "\n", " def get_vocabulary(self):\n", " return self.stoi\n", "\n", " def add_token(self, token_name: str):\n", " if token_name not in self.stoi:\n", " self.stoi[token_name] = len(self.stoi)\n", "\n", " def build_vocabulary(self, sentences_list):\n", " if type(sentences_list[0]) != str:\n", " ## ex: [['eating', 'apples'], ['eating', 'oranges']]\n", " sentences_list = [' '.join(sen) for sen in sentences_list]\n", "\n", " word_counts = Counter()\n", " for sentence in sentences_list:\n", " tokens = self.callable_tokenizer(sentence)\n", " word_counts.update(tokens)\n", "\n", " # Filter words with mox_freq or more occurrences\n", " filtered_words = [word for word, count in word_counts.items() if count >= self.max_freq]\n", " for word in filtered_words:\n", " if word not in self.stoi:\n", " self.stoi[word] = len(self.stoi)\n", "\n", " def get_numerical_tokens(self, text: str):\n", " tokens = self.callable_tokenizer(text)\n", " tokens.insert(0, '') if self.sos else None\n", " tokens.append('') if self.eos else None\n", " unk_id = self.stoi.get('', None)\n", " return [self.stoi.get(word, unk_id) for word in tokens]\n", "\n", " def __call__(self, text: str):\n", " return self.get_numerical_tokens(text)\n", "\n", " def tokens_to_text(self, tokens_list):\n", " keys = list(self.stoi.keys())\n", " values = list(self.stoi.values())\n", "\n", " return ' '.join([keys[values.index(token)] for token in tokens_list])" ] }, { "cell_type": "code", "execution_count": 22, "id": "c875f624", "metadata": { "_cell_guid": "d751258b-c525-4b89-a196-5c12b044fed3", "_uuid": "935e104d-c674-4f0b-b13a-d31914cd1d05", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.214919Z", "iopub.status.busy": "2024-11-11T01:08:09.213969Z", "iopub.status.idle": "2024-11-11T01:08:09.221143Z", "shell.execute_reply": "2024-11-11T01:08:09.220415Z" }, "executionInfo": { "elapsed": 3, "status": "ok", "timestamp": 1731233863984, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "77CoJfh-e0xT", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.032558, "end_time": "2024-11-11T01:08:09.223075", "exception": false, "start_time": "2024-11-11T01:08:09.190517", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class MT_Dataset(Dataset):\n", "\n", " def __init__(self, en_sentences_list, ar_sentences_list, en_vocabulary:Vocabulary, ar_vocabulary:Vocabulary):\n", "\n", " super(MT_Dataset, self).__init__()\n", " self.ar_sentences_list = ar_sentences_list\n", " self.en_sentences_list = en_sentences_list\n", " self.ar_vocabulary = ar_vocabulary\n", " self.en_vocabulary = en_vocabulary\n", " # self.maxlen = maxlen\n", "\n", " def __len__(self):\n", " return len(self.en_sentences_list)\n", "\n", " def __getitem__(self, index):\n", " en_sentence, ar_sentence = self.en_sentences_list[index], self.ar_sentences_list[index]\n", "\n", " en_numrical_tokens = self.en_vocabulary(en_sentence)\n", " ar_numrical_tokens = self.ar_vocabulary(ar_sentence)\n", "\n", " return torch.tensor(en_numrical_tokens), torch.tensor(ar_numrical_tokens)" ] }, { "cell_type": "code", "execution_count": 23, "id": "001d01e2", "metadata": { "_cell_guid": "47c0900b-d167-4818-ba8e-e1147ed613b9", "_uuid": "a54656ee-11dc-4c06-ab78-3899410b45a9", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.269083Z", "iopub.status.busy": "2024-11-11T01:08:09.268475Z", "iopub.status.idle": "2024-11-11T01:08:09.275337Z", "shell.execute_reply": "2024-11-11T01:08:09.274439Z" }, "executionInfo": { "elapsed": 4, "status": "ok", "timestamp": 1731233863985, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "u4xNdej7gDiO", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.031982, "end_time": "2024-11-11T01:08:09.277244", "exception": false, "start_time": "2024-11-11T01:08:09.245262", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class MYCollate():\n", " def __init__(self, batch_first=True, pad_value=0):\n", " self.pad_value = pad_value\n", " self.batch_first = batch_first\n", "\n", " def __call__(self, data):\n", " en_stentences = [ex[0] for ex in data]\n", " ar_stentences = [ex[1] for ex in data]\n", "\n", " padded_en_stentences = nn.utils.rnn.pad_sequence(en_stentences, batch_first=self.batch_first,\n", " padding_value=self.pad_value)\n", " padded_ar_stentences = nn.utils.rnn.pad_sequence(ar_stentences, batch_first=self.batch_first,\n", " padding_value=self.pad_value)\n", " return padded_en_stentences, padded_ar_stentences" ] }, { "cell_type": "code", "execution_count": 24, "id": "bff46f11", "metadata": { "_cell_guid": "14b402fa-17df-4017-a7da-365181a14479", "_uuid": "18c8ef64-8c8e-4c81-8b84-441d37feafcc", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:09.323293Z", "iopub.status.busy": "2024-11-11T01:08:09.322931Z", "iopub.status.idle": "2024-11-11T01:08:57.424135Z", "shell.execute_reply": "2024-11-11T01:08:57.423129Z" }, "executionInfo": { "elapsed": 67228, "status": "ok", "timestamp": 1731233931209, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "oV00SOQbYA3N", "jupyter": { "outputs_hidden": false }, "outputId": "2aa5864a-a73c-4f7c-816b-194564811a21", "papermill": { "duration": 48.148695, "end_time": "2024-11-11T01:08:57.448170", "exception": false, "start_time": "2024-11-11T01:08:09.299475", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "len(ar_vocab) = 12378, len(en_vocab) = 7254\n" ] } ], "source": [ "ar_tokenizer = camel_tokenizer()\n", "ar_vocab = Vocabulary(ar_tokenizer, max_freq=max_freq, unk=True, sos=True, eos=True)\n", "ar_vocab.build_vocabulary(df_train['AR_sentence'].values)\n", "\n", "en_tokenizer = spacy_tokenizer()\n", "en_vocab = Vocabulary(en_tokenizer, max_freq=max_freq, unk=True, sos=True, eos=True)\n", "en_vocab.build_vocabulary(df_train['EN_sentence'].values)\n", "\n", "print(f'{len(ar_vocab) = }, {len(en_vocab) = }')" ] }, { "cell_type": "code", "execution_count": 25, "id": "72ed39a2", "metadata": { "_cell_guid": "4789d166-345f-43a8-b385-17f80b65e83b", "_uuid": "44ef404a-9854-43c5-925c-8a34e31ccc49", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:57.494221Z", "iopub.status.busy": "2024-11-11T01:08:57.493852Z", "iopub.status.idle": "2024-11-11T01:08:57.499789Z", "shell.execute_reply": "2024-11-11T01:08:57.498912Z" }, "executionInfo": { "elapsed": 6, "status": "ok", "timestamp": 1731233931210, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "jENeERnqgomm", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.031231, "end_time": "2024-11-11T01:08:57.501731", "exception": false, "start_time": "2024-11-11T01:08:57.470500", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "train_ds = MT_Dataset(df_train['EN_sentence'].values, df_train['AR_sentence'].values,\n", " en_vocabulary=en_vocab, ar_vocabulary=ar_vocab)\n", "\n", "valid_ds = MT_Dataset(df_valid['EN_sentence'].values, df_valid['AR_sentence'].values,\n", " en_vocabulary=en_vocab, ar_vocabulary=ar_vocab)\n", "\n", "test_ds = MT_Dataset(df_test['EN_sentence'].values, df_train['AR_sentence'].values,\n", " en_vocabulary=en_vocab, ar_vocabulary=ar_vocab)" ] }, { "cell_type": "code", "execution_count": 26, "id": "98262ce7", "metadata": { "_cell_guid": "a10ccc79-3902-471a-928f-cba34ca00282", "_uuid": "06399323-d0b3-4af8-9019-3430895769e8", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:57.547519Z", "iopub.status.busy": "2024-11-11T01:08:57.547185Z", "iopub.status.idle": "2024-11-11T01:08:57.600479Z", "shell.execute_reply": "2024-11-11T01:08:57.599545Z" }, "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1731233931210, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "HT8HBMCKh_Sx", "jupyter": { "outputs_hidden": false }, "outputId": "50cdbe43-dce1-4945-b870-ccc8cdeaa8ab", "papermill": { "duration": 0.078699, "end_time": "2024-11-11T01:08:57.602616", "exception": false, "start_time": "2024-11-11T01:08:57.523917", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(' how old is ? ', ' كم عمر بلاد القبايل ؟ ')" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "en_vocab.tokens_to_text(train_ds[4][0]), ar_vocab.tokens_to_text(train_ds[4][1])" ] }, { "cell_type": "code", "execution_count": 27, "id": "87f401fa", "metadata": { "_cell_guid": "ece9c447-29db-41df-bf0f-b99dfd019872", "_uuid": "3b59c099-2c91-40b2-a7f1-e2b8a51c34e6", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:57.648998Z", "iopub.status.busy": "2024-11-11T01:08:57.648637Z", "iopub.status.idle": "2024-11-11T01:08:59.723232Z", "shell.execute_reply": "2024-11-11T01:08:59.721945Z" }, "executionInfo": { "elapsed": 828, "status": "ok", "timestamp": 1731233932034, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "YnL7lnOQiJeM", "jupyter": { "outputs_hidden": false }, "outputId": "e94a9318-e02a-47a9-b1fe-3af145637ae6", "papermill": { "duration": 2.100321, "end_time": "2024-11-11T01:08:59.725450", "exception": false, "start_time": "2024-11-11T01:08:57.625129", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "torch.Size([64, 19])\n", "torch.Size([64, 18])\n" ] } ], "source": [ "mycollate = MYCollate(batch_first=True, pad_value=0)\n", "pin_memory = True if device == 'cuda' else False\n", "num_workers = 4 if isKaggle else 2\n", "\n", "train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True,\n", " collate_fn=mycollate, num_workers=num_workers,\n", " generator=g, pin_memory=pin_memory)\n", "\n", "valid_loader = DataLoader(valid_ds, batch_size=batch_size, shuffle=False,\n", " collate_fn=mycollate, num_workers=num_workers,\n", " generator=g, pin_memory=pin_memory)\n", "\n", "test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False,\n", " collate_fn=mycollate, num_workers=num_workers,\n", " generator=g, pin_memory=pin_memory)\n", "\n", "for x, y in train_loader:\n", " print(x.shape)\n", " print(y.shape)\n", " break" ] }, { "cell_type": "markdown", "id": "adbd7977", "metadata": { "_cell_guid": "a817db50-1bc3-42ff-81bc-1a9344854d0a", "_uuid": "eb280099-6497-412f-bd28-dce39f6172ef", "collapsed": false, "id": "AHHg7lo7ioRi", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022273, "end_time": "2024-11-11T01:08:59.770703", "exception": false, "start_time": "2024-11-11T01:08:59.748430", "status": "completed" }, "tags": [] }, "source": [ "## Form Embedding matrices" ] }, { "cell_type": "markdown", "id": "0ee89422", "metadata": { "_cell_guid": "5cc6b90b-3f42-4ec1-9929-e11cab7728dc", "_uuid": "d75f3d3e-6988-4a11-8ffc-f95dabab89b7", "collapsed": false, "id": "19wgB5jtiwmR", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022131, "end_time": "2024-11-11T01:08:59.815123", "exception": false, "start_time": "2024-11-11T01:08:59.792992", "status": "completed" }, "tags": [] }, "source": [ "### Ar." ] }, { "cell_type": "code", "execution_count": 28, "id": "a6284ec3", "metadata": { "_cell_guid": "57659a37-f1a4-4d5b-8dc3-fcb0dcc17062", "_uuid": "c05ba1a3-c973-4e3f-beaf-6b33de3d1b03", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:08:59.862377Z", "iopub.status.busy": "2024-11-11T01:08:59.861445Z", "iopub.status.idle": "2024-11-11T01:08:59.887720Z", "shell.execute_reply": "2024-11-11T01:08:59.886941Z" }, "executionInfo": { "elapsed": 5, "status": "ok", "timestamp": 1731233932034, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "B2KiRkeUioIm", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.052234, "end_time": "2024-11-11T01:08:59.889752", "exception": false, "start_time": "2024-11-11T01:08:59.837518", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "ar_embedding_matrix = torch.rand((len(ar_vocab), embd_features))*0.1" ] }, { "cell_type": "markdown", "id": "a3c6413b", "metadata": { "_cell_guid": "dc32ef97-4bdf-4326-9c1e-041d530de0ff", "_uuid": "c7b73bf9-ecda-4be5-882c-7ed91a0a6bdf", "collapsed": false, "id": "HTLciTVMiyiC", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022308, "end_time": "2024-11-11T01:08:59.934896", "exception": false, "start_time": "2024-11-11T01:08:59.912588", "status": "completed" }, "tags": [] }, "source": [ "### En." ] }, { "cell_type": "code", "execution_count": 29, "id": "74ee5a8c", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:08:59.982378Z", "iopub.status.busy": "2024-11-11T01:08:59.981513Z", "iopub.status.idle": "2024-11-11T01:08:59.992360Z", "shell.execute_reply": "2024-11-11T01:08:59.991610Z" }, "papermill": { "duration": 0.037074, "end_time": "2024-11-11T01:08:59.994510", "exception": false, "start_time": "2024-11-11T01:08:59.957436", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "en_embedding_matrix = torch.rand((len(en_vocab), embd_features))" ] }, { "cell_type": "code", "execution_count": 30, "id": "da92aeb6", "metadata": { "_cell_guid": "7a924a18-13e3-4032-9643-de520b116fd9", "_uuid": "02e9e880-3a98-46a7-946a-d23d8567728e", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:00.043552Z", "iopub.status.busy": "2024-11-11T01:09:00.042887Z", "iopub.status.idle": "2024-11-11T01:09:00.047745Z", "shell.execute_reply": "2024-11-11T01:09:00.046435Z" }, "executionInfo": { "elapsed": 163275, "status": "ok", "timestamp": 1731234095305, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "uNLla5SVin7S", "jupyter": { "outputs_hidden": false }, "outputId": "bba535e6-a616-46fa-caee-cfe9697bb9c0", "papermill": { "duration": 0.032874, "end_time": "2024-11-11T01:09:00.050056", "exception": false, "start_time": "2024-11-11T01:09:00.017182", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# !wget https://nlp.stanford.edu/data/glove.6B.zip\n", "\n", "# with ZipFile(f'{base_dir}/glove.6B.zip') as file:\n", "# file.extract('glove.6B.100d.txt')" ] }, { "cell_type": "code", "execution_count": 31, "id": "86bcc5a8", "metadata": { "_cell_guid": "35254387-29e7-4d9a-8df1-b487dbb8a5cb", "_uuid": "4cf53f0b-d2c3-43a8-b4b2-2773ecce4169", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:00.097396Z", "iopub.status.busy": "2024-11-11T01:09:00.096704Z", "iopub.status.idle": "2024-11-11T01:09:00.101405Z", "shell.execute_reply": "2024-11-11T01:09:00.100538Z" }, "executionInfo": { "elapsed": 4732, "status": "ok", "timestamp": 1731234100034, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "er-PzgMCin4-", "jupyter": { "outputs_hidden": false }, "outputId": "c586fe92-fe2b-4175-d356-1f6a92071b75", "papermill": { "duration": 0.030074, "end_time": "2024-11-11T01:09:00.103374", "exception": false, "start_time": "2024-11-11T01:09:00.073300", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# # form an embedding matrix form glove pre-trained embedding weights for my vocab\n", "# glove_embeddings_index = dict()\n", "# en_embedding_matrix = torch.rand((len(en_vocab), embd_features))*0.1\n", "# vocab_dict = en_vocab.get_vocabulary()\n", "# found = 0\n", "\n", "# with open('glove.6B.100d.txt') as f:\n", "# for line in f:\n", "# values = line.split()\n", "# word = values[0]\n", "\n", "# # token = word_tokenizer.word_tokenizing(word, stemming=False)\n", "# token = word\n", "# if token:\n", "# # print(word)\n", "# idx = vocab_dict.get(token, 0)\n", "# if idx:\n", "# found += 1\n", "# coefs = torch.tensor([float(value) for value in values[1:]])\n", "# en_embedding_matrix[idx] = coefs\n", "# # glove_embeddings_index[word] = coefs\n", "\n", "# print('Found %s words.' % found)\n", "# print(\"Embedding matrix shape: \", en_embedding_matrix.shape)\n" ] }, { "cell_type": "code", "execution_count": 32, "id": "d75d4bb4", "metadata": { "_cell_guid": "5e0d0e86-e19b-4437-9688-ea151c2e7a91", "_uuid": "ecda150d-f859-41d6-9ebc-059ea8634b68", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:00.149757Z", "iopub.status.busy": "2024-11-11T01:09:00.149101Z", "iopub.status.idle": "2024-11-11T01:09:01.682289Z", "shell.execute_reply": "2024-11-11T01:09:01.681077Z" }, "executionInfo": { "elapsed": 261, "status": "ok", "timestamp": 1731234100289, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "5A4dFwe6inte", "jupyter": { "outputs_hidden": false }, "outputId": "edeceb73-d326-4ce3-dfef-80d0764bb5e4", "papermill": { "duration": 1.558894, "end_time": "2024-11-11T01:09:01.684578", "exception": false, "start_time": "2024-11-11T01:09:00.125684", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before Embedding layer\t\ttorch.Size([64, 19])\t\ttorch.Size([64, 18])\n", "After Embedding layer\t\ttorch.Size([64, 19, 128])\ttorch.Size([64, 18, 128])\n" ] } ], "source": [ "for x, y in test_loader:\n", " print('Before Embedding layer', x.shape, y.shape, sep='\\t\\t')\n", " en_emb = en_embedding_matrix[x]\n", " ar_emb = ar_embedding_matrix[y]\n", " print('After Embedding layer\\t', en_emb.shape, ar_emb.shape, sep='\\t')\n", " break" ] }, { "cell_type": "markdown", "id": "0b13b3f7", "metadata": { "_cell_guid": "8934e170-4176-41b6-922d-c7b143f173fd", "_uuid": "c5dfbf25-e82c-4d5a-9e2d-fb1a3c08fa5c", "collapsed": false, "id": "jdDsKs2rne2H", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022639, "end_time": "2024-11-11T01:09:01.730088", "exception": false, "start_time": "2024-11-11T01:09:01.707449", "status": "completed" }, "tags": [] }, "source": [ "## Models: Defining, Training and Evaluation" ] }, { "cell_type": "code", "execution_count": 33, "id": "e2ee4bd6", "metadata": { "_cell_guid": "0e1ca778-cb8c-42ac-9d11-6e79543ca0cf", "_uuid": "c5f7431f-fb18-4a2b-9bda-edadbab27db2", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:01.777291Z", "iopub.status.busy": "2024-11-11T01:09:01.776908Z", "iopub.status.idle": "2024-11-11T01:09:01.797446Z", "shell.execute_reply": "2024-11-11T01:09:01.796516Z" }, "executionInfo": { "elapsed": 282, "status": "ok", "timestamp": 1731243010208, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "7Ongp0CUnzNG", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.046575, "end_time": "2024-11-11T01:09:01.799369", "exception": false, "start_time": "2024-11-11T01:09:01.752794", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "class Encoder(nn.Module):\n", " def __init__(self, embd_matrix:torch.Tensor, pretrained:bool, lstm_hidden_size, lstm_layers=1, dropout_probability=0.1):\n", " super().__init__()\n", " self.hidden_size = lstm_hidden_size\n", " self.lstm_layers = lstm_layers\n", " self.input_size = embd_matrix.size(-1)\n", " self.embd_layer = nn.Embedding.from_pretrained(embd_matrix, freeze=False) if pretrained else nn.Embedding(embd_matrix.size(0), embd_matrix.size(1))\n", "\n", " self.dropout = nn.Dropout(dropout_probability)\n", " self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.lstm_layers, dropout=dropout_probability, batch_first=True)\n", "\n", " def forward(self, x):\n", " embds = self.dropout(self.embd_layer(x))\n", " output, (hidden, cell) = self.lstm(embds)\n", " return hidden, cell\n", "\n", "\n", "class Decoder(nn.Module):\n", " def __init__(self, embd_matrix:torch.Tensor, pretrained:bool, lstm_hidden_size, lstm_layers=1, dropout_probability=0.1):\n", " super().__init__()\n", " self.hidden_size = lstm_hidden_size\n", " self.lstm_layers = lstm_layers\n", " self.input_size = embd_matrix.size(-1)\n", " self.embd_layer = nn.Embedding.from_pretrained(embd_matrix, freeze=False) if pretrained else nn.Embedding(embd_matrix.size(0), embd_matrix.size(1))\n", "\n", " self.dropout = nn.Dropout(dropout_probability)\n", " self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.lstm_layers, dropout=dropout_probability, batch_first=True)\n", " self.fc = nn.Linear(self.hidden_size, embd_matrix.size(0))\n", " \n", " def forward(self, x, hidden_t_1, cell_t_1):\n", " embds = self.dropout(self.embd_layer(x))\n", " output, (hidden_t, cell_t) = self.lstm(embds, (hidden_t_1, cell_t_1))\n", " needed_hidden = hidden_t[-1]\n", " logits = self.fc(needed_hidden)\n", " return logits, hidden_t, cell_t\n", "\n", "\n", "class Seq2seq_no_attention(nn.Module):\n", " def __init__(self, encoder:Encoder, decoder:Decoder):\n", " super(Seq2seq_no_attention, self).__init__()\n", "\n", " self.decoder_vocab_size = decoder.embd_layer.weight.size(0)\n", " self.encoder = encoder\n", " self.decoder = decoder\n", "\n", " def forward(self, source, target, teacher_force_ratio=0.5):\n", "\n", " batch_size, seq_len = target.size()\n", " x = target[:, [0]]\n", " total_outputs = torch.zeros(batch_size, seq_len, self.decoder_vocab_size, device=source.device)\n", "\n", " context = self.encoder(source)\n", "\n", " for step in range(1, seq_len):\n", " logits, *context = self.decoder(x, *context)\n", " \n", " total_outputs[:, step] = logits\n", " top1 = logits.argmax(1, keepdim=True)\n", " x = target[:, [step]] if teacher_force_ratio > random.random() else top1\n", "\n", " return total_outputs\n", "\n", " def translate(self, source:torch.Tensor, max_tries=50):\n", " \n", " source.unsqueeze(0)\n", " targets_hat = [2]\n", "\n", " context = self.encoder(source)\n", "\n", " for step in range(1, max_tries):\n", " x = torch.tensor([targets_hat[-1]]).to(source.device)\n", " logits, *context = self.decoder(x, *context)\n", " top1 = logits.argmax(0)\n", " targets_hat.append(top1.item())\n", " if top1 == 3:\n", " return targets_hat\n", "\n", " return targets_hat\n" ] }, { "cell_type": "code", "execution_count": 34, "id": "b03c237c", "metadata": { "_cell_guid": "83ff9ba2-424f-4099-aa64-3fe48dfc6371", "_uuid": "54389c38-3b2c-4856-9950-25cf7b27fcdb", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:01.846284Z", "iopub.status.busy": "2024-11-11T01:09:01.845958Z", "iopub.status.idle": "2024-11-11T01:09:01.856131Z", "shell.execute_reply": "2024-11-11T01:09:01.855239Z" }, "executionInfo": { "elapsed": 266, "status": "ok", "timestamp": 1731242572842, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "ofJbaxvAnqlm", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.035844, "end_time": "2024-11-11T01:09:01.858008", "exception": false, "start_time": "2024-11-11T01:09:01.822164", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "## Helper\n", "@torch.no_grad()\n", "def loss_acc_loader(model, data_loader, device, force=0.5):\n", " model.eval()\n", "\n", " total_predicts = []\n", " total_targets = []\n", " total_class_loss = 0\n", "\n", " for data, labels in data_loader:\n", " data = data.to(device)\n", " labels = labels.to(device)\n", "\n", " class_logits = model(data, labels, force)\n", " class_logits_flat = class_logits[:, 1:, :].reshape(-1, class_logits.size(-1))\n", " labels_flat = labels[:, 1:].reshape(-1)\n", "\n", " total_class_loss += class_criterion(class_logits_flat, labels_flat).item()\n", "\n", " predictions = class_logits_flat.argmax(-1)\n", " non_pad_mask = labels_flat != 0\n", " filtered_predictions = predictions[non_pad_mask]\n", " filtered_labels = labels_flat[non_pad_mask]\n", "\n", " total_predicts.append(filtered_predictions)\n", " total_targets.append(filtered_labels)\n", "\n", " total_predicts = torch.concat(total_predicts)\n", " total_targets = torch.concat(total_targets)\n", "\n", " avg_class_loss = total_class_loss / len(data_loader)\n", " accuracy = (total_predicts == total_targets).float().mean().item()\n", "\n", " return avg_class_loss, accuracy, total_predicts, total_targets\n", "\n", "\n", "def get_parameters_info(model):\n", " trainable = sum(p.numel() for p in model.parameters() if p.requires_grad==True)\n", " nontrainable = sum(p.numel() for p in model.parameters() if p.requires_grad==False)\n", "\n", " return trainable, nontrainable" ] }, { "cell_type": "code", "execution_count": 35, "id": "5e711cca", "metadata": { "_cell_guid": "9ce859d3-2a47-4e02-b39c-49f7e4620838", "_uuid": "f76d300d-abc8-40eb-a742-2705933799de", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:01.904906Z", "iopub.status.busy": "2024-11-11T01:09:01.904575Z", "iopub.status.idle": "2024-11-11T01:09:03.177673Z", "shell.execute_reply": "2024-11-11T01:09:03.176842Z" }, "executionInfo": { "elapsed": 373, "status": "ok", "timestamp": 1731244140261, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "LqquDrUDoVzF", "jupyter": { "outputs_hidden": false }, "outputId": "7159acf6-c1be-4cc4-c261-d8f7a8e00259", "papermill": { "duration": 1.299418, "end_time": "2024-11-11T01:09:03.180156", "exception": false, "start_time": "2024-11-11T01:09:01.880738", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total trainable parameters = 5166426\n", "Total non-trainable parameters = 0\n" ] } ], "source": [ "encoder = Encoder(en_embedding_matrix, False, lstm_hidden_size, lstm_layers, dropout_p).to(device)\n", "decoder = Decoder(ar_embedding_matrix, False, lstm_hidden_size, lstm_layers, dropout_p).to(device)\n", "seq2seq = Seq2seq_no_attention(encoder, decoder).to(device)\n", "\n", "tr, nontr = get_parameters_info(seq2seq)\n", "print(f\"Total trainable parameters = {tr}\\nTotal non-trainable parameters = {nontr}\")\n", "\n", "class_criterion = nn.CrossEntropyLoss(ignore_index=0) # Classification loss\n", "\n", "optim = torch.optim.Adam(seq2seq.parameters(), lr)\n", "scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer=optim, mode='min', factor=0.5,\n", " patience=4, min_lr=1e-5, threshold=0.001)\n" ] }, { "cell_type": "markdown", "id": "da896026", "metadata": { "_cell_guid": "9b1b9af2-f4b6-475a-9344-52ac5c090f8c", "_uuid": "a26a7d97-7db7-4da3-a904-13ca126d615a", "collapsed": false, "id": "W2Fux31-Lqbc", "jupyter": { "outputs_hidden": false }, "papermill": { "duration": 0.022404, "end_time": "2024-11-11T01:09:03.225919", "exception": false, "start_time": "2024-11-11T01:09:03.203515", "status": "completed" }, "tags": [] }, "source": [ "### Training loop" ] }, { "cell_type": "code", "execution_count": 36, "id": "dbb089ff", "metadata": { "_cell_guid": "bb8befcd-46a7-4d65-b6f1-131765ca660e", "_uuid": "61512162-89c4-405b-ae55-98b944027795", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:03.272905Z", "iopub.status.busy": "2024-11-11T01:09:03.272192Z", "iopub.status.idle": "2024-11-11T01:09:03.277532Z", "shell.execute_reply": "2024-11-11T01:09:03.276524Z" }, "id": "hkLo26GQLs-h", "jupyter": { "outputs_hidden": false }, "outputId": "b260f65b-7cd0-40ce-962c-792fe01b847f", "papermill": { "duration": 0.031216, "end_time": "2024-11-11T01:09:03.279502", "exception": false, "start_time": "2024-11-11T01:09:03.248286", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# ## overfit one-batch\n", "\n", "# x_batch, y_batch = next(iter(train_loader))\n", "# data = x_batch.to(device)\n", "# labels = y_batch.to(device)\n", "# epochs = 2000\n", "# for epoch in range(epochs):\n", "\n", "# class_logits = seq2seq(data, labels)\n", "\n", "# class_logits_flat = class_logits[:, 1:, :].reshape(-1, class_logits.size(-1))\n", "# labels_flat = labels[:, 1:].reshape(-1)\n", "# loss = class_criterion(class_logits_flat, labels_flat)\n", "\n", "\n", "# optim.zero_grad()\n", "# loss.backward()\n", "# optim.step()\n", "\n", "# predictions = class_logits_flat.argmax(-1)\n", "\n", "# non_pad_mask = labels_flat != 0\n", "# filtered_predictions = predictions[non_pad_mask]\n", "# filtered_labels = labels_flat[non_pad_mask]\n", "\n", "# if (epoch+1) % 100 == 0:\n", "# print(f'Epoch {epoch+1}/{epochs}: ', end='\\t')\n", "# print(f'Loss = {loss.item():.2f}', f'Accuracy = {(filtered_predictions==filtered_labels).sum()}/{filtered_predictions.size(0)}')\n" ] }, { "cell_type": "code", "execution_count": 37, "id": "e2d4f657", "metadata": { "_cell_guid": "2259b2ba-27bd-456f-8306-452d326903a9", "_uuid": "cc50041b-b460-4312-9afd-57997d737e8b", "collapsed": false, "execution": { "iopub.execute_input": "2024-11-11T01:09:03.326421Z", "iopub.status.busy": "2024-11-11T01:09:03.326051Z", "iopub.status.idle": "2024-11-11T01:46:49.101159Z", "shell.execute_reply": "2024-11-11T01:46:49.099821Z" }, "executionInfo": { "elapsed": 48635, "status": "error", "timestamp": 1731243891889, "user": { "displayName": "Abdelrhman Ashraf", "userId": "11249532378747886614" }, "user_tz": -120 }, "id": "O5-7kPlBo0fQ", "jupyter": { "outputs_hidden": false }, "outputId": "9a36329a-8f7f-4817-d054-29ab0a23d74a", "papermill": { "duration": 2265.801013, "end_time": "2024-11-11T01:46:49.103299", "exception": false, "start_time": "2024-11-11T01:09:03.302286", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Epoch [1/50]: 100%|██████████| 471/471 [00:40<00:00, 11.77it/s, loss = 6.4592]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.7168, Accuracy 23.62%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [2/50]: 100%|██████████| 471/471 [00:39<00:00, 11.83it/s, loss = 6.0609]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.6238, Accuracy 23.53%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [3/50]: 100%|██████████| 471/471 [00:39<00:00, 11.83it/s, loss = 5.8814]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.3845, Accuracy 26.77%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [4/50]: 100%|██████████| 471/471 [00:39<00:00, 11.90it/s, loss = 5.6264]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.2063, Accuracy 29.15%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [5/50]: 100%|██████████| 471/471 [00:40<00:00, 11.76it/s, loss = 5.4464]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.1234, Accuracy 29.82%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [6/50]: 100%|██████████| 471/471 [00:39<00:00, 11.88it/s, loss = 5.3096]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.0456, Accuracy 30.42%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [7/50]: 100%|██████████| 471/471 [00:40<00:00, 11.75it/s, loss = 5.2016]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 5.0097, Accuracy 30.64%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [8/50]: 100%|██████████| 471/471 [00:40<00:00, 11.67it/s, loss = 5.1055]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.9509, Accuracy 31.39%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [9/50]: 100%|██████████| 471/471 [00:40<00:00, 11.68it/s, loss = 5.0176]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.9131, Accuracy 31.51%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [10/50]: 100%|██████████| 471/471 [00:40<00:00, 11.63it/s, loss = 4.9369]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.8733, Accuracy 32.09%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [11/50]: 100%|██████████| 471/471 [00:39<00:00, 11.85it/s, loss = 4.8611]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.8501, Accuracy 32.30%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [12/50]: 100%|██████████| 471/471 [00:40<00:00, 11.60it/s, loss = 4.7879]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.8298, Accuracy 32.63%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [13/50]: 100%|██████████| 471/471 [00:39<00:00, 11.82it/s, loss = 4.7194]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.8121, Accuracy 32.77%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [14/50]: 100%|██████████| 471/471 [00:40<00:00, 11.67it/s, loss = 4.6513]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7804, Accuracy 33.13%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [15/50]: 100%|██████████| 471/471 [00:39<00:00, 11.78it/s, loss = 4.5977]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7882, Accuracy 33.15%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [16/50]: 100%|██████████| 471/471 [00:40<00:00, 11.74it/s, loss = 4.5431]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7742, Accuracy 33.23%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [17/50]: 100%|██████████| 471/471 [00:40<00:00, 11.62it/s, loss = 4.4842]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7753, Accuracy 33.38%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [18/50]: 100%|██████████| 471/471 [00:39<00:00, 11.80it/s, loss = 4.4343]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7556, Accuracy 33.61%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [19/50]: 100%|██████████| 471/471 [00:40<00:00, 11.66it/s, loss = 4.3848]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7498, Accuracy 33.89%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [20/50]: 100%|██████████| 471/471 [00:39<00:00, 11.78it/s, loss = 4.3307]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7472, Accuracy 33.93%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [21/50]: 100%|██████████| 471/471 [00:39<00:00, 11.87it/s, loss = 4.2856]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7475, Accuracy 34.14%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [22/50]: 100%|██████████| 471/471 [00:40<00:00, 11.51it/s, loss = 4.2378]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7519, Accuracy 34.11%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [23/50]: 100%|██████████| 471/471 [00:40<00:00, 11.75it/s, loss = 4.1954]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7465, Accuracy 34.43%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [24/50]: 100%|██████████| 471/471 [00:39<00:00, 11.86it/s, loss = 4.1571]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7485, Accuracy 34.14%\n", "Epoch 24: Reducing learning rate from 0.001000 to 0.000500\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [25/50]: 100%|██████████| 471/471 [00:40<00:00, 11.66it/s, loss = 4.0588]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7330, Accuracy 34.68%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [26/50]: 100%|██████████| 471/471 [00:40<00:00, 11.67it/s, loss = 4.0317]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7418, Accuracy 34.63%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [27/50]: 100%|██████████| 471/471 [00:40<00:00, 11.59it/s, loss = 4.0065]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7302, Accuracy 34.81%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [28/50]: 100%|██████████| 471/471 [00:40<00:00, 11.55it/s, loss = 3.9884]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7486, Accuracy 34.57%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [29/50]: 100%|██████████| 471/471 [00:40<00:00, 11.68it/s, loss = 3.9701]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7332, Accuracy 34.94%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [30/50]: 100%|██████████| 471/471 [00:40<00:00, 11.66it/s, loss = 3.9443]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7443, Accuracy 34.81%\n", "Epoch 30: Reducing learning rate from 0.000500 to 0.000250\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [31/50]: 100%|██████████| 471/471 [00:41<00:00, 11.45it/s, loss = 3.8933]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7394, Accuracy 34.99%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [32/50]: 100%|██████████| 471/471 [00:40<00:00, 11.70it/s, loss = 3.8777]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7291, Accuracy 35.20%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [33/50]: 100%|██████████| 471/471 [00:40<00:00, 11.64it/s, loss = 3.8656]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7376, Accuracy 34.94%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [34/50]: 100%|██████████| 471/471 [00:41<00:00, 11.37it/s, loss = 3.8499]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7490, Accuracy 34.96%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [35/50]: 100%|██████████| 471/471 [00:40<00:00, 11.72it/s, loss = 3.8447]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7392, Accuracy 35.16%\n", "Epoch 35: Reducing learning rate from 0.000250 to 0.000125\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [36/50]: 100%|██████████| 471/471 [00:40<00:00, 11.65it/s, loss = 3.8158]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7406, Accuracy 35.12%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [37/50]: 100%|██████████| 471/471 [00:41<00:00, 11.47it/s, loss = 3.812]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7404, Accuracy 34.95%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [38/50]: 100%|██████████| 471/471 [00:40<00:00, 11.65it/s, loss = 3.8081]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7574, Accuracy 34.94%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [39/50]: 100%|██████████| 471/471 [00:41<00:00, 11.49it/s, loss = 3.7999]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7548, Accuracy 34.99%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [40/50]: 100%|██████████| 471/471 [00:40<00:00, 11.67it/s, loss = 3.7986]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7505, Accuracy 35.15%\n", "Epoch 40: Reducing learning rate from 0.000125 to 0.000063\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [41/50]: 100%|██████████| 471/471 [00:40<00:00, 11.56it/s, loss = 3.7804]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7394, Accuracy 35.19%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [42/50]: 100%|██████████| 471/471 [00:40<00:00, 11.76it/s, loss = 3.7724]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7561, Accuracy 35.01%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [43/50]: 100%|██████████| 471/471 [00:40<00:00, 11.68it/s, loss = 3.774]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7509, Accuracy 35.22%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [44/50]: 100%|██████████| 471/471 [00:40<00:00, 11.53it/s, loss = 3.7687]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7498, Accuracy 35.14%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [45/50]: 100%|██████████| 471/471 [00:40<00:00, 11.68it/s, loss = 3.7644]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7486, Accuracy 35.11%\n", "Epoch 45: Reducing learning rate from 0.000063 to 0.000031\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [46/50]: 100%|██████████| 471/471 [00:41<00:00, 11.48it/s, loss = 3.7604]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7420, Accuracy 35.20%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [47/50]: 100%|██████████| 471/471 [00:40<00:00, 11.63it/s, loss = 3.758]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7525, Accuracy 35.03%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [48/50]: 100%|██████████| 471/471 [00:42<00:00, 11.13it/s, loss = 3.7641]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7503, Accuracy 35.09%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [49/50]: 100%|██████████| 471/471 [00:40<00:00, 11.54it/s, loss = 3.7553]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7557, Accuracy 35.02%\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "Epoch [50/50]: 100%|██████████| 471/471 [00:40<00:00, 11.61it/s, loss = 3.757]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Validation: Class Loss 4.7532, Accuracy 35.05%\n", "Epoch 50: Reducing learning rate from 0.000031 to 0.000016\n" ] } ], "source": [ "total_batches = len(train_loader)\n", "train_class_losses = []\n", "val_class_losses = []\n", "teacher_force = 0.5\n", "\n", "for epoch in range(epochs):\n", " seq2seq.train() # Set the model to training mode\n", " epoch_losses = []\n", "\n", " tqdm_loop = tqdm(enumerate(train_loader), total=total_batches, position=0)\n", " for batch_idx, (data, labels) in tqdm_loop:\n", " # Get data\n", " data = data.to(device)\n", " labels = labels.to(device)\n", " # Forward\n", " class_logits = seq2seq(data, labels, teacher_force)\n", "\n", " class_logits_flat = class_logits[:, 1:, :].reshape(-1, class_logits.size(-1))\n", " labels_flat = labels[:, 1:].reshape(-1)\n", " loss = class_criterion(class_logits_flat, labels_flat)\n", "\n", " epoch_losses.append(loss.item())\n", " mean_epoch_loss = sum(epoch_losses)/len(epoch_losses)\n", " \n", " # Backward\n", " optim.zero_grad()\n", " loss.backward()\n", " nn.utils.clip_grad_norm_(seq2seq.parameters(), max_norm=1)\n", " optim.step()\n", " # Update progress bar\n", " tqdm_loop.set_description(f\"Epoch [{epoch+1}/{epochs}]\")\n", " tqdm_loop.set_postfix_str(f'loss = {round(mean_epoch_loss, 4)}')\n", "\n", " train_class_losses.append(mean_epoch_loss)\n", "\n", " val_class_loss, val_accuracy, _, _ = loss_acc_loader(seq2seq, valid_loader, device, force=teacher_force)\n", " val_class_losses.append(val_class_loss)\n", " print(f'Validation: Class Loss {val_class_loss:.4f}, Accuracy {val_accuracy*100:.2f}%')\n", "\n", " # Get the current learning rate from the optimizer\n", " current_lr = optim.param_groups[0]['lr']\n", " \n", " # Step the scheduler with the validation loss\n", " scheduler.step(val_class_loss)\n", " \n", " # Check if the learning rate has changed\n", " new_lr = optim.param_groups[0]['lr']\n", " if new_lr < current_lr:\n", " print(f\"Epoch {epoch + 1}: Reducing learning rate from {current_lr:.6f} to {new_lr:.6f}\")" ] }, { "cell_type": "code", "execution_count": 38, "id": "19bc881b", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:46:58.846786Z", "iopub.status.busy": "2024-11-11T01:46:58.846315Z", "iopub.status.idle": "2024-11-11T01:46:58.851486Z", "shell.execute_reply": "2024-11-11T01:46:58.850615Z" }, "papermill": { "duration": 4.880185, "end_time": "2024-11-11T01:46:58.853401", "exception": false, "start_time": "2024-11-11T01:46:53.973216", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "# if valid_loss < best_loss:\n", "# best_loss, best_epoch = valid_loss, epoch\n", "# torch.save(model.state_dict(), 'model.pt')\n", "# early_stop_counter = 0\n", "# else:\n", "# early_stop_counter += 1\n", "# if early_stop_counter >= PATIENCE:\n", "# print(f'Early stopping after epoch {epoch+1}: no improvement for {PATIENCE} epochs.')\n", "# break\n", "\n", "# if early_stop_counter < PATIENCE:\n", "# print(f'Best validation loss of {best_loss:.3f} at epoch {best_epoch+1}.')" ] }, { "cell_type": "code", "execution_count": 39, "id": "ff243f30", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:47:08.655183Z", "iopub.status.busy": "2024-11-11T01:47:08.654796Z", "iopub.status.idle": "2024-11-11T01:47:08.880763Z", "shell.execute_reply": "2024-11-11T01:47:08.879807Z" }, "papermill": { "duration": 5.170037, "end_time": "2024-11-11T01:47:08.882916", "exception": false, "start_time": "2024-11-11T01:47:03.712879", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(8, 3))\n", "plt.plot(\n", " [ep + 1 for ep in range(len(train_class_losses))],\n", " train_class_losses, val_class_losses,)\n", "plt.xlabel(\"Epochs\")\n", "plt.ylabel(\"Loss\")\n", "plt.show()" ] }, { "cell_type": "markdown", "id": "18c74f36", "metadata": { "papermill": { "duration": 4.90533, "end_time": "2024-11-11T01:47:18.576864", "exception": false, "start_time": "2024-11-11T01:47:13.671534", "status": "completed" }, "tags": [] }, "source": [ "### Evaluation" ] }, { "cell_type": "code", "execution_count": 40, "id": "6117fd27", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:47:28.307799Z", "iopub.status.busy": "2024-11-11T01:47:28.307151Z", "iopub.status.idle": "2024-11-11T01:47:33.044286Z", "shell.execute_reply": "2024-11-11T01:47:33.042989Z" }, "papermill": { "duration": 9.615093, "end_time": "2024-11-11T01:47:33.046353", "exception": false, "start_time": "2024-11-11T01:47:23.431260", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test: Class Loss 8.5770, Accuracy 12.84%\n" ] } ], "source": [ "test_class_loss, test_accuracy, _, _ = loss_acc_loader(seq2seq, test_loader, device, force=0.5)\n", "\n", "print(f\"Test: Class Loss {test_class_loss:.4f}, Accuracy {test_accuracy*100:.2f}%\")" ] }, { "cell_type": "code", "execution_count": 41, "id": "33c197d0", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:47:42.781774Z", "iopub.status.busy": "2024-11-11T01:47:42.781329Z", "iopub.status.idle": "2024-11-11T01:47:47.368858Z", "shell.execute_reply": "2024-11-11T01:47:47.367498Z" }, "papermill": { "duration": 9.460011, "end_time": "2024-11-11T01:47:47.371036", "exception": false, "start_time": "2024-11-11T01:47:37.911025", "status": "completed" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Test: Class Loss 8.7582, Accuracy 13.08%\n" ] } ], "source": [ "## With zero teacher_force\n", "test_class_loss, test_accuracy, _, _ = loss_acc_loader(seq2seq, test_loader, device, force=0.0)\n", "\n", "print(f\"Test: Class Loss {test_class_loss:.4f}, Accuracy {test_accuracy*100:.2f}%\")" ] }, { "cell_type": "code", "execution_count": 42, "id": "ae8364b8", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:47:57.106807Z", "iopub.status.busy": "2024-11-11T01:47:57.105844Z", "iopub.status.idle": "2024-11-11T01:47:57.152764Z", "shell.execute_reply": "2024-11-11T01:47:57.151831Z" }, "papermill": { "duration": 4.923818, "end_time": "2024-11-11T01:47:57.154755", "exception": false, "start_time": "2024-11-11T01:47:52.230937", "status": "completed" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "' هو '" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "custom_test = \"He's writing a letter to his brother.\"\n", "preprocessed = preprocess_en(custom_test)\n", "en_tokens = torch.tensor(en_vocab(preprocessed)).to(device)\n", "targets_hat = seq2seq.translate(en_tokens, 30)\n", "ar_vocab.tokens_to_text(targets_hat)" ] }, { "cell_type": "markdown", "id": "7b041b82", "metadata": { "papermill": { "duration": 4.915677, "end_time": "2024-11-11T01:48:06.885959", "exception": false, "start_time": "2024-11-11T01:48:01.970282", "status": "completed" }, "tags": [] }, "source": [ "### Saving" ] }, { "cell_type": "code", "execution_count": 43, "id": "979c9dcf", "metadata": { "execution": { "iopub.execute_input": "2024-11-11T01:48:16.464274Z", "iopub.status.busy": "2024-11-11T01:48:16.463867Z", "iopub.status.idle": "2024-11-11T01:48:16.542721Z", "shell.execute_reply": "2024-11-11T01:48:16.541886Z" }, "papermill": { "duration": 4.934126, "end_time": "2024-11-11T01:48:16.544850", "exception": false, "start_time": "2024-11-11T01:48:11.610724", "status": "completed" }, "tags": [] }, "outputs": [], "source": [ "model_states = {\n", " \"en_vocabulary\": en_vocab.get_vocabulary(),\n", " \"ar_vocabulary\": ar_vocab.get_vocabulary()\n", "}\n", "\n", "torch.save(model_states, f\"{base_dir}/seq2seq_no_attention_states.pt\")\n", "\n", "## Save Entire Model\n", "torch.save(seq2seq, f\"{base_dir}/seq2seq_no_attention.bin\")" ] } ], "metadata": { "kaggle": { "accelerator": "gpu", "dataSources": [], "dockerImageVersionId": 30787, "isGpuEnabled": true, "isInternetEnabled": true, "language": "python", "sourceType": "notebook" }, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.14" }, "papermill": { "default_parameters": {}, "duration": 2496.643123, "end_time": "2024-11-11T01:48:23.779139", "environment_variables": {}, "exception": null, "input_path": "__notebook__.ipynb", "output_path": "__notebook__.ipynb", "parameters": {}, "start_time": "2024-11-11T01:06:47.136016", "version": "2.6.0" }, "widgets": { "application/vnd.jupyter.widget-state+json": { "state": { "0135941924744f8d89297029f73f081e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "0c9ca1d5768741f6912ddf425e4b23ef": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_9ac847c8021a47318d6b5168e9f9c462", "placeholder": "​", "style": "IPY_MODEL_33df2f043f7a436ea99d9ca996c5937f", "value": " 2/2 [00:00<00:00, 225.11it/s]" } }, "0dfdb91685064ca483671e9b44f8bb1d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0eeafbe2f53f41fcbfe974132518cd2e", "placeholder": "​", "style": "IPY_MODEL_b0abe16cc7fa4110baab79812481994d", "value": "tatoeba_mt.py: 100%" } }, "0e876d0679d4416fa418f66ba4c93080": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0eeafbe2f53f41fcbfe974132518cd2e": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "0f9db9d9776340c684a3a8085832fb84": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c0816d08d0aa4abab53a947498fa5042", "placeholder": "​", "style": "IPY_MODEL_ccf9e9a854204f26945d7c5e4b44d5d3", "value": "Computing checksums: 100%" } }, "1111e12346ca472e8a761b34b6039984": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_96da51b31f4a400597dbed64e9d72060", "placeholder": "​", "style": "IPY_MODEL_bc3b20060f5849e798869ed3bb941188", "value": "tatoeba-dev.ara-eng.tsv: 100%" } }, "11b5dc344966490782350c44b15ec364": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_0e876d0679d4416fa418f66ba4c93080", "placeholder": "​", "style": "IPY_MODEL_cab0d5d32bd04132bdcc5e8d0c7a3d3f", "value": " 1.96M/1.96M [00:00<00:00, 8.12MB/s]" } }, "132bbe8e2c1e488ba4b8a148ba24f460": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_ea4f088430bd4b00a5301589218bd06d", "IPY_MODEL_aed3969a63ea408f97d4ec661584a6a4", "IPY_MODEL_11b5dc344966490782350c44b15ec364" ], "layout": "IPY_MODEL_31c93f3c77e04d87af853c5a37930094" } }, "152236340fdc4377877824989e647435": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "194b0511cc384f92b3cffc8343f13d04": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c90d0b0fb2564e04af5888246ad7975b", "placeholder": "​", "style": "IPY_MODEL_fb3ae7f0e2474cbabe473d047dec9f8e", "value": " 1.78M/1.78M [00:00<00:00, 30.1MB/s]" } }, "1a800a2434f6435e8bbeadbc58a75a6d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_1111e12346ca472e8a761b34b6039984", "IPY_MODEL_c66eb5169efb426d96d16afed017685a", "IPY_MODEL_194b0511cc384f92b3cffc8343f13d04" ], "layout": "IPY_MODEL_d9a91d5ebdc34305b291ec883256bf84" } }, "1cfc1d8893454daba58af61a5c4ab548": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "1d7dae618dc44c0a8cc5333c0dd50426": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "2cf432f0ac954dd9a3eaccf838f1e8c3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f114804ff9b84ec39200c1de4369cdf0", "placeholder": "​", "style": "IPY_MODEL_8ffd706864094cb98336319a129ebab8", "value": " 12.1k/12.1k [00:00<00:00, 953kB/s]" } }, "31c93f3c77e04d87af853c5a37930094": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "33df2f043f7a436ea99d9ca996c5937f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "350b123f18974e51b5a36f237e2542ff": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "3660064a7c494900b6e4a883e54ffa4b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "39b031adbefa42068d67741f468191b9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "43ab77d982ee46649281940590ae1c80": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5b6cab41f8e0484d8d616c599a18a8d5", "placeholder": "​", "style": "IPY_MODEL_750afffa61e84afe844fc875d1f68cef", "value": "README.md: 100%" } }, "46a289e79c1044d2b9a363e3f87a3a75": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "51a54868553d49d7ba71109e741de712": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "51f24fcebc1d46e788b3e9d655f58ca0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "53ca5f02ab1d4a9c9b0a26f206959de2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_43ab77d982ee46649281940590ae1c80", "IPY_MODEL_d98252660e3549348bf3fc15a831ade7", "IPY_MODEL_2cf432f0ac954dd9a3eaccf838f1e8c3" ], "layout": "IPY_MODEL_787b0acbb6094978bf4db4e04efbc52b" } }, "5627dee200cf4f398bad6b0be953b174": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5798ab828a134621b0b4b2ea3df3a106": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5b6cab41f8e0484d8d616c599a18a8d5": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5d61661e28e141f3a5d8b370c5e85138": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "5d9384b309824f6198f2332cd4f3066b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "64e511e57e8f40df999965906fa427d4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5627dee200cf4f398bad6b0be953b174", "max": 15499.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_941bb2536d4b4e4d8434ea1db82be185", "value": 15499.0 } }, "69378ccb7f8b4b4ab41dfb2a6c4057c0": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "6e9e325a30ff47e5914c761a10714a4d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_78d6b09826c5461498fe1c0a311d1df6", "placeholder": "​", "style": "IPY_MODEL_8692eb659ea540209cd49b676b904bd4", "value": "Generating validation split: 100%" } }, "706a03d7ff3c429a81ea84f8d572b4d9": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "71791f01fec14bbea3ff9401bd47fa99": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "71f9cef1c6d445cf8d97525ec07313b7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "735f4c43661f4376a02d0513643015f2": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_f910b86651b44dcb8edb2dcffe5380e8", "placeholder": "​", "style": "IPY_MODEL_a07e552a7b274de6804fbfc918955985", "value": " 938k/938k [00:00<00:00, 10.9MB/s]" } }, "750afffa61e84afe844fc875d1f68cef": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "787b0acbb6094978bf4db4e04efbc52b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "78d6b09826c5461498fe1c0a311d1df6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "81a984dbbfa54e8b8f64fbae745422cb": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_6e9e325a30ff47e5914c761a10714a4d", "IPY_MODEL_daee3b9a084c4cc59aa01b073235c3a1", "IPY_MODEL_9529afb656dd450785fd9c7f3b777e65" ], "layout": "IPY_MODEL_152236340fdc4377877824989e647435" } }, "8277b17c63014d99af9e02de45ec3a3d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "8692eb659ea540209cd49b676b904bd4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "874cff55a1b549bea7b6d2c04b7300ac": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "8ffd706864094cb98336319a129ebab8": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "924a299ee90d417e8898f2aa95211e4f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0f9db9d9776340c684a3a8085832fb84", "IPY_MODEL_d092dd4ad34f4581ba7d1e381983350e", "IPY_MODEL_0c9ca1d5768741f6912ddf425e4b23ef" ], "layout": "IPY_MODEL_caf51498261e47d686e0d68df9e9f102" } }, "941bb2536d4b4e4d8434ea1db82be185": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "9529afb656dd450785fd9c7f3b777e65": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_51a54868553d49d7ba71109e741de712", "placeholder": "​", "style": "IPY_MODEL_8277b17c63014d99af9e02de45ec3a3d", "value": " 19528/19528 [00:01<00:00, 18879.69 examples/s]" } }, "96da51b31f4a400597dbed64e9d72060": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "994f439033d64619b17ba0cc308e9375": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "99f9f5bf99804243be6219a6e9758668": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_ac2789dd97054daba193938f0ff05ee6", "max": 938171.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_706a03d7ff3c429a81ea84f8d572b4d9", "value": 938171.0 } }, "9ac847c8021a47318d6b5168e9f9c462": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "a07e552a7b274de6804fbfc918955985": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "a4984fc8b5e74b97add3f5c878645643": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_994f439033d64619b17ba0cc308e9375", "placeholder": "​", "style": "IPY_MODEL_1cfc1d8893454daba58af61a5c4ab548", "value": " 10304/10304 [00:00<00:00, 18241.26 examples/s]" } }, "ac2789dd97054daba193938f0ff05ee6": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "aed3969a63ea408f97d4ec661584a6a4": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_c09759f969294433ad6df3fc12801f7d", "max": 1958806.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_e64042d87c684fd3b35dcf603f8b369e", "value": 1958806.0 } }, "b0abe16cc7fa4110baab79812481994d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "b5839b51ee1d4222b48ff200fa2aac6f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_0dfdb91685064ca483671e9b44f8bb1d", "IPY_MODEL_64e511e57e8f40df999965906fa427d4", "IPY_MODEL_bcccf22d48a341a293f99d55c51fc8f5" ], "layout": "IPY_MODEL_5d61661e28e141f3a5d8b370c5e85138" } }, "b9df24865a844c93932c9f1123c38cb9": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "bc3b20060f5849e798869ed3bb941188": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "bcccf22d48a341a293f99d55c51fc8f5": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e7170e52f13e4556891ebd418718ed52", "placeholder": "​", "style": "IPY_MODEL_d9d52968afce4226842e2f621065c3f6", "value": " 15.5k/15.5k [00:00<00:00, 1.03MB/s]" } }, "c0816d08d0aa4abab53a947498fa5042": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c09759f969294433ad6df3fc12801f7d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "c66eb5169efb426d96d16afed017685a": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5d9384b309824f6198f2332cd4f3066b", "max": 1778245.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_51f24fcebc1d46e788b3e9d655f58ca0", "value": 1778245.0 } }, "c90d0b0fb2564e04af5888246ad7975b": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "cab0d5d32bd04132bdcc5e8d0c7a3d3f": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "caf51498261e47d686e0d68df9e9f102": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "ccf9e9a854204f26945d7c5e4b44d5d3": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "d092dd4ad34f4581ba7d1e381983350e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_b9df24865a844c93932c9f1123c38cb9", "max": 2.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_69378ccb7f8b4b4ab41dfb2a6c4057c0", "value": 2.0 } }, "d5ae93a7aff144f0b65353474a1ae76d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_3660064a7c494900b6e4a883e54ffa4b", "placeholder": "​", "style": "IPY_MODEL_71f9cef1c6d445cf8d97525ec07313b7", "value": "Generating test split: 100%" } }, "d6ba2d7e268b43edbf5af4ce7d61f0b7": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d7945159d4774b6b8cb2554077b49b41": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_f4379135f11747968ba20aba5f859d26", "IPY_MODEL_99f9f5bf99804243be6219a6e9758668", "IPY_MODEL_735f4c43661f4376a02d0513643015f2" ], "layout": "IPY_MODEL_46a289e79c1044d2b9a363e3f87a3a75" } }, "d86fc9f3f15348638c4221c7a586e096": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "d98252660e3549348bf3fc15a831ade7": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_5798ab828a134621b0b4b2ea3df3a106", "max": 12098.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_d86fc9f3f15348638c4221c7a586e096", "value": 12098.0 } }, "d9a91d5ebdc34305b291ec883256bf84": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "d9d52968afce4226842e2f621065c3f6": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "daee3b9a084c4cc59aa01b073235c3a1": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_e387d353bfc94d13878bdcd835966d6d", "max": 19528.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_0135941924744f8d89297029f73f081e", "value": 19528.0 } }, "e387d353bfc94d13878bdcd835966d6d": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e64042d87c684fd3b35dcf603f8b369e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "ProgressStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "ProgressStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "bar_color": null, "description_width": "" } }, "e7170e52f13e4556891ebd418718ed52": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "e87fe085d45c4fa0a0ff2a7f9d3cbf0e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HBoxModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HBoxModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HBoxView", "box_style": "", "children": [ "IPY_MODEL_d5ae93a7aff144f0b65353474a1ae76d", "IPY_MODEL_f01d46c635ab47fe9fe0c2eaa573b901", "IPY_MODEL_a4984fc8b5e74b97add3f5c878645643" ], "layout": "IPY_MODEL_1d7dae618dc44c0a8cc5333c0dd50426" } }, "e9674af7143f4dc7884f63bafb3a453d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } }, "ea4f088430bd4b00a5301589218bd06d": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_d6ba2d7e268b43edbf5af4ce7d61f0b7", "placeholder": "​", "style": "IPY_MODEL_e9674af7143f4dc7884f63bafb3a453d", "value": "dataset_infos.json: 100%" } }, "f01d46c635ab47fe9fe0c2eaa573b901": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "FloatProgressModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "FloatProgressModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "ProgressView", "bar_style": "success", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_39b031adbefa42068d67741f468191b9", "max": 10304.0, "min": 0.0, "orientation": "horizontal", "style": "IPY_MODEL_350b123f18974e51b5a36f237e2542ff", "value": 10304.0 } }, "f114804ff9b84ec39200c1de4369cdf0": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "f4379135f11747968ba20aba5f859d26": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "HTMLModel", "state": { "_dom_classes": [], "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "HTMLModel", "_view_count": null, "_view_module": "@jupyter-widgets/controls", "_view_module_version": "1.5.0", "_view_name": "HTMLView", "description": "", "description_tooltip": null, "layout": "IPY_MODEL_874cff55a1b549bea7b6d2c04b7300ac", "placeholder": "​", "style": "IPY_MODEL_71791f01fec14bbea3ff9401bd47fa99", "value": "tatoeba-test.ara-eng.tsv: 100%" } }, "f910b86651b44dcb8edb2dcffe5380e8": { "model_module": "@jupyter-widgets/base", "model_module_version": "1.2.0", "model_name": "LayoutModel", "state": { "_model_module": "@jupyter-widgets/base", "_model_module_version": "1.2.0", "_model_name": "LayoutModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "LayoutView", "align_content": null, "align_items": null, "align_self": null, "border": null, "bottom": null, "display": null, "flex": null, "flex_flow": null, "grid_area": null, "grid_auto_columns": null, "grid_auto_flow": null, "grid_auto_rows": null, "grid_column": null, "grid_gap": null, "grid_row": null, "grid_template_areas": null, "grid_template_columns": null, "grid_template_rows": null, "height": null, "justify_content": null, "justify_items": null, "left": null, "margin": null, "max_height": null, "max_width": null, "min_height": null, "min_width": null, "object_fit": null, "object_position": null, "order": null, "overflow": null, "overflow_x": null, "overflow_y": null, "padding": null, "right": null, "top": null, "visibility": null, "width": null } }, "fb3ae7f0e2474cbabe473d047dec9f8e": { "model_module": "@jupyter-widgets/controls", "model_module_version": "1.5.0", "model_name": "DescriptionStyleModel", "state": { "_model_module": "@jupyter-widgets/controls", "_model_module_version": "1.5.0", "_model_name": "DescriptionStyleModel", "_view_count": null, "_view_module": "@jupyter-widgets/base", "_view_module_version": "1.2.0", "_view_name": "StyleView", "description_width": "" } } }, "version_major": 2, "version_minor": 0 } } }, "nbformat": 4, "nbformat_minor": 5 }