{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# English-to-Arabic translation smoke test\n",
    "\n",
    "Loads a saved seq2seq-with-attention checkpoint and the English/Arabic tokenizers, then prints the model's translation for a few hand-written English sentences."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import torch\n",
    "\n",
    "from utils import Callable_tokenizer, preprocess_en\n",
    "# NOTE(review): the model classes below are not referenced by name in this notebook;\n",
    "# presumably they are imported so torch.load can rebuild the pickled checkpoint --\n",
    "# confirm before removing.\n",
    "from models import Seq2seq_with_attention, Encoder, Decoder, Attention\n",
    "\n",
    "# Run on the GPU when one is available, otherwise fall back to the CPU.\n",
    "device = 'cuda' if torch.cuda.is_available() else 'cpu'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Artifact locations, kept in one place so they are easy to swap.\n",
    "MODEL_PATH = \"./seq2seq_with_attention_df-CoVoST2_df-opus_seed-123_subword.bin\"\n",
    "EN_TOKENIZER_PATH = './tokenizers/NEW_en_vocab_df-CoVoST2_df-opus_seed-123_vocab-16K_FULL.model'\n",
    "AR_TOKENIZER_PATH = './tokenizers/NEW_ar_vocab_df-CoVoST2_df-opus_seed-123_vocab-32K_FULL.model'\n",
    "\n",
    "# SECURITY: weights_only=False lets torch.load unpickle arbitrary Python objects;\n",
    "# only load checkpoints that come from a trusted source.\n",
    "seq2seq_with_attention = torch.load(MODEL_PATH, map_location=device, weights_only=False)\n",
    "# A pickled module is restored with whatever training flag it was saved with,\n",
    "# so switch to eval mode explicitly before running inference.\n",
    "# NOTE(review): assumes the loaded object is an nn.Module -- confirm.\n",
    "seq2seq_with_attention.eval()\n",
    "\n",
    "en_sp = Callable_tokenizer(EN_TOKENIZER_PATH)\n",
    "ar_sp = Callable_tokenizer(AR_TOKENIZER_PATH)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "how are you? : كيف حالك حالك\n",
      "he is a good boy. : فتى ولد طيب\n",
      "she is a good girl. : فتاه فتاه فتاه\n",
      "who is a better? : افضل افضل افضل\n"
     ]
    }
   ],
   "source": [
    "# Second argument passed to translate(); presumably the cap on generated tokens -- TODO confirm.\n",
    "MAX_TRIES = 30\n",
    "\n",
    "custom_tests = ['how are you?', 'he is a good boy.', 'she is a good girl.', 'who is a better?']\n",
    "\n",
    "for custom_test in custom_tests:\n",
    "    preprocessed = preprocess_en(custom_test)\n",
    "\n",
    "    # Tokenize, add a leading batch dimension of size 1, and move to the model's device.\n",
    "    en_tokens = torch.tensor(en_sp.user_tokenization(preprocessed)).unsqueeze(0).to(device)\n",
    "\n",
    "    # Inference only: skip autograd bookkeeping.\n",
    "    with torch.no_grad():\n",
    "        targets_hat = seq2seq_with_attention.translate(en_tokens, MAX_TRIES)\n",
    "\n",
    "    # The first and last predicted ids are dropped before decoding\n",
    "    # (presumably start/end-of-sequence markers -- TODO confirm).\n",
    "    print(custom_test, ':', ar_sp.decode(targets_hat[1:-1]))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}