Delete assets/basic_inference_llama_2_70b_dolphin.ipynb

Browse files

Files changed (1) hide show

assets/basic_inference_llama_2_70b_dolphin.ipynb +0 -355

assets/basic_inference_llama_2_70b_dolphin.ipynb DELETED Viewed

@@ -1,355 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "6f46e840-8a7f-4be2-a082-49b9ebf5a8c5",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.1.2\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
-      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpython3 -m pip install --upgrade pip\u001b[0m\n"
-     ]
-    }
-   ],
-   "source": [
-    "# %pip (rather than !pip) installs into the environment of the running kernel;\n",
-    "# bitsandbytes is needed for the 4-bit quantized model load further down.\n",
-    "%pip install -q -U huggingface_hub peft transformers torch accelerate bitsandbytes\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "id": "2d2918a1-d701-4a66-946c-6f668cb4ac1e",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Mon Jul 24 21:41:13 2023       \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |\n",
-      "|-------------------------------+----------------------+----------------------+\n",
-      "| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |\n",
-      "| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |\n",
-      "|                               |                      |               MIG M. |\n",
-      "|===============================+======================+======================|\n",
-      "|   0  NVIDIA H100 PCIe    On   | 00000000:06:00.0 Off |                    0 |\n",
-      "| N/A   39C    P0    52W / 350W |      0MiB / 81559MiB |      0%      Default |\n",
-      "|                               |                      |             Disabled |\n",
-      "+-------------------------------+----------------------+----------------------+\n",
-      "                                                                               \n",
-      "+-----------------------------------------------------------------------------+\n",
-      "| Processes:                                                                  |\n",
-      "|  GPU   GI   CI        PID   Type   Process name                  GPU Memory |\n",
-      "|        ID   ID                                                   Usage      |\n",
-      "|=============================================================================|\n",
-      "|  No running processes found                                                 |\n",
-      "+-----------------------------------------------------------------------------+\n"
-     ]
-    }
-   ],
-   "source": [
-    "!nvidia-smi"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "id": "0afdf8a6-ea7d-44ab-a1f9-a19e550e9dbd",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/.local/lib/python3.8/site-packages/pandas/core/computation/expressions.py:20: UserWarning: Pandas requires version '2.7.3' or newer of 'numexpr' (version '2.7.1' currently installed).\n",
-      "  from pandas.core.computation.check import NUMEXPR_INSTALLED\n"
-     ]
-    }
-   ],
-   "source": [
-    "import torch\n",
-    "from peft import PeftModel, PeftConfig\n",
-    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "adfcd11e-8d98-4cf3-abf4-e9fa933eb0d6",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "7dc80313fdcd41a5a7ee168956df3dd9",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "from huggingface_hub import notebook_login\n",
-    "\n",
-    "notebook_login()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 5,
-   "id": "82cfa4fb-af16-4927-82c4-1fbf0fa84bfa",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/ubuntu/.local/lib/python3.8/site-packages/transformers/modeling_utils.py:2193: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "d0f18088e32f4d4b857d2de5430528d4",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Loading checkpoint shards:   0%|          | 0/15 [00:00<?, ?it/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# peft_model_id = \"results/checkpoint-12500\"\n",
-    "peft_model_id = \"dfurman/llama-2-70b-dolphin-peft\"\n",
-    "# the adapter config names the base model to load (base_model_name_or_path)\n",
-    "config = PeftConfig.from_pretrained(peft_model_id)\n",
-    "\n",
-    "# load the base model weights in 4-bit NF4, computing in bfloat16\n",
-    "bnb_config = BitsAndBytesConfig(\n",
-    "    load_in_4bit=True,\n",
-    "    bnb_4bit_quant_type=\"nf4\",\n",
-    "    bnb_4bit_compute_dtype=torch.bfloat16,\n",
-    ")\n",
-    "\n",
-    "model = AutoModelForCausalLM.from_pretrained(\n",
-    "    config.base_model_name_or_path,\n",
-    "    quantization_config=bnb_config,\n",
-    "    token=True,  # replaces the deprecated `use_auth_token`; uses the locally saved Hub token\n",
-    "    torch_dtype=torch.bfloat16,\n",
-    "    device_map=\"auto\",\n",
-    ")\n",
-    "tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
-    "# reuse the EOS token for padding\n",
-    "tokenizer.pad_token = tokenizer.eos_token\n",
-    "\n",
-    "# Load the Lora model\n",
-    "model = PeftModel.from_pretrained(model, peft_model_id)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "d86f6a79-95f2-4e05-9bc7-3cbcbbbc9552",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# text generation function\n",
-    "\n",
-    "\n",
-    "def llama_generate(\n",
-    "    model: AutoModelForCausalLM,\n",
-    "    tokenizer: AutoTokenizer,\n",
-    "    prompt: str,\n",
-    "    max_new_tokens: int = 128,\n",
-    "    temperature: float = 1.0,\n",
-    ") -> str:\n",
-    "    \"\"\"\n",
-    "    Generate a completion for a prompt and return only the newly generated text\n",
-    "    Settings not passed explicitly use the Hugging Face GenerationConfig defaults\n",
-    "        https://huggingface.co/docs/transformers/v4.29.1/en/main_classes/text_generation#transformers.GenerationConfig\n",
-    "    Args:\n",
-    "        model (transformers.AutoModelForCausalLM): Causal language model for text generation\n",
-    "        tokenizer (transformers.AutoTokenizer): Tokenizer for model\n",
-    "        prompt (str): Prompt for text generation\n",
-    "        max_new_tokens (int, optional): Max new tokens after the prompt to generate. Defaults to 128.\n",
-    "        temperature (float, optional): The value used to modulate the next token probabilities.\n",
-    "            Defaults to 1.0\n",
-    "    Returns:\n",
-    "        str: The decoded generated text, without the prompt\n",
-    "    \"\"\"\n",
-    "    device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
-    "\n",
-    "    inputs = tokenizer(\n",
-    "        [prompt],\n",
-    "        return_tensors=\"pt\",\n",
-    "        return_token_type_ids=False,\n",
-    "    ).to(\n",
-    "        device\n",
-    "    )  # tokenize inputs, load on device\n",
-    "    prompt_length = inputs[\"input_ids\"].shape[1]  # number of prompt tokens\n",
-    "\n",
-    "    # when running Torch modules in lower precision, it is best practice to use the torch.autocast context manager.\n",
-    "    with torch.autocast(\"cuda\", dtype=torch.bfloat16):\n",
-    "        response = model.generate(\n",
-    "            **inputs,\n",
-    "            max_new_tokens=max_new_tokens,\n",
-    "            temperature=temperature,\n",
-    "            return_dict_in_generate=True,\n",
-    "            eos_token_id=tokenizer.eos_token_id,\n",
-    "            pad_token_id=tokenizer.pad_token_id,\n",
-    "        )\n",
-    "\n",
-    "    # remove the prompt by token count instead of character count: the decoded text\n",
-    "    # may not reproduce the prompt string exactly, so len(prompt) can mis-slice.\n",
-    "    generated_tokens = response[\"sequences\"][0][prompt_length:]\n",
-    "\n",
-    "    return tokenizer.decode(\n",
-    "        generated_tokens,\n",
-    "        skip_special_tokens=True,\n",
-    "    )  # grab output in natural language"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 7,
-   "id": "28be263a-dd15-419f-a67e-7ca05b27435f",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Sure! Here's a delicious and easy vegan banana bread recipe:\n",
-      "\n",
-      "Ingredients:\n",
-      "- 2 cups all-purpose flour\n",
-      "- 1/2 cup sugar\n",
-      "- 1/2 cup vegan butter (such as Earth Balance)\n",
-      "- 1/2 cup vegan milk (such as almond milk)\n",
-      "- 1/2 cup unsweetened applesauce\n",
-      "- 1/2 cup mashed ripe bananas (about 2 medium bananas)\n",
-      "- 1 teaspoon baking soda\n",
-      "- 1/2 teaspoon salt\n",
-      "- 1/2 teaspoon ground cinnamon\n",
-      "- 1/2 teaspoon ground nutmeg\n",
-      "- 1/2 teaspoon ground cloves\n",
-      "- 1/2 cup chopped walnuts (optional)\n",
-      "\n",
-      "Instructions:\n",
-      "1. Preheat the oven to 350°F (175°C). Grease a 9x5-inch loaf pan with vegan butter or cooking spray.\n",
-      "2. In a large bowl, mix together the flour, sugar, vegan butter, vegan milk, applesauce, bananas, baking soda, salt, cinnamon, nutmeg, and cloves. Stir until well combined.\n",
-      "3. Fold in the chopped walnuts, if using.\n",
-      "4. Pour the batter into the prepared loaf pan.\n",
-      "5. Bake for 50-60 minutes, or until a toothpick inserted into the center of the bread comes out clean.\n",
-      "6. Let the bread cool in the pan for 10 minutes before transferring it to a wire rack to cool completely.\n",
-      "7. Slice and enjoy!\n",
-      "\n",
-      "Note: You can also add chocolate chips, dried fruit, or other mix-ins to the batter for extra flavor and texture. Enjoy your vegan banana bread!\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "prompt = \"You are a helpful assistant. Tell me a recipe for vegan banana bread.\\n\"\n",
-    "\n",
-    "response = llama_generate(\n",
-    "    model,\n",
-    "    tokenizer,\n",
-    "    prompt,\n",
-    "    max_new_tokens=500,\n",
-    "    temperature=0.92,\n",
-    ")\n",
-    "\n",
-    "print(response)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3625b3ff-6467-43ea-8557-9541934539ec",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.10"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}