diff --git a/.gitignore b/.gitignore index 7e144ff..d2c9a76 100644 --- a/.gitignore +++ b/.gitignore @@ -167,10 +167,89 @@ dmypy.json # Cython debug symbols cython_debug/ -# PyCharm -# JetBrains specific template is maintained in a separate JetBrains.gitignore that can -# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore -# and can be added to the global gitignore or merged into this file. For a more nuclear -# option (not recommended) you can uncomment the following to ignore the entire idea folder. -#.idea/ +# Covers JetBrains IDEs: IntelliJ, GoLand, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider +# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 +# User-specific stuff +.idea/**/workspace.xml +.idea/**/tasks.xml +.idea/**/usage.statistics.xml +.idea/**/dictionaries +.idea/**/shelf + +# AWS User-specific +.idea/**/aws.xml + +# Generated files +.idea/**/contentModel.xml + +# Sensitive or high-churn files +.idea/**/dataSources/ +.idea/**/dataSources.ids +.idea/**/dataSources.local.xml +.idea/**/sqlDataSources.xml +.idea/**/dynamic.xml +.idea/**/uiDesigner.xml +.idea/**/dbnavigator.xml + +# Gradle +.idea/**/gradle.xml +.idea/**/libraries + +# Gradle and Maven with auto-import +# When using Gradle or Maven with auto-import, you should exclude module files, +# since they will be recreated, and may cause churn. Uncomment if using +# auto-import. 
+# .idea/artifacts +# .idea/compiler.xml +# .idea/jarRepositories.xml +# .idea/modules.xml +# .idea/*.iml +# .idea/modules +# *.iml +# *.ipr + +# CMake +cmake-build-*/ + +# Mongo Explorer plugin +.idea/**/mongoSettings.xml + +# File-based project format +*.iws + +# IntelliJ +out/ + +# mpeltonen/sbt-idea plugin +.idea_modules/ + +# JIRA plugin +atlassian-ide-plugin.xml + +# Cursive Clojure plugin +.idea/replstate.xml + +# SonarLint plugin +.idea/sonarlint/ +.idea/sonarlint.xml # see https://community.sonarsource.com/t/is-the-file-idea-idea-idea-sonarlint-xml-intended-to-be-under-source-control/121119 + +# Crashlytics plugin (for Android Studio and IntelliJ) +com_crashlytics_export_strings.xml +crashlytics.properties +crashlytics-build.properties +fabric.properties + +# Editor-based HTTP Client +.idea/httpRequests +http-client.private.env.json + +# Android studio 3.1+ serialized cache file +.idea/caches/build_file_checksums.ser + +# Apifox Helper cache +.idea/.cache/.Apifox_Helper +.idea/ApifoxUploaderProjectSetting.xml + +# Github Copilot persisted session migrations, see: https://github.com/microsoft/copilot-intellij-feedback/issues/712#issuecomment-3322062215 +.idea/**/copilot.data.migration.*.xml \ No newline at end of file diff --git a/Building an LLM.md b/Building an LLM.md new file mode 100644 index 0000000..8f2462e --- /dev/null +++ b/Building an LLM.md @@ -0,0 +1,259 @@ +# 🧠 LLM Mini Project — Step-by-Step Checklist + +--- + +## 📦 0. Setup Environment + +- [ ] Create a new project folder +- [ ] Set up a virtual environment +- [ ] Install core dependencies: + - [ ] torch + - [ ] transformers + - [ ] datasets + - [ ] accelerate + - [ ] peft (for LoRA later) + - [ ] bitsandbytes (for quantization later) +- [ ] Confirm GPU is available (`torch.cuda.is_available()`) + +--- + +## 🔍 1. Understand the Problem (don’t skip this) + +- [ ] Write down in your own words: + - [ ] What is a language model? + - [ ] What does “predict next token” actually mean? 
+- [ ] Manually inspect: + - [ ] A sample sentence + - [ ] Its tokenized form +- [ ] Verify: + - [ ] Input tokens vs target tokens (shifted by 1) + +--- + +## 📚 2. Load Dataset + +- [ ] Choose dataset: + - [ ] Start with WikiText-2 +- [ ] Load dataset using `datasets` +- [ ] Print: + - [ ] A few raw samples +- [ ] Check: + - [ ] Dataset size + - [ ] Train/validation split + +--- + +## 🔢 3. Tokenization + +- [ ] Load GPT-2 tokenizer +- [ ] Tokenize dataset: + - [ ] Apply truncation + - [ ] Apply padding +- [ ] Verify: + - [ ] Shape of tokenized output + - [ ] Decode tokens back to text (sanity check) + +--- + +## 🧱 4. Prepare Training Data + +- [ ] Convert dataset to PyTorch format +- [ ] Create DataLoader: + - [ ] Set batch size (start small: 2–8) +- [ ] Confirm: + - [ ] Batches load correctly + - [ ] Tensor shapes are consistent + +--- + +## 🤖 5. Load Model + +- [ ] Load pretrained GPT-2 small +- [ ] Move model to GPU (if available) +- [ ] Print: + - [ ] Model size (parameters) +- [ ] Run a single forward pass to confirm: + - [ ] No errors + +--- + +## 🔁 6. Build Training Loop (core understanding) + +- [ ] Write your own training loop (no Trainer API yet) +- [ ] Include: + - [ ] Forward pass + - [ ] Loss calculation + - [ ] Backpropagation + - [ ] Optimizer step +- [ ] Print: + - [ ] Loss every few steps + +--- + +## 📉 7. Observe Training Behaviour + +- [ ] Track: + - [ ] Training loss over time +- [ ] Answer: + - [ ] Is loss decreasing? + - [ ] Is it noisy or stable? +- [ ] (Optional) + - [ ] Plot loss curve + +--- + +## 🧪 8. Evaluate Model + +- [ ] Generate text from model: + - [ ] Before training + - [ ] After training +- [ ] Compare: + - [ ] Coherence + - [ ] Structure +- [ ] Note: + - [ ] Any overfitting signs (repetition, memorization) + +--- + +## ⚖️ 9. 
Try LoRA Fine-Tuning + +- [ ] Add LoRA using `peft` +- [ ] Freeze base model weights +- [ ] Train only adapter layers +- [ ] Compare vs full fine-tuning: + - [ ] Speed + - [ ] Memory usage + - [ ] Output quality + +--- + +## 🧠 10. Understand Convergence + +- [ ] Identify: + - [ ] When loss plateaus +- [ ] Check validation loss: + - [ ] Does it increase? (overfitting) +- [ ] Write down: + - [ ] What “good training” looks like + +--- + +## ⚙️ 11. Model Saving & Loading + +- [ ] Save: + - [ ] Model weights + - [ ] Tokenizer +- [ ] Reload model +- [ ] Confirm: + - [ ] Outputs remain consistent + +--- + +# 🚀 PART 2 — Infrastructure & Serving + +--- + +## 🧠 12. Understand Inference Flow + +- [ ] Write down: + - [ ] Steps from input → output +- [ ] Measure: + - [ ] Time taken for a single generation + +--- + +## ⚡ 13. Optimize Inference + +- [ ] Test batching: + - [ ] Multiple inputs at once +- [ ] Compare: + - [ ] Latency vs throughput + +--- + +## 🧮 14. Apply Quantization + +- [ ] Load model in: + - [ ] 8-bit + - [ ] (Optional) 4-bit +- [ ] Compare: + - [ ] Memory usage + - [ ] Speed + - [ ] Output quality + +--- + +## 🖥️ 15. Simulate Real-World Usage + +- [ ] Pretend you have: + - [ ] Multiple users hitting your model +- [ ] Think through: + - [ ] How would you queue requests? + - [ ] When would you batch? + - [ ] When would you scale? + +--- + +## ☁️ 16. Understand Infra Concepts + +- [ ] Research: + - [ ] GPU provisioning + - [ ] Autoscaling + - [ ] Model warm starts +- [ ] Understand: + - [ ] Why loading time matters + - [ ] Why GPUs shouldn’t sit idle + +--- + +## 🧬 17. (Bonus) DICOM Exploration + +- [ ] Learn: + - [ ] What DICOM files are +- [ ] Think: + - [ ] How LLMs could be used with medical data +- [ ] Note: + - [ ] Privacy + domain challenges + +--- + +## ✍️ 18. Write Your Blog + +### Structure + +- [ ] Introduction: + - [ ] What is an LLM really? 
+- [ ] Training: + - [ ] Tokenization + - [ ] Training loop + - [ ] Loss behaviour +- [ ] Fine-tuning: + - [ ] Full vs LoRA +- [ ] Challenges: + - [ ] What went wrong +- [ ] Infrastructure: + - [ ] Serving challenges + - [ ] Batching + - [ ] Quantization +- [ ] Key Learnings: + - [ ] What surprised you + - [ ] What actually matters + +--- + +## ✅ Final Deliverables + +- [ ] Working training script +- [ ] LoRA vs full fine-tune comparison +- [ ] Basic inference script +- [ ] Blog post (clear + honest) +- [ ] Notes showing your understanding + +--- + +## ⚠️ Keep Yourself Honest + +- [ ] Can you explain the training loop without looking? +- [ ] Do you understand why loss decreases? +- [ ] Can you explain batching vs latency tradeoffs? +- [ ] Do you know what would break at scale? \ No newline at end of file diff --git a/LLM-gpt.ipynb b/LLM-gpt.ipynb new file mode 100644 index 0000000..40e9e03 --- /dev/null +++ b/LLM-gpt.ipynb @@ -0,0 +1,1445 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "14af33bd", + "metadata": { + "id": "56960f30a3439f44", + "papermill": { + "duration": 0.005418, + "end_time": "2026-04-11T13:53:13.487797+00:00", + "exception": false, + "start_time": "2026-04-11T13:53:13.482379+00:00", + "status": "completed" + }, + "tags": [] + }, + "source": [ + "# Training GPT-2 on Wikipedia data to understand how to fine-tune a foundation model\n", + "\n", + "## Tokenization of the data\n", + "\n", + "We need to tokenize the data using the byte pair encoding (BPE) method to get the training data ready. The model cannot consume raw text directly: it operates on integer token IDs, which GPT-2's byte-level BPE tokenizer produces by splitting the UTF-8 bytes of the text into subword units from a fixed vocabulary." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35498829", + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-10T18:41:26.391038949Z", + "start_time": "2026-04-10T18:41:23.168674202Z" + }, + "execution": { + "iopub.execute_input": "2026-04-11T12:30:46.405141Z", + "iopub.status.busy": "2026-04-11T12:30:46.404565Z", + "iopub.status.idle": "2026-04-11T12:30:51.811305Z", + "shell.execute_reply": "2026-04-11T12:30:51.810501Z", + "shell.execute_reply.started": "2026-04-11T12:30:46.405105Z" + }, + "id": "e826153a4c898239", + "outputId": "56e438e4-47ed-49a7-e514-d8a61bd32c94", + "papermill": { + "duration": null, + "end_time": null, + "exception": false, + "start_time": "2026-04-11T13:53:13.492503+00:00", + "status": "running" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from datasets import load_dataset\n", + "\n", + "dataset = load_dataset(\"wikitext\", \"wikitext-2-raw-v1\")\n", + "dataset = dataset.filter(lambda x: len(x[\"text\"].strip()) > 0)" + ] + }, + { + "cell_type": "markdown", + "id": "cda2e2f4", + "metadata": { + "id": "27c55eaf29e555a2", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "source": [ + "Let's see if there's some data for us to use" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3125c85f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-10T18:41:26.542937056Z", + "start_time": "2026-04-10T18:41:26.411012195Z" + }, + "execution": { + "iopub.execute_input": "2026-04-11T12:30:51.813142Z", + "iopub.status.busy": "2026-04-11T12:30:51.812559Z", + "iopub.status.idle": "2026-04-11T12:30:51.819801Z", + "shell.execute_reply": "2026-04-11T12:30:51.818873Z", + "shell.execute_reply.started": "2026-04-11T12:30:51.813098Z" + }, + "id": "b1efb38c36d2dd42", + "outputId": "826d3ede-0e4b-483f-de7d-1fd2f294cb14", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + 
"start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "for i in range(10):\n", + " print(dataset[\"train\"][i])" + ] + }, + { + "cell_type": "markdown", + "id": "c3cf0915", + "metadata": { + "id": "e8119df124aca910", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "source": [ + "## Now time to tokenize the data and then chunk the data\n", + "We need to tokenize the data so that it can be understood by the model. This is an important step as the model doesn't understand plain text" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4b81928a", + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-10T18:53:46.503479303Z", + "start_time": "2026-04-10T18:53:43.641312576Z" + }, + "execution": { + "iopub.execute_input": "2026-04-11T12:30:51.820972Z", + "iopub.status.busy": "2026-04-11T12:30:51.820748Z", + "iopub.status.idle": "2026-04-11T12:31:01.890648Z", + "shell.execute_reply": "2026-04-11T12:31:01.889735Z", + "shell.execute_reply.started": "2026-04-11T12:30:51.820951Z" + }, + "id": "252fb89c3351bf1e", + "outputId": "06f9f922-c233-4445-bedd-e616bd871dbb", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "from transformers import GPT2Tokenizer, GPT2LMHeadModel\n", + "\n", + "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n", + "tokenizer.pad_token = tokenizer.eos_token\n", + "\n", + "\n", + "def tokenize(data):\n", + " return tokenizer(data[\"text\"], max_length=128)\n", + "\n", + "\n", + "def combine(data):\n", + " concatenated = {}\n", + " # Let's concat the data (tokens)\n", + " for k, lists in data.items():\n", + " combined = []\n", + " for lst in lists:\n", + " combined.extend(lst)\n", + " concatenated[k] = combined\n", + "\n", + " total_length = 
len(concatenated[\"input_ids\"])\n", + "\n", + " total_length = (total_length // 128) * 128\n", + "\n", + " result = {}\n", + "\n", + " # split into chunks\n", + " for k, lst in concatenated.items():\n", + " chunks = []\n", + " for l in range(0, total_length, 128):\n", + " chunks.append(lst[l:l + 128])\n", + " result[k] = chunks\n", + "\n", + " result[\"labels\"] = result[\"input_ids\"].copy()\n", + " return result\n", + "\n", + "\n", + "tokenized_datasets = dataset.map(tokenize, batched=True, remove_columns=[\"text\"])\n", + "\n", + "training_dataset = tokenized_datasets.map(combine, batched=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72c4e93f", + "metadata": { + "ExecuteTime": { + "end_time": "2026-04-10T18:41:33.790641588Z", + "start_time": "2026-04-10T18:41:33.561714562Z" + }, + "execution": { + "iopub.execute_input": "2026-04-11T12:31:01.893004Z", + "iopub.status.busy": "2026-04-11T12:31:01.892278Z", + "iopub.status.idle": "2026-04-11T12:31:01.901272Z", + "shell.execute_reply": "2026-04-11T12:31:01.900470Z", + "shell.execute_reply.started": "2026-04-11T12:31:01.892976Z" + }, + "id": "b47c2da1cef52c2e", + "outputId": "a5ef3e2d-defa-4a1a-f040-03ccfc7a91df", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "for i in range(10):\n", + " print(len(training_dataset[\"train\"][i][\"input_ids\"]))" + ] + }, + { + "cell_type": "markdown", + "id": "af6d3ad7", + "metadata": { + "id": "bfd3873f9f578429", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "source": [ + "## Model loop\n", + "Now we need to create our training loop for GPT-2 using the pretrained model, accounting for backpropagation, the loss, and the number of epochs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41b3d0ab", + 
"metadata": { 
+ "execution": { + "iopub.execute_input": "2026-04-11T12:31:01.902695Z", + "iopub.status.busy": "2026-04-11T12:31:01.902379Z" + }, + "id": "8208e77b670a0eb2", + "outputId": "005113d2-3641-47a9-9c34-fbff7d36ac05", + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import torch\n", + "import time\n", + "import torch.optim as optim\n", + "from torch.utils.data import DataLoader\n", + "\n", + "model = GPT2LMHeadModel.from_pretrained(\"gpt2\")\n", + "model.resize_token_embeddings(len(tokenizer))\n", + "\n", + "training_data = DataLoader(training_dataset[\"train\"], batch_size=2, shuffle=True)\n", + "training_dataset.set_format(type=\"torch\")\n", + "optimizer = optim.AdamW(model.parameters(), lr=5e-5)\n", + "\n", + "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n", + "model.to(device)\n", + "\n", + "num_epochs = 10\n", + "total_start = time.time()\n", + "\n", + "for epoch in range(num_epochs):\n", + " start = time.time()\n", + "\n", + " model.train()\n", + " running_loss = 0.0\n", + "\n", + " for batch in training_data:\n", + " input_ids = batch[\"input_ids\"].to(device)\n", + " attention_mask = batch[\"attention_mask\"].to(device)\n", + " labels = batch[\"labels\"].to(device)\n", + "\n", + " optimizer.zero_grad()\n", + " outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)\n", + " loss = outputs.loss\n", + " loss.backward()\n", + " optimizer.step()\n", + "\n", + " running_loss += loss.item()\n", + "\n", + " if device.type == \"cuda\": torch.cuda.synchronize()\n", + "\n", + " epoch_time = time.time() - start\n", + " avg_time = (time.time() - total_start) / (epoch + 1)\n", + " eta = avg_time * (num_epochs - epoch - 1)\n", + "\n", + " print(f\"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(training_data):.4f}, Time: {epoch_time:.2f}s, ETA: {eta/60:.2f}m\")" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "65323f48", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "output_dir = \"./gpt2_finetuned\"\n", + "model.save_pretrained(output_dir)\n", + "\n", + "tokenizer.save_pretrained(output_dir)\n", + "\n", + "print(f\"Model saved to {output_dir}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27001a9a", + "metadata": { + "papermill": { + "duration": null, + "end_time": null, + "exception": null, + "start_time": null, + "status": "pending" + }, + "tags": [] + }, + "outputs": [], + "source": [ + "import shutil\n", + "\n", + "shutil.make_archive('model_output', 'zip', './gpt2_finetuned')\n", + "\n", + "print(\"Model zipped and ready for download!\")" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kaggle": { + "accelerator": "nvidiaTeslaT4", + "dataSources": [], + "dockerImageVersionId": 31328, + "isGpuEnabled": true, + "isInternetEnabled": true, + "language": "python", + "sourceType": "notebook" + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + }, + "papermill": { + "default_parameters": {}, + "duration": null, + "end_time": null, + "environment_variables": {}, + "exception": null, + "input_path": "__notebook__.ipynb", + "output_path": "__notebook__.ipynb", + "parameters": {}, + "start_time": "2026-04-11T13:53:10.044748+00:00", + "version": "2.7.0" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "03e0356919f14fd685bcc1612c9050d6": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "0741c9735ba24cd8b2e574780ed9bb2c": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "09c6543498e34a368543a953aa4a83ba": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": 
"@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_1c0993d123a74bb0852197162b2e2d3e", + "IPY_MODEL_6cd5ddd66dbb4bf1ac3066f268b1b363", + "IPY_MODEL_6f265574427b4a73b774de42d12659c2" + ], + "layout": "IPY_MODEL_5680619f39e94c419be50e40bd1a775a" + } + }, + "0be49291f31545f789176d9bc9e642c4": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_434d9c38ce03441f9b69ca86ddfb3c0b", + "placeholder": "​", + "style": "IPY_MODEL_b54f1d1380b94261b13997f9eceb423e", + "value": " 148/148 [00:00<00:00, 614.69it/s, Materializing param=transformer.wte.weight]" + } + }, + "0e45bcff086342129fdb48114477f777": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "1c0993d123a74bb0852197162b2e2d3e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + 
"description_tooltip": null, + "layout": "IPY_MODEL_f5b01ae92a974970a4f7b191951840bf", + "placeholder": "​", + "style": "IPY_MODEL_3fd65bd7c9c64480a99356bc7836640e", + "value": "Map: 100%" + } + }, + "2349ef76bed847b79b5e7da6540aedef": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "36433e0953664a70981a91977217cf3f": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "3fd65bd7c9c64480a99356bc7836640e": { + "model_module": 
"@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "434d9c38ce03441f9b69ca86ddfb3c0b": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "4b279cc9d382415fa1d2cf21a94415f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + 
"_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_c70ad2b68e9e49649e2bfc71764cb5a0", + "max": 148, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_7b6f9b2d74c643ab8fd016d7e8117a53", + "value": 148 + } + }, + "4b60e0df52a7481d8e4b9df0f3171a2d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "54c2e9c7137049ff8beb05bb8c972658": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_0741c9735ba24cd8b2e574780ed9bb2c", + "placeholder": "​", + "style": "IPY_MODEL_5bdc83d116af40c88bbb879e5f4df01f", + "value": "Map: 100%" + } + }, + "5680619f39e94c419be50e40bd1a775a": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + 
"border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "5bdc83d116af40c88bbb879e5f4df01f": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "6cd5ddd66dbb4bf1ac3066f268b1b363": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_8215a42be5c64fe9a25d92fc8008fa0b", + "max": 23767, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_2349ef76bed847b79b5e7da6540aedef", + "value": 23767 + } + }, + 
"6f265574427b4a73b774de42d12659c2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_aec637e7513b442c9326632354961ada", + "placeholder": "​", + "style": "IPY_MODEL_03e0356919f14fd685bcc1612c9050d6", + "value": " 23767/23767 [00:02<00:00, 9996.68 examples/s]" + } + }, + "7b1dc4f98c654745a1a6ce607a4a9be0": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "FloatProgressModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "FloatProgressModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "ProgressView", + "bar_style": "success", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_b26f87cd94374c4985e9d8ea029fcb19", + "max": 23767, + "min": 0, + "orientation": "horizontal", + "style": "IPY_MODEL_4b60e0df52a7481d8e4b9df0f3171a2d", + "value": 23767 + } + }, + "7b6f9b2d74c643ab8fd016d7e8117a53": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "ProgressStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "ProgressStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "bar_color": null, + "description_width": "" + } + }, + "8215a42be5c64fe9a25d92fc8008fa0b": { + "model_module": "@jupyter-widgets/base", + 
"model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "8e25812455fd4680b9d664aaecfe47f2": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_54c2e9c7137049ff8beb05bb8c972658", + "IPY_MODEL_7b1dc4f98c654745a1a6ce607a4a9be0", + "IPY_MODEL_f2157862dc9d4ef294741a2e8a36d97d" + ], + "layout": "IPY_MODEL_ce23d7097c2a4d189f0e7d9a2c22c9be" + } + }, + "8f05f89d52b047368c4405847015de33": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HBoxModel", + 
"state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HBoxModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HBoxView", + "box_style": "", + "children": [ + "IPY_MODEL_c9173aa946df480d81648bf01808a153", + "IPY_MODEL_4b279cc9d382415fa1d2cf21a94415f2", + "IPY_MODEL_0be49291f31545f789176d9bc9e642c4" + ], + "layout": "IPY_MODEL_f12fe68a33ff4a699d24ac0f6b3ba320" + } + }, + "aec637e7513b442c9326632354961ada": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b26f87cd94374c4985e9d8ea029fcb19": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + 
"_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "b54f1d1380b94261b13997f9eceb423e": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "c70ad2b68e9e49649e2bfc71764cb5a0": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + 
"bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "c9173aa946df480d81648bf01808a153": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + "_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_36433e0953664a70981a91977217cf3f", + "placeholder": "​", + "style": "IPY_MODEL_caaaadd11e8d49559e8278629f6f0b45", + "value": "Loading weights: 100%" + } + }, + "caaaadd11e8d49559e8278629f6f0b45": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "DescriptionStyleModel", + "state": { + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "DescriptionStyleModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "StyleView", + "description_width": "" + } + }, + "ce23d7097c2a4d189f0e7d9a2c22c9be": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", 
+ "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "d5c1213d8f1d420c8835d9d8740aad4d": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + 
"justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f12fe68a33ff4a699d24ac0f6b3ba320": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + }, + "f2157862dc9d4ef294741a2e8a36d97d": { + "model_module": "@jupyter-widgets/controls", + "model_module_version": "1.5.0", + "model_name": "HTMLModel", + "state": { + "_dom_classes": [], + "_model_module": "@jupyter-widgets/controls", + "_model_module_version": "1.5.0", + "_model_name": "HTMLModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/controls", + 
"_view_module_version": "1.5.0", + "_view_name": "HTMLView", + "description": "", + "description_tooltip": null, + "layout": "IPY_MODEL_d5c1213d8f1d420c8835d9d8740aad4d", + "placeholder": "​", + "style": "IPY_MODEL_0e45bcff086342129fdb48114477f777", + "value": " 23767/23767 [00:08<00:00, 3680.56 examples/s]" + } + }, + "f5b01ae92a974970a4f7b191951840bf": { + "model_module": "@jupyter-widgets/base", + "model_module_version": "1.2.0", + "model_name": "LayoutModel", + "state": { + "_model_module": "@jupyter-widgets/base", + "_model_module_version": "1.2.0", + "_model_name": "LayoutModel", + "_view_count": null, + "_view_module": "@jupyter-widgets/base", + "_view_module_version": "1.2.0", + "_view_name": "LayoutView", + "align_content": null, + "align_items": null, + "align_self": null, + "border": null, + "bottom": null, + "display": null, + "flex": null, + "flex_flow": null, + "grid_area": null, + "grid_auto_columns": null, + "grid_auto_flow": null, + "grid_auto_rows": null, + "grid_column": null, + "grid_gap": null, + "grid_row": null, + "grid_template_areas": null, + "grid_template_columns": null, + "grid_template_rows": null, + "height": null, + "justify_content": null, + "justify_items": null, + "left": null, + "margin": null, + "max_height": null, + "max_width": null, + "min_height": null, + "min_width": null, + "object_fit": null, + "object_position": null, + "order": null, + "overflow": null, + "overflow_x": null, + "overflow_y": null, + "padding": null, + "right": null, + "top": null, + "visibility": null, + "width": null + } + } + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/model_output_10_epochs.zip b/model_output_10_epochs.zip new file mode 100644 index 0000000..d9d4a05 Binary files /dev/null and b/model_output_10_epochs.zip differ