diff --git a/.gitignore b/.gitignore
index 7e144ff..d2c9a76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -167,10 +167,89 @@ dmypy.json
 # Cython debug symbols
 cython_debug/
 
-# PyCharm
-#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
-#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
-#  and can be added to the global gitignore or merged into this file.  For a more nuclear
-#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+# Covers JetBrains IDEs: IntelliJ, GoLand, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
 
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# AWS User-specific
+.idea/**/aws.xml
+
+# Generated files
+.idea/**/contentModel.xml
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn.  Uncomment if using
+# auto-import.
+# .idea/artifacts
+# .idea/compiler.xml
+# .idea/jarRepositories.xml
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+# *.iml
+# *.ipr
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# SonarLint plugin (sonarlint.xml: see https://community.sonarsource.com/t/is-the-file-idea-idea-idea-sonarlint-xml-intended-to-be-under-source-control/121119)
+.idea/sonarlint/
+.idea/sonarlint.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based HTTP Client
+.idea/httpRequests
+http-client.private.env.json
+
+# Android studio 3.1+ serialized cache file
+.idea/caches/build_file_checksums.ser
+
+# Apifox Helper cache
+.idea/.cache/.Apifox_Helper
+.idea/ApifoxUploaderProjectSetting.xml
+
+# Github Copilot persisted session migrations, see: https://github.com/microsoft/copilot-intellij-feedback/issues/712#issuecomment-3322062215
+.idea/**/copilot.data.migration.*.xml
\ No newline at end of file
diff --git a/Building an LLM.md b/Building an LLM.md
new file mode 100644
index 0000000..8f2462e
--- /dev/null
+++ b/Building an LLM.md	
@@ -0,0 +1,259 @@
+# 🧠 LLM Mini Project — Step-by-Step Checklist
+
+---
+
+## 📦 0. Setup Environment
+
+- [ ] Create a new project folder
+- [ ] Set up a virtual environment
+- [ ] Install core dependencies:
+  - [ ] torch
+  - [ ] transformers
+  - [ ] datasets
+  - [ ] accelerate
+  - [ ] peft (for LoRA later)
+  - [ ] bitsandbytes (for quantization later)
+- [ ] Confirm GPU is available (`torch.cuda.is_available()`)
+
+---
+
+## 🔍 1. Understand the Problem (don’t skip this)
+
+- [ ] Write down in your own words:
+  - [ ] What is a language model?
+  - [ ] What does “predict next token” actually mean?
+- [ ] Manually inspect:
+  - [ ] A sample sentence
+  - [ ] Its tokenized form
+- [ ] Verify:
+  - [ ] Input tokens vs target tokens (shifted by 1)
+
+---
+
+## 📚 2. Load Dataset
+
+- [ ] Choose dataset:
+  - [ ] Start with WikiText-2
+- [ ] Load dataset using `datasets`
+- [ ] Print:
+  - [ ] A few raw samples
+- [ ] Check:
+  - [ ] Dataset size
+  - [ ] Train/validation split
+
+---
+
+## 🔢 3. Tokenization
+
+- [ ] Load GPT-2 tokenizer
+- [ ] Tokenize dataset:
+  - [ ] Apply truncation
+  - [ ] Apply padding
+- [ ] Verify:
+  - [ ] Shape of tokenized output
+  - [ ] Decode tokens back to text (sanity check)
+
+---
+
+## 🧱 4. Prepare Training Data
+
+- [ ] Convert dataset to PyTorch format
+- [ ] Create DataLoader:
+  - [ ] Set batch size (start small: 2–8)
+- [ ] Confirm:
+  - [ ] Batches load correctly
+  - [ ] Tensor shapes are consistent
+
+---
+
+## 🤖 5. Load Model
+
+- [ ] Load pretrained GPT-2 small
+- [ ] Move model to GPU (if available)
+- [ ] Print:
+  - [ ] Model size (parameters)
+- [ ] Run a single forward pass to confirm:
+  - [ ] No errors
+
+---
+
+## 🔁 6. Build Training Loop (core understanding)
+
+- [ ] Write your own training loop (no Trainer API yet)
+- [ ] Include:
+  - [ ] Forward pass
+  - [ ] Loss calculation
+  - [ ] Backpropagation
+  - [ ] Optimizer step
+- [ ] Print:
+  - [ ] Loss every few steps
+
+---
+
+## 📉 7. Observe Training Behaviour
+
+- [ ] Track:
+  - [ ] Training loss over time
+- [ ] Answer:
+  - [ ] Is loss decreasing?
+  - [ ] Is it noisy or stable?
+- [ ] (Optional)
+  - [ ] Plot loss curve
+
+---
+
+## 🧪 8. Evaluate Model
+
+- [ ] Generate text from model:
+  - [ ] Before training
+  - [ ] After training
+- [ ] Compare:
+  - [ ] Coherence
+  - [ ] Structure
+- [ ] Note:
+  - [ ] Any overfitting signs (repetition, memorization)
+
+---
+
+## ⚖️ 9. Try LoRA Fine-Tuning
+
+- [ ] Add LoRA using `peft`
+- [ ] Freeze base model weights
+- [ ] Train only adapter layers
+- [ ] Compare vs full fine-tuning:
+  - [ ] Speed
+  - [ ] Memory usage
+  - [ ] Output quality
+
+---
+
+## 🧠 10. Understand Convergence
+
+- [ ] Identify:
+  - [ ] When loss plateaus
+- [ ] Check validation loss:
+  - [ ] Does it increase? (overfitting)
+- [ ] Write down:
+  - [ ] What “good training” looks like
+
+---
+
+## ⚙️ 11. Model Saving & Loading
+
+- [ ] Save:
+  - [ ] Model weights
+  - [ ] Tokenizer
+- [ ] Reload model
+- [ ] Confirm:
+  - [ ] Outputs remain consistent
+
+---
+
+# 🚀 PART 2 — Infrastructure & Serving
+
+---
+
+## 🧠 12. Understand Inference Flow
+
+- [ ] Write down:
+  - [ ] Steps from input → output
+- [ ] Measure:
+  - [ ] Time taken for a single generation
+
+---
+
+## ⚡ 13. Optimize Inference
+
+- [ ] Test batching:
+  - [ ] Multiple inputs at once
+- [ ] Compare:
+  - [ ] Latency vs throughput
+
+---
+
+## 🧮 14. Apply Quantization
+
+- [ ] Load model in:
+  - [ ] 8-bit
+  - [ ] (Optional) 4-bit
+- [ ] Compare:
+  - [ ] Memory usage
+  - [ ] Speed
+  - [ ] Output quality
+
+---
+
+## 🖥️ 15. Simulate Real-World Usage
+
+- [ ] Pretend you have:
+  - [ ] Multiple users hitting your model
+- [ ] Think through:
+  - [ ] How would you queue requests?
+  - [ ] When would you batch?
+  - [ ] When would you scale?
+
+---
+
+## ☁️ 16. Understand Infra Concepts
+
+- [ ] Research:
+  - [ ] GPU provisioning
+  - [ ] Autoscaling
+  - [ ] Model warm starts
+- [ ] Understand:
+  - [ ] Why loading time matters
+  - [ ] Why GPUs shouldn’t sit idle
+
+---
+
+## 🧬 17. (Bonus) DICOM Exploration
+
+- [ ] Learn:
+  - [ ] What DICOM files are
+- [ ] Think:
+  - [ ] How LLMs could be used with medical data
+- [ ] Note:
+  - [ ] Privacy + domain challenges
+
+---
+
+## ✍️ 18. Write Your Blog
+
+### Structure
+
+- [ ] Introduction:
+  - [ ] What is an LLM really?
+- [ ] Training:
+  - [ ] Tokenization
+  - [ ] Training loop
+  - [ ] Loss behaviour
+- [ ] Fine-tuning:
+  - [ ] Full vs LoRA
+- [ ] Challenges:
+  - [ ] What went wrong
+- [ ] Infrastructure:
+  - [ ] Serving challenges
+  - [ ] Batching
+  - [ ] Quantization
+- [ ] Key Learnings:
+  - [ ] What surprised you
+  - [ ] What actually matters
+
+---
+
+## ✅ Final Deliverables
+
+- [ ] Working training script
+- [ ] LoRA vs full fine-tune comparison
+- [ ] Basic inference script
+- [ ] Blog post (clear + honest)
+- [ ] Notes showing your understanding
+
+---
+
+## ⚠️ Keep Yourself Honest
+
+- [ ] Can you explain the training loop without looking?
+- [ ] Do you understand why loss decreases?
+- [ ] Can you explain batching vs latency tradeoffs?
+- [ ] Do you know what would break at scale?
\ No newline at end of file
diff --git a/LLM-gpt.ipynb b/LLM-gpt.ipynb
new file mode 100644
index 0000000..40e9e03
--- /dev/null
+++ b/LLM-gpt.ipynb
@@ -0,0 +1,1445 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "14af33bd",
+   "metadata": {
+    "id": "56960f30a3439f44",
+    "papermill": {
+     "duration": 0.005418,
+     "end_time": "2026-04-11T13:53:13.487797+00:00",
+     "exception": false,
+     "start_time": "2026-04-11T13:53:13.482379+00:00",
+     "status": "completed"
+    },
+    "tags": []
+   },
+   "source": [
+    "# Fine-tuning GPT-2 on Wikipedia data to understand how to fine-tune a foundation model\n",
+    "\n",
+    "## Tokenization of the data\n",
+    "\n",
+    "We need to tokenize the data with GPT-2's byte-level byte pair encoding (BPE) tokenizer to get the training data ready. The model does not read text directly: the tokenizer converts the UTF-8 bytes of the text into sub-word tokens, and the model consumes the integer IDs of those tokens."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35498829",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-04-10T18:41:26.391038949Z",
+     "start_time": "2026-04-10T18:41:23.168674202Z"
+    },
+    "execution": {
+     "iopub.execute_input": "2026-04-11T12:30:46.405141Z",
+     "iopub.status.busy": "2026-04-11T12:30:46.404565Z",
+     "iopub.status.idle": "2026-04-11T12:30:51.811305Z",
+     "shell.execute_reply": "2026-04-11T12:30:51.810501Z",
+     "shell.execute_reply.started": "2026-04-11T12:30:46.405105Z"
+    },
+    "id": "e826153a4c898239",
+    "outputId": "56e438e4-47ed-49a7-e514-d8a61bd32c94",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": false,
+     "start_time": "2026-04-11T13:53:13.492503+00:00",
+     "status": "running"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "dataset = load_dataset(\"wikitext\", \"wikitext-2-raw-v1\")\n",
+    "dataset = dataset.filter(lambda x: len(x[\"text\"].strip()) > 0)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cda2e2f4",
+   "metadata": {
+    "id": "27c55eaf29e555a2",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "source": [
+    "Let's see if there's some data for us to use"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3125c85f",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-04-10T18:41:26.542937056Z",
+     "start_time": "2026-04-10T18:41:26.411012195Z"
+    },
+    "execution": {
+     "iopub.execute_input": "2026-04-11T12:30:51.813142Z",
+     "iopub.status.busy": "2026-04-11T12:30:51.812559Z",
+     "iopub.status.idle": "2026-04-11T12:30:51.819801Z",
+     "shell.execute_reply": "2026-04-11T12:30:51.818873Z",
+     "shell.execute_reply.started": "2026-04-11T12:30:51.813098Z"
+    },
+    "id": "b1efb38c36d2dd42",
+    "outputId": "826d3ede-0e4b-483f-de7d-1fd2f294cb14",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for i in range(10):\n",
+    "    print(dataset[\"train\"][i])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c3cf0915",
+   "metadata": {
+    "id": "e8119df124aca910",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Now time to tokenize the data and then chunk the data\n",
+    "We need to tokenize the data so that the model can process it. This is an important step because the model doesn't understand plain text."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4b81928a",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-04-10T18:53:46.503479303Z",
+     "start_time": "2026-04-10T18:53:43.641312576Z"
+    },
+    "execution": {
+     "iopub.execute_input": "2026-04-11T12:30:51.820972Z",
+     "iopub.status.busy": "2026-04-11T12:30:51.820748Z",
+     "iopub.status.idle": "2026-04-11T12:31:01.890648Z",
+     "shell.execute_reply": "2026-04-11T12:31:01.889735Z",
+     "shell.execute_reply.started": "2026-04-11T12:30:51.820951Z"
+    },
+    "id": "252fb89c3351bf1e",
+    "outputId": "06f9f922-c233-4445-bedd-e616bd871dbb",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from transformers import GPT2Tokenizer, GPT2LMHeadModel\n",
+    "\n",
+    "tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
+    "tokenizer.pad_token = tokenizer.eos_token\n",
+    "\n",
+    "\n",
+    "def tokenize(data):\n",
+    "    return tokenizer(data[\"text\"], truncation=True, max_length=128)  # truncate explicitly; max_length alone relies on a warned implicit fallback\n",
+    "\n",
+    "\n",
+    "def combine(data):\n",
+    "    concatenated = {}\n",
+    "    # Let's concat the data (tokens)\n",
+    "    for k, lists in data.items():\n",
+    "        combined = []\n",
+    "        for lst in lists:\n",
+    "            combined.extend(lst)\n",
+    "        concatenated[k] = combined\n",
+    "\n",
+    "    total_length = len(concatenated[\"input_ids\"])\n",
+    "\n",
+    "    total_length = (total_length // 128) * 128\n",
+    "\n",
+    "    result = {}\n",
+    "\n",
+    "    # split into chunks\n",
+    "    for k, lst in concatenated.items():\n",
+    "        chunks = []\n",
+    "        for l in range(0, total_length, 128):\n",
+    "            chunks.append(lst[l:l + 128])\n",
+    "        result[k] = chunks\n",
+    "\n",
+    "    result[\"labels\"] = result[\"input_ids\"].copy()\n",
+    "    return result\n",
+    "\n",
+    "\n",
+    "tokenized_datasets = dataset.map(tokenize, batched=True, remove_columns=[\"text\"])\n",
+    "\n",
+    "training_dataset = tokenized_datasets.map(combine, batched=True)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72c4e93f",
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2026-04-10T18:41:33.790641588Z",
+     "start_time": "2026-04-10T18:41:33.561714562Z"
+    },
+    "execution": {
+     "iopub.execute_input": "2026-04-11T12:31:01.893004Z",
+     "iopub.status.busy": "2026-04-11T12:31:01.892278Z",
+     "iopub.status.idle": "2026-04-11T12:31:01.901272Z",
+     "shell.execute_reply": "2026-04-11T12:31:01.900470Z",
+     "shell.execute_reply.started": "2026-04-11T12:31:01.892976Z"
+    },
+    "id": "b47c2da1cef52c2e",
+    "outputId": "a5ef3e2d-defa-4a1a-f040-03ccfc7a91df",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "for i in range(10):\n",
+    "    print(len(training_dataset[\"train\"][i][\"input_ids\"]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "af6d3ad7",
+   "metadata": {
+    "id": "bfd3873f9f578429",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "source": [
+    "## Model loop\n",
+    "Now we need to create our training loop for GPT-2 using the pretrained model, accounting for backpropagation, the loss, and the number of epochs."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "41b3d0ab",
+   "metadata": {
+    "execution": {
+     "iopub.execute_input": "2026-04-11T12:31:01.902695Z",
+     "iopub.status.busy": "2026-04-11T12:31:01.902379Z"
+    },
+    "id": "8208e77b670a0eb2",
+    "outputId": "005113d2-3641-47a9-9c34-fbff7d36ac05",
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import time\n",
+    "import torch.optim as optim\n",
+    "from torch.utils.data import DataLoader\n",
+    "\n",
+    "model = GPT2LMHeadModel.from_pretrained(\"gpt2\")\n",
+    "model.resize_token_embeddings(len(tokenizer))\n",
+    "\n",
+    "training_dataset.set_format(type=\"torch\")  # set tensor output before building the loader instead of relying on later in-place mutation\n",
+    "training_data = DataLoader(training_dataset[\"train\"], batch_size=2, shuffle=True)\n",
+    "optimizer = optim.AdamW(model.parameters(), lr=5e-5)\n",
+    "\n",
+    "device = torch.device(\"cuda\" if torch.cuda.is_available() else \"cpu\")\n",
+    "model.to(device)\n",
+    "\n",
+    "num_epochs = 10\n",
+    "total_start = time.time()\n",
+    "\n",
+    "for epoch in range(num_epochs):\n",
+    "    start = time.time()\n",
+    "\n",
+    "    model.train()\n",
+    "    running_loss = 0.0\n",
+    "\n",
+    "    for batch in training_data:\n",
+    "        input_ids = batch[\"input_ids\"].to(device)\n",
+    "        attention_mask = batch[\"attention_mask\"].to(device)\n",
+    "        labels = batch[\"labels\"].to(device)\n",
+    "\n",
+    "        optimizer.zero_grad()\n",
+    "        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=labels)\n",
+    "        loss = outputs.loss\n",
+    "        loss.backward()\n",
+    "        optimizer.step()\n",
+    "\n",
+    "        running_loss += loss.item()\n",
+    "\n",
+    "    if device.type == \"cuda\": torch.cuda.synchronize()\n",
+    "\n",
+    "    epoch_time = time.time() - start\n",
+    "    avg_time = (time.time() - total_start) / (epoch + 1)\n",
+    "    eta = avg_time * (num_epochs - epoch - 1)\n",
+    "\n",
+    "    print(f\"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(training_data):.4f}, Time: {epoch_time:.2f}s, ETA: {eta/60:.2f}m\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "65323f48",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "output_dir = \"./gpt2_finetuned\"\n",
+    "model.save_pretrained(output_dir)\n",
+    "\n",
+    "tokenizer.save_pretrained(output_dir)\n",
+    "\n",
+    "print(f\"Model saved to {output_dir}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27001a9a",
+   "metadata": {
+    "papermill": {
+     "duration": null,
+     "end_time": null,
+     "exception": null,
+     "start_time": null,
+     "status": "pending"
+    },
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import shutil\n",
+    "\n",
+    "shutil.make_archive('model_output', 'zip', './gpt2_finetuned')\n",
+    "\n",
+    "print(\"Model zipped and ready for download!\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "accelerator": "GPU",
+  "colab": {
+   "gpuType": "T4",
+   "provenance": []
+  },
+  "kaggle": {
+   "accelerator": "nvidiaTeslaT4",
+   "dataSources": [],
+   "dockerImageVersionId": 31328,
+   "isGpuEnabled": true,
+   "isInternetEnabled": true,
+   "language": "python",
+   "sourceType": "notebook"
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.12"
+  },
+  "papermill": {
+   "default_parameters": {},
+   "duration": null,
+   "end_time": null,
+   "environment_variables": {},
+   "exception": null,
+   "input_path": "__notebook__.ipynb",
+   "output_path": "__notebook__.ipynb",
+   "parameters": {},
+   "start_time": "2026-04-11T13:53:10.044748+00:00",
+   "version": "2.7.0"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "03e0356919f14fd685bcc1612c9050d6": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "0741c9735ba24cd8b2e574780ed9bb2c": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "09c6543498e34a368543a953aa4a83ba": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_1c0993d123a74bb0852197162b2e2d3e",
+       "IPY_MODEL_6cd5ddd66dbb4bf1ac3066f268b1b363",
+       "IPY_MODEL_6f265574427b4a73b774de42d12659c2"
+      ],
+      "layout": "IPY_MODEL_5680619f39e94c419be50e40bd1a775a"
+     }
+    },
+    "0be49291f31545f789176d9bc9e642c4": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_434d9c38ce03441f9b69ca86ddfb3c0b",
+      "placeholder": "​",
+      "style": "IPY_MODEL_b54f1d1380b94261b13997f9eceb423e",
+      "value": " 148/148 [00:00&lt;00:00, 614.69it/s, Materializing param=transformer.wte.weight]"
+     }
+    },
+    "0e45bcff086342129fdb48114477f777": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "1c0993d123a74bb0852197162b2e2d3e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_f5b01ae92a974970a4f7b191951840bf",
+      "placeholder": "​",
+      "style": "IPY_MODEL_3fd65bd7c9c64480a99356bc7836640e",
+      "value": "Map: 100%"
+     }
+    },
+    "2349ef76bed847b79b5e7da6540aedef": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "36433e0953664a70981a91977217cf3f": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "3fd65bd7c9c64480a99356bc7836640e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "434d9c38ce03441f9b69ca86ddfb3c0b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "4b279cc9d382415fa1d2cf21a94415f2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_c70ad2b68e9e49649e2bfc71764cb5a0",
+      "max": 148,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_7b6f9b2d74c643ab8fd016d7e8117a53",
+      "value": 148
+     }
+    },
+    "4b60e0df52a7481d8e4b9df0f3171a2d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "54c2e9c7137049ff8beb05bb8c972658": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_0741c9735ba24cd8b2e574780ed9bb2c",
+      "placeholder": "​",
+      "style": "IPY_MODEL_5bdc83d116af40c88bbb879e5f4df01f",
+      "value": "Map: 100%"
+     }
+    },
+    "5680619f39e94c419be50e40bd1a775a": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "5bdc83d116af40c88bbb879e5f4df01f": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "6cd5ddd66dbb4bf1ac3066f268b1b363": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_8215a42be5c64fe9a25d92fc8008fa0b",
+      "max": 23767,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_2349ef76bed847b79b5e7da6540aedef",
+      "value": 23767
+     }
+    },
+    "6f265574427b4a73b774de42d12659c2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_aec637e7513b442c9326632354961ada",
+      "placeholder": "​",
+      "style": "IPY_MODEL_03e0356919f14fd685bcc1612c9050d6",
+      "value": " 23767/23767 [00:02&lt;00:00, 9996.68 examples/s]"
+     }
+    },
+    "7b1dc4f98c654745a1a6ce607a4a9be0": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "FloatProgressModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "FloatProgressModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "ProgressView",
+      "bar_style": "success",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_b26f87cd94374c4985e9d8ea029fcb19",
+      "max": 23767,
+      "min": 0,
+      "orientation": "horizontal",
+      "style": "IPY_MODEL_4b60e0df52a7481d8e4b9df0f3171a2d",
+      "value": 23767
+     }
+    },
+    "7b6f9b2d74c643ab8fd016d7e8117a53": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "ProgressStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "ProgressStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "bar_color": null,
+      "description_width": ""
+     }
+    },
+    "8215a42be5c64fe9a25d92fc8008fa0b": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "8e25812455fd4680b9d664aaecfe47f2": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_54c2e9c7137049ff8beb05bb8c972658",
+       "IPY_MODEL_7b1dc4f98c654745a1a6ce607a4a9be0",
+       "IPY_MODEL_f2157862dc9d4ef294741a2e8a36d97d"
+      ],
+      "layout": "IPY_MODEL_ce23d7097c2a4d189f0e7d9a2c22c9be"
+     }
+    },
+    "8f05f89d52b047368c4405847015de33": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HBoxModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HBoxModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HBoxView",
+      "box_style": "",
+      "children": [
+       "IPY_MODEL_c9173aa946df480d81648bf01808a153",
+       "IPY_MODEL_4b279cc9d382415fa1d2cf21a94415f2",
+       "IPY_MODEL_0be49291f31545f789176d9bc9e642c4"
+      ],
+      "layout": "IPY_MODEL_f12fe68a33ff4a699d24ac0f6b3ba320"
+     }
+    },
+    "aec637e7513b442c9326632354961ada": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b26f87cd94374c4985e9d8ea029fcb19": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "b54f1d1380b94261b13997f9eceb423e": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "c70ad2b68e9e49649e2bfc71764cb5a0": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "c9173aa946df480d81648bf01808a153": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_36433e0953664a70981a91977217cf3f",
+      "placeholder": "​",
+      "style": "IPY_MODEL_caaaadd11e8d49559e8278629f6f0b45",
+      "value": "Loading weights: 100%"
+     }
+    },
+    "caaaadd11e8d49559e8278629f6f0b45": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "DescriptionStyleModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "DescriptionStyleModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "StyleView",
+      "description_width": ""
+     }
+    },
+    "ce23d7097c2a4d189f0e7d9a2c22c9be": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "d5c1213d8f1d420c8835d9d8740aad4d": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f12fe68a33ff4a699d24ac0f6b3ba320": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    },
+    "f2157862dc9d4ef294741a2e8a36d97d": {
+     "model_module": "@jupyter-widgets/controls",
+     "model_module_version": "1.5.0",
+     "model_name": "HTMLModel",
+     "state": {
+      "_dom_classes": [],
+      "_model_module": "@jupyter-widgets/controls",
+      "_model_module_version": "1.5.0",
+      "_model_name": "HTMLModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/controls",
+      "_view_module_version": "1.5.0",
+      "_view_name": "HTMLView",
+      "description": "",
+      "description_tooltip": null,
+      "layout": "IPY_MODEL_d5c1213d8f1d420c8835d9d8740aad4d",
+      "placeholder": "​",
+      "style": "IPY_MODEL_0e45bcff086342129fdb48114477f777",
+      "value": " 23767/23767 [00:08&lt;00:00, 3680.56 examples/s]"
+     }
+    },
+    "f5b01ae92a974970a4f7b191951840bf": {
+     "model_module": "@jupyter-widgets/base",
+     "model_module_version": "1.2.0",
+     "model_name": "LayoutModel",
+     "state": {
+      "_model_module": "@jupyter-widgets/base",
+      "_model_module_version": "1.2.0",
+      "_model_name": "LayoutModel",
+      "_view_count": null,
+      "_view_module": "@jupyter-widgets/base",
+      "_view_module_version": "1.2.0",
+      "_view_name": "LayoutView",
+      "align_content": null,
+      "align_items": null,
+      "align_self": null,
+      "border": null,
+      "bottom": null,
+      "display": null,
+      "flex": null,
+      "flex_flow": null,
+      "grid_area": null,
+      "grid_auto_columns": null,
+      "grid_auto_flow": null,
+      "grid_auto_rows": null,
+      "grid_column": null,
+      "grid_gap": null,
+      "grid_row": null,
+      "grid_template_areas": null,
+      "grid_template_columns": null,
+      "grid_template_rows": null,
+      "height": null,
+      "justify_content": null,
+      "justify_items": null,
+      "left": null,
+      "margin": null,
+      "max_height": null,
+      "max_width": null,
+      "min_height": null,
+      "min_width": null,
+      "object_fit": null,
+      "object_position": null,
+      "order": null,
+      "overflow": null,
+      "overflow_x": null,
+      "overflow_y": null,
+      "padding": null,
+      "right": null,
+      "top": null,
+      "visibility": null,
+      "width": null
+     }
+    }
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
\ No newline at end of file
diff --git a/model_output_10_epochs.zip b/model_output_10_epochs.zip
new file mode 100644
index 0000000..d9d4a05
Binary files /dev/null and b/model_output_10_epochs.zip differ