From fcef20ea961a326744f65b6483c79e170ae4d49e Mon Sep 17 00:00:00 2001
From: Yu Li <lyo.gavin@gmail.com>
Date: Mon, 25 Dec 2023 16:28:47 -0600
Subject: [PATCH] Fix AutoModel on macOS

---
 air_llm/airllm/__init__.py                  |   1 +
 air_llm/airllm/airllm_llama_mlx.py          |   4 +-
 air_llm/airllm/auto_model.py                |  11 +
 air_llm/examples/run_on_macos.ipynb         | 680 +-------------------
 air_llm/setup.py                            |   2 +-
 air_llm/tests/test_notebooks/test_mlx.ipynb | 680 +-------------------
 6 files changed, 47 insertions(+), 1331 deletions(-)

diff --git a/air_llm/airllm/__init__.py b/air_llm/airllm/__init__.py
index a1d20c4..5c13fc9 100644
--- a/air_llm/airllm/__init__.py
+++ b/air_llm/airllm/__init__.py
@@ -7,6 +7,7 @@ if platform == "darwin":
 
 if is_on_mac_os:
     from .airllm_llama_mlx import AirLLMLlamaMlx
+    from .auto_model import AutoModel
 else:
     from .airllm import AirLLMLlama2
     from .airllm_chatglm import AirLLMChatGLM
diff --git a/air_llm/airllm/airllm_llama_mlx.py b/air_llm/airllm/airllm_llama_mlx.py
index be1098d..55ea7ba 100644
--- a/air_llm/airllm/airllm_llama_mlx.py
+++ b/air_llm/airllm/airllm_llama_mlx.py
@@ -295,7 +295,7 @@ class AirLLMLlamaMlx:
         self.record_memory('after_tok_embeddings')
         #for l in self.layers:
 
-        for il in tqdm(range(self.model_args.n_layers), desc='running layers:'):
+        for il in tqdm(range(self.model_args.n_layers), desc='running layers'):
             self.record_memory(f'before layer {il}')
             l = TransformerBlock(args=self.model_args)
             l.update(
@@ -381,7 +381,7 @@ class AirLLMLlamaMlx:
                 gc.collect()
             self.record_memory('after_tok_embeddings')
 
-            for i in tqdm(range(len(cache)), desc='running layers:'):
+            for i in tqdm(range(len(cache)), desc='running layers'):
                 self.record_memory(f'before layer {il}')
                 # We are overwriting the arrays in the cache list. When
                 # the computation will happen, MLX will be discarding the
diff --git a/air_llm/airllm/auto_model.py b/air_llm/airllm/auto_model.py
index a7e0f73..c899c1f 100644
--- a/air_llm/airllm/auto_model.py
+++ b/air_llm/airllm/auto_model.py
@@ -1,6 +1,14 @@
 import importlib
 from transformers import AutoConfig
+from sys import platform
 
+is_on_mac_os = False
+
+if platform == "darwin":
+    is_on_mac_os = True
+
+if is_on_mac_os:
+    from .airllm_llama_mlx import AirLLMLlamaMlx  # relative import: do not depend on airllm/__init__.py import order
 
 class AutoModel:
     def __init__(self):
@@ -37,6 +45,9 @@ class AutoModel:
     @classmethod
     def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
 
+        if is_on_mac_os:
+            return AirLLMLlamaMlx(pretrained_model_name_or_path, *inputs, ** kwargs)
+
         module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs)
         module = importlib.import_module(module)
         class_ = getattr(module, cls)
diff --git a/air_llm/examples/run_on_macos.ipynb b/air_llm/examples/run_on_macos.ipynb
index a348417..53db577 100644
--- a/air_llm/examples/run_on_macos.ipynb
+++ b/air_llm/examples/run_on_macos.ipynb
@@ -165,33 +165,6 @@
     "!pip install -U  airllm"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install transformers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!conda install pytorch  -c pytorch\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!conda install -y sentencepiece"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -199,7 +172,7 @@
    "outputs": [],
    "source": [
     "# copy local code to test\n",
-    "!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/"
+    "#!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/"
    ]
   },
   {
@@ -222,7 +195,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from airllm import AirLLMLlamaMlx"
+    "from airllm import AutoModel"
    ]
   },
   {
@@ -1212,7 +1185,7 @@
     }
    ],
    "source": [
-    "model = AirLLMLlamaMlx(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")"
+    "model = AutoModel.from_pretrained(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")"
    ]
   },
   {
@@ -1312,22 +1285,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "from airllm import AirLLMLlamaMlx"
+    "from airllm import AutoModel"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ac9bc7f5f7cb4e98b8913cc533bf6d5c",
+       "model_id": "9abc1702b4c34ed69aba9442d745cc29",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1341,7 +1314,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "474ae14c2f7b4c759f53393d13daa86f",
+       "model_id": "92138b9c855b41c4a91eb92dee9404bf",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1352,637 +1325,16 @@
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "26ef8712234d42498ffbdee1ed17514a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       ".gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2e48bcc168514e01a320e4f200390cf5",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bf6bc64a7441489a90501915a8b910c2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Best_Platty_small.jpeg:   0%|          | 0.00/7.35k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "88bc6a86ebff4c1a8031411a1465abb8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6281737965004d0698a80bfaac8c518e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3dd2a5971b664179911aecf465bc5181",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/5.23k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f334509f6e83496587b4b79319c07795",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e2066e8b954d4611bef220a5c61fe0d3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3e63b3ef1601494e8fd537613119bf3e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84f6ff70472946e5828a6376dd60cdbe",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a5ded99432254e8781d2e3ebef09c597",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4f65c2c1cc094b4f80dace069698503f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  0%|                                                                                                                                                                                               | 0/35 [00:00<?, ?it/s]"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loading shard 1/2\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.embed_tokens.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  6%|██████████▍                                                                                                                                                                            | 2/35 [00:10<02:27,  4.46s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.0.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  9%|███████████████▋                                                                                                                                                                       | 3/35 [00:12<01:47,  3.35s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.1.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 11%|████████████████████▉                                                                                                                                                                  | 4/35 [00:13<01:23,  2.71s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.2.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 14%|██████████████████████████▏                                                                                                                                                            | 5/35 [00:15<01:09,  2.31s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.3.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 17%|███████████████████████████████▎                                                                                                                                                       | 6/35 [00:17<01:02,  2.14s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.4.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 20%|████████████████████████████████████▌                                                                                                                                                  | 7/35 [00:17<00:42,  1.52s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.5.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 23%|█████████████████████████████████████████▊                                                                                                                                             | 8/35 [00:17<00:30,  1.12s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.6.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 26%|███████████████████████████████████████████████                                                                                                                                        | 9/35 [00:17<00:21,  1.19it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.7.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 29%|████████████████████████████████████████████████████                                                                                                                                  | 10/35 [00:18<00:16,  1.51it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.8.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 31%|█████████████████████████████████████████████████████████▏                                                                                                                            | 11/35 [00:18<00:13,  1.80it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.9.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 34%|██████████████████████████████████████████████████████████████▍                                                                                                                       | 12/35 [00:18<00:11,  2.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.10.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 37%|███████████████████████████████████████████████████████████████████▌                                                                                                                  | 13/35 [00:19<00:09,  2.23it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.11.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 40%|████████████████████████████████████████████████████████████████████████▊                                                                                                             | 14/35 [00:19<00:08,  2.50it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.12.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 43%|██████████████████████████████████████████████████████████████████████████████                                                                                                        | 15/35 [00:19<00:07,  2.80it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.13.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 46%|███████████████████████████████████████████████████████████████████████████████████▏                                                                                                  | 16/35 [00:20<00:06,  3.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.14.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 49%|████████████████████████████████████████████████████████████████████████████████████████▍                                                                                             | 17/35 [00:20<00:06,  2.76it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.15.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 51%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                        | 18/35 [00:20<00:06,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.16.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 54%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                   | 19/35 [00:21<00:06,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.17.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                              | 20/35 [00:21<00:05,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.18.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                        | 21/35 [00:22<00:05,  2.63it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.19.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                   | 22/35 [00:22<00:04,  2.71it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.20.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                              | 23/35 [00:22<00:04,  2.73it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.21.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                         | 24/35 [00:23<00:03,  2.79it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.22.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                    | 25/35 [00:23<00:03,  2.83it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.23.mlx\n",
-      "Loading shard 2/2\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                              | 26/35 [00:24<00:05,  1.68it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.24.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                         | 27/35 [00:24<00:04,  2.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.25.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                    | 28/35 [00:25<00:02,  2.34it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.26.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.27.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                          | 30/35 [00:25<00:01,  3.36it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.28.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.29.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍               | 32/35 [00:25<00:00,  4.12it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.30.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 33/35 [00:26<00:00,  4.33it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.31.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.norm.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:26<00:00,  1.33it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/lm_head.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "saved layers already found in /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model\n"
      ]
     }
    ],
    "source": [
-    "model = AirLLMLlamaMlx(\"garage-bAInd/Platypus2-7B\")"
+    "model = AutoModel.from_pretrained(\"garage-bAInd/Platypus2-7B\")"
    ]
   },
   {
@@ -1994,7 +1346,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -2003,7 +1355,7 @@
        "{'input_ids': array([[  1, 306, 763]])}"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2027,16 +1379,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.43it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  4.77it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.50it/s]\n"
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.95it/s]\n",
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.66it/s]\n",
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.06it/s]\n"
      ]
     },
     {
diff --git a/air_llm/setup.py b/air_llm/setup.py
index a2fccda..f8314b3 100644
--- a/air_llm/setup.py
+++ b/air_llm/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name="airllm",
-    version="2.8.1",
+    version="2.8.2",
     author="Gavin Li",
     author_email="gavinli@animaai.cloud",
     description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning.",
diff --git a/air_llm/tests/test_notebooks/test_mlx.ipynb b/air_llm/tests/test_notebooks/test_mlx.ipynb
index a348417..53db577 100644
--- a/air_llm/tests/test_notebooks/test_mlx.ipynb
+++ b/air_llm/tests/test_notebooks/test_mlx.ipynb
@@ -165,33 +165,6 @@
     "!pip install -U  airllm"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!pip install transformers"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!conda install pytorch  -c pytorch\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "!conda install -y sentencepiece"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -199,7 +172,7 @@
    "outputs": [],
    "source": [
     "# copy local code to test\n",
-    "!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/"
+    "#!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/"
    ]
   },
   {
@@ -222,7 +195,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from airllm import AirLLMLlamaMlx"
+    "from airllm import AutoModel"
    ]
   },
   {
@@ -1212,7 +1185,7 @@
     }
    ],
    "source": [
-    "model = AirLLMLlamaMlx(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")"
+    "model = AutoModel.from_pretrained(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")"
    ]
   },
   {
@@ -1312,22 +1285,22 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
-    "from airllm import AirLLMLlamaMlx"
+    "from airllm import AutoModel"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "ac9bc7f5f7cb4e98b8913cc533bf6d5c",
+       "model_id": "9abc1702b4c34ed69aba9442d745cc29",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1341,7 +1314,7 @@
     {
      "data": {
       "application/vnd.jupyter.widget-view+json": {
-       "model_id": "474ae14c2f7b4c759f53393d13daa86f",
+       "model_id": "92138b9c855b41c4a91eb92dee9404bf",
        "version_major": 2,
        "version_minor": 0
       },
@@ -1352,637 +1325,16 @@
      "metadata": {},
      "output_type": "display_data"
     },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "26ef8712234d42498ffbdee1ed17514a",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       ".gitattributes:   0%|          | 0.00/1.52k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "2e48bcc168514e01a320e4f200390cf5",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model.bin.index.json:   0%|          | 0.00/26.8k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "bf6bc64a7441489a90501915a8b910c2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Best_Platty_small.jpeg:   0%|          | 0.00/7.35k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "88bc6a86ebff4c1a8031411a1465abb8",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "6281737965004d0698a80bfaac8c518e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "config.json:   0%|          | 0.00/625 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3dd2a5971b664179911aecf465bc5181",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/5.23k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "f334509f6e83496587b4b79319c07795",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer_config.json:   0%|          | 0.00/725 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e2066e8b954d4611bef220a5c61fe0d3",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "3e63b3ef1601494e8fd537613119bf3e",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "84f6ff70472946e5828a6376dd60cdbe",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model-00002-of-00002.bin:   0%|          | 0.00/3.50G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "a5ded99432254e8781d2e3ebef09c597",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "pytorch_model-00001-of-00002.bin:   0%|          | 0.00/9.98G [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "4f65c2c1cc094b4f80dace069698503f",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  0%|                                                                                                                                                                                               | 0/35 [00:00<?, ?it/s]"
-     ]
-    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Loading shard 1/2\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.embed_tokens.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  6%|██████████▍                                                                                                                                                                            | 2/35 [00:10<02:27,  4.46s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.0.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "  9%|███████████████▋                                                                                                                                                                       | 3/35 [00:12<01:47,  3.35s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.1.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 11%|████████████████████▉                                                                                                                                                                  | 4/35 [00:13<01:23,  2.71s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.2.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 14%|██████████████████████████▏                                                                                                                                                            | 5/35 [00:15<01:09,  2.31s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.3.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 17%|███████████████████████████████▎                                                                                                                                                       | 6/35 [00:17<01:02,  2.14s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.4.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 20%|████████████████████████████████████▌                                                                                                                                                  | 7/35 [00:17<00:42,  1.52s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.5.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 23%|█████████████████████████████████████████▊                                                                                                                                             | 8/35 [00:17<00:30,  1.12s/it]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.6.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 26%|███████████████████████████████████████████████                                                                                                                                        | 9/35 [00:17<00:21,  1.19it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.7.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 29%|████████████████████████████████████████████████████                                                                                                                                  | 10/35 [00:18<00:16,  1.51it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.8.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 31%|█████████████████████████████████████████████████████████▏                                                                                                                            | 11/35 [00:18<00:13,  1.80it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.9.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 34%|██████████████████████████████████████████████████████████████▍                                                                                                                       | 12/35 [00:18<00:11,  2.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.10.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 37%|███████████████████████████████████████████████████████████████████▌                                                                                                                  | 13/35 [00:19<00:09,  2.23it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.11.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 40%|████████████████████████████████████████████████████████████████████████▊                                                                                                             | 14/35 [00:19<00:08,  2.50it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.12.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 43%|██████████████████████████████████████████████████████████████████████████████                                                                                                        | 15/35 [00:19<00:07,  2.80it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.13.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 46%|███████████████████████████████████████████████████████████████████████████████████▏                                                                                                  | 16/35 [00:20<00:06,  3.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.14.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 49%|████████████████████████████████████████████████████████████████████████████████████████▍                                                                                             | 17/35 [00:20<00:06,  2.76it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.15.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 51%|█████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                        | 18/35 [00:20<00:06,  2.65it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.16.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 54%|██████████████████████████████████████████████████████████████████████████████████████████████████▊                                                                                   | 19/35 [00:21<00:06,  2.61it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.17.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 57%|████████████████████████████████████████████████████████████████████████████████████████████████████████                                                                              | 20/35 [00:21<00:05,  2.60it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.18.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 60%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                                                        | 21/35 [00:22<00:05,  2.63it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.19.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 63%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                   | 22/35 [00:22<00:04,  2.71it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.20.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                                              | 23/35 [00:22<00:04,  2.73it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.21.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 69%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▊                                                         | 24/35 [00:23<00:03,  2.79it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.22.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 71%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                    | 25/35 [00:23<00:03,  2.83it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.23.mlx\n",
-      "Loading shard 2/2\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                              | 26/35 [00:24<00:05,  1.68it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.24.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 77%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                         | 27/35 [00:24<00:04,  2.00it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.25.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 80%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌                                    | 28/35 [00:25<00:02,  2.34it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.26.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.27.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 86%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                          | 30/35 [00:25<00:01,  3.36it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.28.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.29.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 91%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍               | 32/35 [00:25<00:00,  4.12it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.30.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      " 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▌          | 33/35 [00:26<00:00,  4.33it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.layers.31.mlx\n",
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/model.norm.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 35/35 [00:26<00:00,  1.33it/s]"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "saved as: /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model/lm_head.mlx\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "\n"
+      "saved layers already found in /Users/l_y_o/.cache/huggingface/hub/models--garage-bAInd--Platypus2-7B/snapshots/c27aff7201e611f301c0e19f351cbe74b1a9f1f1/splitted_model\n"
      ]
     }
    ],
    "source": [
-    "model = AirLLMLlamaMlx(\"garage-bAInd/Platypus2-7B\")"
+    "model = AutoModel.from_pretrained(\"garage-bAInd/Platypus2-7B\")"
    ]
   },
   {
@@ -1994,7 +1346,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -2003,7 +1355,7 @@
        "{'input_ids': array([[  1, 306, 763]])}"
       ]
      },
-     "execution_count": 3,
+     "execution_count": 4,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -2027,16 +1379,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.43it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:06<00:00,  4.77it/s]\n",
-      "100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.50it/s]\n"
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.95it/s]\n",
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:08<00:00,  3.66it/s]\n",
+      "running layers:: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 32/32 [00:07<00:00,  4.06it/s]\n"
      ]
     },
     {