From fcef20ea961a326744f65b6483c79e170ae4d49e Mon Sep 17 00:00:00 2001 From: Yu Li Date: Mon, 25 Dec 2023 16:28:47 -0600 Subject: [PATCH] fix automodel on macos --- air_llm/airllm/__init__.py | 1 + air_llm/airllm/airllm_llama_mlx.py | 4 +- air_llm/airllm/auto_model.py | 11 + air_llm/examples/run_on_macos.ipynb | 680 +------------------- air_llm/setup.py | 2 +- air_llm/tests/test_notebooks/test_mlx.ipynb | 680 +------------------- 6 files changed, 47 insertions(+), 1331 deletions(-) diff --git a/air_llm/airllm/__init__.py b/air_llm/airllm/__init__.py index a1d20c4..5c13fc9 100644 --- a/air_llm/airllm/__init__.py +++ b/air_llm/airllm/__init__.py @@ -7,6 +7,7 @@ if platform == "darwin": if is_on_mac_os: from .airllm_llama_mlx import AirLLMLlamaMlx + from .auto_model import AutoModel else: from .airllm import AirLLMLlama2 from .airllm_chatglm import AirLLMChatGLM diff --git a/air_llm/airllm/airllm_llama_mlx.py b/air_llm/airllm/airllm_llama_mlx.py index be1098d..55ea7ba 100644 --- a/air_llm/airllm/airllm_llama_mlx.py +++ b/air_llm/airllm/airllm_llama_mlx.py @@ -295,7 +295,7 @@ class AirLLMLlamaMlx: self.record_memory('after_tok_embeddings') #for l in self.layers: - for il in tqdm(range(self.model_args.n_layers), desc='running layers:'): + for il in tqdm(range(self.model_args.n_layers), desc='running layers'): self.record_memory(f'before layer {il}') l = TransformerBlock(args=self.model_args) l.update( @@ -381,7 +381,7 @@ class AirLLMLlamaMlx: gc.collect() self.record_memory('after_tok_embeddings') - for i in tqdm(range(len(cache)), desc='running layers:'): + for i in tqdm(range(len(cache)), desc='running layers'): self.record_memory(f'before layer {il}') # We are overwriting the arrays in the cache list. When # the computation will happen, MLX will be discarding the diff --git a/air_llm/airllm/auto_model.py b/air_llm/airllm/auto_model.py index a7e0f73..c899c1f 100644 --- a/air_llm/airllm/auto_model.py +++ b/air_llm/airllm/auto_model.py @@ -1,6 +1,14 @@ import importlib from transformers import AutoConfig +from sys import platform +is_on_mac_os = False + +if platform == "darwin": + is_on_mac_os = True + +if is_on_mac_os: + from airllm import AirLLMLlamaMlx class AutoModel: def __init__(self): @@ -37,6 +45,9 @@ class AutoModel: @classmethod def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs): + if is_on_mac_os: + return AirLLMLlamaMlx(pretrained_model_name_or_path, *inputs, ** kwargs) + module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs) module = importlib.import_module(module) class_ = getattr(module, cls) diff --git a/air_llm/examples/run_on_macos.ipynb b/air_llm/examples/run_on_macos.ipynb index a348417..53db577 100644 --- a/air_llm/examples/run_on_macos.ipynb +++ b/air_llm/examples/run_on_macos.ipynb @@ -165,33 +165,6 @@ "!pip install -U airllm" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install transformers" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!conda install pytorch -c pytorch\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!conda install -y sentencepiece" - ] - }, { "cell_type": "code", "execution_count": 1, @@ -199,7 +172,7 @@ "outputs": [], "source": [ "# copy local code to test\n", - "!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/" + "#!cp -r /Users/l_y_o/Work/Anima/air_llm/airllm/* /usr/local/anaconda3/envs/native/lib/python3.11/site-packages/airllm/" ] }, { @@ -222,7 +195,7 @@ "metadata": {}, "outputs": [], "source": [ - "from airllm import AirLLMLlamaMlx" + "from airllm import AutoModel" ] }, { @@ -1212,7 +1185,7 @@ } ], "source": [ - "model = AirLLMLlamaMlx(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")" + "model = AutoModel.from_pretrained(\"01-ai/Yi-34B\")#\"garage-bAInd/Platypus2-7B\")" ] }, { @@ -1312,22 +1285,22 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "from airllm import AirLLMLlamaMlx" + "from airllm import AutoModel" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "ac9bc7f5f7cb4e98b8913cc533bf6d5c", + "model_id": "9abc1702b4c34ed69aba9442d745cc29", "version_major": 2, "version_minor": 0 }, @@ -1341,7 +1314,7 @@ { "data": { "application/vnd.jupyter.widget-view+json": { - "model_id": "474ae14c2f7b4c759f53393d13daa86f", + "model_id": "92138b9c855b41c4a91eb92dee9404bf", "version_major": 2, "version_minor": 0 }, @@ -1352,637 +1325,16 @@ "metadata": {}, "output_type": "display_data" }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "26ef8712234d42498ffbdee1ed17514a", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - ".gitattributes: 0%| | 0.00/1.52k [00:00