From d2182a1bf6d73c171d8f8ed6bf4047323c4c609e Mon Sep 17 00:00:00 2001 From: Yu Li Date: Wed, 20 Dec 2023 22:35:37 -0600 Subject: [PATCH] add Mixtral --- air_llm/README.md | 3 + air_llm/airllm/__init__.py | 1 + air_llm/airllm/airllm_mixtral.py | 22 + air_llm/airllm/auto_model.py | 10 +- air_llm/setup.py | 2 +- air_llm/tests/test_automodel.py | 3 +- .../tests/test_notebooks/test_mixtral.ipynb | 710 ++++++++++++++++++ 7 files changed, 741 insertions(+), 10 deletions(-) create mode 100644 air_llm/airllm/airllm_mixtral.py create mode 100644 air_llm/tests/test_notebooks/test_mixtral.ipynb diff --git a/air_llm/README.md b/air_llm/README.md index 0fd9cce..d8c8a2c 100644 --- a/air_llm/README.md +++ b/air_llm/README.md @@ -6,6 +6,8 @@ AirLLM优化inference内存,4GB单卡GPU可以运行70B大语言模型推理 ## Updates +[2023/12/20] v2.7: Support AirLLMMixtral. + [2023/12/20] v2.6: Added AutoModel, automatically detect model type, no need to provide model class to initialize model. 提供AuoModel,自动根据repo参数检测模型类型,自动初始化模型。 @@ -165,6 +167,7 @@ Example colabs here: | 9 | jondurbin/airoboros-l2-70b-2.2.1 | ✅ | AirLLMLlama2 | | 10 | chargoddard/Yi-34B-Llama | ✅ | AirLLMLlama2 | | ? | mistralai/Mistral-7B-Instruct-v0.1 | ✅ | AirLLMMistral | +| ? 
| mistralai/Mixtral-8x7B-v0.1 | ✅ | AirLLMMixtral | #### [opencompass leaderboard](https://opencompass.org.cn/leaderboard-llm) top models diff --git a/air_llm/airllm/__init__.py b/air_llm/airllm/__init__.py index d2f01fe..66c5b3a 100644 --- a/air_llm/airllm/__init__.py +++ b/air_llm/airllm/__init__.py @@ -4,6 +4,7 @@ from .airllm_qwen import AirLLMQWen from .airllm_baichuan import AirLLMBaichuan from .airllm_internlm import AirLLMInternLM from .airllm_mistral import AirLLMMistral +from .airllm_mixtral import AirLLMMixtral from .airllm_base import AirLLMBaseModel from .auto_model import AutoModel from .utils import split_and_save_layers diff --git a/air_llm/airllm/airllm_mixtral.py b/air_llm/airllm/airllm_mixtral.py new file mode 100644 index 0000000..3dbd183 --- /dev/null +++ b/air_llm/airllm/airllm_mixtral.py @@ -0,0 +1,22 @@ + +from transformers import GenerationConfig + +from .airllm_base import AirLLMBaseModel + + + +class AirLLMMixtral(AirLLMBaseModel): + + + def __init__(self, *args, **kwargs): + + + super(AirLLMMixtral, self).__init__(*args, **kwargs) + + def get_use_better_transformer(self): + return False + + def get_generation_config(self): + return GenerationConfig() + + diff --git a/air_llm/airllm/auto_model.py b/air_llm/airllm/auto_model.py index f013b94..a7e0f73 100644 --- a/air_llm/airllm/auto_model.py +++ b/air_llm/airllm/auto_model.py @@ -1,13 +1,6 @@ import importlib from transformers import AutoConfig -from .airllm import AirLLMLlama2 -from .airllm_mistral import AirLLMMistral -from .airllm_baichuan import AirLLMBaichuan -from .airllm_internlm import AirLLMInternLM -from .airllm_chatglm import AirLLMChatGLM -from .airllm_qwen import AirLLMQWen - class AutoModel: def __init__(self): @@ -33,6 +26,8 @@ class AutoModel: return "airllm", "AirLLMInternLM" elif "Mistral" in config.architectures[0]: return "airllm", "AirLLMMistral" + elif "Mixtral" in config.architectures[0]: + return "airllm", "AirLLMMixtral" elif "Llama" in config.architectures[0]: 
             return "airllm", "AirLLMLlama2"
         else:
@@ -43,7 +38,7 @@ class AutoModel:
 
 
     def from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs):
         module, cls = AutoModel.get_module_class(pretrained_model_name_or_path, *inputs, **kwargs)
         module = importlib.import_module(module)
         class_ = getattr(module, cls)
         return class_(pretrained_model_name_or_path, *inputs, ** kwargs)
\ No newline at end of file
diff --git a/air_llm/setup.py b/air_llm/setup.py
index 4c61c92..9d846f1 100644
--- a/air_llm/setup.py
+++ b/air_llm/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name="airllm",
-    version="2.6.2",
+    version="2.7",
     author="Gavin Li",
     author_email="gavinli@animaai.cloud",
     description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning.",
diff --git a/air_llm/tests/test_automodel.py b/air_llm/tests/test_automodel.py
index 01c2f91..349b3dd 100644
--- a/air_llm/tests/test_automodel.py
+++ b/air_llm/tests/test_automodel.py
@@ -20,7 +20,8 @@ class TestAutoModel(unittest.TestCase):
         'internlm/internlm-chat-7b': 'AirLLMInternLM',
         'THUDM/chatglm3-6b-base': 'AirLLMChatGLM',
         'baichuan-inc/Baichuan2-7B-Base': 'AirLLMBaichuan',
-        'mistralai/Mistral-7B-Instruct-v0.1': 'AirLLMMistral'
+        'mistralai/Mistral-7B-Instruct-v0.1': 'AirLLMMistral',
+        'mistralai/Mixtral-8x7B-v0.1': 'AirLLMMixtral'
     }
 
 
diff --git a/air_llm/tests/test_notebooks/test_mixtral.ipynb b/air_llm/tests/test_notebooks/test_mixtral.ipynb
new file mode 100644
index 0000000..a3afdc5
--- /dev/null
+++ b/air_llm/tests/test_notebooks/test_mixtral.ipynb
@@ -0,0 +1,710 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "440851a0-170d-4226-9857-f39f05cc6c70",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Looking in indexes: http://mirrors.tencentyun.com/pypi/simple\n",
+      "Requirement already satisfied: airllm in 
/home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (0.9.1)\n", + "Collecting airllm\n", + " Downloading http://mirrors.tencentyun.com/pypi/packages/b5/36/d1cefb0725097e7ddf907783f31e9e17b191009978839a3d06598e72c41d/airllm-2.6-py3-none-any.whl (33 kB)\n", + "Requirement already satisfied: transformers in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (4.35.0)\n", + "Collecting transformers\n", + " Downloading http://mirrors.tencentyun.com/pypi/packages/20/0a/739426a81f7635b422fbe6cb8d1d99d1235579a6ac8024c13d743efa6847/transformers-4.36.2-py3-none-any.whl (8.2 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.2/8.2 MB\u001b[0m \u001b[31m1.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m0m\n", + "\u001b[?25hRequirement already satisfied: tqdm in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (4.66.1)\n", + "Requirement already satisfied: torch in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (2.1.0)\n", + "Requirement already satisfied: accelerate in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (0.24.1)\n", + "Requirement already satisfied: safetensors in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (0.4.0)\n", + "Requirement already satisfied: optimum in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (1.14.0)\n", + "Requirement already satisfied: huggingface-hub in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from airllm) (0.17.3)\n", + "Collecting scipy (from airllm)\n", + " Downloading http://mirrors.tencentyun.com/pypi/packages/69/f0/fb07a9548e48b687b8bf2fa81d71aba9cfc548d365046ca1c791e24db99d/scipy-1.10.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (34.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m34.5/34.5 MB\u001b[0m \u001b[31m10.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: filelock in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (3.9.0)\n", + "Collecting huggingface-hub (from airllm)\n", + " Downloading http://mirrors.tencentyun.com/pypi/packages/a0/0a/02ac0ae1047d97769003ff4fb8e6717024f3f174a5d13257415aa09e13d9/huggingface_hub-0.20.1-py3-none-any.whl (330 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m330.1/330.1 kB\u001b[0m \u001b[31m1.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0m\n", + "\u001b[?25hRequirement already satisfied: numpy>=1.17 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (1.24.3)\n", + "Requirement already satisfied: packaging>=20.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (23.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (6.0.1)\n", + "Requirement already satisfied: regex!=2019.12.17 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (2023.10.3)\n", + "Requirement already satisfied: requests in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (2.31.0)\n", + "Requirement already satisfied: tokenizers<0.19,>=0.14 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers) (0.14.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from huggingface-hub->airllm) (2023.10.0)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from huggingface-hub->airllm) (4.7.1)\n", + "INFO: pip is 
looking at multiple versions of tokenizers to determine which version is compatible with other requirements. This could take a while.\n", + "Collecting tokenizers<0.19,>=0.14 (from transformers)\n", + " Downloading http://mirrors.tencentyun.com/pypi/packages/ad/75/56230c5c65b226e707e1adbc759c19fdf1b20bb02c0276796b132c97118a/tokenizers-0.15.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.8 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m3.8/3.8 MB\u001b[0m \u001b[31m2.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m00:01\u001b[0m00:01\u001b[0m0m\n", + "\u001b[?25hRequirement already satisfied: psutil in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from accelerate->airllm) (5.9.6)\n", + "Requirement already satisfied: sympy in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from torch->airllm) (1.11.1)\n", + "Requirement already satisfied: networkx in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from torch->airllm) (3.1)\n", + "Requirement already satisfied: jinja2 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from torch->airllm) (3.1.2)\n", + "Requirement already satisfied: coloredlogs in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from optimum->airllm) (15.0.1)\n", + "Requirement already satisfied: datasets in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from optimum->airllm) (2.14.6)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from requests->transformers) (2.0.4)\n", + "Requirement already satisfied: idna<4,>=2.5 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from requests->transformers) (3.4)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from requests->transformers) 
(1.26.18)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from requests->transformers) (2023.7.22)\n", + "Requirement already satisfied: sentencepiece!=0.1.92,>=0.1.91 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers[sentencepiece]>=4.26.0->optimum->airllm) (0.1.99)\n", + "Requirement already satisfied: protobuf in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from transformers[sentencepiece]>=4.26.0->optimum->airllm) (4.25.0)\n", + "Requirement already satisfied: humanfriendly>=9.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from coloredlogs->optimum->airllm) (10.0)\n", + "Requirement already satisfied: pyarrow>=8.0.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (14.0.0)\n", + "Requirement already satisfied: dill<0.3.8,>=0.3.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (0.3.7)\n", + "Requirement already satisfied: pandas in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (2.0.3)\n", + "Requirement already satisfied: xxhash in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (3.4.1)\n", + "Requirement already satisfied: multiprocess in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (0.70.15)\n", + "Requirement already satisfied: aiohttp in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from datasets->optimum->airllm) (3.8.6)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from jinja2->torch->airllm) (2.1.1)\n", + "Requirement already satisfied: mpmath>=0.19 in 
/home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from sympy->torch->airllm) (1.3.0)\n", + "Requirement already satisfied: attrs>=17.3.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (23.1.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (6.0.4)\n", + "Requirement already satisfied: async-timeout<5.0,>=4.0.0a3 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (4.0.3)\n", + "Requirement already satisfied: yarl<2.0,>=1.0 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (1.9.2)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (1.4.0)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from aiohttp->datasets->optimum->airllm) (1.3.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from pandas->datasets->optimum->airllm) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from pandas->datasets->optimum->airllm) (2023.3.post1)\n", + "Requirement already satisfied: tzdata>=2022.1 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from pandas->datasets->optimum->airllm) (2023.3)\n", + "Requirement already satisfied: six>=1.5 in /home/ubuntu/miniconda3/envs/ghostaienv/lib/python3.8/site-packages (from python-dateutil>=2.8.2->pandas->datasets->optimum->airllm) (1.16.0)\n", + "Installing collected packages: scipy, huggingface-hub, tokenizers, transformers, airllm\n", + " 
Attempting uninstall: huggingface-hub\n", + " Found existing installation: huggingface-hub 0.17.3\n", + " Uninstalling huggingface-hub-0.17.3:\n", + " Successfully uninstalled huggingface-hub-0.17.3\n", + " Attempting uninstall: tokenizers\n", + " Found existing installation: tokenizers 0.14.1\n", + " Uninstalling tokenizers-0.14.1:\n", + " Successfully uninstalled tokenizers-0.14.1\n", + " Attempting uninstall: transformers\n", + " Found existing installation: transformers 4.35.0\n", + " Uninstalling transformers-4.35.0:\n", + " Successfully uninstalled transformers-4.35.0\n", + " Attempting uninstall: airllm\n", + " Found existing installation: airllm 0.9.1\n", + " Uninstalling airllm-0.9.1:\n", + " Successfully uninstalled airllm-0.9.1\n", + "Successfully installed airllm-2.6 huggingface-hub-0.20.1 scipy-1.10.1 tokenizers-0.15.0 transformers-4.36.2\n" + ] + } + ], + "source": [ + "!pip install -U airllm transformers" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "47de26bf-510a-4f8f-ae99-c4c22a0e12b7", + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "d8829a3be8414a3d82aacee5f801ece7", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "config.json: 0%| | 0.00/720 [00:00] 116 --.-KB/s in 0s \n", + "\n", + "2023-12-21 12:17:37 (27.1 MB/s) - ‘generation_config.json’ saved [116/116]\n", + "\n" + ] + } + ], + "source": [ + "!wget https://huggingface.co/mistralai/Mixtral-8x7B-v0.1/raw/main/generation_config.json\n", + "!cp generation_config.json /home/ubuntu/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-v0.1/snapshots/58301445dc1378584211722b7ebf8743ec4e192b/" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19eb8fee-ab17-4a54-9af2-ca809bd096b5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + ">>>> cache_utils installed\n", + "saved layers already found in 
/home/ubuntu/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-v0.1/snapshots/58301445dc1378584211722b7ebf8743ec4e192b/splitted_model\n", + "either BetterTransformer or attn_implementation='sdpa' is available, creating model directly\n", + "either BetterTransformer or attn_implementation='sdpa' is available, creating model directly\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "cuda:0: 100%|██████████| 35/35 [04:29<00:00, 7.69s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "either BetterTransformer or attn_implementation='sdpa' is available, creating model directly\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "cuda:0: 100%|██████████| 35/35 [04:30<00:00, 7.73s/it]\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "either BetterTransformer or attn_implementation='sdpa' is available, creating model directly\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "cuda:0: 86%|████████▌ | 30/35 [04:03<00:41, 8.36s/it]" + ] + } + ], + "source": [ + "from airllm import AutoModel\n", + "\n", + "MAX_LENGTH = 128\n", + "# could use hugging face model repo id:\n", + "model = AutoModel.from_pretrained(\"/home/ubuntu/.cache/huggingface/hub/models--mistralai--Mixtral-8x7B-v0.1/snapshots/58301445dc1378584211722b7ebf8743ec4e192b/\")\n", + "\n", + "input_text = [\n", + " 'I like',\n", + " ]\n", + "\n", + "input_tokens = model.tokenizer(input_text,\n", + " return_tensors=\"pt\",\n", + " return_attention_mask=False,\n", + " truncation=True,\n", + " max_length=MAX_LENGTH,\n", + " #padding=True\n", + " )\n", + "\n", + "generation_output = model.generate(\n", + " input_tokens['input_ids'].cuda(),\n", + " max_new_tokens=3,\n", + " use_cache=True,\n", + " return_dict_in_generate=True)\n", + "\n", + "model.tokenizer.decode(generation_output.sequences[0])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"db4d8871-7e30-4eb8-b2f9-0310409c71d7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}