diff --git a/air_llm/airllm/airllm_base.py b/air_llm/airllm/airllm_base.py index 6dcbbb0..b214cb1 100644 --- a/air_llm/airllm/airllm_base.py +++ b/air_llm/airllm/airllm_base.py @@ -298,7 +298,7 @@ class AirLLMBaseModel(GenerationMixin): layers = [] for param_name, param in state_dict.items(): if self.hf_quantizer is None: - layers.append(layer_name) + layers.append(param_name) else: if '.weight' in param_name: layer_name = param_name[:param_name.index(".weight") + len(".weight")] diff --git a/air_llm/setup.py b/air_llm/setup.py index 4c79cde..697caff 100644 --- a/air_llm/setup.py +++ b/air_llm/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="airllm", - version="2.9", + version="2.9.1", author="Gavin Li", author_email="gavinli@animaai.cloud", description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning. 8GB vmem to run 405B Llama3.1.",