diff --git a/air_llm/airllm/airllm_base.py b/air_llm/airllm/airllm_base.py
index 4fb919c..a517e6b 100644
--- a/air_llm/airllm/airllm_base.py
+++ b/air_llm/airllm/airllm_base.py
@@ -418,8 +418,8 @@ class AirLLMBaseModel(GenerationMixin):
 
         future = executor.submit(self.load_layer_to_cpu, self.layer_names[0])
 
-        for i, (layer_name, layer) in tqdm(enumerate(zip(self.layer_names, self.layers)), desc=self.running_device,
-                                           desc='running layers:',
+        for i, (layer_name, layer) in tqdm(enumerate(zip(self.layer_names, self.layers)),
+                                           desc=f'running layers({self.running_device})',
                                            total=len(self.layers)):
 
             if self.prefetching:
diff --git a/air_llm/setup.py b/air_llm/setup.py
index 329cfe6..a2fccda 100644
--- a/air_llm/setup.py
+++ b/air_llm/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:
 
 setuptools.setup(
     name="airllm",
-    version="2.8",
+    version="2.8.1",
     author="Gavin Li",
     author_email="gavinli@animaai.cloud",
     description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning.",
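
Note: the first hunk removes a repeated "desc" keyword argument in the tqdm call (a repeated keyword is a SyntaxError in Python) and replaces it with a single f-string desc that interpolates the device name. Below is a minimal, self-contained sketch of the corrected pattern; the layer names and device string are illustrative placeholders, not values taken from the repository.

    from tqdm import tqdm

    # Placeholder stand-ins for self.layer_names, self.layers and self.running_device
    layer_names = ["model.embed_tokens", "model.layers.0", "model.layers.1", "model.norm"]
    layers = [None] * len(layer_names)
    running_device = "cuda:0"

    # A single desc keyword, built as an f-string so the progress bar shows the device
    for i, (layer_name, layer) in tqdm(enumerate(zip(layer_names, layers)),
                                       desc=f'running layers({running_device})',
                                       total=len(layers)):
        pass  # per-layer loading and forward pass would happen here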