From 58e38734314e80e32d4bd07d1a1e41fce99a87f9 Mon Sep 17 00:00:00 2001 From: Yu Li Date: Sun, 3 Dec 2023 18:21:23 -0600 Subject: [PATCH] clean up unnecessary logs --- air_llm/airllm/airllm_qwen.py | 8 ++++---- air_llm/setup.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/air_llm/airllm/airllm_qwen.py b/air_llm/airllm/airllm_qwen.py index 51cd037..189c811 100644 --- a/air_llm/airllm/airllm_qwen.py +++ b/air_llm/airllm/airllm_qwen.py @@ -205,7 +205,7 @@ class AirLLMQWen(GenerationMixin): output_hidden_states: Optional[bool] = None, return_dict: Optional[bool] = None, ) -> Union[Tuple, CausalLMOutputWithPast]: - print(f"input_ids shape: {input_ids.shape}") + #print(f"input_ids shape: {input_ids.shape}") global total_disk_loading_time, total_gpu_loading_time, total_compression_overhead_time @@ -222,7 +222,7 @@ class AirLLMQWen(GenerationMixin): batch = [input_ids_unit.to(self.running_device).unsqueeze(0) for input_ids_unit in input_ids] n_seq = len(batch[0]) - print(f"batch[0] shape:{batch[0].shape}") + #print(f"batch[0] shape:{batch[0].shape}") #batch_eos = [(input_ids_unit != self.tokenizer.pad_token_id).sum(0) - 1 for input_ids_unit in input_ids] # Create attention mask for the largest input, and position ids to use KV cache @@ -242,7 +242,7 @@ class AirLLMQWen(GenerationMixin): for i, (layer_name, layer) in tqdm(enumerate(zip(self.layer_names, self.layers)), desc=self.running_device, total=len(self.layers)): - print(f"layer:{i} {layer_name}") + #print(f"layer:{i} {layer_name}") load_layer_to_cpu_output = self.load_layer_to_cpu(layer_name, self.profiling_mode) # profile @@ -382,7 +382,7 @@ class AirLLMQWen(GenerationMixin): for i in range(len(kv_cache_list)): # print(f"{i} - {kv_cache_list[i][0].shape}") kv_cache_list[i] = (torch.cat(kv_cache_list[i][0], 0), torch.cat(kv_cache_list[i][1], 0)) - print(f"returning kvcache size: {kv_cache_list[0][0].shape}") + #print(f"returning kvcache size: {kv_cache_list[0][0].shape}") if output_attentions: all_self_attns = all_self_attns[0:-2] diff --git a/air_llm/setup.py b/air_llm/setup.py index 2ff2be0..f8f1ab7 100644 --- a/air_llm/setup.py +++ b/air_llm/setup.py @@ -5,7 +5,7 @@ with open("README.md", "r") as fh: setuptools.setup( name="airllm", - version="2.3.0", + version="2.3.1", author="Gavin Li", author_email="gavinli@animaai.cloud", description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning.",