Clean up unnecessary logs: comment out debug prints in airllm_qwen.py and bump version to 2.3.1

2026-03-07 14:24:44 +00:00 · 2023-12-03 18:21:23 -06:00
parent 916efd5e9b
commit 58e3873431
2 changed files with 5 additions and 5 deletions
--- a/air_llm/airllm/airllm_qwen.py
+++ b/air_llm/airllm/airllm_qwen.py
@@ -205,7 +205,7 @@ class AirLLMQWen(GenerationMixin):
            output_hidden_states: Optional[bool] = None,
            return_dict: Optional[bool] = None,
    ) -> Union[Tuple, CausalLMOutputWithPast]:
-        print(f"input_ids shape: {input_ids.shape}")
+        #print(f"input_ids shape: {input_ids.shape}")

        global total_disk_loading_time, total_gpu_loading_time, total_compression_overhead_time

@@ -222,7 +222,7 @@ class AirLLMQWen(GenerationMixin):

        batch = [input_ids_unit.to(self.running_device).unsqueeze(0) for input_ids_unit in input_ids]
        n_seq = len(batch[0])
-        print(f"batch[0] shape:{batch[0].shape}")
+        #print(f"batch[0] shape:{batch[0].shape}")
        #batch_eos = [(input_ids_unit != self.tokenizer.pad_token_id).sum(0) - 1 for input_ids_unit in input_ids]

        # Create attention mask for the largest input, and position ids to use KV cache
@@ -242,7 +242,7 @@ class AirLLMQWen(GenerationMixin):

            for i, (layer_name, layer) in tqdm(enumerate(zip(self.layer_names, self.layers)), desc=self.running_device,
                                               total=len(self.layers)):
-                print(f"layer:{i} {layer_name}")
+                #print(f"layer:{i} {layer_name}")

                load_layer_to_cpu_output = self.load_layer_to_cpu(layer_name, self.profiling_mode)
                # profile
@@ -382,7 +382,7 @@ class AirLLMQWen(GenerationMixin):
            for i in range(len(kv_cache_list)):
                # print(f"{i} - {kv_cache_list[i][0].shape}")
                kv_cache_list[i] = (torch.cat(kv_cache_list[i][0], 0), torch.cat(kv_cache_list[i][1], 0))
-            print(f"returning kvcache size: {kv_cache_list[0][0].shape}")
+            #print(f"returning kvcache size: {kv_cache_list[0][0].shape}")

        if output_attentions:
            all_self_attns = all_self_attns[0:-2]
--- a/air_llm/setup.py
+++ b/air_llm/setup.py
@@ -5,7 +5,7 @@ with open("README.md", "r") as fh:

 setuptools.setup(
    name="airllm",
-    version="2.3.0",
+    version="2.3.1",
    author="Gavin Li",
    author_email="gavinli@animaai.cloud",
    description="AirLLM allows single 4GB GPU card to run 70B large language models without quantization, distillation or pruning.",