diff --git a/anima_100k/README.md b/anima_100k/README.md
index 9da5004..ede21f0 100644
--- a/anima_100k/README.md
+++ b/anima_100k/README.md
@@ -184,7 +184,8 @@ inputs['attention_mask'] = inputs['attention_mask'].cuda()
 
 # Generate
 generate_ids = model.generate(**inputs, max_new_tokens=30,
-                       only_last_logit=True,
+                       only_last_logit=True, # to save memory
+                       use_cache=False, # if you run into OOM, keeping the cache disabled like this can save memory
                        xentropy=True)
 output = tokenizer.batch_decode(generate_ids, 
                                 skip_special_tokens=True,