diff --git a/anima_100k/README.md b/anima_100k/README.md index 9da5004..ede21f0 100644 --- a/anima_100k/README.md +++ b/anima_100k/README.md @@ -184,7 +184,8 @@ inputs['attention_mask'] = inputs['attention_mask'].cuda() # Generate generate_ids = model.generate(**inputs, max_new_tokens=30, - only_last_logit=True, + only_last_logit=True, # to save memory + use_cache=False, # if you run into OOM, setting this to False can save memory xentropy=True) output = tokenizer.batch_decode(generate_ids, skip_special_tokens=True,