chore: Update Docker build platforms for application and frontend, and optimise embedding import

This commit is contained in:
Alex
2024-05-18 12:10:24 +01:00
parent 4534cafd3f
commit 5c8133a810
6 changed files with 14 additions and 20 deletions

View File

@@ -1,6 +1,6 @@
from transformers import GPT2TokenizerFast
# Module-level GPT-2 fast tokenizer, created when this module is imported from
# the 'gpt2' pretrained files and used by count_tokens.
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
# Override the tokenizer's model_max_length with a large value.
# NOTE(review): presumably this avoids the "sequence longer than the maximum
# length" warning when counting tokens of long inputs — confirm.
tokenizer.model_max_length = 100000
def count_tokens(string):
    """Return the length of the 'input_ids' the module tokenizer yields for *string*.

    The input is passed unchanged to the module-level ``tokenizer``; the
    result is the number of entries in the ``'input_ids'`` field of the
    encoding it returns.
    """
    encoding = tokenizer(string)
    token_ids = encoding['input_ids']
    return len(token_ids)