From 5b4e517d9dc2949268b9db5076d486c2e895d8e2 Mon Sep 17 00:00:00 2001
From: Archit-Kohli <118905854+Archit-Kohli@users.noreply.github.com>
Date: Thu, 5 Oct 2023 11:03:51 +0530
Subject: [PATCH 1/2] Update huggingface.py

Added quantization support using bitsandbytes
---
 application/llm/huggingface.py | 22 +++++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/application/llm/huggingface.py b/application/llm/huggingface.py
index e9cc47be..987bfa55 100644
--- a/application/llm/huggingface.py
+++ b/application/llm/huggingface.py
@@ -2,13 +2,25 @@ from application.llm.base import BaseLLM
 
 class HuggingFaceLLM(BaseLLM):
 
-    def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B'):
+    def __init__(self, api_key, llm_name='Arc53/DocsGPT-7B',q=False):
         global hf
-
+        
         from langchain.llms import HuggingFacePipeline
-        from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
-        tokenizer = AutoTokenizer.from_pretrained(llm_name)
-        model = AutoModelForCausalLM.from_pretrained(llm_name)
+        if q:
+            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
+            tokenizer = AutoTokenizer.from_pretrained(llm_name)
+            bnb_config = BitsAndBytesConfig(
+                load_in_4bit=True,
+                bnb_4bit_use_double_quant=True,
+                bnb_4bit_quant_type="nf4",
+                bnb_4bit_compute_dtype=torch.bfloat16
+            )
+            model = AutoModelForCausalLM.from_pretrained(llm_name,quantization_config=bnb_config)
+        else:
+            from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+            tokenizer = AutoTokenizer.from_pretrained(llm_name)
+            model = AutoModelForCausalLM.from_pretrained(llm_name)
+        
         pipe = pipeline(
             "text-generation", model=model,
             tokenizer=tokenizer, max_new_tokens=2000,

From 7ce1dc9069bf359ef8b1ad2d8d74e1529b898e6d Mon Sep 17 00:00:00 2001
From: Archit-Kohli <118905854+Archit-Kohli@users.noreply.github.com>
Date: Thu, 5 Oct 2023 14:27:15 +0530
Subject: [PATCH 2/2] Update huggingface.py add import torch statement

Added import torch statement
---
 application/llm/huggingface.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/application/llm/huggingface.py b/application/llm/huggingface.py
index 987bfa55..ef3b1fbc 100644
--- a/application/llm/huggingface.py
+++ b/application/llm/huggingface.py
@@ -7,6 +7,7 @@ class HuggingFaceLLM(BaseLLM):
         
         from langchain.llms import HuggingFacePipeline
         if q:
+            import torch
             from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline, BitsAndBytesConfig
             tokenizer = AutoTokenizer.from_pretrained(llm_name)
             bnb_config = BitsAndBytesConfig(