diff --git a/application/Dockerfile b/application/Dockerfile
index 7ea99661..6f70d2f3 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -7,6 +7,13 @@ ENV PATH="/root/.cargo/bin:${PATH}"
 RUN pip install --upgrade pip && pip install tiktoken==0.5.2
 COPY requirements.txt .
 RUN pip install -r requirements.txt
+# Download and unpack the embedding model in one layer: the package index is
+# refreshed together with the install, and the archive is removed in place.
+RUN apt-get update \
+    && apt-get install -y wget unzip \
+    && wget https://docsgpt.s3.eu-west-1.amazonaws.com/models/embeddings/mpnet-base-v2.zip \
+    && unzip mpnet-base-v2.zip -d model \
+    && rm mpnet-base-v2.zip
 
 FROM python:3.11-slim-bullseye
 
@@ -14,6 +21,9 @@ FROM python:3.11-slim-bullseye
 COPY --from=builder /usr/local/ /usr/local/
 
 WORKDIR /app
+# Bring the unpacked embedding model from the builder stage into /app/model.
+COPY --from=builder /model /app/model
+
 COPY . /app/application
 ENV FLASK_APP=app.py
 ENV FLASK_DEBUG=true
diff --git a/application/vectorstore/base.py b/application/vectorstore/base.py
index e1664c66..ffff49b6 100644
--- a/application/vectorstore/base.py
+++ b/application/vectorstore/base.py
@@ -44,5 +44,18 @@ class BaseVectorStore(ABC):
             embedding_instance = embeddings_factory[embeddings_name](
                 cohere_api_key=embeddings_key
             )
+        elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
+            # Use the model directory shipped with the Docker image when it is
+            # present; otherwise fall back to the factory default, as before.
+            import os  # local import: the module header is outside this hunk
+
+            local_model = "./model/all-mpnet-base-v2"
+            if os.path.isdir(local_model):
+                embedding_instance = embeddings_factory[embeddings_name](
+                    model_name=local_model,
+                    model_kwargs={"device": "cpu"},
+                )
+            else:
+                embedding_instance = embeddings_factory[embeddings_name]()
         else:
             embedding_instance = embeddings_factory[embeddings_name]()