Mirror of https://github.com/arc53/DocsGPT.git, synced 2025-12-01 17:43:15 +00:00
chore: Update Docker build platforms for application and frontend; optimise embedding import
.github/workflows/ci.yml (vendored, 6 changes)

@@ -13,7 +13,6 @@ jobs:
     permissions:
       contents: read
       packages: write

     steps:
       - uses: actions/checkout@v3
@@ -36,14 +35,13 @@ jobs:
           username: ${{ github.repository_owner }}
           password: ${{ secrets.GITHUB_TOKEN }}
       # Runs a single command using the runners shell
       - name: Build and push Docker images to docker.io and ghcr.io
         uses: docker/build-push-action@v4
         with:
           file: './application/Dockerfile'
-          platforms: linux/amd64
+          platforms: linux/amd64,linux/arm64
           context: ./application
           push: true
           tags: |
             ${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
             ghcr.io/${{ github.repository_owner }}/docsgpt:latest
-            ghcr.io/${{ github.repository_owner }}/docsgpt:latest
.github/workflows/cife.yml (vendored, 4 changes)

@@ -8,11 +8,11 @@ on:
 jobs:
   deploy:
     if: github.repository == 'arc53/DocsGPT'
     runs-on: ubuntu-latest
     permissions:
       contents: read
       packages: write

     steps:
       - uses: actions/checkout@v3
@@ -40,7 +40,7 @@ jobs:
         uses: docker/build-push-action@v4
         with:
           file: './frontend/Dockerfile'
-          platforms: linux/amd64
+          platforms: linux/amd64, linux/arm64
           context: ./frontend
           push: true
           tags: |
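Both workflows make the same change: the images are now built for linux/amd64 and linux/arm64, so the pushed tags become multi-arch manifests that also run natively on ARM hosts such as Apple Silicon or AWS Graviton. If you want to confirm which variant a container actually pulled, a one-line check from inside it is enough; this is a generic sketch, not code from the repository:

import platform

# Prints "x86_64" inside the amd64 image and "aarch64" inside the arm64 image.
print(platform.machine())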
application/requirements.txt

@@ -15,7 +15,6 @@ html2text==2020.1.16
 javalang==0.13.0
 langchain==0.1.4
 langchain-openai==0.0.5
-nltk==3.8.1
 openapi3_parser==1.1.16
 pandas==2.2.0
 pydantic_settings==2.1.0
@@ -1,6 +1,6 @@
 from transformers import GPT2TokenizerFast

 tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
+tokenizer.model_max_length = 100000
 def count_tokens(string):
-
     return len(tokenizer(string)['input_ids'])
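Raising model_max_length does not change how tokens are counted; it only lifts the GPT-2 default of 1024, so counting long documents no longer triggers the "token indices sequence length is longer than the specified maximum" warning that transformers emits even when nothing is fed to a model. A usage sketch of the helper, assuming transformers is installed:

from transformers import GPT2TokenizerFast

tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
tokenizer.model_max_length = 100000  # lift the 1024-token default

def count_tokens(string):
    # The number of input IDs is the token count for the string.
    return len(tokenizer(string)['input_ids'])

print(count_tokens("DocsGPT counts tokens before chunking."))  # small input
print(count_tokens("word " * 3000))  # well over 1024 tokens, no warning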
application/vectorstore/base.py

@@ -45,10 +45,15 @@ class BaseVectorStore(ABC):
                 cohere_api_key=embeddings_key
             )
         elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
-            embedding_instance = embeddings_factory[embeddings_name](
-                #model_name="./model/all-mpnet-base-v2",
-                model_kwargs={"device": "cpu"},
-            )
+            if os.path.exists("./model/all-mpnet-base-v2"):
+                embedding_instance = embeddings_factory[embeddings_name](
+                    model_name="./model/all-mpnet-base-v2",
+                    model_kwargs={"device": "cpu"},
+                )
+            else:
+                embedding_instance = embeddings_factory[embeddings_name](
+                    model_kwargs={"device": "cpu"},
+                )
         else:
             embedding_instance = embeddings_factory[embeddings_name]()
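This is the "optimise embedding import" part of the commit: instead of always resolving the model by hub name (the commented-out local path suggests the local copy was attempted before), the factory now loads a pre-downloaded model from ./model/all-mpnet-base-v2 when the image ships one, and only falls back to a hub download otherwise. A standalone sketch of the same check-local-first pattern, using sentence-transformers directly; the helper and constant names are ours, not from the repository:

import os
from sentence_transformers import SentenceTransformer

LOCAL_MODEL_DIR = "./model/all-mpnet-base-v2"  # baked into the Docker image
HUB_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"

def load_embedding_model():
    # Prefer the local copy: no network access, no download on startup.
    if os.path.exists(LOCAL_MODEL_DIR):
        return SentenceTransformer(LOCAL_MODEL_DIR, device="cpu")
    # Fall back to fetching the model from the Hugging Face Hub by name.
    return SentenceTransformer(HUB_MODEL_NAME, device="cpu")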
application/worker.py

@@ -4,7 +4,6 @@ import string
 import zipfile
 from urllib.parse import urljoin

-import nltk
 import requests

 from application.core.settings import settings
@@ -14,13 +13,6 @@ from application.parser.open_ai_func import call_openai_api
 from application.parser.schema.base import Document
 from application.parser.token_func import group_split

-try:
-    nltk.download("punkt", quiet=True)
-    nltk.download("averaged_perceptron_tagger", quiet=True)
-except FileExistsError:
-    pass
-
-
 # Define a function to extract metadata from a given filename.
 def metadata_from_filename(title):
     store = "/".join(title.split("/")[1:3])
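The removed block fetched NLTK data on every import of the worker and papered over concurrent-download races with except FileExistsError. If the punkt and tagger data were still needed at runtime, a lighter pattern would probe the local data path first and download only on a miss; a generic sketch (the helper name is ours, not from this commit):

import nltk

def ensure_nltk_data():
    # nltk.data.find raises LookupError when a resource is missing,
    # so the network download happens at most once per environment.
    for resource, path in [
        ("punkt", "tokenizers/punkt"),
        ("averaged_perceptron_tagger", "taggers/averaged_perceptron_tagger"),
    ]:
        try:
            nltk.data.find(path)
        except LookupError:
            nltk.download(resource, quiet=True)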