mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
Merge pull request #300 from larinam/pytest-introduction
Introduce tests with pytest
This commit is contained in:
27
.github/workflows/pytest.yml
vendored
Normal file
27
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,27 @@
|
||||
# CI workflow: runs the pytest suite on every push and pull request,
# once per Python version listed in the matrix.
name: Run python tests with pytest

on: [push, pull_request]

jobs:
  build:

    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Versions are quoted so YAML does not parse 3.10 as the float 3.1.
        python-version: ["3.9", "3.10", "3.11"]

    steps:
    - uses: actions/checkout@v3
    - name: Set up Python ${{ matrix.python-version }}
      uses: actions/setup-python@v4
      with:
        python-version: ${{ matrix.python-version }}
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        pip install pytest
        cd application
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    # Each `run` step starts a fresh shell in the workspace root, so the
    # `cd application` above does not carry over to this step.
    - name: Test with pytest
      run: |
        python -m pytest
|
||||
@@ -14,10 +14,10 @@ FROM python:3.10-slim-bullseye
|
||||
COPY --from=builder /usr/local/ /usr/local/
|
||||
|
||||
WORKDIR /app
|
||||
COPY . /app
|
||||
COPY . /app/application
|
||||
ENV FLASK_APP=app.py
|
||||
ENV FLASK_DEBUG=true
|
||||
|
||||
EXPOSE 7091
|
||||
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "wsgi:app"]
|
||||
CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "application.wsgi:app"]
|
||||
|
||||
0
application/__init__.py
Normal file
0
application/__init__.py
Normal file
@@ -37,9 +37,9 @@ from langchain.schema import HumanMessage, AIMessage
|
||||
from pymongo import MongoClient
|
||||
from werkzeug.utils import secure_filename
|
||||
|
||||
from core.settings import settings
|
||||
from error import bad_request
|
||||
from worker import ingest_worker
|
||||
from application.core.settings import settings
|
||||
from application.error import bad_request
|
||||
from application.worker import ingest_worker
|
||||
from bson.objectid import ObjectId
|
||||
|
||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||
@@ -68,19 +68,20 @@ if platform.system() == "Windows":
|
||||
dotenv.load_dotenv()
|
||||
|
||||
# load the prompts
|
||||
with open("prompts/combine_prompt.txt", "r") as f:
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt.txt"), "r") as f:
|
||||
template = f.read()
|
||||
|
||||
with open("prompts/combine_prompt_hist.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "combine_prompt_hist.txt"), "r") as f:
|
||||
template_hist = f.read()
|
||||
|
||||
with open("prompts/question_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "question_prompt.txt"), "r") as f:
|
||||
template_quest = f.read()
|
||||
|
||||
with open("prompts/chat_combine_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_combine_prompt.txt"), "r") as f:
|
||||
chat_combine_template = f.read()
|
||||
|
||||
with open("prompts/chat_reduce_prompt.txt", "r") as f:
|
||||
with open(os.path.join(current_dir, "prompts", "chat_reduce_prompt.txt"), "r") as f:
|
||||
chat_reduce_template = f.read()
|
||||
|
||||
api_key_set = settings.API_KEY is not None
|
||||
@@ -92,7 +93,7 @@ app.config["CELERY_BROKER_URL"] = settings.CELERY_BROKER_URL
|
||||
app.config["CELERY_RESULT_BACKEND"] = settings.CELERY_RESULT_BACKEND
|
||||
app.config["MONGO_URI"] = settings.MONGO_URI
|
||||
celery = Celery()
|
||||
celery.config_from_object("celeryconfig")
|
||||
celery.config_from_object("application.celeryconfig")
|
||||
mongo = MongoClient(app.config["MONGO_URI"])
|
||||
db = mongo["docsgpt"]
|
||||
vectors_collection = db["vectors"]
|
||||
@@ -129,6 +130,7 @@ def get_vectorstore(data):
|
||||
vectorstore = ""
|
||||
else:
|
||||
vectorstore = ""
|
||||
vectorstore = os.path.join("application", vectorstore)
|
||||
return vectorstore
|
||||
|
||||
|
||||
|
||||
1
application/parser/file/__init__.py
Normal file
1
application/parser/file/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -3,7 +3,7 @@ from abc import abstractmethod
|
||||
from typing import Any, List
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
class BaseReader:
|
||||
|
||||
@@ -3,15 +3,15 @@ import logging
|
||||
from pathlib import Path
|
||||
from typing import Callable, Dict, List, Optional, Union
|
||||
|
||||
from parser.file.base import BaseReader
|
||||
from parser.file.base_parser import BaseParser
|
||||
from parser.file.docs_parser import DocxParser, PDFParser
|
||||
from parser.file.epub_parser import EpubParser
|
||||
from parser.file.html_parser import HTMLParser
|
||||
from parser.file.markdown_parser import MarkdownParser
|
||||
from parser.file.rst_parser import RstParser
|
||||
from parser.file.tabular_parser import PandasCSVParser
|
||||
from parser.schema.base import Document
|
||||
from application.parser.file.base import BaseReader
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
from application.parser.file.docs_parser import DocxParser, PDFParser
|
||||
from application.parser.file.epub_parser import EpubParser
|
||||
from application.parser.file.html_parser import HTMLParser
|
||||
from application.parser.file.markdown_parser import MarkdownParser
|
||||
from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".pdf": PDFParser(),
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class PDFParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for epub files.
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class EpubParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Dict, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class HTMLParser(BaseParser):
|
||||
|
||||
@@ -8,7 +8,7 @@ from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||
|
||||
import tiktoken
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class MarkdownParser(BaseParser):
|
||||
|
||||
@@ -7,7 +7,7 @@ import re
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class RstParser(BaseParser):
|
||||
|
||||
@@ -6,7 +6,7 @@ Contains parsers for tabular data files.
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Union
|
||||
|
||||
from parser.file.base_parser import BaseParser
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class CSVParser(BaseParser):
|
||||
|
||||
1
application/parser/schema/__init__.py
Normal file
1
application/parser/schema/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from langchain.docstore.document import Document as LCDocument
|
||||
from parser.schema.schema import BaseDocument
|
||||
from application.parser.schema.schema import BaseDocument
|
||||
|
||||
|
||||
@dataclass
|
||||
|
||||
@@ -3,7 +3,7 @@ from math import ceil
|
||||
from typing import List
|
||||
|
||||
import tiktoken
|
||||
from parser.schema.base import Document
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
|
||||
def separate_header_and_body(text):
|
||||
|
||||
@@ -73,6 +73,7 @@ pymongo==4.3.3
|
||||
pyowm==3.3.0
|
||||
PyPDF2==3.0.1
|
||||
PySocks==1.7.1
|
||||
pytest
|
||||
python-dateutil==2.8.2
|
||||
python-dotenv==1.0.0
|
||||
python-jose==3.3.0
|
||||
|
||||
@@ -7,11 +7,11 @@ from urllib.parse import urljoin
|
||||
import nltk
|
||||
import requests
|
||||
|
||||
from core.settings import settings
|
||||
from parser.file.bulk import SimpleDirectoryReader
|
||||
from parser.open_ai_func import call_openai_api
|
||||
from parser.schema.base import Document
|
||||
from parser.token_func import group_split
|
||||
from application.core.settings import settings
|
||||
from application.parser.file.bulk import SimpleDirectoryReader
|
||||
from application.parser.open_ai_func import call_openai_api
|
||||
from application.parser.schema.base import Document
|
||||
from application.parser.token_func import group_split
|
||||
|
||||
try:
|
||||
nltk.download('punkt', quiet=True)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
from app import app
|
||||
from application.app import app
|
||||
|
||||
if __name__ == "__main__":
|
||||
app.run(debug=True, port=7091)
|
||||
|
||||
@@ -27,16 +27,16 @@ services:
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/indexes
|
||||
- ./application/inputs:/app/inputs
|
||||
- ./application/vectors:/app/vectors
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
|
||||
@@ -22,16 +22,16 @@ services:
|
||||
ports:
|
||||
- "7091:7091"
|
||||
volumes:
|
||||
- ./application/indexes:/app/indexes
|
||||
- ./application/inputs:/app/inputs
|
||||
- ./application/vectors:/app/vectors
|
||||
- ./application/indexes:/app/application/indexes
|
||||
- ./application/inputs:/app/application/inputs
|
||||
- ./application/vectors:/app/application/vectors
|
||||
depends_on:
|
||||
- redis
|
||||
- mongo
|
||||
|
||||
worker:
|
||||
build: ./application
|
||||
command: celery -A app.celery worker -l INFO
|
||||
command: celery -A application.app.celery worker -l INFO
|
||||
environment:
|
||||
- API_KEY=$OPENAI_API_KEY
|
||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||
|
||||
@@ -110,8 +110,6 @@ tenacity==8.2.2
|
||||
threadpoolctl==3.2.0
|
||||
tiktoken==0.4.0
|
||||
tokenizers==0.13.3
|
||||
torch==2.0.1
|
||||
torchvision==0.15.2
|
||||
tqdm==4.65.0
|
||||
transformers==4.31.0
|
||||
typer==0.9.0
|
||||
|
||||
28
tests/test_app.py
Normal file
28
tests/test_app.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from application.app import get_vectorstore
|
||||
import os
|
||||
|
||||
|
||||
# Test cases for get_vectorstore function
|
||||
def test_no_active_docs():
    """A payload without ``active_docs`` resolves to the bare prefix.

    ``os.path.join("application", "")`` is the ``application`` directory
    name followed by a trailing path separator.
    """
    data = {}
    assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_local_default_active_docs():
    """``local/default`` is expected to map to the bare ``application`` prefix."""
    data = {"active_docs": "local/default"}
    assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_local_non_default_active_docs():
    """A non-default ``local/...`` selection is expected under ``indexes/``."""
    data = {"active_docs": "local/something"}
    expected = os.path.join("application", "indexes/local/something")
    assert get_vectorstore(data) == expected
|
||||
|
||||
|
||||
def test_default_active_docs():
    """The literal ``default`` selection is expected to map to the bare prefix."""
    data = {"active_docs": "default"}
    assert get_vectorstore(data) == os.path.join("application", "")
|
||||
|
||||
|
||||
def test_complex_active_docs():
    """A nested ``local/...`` path is expected to be kept intact under ``indexes/``."""
    data = {"active_docs": "local/other/path"}
    expected = os.path.join("application", "indexes/local/other/path")
    assert get_vectorstore(data) == expected
|
||||
Reference in New Issue
Block a user