mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Fix packaging and imports, and introduce tests with pytest.
There are still issues with the Celery worker.
This commit is contained in:
28
.github/workflows/pytest.yml
vendored
Normal file
28
.github/workflows/pytest.yml
vendored
Normal file
@@ -0,0 +1,28 @@
|
|||||||
|
name: Run python tests with pytest
|
||||||
|
|
||||||
|
on: [push, pull_request]
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
build:
|
||||||
|
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
strategy:
|
||||||
|
matrix:
|
||||||
|
python-version: ["3.9", "3.10", "3.11"]
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v3
|
||||||
|
- name: Set up Python ${{ matrix.python-version }}
|
||||||
|
uses: actions/setup-python@v4
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.python-version }}
|
||||||
|
- name: Install dependencies
|
||||||
|
run: |
|
||||||
|
python -m pip install --upgrade pip
|
||||||
|
pip install pytest
|
||||||
|
cd application
|
||||||
|
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||||
|
- name: Test with pytest
|
||||||
|
run: |
|
||||||
|
cd application
|
||||||
|
pytest
|
||||||
0
application/__init__.py
Normal file
0
application/__init__.py
Normal file
@@ -37,9 +37,9 @@ from langchain.schema import HumanMessage, AIMessage
|
|||||||
from pymongo import MongoClient
|
from pymongo import MongoClient
|
||||||
from werkzeug.utils import secure_filename
|
from werkzeug.utils import secure_filename
|
||||||
|
|
||||||
from core.settings import settings
|
from application.core.settings import settings
|
||||||
from error import bad_request
|
from application.error import bad_request
|
||||||
from worker import ingest_worker
|
from application.worker import ingest_worker
|
||||||
from bson.objectid import ObjectId
|
from bson.objectid import ObjectId
|
||||||
|
|
||||||
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
# os.environ["LANGCHAIN_HANDLER"] = "langchain"
|
||||||
|
|||||||
1
application/parser/file/__init__.py
Normal file
1
application/parser/file/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
@@ -3,7 +3,7 @@ from abc import abstractmethod
|
|||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
|
|
||||||
from langchain.docstore.document import Document as LCDocument
|
from langchain.docstore.document import Document as LCDocument
|
||||||
from parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
|
|
||||||
|
|
||||||
class BaseReader:
|
class BaseReader:
|
||||||
|
|||||||
@@ -3,15 +3,15 @@ import logging
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Callable, Dict, List, Optional, Union
|
from typing import Callable, Dict, List, Optional, Union
|
||||||
|
|
||||||
from parser.file.base import BaseReader
|
from application.parser.file.base import BaseReader
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
from parser.file.docs_parser import DocxParser, PDFParser
|
from application.parser.file.docs_parser import DocxParser, PDFParser
|
||||||
from parser.file.epub_parser import EpubParser
|
from application.parser.file.epub_parser import EpubParser
|
||||||
from parser.file.html_parser import HTMLParser
|
from application.parser.file.html_parser import HTMLParser
|
||||||
from parser.file.markdown_parser import MarkdownParser
|
from application.parser.file.markdown_parser import MarkdownParser
|
||||||
from parser.file.rst_parser import RstParser
|
from application.parser.file.rst_parser import RstParser
|
||||||
from parser.file.tabular_parser import PandasCSVParser
|
from application.parser.file.tabular_parser import PandasCSVParser
|
||||||
from parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
|
|
||||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||||
".pdf": PDFParser(),
|
".pdf": PDFParser(),
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class PDFParser(BaseParser):
|
class PDFParser(BaseParser):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Contains parsers for epub files.
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class EpubParser(BaseParser):
|
class EpubParser(BaseParser):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Union
|
from typing import Dict, Union
|
||||||
|
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class HTMLParser(BaseParser):
|
class HTMLParser(BaseParser):
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ from pathlib import Path
|
|||||||
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
||||||
|
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class MarkdownParser(BaseParser):
|
class MarkdownParser(BaseParser):
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Optional, Tuple, Union
|
from typing import Any, Dict, List, Optional, Tuple, Union
|
||||||
|
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class RstParser(BaseParser):
|
class RstParser(BaseParser):
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Contains parsers for tabular data files.
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, Dict, List, Union
|
from typing import Any, Dict, List, Union
|
||||||
|
|
||||||
from parser.file.base_parser import BaseParser
|
from application.parser.file.base_parser import BaseParser
|
||||||
|
|
||||||
|
|
||||||
class CSVParser(BaseParser):
|
class CSVParser(BaseParser):
|
||||||
|
|||||||
1
application/parser/schema/__init__.py
Normal file
1
application/parser/schema/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
from langchain.docstore.document import Document as LCDocument
|
from langchain.docstore.document import Document as LCDocument
|
||||||
from parser.schema.schema import BaseDocument
|
from application.parser.schema.schema import BaseDocument
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ from math import ceil
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import tiktoken
|
import tiktoken
|
||||||
from parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
|
|
||||||
|
|
||||||
def separate_header_and_body(text):
|
def separate_header_and_body(text):
|
||||||
|
|||||||
@@ -73,6 +73,7 @@ pymongo==4.3.3
|
|||||||
pyowm==3.3.0
|
pyowm==3.3.0
|
||||||
PyPDF2==3.0.1
|
PyPDF2==3.0.1
|
||||||
PySocks==1.7.1
|
PySocks==1.7.1
|
||||||
|
pytest
|
||||||
python-dateutil==2.8.2
|
python-dateutil==2.8.2
|
||||||
python-dotenv==1.0.0
|
python-dotenv==1.0.0
|
||||||
python-jose==3.3.0
|
python-jose==3.3.0
|
||||||
|
|||||||
0
application/tests/__init__.py
Normal file
0
application/tests/__init__.py
Normal file
37
application/tests/test_app.py
Normal file
37
application/tests/test_app.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
from application.app import get_vectorstore
|
||||||
|
|
||||||
|
|
||||||
|
# Test cases for get_vectorstore function
|
||||||
|
def test_no_active_docs():
|
||||||
|
data = {}
|
||||||
|
assert get_vectorstore(data) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_active_docs():
|
||||||
|
data = {"active_docs": "default"}
|
||||||
|
assert get_vectorstore(data) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_default_active_docs():
|
||||||
|
data = {"active_docs": "local/default"}
|
||||||
|
assert get_vectorstore(data) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_local_custom_active_docs():
|
||||||
|
data = {"active_docs": "local/custom_index"}
|
||||||
|
assert get_vectorstore(data) == "indexes/local/custom_index"
|
||||||
|
|
||||||
|
|
||||||
|
def test_remote_active_docs():
|
||||||
|
data = {"active_docs": "remote_index"}
|
||||||
|
assert get_vectorstore(data) == "vectors/remote_index"
|
||||||
|
|
||||||
|
|
||||||
|
def test_active_docs_not_in_data():
|
||||||
|
data = {"other_key": "value"}
|
||||||
|
assert get_vectorstore(data) == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_multiple_slashes_in_active_docs():
|
||||||
|
data = {"active_docs": "local/some/other/index"}
|
||||||
|
assert get_vectorstore(data) == "indexes/local/some/other/index"
|
||||||
@@ -7,11 +7,11 @@ from urllib.parse import urljoin
|
|||||||
import nltk
|
import nltk
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from core.settings import settings
|
from application.core.settings import settings
|
||||||
from parser.file.bulk import SimpleDirectoryReader
|
from application.parser.file.bulk import SimpleDirectoryReader
|
||||||
from parser.open_ai_func import call_openai_api
|
from application.parser.open_ai_func import call_openai_api
|
||||||
from parser.schema.base import Document
|
from application.parser.schema.base import Document
|
||||||
from parser.token_func import group_split
|
from application.parser.token_func import group_split
|
||||||
|
|
||||||
try:
|
try:
|
||||||
nltk.download('punkt', quiet=True)
|
nltk.download('punkt', quiet=True)
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
from app import app
|
from application.app import app
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
app.run(debug=True, port=7091)
|
app.run(debug=True, port=7091)
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ services:
|
|||||||
|
|
||||||
backend:
|
backend:
|
||||||
build: ./application
|
build: ./application
|
||||||
|
working_dir: /application
|
||||||
environment:
|
environment:
|
||||||
- API_KEY=$OPENAI_API_KEY
|
- API_KEY=$OPENAI_API_KEY
|
||||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||||
@@ -27,16 +28,17 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "7091:7091"
|
- "7091:7091"
|
||||||
volumes:
|
volumes:
|
||||||
- ./application/indexes:/app/indexes
|
- ./application/indexes:/application/indexes
|
||||||
- ./application/inputs:/app/inputs
|
- ./application/inputs:/application/inputs
|
||||||
- ./application/vectors:/app/vectors
|
- ./application/vectors:/application/vectors
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
- mongo
|
- mongo
|
||||||
|
|
||||||
worker:
|
worker:
|
||||||
build: ./application
|
build: ./application
|
||||||
command: celery -A app.celery worker -l INFO
|
working_dir: /application
|
||||||
|
command: celery -A application.app.celery worker -l INFO
|
||||||
environment:
|
environment:
|
||||||
- API_KEY=$OPENAI_API_KEY
|
- API_KEY=$OPENAI_API_KEY
|
||||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||||
|
|||||||
@@ -13,6 +13,7 @@ services:
|
|||||||
|
|
||||||
backend:
|
backend:
|
||||||
build: ./application
|
build: ./application
|
||||||
|
working_dir: /application
|
||||||
environment:
|
environment:
|
||||||
- API_KEY=$OPENAI_API_KEY
|
- API_KEY=$OPENAI_API_KEY
|
||||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||||
@@ -22,16 +23,17 @@ services:
|
|||||||
ports:
|
ports:
|
||||||
- "7091:7091"
|
- "7091:7091"
|
||||||
volumes:
|
volumes:
|
||||||
- ./application/indexes:/app/indexes
|
- ./application/indexes:/application/indexes
|
||||||
- ./application/inputs:/app/inputs
|
- ./application/inputs:/application/inputs
|
||||||
- ./application/vectors:/app/vectors
|
- ./application/vectors:/application/vectors
|
||||||
depends_on:
|
depends_on:
|
||||||
- redis
|
- redis
|
||||||
- mongo
|
- mongo
|
||||||
|
|
||||||
worker:
|
worker:
|
||||||
build: ./application
|
build: ./application
|
||||||
command: celery -A app.celery worker -l INFO
|
working_dir: /application
|
||||||
|
command: celery -A application.app.celery worker -l INFO
|
||||||
environment:
|
environment:
|
||||||
- API_KEY=$OPENAI_API_KEY
|
- API_KEY=$OPENAI_API_KEY
|
||||||
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
- EMBEDDINGS_KEY=$OPENAI_API_KEY
|
||||||
|
|||||||
@@ -110,8 +110,6 @@ tenacity==8.2.2
|
|||||||
threadpoolctl==3.2.0
|
threadpoolctl==3.2.0
|
||||||
tiktoken==0.4.0
|
tiktoken==0.4.0
|
||||||
tokenizers==0.13.3
|
tokenizers==0.13.3
|
||||||
torch==2.0.1
|
|
||||||
torchvision==0.15.2
|
|
||||||
tqdm==4.65.0
|
tqdm==4.65.0
|
||||||
transformers==4.31.0
|
transformers==4.31.0
|
||||||
typer==0.9.0
|
typer==0.9.0
|
||||||
|
|||||||
Reference in New Issue
Block a user