mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
Merge branch 'main' of https://github.com/ManishMadan2882/docsgpt
This commit is contained in:
@@ -141,17 +141,17 @@ def save_conversation(conversation_id, question, response, source_log_docs, llm)
|
||||
"role": "assistant",
|
||||
"content": "Summarise following conversation in no more than 3 "
|
||||
"words, respond ONLY with the summary, use the same "
|
||||
"language as the system \n\nUser: "
|
||||
+ question
|
||||
+ "\n\n"
|
||||
+ "AI: "
|
||||
+ response,
|
||||
"language as the system",
|
||||
},
|
||||
{
|
||||
"role": "user",
|
||||
"content": "Summarise following conversation in no more than 3 words, "
|
||||
"respond ONLY with the summary, use the same language as the "
|
||||
"system",
|
||||
"system \n\nUser: "
|
||||
+ question
|
||||
+ "\n\n"
|
||||
+ "AI: "
|
||||
+ response,
|
||||
},
|
||||
]
|
||||
|
||||
|
||||
@@ -339,6 +339,9 @@ class UploadFile(Resource):
|
||||
".json",
|
||||
".xlsx",
|
||||
".pptx",
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
],
|
||||
job_name,
|
||||
final_filename,
|
||||
@@ -365,6 +368,9 @@ class UploadFile(Resource):
|
||||
".json",
|
||||
".xlsx",
|
||||
".pptx",
|
||||
".png",
|
||||
".jpg",
|
||||
".jpeg",
|
||||
],
|
||||
job_name,
|
||||
final_filename,
|
||||
|
||||
@@ -18,6 +18,7 @@ class Settings(BaseSettings):
|
||||
DEFAULT_MAX_HISTORY: int = 150
|
||||
MODEL_TOKEN_LIMITS: dict = {"gpt-3.5-turbo": 4096, "claude-2": 1e5}
|
||||
UPLOAD_FOLDER: str = "inputs"
|
||||
PARSE_PDF_AS_IMAGE: bool = False
|
||||
VECTOR_STORE: str = "faiss" # "faiss" or "elasticsearch" or "qdrant" or "milvus" or "lancedb"
|
||||
RETRIEVERS_ENABLED: list = ["classic_rag", "duckduck_search"] # also brave_search
|
||||
|
||||
|
||||
@@ -9,35 +9,25 @@ class DocsGPTAPILLM(BaseLLM):
|
||||
super().__init__(*args, **kwargs)
|
||||
self.api_key = api_key
|
||||
self.user_api_key = user_api_key
|
||||
self.endpoint = "https://llm.docsgpt.co.uk"
|
||||
self.endpoint = "https://llm.arc53.com"
|
||||
|
||||
def _raw_gen(self, baseself, model, messages, stream=False, *args, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
response = requests.post(
|
||||
f"{self.endpoint}/answer", json={"prompt": prompt, "max_new_tokens": 30}
|
||||
f"{self.endpoint}/answer", json={"messages": messages, "max_new_tokens": 30}
|
||||
)
|
||||
response_clean = response.json()["a"].replace("###", "")
|
||||
|
||||
return response_clean
|
||||
|
||||
def _raw_gen_stream(self, baseself, model, messages, stream=True, *args, **kwargs):
|
||||
context = messages[0]["content"]
|
||||
user_question = messages[-1]["content"]
|
||||
prompt = f"### Instruction \n {user_question} \n ### Context \n {context} \n ### Answer \n"
|
||||
|
||||
# send prompt to endpoint /stream
|
||||
response = requests.post(
|
||||
f"{self.endpoint}/stream",
|
||||
json={"prompt": prompt, "max_new_tokens": 256},
|
||||
json={"messages": messages, "max_new_tokens": 256},
|
||||
stream=True,
|
||||
)
|
||||
|
||||
for line in response.iter_lines():
|
||||
if line:
|
||||
# data = json.loads(line)
|
||||
data_str = line.decode("utf-8")
|
||||
if data_str.startswith("data: "):
|
||||
data = json.loads(data_str[6:])
|
||||
|
||||
@@ -13,6 +13,7 @@ from application.parser.file.rst_parser import RstParser
|
||||
from application.parser.file.tabular_parser import PandasCSVParser,ExcelParser
|
||||
from application.parser.file.json_parser import JSONParser
|
||||
from application.parser.file.pptx_parser import PPTXParser
|
||||
from application.parser.file.image_parser import ImageParser
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
@@ -27,6 +28,9 @@ DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
|
||||
".mdx": MarkdownParser(),
|
||||
".json":JSONParser(),
|
||||
".pptx":PPTXParser(),
|
||||
".png": ImageParser(),
|
||||
".jpg": ImageParser(),
|
||||
".jpeg": ImageParser(),
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -7,7 +7,8 @@ from pathlib import Path
|
||||
from typing import Dict
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
from application.core.settings import settings
|
||||
import requests
|
||||
|
||||
class PDFParser(BaseParser):
|
||||
"""PDF parser."""
|
||||
@@ -18,6 +19,15 @@ class PDFParser(BaseParser):
|
||||
|
||||
def parse_file(self, file: Path, errors: str = "ignore") -> str:
|
||||
"""Parse file."""
|
||||
if settings.PARSE_PDF_AS_IMAGE:
|
||||
doc2md_service = "https://llm.arc53.com/doc2md"
|
||||
# alternatively you can use local vision capable LLM
|
||||
with open(file, "rb") as file_loaded:
|
||||
files = {'file': file_loaded}
|
||||
response = requests.post(doc2md_service, files=files)
|
||||
data = response.json()["markdown"]
|
||||
return data
|
||||
|
||||
try:
|
||||
import PyPDF2
|
||||
except ImportError:
|
||||
|
||||
27
application/parser/file/image_parser.py
Normal file
27
application/parser/file/image_parser.py
Normal file
@@ -0,0 +1,27 @@
|
||||
"""Image parser.
|
||||
|
||||
Contains parser for .png, .jpg, .jpeg files.
|
||||
|
||||
"""
|
||||
from pathlib import Path
|
||||
import requests
|
||||
from typing import Dict, Union
|
||||
|
||||
from application.parser.file.base_parser import BaseParser
|
||||
|
||||
|
||||
class ImageParser(BaseParser):
    """Parser for .png, .jpg and .jpeg files.

    The image is uploaded to a remote doc2md service and the markdown the
    service returns is used as the parsed content.
    """

    # Endpoint that receives the image as a multipart upload and answers with
    # a JSON body carrying a "markdown" field. Kept as a class attribute so a
    # subclass can point at a different (e.g. self-hosted) service.
    DOC2MD_SERVICE = "https://llm.arc53.com/doc2md"

    # Seconds to wait for the service before giving up. Without a timeout
    # `requests` waits forever, so one stalled request would block ingestion.
    REQUEST_TIMEOUT = 120

    def _init_parser(self) -> Dict:
        """Init parser.

        Returns:
            An empty config dict; this parser needs no local state.
        """
        return {}

    def parse_file(self, file: Path, errors: str = "ignore") -> Union[str, list[str]]:
        """Convert an image file to markdown using the doc2md service.

        Args:
            file: Path of the image to upload.
            errors: Accepted for signature compatibility; not used here.

        Returns:
            The value of the "markdown" field of the service's JSON response.

        Raises:
            requests.exceptions.Timeout: The service did not answer within
                ``REQUEST_TIMEOUT`` seconds.
            requests.exceptions.HTTPError: The service answered with a 4xx/5xx
                status.
            KeyError: The JSON response has no "markdown" field.
        """
        # alternatively you can use local vision capable LLM
        with open(file, "rb") as file_loaded:
            response = requests.post(
                self.DOC2MD_SERVICE,
                files={"file": file_loaded},
                timeout=self.REQUEST_TIMEOUT,
            )

        # Fail with a clear HTTP error instead of a confusing JSON/KeyError
        # when the service rejects the upload or is unavailable.
        response.raise_for_status()
        return response.json()["markdown"]
|
||||
@@ -332,6 +332,9 @@ function Upload({
|
||||
],
|
||||
'application/vnd.openxmlformats-officedocument.presentationml.presentation':
|
||||
['.pptx'],
|
||||
'image/png': ['.png'],
|
||||
'image/jpeg': ['.jpeg'],
|
||||
'image/jpg': ['.jpg'],
|
||||
},
|
||||
});
|
||||
|
||||
|
||||
Reference in New Issue
Block a user