mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 16:43:16 +00:00
This commit is contained in:
@@ -0,0 +1,49 @@
|
||||
import os
|
||||
import base64
|
||||
import requests
|
||||
from typing import List
|
||||
from application.parser.remote.base import BaseRemote
|
||||
from application.parser.schema.base import Document
|
||||
|
||||
class GitHubLoader(BaseRemote):
|
||||
def __init__(self, access_token: str):
|
||||
self.access_token = access_token
|
||||
|
||||
def fetch_file_content(self, repo_url: str, file_path: str) -> str:
|
||||
url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}"
|
||||
headers = {
|
||||
"Authorization": f"token {self.access_token}",
|
||||
"Accept": "application/vnd.github.v3.raw"
|
||||
}
|
||||
response = requests.get(url, headers=headers)
|
||||
response.raise_for_status()
|
||||
content = response.json()
|
||||
if content.get("encoding") == "base64":
|
||||
return base64.b64decode(content["content"]).decode("utf-8")
|
||||
return content["content"]
|
||||
|
||||
def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]:
|
||||
url = f"https://api.github.com/repos/{repo_url}/contents/{path}"
|
||||
headers = {
|
||||
"Authorization": f"token {self.access_token}",
|
||||
"Accept": "application/vnd.github.v3.raw"
|
||||
}
|
||||
response = requests.get(url, headers=headers)
|
||||
response.raise_for_status()
|
||||
contents = response.json()
|
||||
files = []
|
||||
for item in contents:
|
||||
if item["type"] == "file":
|
||||
files.append(item["path"])
|
||||
elif item["type"] == "dir":
|
||||
files.extend(self.fetch_repo_files(repo_url, item["path"]))
|
||||
return files
|
||||
|
||||
def load_data(self, repo_url: str) -> List[Document]:
|
||||
repo_name = repo_url.split("github.com/")[-1]
|
||||
files = self.fetch_repo_files(repo_name)
|
||||
documents = []
|
||||
for file_path in files:
|
||||
content = self.fetch_file_content(repo_name, file_path)
|
||||
documents.append(Document(content=content, metadata={"file_path": file_path}))
|
||||
return documents
|
||||
|
||||
Reference in New Issue
Block a user