diff --git a/application/parser/remote/github_loader.py b/application/parser/remote/github_loader.py index 2839f48d..f72d5278 100644 --- a/application/parser/remote/github_loader.py +++ b/application/parser/remote/github_loader.py @@ -1,34 +1,37 @@ -import os import base64 import requests from typing import List from application.parser.remote.base import BaseRemote -from application.parser.schema.base import Document +from langchain_core.documents import Document class GitHubLoader(BaseRemote): - def __init__(self, access_token: str): - self.access_token = access_token + def __init__(self): + self.access_token = None + self.headers = { + "Authorization": f"token {self.access_token}" + } if self.access_token else {} + return def fetch_file_content(self, repo_url: str, file_path: str) -> str: url = f"https://api.github.com/repos/{repo_url}/contents/{file_path}" - headers = { - "Authorization": f"token {self.access_token}", - "Accept": "application/vnd.github.v3.raw" - } - response = requests.get(url, headers=headers) - response.raise_for_status() - content = response.json() - if content.get("encoding") == "base64": - return base64.b64decode(content["content"]).decode("utf-8") - return content["content"] + response = requests.get(url, headers=self.headers) + + if response.status_code == 200: + content = response.json() + if content.get("encoding") == "base64": + try: + decoded_content = base64.b64decode(content["content"]).decode("utf-8") + return decoded_content + except Exception as e: + raise + else: + return content["content"] + else: + response.raise_for_status() def fetch_repo_files(self, repo_url: str, path: str = "") -> List[str]: url = f"https://api.github.com/repos/{repo_url}/contents/{path}" - headers = { - "Authorization": f"token {self.access_token}", - "Accept": "application/vnd.github.v3.raw" - } - response = requests.get(url, headers=headers) + response = requests.get(url, headers=self.headers) response.raise_for_status() contents = response.json() files = [] @@ -45,5 +48,5 @@ class GitHubLoader(BaseRemote): documents = [] for file_path in files: content = self.fetch_file_content(repo_name, file_path) - documents.append(Document(content=content, metadata={"file_path": file_path})) + documents.append(Document(page_content=content, metadata={"file_path": file_path})) return documents diff --git a/frontend/src/locale/en.json b/frontend/src/locale/en.json index fa2cac3c..c9b599bf 100644 --- a/frontend/src/locale/en.json +++ b/frontend/src/locale/en.json @@ -85,6 +85,7 @@ "train": "Train", "link": "Link", "urlLink": "URL Link", + "repoUrl": "Repository URL", "reddit": { "id": "Client ID", "secret": "Client Secret", diff --git a/frontend/src/locale/es.json b/frontend/src/locale/es.json index 7b7dbec0..98b38d7c 100644 --- a/frontend/src/locale/es.json +++ b/frontend/src/locale/es.json @@ -85,6 +85,7 @@ "train": "Entrenar", "link": "Enlace", "urlLink": "Enlace URL", + "repoUrl": "URL del Repositorio", "reddit": { "id": "ID de Cliente", "secret": "Secreto de Cliente", diff --git a/frontend/src/locale/jp.json b/frontend/src/locale/jp.json index fa61c291..b34cc5e5 100644 --- a/frontend/src/locale/jp.json +++ b/frontend/src/locale/jp.json @@ -85,6 +85,7 @@ "train": "トレーニング", "link": "リンク", "urlLink": "URLリンク", + "repoUrl": "リポジトリURL", "reddit": { "id": "クライアントID", "secret": "クライアントシークレット", diff --git a/frontend/src/locale/zh.json b/frontend/src/locale/zh.json index 080c4ee3..7decdefe 100644 --- a/frontend/src/locale/zh.json +++ b/frontend/src/locale/zh.json @@ -85,6 +85,7 @@ "train": "训练", "link": "链接", "urlLink": "URL 链接", + "repoUrl": "存储库 URL", "reddit": { "id": "客户端 ID", "secret": "客户端密钥",