mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 00:23:17 +00:00
(feat:dir_tree) improvement
This commit is contained in:
@@ -196,7 +196,7 @@ class SimpleDirectoryReader(BaseReader):
|
|||||||
|
|
||||||
# Build directory structure if input_dir is provided
|
# Build directory structure if input_dir is provided
|
||||||
if hasattr(self, 'input_dir'):
|
if hasattr(self, 'input_dir'):
|
||||||
self.directory_structure = self._build_directory_structure(self.input_dir)
|
self.directory_structure = self.build_directory_structure(self.input_dir)
|
||||||
logging.info(f"Directory structure built successfully")
|
logging.info(f"Directory structure built successfully")
|
||||||
else:
|
else:
|
||||||
self.directory_structure = {}
|
self.directory_structure = {}
|
||||||
@@ -208,48 +208,47 @@ class SimpleDirectoryReader(BaseReader):
|
|||||||
else:
|
else:
|
||||||
return [Document(d) for d in data_list]
|
return [Document(d) for d in data_list]
|
||||||
|
|
||||||
def _build_directory_structure(self, base_path):
|
def build_directory_structure(self, base_path):
|
||||||
"""Build a dictionary representing the directory structure.
|
"""Build a dictionary representing the directory structure.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
base_path: The base path to start building the structure from.
|
base_path: The base path to start building the structure from.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
dict: A nested dictionary representing the directory structure.
|
dict: A nested dictionary representing the directory structure.
|
||||||
"""
|
"""
|
||||||
structure = {}
|
import mimetypes
|
||||||
base_path = Path(base_path)
|
|
||||||
|
|
||||||
def _build_tree(path, current_dict):
|
def build_tree(path):
|
||||||
|
"""Helper function to recursively build the directory tree."""
|
||||||
|
result = {}
|
||||||
|
|
||||||
for item in path.iterdir():
|
for item in path.iterdir():
|
||||||
|
if self.exclude_hidden and item.name.startswith('.'):
|
||||||
|
continue
|
||||||
|
|
||||||
if item.is_dir():
|
if item.is_dir():
|
||||||
if self.exclude_hidden and item.name.startswith('.'):
|
subtree = build_tree(item)
|
||||||
continue
|
if subtree:
|
||||||
current_dict[item.name] = {}
|
result[item.name] = subtree
|
||||||
_build_tree(item, current_dict[item.name])
|
|
||||||
else:
|
else:
|
||||||
if self.exclude_hidden and item.name.startswith('.'):
|
|
||||||
continue
|
|
||||||
if self.required_exts is not None and item.suffix not in self.required_exts:
|
if self.required_exts is not None and item.suffix not in self.required_exts:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
full_path = str(item.resolve())
|
full_path = str(item.resolve())
|
||||||
file_size_bytes = item.stat().st_size
|
file_size_bytes = item.stat().st_size
|
||||||
|
|
||||||
import mimetypes
|
|
||||||
mime_type = mimetypes.guess_type(item.name)[0] or "application/octet-stream"
|
mime_type = mimetypes.guess_type(item.name)[0] or "application/octet-stream"
|
||||||
|
|
||||||
|
file_info = {
|
||||||
|
"type": mime_type,
|
||||||
|
"size_bytes": file_size_bytes
|
||||||
|
}
|
||||||
|
|
||||||
if hasattr(self, 'file_token_counts') and full_path in self.file_token_counts:
|
if hasattr(self, 'file_token_counts') and full_path in self.file_token_counts:
|
||||||
current_dict[item.name] = {
|
file_info["token_count"] = self.file_token_counts[full_path]
|
||||||
"type": mime_type,
|
|
||||||
"token_count": self.file_token_counts[full_path],
|
result[item.name] = file_info
|
||||||
"size_bytes": file_size_bytes
|
|
||||||
}
|
return result
|
||||||
else:
|
|
||||||
current_dict[item.name] = {
|
|
||||||
"type": mime_type,
|
|
||||||
"size_bytes": file_size_bytes
|
|
||||||
}
|
|
||||||
|
|
||||||
_build_tree(base_path, structure)
|
return build_tree(Path(base_path))
|
||||||
return structure
|
|
||||||
Reference in New Issue
Block a user