(feat:dir_tree) improvement

This commit is contained in:
ManishMadan2882
2025-08-02 01:48:43 +05:30
parent e1b1558fc9
commit a61e44d175

View File

@@ -196,7 +196,7 @@ class SimpleDirectoryReader(BaseReader):
# Build directory structure if input_dir is provided # Build directory structure if input_dir is provided
if hasattr(self, 'input_dir'): if hasattr(self, 'input_dir'):
self.directory_structure = self._build_directory_structure(self.input_dir) self.directory_structure = self.build_directory_structure(self.input_dir)
logging.info(f"Directory structure built successfully") logging.info(f"Directory structure built successfully")
else: else:
self.directory_structure = {} self.directory_structure = {}
@@ -208,48 +208,47 @@ class SimpleDirectoryReader(BaseReader):
else: else:
return [Document(d) for d in data_list] return [Document(d) for d in data_list]
def _build_directory_structure(self, base_path): def build_directory_structure(self, base_path):
"""Build a dictionary representing the directory structure. """Build a dictionary representing the directory structure.
Args: Args:
base_path: The base path to start building the structure from. base_path: The base path to start building the structure from.
Returns: Returns:
dict: A nested dictionary representing the directory structure. dict: A nested dictionary representing the directory structure.
""" """
structure = {} import mimetypes
base_path = Path(base_path)
def _build_tree(path, current_dict): def build_tree(path):
"""Helper function to recursively build the directory tree."""
result = {}
for item in path.iterdir(): for item in path.iterdir():
if self.exclude_hidden and item.name.startswith('.'):
continue
if item.is_dir(): if item.is_dir():
if self.exclude_hidden and item.name.startswith('.'): subtree = build_tree(item)
continue if subtree:
current_dict[item.name] = {} result[item.name] = subtree
_build_tree(item, current_dict[item.name])
else: else:
if self.exclude_hidden and item.name.startswith('.'):
continue
if self.required_exts is not None and item.suffix not in self.required_exts: if self.required_exts is not None and item.suffix not in self.required_exts:
continue continue
full_path = str(item.resolve()) full_path = str(item.resolve())
file_size_bytes = item.stat().st_size file_size_bytes = item.stat().st_size
import mimetypes
mime_type = mimetypes.guess_type(item.name)[0] or "application/octet-stream" mime_type = mimetypes.guess_type(item.name)[0] or "application/octet-stream"
file_info = {
"type": mime_type,
"size_bytes": file_size_bytes
}
if hasattr(self, 'file_token_counts') and full_path in self.file_token_counts: if hasattr(self, 'file_token_counts') and full_path in self.file_token_counts:
current_dict[item.name] = { file_info["token_count"] = self.file_token_counts[full_path]
"type": mime_type,
"token_count": self.file_token_counts[full_path], result[item.name] = file_info
"size_bytes": file_size_bytes
} return result
else:
current_dict[item.name] = {
"type": mime_type,
"size_bytes": file_size_bytes
}
_build_tree(base_path, structure) return build_tree(Path(base_path))
return structure