From c7409337821724d9380c0bc39adc90fd2c14b106 Mon Sep 17 00:00:00 2001
From: Pavel <32868631+pabik@users.noreply.github.com>
Date: Sun, 30 Mar 2025 11:45:33 +0100
Subject: [PATCH] Delete docsgpt_scanner.py

---
 docsgpt_scanner.py | 636 ---------------------------------------------
 1 file changed, 636 deletions(-)
 delete mode 100644 docsgpt_scanner.py

diff --git a/docsgpt_scanner.py b/docsgpt_scanner.py
deleted file mode 100644
index 98a8e0f6..00000000
--- a/docsgpt_scanner.py
+++ /dev/null
@@ -1,636 +0,0 @@
-import os
-import argparse
-from pathlib import Path
-import datetime
-import re
-import json
-
-class DocsGPTDocumentationGenerator:
-    def __init__(self, root_dir, config=None):
-        """
-        Initialize the documentation generator with customized settings for DocsGPT.
-        
-        Args:
-            root_dir (str): The path to the root directory of the project.
-            config (dict, optional): Configuration overrides.
-        """
-        self.root_dir = os.path.abspath(root_dir)
-        
-        # Default configuration optimized for DocsGPT
-        self.config = {
-            # Directories to exclude completely
-            'excluded_dirs': [
-                '__pycache__', 'venv', '.venv', 'node_modules', '.git', '.idea', '.vscode',
-                'dist', 'build', 'model', 'temp', 'indexes', 'model', 'postgres_data',
-                'logs', 'out', 'vectors'
-            ],
-            
-            # File patterns to exclude
-            'excluded_patterns': [
-                '*.pyc', '*.bin', '*.faiss', '*.pkl', '*.so', '*.o', 
-                '*.jpg', '*.jpeg', '*.png', '*.gif', '*.webp', '*.ico', '*.lock',
-                '*.pdf'  # Exclude PDFs as they're just data in your project
-            ],
-            
-            # Files that should always be included despite other exclusions
-            'always_include': [
-                'README.md', 'LICENSE', 'CONTRIBUTING.md', 'requirements.txt',
-                'package.json', 'Dockerfile', 'docker-compose*.yaml', 'docker-compose*.yml'
-            ],
-            
-            # Core code directories to focus on
-            'core_dirs': [
-                'application', 'frontend/src', 'extensions', 'docs'
-            ],
-            
-            # File types to include content in documentation
-            'content_file_types': [
-                '.py', '.js', '.jsx', '.ts', '.tsx', '.md', '.txt',
-                '.yaml', '.yml', '.json', '.dockerfile'
-            ],
-            
-            # Max file size to include content (100KB)
-            'max_content_size': 100 * 1024,
-            
-            # Max number of files to include full content for each directory
-            'max_files_per_dir': 5,
-            
-            # Max characters to show in file previews
-            'preview_length': 500
-        }
-        
-        # Override config with provided values
-        if config:
-            self.config.update(config)
-    
-    def should_exclude(self, path):
-        """
-        Determine if a path should be excluded from the documentation.
-        
-        Args:
-            path (str): The path to check.
-            
-        Returns:
-            bool: True if the path should be excluded, False otherwise.
-        """
-        name = os.path.basename(path)
-        
-        # Always include certain files
-        for pattern in self.config['always_include']:
-            if self._match_pattern(name, pattern):
-                return False
-        
-        # Check if it's in excluded directories
-        parts = Path(path).relative_to(self.root_dir).parts
-        for part in parts:
-            for excluded_dir in self.config['excluded_dirs']:
-                if self._match_pattern(part, excluded_dir):
-                    return True
-        
-        # Check excluded patterns
-        for pattern in self.config['excluded_patterns']:
-            if self._match_pattern(name, pattern):
-                return True
-        
-        # Exclude hidden files
-        if name.startswith('.') and name not in ['.env.example']:
-            return True
-        
-        return False
-    
-    def _match_pattern(self, name, pattern):
-        """
-        Check if a name matches a pattern with simple wildcard support.
-        
-        Args:
-            name (str): The name to check.
-            pattern (str): The pattern to match against.
-            
-        Returns:
-            bool: True if the name matches the pattern, False otherwise.
-        """
-        if pattern.startswith('*.'):
-            # Extension pattern
-            return name.endswith(pattern[1:])
-        elif '*' in pattern:
-            # Convert to regex pattern
-            regex_pattern = pattern.replace('.', r'\.').replace('*', '.*')
-            return bool(re.match(f"^{regex_pattern}$", name))
-        else:
-            # Exact match
-            return name == pattern
-    
-    def scan_directory(self):
-        """
-        Scan the directory and build a structure representation.
-        
-        Returns:
-            dict: A dictionary representation of the project structure
-        """
-        structure = {}
-        
-        for root, dirs, files in os.walk(self.root_dir):
-            # Skip excluded directories
-            dirs[:] = [d for d in dirs if not self.should_exclude(os.path.join(root, d))]
-            
-            # Get the relative path from the root directory
-            rel_path = os.path.relpath(root, self.root_dir)
-            if rel_path == '.':
-                rel_path = ''
-            
-            # Filter files based on excluded patterns
-            filtered_files = [file for file in files if not self.should_exclude(os.path.join(root, file))]
-            
-            # Add directory and its files to the structure
-            if rel_path:
-                current_level = structure
-                for part in rel_path.split(os.path.sep):
-                    if part not in current_level:
-                        current_level[part] = {}
-                    current_level = current_level[part]
-                current_level['__files__'] = filtered_files
-            else:
-                structure['__files__'] = filtered_files
-        
-        return structure
-    
-    def print_structure(self, structure=None, indent=0, is_last=True, prefix="", file=None):
-        """
-        Print the directory structure in a tree-like format.
-        
-        Args:
-            structure (dict): Dictionary representing the directory structure.
-            indent (int): Current indentation level.
-            is_last (bool): Whether this is the last item in its parent.
-            prefix (str): Prefix for the current line.
-            file: File object to write to.
-        """
-        if structure is None:
-            # First call, print the root directory name
-            structure = self.scan_directory()
-            root_name = os.path.basename(self.root_dir) + "/"
-            line = root_name
-            if file:
-                file.write(f"{line}\n")
-            print(line)
-        
-        # Print files
-        if '__files__' in structure:
-            files = structure.pop('__files__')
-            for i, file_name in enumerate(sorted(files)):
-                is_last_file = (i == len(files) - 1) and len(structure) == 0
-                connector = "└── " if is_last_file else "├── "
-                line = f"{prefix}{connector}{file_name}"
-                if file:
-                    file.write(f"{line}\n")
-                print(line)
-        
-        # Process directories
-        items = list(sorted(structure.items()))
-        for i, (dir_name, contents) in enumerate(items):
-            is_last_dir = i == len(items) - 1
-            connector = "└── " if is_last_dir else "├── "
-            line = f"{prefix}{connector}{dir_name}/"
-            if file:
-                file.write(f"{line}\n")
-            print(line)
-            
-            new_prefix = prefix + ("    " if is_last_dir else "│   ")
-            self.print_structure(contents, indent + 1, is_last_dir, new_prefix, file)
-    
-    def _get_file_language(self, file_path):
-        """
-        Determine the language of a file for code block formatting.
-        
-        Args:
-            file_path (str): Path to the file.
-            
-        Returns:
-            str: Language identifier for markdown code block.
-        """
-        ext = os.path.splitext(file_path)[1].lower()
-        name = os.path.basename(file_path)
-        
-        # Map file extensions to language identifiers
-        ext_to_lang = {
-            '.py': 'python',
-            '.js': 'javascript',
-            '.jsx': 'jsx',
-            '.ts': 'typescript',
-            '.tsx': 'tsx',
-            '.html': 'html',
-            '.css': 'css',
-            '.scss': 'scss',
-            '.md': 'markdown',
-            '.json': 'json',
-            '.yaml': 'yaml',
-            '.yml': 'yaml',
-            '.sh': 'bash'
-        }
-        
-        # Special files
-        if name in ['Dockerfile']:
-            return 'dockerfile'
-        elif name in ['docker-compose.yml', 'docker-compose.yaml']:
-            return 'yaml'
-        elif name in ['Makefile']:
-            return 'makefile'
-        elif name in ['.gitignore', 'requirements.txt', '.env.example']:
-            return ''  # Plain text
-        
-        return ext_to_lang.get(ext, '')
-    
-    def should_include_content(self, file_path):
-        """
-        Check if a file's content should be included in the documentation.
-        
-        Args:
-            file_path (str): The path to the file.
-            
-        Returns:
-            bool: True if content should be included, False otherwise.
-        """
-        # Check file size
-        if os.path.getsize(file_path) > self.config['max_content_size']:
-            return False
-        
-        # Check file extension
-        ext = os.path.splitext(file_path)[1].lower()
-        if ext not in self.config['content_file_types']:
-            return False
-        
-        # Check if file is in a core directory
-        rel_path = os.path.relpath(file_path, self.root_dir)
-        for core_dir in self.config['core_dirs']:
-            if rel_path.startswith(core_dir):
-                return True
-        
-        # Include any README or key configuration files
-        name = os.path.basename(file_path)
-        if any(self._match_pattern(name, pattern) for pattern in self.config['always_include']):
-            return True
-        
-        return False
-    
-    def count_files_by_type(self):
-        """
-        Count the number of files by type in the project.
-        
-        Returns:
-            dict: A dictionary mapping file extensions to counts.
-        """
-        ext_counts = {}
-        
-        for root, _, files in os.walk(self.root_dir):
-            if self.should_exclude(root):
-                continue
-                
-            for file in files:
-                file_path = os.path.join(root, file)
-                if self.should_exclude(file_path):
-                    continue
-                
-                ext = os.path.splitext(file)[1].lower()
-                if not ext:
-                    ext = '(no extension)'
-                
-                ext_counts[ext] = ext_counts.get(ext, 0) + 1
-        
-        return ext_counts
-    
-    def generate_code_snippets(self, structure=None, path="", snippets=None):
-        """
-        Generate representative code snippets from the project.
-        
-        Args:
-            structure (dict): Project structure dictionary.
-            path (str): Current path in the structure.
-            snippets (dict): Dictionary to store snippets by directory.
-            
-        Returns:
-            dict: Dictionary mapping directories to lists of file snippets.
-        """
-        if snippets is None:
-            snippets = {}
-            structure = self.scan_directory()
-        
-        # Process files in the current directory
-        if '__files__' in structure:
-            files = structure.pop('__files__')
-            dir_snippets = []
-            
-            # Sort files to prioritize key files
-            sorted_files = sorted(files, key=lambda f: f.startswith(('README', 'main', 'app')) and not f.startswith('.'), reverse=True)
-            
-            for file in sorted_files[:self.config['max_files_per_dir']]:
-                file_path = os.path.join(self.root_dir, path, file)
-                
-                if self.should_include_content(file_path):
-                    try:
-                        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                            content = f.read(self.config['preview_length'])
-                            too_long = len(content) >= self.config['preview_length']
-                            
-                            dir_snippets.append({
-                                'name': file,
-                                'path': os.path.join(path, file),
-                                'language': self._get_file_language(file_path),
-                                'content': content + ('...' if too_long else ''),
-                                'full_path': file_path
-                            })
-                    except Exception as e:
-                        # Skip files that can't be read
-                        pass
-            
-            if dir_snippets:
-                snippets[path or '.'] = dir_snippets
-        
-        # Process subdirectories
-        for dir_name, contents in structure.items():
-            self.generate_code_snippets(contents, os.path.join(path, dir_name), snippets)
-        
-        return snippets
-    
-    def find_important_files(self):
-        """
-        Find and return a list of important files in the project.
-        
-        Returns:
-            list: List of important file paths.
-        """
-        important_files = []
-        
-        # Files to look for in any directory
-        common_important_files = [
-            'README.md', 'Dockerfile', 'docker-compose.yml', 'docker-compose.yaml',
-            'requirements.txt', 'setup.py', 'package.json', 'app.py', 'main.py',
-            'settings.py', 'config.py', 'wsgi.py', '.env.example'
-        ]
-        
-        for root, _, files in os.walk(self.root_dir):
-            if self.should_exclude(root):
-                continue
-                
-            for file in files:
-                if file in common_important_files:
-                    important_files.append(os.path.join(root, file))
-        
-        return important_files
-    
-    def generate_markdown(self, output_file):
-        """
-        Generate a comprehensive markdown document for the DocsGPT project.
-        
-        Args:
-            output_file (str): Path to the output markdown file.
-        """
-        structure = self.scan_directory()
-        ext_counts = self.count_files_by_type()
-        important_files = self.find_important_files()
-        snippets = self.generate_code_snippets()
-        
-        with open(output_file, 'w', encoding='utf-8') as md_file:
-            # Title and metadata
-            md_file.write(f"# DocsGPT Project Documentation\n\n")
-            md_file.write(f"Generated: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M')}\n\n")
-            
-            # Project Overview
-            md_file.write("## 1. Project Overview\n\n")
-            
-            # Try to include README content
-            readme_path = os.path.join(self.root_dir, "README.md")
-            if os.path.exists(readme_path):
-                try:
-                    with open(readme_path, 'r', encoding='utf-8', errors='replace') as readme:
-                        content = readme.read()
-                        md_file.write("### From README.md\n\n")
-                        md_file.write(f"{content}\n\n")
-                except Exception:
-                    md_file.write("*Error reading README.md*\n\n")
-            
-            # Project stats
-            md_file.write("### Project Statistics\n\n")
-            
-            # Count directories and files
-            total_dirs = 0
-            total_files = 0
-            for root, dirs, files in os.walk(self.root_dir):
-                if not self.should_exclude(root):
-                    total_dirs += sum(1 for d in dirs if not self.should_exclude(os.path.join(root, d)))
-                    total_files += sum(1 for f in files if not self.should_exclude(os.path.join(root, f)))
-            
-            md_file.write(f"- **Total Directories:** {total_dirs}\n")
-            md_file.write(f"- **Total Files:** {total_files}\n\n")
-            
-            md_file.write("#### File Types\n\n")
-            for ext, count in sorted(ext_counts.items(), key=lambda x: x[1], reverse=True)[:15]:
-                md_file.write(f"- **{ext}:** {count} files\n")
-            md_file.write("\n")
-            
-            # Directory Structure
-            md_file.write("## 2. Directory Structure\n\n")
-            md_file.write("```\n")
-            self.print_structure(file=md_file)
-            md_file.write("```\n\n")
-            
-            # Key Components
-            md_file.write("## 3. Key Components\n\n")
-            
-            # Application component
-            md_file.write("### 3.1. Application Core\n\n")
-            if 'application' in snippets:
-                md_file.write("The application core contains the main backend logic for DocsGPT.\n\n")
-                for snippet in snippets['application'][:3]:
-                    md_file.write(f"#### {snippet['path']}\n\n")
-                    md_file.write(f"```{snippet['language']}\n{snippet['content']}\n```\n\n")
-            
-            # Frontend component
-            md_file.write("### 3.2. Frontend\n\n")
-            frontend_snippets = [s for path, files in snippets.items() 
-                               for s in files if path.startswith('frontend/src')]
-            if frontend_snippets:
-                md_file.write("The frontend is built with React and provides the user interface.\n\n")
-                for snippet in frontend_snippets[:3]:
-                    md_file.write(f"#### {snippet['path']}\n\n")
-                    md_file.write(f"```{snippet['language']}\n{snippet['content']}\n```\n\n")
-            
-            # Extensions
-            md_file.write("### 3.3. Extensions\n\n")
-            extension_snippets = [s for path, files in snippets.items() 
-                                for s in files if path.startswith('extensions')]
-            if extension_snippets:
-                md_file.write("DocsGPT includes various extensions for different platforms.\n\n")
-                for snippet in extension_snippets[:3]:
-                    md_file.write(f"#### {snippet['path']}\n\n")
-                    md_file.write(f"```{snippet['language']}\n{snippet['content']}\n```\n\n")
-            
-            # Configuration Files
-            md_file.write("## 4. Configuration Files\n\n")
-            
-            # Docker files
-            md_file.write("### 4.1. Docker Configuration\n\n")
-            docker_files = [f for f in important_files if os.path.basename(f) in 
-                          ['Dockerfile', 'docker-compose.yml', 'docker-compose.yaml']]
-            
-            for file_path in docker_files:
-                try:
-                    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                        content = f.read()
-                        rel_path = os.path.relpath(file_path, self.root_dir)
-                        md_file.write(f"#### {rel_path}\n\n")
-                        
-                        lang = 'dockerfile' if os.path.basename(file_path) == 'Dockerfile' else 'yaml'
-                        md_file.write(f"```{lang}\n{content}\n```\n\n")
-                except Exception as e:
-                    md_file.write(f"*Error reading {os.path.relpath(file_path, self.root_dir)}: {e}*\n\n")
-            
-            # Requirements and package files
-            md_file.write("### 4.2. Dependencies\n\n")
-            dep_files = [f for f in important_files if os.path.basename(f) in 
-                       ['requirements.txt', 'package.json']]
-            
-            for file_path in dep_files:
-                try:
-                    with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                        content = f.read()
-                        rel_path = os.path.relpath(file_path, self.root_dir)
-                        md_file.write(f"#### {rel_path}\n\n")
-                        
-                        lang = 'json' if file_path.endswith('.json') else ''
-                        md_file.write(f"```{lang}\n{content}\n```\n\n")
-                except Exception as e:
-                    md_file.write(f"*Error reading {os.path.relpath(file_path, self.root_dir)}: {e}*\n\n")
-            
-            # Environment files
-            env_files = [f for f in important_files if os.path.basename(f) == '.env.example']
-            if env_files:
-                md_file.write("### 4.3. Environment Configuration\n\n")
-                
-                for file_path in env_files:
-                    try:
-                        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                            content = f.read()
-                            rel_path = os.path.relpath(file_path, self.root_dir)
-                            md_file.write(f"#### {rel_path}\n\n")
-                            md_file.write(f"```\n{content}\n```\n\n")
-                    except Exception as e:
-                        md_file.write(f"*Error reading {os.path.relpath(file_path, self.root_dir)}: {e}*\n\n")
-            
-            # API Documentation (if we can find routes)
-            md_file.write("## 5. API Documentation\n\n")
-            api_files = []
-            for root, _, files in os.walk(os.path.join(self.root_dir, 'application/api')):
-                if self.should_exclude(root):
-                    continue
-                    
-                for file in files:
-                    if file == 'routes.py':
-                        api_files.append(os.path.join(root, file))
-            
-            if api_files:
-                md_file.write("### API Routes\n\n")
-                for file_path in api_files[:5]:  # Limit to 5 route files
-                    try:
-                        with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
-                            content = f.read()
-                            rel_path = os.path.relpath(file_path, self.root_dir)
-                            md_file.write(f"#### {rel_path}\n\n")
-                            md_file.write(f"```python\n{content}\n```\n\n")
-                    except Exception as e:
-                        md_file.write(f"*Error reading {os.path.relpath(file_path, self.root_dir)}: {e}*\n\n")
-            
-            # Conclusion
-            md_file.write("## 6. Additional Information\n\n")
-            md_file.write("This documentation provides an overview of the DocsGPT project structure and key components. "
-                         "For more detailed information, please refer to the official documentation and code comments.\n\n")
-            
-            md_file.write("### License\n\n")
-            license_path = os.path.join(self.root_dir, "LICENSE")
-            if os.path.exists(license_path):
-                try:
-                    with open(license_path, 'r', encoding='utf-8', errors='replace') as f:
-                        content = f.read(500)  # Just read the beginning of the license
-                        md_file.write(f"```\n{content}...\n```\n\n")
-                except Exception:
-                    md_file.write("*Error reading LICENSE file*\n\n")
-            
-            # Generation metadata
-            md_file.write("---\n\n")
-            md_file.write(f"*Documentation generated on {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*\n")
-            md_file.write(f"*Generator: DocsGPT Project Documentation Generator*\n")
-
-
-def main():
-    parser = argparse.ArgumentParser(description='DocsGPT Project Documentation Generator')
-    
-    parser.add_argument('--root', '-r', type=str, default='.',
-                        help='Root directory of the project (default: current directory)')
-    
-    parser.add_argument('--output', '-o', type=str,
-                        help='Output markdown file (default: project_name_docs.md in the root directory)')
-    
-    parser.add_argument('--exclude-dirs', '-e', type=str, nargs='+',
-                        help='Additional directories to exclude')
-    
-    parser.add_argument('--exclude-files', '-ef', type=str, nargs='+',
-                        help='Additional file patterns to exclude')
-    
-    parser.add_argument('--include-files', '-if', type=str, nargs='+',
-                        help='Files to always include despite exclusions')
-    
-    parser.add_argument('--core-dirs', '-c', type=str, nargs='+',
-                        help='Core directories to focus on for code snippets')
-    
-    parser.add_argument('--config-file', '-cf', type=str,
-                        help='Path to JSON configuration file')
-    
-    parser.add_argument('--tree-only', action='store_true',
-                        help='Only print the directory tree structure, do not generate documentation')
-    
-    args = parser.parse_args()
-    
-    # Get absolute path of the root directory
-    root_dir = os.path.abspath(args.root)
-    
-    # Load configuration from file if provided
-    config = None
-    if args.config_file:
-        try:
-            with open(args.config_file, 'r') as f:
-                config = json.load(f)
-        except Exception as e:
-            print(f"Error loading configuration file: {e}")
-            return
-    else:
-        # Build configuration from command line arguments
-        config = {}
-        if args.exclude_dirs:
-            config['excluded_dirs'] = args.exclude_dirs
-        if args.exclude_files:
-            config['excluded_patterns'] = args.exclude_files
-        if args.include_files:
-            config['always_include'] = args.include_files
-        if args.core_dirs:
-            config['core_dirs'] = args.core_dirs
-    
-    # Create the generator
-    generator = DocsGPTDocumentationGenerator(root_dir=root_dir, config=config)
-    
-    if args.tree_only:
-        # Just print the tree structure
-        print(f"Directory structure for: {root_dir}\n")
-        generator.print_structure()
-    else:
-        # Generate full documentation
-        output_file = args.output
-        if not output_file:
-            project_name = os.path.basename(root_dir)
-            output_file = os.path.join(root_dir, f"{project_name}_documentation.md")
-        
-        print(f"Generating documentation for {root_dir}...")
-        generator.generate_markdown(output_file)
-        print(f"Documentation saved to: {output_file}")
-
-
-if __name__ == "__main__":
-    main()
\ No newline at end of file