mirror of
https://github.com/arc53/DocsGPT.git
synced 2025-11-29 08:33:20 +00:00
(feat:file_abstract) process files method
This commit is contained in:
@@ -1,6 +1,6 @@
|
|||||||
"""Base storage class for file system abstraction."""
|
"""Base storage class for file system abstraction."""
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import BinaryIO, List
|
from typing import BinaryIO, List, Optional, Callable
|
||||||
|
|
||||||
|
|
||||||
class BaseStorage(ABC):
|
class BaseStorage(ABC):
|
||||||
@@ -33,6 +33,24 @@ class BaseStorage(ABC):
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
def process_file(self, path: str, processor_func: Callable, **kwargs):
    """
    Run ``processor_func`` against the stored file identified by ``path``.

    Concrete storage backends decide how the file is made available to
    the processor (the base class itself does nothing and returns None).

    Args:
        path: Path to the file
        processor_func: Function that processes the file
        **kwargs: Additional arguments to pass to the processor function

    Returns:
        The result of the processor function
    """
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def delete_file(self, path: str) -> bool:
|
def delete_file(self, path: str) -> bool:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -1,9 +1,8 @@
|
|||||||
"""Local file system implementation."""
|
"""Local file system implementation."""
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
from typing import BinaryIO, List
|
from typing import BinaryIO, List, Callable
|
||||||
|
|
||||||
from application.core.settings import settings
|
|
||||||
from application.storage.base import BaseStorage
|
from application.storage.base import BaseStorage
|
||||||
|
|
||||||
|
|
||||||
@@ -83,3 +82,24 @@ class LocalStorage(BaseStorage):
|
|||||||
result.append(rel_path)
|
result.append(rel_path)
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def process_file(self, path: str, processor_func: Callable, **kwargs):
    """
    Hand a locally stored file to ``processor_func``.

    The storage-relative ``path`` is resolved with ``self._get_full_path``
    and the resulting path is passed to the processor as the ``file_path``
    keyword argument; no copy of the file is made.

    Args:
        path: Path to the file
        processor_func: Function that processes the file
        **kwargs: Additional arguments to pass to the processor function

    Returns:
        The result of the processor function

    Raises:
        FileNotFoundError: If nothing exists at the resolved path.
    """
    full_path = self._get_full_path(path)

    # Happy path first: the resolved path exists, so delegate straight away.
    if os.path.exists(full_path):
        return processor_func(file_path=full_path, **kwargs)

    raise FileNotFoundError(f"File not found: {full_path}")
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
"""S3 storage implementation."""
|
"""S3 storage implementation."""
|
||||||
import io
|
import io
|
||||||
from typing import BinaryIO, List
|
from typing import BinaryIO, List, Callable
|
||||||
|
|
||||||
import boto3
|
import boto3
|
||||||
from botocore.exceptions import ClientError
|
from botocore.exceptions import ClientError
|
||||||
@@ -24,7 +24,6 @@ class S3Storage(BaseStorage):
|
|||||||
"""
|
"""
|
||||||
self.bucket_name = bucket_name
|
self.bucket_name = bucket_name
|
||||||
|
|
||||||
# Initialize S3 client
|
|
||||||
self.s3 = boto3.client(
|
self.s3 = boto3.client(
|
||||||
's3',
|
's3',
|
||||||
aws_access_key_id=aws_access_key_id,
|
aws_access_key_id=aws_access_key_id,
|
||||||
@@ -79,3 +78,37 @@ class S3Storage(BaseStorage):
|
|||||||
result.append(obj['Key'])
|
result.append(obj['Key'])
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
def process_file(self, path: str, processor_func: Callable, **kwargs):
    """
    Process a file using the provided processor function.

    For S3 storage the object is first downloaded to a temporary local
    file; that file's path is passed to the processor as the ``file_path``
    keyword argument and the file is removed afterwards, whether the
    download or the processor succeeds or raises.

    Args:
        path: Path (object key) of the file in the bucket
        processor_func: Function that processes the file
        **kwargs: Additional arguments to pass to the processor function

    Returns:
        The result of the processor function

    Raises:
        FileNotFoundError: If ``self.file_exists(path)`` reports the
            object as missing.
    """
    # Function-scope imports: the module's visible import block does not
    # bring these names in.
    import logging
    import os
    import tempfile

    if not self.file_exists(path):
        raise FileNotFoundError(f"File not found: {path}")

    # Keep the key's extension on the temporary file.
    # NOTE(review): processors presumably pick a parser from the file
    # extension - confirm against callers.
    suffix = os.path.splitext(path)[1]

    # delete=False so the file survives being closed and can be reopened
    # by the processor via its path; removal is handled in ``finally``.
    temp_file = tempfile.NamedTemporaryFile(suffix=suffix, delete=False)
    temp_path = temp_file.name
    try:
        # The download sits inside the try block so that a failed transfer
        # does not leave the temporary file behind.
        with temp_file:
            self.s3.download_fileobj(self.bucket_name, path, temp_file)

        return processor_func(file_path=temp_path, **kwargs)
    finally:
        try:
            os.unlink(temp_path)
        except OSError as e:
            # Best-effort cleanup: never mask the processor's result or
            # exception because the temp file could not be removed.
            logging.warning("Failed to delete temporary file: %s", e)
|
||||||
|
|||||||
Reference in New Issue
Block a user