Merge pull request #3 from shamspias/feat/object_stroage

Feat/object stroage
shamspias · Sep 4, 2024 · 0fc5107 · 0fc5107
2 parents 9f4ca81 + d8154f9
commit 0fc5107
Show file tree

Hide file tree

Showing 6 changed files with 274 additions and 84 deletions.
diff --git a/app/config.py b/app/config.py
@@ -1,12 +1,21 @@
 from pydantic_settings import BaseSettings
+from typing import Optional
 
 
 class Config(BaseSettings):
-    streamlit_title: str = "VideoLabelMagic"
-    models_directory: str = "models/"
-    output_directory: str = "outputs/"
-    object_class_directory: str = "object_class/"
-    default_frame_rate: float = 1.0
+    streamlit_title: Optional[str] = "VideoLabelMagic"
+    models_directory: Optional[str] = "models/"
+    output_directory: Optional[str] = "outputs/"
+    object_class_directory: Optional[str] = "object_class/"
+    default_frame_rate: Optional[float] = 1.0
+
+    # Object storage settings
+    storage_use_s3: Optional[bool] = False
+    s3_endpoint_url: Optional[str] = ""
+    s3_access_key: Optional[str] = ""
+    s3_secret_key: Optional[str] = ""
+    s3_bucket_name: Optional[str] = ""
+    s3_region_name: Optional[str] = ""
 
     class Config:
         env_file = ".env"

diff --git a/app/extractor.py b/app/extractor.py
@@ -2,35 +2,51 @@
 import os
 from ultralytics import YOLO
 import yaml
-from utils.image_processor import ImageProcessor  # Make sure this is the correct import path
+from utils.image_processor import ImageProcessor
 
 
 class VideoFrameExtractor:
     """
     Extracts frames from video at specified intervals, applies selected transformations,
-    and annotates them using YOLO model predictions.
+    and annotates them using YOLO model predictions, with options to save locally or to object storage.
     """
 
-    def __init__(self, video_path, frame_rate, output_dir, model_path, class_config_path, output_format,
+    def __init__(self, config, video_path, frame_rate, output_dir, model_path, class_config_path, output_format,
                  transformations):
-        self.video_path = video_path
+        self.config = config
+        self.video_path = video_path  # Ensure this is a string representing the path to the video file.
         self.frame_rate = frame_rate
         self.output_dir = output_dir
         self.yolo_model = YOLO(os.path.join('models', model_path))
+        self.class_config_path = class_config_path
         self.output_format = output_format
-        self.supported_classes = self.load_classes(class_config_path)
         self.transformations = transformations
+        self.supported_classes = self.load_classes(self.class_config_path)
         self.image_processor = ImageProcessor(output_size=self.transformations.get('size', (640, 640)))
 
+        # Debugging output to ensure path handling
+        if not os.path.exists(self.video_path):
+            raise FileNotFoundError(f"The specified video file was not found at {self.video_path}")
+        else:
+            print(f"VideoFrameExtractor initialized with video path: {self.video_path}")
+
     def load_classes(self, config_path):
+        """
+        Load classes from a YAML configuration file.
+        """
+        if not os.path.exists(config_path):
+            raise FileNotFoundError(f"Configuration file not found at {config_path}")
         with open(config_path, 'r') as file:
             class_data = yaml.safe_load(file)
         return [cls['name'] for cls in class_data['classes']]
 
     def extract_frames(self, model_confidence):
+        """
+        Extract and process frames from the video, and save them using the specified output format.
+        """
         cap = cv2.VideoCapture(self.video_path)
         if not cap.isOpened():
-            raise FileNotFoundError(f"Unable to open video file: {self.video_path}")
+            raise ValueError(f"Failed to open video stream for {self.video_path}")
 
         video_fps = cap.get(cv2.CAP_PROP_FPS)
         frame_interval = max(1, int(video_fps / self.frame_rate))
@@ -51,6 +67,8 @@ def extract_frames(self, model_confidence):
 
                     frame_filename = f"{self._get_video_basename()}_image{frame_count}_{key}.jpg"
                     frame_path = os.path.join(self.output_dir, 'images', frame_filename)
+
+                    # Save images locally or to configured storage
                     cv2.imwrite(frame_path, transformed_image)
                     results = self.yolo_model.predict(transformed_image, conf=model_confidence)
                     self.output_format.save_annotations(transformed_image, frame_path, frame_filename, results,
@@ -61,29 +79,29 @@ def extract_frames(self, model_confidence):
         cap.release()
 
     def apply_transformations(self, frame):
-        # Dictionary to hold transformed images
+        """
+        Apply selected transformations to the frame and return a dictionary of transformed images.
+        """
         transformed_images = {}
-
-        # Apply resizing if selected
         if 'resize' in self.transformations and self.transformations['resize']:
             frame = self.image_processor.resize_image(frame)
-            transformed_images['resized'] = frame  # Store resized image
+            transformed_images['resized'] = frame
 
-        # Apply grayscale transformation if selected
         if 'grayscale' in self.transformations and self.transformations['grayscale']:
             grayscale_image = self.image_processor.convert_to_grayscale(frame)
-            transformed_images['grayscale'] = grayscale_image  # Store grayscale image
+            transformed_images['grayscale'] = grayscale_image
 
-        # Apply 90-degree rotation if selected
         if 'rotate' in self.transformations and self.transformations['rotate']:
             rotated_image = self.image_processor.rotate_image_90_degrees(frame)
-            transformed_images['rotated'] = rotated_image  # Store rotated image
+            transformed_images['rotated'] = rotated_image
 
-        # If no transformations are selected, add the original image
         if not transformed_images:
             transformed_images['original'] = frame
 
         return transformed_images
 
     def _get_video_basename(self):
+        """
+        Extract the basename of the video file without extension.
+        """
         return os.path.splitext(os.path.basename(self.video_path))[0]
diff --git a/app/main.py b/app/main.py
@@ -1,85 +1,146 @@
 import os
 import uuid
+import shutil
 import streamlit as st
 from config import Config
 from extractor import VideoFrameExtractor
 from formats.roboflow_format import RoboflowFormat
 from formats.cvat_format import CVATFormat
-from utils.image_processor import ImageProcessor  # Import the ImageProcessor
-
-# Import other formats if available
-
-config = Config()
-
-st.title(config.streamlit_title)
-
-uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'avi', 'mov'])
-
-# Update directory path for class configurations
-class_config_files = [f for f in os.listdir(config.object_class_directory) if f.endswith('.yaml')]
-class_config_selection = st.selectbox("Choose class configuration:", class_config_files)
-
-# Filter for files ending with .pt
-models = [file for file in os.listdir(config.models_directory) if file.endswith('.pt')]
-model_selection = st.selectbox("Choose a model:", models)
-
-output_dir = st.text_input("Output directory", config.output_directory)
-frame_rate = st.number_input("Frame rate", value=config.default_frame_rate)
-model_confidence = st.number_input("Model Confidence", value=0.1)
-
-# New fields for image transformation options
-image_width = st.number_input("Image width", value=640)
-image_height = st.number_input("Image height", value=640)
-
-transformation_options = st.multiselect('Select image transformations:', ['Resize', 'Grayscale', 'Rotate 90 degrees'])
-transformations = {
-    'resize': 'Resize' in transformation_options,
-    'grayscale': 'Grayscale' in transformation_options,
-    'rotate': 'Rotate 90 degrees' in transformation_options
-}
-
-# Allow users to choose the output format
-format_options = {'Roboflow': RoboflowFormat, 'CVAT': CVATFormat}  # Add more formats to this dictionary
-format_selection = st.selectbox("Choose output format:", list(format_options.keys()))
-
-if st.button('Extract Frames'):
-    if uploaded_file is not None:
-        # Create temp directory if it does not exist
+from utils.storage_manager import StorageManager
+
+
+class VideoLabelApp:
+    def __init__(self):
+        """
+        Load the settings, create the storage manager and render the Streamlit UI.
+        """
+        self.config = Config()
+        self.storage_manager = StorageManager(self.config)
+        # Maps the label offered in the output-format selectbox to the class instantiated for export
+        self.format_options = {'Roboflow': RoboflowFormat, 'CVAT': CVATFormat}
+        # Called last: the UI handlers read the attributes assigned above
+        self.setup_ui()
+
+    def setup_ui(self):
+        """
+        Render the page title and the sidebar storage selector, then hand over to the
+        handler for the selected storage type.
+        """
+        st.title(self.config.streamlit_title)
+        st.sidebar.header("Storage Options")
+        # Stored on the instance because process_video and run_extraction branch on it later
+        self.storage_option = st.sidebar.radio("Choose storage type:", ('Local', 'Object Storage'))
+
+        if self.storage_option == 'Object Storage':
+            self.handle_object_storage()
+        elif self.storage_option == 'Local':
+            self.handle_local_storage()
+
+    def handle_object_storage(self):
+        """
+        Let the user pick a file listed from the bucket, then render the remaining controls.
+
+        When ``storage_use_s3`` is disabled a sidebar error is shown and nothing else is rendered.
+        """
+        if not self.config.storage_use_s3:
+            st.sidebar.error("Object storage is not configured properly in .env file.")
+            return
+        # NOTE(review): the listing is offered as returned; presumably it can include non-video objects - confirm
+        files = self.storage_manager.list_files_in_bucket()
+        self.selected_file = st.selectbox("Select a file from Object Storage:", files)
+        self.continue_ui()
+
+    def handle_local_storage(self):
+        """
+        Show the video uploader (mp4, avi or mov) and render the remaining controls.
+        """
+        self.uploaded_file = st.file_uploader("Upload a video file", type=['mp4', 'avi', 'mov'])
+        self.continue_ui()
+
+    def continue_ui(self):
+        """
+        Render the controls shared by both storage modes and start processing on request.
+
+        The selections (class configuration, model, frame rate, model confidence,
+        transformations and output format) are stored on the instance for the
+        processing methods; pressing 'Extract Frames' calls ``process_video``.
+        """
+        # Class configurations are the .yaml files found in the configured directory
+        class_config_files = [f for f in os.listdir(self.config.object_class_directory) if f.endswith('.yaml')]
+        self.class_config_selection = st.selectbox("Choose class configuration:", class_config_files)
+        # Models are the .pt files found in the configured directory
+        models = [file for file in os.listdir(self.config.models_directory) if file.endswith('.pt')]
+        self.model_selection = st.selectbox("Choose a model:", models)
+        self.frame_rate = st.number_input("Frame rate", value=self.config.default_frame_rate)
+        self.model_confidence = st.number_input("Model Confidence", value=0.1)
+        transformation_options = st.multiselect('Select image transformations:',
+                                                ['Resize', 'Grayscale', 'Rotate 90 degrees'])
+        # One boolean flag per transformation; this dict is passed on to VideoFrameExtractor
+        self.transformations = {
+            'resize': 'Resize' in transformation_options,
+            'grayscale': 'Grayscale' in transformation_options,
+            'rotate': 'Rotate 90 degrees' in transformation_options
+        }
+        self.format_selection = st.selectbox("Choose output format:", list(self.format_options.keys()))
+        if st.button('Extract Frames'):
+            self.process_video()
+
+    def process_video(self):
+        """
+        Run the extraction for the selected storage type.
+
+        An uploaded file is required in 'Local' mode and a selected bucket file in
+        'Object Storage' mode; when neither is available an error is shown to the user.
+        """
+        if self.storage_option == 'Local' and self.uploaded_file is not None:
+            self.process_local_video()
+        elif self.storage_option == 'Object Storage' and self.selected_file:
+            self.process_cloud_storage_video()
+        else:
+            # Without this branch a click with no input file is silently ignored
+            st.error("Please upload or select a video file to proceed.")
+
+    def process_local_video(self):
         temp_dir = 'temp'
         os.makedirs(temp_dir, exist_ok=True)
-
-        # Generate a unique filename
-        unique_filename = uploaded_file.name[:5] + "_" + str(uuid.uuid4())
+        unique_filename = self.uploaded_file.name[:5] + "_" + str(uuid.uuid4())
         video_filename = unique_filename + ".mp4"
         video_path = os.path.join(temp_dir, video_filename)
-
-        # Save the uploaded file
         with open(video_path, 'wb') as f:
-            f.write(uploaded_file.getbuffer())
+            f.write(self.uploaded_file.getbuffer())
+        self.run_extraction(video_path, unique_filename)
+
+    def process_cloud_storage_video(self):
+        """
+        Handle the download of the file from cloud storage, rename it similar to the local process,
+        and perform the frame extraction.
+        """
+        temp_dir = 'temp'
+        os.makedirs(temp_dir, exist_ok=True)
 
-        # Construct the class configuration path
-        class_config_path = os.path.join(config.object_class_directory, class_config_selection)
+        # Generate unique filename similar to the local upload handling
+        file_basename = os.path.basename(self.selected_file)
+        unique_filename = file_basename[:5] + "_" + str(uuid.uuid4()) + ".mp4"  # Consistent renaming
+        video_path = os.path.join(temp_dir, unique_filename)
 
-        # Create a specific output directory named after the unique file
-        specific_output_dir = os.path.join(output_dir, unique_filename)
-        os.makedirs(specific_output_dir, exist_ok=True)
+        # Download the file from S3 into the temp directory
+        self.storage_manager.download_file_from_s3(self.selected_file, video_path)
 
-        # Instantiate the selected output format
-        output_format_instance = format_options[format_selection](specific_output_dir)
+        # Proceed to run the extraction process
+        self.run_extraction(video_path, unique_filename)
 
-        # Extract frames using the VideoFrameExtractor with the chosen format
+    def run_extraction(self, video_path, unique_filename):
+        class_config_path = os.path.join(self.config.object_class_directory, self.class_config_selection)
+        specific_output_dir = os.path.join(self.config.output_directory, unique_filename)
+        os.makedirs(specific_output_dir, exist_ok=True)
+        output_format_instance = self.format_options[self.format_selection](specific_output_dir)
         try:
-            extractor = VideoFrameExtractor(video_path, frame_rate, specific_output_dir, model_selection,
-                                            class_config_path, output_format_instance, transformations)
-            extractor.extract_frames(model_confidence)
-
-            if format_selection == "CVAT":  # If CVAT export then it will save as zip format
+            extractor = VideoFrameExtractor(self.config, video_path, self.frame_rate, specific_output_dir,
+                                            self.model_selection, class_config_path, output_format_instance,
+                                            self.transformations)
+            extractor.extract_frames(self.model_confidence)
+            if self.format_selection == "CVAT":
                 output_format_instance.zip_and_cleanup()
+            if self.storage_option == 'Object Storage':
+                self.upload_outputs(specific_output_dir)
+
+            # Clean up: Remove the temporary video file after processing
+            if os.path.exists(video_path):
+                os.remove(video_path)
+                print(f"Deleted temporary video file: {video_path}")
 
             st.success('Extraction Completed!')
-            # Delete the temporary video file after successful extraction
-            os.remove(video_path)
         except Exception as e:
             st.error(f"An error occurred during frame extraction: {str(e)}")
-    else:
-        st.error("Please upload a file to proceed.")
+
+    def upload_outputs(self, directory):
+        """
+        Upload every file under ``directory`` to the S3 bucket, then delete the local copy.
+
+        Object keys have the form ``processed/<directory name>/<path inside it>`` and always
+        use forward slashes, whatever path separator the local operating system uses.
+
+        Args:
+            directory (str): The local directory path containing the files to be uploaded.
+        """
+        # Keys are taken relative to the parent so the directory's own name stays in the key
+        base_path = os.path.dirname(directory)
+
+        for root, _dirs, files in os.walk(directory):
+            for file in files:
+                local_file_path = os.path.join(root, file)
+                relative_path = os.path.relpath(local_file_path, base_path)
+                # S3 keys are '/'-delimited; os.path.join would put '\' into the key on Windows
+                s3_object_name = "/".join(["processed", *relative_path.split(os.sep)])
+                self.storage_manager.upload_file_to_s3(local_file_path, s3_object_name)
+                print(f"Uploaded {local_file_path} to S3 as {s3_object_name}")
+
+        # Reached only if no upload raised: remove the local outputs
+        shutil.rmtree(directory)
+        print(f"Deleted local directory after upload: {directory}")
+
+
+if __name__ == "__main__":
+    app = VideoLabelApp()
diff --git a/example.env b/example.env
@@ -8,3 +8,11 @@ OUTPUT_DIRECTORY=outputs/
 
 # Frames to extract per second of video (sampling interval = video FPS / this value); e.g. 0.5 extracts one frame every 2 seconds
 DEFAULT_FRAME_RATE=1.0
+
+# S3 Storage Settings
+STORAGE_USE_S3=False
+S3_ENDPOINT_URL=https://your-s3-endpoint.com
+S3_ACCESS_KEY=your_access_key
+S3_SECRET_KEY=your_secret_key
+S3_BUCKET_NAME=your_bucket_name
+S3_REGION_NAME=us-east-1
diff --git a/requirements.txt b/requirements.txt
@@ -5,4 +5,5 @@ python-dotenv
 ultralytics
 pillow
 PyYAML
-opencv-python
+opencv-python
+boto3