diff --git a/ml3d/datasets/tumfacade.py b/ml3d/datasets/tumfacade.py index a28ff769..01bcfa23 100644 --- a/ml3d/datasets/tumfacade.py +++ b/ml3d/datasets/tumfacade.py @@ -1,16 +1,16 @@ -from .base_dataset import BaseDataset, BaseDatasetSplit import glob from pathlib import Path import logging -from os.path import join, exists -from ..utils import make_dir, DATASET import numpy as np import open3d as o3d +from ..utils import DATASET +from .base_dataset import BaseDataset, BaseDatasetSplit log = logging.getLogger(__name__) class TUMFacade(BaseDataset): + def __init__(self, dataset_path, info_path=None, @@ -19,29 +19,77 @@ def __init__(self, use_cache=False, use_global=False, **kwargs): - - """Initialize the function by passing the dataset and other details. - - Args: - dataset_path: The path to the dataset to use. - info_path: The path to the file that includes information about + """Dataset classes for the TUM-Facade dataset. Semantic segmentation + annotations over TUM-MLS-2016 point cloud data. + + Website: https://mediatum.ub.tum.de/node?id=1636761 + Code: https://github.com/OloOcki/tum-facade + Download: + - Original: https://dataserv.ub.tum.de/index.php/s/m1636761.003 + - Processed: https://tumde-my.sharepoint.com/:f:/g/personal/olaf_wysocki_tum_de/EjA8B_KGDyFEulRzmq-CG1QBBL4dZ7z5PoHeI8zMD0JxIQ?e=9MrMcl + Data License: CC BY-NC-SA 4.0 + Citation: + - Paper: Wysocki, O. and Hoegner, L. 
and Stilla, U., TUM-FAÇADE: + Reviewing and enriching point cloud benchmarks for façade + segmentation, ISPRS 2022 + - Dataset: Wysocki, Olaf and Tan, Yue and Zhang, Jiarui and + Stilla, Uwe, TUM-FACADE dataset, TU Munich, 2023 + + README file from processed dataset website: + + The dataset split is provided in the following folder structure + + -->tum-facade + -->pointclouds + -->annotatedGlobalCRS + -->test_files + -->training_files + -->validation_files + -->annotatedLocalCRS + -->test_files + -->training_files + -->validation_files + + The individual point clouds are compressed as .7z files and are + stored in the .pcd format. + + To make use of the dataset split in open3D-ML, all the point cloud + files have to be unpacked with 7Zip. The folder structure itself + must not be modified, else the reading functionalities in open3D-ML + are not going to work. As a path to the dataset, the path to the + 'tum-facade' folder must be set. + + The dataset is split in the following way (10.08.2023): + + Testing : Building Nr. 23 + Training : Buildings Nr. 57, Nr.58, Nr. 60 + Validation : Buildings Nr. 22, Nr.59, Nr. 62, Nr. 81 + + + Initialize the function by passing the dataset and other details. + + Args: + dataset_path: The path to the dataset to use. + info_path: The path to the file that includes information about the dataset. This is default to dataset path if nothing is provided. - name: The name of the dataset (TUM_Facade in this case). - cache_dir: The directory where the cache is stored. - use_cache: Indicates if the dataset should be cached. - use_global: Inidcates if the dataset should be used in a loca or the global CRS - - Returns: - class: The corresponding class. - """ - super().__init__(dataset_path=dataset_path, - info_path=info_path, - name=name, - cache_dir=cache_dir, - use_cache=use_cache, - use_global=use_global, # Diese habe ich selbst hinzugefügt - **kwargs) + name: The name of the dataset (TUM_Facade in this case). 
+ cache_dir: The directory where the cache is stored. + use_cache: Indicates if the dataset should be cached. + use_global: Indicates if the dataset should be used in a local or + the global CRS + + Returns: + class: The corresponding class. + """ + super().__init__( + dataset_path=dataset_path, + info_path=info_path, + name=name, + cache_dir=cache_dir, + use_cache=use_cache, + use_global=use_global, # I added this one myself + **kwargs) cfg = self.cfg self.name = cfg.name self.dataset_path = cfg.dataset_path @@ -52,23 +100,42 @@ def __init__(self, if self.use_global: # Finding all the training files - self.trainFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'training_files' / '*.pcd')) + self.trainFiles = glob.glob( + str( + Path(cfg.dataset_path) / 'pointclouds' / + 'annotatedGlobalCRS' / 'training_files' / '*.pcd')) # Finding all the validation Files - self.valFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'validation_files' / '*.pcd')) + self.valFiles = glob.glob( + str( + Path(cfg.dataset_path) / 'pointclouds' / + 'annotatedGlobalCRS' / 'validation_files' / '*.pcd')) # Finding all the test files - self.testFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'test_files' / '*.pcd')) + self.testFiles = glob.glob( + str( + Path(cfg.dataset_path) / 'pointclouds' / + 'annotatedGlobalCRS' / 'test_files' / '*.pcd')) elif not self.use_global: # Finding all the training files - self.trainFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'training_files' / '*.pcd')) + self.trainFiles = glob.glob( + str( + Path(cfg.dataset_path) / 'pointclouds' / + 'annotatedLocalCRS' / 'training_files' / '*.pcd')) # Finding all the validation Files - self.valFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'validation_files' / '*.pcd')) + self.valFiles = glob.glob( + str( + Path(cfg.dataset_path) 
/ 'pointclouds' / + 'annotatedLocalCRS' / 'validation_files' / '*.pcd')) # Finding all the test files - self.testFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'test_files' / '*.pcd')) + self.testFiles = glob.glob( + str( + Path(cfg.dataset_path) / 'pointclouds' / + 'annotatedLocalCRS' / 'test_files' / '*.pcd')) else: - raise ValueError("Invalid specification! use_global must either be True or False!") - + raise ValueError( + "Invalid specification! use_global must either be True or False!" + ) @staticmethod def get_label_to_names(): # @@ -131,8 +198,7 @@ def get_split_list(self, split): def is_tested(self, attr): - pass - + pass def save_test_result(self, results, attr): @@ -140,10 +206,12 @@ def save_test_result(self, results, attr): class TUMFacadeSplit(BaseDatasetSplit): + def __init__(self, dataset, split='train'): super().__init__(dataset, split=split) log.info("Found {} pointclouds for {}".format(len(self.path_list), split)) + def __len__(self): return len(self.path_list) @@ -166,4 +234,5 @@ def get_attr(self, idx): attr = {'idx': idx, 'name': name, 'path': pc_path, 'split': split} return attr -DATASET._register_module(TUMFacade) \ No newline at end of file + +DATASET._register_module(TUMFacade)