Skip to content

Commit

Permalink
Add dataset docs from author comments and links
Browse files Browse the repository at this point in the history
  • Loading branch information
ssheorey committed Dec 22, 2023
1 parent cd60915 commit 77f3155
Showing 1 changed file with 104 additions and 35 deletions.
139 changes: 104 additions & 35 deletions ml3d/datasets/tumfacade.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
from .base_dataset import BaseDataset, BaseDatasetSplit
import glob
from pathlib import Path
import logging
from os.path import join, exists
from ..utils import make_dir, DATASET
import numpy as np
import open3d as o3d
from ..utils import DATASET
from .base_dataset import BaseDataset, BaseDatasetSplit

log = logging.getLogger(__name__)


class TUMFacade(BaseDataset):

def __init__(self,
dataset_path,
info_path=None,
Expand All @@ -19,29 +19,77 @@ def __init__(self,
use_cache=False,
use_global=False,
**kwargs):

"""Initialize the function by passing the dataset and other details.
Args:
dataset_path: The path to the dataset to use.
info_path: The path to the file that includes information about
"""Dataset classes for the TUM-Facade dataset. Semantic segmentation
annotations over TUM-MLS-2016 point cloud data.
Website: https://mediatum.ub.tum.de/node?id=1636761
Code: https://github.com/OloOcki/tum-facade
Download:
- Original: https://dataserv.ub.tum.de/index.php/s/m1636761.003
- Processed: https://tumde-my.sharepoint.com/:f:/g/personal/olaf_wysocki_tum_de/EjA8B_KGDyFEulRzmq-CG1QBBL4dZ7z5PoHeI8zMD0JxIQ?e=9MrMcl
Data License: CC BY-NC-SA 4.0
Citation:
- Paper: Wysocki, O. and Hoegner, L. and Stilla, U., TUM-FAÇADE:
Reviewing and enriching point cloud benchmarks for façade
segmentation, ISPRS 2022
- Dataset: Wysocki, Olaf and Tan, Yue and Zhang, Jiarui and
Stilla, Uwe, TUM-FACADE dataset, TU Munich, 2023
README file from processed dataset website:
The dataset split is provided in the following folder structure
-->tum-facade
-->pointclouds
-->annotatedGlobalCRS
-->test_files
-->training_files
-->validation_files
-->annotatedLocalCRS
-->test_files
-->training_files
-->validation_files
The individual point clouds are compressed as .7z files and are
stored in the .pcd format.
To make use of the dataset split in open3D-ML, all the point cloud
files have to be unpacked with 7Zip. The folder structure itself
must not be modified, else the reading functionalities in open3D-ML
are not going to work. As a path to the dataset, the path to the
'tum-facade' folder must be set.
The dataset is split in the following way (10.08.2023):
Testing : Building Nr. 23
Training : Buildings Nr. 57, Nr.58, Nr. 60
Validation : Buildings Nr. 22, Nr.59, Nr. 62, Nr. 81
Initialize the function by passing the dataset and other details.
Args:
dataset_path: The path to the dataset to use.
info_path: The path to the file that includes information about
the dataset. This is default to dataset path if nothing is
provided.
name: The name of the dataset (TUM_Facade in this case).
cache_dir: The directory where the cache is stored.
use_cache: Indicates if the dataset should be cached.
use_global: Inidcates if the dataset should be used in a loca or the global CRS
Returns:
class: The corresponding class.
"""
super().__init__(dataset_path=dataset_path,
info_path=info_path,
name=name,
cache_dir=cache_dir,
use_cache=use_cache,
use_global=use_global, # Diese habe ich selbst hinzugefügt
**kwargs)
name: The name of the dataset (TUM_Facade in this case).
cache_dir: The directory where the cache is stored.
use_cache: Indicates if the dataset should be cached.
use_global: Indicates if the dataset should be used in a local or
the global CRS
Returns:
class: The corresponding class.
"""
super().__init__(
dataset_path=dataset_path,
info_path=info_path,
name=name,
cache_dir=cache_dir,
use_cache=use_cache,
use_global=use_global, # Diese habe ich selbst hinzugefügt
**kwargs)
cfg = self.cfg
self.name = cfg.name
self.dataset_path = cfg.dataset_path
Expand All @@ -52,23 +100,42 @@ def __init__(self,

if self.use_global:
# Finding all the training files
self.trainFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'training_files' / '*.pcd'))
self.trainFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedGlobalCRS' / 'training_files' / '*.pcd'))
# Finding all the validation Files
self.valFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'validation_files' / '*.pcd'))
self.valFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedGlobalCRS' / 'validation_files' / '*.pcd'))
# Finding all the test files
self.testFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedGlobalCRS' / 'test_files' / '*.pcd'))
self.testFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedGlobalCRS' / 'test_files' / '*.pcd'))

elif not self.use_global:
# Finding all the training files
self.trainFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'training_files' / '*.pcd'))
self.trainFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedLocalCRS' / 'training_files' / '*.pcd'))
# Finding all the validation Files
self.valFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'validation_files' / '*.pcd'))
self.valFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedLocalCRS' / 'validation_files' / '*.pcd'))
# Finding all the test files
self.testFiles = glob.glob(str(Path(cfg.dataset_path) / 'pointclouds' / 'annotatedLocalCRS' / 'test_files' / '*.pcd'))
self.testFiles = glob.glob(
str(
Path(cfg.dataset_path) / 'pointclouds' /
'annotatedLocalCRS' / 'test_files' / '*.pcd'))

else:
raise ValueError("Invalid specification! use_global must either be True or False!")

raise ValueError(
"Invalid specification! use_global must either be True or False!"
)

@staticmethod
def get_label_to_names(): #
Expand Down Expand Up @@ -131,19 +198,20 @@ def get_split_list(self, split):

def is_tested(self, attr):
    """Check whether a test result has already been stored for the sample
    described by ``attr``.

    Not implemented for this dataset: the method is a no-op and returns
    ``None``.
    """
    pass

    pass

def save_test_result(self, results, attr):
    """Save the predicted ``results`` for the sample described by ``attr``.

    Not implemented for this dataset: the method is a no-op and returns
    ``None``.
    """
    pass


class TUMFacadeSplit(BaseDatasetSplit):

def __init__(self, dataset, split='train'):
    """Initialize the dataset split.

    Args:
        dataset: The dataset object this split belongs to.
        split: Which split to use; defaults to 'train'.
    """
    super().__init__(dataset, split=split)
    # self.path_list is populated by the base-class constructor above.
    log.info("Found {} pointclouds for {}".format(len(self.path_list),
                                                  split))

def __len__(self):
    """Return the number of point clouds in this split."""
    return len(self.path_list)

Expand All @@ -166,4 +234,5 @@ def get_attr(self, idx):
attr = {'idx': idx, 'name': name, 'path': pc_path, 'split': split}
return attr

# Register the dataset class with the global dataset registry so it can be
# looked up by name from configuration files.
# NOTE(review): this line appears twice in the diff view because it was
# moved; the actual file registers TUMFacade only once — confirm against
# the repository.
DATASET._register_module(TUMFacade)

DATASET._register_module(TUMFacade)

0 comments on commit 77f3155

Please sign in to comment.