diff --git a/.gitignore b/.gitignore index 1ed5a1d..1f92441 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,7 @@ data/full_dataset logs/ code/simclr-pytorch-reefs/models/__pycache__/ +code/simclr-pytorch-reefs/myexman/ diff --git a/code/simclr-pytorch-reefs/myexman/__init__.py b/code/simclr-pytorch-reefs/myexman/__init__.py deleted file mode 100644 index 9bd5df0..0000000 --- a/code/simclr-pytorch-reefs/myexman/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .parser import ( - ExParser, - simpleroot -) -from .index import ( - Index -) -from . import index -from . import parser -__version__ = '0.0.2' diff --git a/code/simclr-pytorch-reefs/myexman/__pycache__/__init__.cpython-38.pyc b/code/simclr-pytorch-reefs/myexman/__pycache__/__init__.cpython-38.pyc deleted file mode 100644 index e4a9064..0000000 Binary files a/code/simclr-pytorch-reefs/myexman/__pycache__/__init__.cpython-38.pyc and /dev/null differ diff --git a/code/simclr-pytorch-reefs/myexman/__pycache__/index.cpython-38.pyc b/code/simclr-pytorch-reefs/myexman/__pycache__/index.cpython-38.pyc deleted file mode 100644 index 71dfbe2..0000000 Binary files a/code/simclr-pytorch-reefs/myexman/__pycache__/index.cpython-38.pyc and /dev/null differ diff --git a/code/simclr-pytorch-reefs/myexman/__pycache__/parser.cpython-38.pyc b/code/simclr-pytorch-reefs/myexman/__pycache__/parser.cpython-38.pyc deleted file mode 100644 index f8ebf4e..0000000 Binary files a/code/simclr-pytorch-reefs/myexman/__pycache__/parser.cpython-38.pyc and /dev/null differ diff --git a/code/simclr-pytorch-reefs/myexman/index.py b/code/simclr-pytorch-reefs/myexman/index.py deleted file mode 100644 index 16658d5..0000000 --- a/code/simclr-pytorch-reefs/myexman/index.py +++ /dev/null @@ -1,91 +0,0 @@ -import configargparse -import pandas as pd -import pathlib -import strconv -import json -import functools -import datetime -from . import parser -import yaml -from argparse import Namespace -__all__ = [ - 'Index' -] - - -def only_value_error(conv): - @functools.wraps(conv) - def new_conv(value): - try: - return conv(value) - except Exception as e: - raise ValueError from e - return new_conv - - -def none2none(none): - if none is None: - return None - else: - raise ValueError - - -converter = strconv.Strconv(converters=[ - ('int', strconv.convert_int), - ('float', strconv.convert_float), - ('bool', only_value_error(parser.str2bool)), - ('time', strconv.convert_time), - ('datetime', strconv.convert_datetime), - ('datetime1', lambda time: datetime.datetime.strptime(time, parser.TIME_FORMAT)), - ('date', strconv.convert_date), - ('json', only_value_error(json.loads)), -]) - - -def get_args(path): - with open(path, 'rb') as f: - return Namespace(**yaml.load(f)) - - -class Index(object): - def __init__(self, root): - self.root = pathlib.Path(root) - - @property - def index(self): - return self.root / 'index' - - @property - def marked(self): - return self.root / 'marked' - - def info(self, source=None, nlast=None): - if source is None: - source = self.index - files = source.iterdir() - if nlast is not None: - files = sorted(list(files))[-nlast:] - else: - source = self.marked / source - files = source.glob('**/*/'+parser.PARAMS_FILE) - - def get_dict(cfg): - return configargparse.YAMLConfigFileParser().parse(cfg.open('r')) - - def convert_column(col): - if any(isinstance(v, str) for v in converter.convert_series(col)): - return col - else: - return pd.Series(converter.convert_series(col), name=col.name, index=col.index) - try: - df = (pd.DataFrame - .from_records((get_dict(c) for c in files)) - .apply(lambda s: convert_column(s)) - .sort_values('id') - .assign(root=lambda _: _.root.apply(self.root.__truediv__)) - .reset_index(drop=True)) - cols = df.columns.tolist() - cols.insert(0, cols.pop(cols.index('id'))) - return df.reindex(columns=cols) - except FileNotFoundError as e: - raise KeyError(source.name) from e diff --git a/code/simclr-pytorch-reefs/myexman/parser.py b/code/simclr-pytorch-reefs/myexman/parser.py deleted file mode 100644 index b65ed07..0000000 --- a/code/simclr-pytorch-reefs/myexman/parser.py +++ /dev/null @@ -1,232 +0,0 @@ -import configargparse -import argparse -import pathlib -import datetime -import yaml -import yaml.representer -import os -import functools -import itertools -from filelock import FileLock -__all__ = [ - 'ExParser', - 'simpleroot', -] - - -TIME_FORMAT_DIR = '%Y-%m-%d-%H-%M-%S' -TIME_FORMAT = '%Y-%m-%dT%H:%M:%S' -DIR_FORMAT = '{num}' -EXT = 'yaml' -PARAMS_FILE = 'params.'+EXT -FOLDER_DEFAULT = 'exman' -RESERVED_DIRECTORIES = { - 'runs', 'index', - 'tmp', 'marked' -} - - -def yaml_file(name): - return name + '.' + EXT - - -def simpleroot(__file__): - return pathlib.Path(os.path.dirname(os.path.abspath(__file__)))/FOLDER_DEFAULT - - -def represent_as_str(self, data, tostr=str): - return yaml.representer.Representer.represent_str(self, tostr(data)) - - -def register_str_converter(*types, tostr=str): - for T in types: - yaml.add_representer(T, functools.partial(represent_as_str, tostr=tostr)) - - -register_str_converter(pathlib.PosixPath, pathlib.WindowsPath) - - -def str2bool(s): - true = ('true', 't', 'yes', 'y', 'on', '1') - false = ('false', 'f', 'no', 'n', 'off', '0') - - if s.lower() in true: - return True - elif s.lower() in false: - return False - else: - raise argparse.ArgumentTypeError(s, 'bool argument should be one of {}'.format(str(true + false))) - - -class ParserWithRoot(configargparse.ArgumentParser): - def __init__(self, *args, root=None, zfill=6, - **kwargs): - super().__init__(*args, **kwargs) - if root is None: - raise ValueError('Root directory is not specified') - root = pathlib.Path(root) - if not root.is_absolute(): - raise ValueError(root, 'Root directory is not absolute path') - if not root.exists(): - raise ValueError(root, 'Root directory does not exist') - self.root = pathlib.Path(root) - self.zfill = zfill - self.register('type', bool, str2bool) - for directory in RESERVED_DIRECTORIES: - getattr(self, directory).mkdir(exist_ok=True) - self.lock = FileLock(str(self.root/'lock')) - - @property - def runs(self): - return self.root / 'runs' - - @property - def marked(self): - return self.root / 'marked' - - @property - def index(self): - return self.root / 'index' - - @property - def tmp(self): - return self.root / 'tmp' - - def max_ex(self): - max_num = 0 - for directory in itertools.chain(self.runs.iterdir(), self.tmp.iterdir()): - num = int(directory.name.split('-', 1)[0]) - if num > max_num: - max_num = num - return max_num - - def num_ex(self): - return len(list(self.runs.iterdir())) - - def next_ex(self): - return self.max_ex() + 1 - - def next_ex_str(self): - return str(self.next_ex()).zfill(self.zfill) - - -class ExParser(ParserWithRoot): - """ - Parser responsible for creating the following structure of experiments - ``` - root - |-- runs - | `-- xxxxxx-YYYY-mm-dd-HH-MM-SS - | |-- params.yaml - | `-- ... - |-- index - | `-- xxxxxx-YYYY-mm-dd-HH-MM-SS.yaml (symlink) - |-- marked - | `-- - | `-- xxxxxx-YYYY-mm-dd-HH-MM-SS (symlink) - | |-- params.yaml - | `-- ... - `-- tmp - `-- xxxxxx-YYYY-mm-dd-HH-MM-SS - |-- params.yaml - `-- ... - ``` - """ - def __init__(self, *args, zfill=6, file=None, - args_for_setting_config_path=('--config', ), - automark=(), - parents=[], - **kwargs): - - root = os.path.join(os.path.abspath(os.environ.get('EXMAN_PATH', './logs')), ('exman-' + str(file))) - if not os.path.exists(root): - os.makedirs(root) - - if len(parents) == 1: - self.yaml_params_path = parents[0].yaml_params_path - root = parents[0].root - - super().__init__(*args, root=root, zfill=zfill, - args_for_setting_config_path=args_for_setting_config_path, - config_file_parser_class=configargparse.YAMLConfigFileParser, - ignore_unknown_config_file_keys=True, - parents=parents, - formatter_class=argparse.ArgumentDefaultsHelpFormatter, - **kwargs) - self.automark = automark - if len(parents) == 0: - self.add_argument('--tmp', action='store_true') - - def _initialize_dir(self, tmp): - try: - # with self.lock: # different processes can make it same time, this is needed to avoid collision - time = datetime.datetime.now() - num = self.next_ex_str() - name = DIR_FORMAT.format(num=num, time=time.strftime(TIME_FORMAT_DIR)) - if tmp: - absroot = self.tmp / name - relroot = pathlib.Path('tmp') / name - else: - absroot = self.runs / name - relroot = pathlib.Path('runs') / name - # this process now safely owns root directory - # raises FileExistsError on fail - absroot.mkdir() - except FileExistsError: # shit still happens - return self._initialize_dir(tmp) - return absroot, relroot, name, time, num - - def parse_known_args(self, *args, log_params=True, **kwargs): - args, argv = super().parse_known_args(*args, **kwargs) - if not log_params: - return args, argv - - if hasattr(self, 'yaml_params_path'): - with self.yaml_params_path.open('w') as f: - self.dumpd = args.__dict__.copy() - yaml.dump(self.dumpd, f, default_flow_style=False) - print("\ntime: '{}'".format(self.time.strftime(TIME_FORMAT)), file=f) - print("id:", int(self.num), file=f) - print(self.yaml_params_path.read_text()) - return args, argv - - absroot, relroot, name, time, num = self._initialize_dir(args.tmp) - self.time = time - self.num = num - args.root = absroot - self.yaml_params_path = args.root / PARAMS_FILE - rel_yaml_params_path = pathlib.Path('..', 'runs', name, PARAMS_FILE) - with self.yaml_params_path.open('a') as f: - self.dumpd = args.__dict__.copy() - # dumpd['root'] = relroot - yaml.dump(self.dumpd, f, default_flow_style=False) - print("\ntime: '{}'".format(time.strftime(TIME_FORMAT)), file=f) - print("id:", int(num), file=f) - print(self.yaml_params_path.read_text()) - symlink = self.index / yaml_file(name) - if not args.tmp: - symlink.symlink_to(rel_yaml_params_path) - print('Created symlink from', symlink, '->', rel_yaml_params_path) - if self.automark and not args.tmp: - automark_path_part = pathlib.Path(*itertools.chain.from_iterable( - (mark, str(getattr(args, mark, ''))) - for mark in self.automark)) - markpath = pathlib.Path(self.marked, automark_path_part) - markpath.mkdir(exist_ok=True, parents=True) - relpathmark = pathlib.Path('..', *(['..']*len(automark_path_part.parts))) / 'runs' / name - (markpath / name).symlink_to(relpathmark, target_is_directory=True) - print('Created symlink from', markpath / name, '->', relpathmark) - return args, argv - - def done(self): - print('Success.') - self.dumpd['status'] = 'done' - with self.yaml_params_path.open('a') as f: - yaml.dump(self.dumpd, f, default_flow_style=False) - - def update_params_file(self, args): - dumpd = args.__dict__.copy() - with self.yaml_params_path.open('w') as f: - yaml.dump(dumpd, f, default_flow_style=False) - print("\ntime: '{}'".format(self.time.strftime(TIME_FORMAT)), file=f) - print("id:", int(self.num), file=f)