From 624461bc997f644dce26a05ca090eda2586efd8a Mon Sep 17 00:00:00 2001
From: LoadingByte
Date: Thu, 12 Sep 2024 16:49:17 +0200
Subject: [PATCH] Improve reproducibility

---
 README.md              | 49 +++++++++++++++++++++++++++++++++---------
 scripts/performance.py | 11 +++--------
 scripts/visualize.py   | 45 ++++++++++++++++++++++++----------------
 3 files changed, 71 insertions(+), 34 deletions(-)

diff --git a/README.md b/README.md
index 58e03a2..78ec092 100644
--- a/README.md
+++ b/README.md
@@ -46,7 +46,13 @@ install our package:
 
     pip install -e .
 
 Next, download the [dataset](https://neural-gaussian-scale-space-fields.mpi-inf.mpg.de/data.zip) and extract it into the
-repository such that, e.g., the folder `/data/picture` exists.
+repository such that the new `data/` folder and the `scripts/` folder are siblings.
+
+Steps 1 and 2 take a long time. To skip them, download the
+[pretrained models](https://neural-gaussian-scale-space-fields.mpi-inf.mpg.de/models.zip) and extract them into the
+repository such that the new `results/` folder and the `scripts/` folder are siblings.
+
+The outputs of any experiment or visualizer will be written to the `results/` folder.
 
 ### Step 1: Train
@@ -64,6 +70,12 @@ third argument. Look in the `data/` folder for all available names. Notice that
     python scripts/train.py neural picture bbq
     python scripts/train.py neural mesh armadillo
 
+Training a field takes about 4 hours because training does not halt upon convergence, but stoically continues for the
+maximum number of iterations. To speed up training, you can significantly lower the `n_iters` variables in
+[`train.py`](scripts/train.py) without notably sacrificing quality.
+
+Each field occupies 24MB of disk space, summing to 1.3GB for all fields.
+
 ### Step 2: Calibrate
 
 Use the [`calibrate.py`](scripts/calibrate.py) script to run our post-training calibration:
@@ -85,6 +97,9 @@ Also use this script to generate smoothed ground truths via Monte Carlo convolut
     python scripts/benchmark.py gauss picture [name]
     python scripts/benchmark.py gauss mesh [name]
 
+Benchmarking a picture/mesh field takes about 5min/15min. Generating picture/mesh ground truths takes about 30min/4h.
+Per picture/mesh, the smoothed versions take 5GB/6.5GB, summing to about 1TB for all results.
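+
+If you want to queue up the entire benchmark in one go, a driver loop along these lines should work (an untested
+sketch; it assumes `ngssf.data.names` also accepts `"picture"`, analogous to how `visualize.py` uses it for meshes):
+
+    import subprocess
+
+    import ngssf
+
+    for category in ["picture", "mesh"]:
+        for name in ngssf.data.names(category):
+            for field_type in ["neural", "gauss"]:
+                # benchmark.py writes all of its outputs into the results/ folder.
+                subprocess.run(["python", "scripts/benchmark.py", field_type, category, name], check=True)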
+
 ### Step 4: Metrics
 
 Use the [`metrics.py`](scripts/metrics.py) script to compare the output of our field with the ground truth:
@@ -105,20 +120,20 @@ Numbers very similar to those found in Tables 1-4 in our paper should now be ava
 Use the [`visualize.py`](scripts/visualize.py) script to get the images, videos, and meshes from our paper and website:
 
-    python scripts/visualize.py picture_isotropic
-    python scripts/visualize.py picture_anisotropic
-    python scripts/visualize.py picture_foveation
-    python scripts/visualize.py mesh_isotropic
-    python scripts/visualize.py mesh_anisotropic
+    python scripts/visualize.py picture_isotropic [name]
+    python scripts/visualize.py picture_anisotropic [name]
+    python scripts/visualize.py picture_foveation [name]
+    python scripts/visualize.py mesh_isotropic [name]
+    python scripts/visualize.py mesh_anisotropic [name]
     python scripts/visualize.py lightstage
-    python scripts/visualize.py picture_video
-    python scripts/visualize.py mesh_video_objects
-    python scripts/visualize.py mesh_video_ellipsoids
+    python scripts/visualize.py picture_video [name]
+    python scripts/visualize.py mesh_video_objects [name]
+    python scripts/visualize.py mesh_video_ellipsoids [name]
     python scripts/visualize.py lightstage_video
 
 ### Ablations
 
-To reproduce our ablations, perform the above four above steps with `neural` replaced by one of the following:
+To reproduce our ablations, perform steps 1-4 with `neural` replaced by one of the following:
 
 | Configuration as in the paper | Script equivalent |
 |-------------------------------|-----------------------------------------|
@@ -136,6 +151,11 @@ First install the required additional dependencies:
 
     pip install -e .[neural-texture]
 
+If you have neither downloaded the pretrained models nor run steps 1-2 yet, run these commands to prepare the neural texture:
+
+    python scripts/train.py neural textured
+    python scripts/calibrate.py neural textured
+
 Open the 3D renderer window using the [`textured_render_uv.py`](scripts/textured_render_uv.py) script. Press ESC to
 quit, W/A/S/D/SPACE/SHIFT to move, and use the mouse to look around. You will see the mesh of a fish, but instead of
 a texture, it is covered in UV coordinates.
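+
+These UV renders are the input for the next step: [`textured_apply_neural_texture.py`](scripts/textured_apply_neural_texture.py)
+evaluates the neural texture at the rendered UV coordinates. Conceptually, the lookup works along these lines (an
+illustrative sketch, not the actual pipeline code; `uv_frame` and `neural_texture` are stand-ins):
+
+    import torch
+
+    def shade(uv_frame, neural_texture):
+        # uv_frame: a (2, H, W) tensor of per-pixel UV coordinates produced by the renderer
+        uv = uv_frame.reshape(2, -1).T  # one (u, v) pair per pixel
+        with torch.no_grad():
+            colors = neural_texture(uv)  # query the field: (H * W, 3) RGB values
+        return colors.T.reshape(3, *uv_frame.shape[1:])
+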
 If you feel dizzy, set `gizmo_and_grid` in the script to true to render a
@@ -149,3 +169,12 @@ factor here and increased the window size in `textured_render_uv.py` to still ge
 demonstrate our method, you can replace `16` with `1`:
 
     python scripts/textured_apply_neural_texture.py moving 16
+
+### Performance
+
+Use the [`performance.py`](scripts/performance.py) script to reproduce our timing experiment:
+
+    python scripts/performance.py vanilla picture 30
+    python scripts/performance.py vanilla mesh 0.004
+    python scripts/performance.py neural picture 30
+    python scripts/performance.py neural mesh 0.004
diff --git a/scripts/performance.py b/scripts/performance.py
index f78a078..b466fa2 100644
--- a/scripts/performance.py
+++ b/scripts/performance.py
@@ -13,10 +13,6 @@ def main(field_type, category, stop_metric):
     stop_metric = float(stop_metric)
 
-    out_dir = ngssf.results.performance_dir()
-    pred_dir = out_dir / f"predictions_{field_type}_{category}_{stop_metric}"
-    pred_dir.mkdir(parents=True, exist_ok=True)
-
     # Warm-up
     time_method(field_type, category, perf_names(category)[0], 0, stop_metric)
 
@@ -33,7 +29,7 @@ def main(field_type, category, stop_metric):
             its.append(it)
         rows.append((f"{var_bench_idx}", field_type, f"{np.mean(tts):.4f}", f"{np.mean(its):.4f}"))
 
-    with open(out_dir / f"timings_{field_type}_{category}_{stop_metric}.csv", "w") as f:
+    with open(ngssf.results.performance_dir() / f"timings_{field_type}_{category}_{stop_metric}.csv", "w") as f:
        f.write("\n".join(",".join(row) for row in rows))
 
@@ -70,8 +66,7 @@ def start_fn():
         start_time = cur_time()
 
     def loop_fn(itr, pred_fn):
-        # Bacon produces extremely complex meshes initially, which slow down our Chamfer code significantly.
-        if (itr + 1) % 50 != 0 or field_type == "bacon" and category == "mesh" and itr < 15_000:
+        if (itr + 1) % 50 != 0:
             return False
         inference_start_time = cur_time()
         with torch.no_grad():
@@ -117,7 +112,7 @@ def train(field_type, category, true_grid, true_mesh, res, start_fn, loop_fn):
         sampler = ngssf.MinibatchSampler(2 ** 24, ngssf.SDFSampler(true_mesh)).cuda()
         enc_kw["length_distribution_param"] = 100
         n_samples = 200_000
-    if field_type.startswith("neural"):
+    if field_type == "neural":
         scaler = ngssf.MinibatchScaler(10_000_000, ngssf.RandomScaler(True, sig.coords)).cuda()
         field = ngssf.nn.prefab.Smoothable4x1024NeuralField(sig.coords, sig.channels, True, None, enc_kw)
     elif field_type == "vanilla":
diff --git a/scripts/visualize.py b/scripts/visualize.py
index 726cb34..9f1e362 100644
--- a/scripts/visualize.py
+++ b/scripts/visualize.py
@@ -31,10 +31,10 @@ def picture_anisotropic():
 
 def _picture(base_dir, scale_set, indices):
-    for name in tqdm([
+    for name in tqdm(args_or([
         "bbq", "cliffs", "colosseo", "crystals", "firenze", "firewood", "mutter", "peak", "portal", "rue",
         "schaumbrunnen", "steepshore", "toomuchbleach", "tunnelrampe", "zebras"
-    ], desc="name", leave=False):
+    ]), desc="name", leave=False):
         for index in tqdm(indices, desc="scale", leave=False):
             gauss_img = _prepare_picture_image(ngssf.results.load_benchmark("gauss", "picture", name, scale_set, index))
             pred_img = _prepare_picture_image(ngssf.results.load_benchmark("neural", "picture", name, scale_set, index))
@@ -45,12 +45,14 @@ def _picture(base_dir, scale_set, indices):
 
 def picture_foveation():
-    field = ngssf.results.load_neural_field("neural", "picture", "squirrel").cuda()
+    name = arg_or("squirrel")
+    field = ngssf.results.load_neural_field("neural", "picture", name).cuda()
     res = 512
     X = ngssf.util.grid_coords(res, 2, device="cuda")
     with torch.no_grad():
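+        # Foveation: the smoothing variance below is zero within radius 0.35 of the image center and grows cubically
+        # with the distance beyond that, so the picture stays sharp at the fovea and gets blurrier towards the edges.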
         pred_img = field(X, (X.norm(dim=1) - 0.35).clamp(0) ** 3 / 200).T.reshape(3, res, res)
-    _write_image(ngssf.results.visualizations_dir() / "picture_foveation.jpg", _prepare_picture_image(pred_img))
+    img = _prepare_picture_image(pred_img)
+    _write_image(ngssf.results.visualizations_dir() / "picture_foveation" / f"{name}.jpg", img)
 
 
 def _prepare_picture_image(img):
@@ -59,7 +61,7 @@ def _prepare_picture_image(img):
 
 def mesh_isotropic():
     scale_set = "variance_benchmark"
-    for name in tqdm(ngssf.data.names("mesh"), desc="name", leave=False):
+    for name in tqdm(args_or(ngssf.data.names("mesh")), desc="name", leave=False):
         d = ngssf.results.visualizations_dir() / "mesh_isotropic"
         dg = d / name / "gauss"
         dn = d / name / "neural"
@@ -73,9 +75,10 @@ def mesh_isotropic():
 
 def mesh_anisotropic():
-    d = ngssf.results.visualizations_dir() / "mesh_anisotropic"
+    name = arg_or("thai")
+    d = ngssf.results.visualizations_dir() / "mesh_anisotropic" / name
     d.mkdir(parents=True, exist_ok=True)
-    field = ngssf.results.load_neural_field("neural", "mesh", "thai").cuda()
+    field = ngssf.results.load_neural_field("neural", "mesh", name).cuda()
     for label, variances in [
         ("isotropic", [1e-2, 1e-2, 1e-2]),
         ("anisotropic_horizontal", [1e-2, 1e-8, 1e-2]),
@@ -84,12 +87,13 @@ def mesh_anisotropic():
         scale = torch.diag(torch.tensor(variances))
         with torch.no_grad():
             grid = ngssf.util.eval_grid(256, field, scale.cuda(), batch_size=2 ** 18).cpu()
-        ngssf.util.mesh_from_grid(grid).export(d / f"thai_{label}.ply")
+        ngssf.util.mesh_from_grid(grid).export(d / f"{label}.ply")
 
 
 def lightstage():
+    name = arg_or("cute")
     light_positions = ngssf.data.lightstage_light_positions()
-    field = ngssf.results.load_neural_field("neural", "lightstage", "cute").cuda()
+    field = ngssf.results.load_neural_field("neural", "lightstage", name).cuda()
     w, h = 512, 384
     X = torch.cat([
         torch.cartesian_prod(torch.linspace(-0.75, 0.75, h), torch.linspace(-1, 1, w)).flip(1),
@@ -99,7 +103,7 @@ def lightstage():
         with torch.no_grad():
             Y = field(X.cuda(), scale)
         img = _prepare_image(Y.T.reshape(3, h, w))
-        _write_image(ngssf.results.visualizations_dir() / "lightstage" / f"{i}.jpg", img)
+        _write_image(ngssf.results.visualizations_dir() / "lightstage" / name / f"{i}.jpg", img)
 
 
 def picture_video():
@@ -114,7 +118,7 @@ def picture_video():
     spectrum_label = _label("Spectrum", 1300, (128, 32), overlay=True)
     cov_label = _label("Covariance", 1300, (128, 32))
 
-    for name in tqdm(["bbq", "firewood", "schaumbrunnen", "tunnelrampe"], leave=False):
+    for name in tqdm(args_or(["bbq", "firewood", "schaumbrunnen", "tunnelrampe"]), leave=False):
         orig_picture = torch.as_tensor(resize(ngssf.data.load("picture", name).numpy(), (3, 512, 512)), device="cuda")
         neural_field = ngssf.results.load_neural_field("neural", "picture", name).cuda()
         gauss_field = ngssf.GaussianMonteCarloSmoothableField(ngssf.GridField(orig_picture, padding_mode="reflection"))
@@ -146,7 +150,7 @@ def _spectrum(picture):
 
 def mesh_video_objects():
     cov_mats = torch.tensor(_mesh_video_covariance_matrices(), dtype=torch.float32, device="cuda")
-    for name in tqdm(ngssf.data.names("mesh"), leave=False):
+    for name in tqdm(args_or(ngssf.data.names("mesh")), leave=False):
         orig_mesh = ngssf.data.load("mesh", name)
         neural_field = ngssf.results.load_neural_field("neural", "mesh", name).cuda()
         gauss_field = ngssf.GaussianMonteCarloSmoothableField(
@@ -191,12 +195,13 @@ def _mesh_video_covariance_matrices():
 
 def lightstage_video():
-    d = ngssf.results.visualizations_dir()
+    name = arg_or("cute")
+    d = ngssf.results.visualizations_dir() / "lightstage_video"
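+    # arg_or() reads an optional capture name from the command line and falls back to "cute"; see the helpers at the
+    # bottom of this file.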
     d.mkdir(parents=True, exist_ok=True)
-    light_shots = ngssf.data.load("lightstage", "cute")
+    light_shots = ngssf.data.load("lightstage", name)
     light_pos = ngssf.data.lightstage_light_positions()
-    neural_field = ngssf.results.load_neural_field("neural", "lightstage", "cute").cuda()
+    neural_field = ngssf.results.load_neural_field("neural", "lightstage", name).cuda()
     neural_field.calibration_factors[1] = 500
     gauss_field = ngssf.GaussianMonteCarloSmoothableField(ngssf.LightStageField(light_shots, light_pos), {2, 3}).cuda()
 
@@ -218,7 +223,7 @@ def lightstage_video():
     cov_label = _label("Covariance", 1300, (128, 32))
     plotted_pos = light_pos[(light_pos - (light_pos[2] + light_pos[22]) / 2).norm(dim=1) < 0.15]
 
-    video = VideoWriter(str(d / "lightstage_video.mp4"), VideoWriter.fourcc('a', 'v', 'c', '1'), 60, (1152, 384))
+    video = VideoWriter(str(d / f"{name}.mp4"), VideoWriter.fourcc('a', 'v', 'c', '1'), 60, (1152, 384))
     for x_light, cov_mat in tqdm(list(zip(xs_light, cov_mats)), leave=False):
         X = torch.cat([X_pixel, x_light.tile(w * h, 1)], dim=1).cuda()
         with torch.no_grad():
@@ -304,5 +309,13 @@ def _write_image(file, img):
     iio.imwrite(file, (img * 255).astype(np.uint8), quality=90)
 
 
+def arg_or(default):
+    return sys.argv[2] if len(sys.argv) > 2 else default
+
+
+def args_or(default):
+    return sys.argv[2:] if len(sys.argv) > 2 else default
+
+
 if __name__ == "__main__":
     globals()[sys.argv[1]]()