# Experiment configuration "asd_mv_triplane_100k_1GPU".
#
# Top-level sections: run identity (name/tag/output dir/seed), `data`
# (camera sampling and batch layout), `system` (geometry, material,
# background, renderer, prompt processor, guidance, loggers, loss, optimizer),
# `trainer`, and `checkpoint`.
#
# `${a.b.c}` values are interpolations of other keys in this file and `???`
# marks a value that has to be supplied from outside the file
# (presumably OmegaConf semantics -- confirm against the config loader).

name: "asd_mv_triplane_100k_1GPU"
tag: "${rmspace:${system.prompt_processor.prompt_library},_}"
exp_root_dir: "outputs"
seed: 0

data_type: "multiprompt-multiview-camera-datamodule"
data:
  batch_size: 4
  n_view: 4
  # 0-4999: 64x64, >=5000: 512x512
  # this drastically reduces VRAM usage as empty space is pruned in early training
  width: 64
  height: 64
  camera_distance_range: [0.8, 1.0]  # relative
  fovy_range: [15, 60]
  elevation_range: [0, 30]
  camera_perturb: 0.
  center_perturb: 0.
  up_perturb: 0.
  eval_camera_distance: 3.0
  eval_fovy_deg: 40.
  n_val_views: 40
  # generation related
  prompt_library: ${system.prompt_processor.prompt_library}
  dim_gaussian: 1  # not used

system_type: "multiprompt-radience-field-generator-system"
system:
  stage: "coarse"
  initialize_shape: false  # true
  visualize_samples: false
  validation_via_video: true

  geometry_type: "Triplane-transformer-sdf"
  geometry:
    radius: 2.0
    normal_type: "finite_difference"
    finite_difference_normal_eps: 0.01

    sdf_bias: sphere
    sdf_bias_params: 0.8  # easier to converge than 0.5

    space_generator_config:  # adopt from OpenLRM
      inner_dim: 768
      condition_dim: 1024
      triplane_low_res: 32
      triplane_high_res: 64
      triplane_dim: 32
      num_layers: 12
      num_heads: 16
      mlp_ratio: 4
      # also read by system.prompt_processor.use_local_text_embeddings
      local_text: true

  material_type: no-material
  material:
    n_output_dims: 3
    color_activation: sigmoid-mipnerf  # follow OpenLRM
    requires_normal: true

  background_type: "neural-environment-map-background"
  background:
    color_activation: sigmoid-mipnerf  # follow OpenLRM
    random_aug: false

  renderer_type: "generative-space-volsdf-volume-renderer"
  renderer:
    radius: ${system.geometry.radius}
    use_volsdf: true
    trainable_variance: false  # important!
    # 0.340119 = log(30) / 10, 30 is the most common variance across the prompts
    learned_variance_init: 0.340119

    randomized: false

    estimator: importance
    num_samples_per_ray: 64
    num_samples_per_ray_importance: 128
    near_plane: 0.1
    far_plane: 4.0
    train_chunk_size: 0  # 100000

  prompt_processor_type: "stable-diffusion-multi-prompt-processor"
  prompt_processor:
    pretrained_model_name_or_path: "pretrained/stable-diffusion-2-1-base"
    use_local_text_embeddings: ${system.geometry.space_generator_config.local_text}
    # no default here; data.prompt_library and the top-level tag both read this key
    prompt_library: ???
    negative_prompt: "ugly, bad anatomy, blurry, pixelated obscure, unnatural colors, poor lighting, dull, and unclear, cropped, lowres, low quality, artifacts, duplicate, morbid, mutilated, poorly drawn face, deformed, dehydrated, bad proportions"

  guidance_type: "mvdream-asynchronous-score-distillation-guidance"
  guidance:
    model_name: "sd-v2.1-base-4view"
    # path to a pre-downloaded checkpoint file (null for loading from URL)
    ckpt_path: "pretrained/sd-v2.1-base-4view.pt"
    guidance_scale: 7.5
    plus_ratio: 0.1
    plus_random: true
    min_step_percent: [0, 0.5, 0.02, 50000]  # follow MVDream
    max_step_percent: [0, 0.98, 0.5, 50000]  # same as vsd

  loggers:
    wandb:
      enable: false
      project: "threestudio"
      # NOTE(review): YAML reads this as the string "None", not null -- confirm intent
      name: None

  loss:
    lambda_asd: 1.
    lambda_orient: 0.
    lambda_sparsity: 20
    lambda_opaque: [40000, 0, 1., 50000]  # final 1/5 iterations
    lambda_z_variance: 0.
    lambda_eikonal: 0.01  # [1, 10., 1., 10000] # first 1/10 iterations

  optimizer:
    name: Adan  # for fast convergence
    args:
      betas: [0.98, 0.92, 0.99]
      eps: 1.e-15
    params:
      geometry:
        lr: 0.0002
      background:
        lr: 0.0002

trainer:
  max_steps: 50000
  log_every_n_steps: 1
  num_sanity_val_steps: 0
  val_check_interval: 10000
  enable_progress_bar: true
  precision: 32
  strategy: "ddp"  # "deepspeed_stage_2"
  accumulate_grad_batches: 8

checkpoint:
  save_last: true
  save_top_k: -1
  # checkpoints are written on the same step interval as validation
  every_n_train_steps: ${trainer.val_check_interval}