Skip to content

Commit 2e63f76

Browse files
committed
Added profiling
1 parent 9bb91a9 commit 2e63f76

File tree

3 files changed

+250
-159
lines changed

3 files changed

+250
-159
lines changed

examples/ibm/wt.py

+105-96
Original file line numberDiff line numberDiff line change
@@ -90,51 +90,54 @@ def save_usd_vorticity(
9090
vorticity_threshold,
9191
usd_stage,
9292
):
93-
device = "cuda:0"
94-
macro_wp = Macroscopic(
95-
compute_backend=ComputeBackend.WARP,
96-
precision_policy=precision_policy,
97-
velocity_set=xlb.velocity_set.D3Q27(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP),
98-
)
99-
rho = wp.zeros((1, *grid_shape), dtype=wp.float32, device=device)
100-
u = wp.zeros((3, *grid_shape), dtype=wp.float32, device=device)
101-
rho, u = macro_wp(f_current, rho, u)
102-
u = u[:, 20:-20, 20:-20, 0:-20]
103-
104-
vorticity = wp.zeros((3, *u.shape[1:]), dtype=wp.float32, device=device)
105-
vorticity_magnitude = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
106-
vorticity, vorticity_magnitude = vorticity_operator(u, bc_mask, vorticity, vorticity_magnitude)
107-
108-
max_verts = grid_shape[0] * grid_shape[1] * grid_shape[2] * 5
109-
max_tris = grid_shape[0] * grid_shape[1] * grid_shape[2] * 3
110-
mc = wp.MarchingCubes(nx=u.shape[1], ny=u.shape[2], nz=u.shape[3], max_verts=max_verts, max_tris=max_tris, device=device)
111-
mc.surface(vorticity_magnitude[0], vorticity_threshold)
112-
if mc.verts.shape[0] == 0:
113-
print(f"Warning: No vertices found for vorticity at timestep {timestep}.")
114-
return
93+
device = "cuda:1"
94+
f_current_new = wp.clone(f_current, device=device)
95+
bc_mask_new = wp.clone(bc_mask, device=device)
96+
with wp.ScopedDevice(device):
97+
macro_wp = Macroscopic(
98+
compute_backend=ComputeBackend.WARP,
99+
precision_policy=precision_policy,
100+
velocity_set=xlb.velocity_set.D3Q27(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP),
101+
)
102+
rho = wp.zeros((1, *grid_shape), dtype=wp.float32, device=device)
103+
u = wp.zeros((3, *grid_shape), dtype=wp.float32, device=device)
104+
rho, u = macro_wp(f_current_new, rho, u)
105+
u = u[:, 20:-20, 20:-20, 0:-20]
106+
107+
vorticity = wp.zeros((3, *u.shape[1:]), dtype=wp.float32, device=device)
108+
vorticity_magnitude = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
109+
vorticity, vorticity_magnitude = vorticity_operator(u, bc_mask_new, vorticity, vorticity_magnitude)
110+
111+
max_verts = grid_shape[0] * grid_shape[1] * grid_shape[2] * 5
112+
max_tris = grid_shape[0] * grid_shape[1] * grid_shape[2] * 3
113+
mc = wp.MarchingCubes(nx=u.shape[1], ny=u.shape[2], nz=u.shape[3], max_verts=max_verts, max_tris=max_tris, device=device)
114+
mc.surface(vorticity_magnitude[0], vorticity_threshold)
115+
if mc.verts.shape[0] == 0:
116+
print(f"Warning: No vertices found for vorticity at timestep {timestep}.")
117+
return
118+
119+
grid_to_point_op = GridToPoint(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP)
120+
scalars = wp.zeros(mc.verts.shape[0], dtype=wp.float32, device=device)
121+
scalars = grid_to_point_op(vorticity_magnitude, mc.verts, scalars)
122+
123+
colors = wp.empty(mc.verts.shape[0], dtype=wp.vec3, device=device)
124+
scalars_np = scalars.numpy()
125+
vorticity_min = float(np.percentile(scalars_np, 5))
126+
vorticity_max = float(np.percentile(scalars_np, 95))
127+
if abs(vorticity_max - vorticity_min) < 1e-6:
128+
vorticity_max = vorticity_min + 1e-6
129+
130+
wp.launch(
131+
get_color,
132+
dim=mc.verts.shape[0],
133+
inputs=(vorticity_min, vorticity_max, scalars),
134+
outputs=(colors,),
135+
device=device,
136+
)
115137

116-
grid_to_point_op = GridToPoint(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP)
117-
scalars = wp.zeros(mc.verts.shape[0], dtype=wp.float32, device=device)
118-
scalars = grid_to_point_op(vorticity_magnitude, mc.verts, scalars)
119-
120-
colors = wp.empty(mc.verts.shape[0], dtype=wp.vec3, device=device)
121-
scalars_np = scalars.numpy()
122-
vorticity_min = float(np.percentile(scalars_np, 5))
123-
vorticity_max = float(np.percentile(scalars_np, 95))
124-
if abs(vorticity_max - vorticity_min) < 1e-6:
125-
vorticity_max = vorticity_min + 1e-6
126-
127-
wp.launch(
128-
get_color,
129-
dim=mc.verts.shape[0],
130-
inputs=(vorticity_min, vorticity_max, scalars),
131-
outputs=(colors,),
132-
device=device,
133-
)
134-
135-
vertices = mc.verts.numpy()
136-
indices = mc.indices.numpy()
137-
tri_count = len(indices) // 3
138+
vertices = mc.verts.numpy()
139+
indices = mc.indices.numpy()
140+
tri_count = len(indices) // 3
138141

139142
usd_mesh.GetPointsAttr().Set(vertices.tolist(), time=timestep // post_process_interval)
140143
usd_mesh.GetFaceVertexCountsAttr().Set([3] * tri_count, time=timestep // post_process_interval)
@@ -160,43 +163,46 @@ def save_usd_q_criterion(
160163
q_threshold,
161164
usd_stage,
162165
):
163-
device = "cuda:0"
164-
macro_wp = Macroscopic(
165-
compute_backend=ComputeBackend.WARP,
166-
precision_policy=precision_policy,
167-
velocity_set=xlb.velocity_set.D3Q27(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP),
168-
)
169-
rho = wp.zeros((1, *grid_shape), dtype=wp.float32, device=device)
170-
u = wp.zeros((3, *grid_shape), dtype=wp.float32, device=device)
171-
rho, u = macro_wp(f_current, rho, u)
172-
u = u[:, 20:-20, 20:-20, 0:-20]
173-
174-
norm_mu = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
175-
q_field = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
176-
norm_mu, q_field = q_criterion_operator(u, bc_mask, norm_mu, q_field)
177-
178-
max_verts = grid_shape[0] * grid_shape[1] * grid_shape[2] * 5
179-
max_tris = grid_shape[0] * grid_shape[1] * grid_shape[2] * 3
180-
mc = wp.MarchingCubes(nx=u.shape[1], ny=u.shape[2], nz=u.shape[3], max_verts=max_verts, max_tris=max_tris, device=device)
181-
mc.surface(q_field[0], q_threshold)
182-
if mc.verts.shape[0] == 0:
183-
print(f"Warning: No vertices found for Q-criterion at timestep {timestep}.")
184-
return
185-
186-
grid_to_point_op = GridToPoint(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP)
187-
scalars = wp.zeros(mc.verts.shape[0], dtype=wp.float32, device=device)
188-
scalars = grid_to_point_op(norm_mu, mc.verts, scalars)
189-
190-
colors = wp.empty(mc.verts.shape[0], dtype=wp.vec3, device=device)
191-
vorticity_min = 0.0
192-
vorticity_max = 0.1
193-
wp.launch(
194-
get_color,
195-
dim=mc.verts.shape[0],
196-
inputs=(vorticity_min, vorticity_max, scalars),
197-
outputs=(colors,),
198-
device=device,
199-
)
166+
device = "cuda:1"
167+
f_current_new = wp.clone(f_current, device=device)
168+
bc_mask_new = wp.clone(bc_mask, device=device)
169+
with wp.ScopedDevice(device):
170+
macro_wp = Macroscopic(
171+
compute_backend=ComputeBackend.WARP,
172+
precision_policy=precision_policy,
173+
velocity_set=xlb.velocity_set.D3Q27(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP),
174+
)
175+
rho = wp.zeros((1, *grid_shape), dtype=wp.float32, device=device)
176+
u = wp.zeros((3, *grid_shape), dtype=wp.float32, device=device)
177+
rho, u = macro_wp(f_current_new, rho, u)
178+
u = u[:, 20:-20, 20:-20, 0:-20]
179+
180+
norm_mu = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
181+
q_field = wp.zeros((1, *u.shape[1:]), dtype=wp.float32, device=device)
182+
norm_mu, q_field = q_criterion_operator(u, bc_mask_new, norm_mu, q_field)
183+
184+
max_verts = grid_shape[0] * grid_shape[1] * grid_shape[2] * 5
185+
max_tris = grid_shape[0] * grid_shape[1] * grid_shape[2] * 3
186+
mc = wp.MarchingCubes(nx=u.shape[1], ny=u.shape[2], nz=u.shape[3], max_verts=max_verts, max_tris=max_tris, device=device)
187+
mc.surface(q_field[0], q_threshold)
188+
if mc.verts.shape[0] == 0:
189+
print(f"Warning: No vertices found for Q-criterion at timestep {timestep}.")
190+
return
191+
192+
grid_to_point_op = GridToPoint(precision_policy=precision_policy, compute_backend=ComputeBackend.WARP)
193+
scalars = wp.zeros(mc.verts.shape[0], dtype=wp.float32, device=device)
194+
scalars = grid_to_point_op(norm_mu, mc.verts, scalars)
195+
196+
colors = wp.empty(mc.verts.shape[0], dtype=wp.vec3, device=device)
197+
vorticity_min = 0.0
198+
vorticity_max = 0.1
199+
wp.launch(
200+
get_color,
201+
dim=mc.verts.shape[0],
202+
inputs=(vorticity_min, vorticity_max, scalars),
203+
outputs=(colors,),
204+
device=device,
205+
)
200206

201207
vertices = mc.verts.numpy()
202208
indices = mc.indices.numpy()
@@ -302,7 +308,7 @@ def load_and_prepare_meshes_car(grid_shape, stl_dir="/home/mehdi/Repos/stl-files
302308

303309
# Calculate scale based on BODY width only
304310
Nx, Ny, Nz = grid_shape
305-
target_body_width = Ny / 4.0
311+
target_body_width = Ny / 3.0
306312
scale_factor = target_body_width / orig_body_width # Use body's original width
307313

308314
# Apply scale to ALL components
@@ -468,7 +474,7 @@ def bc_profile_warp(index: wp.vec3i):
468474
z = _dtype(index[2])
469475
y_center = y - (L_y / _dtype(2.0))
470476
z_center = z - (L_z / _dtype(2.0))
471-
r_sq = ((2.0 * y_center) / L_y) ** _dtype(2.0) + ((2.0 * z_center) / L_z) ** _dtype(2.0)
477+
r_sq = ((_dtype(2.0) * y_center) / L_y) ** _dtype(2.0) + ((_dtype(2.0) * z_center) / L_z) ** _dtype(2.0)
472478
velocity_x = u_max_d * wp.max(_dtype(0.0), _dtype(1.0) - r_sq)
473479
return wp.vec(velocity_x, length=1)
474480

@@ -573,6 +579,7 @@ def post_process(
573579
}
574580
slice_idy = grid_shape[1] // 2
575581
save_image(fields["u_magnitude"][:, slice_idy, :], timestep=i)
582+
# save_fields_vtk(fields, i)
576583

577584
save_usd_vorticity(
578585
i,
@@ -596,20 +603,20 @@ def post_process(
596603
usd_mesh_q_criterion,
597604
q_criterion_operator,
598605
precision_policy=precision_policy,
599-
q_threshold=1e-6,
606+
q_threshold=5e-6,
600607
usd_stage=usd_stage,
601608
)
602609

603-
grid_shape = (300, 200, 100)
604-
u_max = 0.05
610+
grid_shape = (1024, 500, 200)
611+
u_max = 0.02
605612
iter_per_flow_passes = grid_shape[0] / u_max
606-
num_steps = 20000
607-
post_process_interval = 100
613+
num_steps = 2
614+
post_process_interval = 1000
608615
print_interval = 1000
609616
num_wheels = 4
610617
wheel_rotation_speed = -0.002
611618

612-
Re = 2000000.0
619+
Re = 2e6
613620
clength = grid_shape[0] - 1
614621
visc = u_max * clength / Re
615622
omega = 1.0 / (3.0 * visc + 0.5)
@@ -672,16 +679,18 @@ def post_process(
672679
stepper = setup_stepper(grid, bc_list, omega)
673680
f_0, f_1, bc_mask, missing_mask = stepper.prepare_fields()
674681

675-
q_criterion_operator = QCriterion(
676-
velocity_set=velocity_set,
677-
precision_policy=precision_policy,
678-
compute_backend=compute_backend,
679-
)
680-
vorticity_operator = Vorticity(
682+
device = "cuda:1"
683+
with wp.ScopedDevice(device):
684+
q_criterion_operator = QCriterion(
685+
velocity_set=velocity_set,
686+
precision_policy=precision_policy,
687+
compute_backend=compute_backend,
688+
)
689+
vorticity_operator = Vorticity(
681690
velocity_set=velocity_set,
682691
precision_policy=precision_policy,
683692
compute_backend=compute_backend,
684-
)
693+
)
685694

686695
velocities_wp = wp.zeros(shape=vertices_wp.shape[0], dtype=wp.vec3)
687696

xlb/operator/boundary_condition/bc_regularized.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def functional_velocity(
150150
# Find the value of u from the missing directions
151151
# Since we are only considering normal velocity, we only need to find one value (stored at the center of f_1)
152152
# Create velocity vector by multiplying the prescribed value with the normal vector
153-
prescribed_value = f_1[0, index[0], index[1], index[2]]
153+
prescribed_value = self.compute_dtype(f_1[0, index[0], index[1], index[2]])
154154
_u = -prescribed_value * normals
155155

156156
# calculate rho
@@ -186,7 +186,7 @@ def functional_pressure(
186186

187187
# Find the value of rho from the missing directions
188188
# Since we need only one scalar value, we only need to find one value (stored at the center of f_1)
189-
_rho = f_1[0, index[0], index[1], index[2]]
189+
_rho = self.compute_dtype(f_1[0, index[0], index[1], index[2]])
190190

191191
# calculate velocity
192192
fsum = bc_helper.get_bc_fsum(_f, missing_mask)

0 commit comments

Comments
 (0)