# slam_real.py

import torch
import cv2
import numpy as np
import matplotlib.pyplot as plt
from torchvision.transforms import Compose, Resize, ToTensor, Normalize
from PIL import Image 

# 1. Load the MiDaS_small model
# Pick the compute device first: CUDA when torch reports it available, else CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Fetch the "MiDaS_small" entry point from the intel-isl/MiDaS hub repository,
# move it to the chosen device and switch it to evaluation mode.
midas = torch.hub.load("intel-isl/MiDaS", "MiDaS_small")
midas.to(device)
midas.eval()

# 2. Preprocessing pipeline for MiDaS_small
# Applied to a PIL image: resize to 256x256, convert to a float tensor in
# [0, 1], then normalize each channel.
# The mean/std values are the ImageNet statistics, which is what the
# published MiDaS "small_transform" uses; the previous 0.5/0.5 values are the
# ones the DPT variants use and feed this model inputs outside the
# distribution it was trained on.
# NOTE: Resize((256, 256)) does not preserve the aspect ratio of the input.
transform = Compose([
    Resize((256, 256)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# 3. Load the local image and turn it into a model input
image_path = "C:/Users/user/Desktop/0000000000.png"

# cv2.imread signals failure by returning None instead of raising.
bgr_image = cv2.imread(image_path)
if bgr_image is None:
    raise ValueError(f"Cannot read image at {image_path}. Check the path.")

# OpenCV loads channels as BGR; everything downstream works on RGB.
image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)

# The transform pipeline is applied to a PIL image, so wrap the NumPy array.
image_pil = Image.fromarray(image)

# Add a leading batch dimension and move the tensor to the model's device.
input_image = transform(image_pil)
input_image = input_image.unsqueeze(0).to(device)

# 4. Predict depth
# Inference only, so gradient tracking is disabled for the whole section.
with torch.no_grad():
    # Resize the network output to the (height, width) of the loaded image.
    target_size = image.shape[:2]
    prediction = torch.nn.functional.interpolate(
        midas(input_image).unsqueeze(1),  # insert a channel axis for interpolate
        size=target_size,
        mode="bicubic",
        align_corners=False,
    )
    # Drop all singleton dimensions (batch and channel axes).
    prediction = prediction.squeeze()

# Bring the result back to host memory as a NumPy array.
depth_map = prediction.cpu().numpy()

# 5. Normalize depth map to the [0, 1] range (min-max scaling)
depth_min = depth_map.min()
depth_range = depth_map.max() - depth_min
if depth_range == 0:
    # A perfectly flat prediction would give 0/0 (an all-NaN map); map it to
    # zeros instead so the plotting and point-cloud steps still get finite data.
    depth_map = np.zeros_like(depth_map)
else:
    depth_map = (depth_map - depth_min) / depth_range

# 6. Visualize Depth Map
# One figure with two side-by-side panels: the RGB input on the left and the
# normalized prediction (with a colorbar) on the right.
depth_fig, (rgb_ax, depth_ax) = plt.subplots(1, 2, figsize=(10, 5))

rgb_ax.imshow(image)
rgb_ax.set_title("Input Image")

depth_artist = depth_ax.imshow(depth_map, cmap='plasma')
depth_ax.set_title("Predicted Depth Map")
depth_fig.colorbar(depth_artist, ax=depth_ax)

plt.show()

# 7. Generate Point Cloud
# Back-project every pixel (u, v) with value Z through a pinhole camera model:
#     X = (u - cx) * Z / fx,   Y = (v - cy) * Z / fy
# Intrinsics are hard-coded: focal length 500 px and the principal point at
# the image center (presumably placeholder values - replace with calibrated
# intrinsics when available).
# NOTE(review): MiDaS is documented as predicting relative *inverse* depth;
# if that applies here, Z is disparity-like and near objects end up far from
# the camera in this cloud - confirm and invert if metric-like shape matters.
fx = fy = 500
cx, cy = image.shape[1] / 2, image.shape[0] / 2

# Pixel coordinate grids: v_idx[r, c] == r (row), u_idx[r, c] == c (column).
# Computing all pixels at once replaces the per-pixel Python loop.
v_idx, u_idx = np.indices(image.shape[:2])

Z = depth_map
X = (u_idx - cx) * Z / fx
Y = (v_idx - cy) * Z / fy

# Flatten in row-major order (rows outer, columns inner) so that points[i]
# and colors[i] refer to the same pixel.
points = np.stack((X, Y, Z), axis=-1).reshape(-1, 3)
colors = image.reshape(-1, image.shape[-1]) / 255.0  # scale 8-bit RGB to [0, 1]

# 8. Visualize Point Cloud
# Plot only every 50th point so the 3D scatter stays light to draw.
stride = 50
sampled_points = points[::stride]
sampled_colors = colors[::stride]
xs, ys, zs = sampled_points.T

fig = plt.figure(figsize=(10, 10))
ax = fig.add_subplot(111, projection='3d')
ax.scatter(xs, ys, zs, c=sampled_colors, s=0.5)
ax.set_title("3D Mapping from Single Local Image")
plt.show()