Skip to content

Commit

Permalink
Merge pull request #27 from mpelchat04/pytests-update
Browse files: browse the repository at this point in the history
Update pytests
  • Loading branch information
mpelchat04 authored Oct 24, 2024
2 parents ac51770 + 8c689d5 commit ff4fed8
Show file tree
Hide file tree
Showing 10 changed files with 113 additions and 36 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ geo_inference = GeoInference(

# Perform feature extraction on a TIFF image
image_path = "/path/to/image.tif"
bands_requested = [1,2,3]
bands_requested = "1,2,3"
patch_size = 1024
workers = 0
patch_size = 512
Expand Down
5 changes: 2 additions & 3 deletions geo_inference/geo_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,8 @@ def __init__(
transformer_rotate: bool = False,
):
self.work_dir: Path = get_directory(work_dir)
self.device = (
device if device == "cpu" else select_model_device(gpu_id, multi_gpu)
)
self.device = select_model_device(gpu_id, multi_gpu, device)

self.model = torch.jit.load(
get_model(
model_path_or_url=model,
Expand Down
53 changes: 50 additions & 3 deletions geo_inference/utils/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -196,9 +196,56 @@ def get_model(model_path_or_url: str, work_dir: Path) -> Path:
raise ValueError("Invalid model path")


def select_model_device(gpu_id: int, multi_gpu: bool):
device = "cpu"
if torch.cuda.is_available():
def select_model_device(gpu_id: int, multi_gpu: bool, device: str="cpu"):
"""
Selects an appropriate GPU device based on memory usage and GPU utilization.
The function checks if a GPU is available using `torch.cuda.is_available()`
and then evaluates either a single GPU or multiple GPUs based on the provided `multi_gpu` flag.
It analyzes memory and utilization for each available GPU, and selects a device that has memory
and utilization usage below a specific threshold.
Parameters:
-----------
multi_gpu : bool
If True, checks multiple GPUs and selects one with suitable memory and utilization stats.
gpu_id : int
The index of the GPU to evaluate when not in multi-GPU mode.
device : str
The device string representing the current device (e.g., "cpu" or "cuda:X").
Returns:
--------
device : str
The updated device string, specifying which GPU to use (e.g., "cuda:0", "cuda:1", etc.),
or retains the original device (e.g., "cpu") if no suitable GPU is found.
Logic:
------
1. If a GPU is available and the device is not set to "cpu":
- **Single GPU Mode (multi_gpu=False)**:
- Checks the specified `gpu_id`'s memory and utilization.
- If the memory usage is below 70% and GPU utilization is below 70%, sets the device to the appropriate GPU.
- **Multi-GPU Mode (multi_gpu=True)**:
- Iterates over all available GPUs.
- For each GPU, checks memory usage and utilization.
- Selects the first GPU that has memory and utilization below the 70% threshold.
2. If no GPU meets the criteria, retains the current device (usually "cpu").
Note:
-----
- Memory is calculated as the difference between total and available memory (via `torch.cuda.mem_get_info`).
- Both memory and GPU utilization thresholds are set to 70%.
- The GPU utilization is retrieved via `torch.cuda.utilization()`.
Exceptions:
-----------
- Assumes that GPU-related PyTorch functions like `torch.cuda.utilization()` and `torch.cuda.mem_get_info()` are available and accessible.
- GPU-related functions will fail if run in an environment without CUDA support.
"""

if torch.cuda.is_available() and device != "cpu":
if not multi_gpu:
res = {"gpu": torch.cuda.utilization(gpu_id)}
torch_cuda_mem = torch.cuda.mem_get_info(gpu_id)
Expand Down
9 changes: 9 additions & 0 deletions geo_inference/utils/polygon.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ def gdf_to_yolo(geojson_path="", mask_path="", output_path="", column='value',
except pyogrio.errors.DataSourceError as e:
logger.error(f"Error reading GeoJSON file: {geojson_path}: {e}")
return

if len(gdf) == 0:
logger.info(f"No vector to write to yolo file.")
return

[x0, y0, x1, y1] = [0, 0, im_size[0], im_size[1]]
out_coords = [[x0, y0], [x0, y1], [x1, y1], [x1, y0]]
Expand Down Expand Up @@ -196,6 +200,11 @@ def geojson2coco(image_src, label_src, output_path=None, category_attribute="val
except pyogrio.errors.DataSourceError as e:
logger.error(f"Error reading GeoJSON file: {label_src}: {e}")
return

if len(curr_gdf) == 0:
logger.info(f"No vector to write to coco file.")
return

curr_gdf['label_fname'] = label_src
curr_gdf['image_fname'] = ''
curr_gdf['image_id'] = 1
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ version = {file = ["VERSION"]}
dev = ["black", "bumpver", "isort", "pip-tools", "pytest"]

[project.urls]
Homepage = "https://github.com/valhassan/geo-inference"
Homepage = "https://github.com/NRCan/geo-inference"

[project.scripts]
geo_inference = "geo_inference.geo_inference:main"
Expand Down
Binary file not shown.
Binary file not shown.
Binary file removed tests/data/inference/test_model/test_model.pt
Binary file not shown.
68 changes: 45 additions & 23 deletions tests/test_geo_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,49 +5,71 @@
from geo_inference.geo_inference import GeoInference
from pathlib import Path


# Shared fixture: returns the "data" directory located next to this test
# module, so tests can build paths to their input files from it.
@pytest.fixture
def test_data_dir():
return Path(__file__).parent / "data"


class TestGeoInference:

@pytest.fixture
def geo_inference(self, test_data_dir):
model = str(test_data_dir / "inference"/ "test_model" / "test_model.pt")
model = str(test_data_dir / "inference" / "test_model" / "cpu_scripted.pt")
work_dir = str(test_data_dir / "inference")
mask_to_vec = True
mask_to_yolo = True
mask_to_coco = True
device = 'cpu'
device = "cpu"
gpu_id = 0
return GeoInference(model, work_dir, mask_to_vec, mask_to_yolo, mask_to_coco, device, gpu_id)
num_classes = 5
prediction_threshold = 0.3
transformer = True
transform_flip = True
transform_rotate = True
return GeoInference(
model=model,
work_dir=work_dir,
mask_to_vec=mask_to_vec,
mask_to_yolo=mask_to_yolo,
mask_to_coco=mask_to_coco,
device=device,
gpu_id=gpu_id,
multi_gpu=False,
num_classes=num_classes,
prediction_threshold=prediction_threshold,
transformers=transformer,
transformer_flip=transform_flip,
transformer_rotate=transform_rotate,
)

def test_init(self, geo_inference, test_data_dir):

assert geo_inference.work_dir == test_data_dir / "inference"
assert geo_inference.device == 'cpu'
assert geo_inference.device == "cpu"
assert geo_inference.mask_to_vec == True
assert geo_inference.mask_to_yolo == True
assert geo_inference.mask_to_coco == True
assert isinstance(geo_inference.model, torch.jit.ScriptModule)
assert geo_inference.classes >0
assert isinstance(geo_inference.model.model, torch.jit.ScriptModule)
assert geo_inference.classes > 0

def test_call(self, geo_inference, test_data_dir):
tiff_image = test_data_dir / '0.tif'
def test_call(self, geo_inference: GeoInference, test_data_dir: Path):
tiff_image = test_data_dir / "0.tif"
# bbox = '0,0,100,100'
bbox = None
patch_size = 512
bands_requested="1,2,3"
geo_inference(str(tiff_image), bands_requested, patch_size, None)
mask_path = geo_inference.work_dir / "0_mask.tif"
bands_requested = "1,2,3"
workers = 10
mask_name = geo_inference(
inference_input=str(tiff_image),
bands_requested=bands_requested,
patch_size=patch_size,
workers=workers,
bbox=bbox,
)
mask_path = geo_inference.work_dir / mask_name
assert mask_path.exists()
if geo_inference.mask_to_vec:
polygons_path = geo_inference.work_dir / "0_polygons.geojson"
yolo_csv_path = geo_inference.work_dir / "0_yolo.csv"
coco_path = geo_inference.work_dir / "0_coco.json"
assert polygons_path.exists()
assert yolo_csv_path.exists()
assert coco_path.exists()
os.remove(polygons_path)
os.remove(yolo_csv_path)
os.remove(coco_path)
os.remove(mask_path)
polygons_path = geo_inference.work_dir / "0_polygons.geojson"
assert polygons_path.exists()
os.remove(polygons_path)
os.remove(mask_path)
10 changes: 5 additions & 5 deletions tests/utils/test_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ def test_validate_asset_type(test_data_dir):
reopened_dataset = validate_asset_type(dataset)
assert reopened_dataset.name == dataset.name
assert not reopened_dataset.closed
assert validate_asset_type(local_tiff_path).name == local_tiff_path
assert Path(validate_asset_type(local_tiff_path).name) == Path(local_tiff_path)

def test_calculate_gpu_stats():
with patch('torch.cuda.utilization', return_value=50), patch('torch.cuda.mem_get_info', return_value=(500, 1000)):
Expand All @@ -105,22 +105,22 @@ def test_get_device():
with patch('geo_inference.utils.helpers.calculate_gpu_stats') as mock_calculate_gpu_stats:
mock_calculate_gpu_stats.return_value = ({"gpu": 10}, {"used": 100, "total": 1024})
device = select_model_device(gpu_id=1, multi_gpu=False)
assert device == 'cpu'
assert device == "cpu"

# Checks that get_directory('test') returns Path('test') while Path.is_dir is
# patched to report False and Path.mkdir is replaced by a mock, so any
# Path.mkdir call made by get_directory would not touch the disk.
# NOTE(review): `patch` is presumably unittest.mock.patch; its import is
# outside this view — confirm.
def test_get_directory():
with patch('pathlib.Path.is_dir', return_value=False), patch('pathlib.Path.mkdir'):
assert get_directory('test') == Path('test')

def test_get_model_local_file(test_data_dir):
model_file = test_data_dir / "inference" / "test_model" / "test_model.pt"
model_file = test_data_dir / "inference" / "test_model" / "cpu_scripted.pt"
model_path = get_model(str(model_file), test_data_dir)
assert model_path == model_file

@patch('geo_inference.utils.helpers.download_file_from_url')
def test_get_model_url(mock_download_file_from_url, test_data_dir):
mock_download_file_from_url.return_value = None
model_path = get_model("https://example.com/test_model.pt", test_data_dir)
assert model_path == test_data_dir / "test_model.pt"
model_path = get_model("https://example.com/cpu_scripted.pt", test_data_dir)
assert model_path == test_data_dir / "cpu_scripted.pt"

def test_get_model_file_not_exists(test_data_dir):
with pytest.raises(ValueError):
Expand Down

0 comments on commit ff4fed8

Please sign in to comment.