-
Notifications
You must be signed in to change notification settings - Fork 4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #282 from weingartlorenz/main
this is the updated metadata preserving component
- Loading branch information
Showing
8 changed files
with
204 additions
and
165 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
353 changes: 196 additions & 157 deletions
353
...nent-library/transform/image-tiling.ipynb → ...age-tiling-with-metadata_adjustment.ipynb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,157 +1,196 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf", | ||
"metadata": {}, | ||
"source": [ | ||
"## Xview Dataset clipping\n", | ||
"This is a component designed to clip the dataset provided by xview. " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c0421aaf-4da4-4a66-b626-e8962315afe8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#!pip install Pillow claimed aiobotocore botocore s3fs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import os\n", | ||
"from PIL import Image\n", | ||
"from c3 import operator_utils\n", | ||
"import s3fs\n", | ||
"import pathlib" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c8df047b-6167-43f1-9a31-d836580180ac", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#source is the path to the folder with the unzipped .tif images from xview dataset \n", | ||
"source = os.environ.get(\"directory_path\")\n", | ||
"\n", | ||
"#destination is the path to the folder which saves all the extracted tiles. \n", | ||
"destination = os.environ.get(\"destination\")\n", | ||
"\n", | ||
"#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n", | ||
"tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n", | ||
"tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n", | ||
"\n", | ||
"#stride_x is the length in pixels the sliding window is moved to the right after each step\n", | ||
"#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n", | ||
"stride_x = int(os.environ.get(\"stride_x\", 32))\n", | ||
"#stride_y is the length in pixels the sliding window is moved down after completing a row\n", | ||
"stride_y = int(os.environ.get(\"stride_y\", 32))" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n", | ||
"(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "12241cb1-1d84-48a6-9f9e-ba534822a661", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"s3source = s3fs.S3FileSystem(\n", | ||
" anon=False,\n", | ||
" key=access_key_id_source,\n", | ||
" secret=secret_access_key_source,\n", | ||
" client_kwargs={'endpoint_url': endpoint_source}\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"s3destination = s3fs.S3FileSystem(\n", | ||
" anon=False,\n", | ||
" key=access_key_id_destination,\n", | ||
" secret=secret_access_key_destination,\n", | ||
" client_kwargs={'endpoint_url': endpoint_destination}\n", | ||
")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e8370f73-dcef-4865-ab70-fd0a0891ab71", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"for item in s3source.ls(source):\n", | ||
" s3source.get(item, item)\n", | ||
" _, file_extension = os.path.splitext(item)\n", | ||
" clipped_item = os.path.join(destination, item)\n", | ||
"\n", | ||
" image = Image.open(item)\n", | ||
" width, height = image.size\n", | ||
"\n", | ||
" x_range = [0]\n", | ||
" while(x_range[-1] + stride_x + tile_size_x < width):\n", | ||
" x_range += [x_range[-1] + stride_x]\n", | ||
" \n", | ||
" y_range = [0]\n", | ||
" while(y_range[-1] + stride_y + tile_size_y < height):\n", | ||
" y_range += [y_range[-1] + stride_y]\n", | ||
"\n", | ||
" counter = 0\n", | ||
" for x in x_range:\n", | ||
" for y in y_range:\n", | ||
" cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y)) \n", | ||
" dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n", | ||
" pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n", | ||
" cropped.save(dest_path)\n", | ||
" clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n", | ||
" s3destination.put(dest_path, clipped_item_upload)" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.12.3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf", | ||
"metadata": {}, | ||
"source": [ | ||
"## Xview Dataset clipping\n", | ||
"This is a component designed to clip the dataset provided by xview. " | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c0421aaf-4da4-4a66-b626-e8962315afe8", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"#!pip install Pillow claimed aiobotocore botocore s3fs" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"import os\n", | ||
"from PIL import Image\n", | ||
"from c3 import operator_utils\n", | ||
"import s3fs\n", | ||
"import pathlib\n", | ||
"import math\n", | ||
"from PIL import Image, TiffTags, TiffImagePlugin\n", | ||
"import imageio\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "c8df047b-6167-43f1-9a31-d836580180ac", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"#source is the path to the folder with the unzipped .tif images from xview dataset \n", | ||
"source = os.environ.get(\"directory_path\")\n", | ||
"\n", | ||
"#destination is the path to the folder which saves all the extracted tiles. \n", | ||
"destination = os.environ.get(\"destination\")\n", | ||
"\n", | ||
"#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n", | ||
"tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n", | ||
"tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n", | ||
"\n", | ||
"#stride_x is the length in pixels the sliding window is moved to the right after each step\n", | ||
"#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n", | ||
"stride_x = int(os.environ.get(\"stride_x\", 32))\n", | ||
"#stride_y is the length in pixels the sliding window is moved down after completing a row\n", | ||
"stride_y = int(os.environ.get(\"stride_y\", 32))\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n", | ||
"(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "12241cb1-1d84-48a6-9f9e-ba534822a661", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"s3source = s3fs.S3FileSystem(\n", | ||
" anon=False,\n", | ||
" key=access_key_id_source,\n", | ||
" secret=secret_access_key_source,\n", | ||
" client_kwargs={'endpoint_url': endpoint_source}\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"s3destination = s3fs.S3FileSystem(\n", | ||
" anon=False,\n", | ||
" key=access_key_id_destination,\n", | ||
" secret=secret_access_key_destination,\n", | ||
" client_kwargs={'endpoint_url': endpoint_destination}\n", | ||
")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"id": "e8370f73-dcef-4865-ab70-fd0a0891ab71", | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"\n", | ||
"for item in s3source.ls(source):\n", | ||
" s3source.get(item, item)\n", | ||
" _, file_extension = os.path.splitext(item)\n", | ||
" clipped_item = os.path.join(destination, item)\n", | ||
"\n", | ||
" image = Image.open(item) \n", | ||
" width, height = image.size\n", | ||
"\n", | ||
" x_range = [0]\n", | ||
" while(x_range[-1] + stride_x + tile_size_x < width):\n", | ||
" x_range += [x_range[-1] + stride_x]\n", | ||
" \n", | ||
" y_range = [0]\n", | ||
" while(y_range[-1] + stride_y + tile_size_y < height):\n", | ||
" y_range += [y_range[-1] + stride_y]\n", | ||
"\n", | ||
" og_metadata = image.tag_v2\n", | ||
" initial_lon = og_metadata[33922][3]\n", | ||
" initial_lat = og_metadata[33922][4]\n", | ||
" pixel_height_deg = og_metadata[33550][1]\n", | ||
" pixel_width_deg = og_metadata[33550][0]\n", | ||
"\n", | ||
" for x in x_range:\n", | ||
" for y in y_range:\n", | ||
" left = x * tile_size_x\n", | ||
" upper = y * tile_size_y\n", | ||
" right = left + tile_size_x\n", | ||
" lower = upper + tile_size_y\n", | ||
"\n", | ||
" upper_left_lat = initial_lat + y * pixel_height_deg\n", | ||
" upper_left_lon = initial_lon + x * pixel_width_deg\n", | ||
"\n", | ||
" cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y))\n", | ||
" \n", | ||
" cropped.save(\"temporary_exchange.tif\")\n", | ||
" cropped = Image.open(\"temporary_exchange.tif\")\n", | ||
"\n", | ||
" available_metadata = cropped.tag_v2\n", | ||
" \n", | ||
" tiffinfo = TiffImagePlugin.ImageFileDirectory_v2()\n", | ||
" \n", | ||
" for tag, value in og_metadata.items():\n", | ||
" if not(tag in available_metadata or tag == 339 or tag == 33922):\n", | ||
" tiffinfo[tag] = value\n", | ||
" \n", | ||
" tiffinfo[33922] = (0.0,0.0,0.0, upper_left_lon, upper_left_lat, 0.0)\n", | ||
"\n", | ||
" dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n", | ||
" pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n", | ||
" \n", | ||
" cropped.save(dest_path, tiffinfo = tiffinfo, save_all=True, compression=\"tiff_deflate\")\n", | ||
" \n", | ||
" clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n", | ||
" s3destination.put(dest_path, clipped_item_upload)\n", | ||
" " | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3 (ipykernel)", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.9" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 5 | ||
} |
Empty file.
8 changes: 4 additions & 4 deletions
8
...t-library/transform/image-tiling.job.yaml → ...-tiling-with-metadata_adjustment.job.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.