Skip to content

Commit

Permalink
this is the updated metadata preserving component
Browse files Browse the repository at this point in the history
Signed-off-by: weingartlorenz <[email protected]>
  • Loading branch information
weingartlorenz committed Sep 4, 2024
1 parent 412e9d0 commit f884ad3
Show file tree
Hide file tree
Showing 8 changed files with 204 additions and 165 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ baseCommand: "claimed"
inputs:
component:
type: string
default: docker.io/claimed/claimed-image-tiling:0.1
default: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1
inputBinding:
position: 1
prefix: --component
Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -1,157 +1,196 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf",
"metadata": {},
"source": [
"## Xview Dataset clipping\n",
"This is a component designed to clip the dataset provided by xview. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0421aaf-4da4-4a66-b626-e8962315afe8",
"metadata": {},
"outputs": [],
"source": [
"#!pip install Pillow claimed aiobotocore botocore s3fs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"from PIL import Image\n",
"from c3 import operator_utils\n",
"import s3fs\n",
"import pathlib"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8df047b-6167-43f1-9a31-d836580180ac",
"metadata": {},
"outputs": [],
"source": [
"#source is the path to the folder with the unzipped .tif images from xview dataset \n",
"source = os.environ.get(\"directory_path\")\n",
"\n",
"#destination is the path to the folder which saves all the extracted tiles. \n",
"destination = os.environ.get(\"destination\")\n",
"\n",
"#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n",
"tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n",
"tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n",
"\n",
"#stride_x is the length in pixels the sliding window is moved to the right after each step\n",
"#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n",
"stride_x = int(os.environ.get(\"stride_x\", 32))\n",
"#stride_y is the length in pixels the sliding window is moved down after completing a row\n",
"stride_y = int(os.environ.get(\"stride_y\", 32))"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387",
"metadata": {},
"outputs": [],
"source": [
"(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n",
"(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12241cb1-1d84-48a6-9f9e-ba534822a661",
"metadata": {},
"outputs": [],
"source": [
"s3source = s3fs.S3FileSystem(\n",
" anon=False,\n",
" key=access_key_id_source,\n",
" secret=secret_access_key_source,\n",
" client_kwargs={'endpoint_url': endpoint_source}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0",
"metadata": {},
"outputs": [],
"source": [
"s3destination = s3fs.S3FileSystem(\n",
" anon=False,\n",
" key=access_key_id_destination,\n",
" secret=secret_access_key_destination,\n",
" client_kwargs={'endpoint_url': endpoint_destination}\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8370f73-dcef-4865-ab70-fd0a0891ab71",
"metadata": {},
"outputs": [],
"source": [
"for item in s3source.ls(source):\n",
" s3source.get(item, item)\n",
" _, file_extension = os.path.splitext(item)\n",
" clipped_item = os.path.join(destination, item)\n",
"\n",
" image = Image.open(item)\n",
" width, height = image.size\n",
"\n",
" x_range = [0]\n",
" while(x_range[-1] + stride_x + tile_size_x < width):\n",
" x_range += [x_range[-1] + stride_x]\n",
" \n",
" y_range = [0]\n",
" while(y_range[-1] + stride_y + tile_size_y < height):\n",
" y_range += [y_range[-1] + stride_y]\n",
"\n",
" counter = 0\n",
" for x in x_range:\n",
" for y in y_range:\n",
" cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y)) \n",
" dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n",
" pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n",
" cropped.save(dest_path)\n",
" clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n",
" s3destination.put(dest_path, clipped_item_upload)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
{
"cells": [
{
"cell_type": "markdown",
"id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf",
"metadata": {},
"source": [
"## Xview Dataset clipping\n",
"This is a component designed to clip the dataset provided by xview. "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c0421aaf-4da4-4a66-b626-e8962315afe8",
"metadata": {},
"outputs": [],
"source": [
"#!pip install Pillow claimed aiobotocore botocore s3fs"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9",
"metadata": {},
"outputs": [],
"source": [
"\n",
"import os\n",
"from PIL import Image\n",
"from c3 import operator_utils\n",
"import s3fs\n",
"import pathlib\n",
"import math\n",
"from PIL import Image, TiffTags, TiffImagePlugin\n",
"import imageio\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c8df047b-6167-43f1-9a31-d836580180ac",
"metadata": {},
"outputs": [],
"source": [
"\n",
"#source is the path to the folder with the unzipped .tif images from xview dataset \n",
"source = os.environ.get(\"directory_path\")\n",
"\n",
"#destination is the path to the folder which saves all the extracted tiles. \n",
"destination = os.environ.get(\"destination\")\n",
"\n",
"#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n",
"tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n",
"tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n",
"\n",
"#stride_x is the length in pixels the sliding window is moved to the right after each step\n",
"#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n",
"stride_x = int(os.environ.get(\"stride_x\", 32))\n",
"#stride_y is the length in pixels the sliding window is moved down after completing a row\n",
"stride_y = int(os.environ.get(\"stride_y\", 32))\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387",
"metadata": {},
"outputs": [],
"source": [
"\n",
"(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n",
"(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "12241cb1-1d84-48a6-9f9e-ba534822a661",
"metadata": {},
"outputs": [],
"source": [
"\n",
"s3source = s3fs.S3FileSystem(\n",
" anon=False,\n",
" key=access_key_id_source,\n",
" secret=secret_access_key_source,\n",
" client_kwargs={'endpoint_url': endpoint_source}\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0",
"metadata": {},
"outputs": [],
"source": [
"\n",
"s3destination = s3fs.S3FileSystem(\n",
" anon=False,\n",
" key=access_key_id_destination,\n",
" secret=secret_access_key_destination,\n",
" client_kwargs={'endpoint_url': endpoint_destination}\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e8370f73-dcef-4865-ab70-fd0a0891ab71",
"metadata": {},
"outputs": [],
"source": [
"\n",
"for item in s3source.ls(source):\n",
" s3source.get(item, item)\n",
" _, file_extension = os.path.splitext(item)\n",
" clipped_item = os.path.join(destination, item)\n",
"\n",
" image = Image.open(item) \n",
" width, height = image.size\n",
"\n",
" x_range = [0]\n",
" while(x_range[-1] + stride_x + tile_size_x < width):\n",
" x_range += [x_range[-1] + stride_x]\n",
" \n",
" y_range = [0]\n",
" while(y_range[-1] + stride_y + tile_size_y < height):\n",
" y_range += [y_range[-1] + stride_y]\n",
"\n",
" og_metadata = image.tag_v2\n",
" initial_lon = og_metadata[33922][3]\n",
" initial_lat = og_metadata[33922][4]\n",
" pixel_height_deg = og_metadata[33550][1]\n",
" pixel_width_deg = og_metadata[33550][0]\n",
"\n",
" for x in x_range:\n",
" for y in y_range:\n",
" left = x * tile_size_x\n",
" upper = y * tile_size_y\n",
" right = left + tile_size_x\n",
" lower = upper + tile_size_y\n",
"\n",
" upper_left_lat = initial_lat + y * pixel_height_deg\n",
" upper_left_lon = initial_lon + x * pixel_width_deg\n",
"\n",
" cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y))\n",
" \n",
" cropped.save(\"temporary_exchange.tif\")\n",
" cropped = Image.open(\"temporary_exchange.tif\")\n",
"\n",
" available_metadata = cropped.tag_v2\n",
" \n",
" tiffinfo = TiffImagePlugin.ImageFileDirectory_v2()\n",
" \n",
" for tag, value in og_metadata.items():\n",
" if not(tag in available_metadata or tag == 339 or tag == 33922):\n",
" tiffinfo[tag] = value\n",
" \n",
" tiffinfo[33922] = (0.0,0.0,0.0, upper_left_lon, upper_left_lat, 0.0)\n",
"\n",
" dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n",
" pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n",
" \n",
" cropped.save(dest_path, tiffinfo = tiffinfo, save_all=True, compression=\"tiff_deflate\")\n",
" \n",
" clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n",
" s3destination.put(dest_path, clipped_item_upload)\n",
" "
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Empty file.
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
apiVersion: batch/v1
kind: Job
metadata:
name: image-tiling
name: image-tiling-with-metadata-adjustment
spec:
template:
spec:
containers:
- name: image-tiling
image: docker.io/claimed/claimed-image-tiling:0.1
- name: image-tiling-with-metadata-adjustment
image: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1
workingDir: /opt/app-root/src/
command: ["/opt/app-root/bin/ipython","claimed_image-tiling.ipynb"]
command: ["/opt/app-root/bin/ipython","claimed_image-tiling-with-metadata_adjustment.ipynb"]
env:
- name: log_level
value: value_of_log_level
Expand Down
Empty file.
Loading

0 comments on commit f884ad3

Please sign in to comment.