From f884ad328b996d5416bba2319e387276da914698 Mon Sep 17 00:00:00 2001 From: weingartlorenz Date: Wed, 4 Sep 2024 16:35:19 +0200 Subject: [PATCH] this is the updated metadata preserving component Signed-off-by: weingartlorenz --- ...image-tiling-with-metadata_adjustment.cwl} | 2 +- ...th-metadata_adjustment.cwl:Zone.Identifier | 0 ...age-tiling-with-metadata_adjustment.ipynb} | 353 ++++++++++-------- ...-metadata_adjustment.ipynb:Zone.Identifier | 0 ...-tiling-with-metadata_adjustment.job.yaml} | 8 +- ...tadata_adjustment.job.yaml:Zone.Identifier | 0 ...mage-tiling-with-metadata_adjustment.yaml} | 6 +- ...h-metadata_adjustment.yaml:Zone.Identifier | 0 8 files changed, 204 insertions(+), 165 deletions(-) rename component-library/transform/{image-tiling.cwl => image-tiling-with-metadata_adjustment.cwl} (91%) create mode 100644 component-library/transform/image-tiling-with-metadata_adjustment.cwl:Zone.Identifier rename component-library/transform/{image-tiling.ipynb => image-tiling-with-metadata_adjustment.ipynb} (69%) create mode 100644 component-library/transform/image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier rename component-library/transform/{image-tiling.job.yaml => image-tiling-with-metadata_adjustment.job.yaml} (76%) create mode 100644 component-library/transform/image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier rename component-library/transform/{image-tiling.yaml => image-tiling-with-metadata_adjustment.yaml} (80%) create mode 100644 component-library/transform/image-tiling-with-metadata_adjustment.yaml:Zone.Identifier diff --git a/component-library/transform/image-tiling.cwl b/component-library/transform/image-tiling-with-metadata_adjustment.cwl similarity index 91% rename from component-library/transform/image-tiling.cwl rename to component-library/transform/image-tiling-with-metadata_adjustment.cwl index 21dbc205..9dd96bde 100644 --- a/component-library/transform/image-tiling.cwl +++ b/component-library/transform/image-tiling-with-metadata_adjustment.cwl @@ -6,7 +6,7 @@ baseCommand: "claimed" inputs: component: type: string - default: docker.io/claimed/claimed-image-tiling:0.1 + default: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 inputBinding: position: 1 prefix: --component diff --git a/component-library/transform/image-tiling-with-metadata_adjustment.cwl:Zone.Identifier b/component-library/transform/image-tiling-with-metadata_adjustment.cwl:Zone.Identifier new file mode 100644 index 00000000..e69de29b diff --git a/component-library/transform/image-tiling.ipynb b/component-library/transform/image-tiling-with-metadata_adjustment.ipynb similarity index 69% rename from component-library/transform/image-tiling.ipynb rename to component-library/transform/image-tiling-with-metadata_adjustment.ipynb index 5bd170ec..179007f8 100644 --- a/component-library/transform/image-tiling.ipynb +++ b/component-library/transform/image-tiling-with-metadata_adjustment.ipynb @@ -1,157 +1,196 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf", - "metadata": {}, - "source": [ - "## Xview Dataset clipping\n", - "This is a component designed to clip the dataset provided by xview. " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c0421aaf-4da4-4a66-b626-e8962315afe8", - "metadata": {}, - "outputs": [], - "source": [ - "#!pip install Pillow claimed aiobotocore botocore s3fs" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "from PIL import Image\n", - "from c3 import operator_utils\n", - "import s3fs\n", - "import pathlib" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c8df047b-6167-43f1-9a31-d836580180ac", - "metadata": {}, - "outputs": [], - "source": [ - "#source is the path to the folder with the unzipped .tif images from xview dataset \n", - "source = os.environ.get(\"directory_path\")\n", - "\n", - "#destination is the path to the folder which saves all the extracted tiles. \n", - "destination = os.environ.get(\"destination\")\n", - "\n", - "#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n", - "tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n", - "tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n", - "\n", - "#stride_x is the length in pixels the sliding window is moved to the right after each step\n", - "#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n", - "stride_x = int(os.environ.get(\"stride_x\", 32))\n", - "#stride_y is the length in pixels the sliding window is moved down after completing a row\n", - "stride_y = int(os.environ.get(\"stride_y\", 32))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387", - "metadata": {}, - "outputs": [], - "source": [ - "(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n", - "(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12241cb1-1d84-48a6-9f9e-ba534822a661", - "metadata": {}, - "outputs": [], - "source": [ - "s3source = s3fs.S3FileSystem(\n", - " anon=False,\n", - " key=access_key_id_source,\n", - " secret=secret_access_key_source,\n", - " client_kwargs={'endpoint_url': endpoint_source}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0", - "metadata": {}, - "outputs": [], - "source": [ - "s3destination = s3fs.S3FileSystem(\n", - " anon=False,\n", - " key=access_key_id_destination,\n", - " secret=secret_access_key_destination,\n", - " client_kwargs={'endpoint_url': endpoint_destination}\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8370f73-dcef-4865-ab70-fd0a0891ab71", - "metadata": {}, - "outputs": [], - "source": [ - "for item in s3source.ls(source):\n", - " s3source.get(item, item)\n", - " _, file_extension = os.path.splitext(item)\n", - " clipped_item = os.path.join(destination, item)\n", - "\n", - " image = Image.open(item)\n", - " width, height = image.size\n", - "\n", - " x_range = [0]\n", - " while(x_range[-1] + stride_x + tile_size_x < width):\n", - " x_range += [x_range[-1] + stride_x]\n", - " \n", - " y_range = [0]\n", - " while(y_range[-1] + stride_y + tile_size_y < height):\n", - " y_range += [y_range[-1] + stride_y]\n", - "\n", - " counter = 0\n", - " for x in x_range:\n", - " for y in y_range:\n", - " cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y)) \n", - " dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n", - " pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n", - " cropped.save(dest_path)\n", - " clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n", - " s3destination.put(dest_path, clipped_item_upload)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2e8a0fac-adbd-4428-90ea-869aee2b95bf", + "metadata": {}, + "source": [ + "## Xview Dataset clipping\n", + "This is a component designed to clip the dataset provided by xview. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c0421aaf-4da4-4a66-b626-e8962315afe8", + "metadata": {}, + "outputs": [], + "source": [ + "#!pip install Pillow claimed aiobotocore botocore s3fs" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0b7ad533-dabc-488e-bf65-59a54a85f3f9", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import os\n", + "from PIL import Image\n", + "from c3 import operator_utils\n", + "import s3fs\n", + "import pathlib\n", + "import math\n", + "from PIL import Image, TiffTags, TiffImagePlugin\n", + "import imageio\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8df047b-6167-43f1-9a31-d836580180ac", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "#source is the path to the folder with the unzipped .tif images from xview dataset \n", + "source = os.environ.get(\"directory_path\")\n", + "\n", + "#destination is the path to the folder which saves all the extracted tiles. \n", + "destination = os.environ.get(\"destination\")\n", + "\n", + "#Each image is cropped using a rectangular window with edge_length tile_size_x and tile_size_y which has to be given in number of pixels \n", + "tile_size_x = int(os.environ.get(\"tile_size_x\", 64))\n", + "tile_size_y = int(os.environ.get(\"tile_size_y\", 64))\n", + "\n", + "#stride_x is the length in pixels the sliding window is moved to the right after each step\n", + "#For tumbling window stride_x must equal tile_size_x and stride_y must equal tile_size_y\n", + "stride_x = int(os.environ.get(\"stride_x\", 32))\n", + "#stride_y is the length in pixels the sliding window is moved down after completing a row\n", + "stride_y = int(os.environ.get(\"stride_y\", 32))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9a2a67d1-c8a4-49f7-b0bf-8710e27c8387", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "(access_key_id_source, secret_access_key_source, endpoint_source, source) = operator_utils.explode_connection_string(source)\n", + "(access_key_id_destination, secret_access_key_destination, endpoint_destination, destination) = operator_utils.explode_connection_string(destination)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "12241cb1-1d84-48a6-9f9e-ba534822a661", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "s3source = s3fs.S3FileSystem(\n", + " anon=False,\n", + " key=access_key_id_source,\n", + " secret=secret_access_key_source,\n", + " client_kwargs={'endpoint_url': endpoint_source}\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "24926242-b0ec-46cd-bbf3-4da1dbd989e0", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "s3destination = s3fs.S3FileSystem(\n", + " anon=False,\n", + " key=access_key_id_destination,\n", + " secret=secret_access_key_destination,\n", + " client_kwargs={'endpoint_url': endpoint_destination}\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e8370f73-dcef-4865-ab70-fd0a0891ab71", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "for item in s3source.ls(source):\n", + " s3source.get(item, item)\n", + " _, file_extension = os.path.splitext(item)\n", + " clipped_item = os.path.join(destination, item)\n", + "\n", + " image = Image.open(item) \n", + " width, height = image.size\n", + "\n", + " x_range = [0]\n", + " while(x_range[-1] + stride_x + tile_size_x < width):\n", + " x_range += [x_range[-1] + stride_x]\n", + " \n", + " y_range = [0]\n", + " while(y_range[-1] + stride_y + tile_size_y < height):\n", + " y_range += [y_range[-1] + stride_y]\n", + "\n", + " og_metadata = image.tag_v2\n", + " initial_lon = og_metadata[33922][3]\n", + " initial_lat = og_metadata[33922][4]\n", + " pixel_height_deg = og_metadata[33550][1]\n", + " pixel_width_deg = og_metadata[33550][0]\n", + "\n", + " for x in x_range:\n", + " for y in y_range:\n", + " left = x * tile_size_x\n", + " upper = y * tile_size_y\n", + " right = left + tile_size_x\n", + " lower = upper + tile_size_y\n", + "\n", + " upper_left_lat = initial_lat + y * pixel_height_deg\n", + " upper_left_lon = initial_lon + x * pixel_width_deg\n", + "\n", + " cropped = image.crop((x,y, x+tile_size_x, y+tile_size_y))\n", + " \n", + " cropped.save(\"temporary_exchange.tif\")\n", + " cropped = Image.open(\"temporary_exchange.tif\")\n", + "\n", + " available_metadata = cropped.tag_v2\n", + " \n", + " tiffinfo = TiffImagePlugin.ImageFileDirectory_v2()\n", + " \n", + " for tag, value in og_metadata.items():\n", + " if not(tag in available_metadata or tag == 339 or tag == 33922):\n", + " tiffinfo[tag] = value\n", + " \n", + " tiffinfo[33922] = (0.0,0.0,0.0, upper_left_lon, upper_left_lat, 0.0)\n", + "\n", + " dest_path = f'{clipped_item}.{x}.{y}{file_extension}'\n", + " pathlib.Path(os.path.dirname(dest_path)).mkdir(parents=True, exist_ok=True)\n", + " \n", + " cropped.save(dest_path, tiffinfo = tiffinfo, save_all=True, compression=\"tiff_deflate\")\n", + " \n", + " clipped_item_upload = os.path.join(destination, os.path.basename(dest_path))\n", + " s3destination.put(dest_path, clipped_item_upload)\n", + " " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/component-library/transform/image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier b/component-library/transform/image-tiling-with-metadata_adjustment.ipynb:Zone.Identifier new file mode 100644 index 00000000..e69de29b diff --git a/component-library/transform/image-tiling.job.yaml b/component-library/transform/image-tiling-with-metadata_adjustment.job.yaml similarity index 76% rename from component-library/transform/image-tiling.job.yaml rename to component-library/transform/image-tiling-with-metadata_adjustment.job.yaml index b6fc4c76..4b0acc51 100644 --- a/component-library/transform/image-tiling.job.yaml +++ b/component-library/transform/image-tiling-with-metadata_adjustment.job.yaml @@ -1,15 +1,15 @@ apiVersion: batch/v1 kind: Job metadata: - name: image-tiling + name: image-tiling-with-metadata-adjustment spec: template: spec: containers: - - name: image-tiling - image: docker.io/claimed/claimed-image-tiling:0.1 + - name: image-tiling-with-metadata-adjustment + image: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 workingDir: /opt/app-root/src/ - command: ["/opt/app-root/bin/ipython","claimed_image-tiling.ipynb"] + command: ["/opt/app-root/bin/ipython","claimed_image-tiling-with-metadata_adjustment.ipynb"] env: - name: log_level value: value_of_log_level diff --git a/component-library/transform/image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier b/component-library/transform/image-tiling-with-metadata_adjustment.job.yaml:Zone.Identifier new file mode 100644 index 00000000..e69de29b diff --git a/component-library/transform/image-tiling.yaml b/component-library/transform/image-tiling-with-metadata_adjustment.yaml similarity index 80% rename from component-library/transform/image-tiling.yaml rename to component-library/transform/image-tiling-with-metadata_adjustment.yaml index e49b07f3..75658e65 100644 --- a/component-library/transform/image-tiling.yaml +++ b/component-library/transform/image-tiling-with-metadata_adjustment.yaml @@ -1,4 +1,4 @@ -name: image-tiling +name: image-tiling-with-metadata-adjustment description: "## Xview Dataset clipping – CLAIMED V0.1" inputs: @@ -16,12 +16,12 @@ outputs: implementation: container: - image: docker.io/claimed/claimed-image-tiling:0.1 + image: docker.io/lorenzweingart/claimed-image-tiling-with-metadata-adjustment:0.1 command: - sh - -ec - | - ipython ./claimed_image-tiling.ipynb log_level="${0}" directory_path="${1}" destination="${2}" tile_size_x="${3}" tile_size_y="${4}" stride_x="${5}" stride_y="${6}" + ipython ./claimed_image-tiling-with-metadata_adjustment.ipynb log_level="${0}" directory_path="${1}" destination="${2}" tile_size_x="${3}" tile_size_y="${4}" stride_x="${5}" stride_y="${6}" - {inputValue: log_level} - {inputValue: directory_path} - {inputValue: destination} diff --git a/component-library/transform/image-tiling-with-metadata_adjustment.yaml:Zone.Identifier b/component-library/transform/image-tiling-with-metadata_adjustment.yaml:Zone.Identifier new file mode 100644 index 00000000..e69de29b