Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve upload to cos operator #252

Merged
merged 6 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions component-library/filter/filter.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ baseCommand: "claimed"
inputs:
component:
type: string
default: romeokienzler/claimed-filter:0.5
default: romeokienzler/claimed-filter:0.6
inputBinding:
position: 1
prefix: --component
Expand Down Expand Up @@ -35,8 +35,5 @@ inputs:
position: 5
prefix: --output_file_name

outputs:
dummy_out:
type: File
outputBinding:
glob: query_result.csv

outputs: []
4 changes: 2 additions & 2 deletions component-library/filter/filter.job.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ spec:
spec:
containers:
- name: filter
image: romeokienzler/claimed-filter:0.5
image: romeokienzler/claimed-filter:0.6
workingDir: /opt/app-root/src/
command: ["/opt/app-root/bin/ipython","filter.py"]
command: ["/opt/app-root/bin/ipython","claimed_filter.ipynb"]
env:
- name: log_level
value: value_of_log_level
Expand Down
6 changes: 3 additions & 3 deletions component-library/filter/filter.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Component name; matches the container name used in filter.job.yaml rather
# than the name of the last input parameter.
name: filter
description: "# filter Filters rows based on predicate on pandas data frame Example 'predicate=~metadata.filename.str.contains('.gz') ' => filters all rows where column 'filename' contains '.gz' – CLAIMED V0.1"
description: "# filter Filters rows based on predicate on pandas data frame – CLAIMED V0.1"

inputs:
- {name: log_level, type: String, description: "update log level", default: "INFO"}
Expand All @@ -13,12 +13,12 @@ outputs:

implementation:
container:
image: romeokienzler/claimed-output_file_name:0.5
# Same image that filter.cwl and filter.job.yaml reference for this component.
image: romeokienzler/claimed-filter:0.6
command:
- sh
- -ec
- |
ipython ./filter.py log_level="${0}" predicate="${1}" file_name="${2}" output_file_name="${3}"
ipython ./claimed_filter.ipynb log_level="${0}" predicate="${1}" file_name="${2}" output_file_name="${3}"
- {inputValue: log_level}
- {inputValue: predicate}
- {inputValue: file_name}
Expand Down
45 changes: 45 additions & 0 deletions component-library/output/upload-to-cos.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# CWL CommandLineTool that invokes the `claimed` command for the upload-to-cos
# component. Each input is bound to a `--<name>` prefix and placed on the
# command line in the order given by its inputBinding position.
cwlVersion: v1.2
class: CommandLineTool

baseCommand: "claimed"

inputs:
  component:
    type: string
    default: romeokienzler/claimed-upload-to-cos:0.8
    inputBinding:
      position: 1
      prefix: --component
  log_level:
    type: string
    default: "INFO"
    inputBinding:
      position: 2
      prefix: --log_level
  target:
    type: string
    # Quoted so the placeholder is unambiguously the literal string "None".
    default: "None"
    inputBinding:
      position: 3
      prefix: --target
  source_file_pattern:
    type: string
    default: "None"
    inputBinding:
      position: 4
      prefix: --source_file_pattern
  find_recursive:
    # CWL has no `bool` type; the primitive boolean type is spelled `boolean`.
    # NOTE(review): for a boolean input CWL emits only the prefix when the
    # value is true and nothing when it is false - confirm the `claimed` CLI
    # accepts a bare `--find_recursive` flag.
    type: boolean
    default: true
    inputBinding:
      position: 5
      prefix: --find_recursive
  process_target_file_pattern:
    type: string
    default: "None"
    inputBinding:
      position: 6
      prefix: --process_target_file_pattern

# The tool declares no outputs.
outputs: []
4 changes: 2 additions & 2 deletions component-library/output/upload-to-cos.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
},
"outputs": [],
"source": [
"#!pip install s3fs"
"#!pip install aiobotocore botocore s3fs"
]
},
{
Expand Down Expand Up @@ -186,7 +186,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.8"
"version": "3.11.7"
},
"papermill": {
"default_parameters": {},
Expand Down
26 changes: 26 additions & 0 deletions component-library/output/upload-to-cos.job.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# Kubernetes Job that runs the upload-to-cos notebook with ipython inside the
# component image. The `value_of_*` strings and `image_pull_secret` are
# placeholders as written in this manifest.
apiVersion: batch/v1
kind: Job
metadata:
  name: upload-to-cos
spec:
  template:
    spec:
      containers:
        - name: upload-to-cos
          image: romeokienzler/claimed-upload-to-cos:0.8
          workingDir: /opt/app-root/src/
          command:
            - "/opt/app-root/bin/ipython"
            - "claimed_upload-to-cos.ipynb"
          # One environment variable per component parameter.
          env:
            - name: log_level
              value: value_of_log_level
            - name: target
              value: value_of_target
            - name: source_file_pattern
              value: value_of_source_file_pattern
            - name: find_recursive
              value: value_of_find_recursive
            - name: process_target_file_pattern
              value: value_of_process_target_file_pattern
      restartPolicy: OnFailure
      imagePullSecrets:
        - name: image_pull_secret
34 changes: 14 additions & 20 deletions component-library/output/upload-to-cos.yaml
Original file line number Diff line number Diff line change
@@ -1,33 +1,27 @@
name: output-upload-to-cos
description: Uploads a file to any S3 compliant Cloud Object Storage CLAIMED v0.2n
# Component name; matches the Job name in upload-to-cos.job.yaml rather than
# the name of the last input parameter.
name: upload-to-cos
description: "# output-upload-to-cos Uploads a file to any S3 compliant Cloud Object Storage CLAIMED V0.1"

inputs:
- {name: access_key_id, type: String, description: 'access key id'}
- {name: secret_access_key, type: String, description: 'secret access key'}
- {name: endpoint, type: String, description: 'cos/s3 endpoint'}
- {name: bucket_name, type: String, description: 'cos bucket name'}
- {name: source_file, type: OutputPath, description: 'source file to be uploaded'}
- {name: destination_file, type: String, description: 'destination file name'}
- {name: data_dir, type: String, description: 'temporary data folder'}
# Component inputs; each one is forwarded to the notebook through the
# inputValue placeholders of the implementation command.
- {name: log_level, type: String, description: "update log level", default: "INFO"}
- {name: target, type: String, description: "target in format: cos://access_key_id:secret_access_key@endpoint/bucket/path"}
- {name: source_file_pattern, type: String, description: "source folder and file pattern (glob)"}
# Default is the plain string "True" (a stray apostrophe inside the quotes was removed).
- {name: find_recursive, type: Boolean, description: "find_recursive, if True, will search for files in subfolders specified in source_file_pattern. Default is True", default: "True"}
- {name: process_target_file_pattern, type: String, description: "process source file path on target using regex. Default is None", default: "None"}


outputs:
- {name: output_dummy, type: String, description: 'dummy_output (to be fixed once C3 supports < 1 outputs)'}


implementation:
container:
image: romeokienzler/claimed-output-upload-to-cos:0.2n
# Same image that upload-to-cos.cwl and upload-to-cos.job.yaml reference for
# this component.
image: romeokienzler/claimed-upload-to-cos:0.8
command:
- sh
- -ec
- |
ipython ./upload-to-cos.ipynb output_dummy="$0" access_key_id="$1" secret_access_key="$2" endpoint="$3" bucket_name="$4" source_file="$5" destination_file="$6" data_dir="$7"
- {outputPath: output_dummy}
- {inputValue: access_key_id}
- {inputValue: secret_access_key}
- {inputValue: endpoint}
- {inputValue: bucket_name}
- {inputValue: source_file}
- {inputValue: destination_file}
- {inputValue: data_dir}
ipython ./claimed_upload-to-cos.ipynb log_level="${0}" target="${1}" source_file_pattern="${2}" find_recursive="${3}" process_target_file_pattern="${4}"
- {inputValue: log_level}
- {inputValue: target}
- {inputValue: source_file_pattern}
- {inputValue: find_recursive}
- {inputValue: process_target_file_pattern}
Loading