From 969e456302c087a29bba4b30579e04ce0a792166 Mon Sep 17 00:00:00 2001 From: gmanipon Date: Thu, 12 Oct 2017 21:39:33 +0000 Subject: [PATCH] initial clean import; archived history at archived_history.txt --- COPYING | 14 + LICENSE | 201 +++++ README.md | 7 + archived_history.txt | 749 ++++++++++++++++++ aws_get.py | 103 +++ aws_get.sh | 30 + docker/.dockerignore | 2 + docker/Dockerfile | 29 + docker/hysds-io.json.lightweight-echo | 11 + .../hysds-io.json.lw-mozart-notify-by-email | 33 + docker/hysds-io.json.lw-mozart-purge | 24 + docker/hysds-io.json.lw-mozart-retry | 23 + docker/hysds-io.json.lw-mozart-revoke | 24 + docker/hysds-io.json.lw-tosca-aws_get | 22 + docker/hysds-io.json.lw-tosca-notify-by-email | 33 + docker/hysds-io.json.lw-tosca-purge | 24 + docker/hysds-io.json.lw-tosca-wget | 22 + docker/job-spec.json.lightweight-echo | 10 + .../job-spec.json.lw-mozart-notify-by-email | 27 + docker/job-spec.json.lw-mozart-purge | 19 + docker/job-spec.json.lw-mozart-retry | 19 + docker/job-spec.json.lw-mozart-revoke | 20 + docker/job-spec.json.lw-tosca-aws_get | 20 + docker/job-spec.json.lw-tosca-notify-by-email | 27 + docker/job-spec.json.lw-tosca-purge | 20 + docker/job-spec.json.lw-tosca-wget | 20 + lib/__init__.py | 0 lib/get_component_configuration.py | 16 + notify_by_email.py | 233 ++++++ notify_by_email.sh | 37 + purge.py | 117 +++ purge.sh | 32 + retry.py | 130 +++ retry.sh | 30 + settings.json | 11 + wget.py | 147 ++++ wget.sh | 30 + 37 files changed, 2316 insertions(+) create mode 100644 COPYING create mode 100644 LICENSE create mode 100644 README.md create mode 100644 archived_history.txt create mode 100644 aws_get.py create mode 100755 aws_get.sh create mode 100644 docker/.dockerignore create mode 100644 docker/Dockerfile create mode 100644 docker/hysds-io.json.lightweight-echo create mode 100644 docker/hysds-io.json.lw-mozart-notify-by-email create mode 100644 docker/hysds-io.json.lw-mozart-purge create mode 100644 docker/hysds-io.json.lw-mozart-retry create mode 100644 docker/hysds-io.json.lw-mozart-revoke create mode 100644 docker/hysds-io.json.lw-tosca-aws_get create mode 100644 docker/hysds-io.json.lw-tosca-notify-by-email create mode 100644 docker/hysds-io.json.lw-tosca-purge create mode 100644 docker/hysds-io.json.lw-tosca-wget create mode 100644 docker/job-spec.json.lightweight-echo create mode 100644 docker/job-spec.json.lw-mozart-notify-by-email create mode 100644 docker/job-spec.json.lw-mozart-purge create mode 100644 docker/job-spec.json.lw-mozart-retry create mode 100644 docker/job-spec.json.lw-mozart-revoke create mode 100644 docker/job-spec.json.lw-tosca-aws_get create mode 100644 docker/job-spec.json.lw-tosca-notify-by-email create mode 100644 docker/job-spec.json.lw-tosca-purge create mode 100644 docker/job-spec.json.lw-tosca-wget create mode 100644 lib/__init__.py create mode 100644 lib/get_component_configuration.py create mode 100644 notify_by_email.py create mode 100755 notify_by_email.sh create mode 100644 purge.py create mode 100755 purge.sh create mode 100644 retry.py create mode 100755 retry.sh create mode 100644 settings.json create mode 100644 wget.py create mode 100755 wget.sh diff --git a/COPYING b/COPYING new file mode 100644 index 0000000..3701087 --- /dev/null +++ b/COPYING @@ -0,0 +1,14 @@ +Copyright 2017, California Institute of Technology. + ALL RIGHTS RESERVED. + U.S. Government Sponsorship acknowledged. + +Any commercial use must be negotiated with the Office of Technology +Transfer t the California Institute of Technology. 
+ +This software may be subject to U.S. export control laws and +regulations. By accepting this document, the user agrees to comply +with all applicable U.S. export laws and regulations. + +User has the responsibility to obtain export licenses, or other export +authority as may be required before exporting such information to +foreign countries or providing access to foreign persons. diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..0cbb939 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2017 California Institute of Technology. + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md new file mode 100644 index 0000000..29c995b --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# lightweight-jobs +Set of lightweight HySDS system jobs + +# HySDS system jobs +- job retry +- job purge +- notify by email diff --git a/archived_history.txt b/archived_history.txt new file mode 100644 index 0000000..4312481 --- /dev/null +++ b/archived_history.txt @@ -0,0 +1,749 @@ +commit c513c6f71ed4d0a9940413b44d478e25601568f0 +Merge: 1ca0f6b 4ee0aba +Author: Namrata Malarout +Date: Mon Sep 25 08:48:51 2017 -0700 + + Merge pull request #1 from hysds-org/gman-dev + + AWS get script + +commit 4ee0aba2578abefca680a4f2c16eb64a3363d073 +Author: gmanipon +Date: Sat Sep 23 21:51:49 2017 +0000 + + comment out query + +commit f2df561be829aee73e926c103f6f09ce3c0c014d +Author: gmanipon +Date: Sat Sep 23 21:45:53 2017 +0000 + + truncate leading / in path + +commit 19357eb02e6af1a4db3ce36d685235f81df8c914 +Author: gmanipon +Date: Sat Sep 23 21:37:52 2017 +0000 + + implement generation of AWS get script + +commit f540ab8b8f66e6a7c0ff4660aa6f2af3e0c5b2f0 +Author: gmanipon +Date: Sat Sep 23 21:19:14 2017 +0000 + + initial PGE configs/scripts for AWS get + +commit 1ca0f6b70e043d41f4a04fc2fd6d67d957623992 +Author: gmanipon +Date: Tue Aug 15 17:39:33 2017 +0000 + + fix disk_usage param + +commit f735884c2b6304943c7800707f5c35ecca13b00f +Author: gmanipon +Date: Fri Aug 4 15:07:33 2017 -0700 + + resolve hostname to generate fully qualified email address + +commit 155501f3033c1995bb23cfd8b391d3d257e04066 +Author: gmanipon +Date: Fri Aug 4 09:06:53 2017 -0700 + + continue with rest of jobs instead of exiting out + +commit 5f57c3c0bf09da3502d092e2eac682ab09839d27 +Author: gmanipon +Date: Fri Aug 4 08:55:06 2017 -0700 + + sprinkle random sleeps to reduce ES merge throttling + +commit ad76370d4034b5c68cae9e2f2f50ae7e8ad2085c +Author: Malarout +Date: Thu Aug 3 12:53:36 2017 -0700 + + malarout: setting dedup to false in retry + +commit c5e92503f9b01b0c929150c1a33ed2fac21c4a90 +Author: Malarout +Date: Thu Aug 3 12:31:47 2017 -0700 + + malarout: setting dedup to false in retry + +commit b1f1953901e18cf2944cb5cda060e8be067fc381 +Author: Malarout +Date: Thu Aug 3 10:08:35 2017 -0700 + + malarout: assign retry_count_max + +commit 5fecac403bb32fa1d1b072b56233c93a176e6a0a +Author: Malarout +Date: Wed Aug 2 14:37:29 2017 -0700 + + 
malarout: removing logger debug line + +commit cb0172e1d303deed4fc5b1aef84f07b223e6097c +Author: Malarout +Date: Wed Aug 2 12:30:22 2017 -0700 + + malarout: updated wroung index + +commit 0d83573b421122618b592b2ceebc4953df8465c6 +Author: Malarout +Date: Wed Aug 2 12:06:11 2017 -0700 + + changing retry to use job id as input + +commit 33a906ceaa46fe8bfa2d01201ac5a7d13d1730b9 +Author: Malarout +Date: Tue Jul 18 11:49:29 2017 -0700 + + malarout: addded From address to email + +commit dceb109713c2189c1f08578213823cc1b790bf02 +Author: Malarout +Date: Wed May 10 12:07:53 2017 -0700 + + malarout:wget script bug fix + +commit 518b23197c7bfeb767da0db2f6d94f74c71912da +Author: Malarout +Date: Wed May 10 11:12:21 2017 -0700 + + malarout:wget script tar attachment + +commit bcd203ba84816a25f57713011d272ffa0652c724 +Author: Malarout +Date: Mon May 8 10:56:10 2017 -0700 + + malarout: tar zipped wget script + +commit 1aae77cdb9c427892096198fea1eb59c43c52544 +Author: Malarout +Date: Tue May 2 11:09:27 2017 -0700 + + malarout: wget fix + +commit 6db04c00fd40a4176d1e7cb4ea4a9586526f1a17 +Author: M Starch +Date: Wed Apr 19 23:11:30 2017 +0000 + + mstarch: fixing email + +commit 1f5945fca4b4d50148b61e1a1b9b2f6613b18d78 +Author: Namrata Malarout +Date: Wed Apr 19 15:54:30 2017 -0700 + + malarout: update purge to handle all forms of passthrough query + +commit 8999257d607677d863ef24e112c29255cc8e2c32 +Merge: d8ccbaa 588519d +Author: jlinick +Date: Wed Apr 19 20:56:54 2017 +0000 + + Merge branch 'master-mstarch' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit d8ccbaa4461b6f0afaf9632c562b4c810dd3878d +Author: Malarout +Date: Wed Apr 19 11:45:36 2017 -0700 + + malarout: added input field specs + +commit 7f77b00a29d0dc22866d966868ce8f27749485da +Author: Namrata Malarout +Date: Tue Apr 18 10:47:09 2017 -0700 + + malarout: Fixed query object construction + +commit 7dfcfc19e12eb891904cc815423546328232890a +Author: Namrata Malarout +Date: Thu Apr 6 20:30:17 2017 -0700 + + fix for decoded passthrough + +commit 588519d56f2952491357b71a884893bf2ce51895 +Author: M Starch +Date: Tue Apr 4 19:53:03 2017 +0000 + + mstarch: working retry with internal iteration + +commit cb8d594b8298dcf2159a16db5be0fd4a98963148 +Author: M Starch +Date: Tue Apr 4 19:15:26 2017 +0000 + + mstarch: making retry properly iterate the job sets + +commit f2abdb36ae805f462cdd881bc5b535bdb50a0726 +Author: M Starch +Date: Wed Mar 22 23:15:50 2017 +0000 + + mstarch: undoing misguided fix + +commit 8572316b1c3c0f939ac70033c5357aeca3112e54 +Author: M Starch +Date: Wed Mar 22 23:09:31 2017 +0000 + + mstarch: fixing submission_type + +commit 88b088524e26fb1ed9a30f26b8499e9b3118a655 +Author: M Starch +Date: Wed Mar 22 22:15:31 2017 +0000 + + mstarch: fixing purge and wget query parameter handling and job-spec definitions + +commit 113e9e21473e5316718ce0651e33b5983f1204f7 +Author: gmanipon +Date: Tue Mar 14 18:20:50 2017 +0000 + + standardize on hysds/pge-base:latest + +commit 1c3059e7b0dd66fff63259913acbfe998e5a2a2c +Author: gmanipon +Date: Mon Mar 13 16:19:39 2017 +0000 + + fix query for objectid and create clean url + +commit 1bad4c95818c263ba37046c06c599f500f575ca6 +Author: gmanipon +Date: Mon Mar 13 15:54:03 2017 +0000 + + use TOSCA_URL config for link to product + +commit 6ef3cc9bc63f3c2feda6f76882155579a3d775f3 +Author: gmanipon +Date: Wed Mar 8 15:51:49 2017 +0000 + + yet another test of release tagging + +commit 86beba00e7855b4abfe8577fc543551ca5d9532e +Author: gmanipon +Date: Wed Mar 8 15:22:50 2017 +0000 + + test jenkins config 
for building release tags + +commit d19a40b8aaa71e001e8248daca7e2572ff562ff6 +Author: gmanipon +Date: Wed Mar 8 15:10:55 2017 +0000 + + add header to README.md; need commit for new release + +commit f7adf00ca07583dc045e4e03ccc45c94c1fd2bcb +Author: gmanipon +Date: Mon Mar 6 19:19:19 2017 +0000 + + malarout: fixed all logic errors in retry.py + +commit a1d555873ff5b3685d5b1db41f9c75aedc920c47 +Author: gmanipon +Date: Mon Mar 6 18:33:14 2017 +0000 + + malarout: fixed syntax error in retry.py + +commit 0f1f46282d8c66049b34b70ae9a286d4bc0ca7e2 +Author: gmanipon +Date: Mon Mar 6 18:20:36 2017 +0000 + + malarout: renamed variable to retry_count_max + +commit f73eb86ed01fe8a0f66db17caaa2110594958950 +Author: gmanipon +Date: Mon Mar 6 18:14:02 2017 +0000 + + malarout: added retry count to jobs + +commit 97e32eebef1ca0e597743b7ec327dae7e866dc88 +Author: gmanipon +Date: Wed Mar 1 22:11:11 2017 +0000 + + malarout: fixed condition to check for type 'worker' + +commit a6f595c0ff9d460928045b4c2b5eda95bd731ee7 +Author: gmanipon +Date: Wed Mar 1 21:55:23 2017 +0000 + + malarout: fixed condition to check for type 'worker' + +commit b5f1f053eb75801db07048cbd41a8204ad821296 +Author: gmanipon +Date: Mon Feb 27 22:16:06 2017 +0000 + + remove job-iterator script and job spec; migrated to task + +commit 52860475264d7e5a41b0eace30cc5c8cb972418e +Author: N Malarout +Date: Thu Feb 23 18:22:39 2017 +0000 + + malarout: Added check in retry job to avoid retrying workers + +commit d6b8e6baeb3dcc34513a8cdb04aed864d8d62967 +Author: N Malarout +Date: Wed Feb 22 23:22:47 2017 +0000 + + malarout: modified label for retry job + +commit 5db583a45e4715094940e20447443fd661f91af5 +Merge: ede326c 5f90ef6 +Author: M Starch +Date: Thu Feb 16 00:44:57 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit ede326c47e4944cc87b9f8aca408b9b9045149b3 +Author: M Starch +Date: Thu Feb 16 00:40:56 2017 +0000 + + mstarch: fixing itertools -> functools + +commit 5f90ef601be803634813b93fdf1bc21c7df02300 +Author: N Malarout +Date: Wed Feb 15 19:54:40 2017 +0000 + + Revert "malarout: Added create AOI job" + + This reverts commit 20c5be2993313d984b30e7e703587631ab5dc9ec. 
+ +commit c10091165412317cbd4e0656241430f631cabe1c +Author: M Starch +Date: Wed Feb 15 17:59:04 2017 +0000 + + mstarch: fixing job iterator job + +commit 20c5be2993313d984b30e7e703587631ab5dc9ec +Author: N Malarout +Date: Wed Feb 15 15:42:07 2017 +0000 + + malarout: Added create AOI job + +commit 3539a06bd7dd0acf55662dc5f89aba5928e1889c +Author: N Malarout +Date: Thu Feb 9 01:24:55 2017 +0000 + + malarout: added required-queues to job specs + +commit 3bdcd46359521a07322bfeb82861a23583aba4a2 +Author: N Malarout +Date: Thu Feb 9 00:17:57 2017 +0000 + + malarout: corrected typo in wget script + +commit f622541d85e364f19849523e71fa23e1cd976aab +Merge: bd52746 53a755f +Author: N Malarout +Date: Thu Feb 9 00:09:00 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit bd52746d67fd4118a57da6a91aeb907d8e162d12 +Author: N Malarout +Date: Thu Feb 9 00:08:00 2017 +0000 + + malarout: modified checks in wget to be more generic + +commit 53a755f8a9d4a8a2fedd9d5b692b708c64f6cc2a +Author: M Starch +Date: Wed Feb 8 21:19:30 2017 +0000 + + mstarch: checking for figaro component + +commit 8523e09d43cc4fac7b376a64fd241bf88320c2da +Author: N Malarout +Date: Wed Feb 8 18:52:02 2017 +0000 + + malarout: wget fix for grfn-ops + +commit 97023af505574d62019d7539cd1b80f03278b3da +Author: N Malarout +Date: Wed Feb 8 01:53:23 2017 +0000 + + malarout: fix for wget script + +commit 276c7a8ffbf30338e2c41b12e49958c7cd3b7e0c +Author: N Malarout +Date: Fri Feb 3 00:20:10 2017 +0000 + + malarout: fixes for purge and retry + +commit 9b4b7ccab0e11db1d31be4ddbc90f7c185e2d1a5 +Author: N Malarout +Date: Thu Feb 2 07:41:27 2017 +0000 + + malarout: updated field for job id for retry job + +commit 19a0c8ec3e109055e28d425cec838d3b30e2403a +Author: N Malarout +Date: Thu Feb 2 04:35:12 2017 +0000 + + small fix for retry job + +commit 83ff1d782ce7b8abc4e692153a4d834ebef3c632 +Author: N Malarout +Date: Thu Feb 2 01:55:49 2017 +0000 + + malarout: modified permission for retry.sh + +commit 7fe2d379f9a9e69b66f47e456725f2b338703046 +Author: N Malarout +Date: Thu Feb 2 01:36:39 2017 +0000 + + malarout: corrected typo in mozart retry + +commit f5b27a8b94d305b2d40f1c6f215a76ddb9674f54 +Author: N Malarout +Date: Thu Feb 2 01:12:58 2017 +0000 + + malarout: corrected key names for mozart from celery config + +commit c9d300882606f8d562352662efa3226471edcce2 +Author: N Malarout +Date: Thu Feb 2 00:15:24 2017 +0000 + + malarout: updated mozart hysds io for email + +commit ce74c2f79d5534db16f7cc43b1a7c1710458adff +Merge: 1d2aab4 e62ed73 +Author: N Malarout +Date: Wed Feb 1 23:11:55 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit 1d2aab465717d8d4413bc4789ff5d025924548ac +Author: N Malarout +Date: Wed Feb 1 23:10:51 2017 +0000 + + malarout: added component key for mozart hysds.io + +commit e62ed73b8c9a49b69fbea1a8b888281a93e2da52 +Author: Namrata Malarout +Date: Wed Feb 1 14:43:48 2017 -0800 + + malarout: deleted datasets.json + +commit c275bdb576ecde1cc266ce82995439fffd0b6d4c +Author: N Malarout +Date: Wed Feb 1 22:30:19 2017 +0000 + + malarout: added purge and revoke for mozart + +commit de894d494e92c7ec87025ef0c629fdeebbc97e70 +Author: N Malarout +Date: Wed Feb 1 19:01:18 2017 +0000 + + malarout: fixed attachment problem in wget + +commit b58191b32ce8747e51927b0fb61665b3e5c14f6d +Author: N Malarout +Date: Wed Feb 1 00:40:48 2017 +0000 + + malarout: fixed wget script + +commit bdd3f82500ef2a5a39ad75815c4fcfb54a325b04 +Author: N Malarout +Date: Wed 
Feb 1 00:15:22 2017 +0000 + + malarout: fixed wget script + +commit 37636443829ea52ed2bef76ebfab7484b40db9f6 +Author: N Malarout +Date: Wed Feb 1 00:00:48 2017 +0000 + + malarout: removing unnecessary params from wget + +commit 8e46dba4db6b0510b5873d03cf7ba3fcf4797fd4 +Author: N Malarout +Date: Tue Jan 31 23:16:46 2017 +0000 + + malarout: modified SMTP setup in notify_by_email + +commit 8b9a700fa10181a6d8400f24c23b3c112bcadf45 +Author: N Malarout +Date: Tue Jan 31 22:04:00 2017 +0000 + + malarout:added retry bash script + +commit 690cc5fee165e4b5669015524bbe2e037d072fbd +Merge: b43f052 e7bc64b +Author: N Malarout +Date: Fri Jan 27 19:34:21 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit b43f0522ae86d69fb77ee10931eda166c8ad2a43 +Author: N Malarout +Date: Fri Jan 27 19:31:04 2017 +0000 + + added retry and wget scripts + +commit e7bc64b5f1a54454e71b9897ebd9f7149649ea08 +Author: M Starch +Date: Fri Jan 27 01:17:42 2017 +0000 + + mstarch: fixing JOBs name + +commit a296d2380cf53b22d5eea67c21a8ba88d861f987 +Author: M Starch +Date: Fri Jan 27 01:17:13 2017 +0000 + + mstarch: reverrting postfix changes + +commit 013fbbf6f1a6d2411451cab92e3cf4ec794bf7f5 +Merge: dbf919e ade312a +Author: M Starch +Date: Fri Jan 27 01:16:12 2017 +0000 + + mstarch: merged + +commit ade312a9a3c7ff78b46f8fd0c0875fc36ac08c62 +Author: N Malarout +Date: Wed Jan 25 19:49:19 2017 +0000 + + malarout: updated permissons for bash scripts + +commit 17689d094e8c7658d59e241d025ab36b9624c190 +Author: N Malarout +Date: Wed Jan 25 19:02:54 2017 +0000 + + malarout: Fix for notify by email and deleting renamed job spec files + +commit f824202c0c9947bba2e3e50f48b9e637d45d6123 +Author: N Malarout +Date: Wed Jan 25 17:33:00 2017 +0000 + + malarout: added wget script, fixed input handling in purge + +commit 7fa3e7e4d9ddb7342f97f1aa92ad2a4bdfd435b0 +Author: N Malarout +Date: Wed Jan 25 17:05:00 2017 +0000 + + malarout: fixed input handling in purge script + +commit dbf919e596c5336b7c2d949e68129f13e3f13051 +Author: M Starch +Date: Wed Jan 25 16:32:13 2017 +0000 + + mstarch: e-mail sending test changes + +commit 97be9fabb4d2b8ff8bbb85c64e5680acdea7cef1 +Author: N Malarout +Date: Thu Jan 19 08:14:24 2017 +0000 + + mstarch: adding sendmail and server start + +commit cc33f0d43260dc16bcb6ad825d045a5f54a6e868 +Merge: 31503e1 ca10366 +Author: M Starch +Date: Thu Jan 19 03:36:03 2017 +0000 + + Merge branch 'temp1' + +commit ca103664c7e05480ec0fc7b6ea3f3d8ae02e997c +Author: M Starch +Date: Thu Jan 19 03:33:43 2017 +0000 + + mstarch: fixing job-iterator errors + +commit 31503e1f231d30b6fac767f9020ff0c7041c464b +Author: M Starch +Date: Thu Jan 19 02:17:47 2017 +0000 + + mstarch: adding __inint__.py + +commit 676dd3018612f201a71fb9ec079b64c6bbcc5feb +Author: N Malarout +Date: Thu Jan 19 00:28:20 2017 +0000 + + malarout: modified Dockerfile + +commit 7b51cb12e9bad67bb1431127e656eb90e88fcd3d +Author: N Malarout +Date: Thu Jan 19 00:20:49 2017 +0000 + + malarout: modified Dockerfile + +commit 469743e23629b1e735baac619bec67312fb92b9a +Merge: cab14cb 050b065 +Author: N Malarout +Date: Wed Jan 18 23:07:15 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit cab14cb0b43e5caf76e5c11835d08d8989a718cf +Author: N Malarout +Date: Wed Jan 18 23:01:06 2017 +0000 + + malarout: updated Dockerfiles + +commit 050b065cb0521b8cd66c675e10cb349ba01c79f9 +Author: M Starch +Date: Wed Jan 18 22:39:14 2017 +0000 + + mstarch: adding fixes for the job-iterator job + +commit 
0450b14c86ea74cd96ec0c051522906d333c84ff +Author: N Malarout +Date: Wed Jan 18 21:11:13 2017 +0000 + + Fixes in purge and email + +commit 05077f4a2d9a1d7f99735aee9ac480280e73a492 +Author: M Starch +Date: Wed Jan 18 20:40:46 2017 +0000 + + mstarch: job iterator updated to use new common REST submit lib + +commit 505eab6444ae057f3c64f88c9205d604a7c7f89a +Author: N Malarout +Date: Wed Jan 18 18:51:16 2017 +0000 + + malarout: added from field in hysds io for notify by email + +commit 300ac9ad57624770226e4201ba7765e7a5f564e3 +Author: N Malarout +Date: Wed Jan 18 18:32:56 2017 +0000 + + malarout: Modified notify by email to support both tosca and mozart + +commit b0594a3410bbbba58339eb97abf54d9184b1bee6 +Author: N Malarout +Date: Tue Jan 17 17:48:04 2017 +0000 + + malarout: modified hysds io and renamed purge action files + +commit 037a351a68636add90c760bc6899a105de9bebdb +Author: M Starch +Date: Fri Jan 13 17:25:25 2017 +0000 + + mstarch: moving datasey_jpath to have root at top os ES result + +commit b5618409223d4937634088a14359d4834461f8f7 +Merge: a4b40d5 39ab645 +Author: M Starch +Date: Fri Jan 13 01:08:42 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit a4b40d53660875ac867bfbdcbc8bc517a89a366a +Author: M Starch +Date: Fri Jan 13 01:08:34 2017 +0000 + + mstarch: fixing kwargs and exposing errors + +commit 39ab645b829e22c3f9b085ed268d1e2c1a0568e8 +Author: N Malarout +Date: Thu Jan 12 23:10:27 2017 +0000 + + malarout: fixed purge products job spec + +commit 5ad73ef767501c7e0d3ba8a3a15e40b44e0dffef +Author: N Malarout +Date: Thu Jan 12 22:37:46 2017 +0000 + + malarout: added .dockerignore + +commit f3200f291320c2b55e5a046b52bb271b03860c8d +Merge: 51a3161 683ef50 +Author: N Malarout +Date: Thu Jan 12 19:36:04 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit 51a3161be0f715f9d4fbdb5c7c0caf6dcf3c77f8 +Author: N Malarout +Date: Thu Jan 12 19:24:51 2017 +0000 + + malarout: Added purge products job + +commit 683ef505a95e860192e9987cf79b9e3d7607a982 +Author: M Starch +Date: Thu Jan 12 04:06:19 2017 +0000 + + mstarch: removing best test jobs + +commit 7f4c48f9552eb5f54a5d66de43b0571ece573d4b +Author: M Starch +Date: Thu Jan 12 03:42:11 2017 +0000 + + mstarch: fixing job-iterator-job lambda + +commit 3bbe3ac4bf2c4bfcc504887ba3b5e54e1108fc95 +Author: M Starch +Date: Thu Jan 12 03:07:55 2017 +0000 + + mstarch: more safe lambda functions + +commit 9648743e88ee7fe5bb1b076cf9e44a7d29d27685 +Author: M Starch +Date: Thu Jan 12 02:46:41 2017 +0000 + + mstarch: created echo-best jobs for testing lambda + +commit e1dee7b95a431c8e345401f6a216af9e02d6a2d4 +Author: M Starch +Date: Thu Jan 12 02:42:26 2017 +0000 + + mstarch: addinglambda functions to job-iterator-job + +commit 76e337a3ba449d09854effe5ff828a39df42f335 +Author: M Starch +Date: Wed Jan 11 23:38:21 2017 +0000 + + mstarch: removing test code + +commit 3d9981151256704a9e2524e37897caa2e55e8b5a +Author: M Starch +Date: Wed Jan 11 22:58:28 2017 +0000 + + mstarch: adding mozart_url to job-iterator-job + +commit 652267247439a519be4367b0c95e918a41e87bb3 +Merge: 710908b c73f81b +Author: M Starch +Date: Wed Jan 11 22:18:36 2017 +0000 + + Merge branch 'master' of https://github.jpl.nasa.gov/hysds-org/lightweight-jobs + +commit c73f81bc7b8b0314807c1411ebb986058cc32dd0 +Merge: 1509860 9d786b4 +Author: M Starch +Date: Wed Jan 11 22:17:48 2017 +0000 + + merging with job iterator + +commit 710908b6557e1f94c4b53b76657d577db5328e5b +Merge: c3fa22e 1509860 
+Author: M Starch +Date: Wed Jan 11 22:01:19 2017 +0000 + + mstarch: mergeing in from tag v0.4-malarout + +commit c3fa22eb3f51e4e6b9d1a069b81c467bad1f31e6 +Author: M Starch +Date: Wed Jan 11 21:52:30 2017 +0000 + + mstarch: job-iterator working v1 + +commit 9d786b488f9f992451fdbb83ba1e00a1ef4f7b65 +Author: M Starch +Date: Wed Jan 11 19:02:45 2017 +0000 + + mstarch: adding job-iterator-job files + +commit 1509860facc77ff15d0043adb2d825e2ea37f8cf +Author: M Starch +Date: Wed Jan 11 18:33:56 2017 +0000 + + malarout: added job iterator job spec + +commit 23c16bc788ff85de1e08292ee456f5be12986152 +Author: M Starch +Date: Wed Jan 11 18:07:41 2017 +0000 + + malarout: added job specs for notify by email + +commit 9ac38363eedf34faaf1fd213b5d733903808565b +Author: M Starch +Date: Tue Jan 10 22:21:46 2017 +0000 + + lightweight hello world + +commit 5b6ba52fe915847cf475ac5a4d640a85aaa366a4 +Author: M Starch +Date: Fri Nov 18 01:10:39 2016 +0000 + + added readme diff --git a/aws_get.py b/aws_get.py new file mode 100644 index 0000000..f7296a2 --- /dev/null +++ b/aws_get.py @@ -0,0 +1,103 @@ +import json, requests, types, re, getpass, sys, os +from pprint import pformat +import logging +import tarfile +import notify_by_email +from hysds.celery import app +import boto3 +from urlparse import urlparse + +#TODO: Setup logger for this job here. Should log to STDOUT or STDERR as this is a job +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger("aws_get") + + +def aws_get_script(dataset=None): + """Return AWS get script.""" + + # query + es_url = app.conf["GRQ_ES_URL"] + index = app.conf["DATASET_ALIAS"] + #facetview_url = app.conf["GRQ_URL"] + print('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index)) + logging.debug('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index)) + print json.dumps(dataset) + logging.debug(json.dumps(dataset)) + + r = requests.post('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index), json.dumps(dataset)) + if r.status_code != 200: + print("Failed to query ES. Got status code %d:\n%s" %(r.status_code, json.dumps(r.json(), indent=2))) + logger.debug("Failed to query ES. Got status code %d:\n%s" % + (r.status_code, json.dumps(r.json(), indent=2))) + r.raise_for_status() + logger.debug("result: %s" % pformat(r.json())) + + scan_result = r.json() + count = scan_result['hits']['total'] + scroll_id = scan_result['_scroll_id'] + + # stream output a page at a time for better performance and lower memory footprint + def stream_aws_get(scroll_id): + #formatted_source = format_source(source) + yield '#!/bin/bash\n#\n' + \ + '# query:\n#\n' + \ + '#%s#\n#\n#' % json.dumps(dataset) + \ + '# total datasets matched: %d\n\n' % count + \ + 'echo ""\n' + aws_get_cmd = 'aws s3 sync {} {}\n' + + while True: + r = requests.post('%s/_search/scroll?scroll=10m' % es_url, data=scroll_id) + res = r.json() + logger.debug("res: %s" % pformat(res)) + scroll_id = res['_scroll_id'] + if len(res['hits']['hits']) == 0: break + # Elastic Search seems like it's returning duplicate urls. 
Remove duplicates + unique_urls=[] + for hit in res['hits']['hits']: + [unique_urls.append(url) for url in hit['_source']['urls'] if url not in unique_urls and url.startswith("s3")] + + for url in unique_urls: + logging.debug("urls in unique urls: %s",url) + parsed_url = urlparse(url) + yield 'echo "downloading %s"\n' % os.path.basename(parsed_url.path) + yield aws_get_cmd.format("{}://{}".format(parsed_url.scheme, + parsed_url.path[1:] if parsed_url.path.startswith('/') else parsed_url.path), + os.path.basename(parsed_url.path)) + + # malarout: iterate over each line of stream_aws_get response, and write to a file which is later attached to the email. + with open('aws_get_script.sh','w') as f: + for i in stream_aws_get(scroll_id): + f.write(i) + + # for gzip compressed use file extension .tar.gz and modifier "w:gz" + os.rename('aws_get_script.sh','aws_get_script.bash') + tar = tarfile.open("aws_get.tar.gz", "w:gz") + tar.add('aws_get_script.bash') + tar.close() + + +if __name__ == "__main__": + ''' + Main program of aws_get_script + ''' + #encoding to a JSON object + query = {} + query = json.loads(sys.argv[1]) + emails = sys.argv[2] + rule_name = sys.argv[3] + + # getting the script + aws_get_script(query) + # now email the query + attachments = None + cc_recipients = [i.strip() for i in emails.split(',')] + bcc_recipients = [] + subject = "[monitor] (aws_get_script:%s)" % (rule_name) + body = "Product was ingested from query: %s" % query + body += "\n\nYou can use this AWS get script attached to download products.\n" + body += "Please rename aws_get_script.bash to aws_get_script.sh before running it." + if os.path.isfile('aws_get.tar.gz'): + aws_get_content = open('aws_get.tar.gz','r').read() + attachments = { 'aws_get.tar.gz':aws_get_content} + notify_by_email.send_email(getpass.getuser(), cc_recipients, bcc_recipients, subject, body, attachments=attachments) diff --git a/aws_get.sh b/aws_get.sh new file mode 100755 index 0000000..8dd75a8 --- /dev/null +++ b/aws_get.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +source $HOME/verdi/bin/activate + +BASE_PATH=$(dirname "${BASH_SOURCE}") + +# check args +if [ "$#" -eq 3 ]; then + query=$1 + emails=$2 + rule_name=$3 + +else + echo "Invalid number of arguments ($#) $*" 1>&2 + exit 1 +fi + +# generate AWS get script and email +echo "##########################################" 1>&2 +echo -n "Generating AWS get script: " 1>&2 +date 1>&2 +python $BASE_PATH/aws_get.py "$query" "$emails" "$rule_name" > aws_get.log 2>&1 +STATUS=$? +echo -n "Finished with AWS get script: " 1>&2 +date 1>&2 +if [ $STATUS -ne 0 ]; then + echo "Failed to send AWS get script."
1>&2 + cat aws_get.log 1>&2 + exit $STATUS +fi diff --git a/docker/.dockerignore b/docker/.dockerignore new file mode 100644 index 0000000..f981a7c --- /dev/null +++ b/docker/.dockerignore @@ -0,0 +1,2 @@ +# changes to the docker/ directory shouldn't force a container rebuild +*/docker* diff --git a/docker/Dockerfile b/docker/Dockerfile new file mode 100644 index 0000000..9b6537a --- /dev/null +++ b/docker/Dockerfile @@ -0,0 +1,29 @@ +FROM hysds/pge-base:latest + +MAINTAINER malarout "Namrata.Malarout@jpl.nasa.gov" +LABEL description="Lightweight System Jobs" + +ARG id +ARG gid +#Change UID and GID from build +USER root +RUN set -ex && \ + usermod -u ${id} ops && \ + groupmod -g ${gid} ops && \ + chown -R ops:ops /home/ops/ /data && \ + usermod -g ${gid} ops + +# create work directory +RUN set -ex \ + && mkdir -p /data/work \ + && chmod -R 755 /data \ + && chown -R ops:ops /data + + +# provision lightweight-jobs PGE +USER ops +COPY . /home/ops/verdi/ops/lightweight-jobs + +# set entrypoint +WORKDIR /home/ops +CMD ["/bin/bash", "--login"] diff --git a/docker/hysds-io.json.lightweight-echo b/docker/hysds-io.json.lightweight-echo new file mode 100644 index 0000000..a4028a0 --- /dev/null +++ b/docker/hysds-io.json.lightweight-echo @@ -0,0 +1,11 @@ +{ + "label" : "echo", + "submission_type":"individual", + "params" : [ + { + "name": "echo_var", + "from": "value", + "value": "Hello world!" + } + ] +} diff --git a/docker/hysds-io.json.lw-mozart-notify-by-email b/docker/hysds-io.json.lw-mozart-notify-by-email new file mode 100644 index 0000000..037ae48 --- /dev/null +++ b/docker/hysds-io.json.lw-mozart-notify-by-email @@ -0,0 +1,33 @@ +{ + "label":"Notify by email", + "component":"mozart", + "submission_type":"iteration", + "params" : [ + { + "name": "id", + "type": "text", + "from": "dataset_jpath:_source.job_id" + }, + { + "name": "url", + "type": "text", + "from": "dataset_jpath:_source.job.job_info.job_url" + }, + { + "name": "emails", + "type": "text", + "from": "submitter" + }, + { + "name": "name", + "type": "text", + "from": "passthrough" + }, + { + "name": "component", + "from": "value", + "type": "text", + "value": "mozart" + } + ] +} diff --git a/docker/hysds-io.json.lw-mozart-purge b/docker/hysds-io.json.lw-mozart-purge new file mode 100644 index 0000000..c91f727 --- /dev/null +++ b/docker/hysds-io.json.lw-mozart-purge @@ -0,0 +1,24 @@ +{ + "label":"Purge jobs", + "component":"mozart", + "submission_type":"individual", + "params" : [ + { + "name": "query", + "type": "text", + "from": "passthrough" + }, + { + "name": "component", + "from": "value", + "type": "text", + "value": "mozart" + }, + { + "name": "operation", + "from": "value", + "type": "text", + "value": "purge" + } + ] +} diff --git a/docker/hysds-io.json.lw-mozart-retry b/docker/hysds-io.json.lw-mozart-retry new file mode 100644 index 0000000..8eea909 --- /dev/null +++ b/docker/hysds-io.json.lw-mozart-retry @@ -0,0 +1,23 @@ +{ + "label":"Retry Jobs/Tasks", + "component":"mozart", + "submission_type":"individual", + "params" : [ + { + "name": "retry_job_id", + "type": "text", + "from": "dataset_jpath:_source.job.job_info.id" + }, + { + "name": "type", + "type": "text", + "from": "dataset_jpath:_type" + }, + { + "name": "retry_count_max", + "type": "text", + "from": "submitter", + "lambda": "lambda x: int(x)" + } + ] +} diff --git a/docker/hysds-io.json.lw-mozart-revoke b/docker/hysds-io.json.lw-mozart-revoke new file mode 100644 index 0000000..c7ea083 --- /dev/null +++ b/docker/hysds-io.json.lw-mozart-revoke @@ -0,0 +1,24 @@ 
+{ + "label":"Revoke / Stop job", + "component":"mozart", + "submission_type":"individual", + "params" : [ + { + "name": "query", + "type": "text", + "from": "passthrough" + }, + { + "name": "component", + "from": "value", + "type": "text", + "value": "mozart" + }, + { + "name": "operation", + "from": "value", + "type": "text", + "value": "revoke" + } + ] +} diff --git a/docker/hysds-io.json.lw-tosca-aws_get b/docker/hysds-io.json.lw-tosca-aws_get new file mode 100644 index 0000000..dfe3906 --- /dev/null +++ b/docker/hysds-io.json.lw-tosca-aws_get @@ -0,0 +1,22 @@ +{ + "label":"AWS get script", + "component":"tosca", + "submission_type":"individual", + "params" : [ + { + "name": "query", + "type": "text", + "from": "passthrough" + }, + { + "name": "emails", + "type": "text", + "from": "submitter" + }, + { + "name": "name", + "type": "text", + "from": "passthrough" + } + ] +} diff --git a/docker/hysds-io.json.lw-tosca-notify-by-email b/docker/hysds-io.json.lw-tosca-notify-by-email new file mode 100644 index 0000000..36b64ee --- /dev/null +++ b/docker/hysds-io.json.lw-tosca-notify-by-email @@ -0,0 +1,33 @@ +{ + "label" : "Notify by email", + "component" : "tosca", + "submission_type":"iteration", + "params" : [ + { + "name": "id", + "type": "text", + "from": "dataset_jpath:_source.objectid" + }, + { + "name": "url", + "type": "text", + "from": "dataset_jpath:_source.urls" + }, + { + "name": "emails", + "type": "text", + "from": "submitter" + }, + { + "name": "name", + "type": "text", + "from": "passthrough" + }, + { + "name": "component", + "from": "value", + "type": "text", + "value": "tosca" + } + ] +} diff --git a/docker/hysds-io.json.lw-tosca-purge b/docker/hysds-io.json.lw-tosca-purge new file mode 100644 index 0000000..f614469 --- /dev/null +++ b/docker/hysds-io.json.lw-tosca-purge @@ -0,0 +1,24 @@ +{ + "label":"Purge datasets", + "component":"tosca", + "submission_type":"individual", + "params" : [ + { + "name": "query", + "type": "text", + "from": "passthrough" + }, + { + "name": "component", + "from": "value", + "type": "text", + "value": "tosca" + }, + { + "name": "operation", + "from": "value", + "type": "text", + "value": "purge" + } + ] +} diff --git a/docker/hysds-io.json.lw-tosca-wget b/docker/hysds-io.json.lw-tosca-wget new file mode 100644 index 0000000..7077537 --- /dev/null +++ b/docker/hysds-io.json.lw-tosca-wget @@ -0,0 +1,22 @@ +{ + "label":"Wget script", + "component":"tosca", + "submission_type":"individual", + "params" : [ + { + "name": "query", + "type": "text", + "from": "passthrough" + }, + { + "name": "emails", + "type": "text", + "from": "submitter" + }, + { + "name": "name", + "type": "text", + "from": "passthrough" + } + ] +} diff --git a/docker/job-spec.json.lightweight-echo b/docker/job-spec.json.lightweight-echo new file mode 100644 index 0000000..5ff35a5 --- /dev/null +++ b/docker/job-spec.json.lightweight-echo @@ -0,0 +1,10 @@ +{ + "command":"echo", + "disk_usage":"1GB", + "params" : [ + { + "name": "echo_var", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-mozart-notify-by-email b/docker/job-spec.json.lw-mozart-notify-by-email new file mode 100644 index 0000000..f6065f0 --- /dev/null +++ b/docker/job-spec.json.lw-mozart-notify-by-email @@ -0,0 +1,27 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/notify_by_email.sh", + "disk_usage":"3GB", + "params" : [ + { + "name": "id", + "destination": "positional" + }, + { + "name": "url", + "destination": "positional" + }, + { + 
"name": "emails", + "destination": "positional" + }, + { + "name": "name", + "destination": "positional" + }, + { + "name": "component", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-mozart-purge b/docker/job-spec.json.lw-mozart-purge new file mode 100644 index 0000000..34e3e19 --- /dev/null +++ b/docker/job-spec.json.lw-mozart-purge @@ -0,0 +1,19 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/purge.sh", + "disk_usage":"3GB", + "params" : [ + { + "name": "query", + "destination": "positional" + }, + { + "name": "component", + "destination": "positional" + }, + { + "name": "operation", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-mozart-retry b/docker/job-spec.json.lw-mozart-retry new file mode 100644 index 0000000..109090e --- /dev/null +++ b/docker/job-spec.json.lw-mozart-retry @@ -0,0 +1,19 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/retry.sh", + "disk_usage":"3GB", + "params" : [ + { + "name": "retry_job_id", + "destination": "context" + }, + { + "name": "type", + "destination": "positional" + }, + { + "name": "retry_count_max", + "destination": "context" + } + ] +} diff --git a/docker/job-spec.json.lw-mozart-revoke b/docker/job-spec.json.lw-mozart-revoke new file mode 100644 index 0000000..e6831e9 --- /dev/null +++ b/docker/job-spec.json.lw-mozart-revoke @@ -0,0 +1,20 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/purge.sh", + "disk_usage":"3GB", + "params" : [ + { + "name": "query", + "destination": "positional" + }, + { + "name": "component", + "destination": "positional" + }, + { + "name": "operation", + "destination": "positional" + } + ] +} + diff --git a/docker/job-spec.json.lw-tosca-aws_get b/docker/job-spec.json.lw-tosca-aws_get new file mode 100644 index 0000000..b4d6ec6 --- /dev/null +++ b/docker/job-spec.json.lw-tosca-aws_get @@ -0,0 +1,20 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/aws_get.sh", + "imported_worker_files":{"/home/ops/.aws":"/home/ops/.aws"}, + "disk_usage":"3GB", + "params" : [ + { + "name": "query", + "destination": "positional" + }, + { + "name": "emails", + "destination": "positional" + }, + { + "name": "name", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-tosca-notify-by-email b/docker/job-spec.json.lw-tosca-notify-by-email new file mode 100644 index 0000000..f6065f0 --- /dev/null +++ b/docker/job-spec.json.lw-tosca-notify-by-email @@ -0,0 +1,27 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/notify_by_email.sh", + "disk_usage":"3GB", + "params" : [ + { + "name": "id", + "destination": "positional" + }, + { + "name": "url", + "destination": "positional" + }, + { + "name": "emails", + "destination": "positional" + }, + { + "name": "name", + "destination": "positional" + }, + { + "name": "component", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-tosca-purge b/docker/job-spec.json.lw-tosca-purge new file mode 100644 index 0000000..94a850f --- /dev/null +++ b/docker/job-spec.json.lw-tosca-purge @@ -0,0 +1,20 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/purge.sh", + "imported_worker_files":{"/home/ops/.aws":"/home/ops/.aws"}, + "disk_usage":"3GB", + "params" : [ + { + "name": "query", + "destination": "positional" + }, + 
{ + "name": "component", + "destination": "positional" + }, + { + "name": "operation", + "destination": "positional" + } + ] +} diff --git a/docker/job-spec.json.lw-tosca-wget b/docker/job-spec.json.lw-tosca-wget new file mode 100644 index 0000000..052cd74 --- /dev/null +++ b/docker/job-spec.json.lw-tosca-wget @@ -0,0 +1,20 @@ +{ + "required_queues":["system-jobs-queue"], + "command":"/home/ops/verdi/ops/lightweight-jobs/wget.sh", + "imported_worker_files":{"/home/ops/.aws":"/home/ops/.aws"}, + "disk_usage":"3GB", + "params" : [ + { + "name": "query", + "destination": "positional" + }, + { + "name": "emails", + "destination": "positional" + }, + { + "name": "name", + "destination": "positional" + } + ] +} diff --git a/lib/__init__.py b/lib/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/lib/get_component_configuration.py b/lib/get_component_configuration.py new file mode 100644 index 0000000..f482d72 --- /dev/null +++ b/lib/get_component_configuration.py @@ -0,0 +1,16 @@ +from hysds.celery import app + +def get_component_config(component): + ''' + From a component get the common configuration values + @param component - component + ''' + if component=="mozart" or component=="figaro": + es_url = app.conf["JOBS_ES_URL"] + query_idx = app.conf["STATUS_ALIAS"] + facetview_url = app.conf["MOZART_URL"] + elif component=="tosca": + es_url = app.conf["GRQ_ES_URL"] + query_idx = app.conf["DATASET_ALIAS"] + facetview_url = app.conf["GRQ_URL"] + return (es_url, query_idx, facetview_url) diff --git a/notify_by_email.py b/notify_by_email.py new file mode 100644 index 0000000..2afdf0d --- /dev/null +++ b/notify_by_email.py @@ -0,0 +1,233 @@ +#!/usr/bin/env python +import os, sys, getpass, requests, json, types, base64, socket +from smtplib import SMTP +from email.MIMEMultipart import MIMEMultipart +from email.MIMEText import MIMEText +from email.MIMEBase import MIMEBase +from email.Header import Header +from email.Utils import parseaddr, formataddr, COMMASPACE, formatdate +from email import Encoders +from hysds.celery import app +from hysds_commons.net_utils import get_container_host_ip + + +def get_hostname(): + """Get hostname.""" + + # get hostname + try: return socket.getfqdn() + except: + # get IP + try: return socket.gethostbyname(socket.gethostname()) + except: + raise RuntimeError("Failed to resolve hostname for full email address. Check system.") + + +def send_email(sender, cc_recipients, bcc_recipients, subject, body, attachments=None): + """Send an email. + + All arguments should be Unicode strings (plain ASCII works as well). + + Only the real name part of sender and recipient addresses may contain + non-ASCII characters. + + The email will be properly MIME encoded and delivered though SMTP to + 172.17.0.1. This is easy to change if you want something different. + + The charset of the email will be the first one out of US-ASCII, ISO-8859-1 + and UTF-8 that can represent all the characters occurring in the email. + """ + + # combined recipients + recipients = cc_recipients + bcc_recipients + + # Header class is smart enough to try US-ASCII, then the charset we + # provide, then fall back to UTF-8. 
+ header_charset = 'ISO-8859-1' + + # We must choose the body charset manually + for body_charset in 'US-ASCII', 'ISO-8859-1', 'UTF-8': + try: + body.encode(body_charset) + except UnicodeError: + pass + else: + break + + # Split real name (which is optional) and email address parts + sender_name, sender_addr = parseaddr(sender) + parsed_cc_recipients = [parseaddr(rec) for rec in cc_recipients] + parsed_bcc_recipients = [parseaddr(rec) for rec in bcc_recipients] + #recipient_name, recipient_addr = parseaddr(recipient) + + # We must always pass Unicode strings to Header, otherwise it will + # use RFC 2047 encoding even on plain ASCII strings. + sender_name = str(Header(unicode(sender_name), header_charset)) + unicode_parsed_cc_recipients = [] + for recipient_name, recipient_addr in parsed_cc_recipients: + recipient_name = str(Header(unicode(recipient_name), header_charset)) + # Make sure email addresses do not contain non-ASCII characters + recipient_addr = recipient_addr.encode('ascii') + unicode_parsed_cc_recipients.append((recipient_name, recipient_addr)) + unicode_parsed_bcc_recipients = [] + for recipient_name, recipient_addr in parsed_bcc_recipients: + recipient_name = str(Header(unicode(recipient_name), header_charset)) + # Make sure email addresses do not contain non-ASCII characters + recipient_addr = recipient_addr.encode('ascii') + unicode_parsed_bcc_recipients.append((recipient_name, recipient_addr)) + + # Make sure email addresses do not contain non-ASCII characters + sender_addr = sender_addr.encode('ascii') + + # Create the message ('plain' stands for Content-Type: text/plain) + msg = MIMEMultipart() + msg['CC'] = COMMASPACE.join([formataddr((recipient_name, recipient_addr)) + for recipient_name, recipient_addr in unicode_parsed_cc_recipients]) + msg['BCC'] = COMMASPACE.join([formataddr((recipient_name, recipient_addr)) + for recipient_name, recipient_addr in unicode_parsed_bcc_recipients]) + msg['Subject'] = Header(unicode(subject), header_charset) + msg['FROM'] = "no-reply@jpl.nasa.gov" + msg.attach(MIMEText(body.encode(body_charset), 'plain', body_charset)) + + # Add attachments + if isinstance(attachments, types.DictType): + for fname in attachments: + part = MIMEBase('application', "octet-stream") + part.set_payload(attachments[fname]) + Encoders.encode_base64(part) + part.add_header('Content-Disposition', 'attachment; filename="%s"' % fname) + msg.attach(part) + + #print "#" * 80 + #print msg.as_string() + + # Send the message via SMTP to docker host + smtp_url = "smtp://%s:25" % get_container_host_ip() + print "smtp_url : %s",smtp_url + smtp = SMTP(get_container_host_ip()) + smtp.sendmail(sender, recipients, msg.as_string()) + smtp.quit() + + +def get_source(es_url, query_idx, objectid): + """Return source metadata for objectid.""" + + query = { + "sort": { + "_timestamp": { + "order": "desc" + } + }, + "query": { + "term": { + "_id": objectid + } + } + } + print 'get_source debug:', '%s/%s/_search',es_url," ", query_idx,' ',json.dumps(query) + r = requests.post('%s/%s/_search' % (es_url, query_idx), data=json.dumps(query)) + r.raise_for_status() + result = r.json() + if result['hits']['total'] == 0: return None + else: return result['hits']['hits'][0]['_source'] + + +def get_cities(src): + """Return list of cities.""" + + cities = [] + for city in src.get('city',[]): + cities.append("%s, %s" % (city.get('name', ''), city.get('admin1_name', ''))) + return cities + + +def get_value(d, key): + """Return value from source based on key.""" + + for k in key.split('.'): + if 
k in d: d = d[k] + else: return None + if isinstance(d, types.ListType): return ', '.join([str(i) for i in d]) + else: return d + + +def get_metadata_snippet(src, snippet_cfg): + """Return body text for metadata snippet.""" + + body = "" + for k, label in snippet_cfg: + val = get_value(src, k) + if val is not None: body += "%s: %s\n" % (label, val) + body += "location type: %s\n" % src.get('location', {}).get('type', None) + body += "location coordinates: %s\n" % src.get('location', {}).get('coordinates', []) + cities = get_cities(src) + body += "Closest cities: %s" % "\n ".join(cities) + return body + + +def get_facetview_link(facetview_url, objectid, system_version=None): + """Return link to objectid in FacetView interface.""" + + if system_version is None: + b64 = base64.urlsafe_b64encode('{"query":{"query_string":{"query":"_id:%s"}}}' % objectid) + else: + b64 = base64.urlsafe_b64encode('{"query":{"query_string":{"query":"_id:%s AND system_version:%s"}}}' % (objectid, system_version)) + if facetview_url.endswith('/'): facetview_url = facetview_url[:-1] + return '%s/?base64=%s' % (facetview_url, b64) + + +if __name__ == "__main__": + settings_file = os.path.normpath( + os.path.join( + os.path.dirname(os.path.realpath(__file__)), + 'settings.json') + ) + settings = json.load(open(settings_file)) + + objectid = sys.argv[1] + url = sys.argv[2] + emails = sys.argv[3] + rule_name = sys.argv[4] + component = sys.argv[5] + + if component=="mozart" or component=="figaro": + es_url = app.conf["JOBS_ES_URL"] + query_idx = app.conf["STATUS_ALIAS"] + facetview_url = app.conf["MOZART_URL"] + elif component=="tosca": + es_url = app.conf["GRQ_ES_URL"] + query_idx = app.conf["DATASET_ALIAS"] + facetview_url = app.conf["TOSCA_URL"] + + cc_recipients = [i.strip() for i in emails.split(',')] + bcc_recipients = [] + subject = "[monitor] (notify_by_email:%s) %s" % (rule_name, objectid) + body = "Product with id %s was ingested." % objectid + attachments = None + src = get_source(es_url, query_idx, objectid) + if src is not None: + # attach metadata json + body += "\n\n%s" % get_metadata_snippet(src, settings['SNIPPET_CFG']) + body += "\n\nThe entire metadata json for this product has been attached for your convenience.\n\n" + attachments = { 'metadata.json': json.dumps(src, indent=2) } + + # attach browse images + if len(src['browse_urls']) > 0: + browse_url = src['browse_urls'][0] + if len(src['images']) > 0: + body += "Browse images have been attached as well.\n\n" + for i in src['images']: + small_img = i['small_img'] + small_img_url = os.path.join(browse_url, small_img) + r = requests.get(small_img_url) + if r.status_code != 200: continue + attachments[small_img] = r.content + else: body += "\n\n" + body += "You may access the product here:\n\n%s" % url + facet_url = get_facetview_link(facetview_url, objectid, None if src is None else src.get('system_version', None)) + if facet_url is not None: + body += "\n\nYou may view this product in FacetView here:\n\n%s" % facet_url + body += "\n\nNOTE: You may have to cut and paste the FacetView link into your " + body += "browser's address bar to prevent your email client from escaping the curly brackets." 
+ send_email("%s@%s" % (getpass.getuser(), get_hostname()), cc_recipients, + bcc_recipients, subject, body, attachments=attachments) diff --git a/notify_by_email.sh b/notify_by_email.sh new file mode 100755 index 0000000..b0d77ec --- /dev/null +++ b/notify_by_email.sh @@ -0,0 +1,37 @@ +#!/bin/bash + +source $HOME/verdi/bin/activate + +BASE_PATH=$(dirname "${BASH_SOURCE}") + +# check args +if [ "$#" -eq 5 ]; then + id=$1 + url=$2 + emails=$3 + rule_name=$4 + component=$5 +else + echo "Invalid number or arguments ($#) $*" 1>&2 + exit 1 +fi + + + +# send email +echo "##########################################" 1>&2 +echo -n "Sending email: " 1>&2 +date 1>&2 +python $BASE_PATH/notify_by_email.py "$id" "$url" "$emails" "$rule_name" "$component" > notify_by_email.log 2>&1 +STATUS=$? + +echo -n "Finished sending email: " 1>&2 +date 1>&2 +if [ $STATUS -ne 0 ]; then + echo "Failed to send email." 1>&2 + cat notify_by_email.log 1>&2 + echo "{}" + exit $STATUS +fi + +exit 0 diff --git a/purge.py b/purge.py new file mode 100644 index 0000000..743311a --- /dev/null +++ b/purge.py @@ -0,0 +1,117 @@ +#!/bin/env python +import json +import logging +import sys +import hysds_commons.request_utils +import hysds_commons.metadata_rest_utils +import osaka.main +from hysds.celery import app + +#TODO: Setup logger for this job here. Should log to STDOUT or STDERR as this is a job +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger("hysds") + +def purge_products(query,component,operation): + ''' + Iterator used to iterate across a query result and submit jobs for every hit + @param es_url - ElasticSearch URL to hit with query + @param es_index - Index in ElasticSearch to hit with query (usually an alias) + @param username - name of the user owning this submission + @param query - query to post to ElasticSearch and whose result will be iterated, JSON sting enc + @param kwargs - key-word args to match to HySDS IO + ''' + logger.debug("Doing %s for %s with query: %s",operation,component,query) + + if component=="mozart" or component=="figaro": + es_url = app.conf["JOBS_ES_URL"] + es_index = app.conf["STATUS_ALIAS"] + facetview_url = app.conf["MOZART_URL"] + elif component=="tosca": + es_url = app.conf["GRQ_ES_URL"] + es_index = app.conf["DATASET_ALIAS"] + facetview_url = app.conf["GRQ_URL"] + + #Querying for products + start_url = "{0}/{1}/_search".format(es_url,es_index) + scroll_url = "{0}/_search".format(es_url,es_index) + + results = hysds_commons.request_utils.post_scrolled_json_responses(start_url,scroll_url,data=json.dumps(query),logger=logger) + print results + + if component=='tosca': + for result in results: + es_type = result["_type"] + ident = result["_id"] + index = result["_index"] + #find the Best URL first + best = None + for url in result["_source"]["urls"]: + if best is None or not url.startswith("http"): + best = url + + #making osaka call to delete product + print 'paramater being passed to osaka.main.rmall: ',best + osaka.main.rmall(best) + #removing the metadata + hysds_commons.metadata_rest_utils.remove_metadata(es_url,index,es_type,ident,logger) + + else: + if operation=='purge': + purge = True + else: + purge = False + # purge job from index + + for result in results: + uuid = result["_source"]['uuid'] + payload_id = result["_source"]['payload_id'] + index = result["_index"] + es_type = result['_type'] + #Always grab latest state (not state from query result) + task = app.AsyncResult(uuid) + state = task.state + #Active states may only revoke + logger.info("Job state: 
%s\n",state) + if state in ["RETRY","STARTED"] or (state == "PENDING" and not purge): + if not purge: + logger.info('Revoking %s\n',uuid) + app.control.revoke(uuid,terminate=True) + else: + logger.info( 'Cannot remove active job %s\n',uuid) + continue + elif not purge: + logger.info( 'Cannot stop inactive job: %s\n',uuid) + continue + #Saftey net to revoke job if in PENDING state + if state == "PENDING": + logger.info( 'Revoking %s\n',uuid) + app.control.revoke(uuid,terminate=True) + + # Both associated task and job from ES + logger.info( 'Removing ES for %s:%s',es_type,payload_id) + r = hysds_commons.metadata_rest_utils.remove_metadata(es_url,index,es_type,payload_id,logger) + #r.raise_for_status() #not req + #res = r.json() #not req + logger.info('done.\n') + logger.info('Finished\n') + + +if __name__ == "__main__": + ''' + Main program of purge_products + ''' + #encoding to a JSON object + #decoded_string = sys.argv[1].decode('string_escape') + #dec = decoded_string.replace('u""','"') + #decoded_inp = dec.replace('""','"') + decoded_inp = sys.argv[1] + print decoded_inp + if decoded_inp.startswith('{"query"') or decoded_inp.startswith("{u'query'") or decoded_inp.startswith("{'query'"): + query_obj = json.loads(decoded_inp) + else: + query_obj["query"]=json.loads(decoded_inp) + + component = sys.argv[2] + operation = sys.argv[3] + purge_products(query_obj,component,operation) + diff --git a/purge.sh b/purge.sh new file mode 100755 index 0000000..39475d4 --- /dev/null +++ b/purge.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +source $HOME/verdi/bin/activate + +BASE_PATH=$(dirname "${BASH_SOURCE}") + +# check args +if [ "$#" -eq 3 ]; then + query=$1 + component=$2 + operation=$3 +else + echo "Invalid number or arguments ($#) $*" 1>&2 + exit 1 +fi + +# purge products +echo "##########################################" 1>&2 +echo -n "Purge/Stop/Revoke products: " 1>&2 +date 1>&2 +python $BASE_PATH/purge.py "$query" "$component" "$operation" > purge.log 2>&1 +STATUS=$? +echo -n "Finished purging/revoking: " 1>&2 +date 1>&2 +if [ $STATUS -ne 0 ]; then + echo "Failed to purge/revoke." 1>&2 + cat purge.log 1>&2 + echo "{}" + exit $STATUS +fi + +exit 0 diff --git a/retry.py b/retry.py new file mode 100644 index 0000000..bda30de --- /dev/null +++ b/retry.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python +import os, sys, json, requests, time, traceback +from random import randint, uniform +from datetime import datetime +from celery import uuid +from hysds.celery import app +from hysds.orchestrator import run_job +from hysds.log_utils import log_job_status + +def query_ES(job_id): + #get the ES_URL + es_url = app.conf["JOBS_ES_URL"] + index = app.conf["STATUS_ALIAS"] + query_json = {"query":{"bool": {"must": [{"term": {"job.job_info.id": "job_id"}}]}}} + query_json["query"]["bool"]["must"][0]["term"]["job.job_info.id"] = job_id + r = requests.post('%s/%s/_search?' % (es_url, index), json.dumps(query_json)) + return r + + +def rand_sleep(sleep_min=0.1, sleep_max=1): time.sleep(uniform(sleep_min, sleep_max)) + + +def resubmit_jobs(): + # random sleep to prevent from getting ElasticSearch errors: + # 429 Client Error: Too Many Requests + time.sleep(randint(1,5)) + # can call submit_job + + #iterate through job ids and query to get the job json + with open('_context.json') as f: + ctx = json.load(f) + + retry_count_max = ctx['retry_count_max'] + for job_id in ctx['retry_job_id']: + try: + ## get job json for ES + rand_sleep() + response = query_ES(job_id) + if response.status_code != 200: + print("Failed to query ES. 
+                response.raise_for_status()
+            resp_json = response.json()
+
+            # check retry count against retry_count_max
+            job_json = resp_json["hits"]["hits"][0]["_source"]["job"]
+
+            if 'retry_count' in job_json:
+                if job_json['retry_count'] < retry_count_max:
+                    job_json['retry_count'] = int(job_json['retry_count']) + 1
+                elif job_json['retry_count'] == retry_count_max:
+                    print "Job reached retry_count_max limit. Cannot retry again."
+                    continue
+            else:
+                job_json['retry_count'] = 1
+
+            job_json["job_info"]["dedup"] = False
+            # clean up job execution info
+            for i in ('duration', 'execute_node', 'facts', 'job_dir', 'job_url',
+                      'metrics', 'pid', 'public_ip', 'status', 'stderr',
+                      'stdout', 'time_end', 'time_queued', 'time_start'):
+                if i in job_json.get('job_info', {}): del job_json['job_info'][i]
+
+            # set queue time
+            job_json['job_info']['time_queued'] = datetime.utcnow().isoformat() + 'Z'
+
+            # use priority from context
+            priority = ctx['job_priority']
+
+            # reset priority
+            job_json['priority'] = priority
+
+            # revoke original job
+            rand_sleep()
+            try:
+                app.control.revoke(job_json['job_id'], terminate=True)
+                print "revoked original job: %s" % job_json['job_id']
+            except Exception, e:
+                print "Got error issuing revoke on job %s: %s" % (job_json['job_id'], traceback.format_exc())
+                print "Continuing."
+
+            # generate celery task id
+            job_json['task_id'] = uuid()
+
+            # delete old job status
+            rand_sleep()
+            try:
+                r = requests.delete("%s/%s/job/_query?q=_id:%s" % (es_url, query_idx, job_json['job_id']))
+                r.raise_for_status()
+                print "deleted original job status: %s" % job_json['job_id']
+            except Exception, e:
+                print "Got error deleting job status %s: %s" % (job_json['job_id'], traceback.format_exc())
+                print "Continuing."
+
+            # log queued status
+            rand_sleep()
+            job_status_json = { 'uuid': job_json['task_id'],
+                                'job_id': job_json['job_id'],
+                                'payload_id': job_json['job_info']['job_payload']['payload_task_id'],
+                                'status': 'job-queued',
+                                'job': job_json }
+            log_job_status(job_status_json)
+
+            # submit job
+            queue = job_json['job_info']['job_queue']
+            res = run_job.apply_async((job_json,), queue=queue,
+                                      time_limit=None,
+                                      soft_time_limit=None,
+                                      priority=priority,
+                                      task_id=job_json['task_id'])
+        except Exception as ex:
+            print >> sys.stderr, "[ERROR] Exception occurred {0}:{1} {2}".format(type(ex), ex, traceback.format_exc())
+
+if __name__ == "__main__":
+
+    es_url = app.conf['JOBS_ES_URL']
+    query_idx = app.conf['STATUS_ALIAS']
+    facetview_url = app.conf['MOZART_URL']
+
+    input_type = sys.argv[1]
+
+    # set dedup to false on the local job context
+    with open('_job.json') as f:
+        job = json.load(f)
+
+    job["job_info"]["dedup"] = False
+    if input_type != "worker":
+        resubmit_jobs()
+    else:
+        print "Cannot retry a worker."
diff --git a/retry.sh b/retry.sh
new file mode 100755
index 0000000..a564a16
--- /dev/null
+++ b/retry.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+source $HOME/verdi/bin/activate
+
+BASE_PATH=$(dirname "${BASH_SOURCE}")
+
+if [ "$#" -eq 1 ]; then
+  types=$1
+else
+  echo "Invalid number of arguments ($#) $*" 1>&2
+  exit 1
+fi
+
+# retry job
+echo "##########################################" 1>&2
+echo -n "Retrying job: " 1>&2
+date 1>&2
+python $BASE_PATH/retry.py "$types" > retry.log 2>&1
+STATUS=$?
+
+echo -n "Finished retrying job: " 1>&2
+date 1>&2
+if [ $STATUS -ne 0 ]; then
+  echo "Failed to retry job."
1>&2 + cat retry.log 1>&2 + echo "{}" + exit $STATUS +fi + +exit 0 diff --git a/settings.json b/settings.json new file mode 100644 index 0000000..09ed265 --- /dev/null +++ b/settings.json @@ -0,0 +1,11 @@ +{ + "SNIPPET_CFG": [ + ["metadata.tags", "tags"], + ["metadata.user_tags", "user tags"], + ["metadata.version", "version"], + ["system_version", "system version"], + ["starttime", "start time"], + ["endtime", "end time"], + ["continent", "continent"] + ] +} diff --git a/wget.py b/wget.py new file mode 100644 index 0000000..5aea44a --- /dev/null +++ b/wget.py @@ -0,0 +1,147 @@ +import json, requests, types, re, getpass, sys, os +from pprint import pformat +import logging +import tarfile +import notify_by_email +from hysds.celery import app +import boto3 +from urlparse import urlparse + +#TODO: Setup logger for this job here. Should log to STDOUT or STDERR as this is a job +logging.basicConfig(level=logging.DEBUG) +logger = logging.getLogger("hysds") + +def wget_script(dataset=None): + """Return wget script.""" + + # query + es_url = app.conf["GRQ_ES_URL"] + index = app.conf["DATASET_ALIAS"] + #facetview_url = app.conf["GRQ_URL"] + print('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index)) + logging.debug('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index)) + print json.dumps(dataset) + logging.debug(json.dumps(dataset)) + + r = requests.post('%s/%s/_search?search_type=scan&scroll=10m&size=100' % (es_url, index), json.dumps(dataset)) + if r.status_code != 200: + print("Failed to query ES. Got status code %d:\n%s" %(r.status_code, json.dumps(r.json(), indent=2))) + logger.debug("Failed to query ES. Got status code %d:\n%s" % + (r.status_code, json.dumps(r.json(), indent=2))) + r.raise_for_status() + logger.debug("result: %s" % pformat(r.json())) + + scan_result = r.json() + count = scan_result['hits']['total'] + scroll_id = scan_result['_scroll_id'] + + # stream output a page at a time for better performance and lower memory footprint + def stream_wget(scroll_id): + #formatted_source = format_source(source) + yield '#!/bin/bash\n#\n' + \ + '# query:\n#\n' + \ + '%s#\n#\n#' % json.dumps(dataset) + \ + '# total datasets matched: %d\n\n' % count + \ + 'read -s -p "JPL Username: " user\n' + \ + 'echo ""\n' + \ + 'read -s -p "JPL Password: " password\n' + \ + 'echo ""\n' + wget_cmd = 'wget --no-check-certificate --mirror -np -nH --reject "index.html*"' + wget_cmd_password = wget_cmd + ' --user=$user --password=$password' + + while True: + r = requests.post('%s/_search/scroll?scroll=10m' % es_url, data=scroll_id) + res = r.json() + logger.debug("res: %s" % pformat(res)) + scroll_id = res['_scroll_id'] + if len(res['hits']['hits']) == 0: break + # Elastic Search seems like it's returning duplicate urls. 
Remove duplicates + unique_urls=[] + for hit in res['hits']['hits']: + [unique_urls.append(url) for url in hit['_source']['urls'] if url not in unique_urls and url.startswith("http")] + + for url in unique_urls: + logging.debug("urls in unique urls: %s",url) + if '.s3-website' in url or 'amazonaws.com' in url: + parsed_url = urlparse(url) + cut_dirs = len(parsed_url.path[1:].split('/')) - 1 + else: + if 's1a_ifg' in url: + cut_dirs = 3 + else: + cut_dirs = 6 + if '.s3-website' in url or 'amazonaws.com' in url: + files = get_s3_files(url) + for file in files: + yield 'echo "downloading %s"\n' % file + if 's1a_ifg' in url: + yield "%s --cut-dirs=%d %s\n" % (wget_cmd, cut_dirs, file) + else: + yield "%s --cut-dirs=%d %s\n" % (wget_cmd, cut_dirs, file) + if 'aria2-dav.jpl.nasa.gov' in url: + yield 'echo "downloading %s"\n' % url + yield "%s --cut-dirs=%d %s/\n" % (wget_cmd_password, (cut_dirs+1), url) + if 'aria-csk-dav.jpl.nasa.gov' in url: + yield 'echo "downloading %s"\n' % url + yield "%s --cut-dirs=%d %s/\n" % (wget_cmd_password, (cut_dirs+1), url) + if 'aria-dst-dav.jpl.nasa.gov' in url: + yield 'echo "downloading %s"\n' % url + yield "%s --cut-dirs=%d %s/\n" % (wget_cmd, cut_dirs, url) + break + + # malarout: interate over each line of stream_wget response, and write to a file which is later attached to the email. + with open('wget_script.sh','w') as f: + for i in stream_wget(scroll_id): + f.write(i) + + # for gzip compressed use file extension .tar.gz and modifier "w:gz" + os.rename('wget_script.sh','wget_script.bash') + tar = tarfile.open("wget.tar.gz", "w:gz") + tar.add('wget_script.bash') + tar.close() + + +def get_s3_files(url): + files = [] + print("Url in the get_s3_files function: %s",url) + parsed_url = urlparse(url) + bucket = parsed_url.hostname.split('.', 1)[0] + client = boto3.client('s3') + results = client.list_objects(Bucket=bucket, Delimiter='/', Prefix=parsed_url.path[1:] + '/') + + if results.get('Contents'): + for result in results.get('Contents'): + files.append(parsed_url.scheme + "://" + parsed_url.hostname + '/' + result.get('Key')) + + if results.get('CommonPrefixes'): + for result in results.get('CommonPrefixes'): + # Prefix values have a trailing '/'. Let's remove it to be consistent with our dir urls + folder = parsed_url.scheme + "://" + parsed_url.hostname + '/' + result.get('Prefix')[:-1] + files.extend(get_s3_files(folder)) + return files + +if __name__ == "__main__": + ''' + Main program of wget_script + ''' + #encoding to a JSON object + query = {} + query = json.loads(sys.argv[1]) + emails = sys.argv[2] + rule_name = sys.argv[3] + + # getting the script + wget_script(query) + # now email the query + attachments = None + cc_recipients = [i.strip() for i in emails.split(',')] + bcc_recipients = [] + subject = "[monitor] (wget_script:%s)" % (rule_name) + body = "Product was ingested from query: %s" % query + body += "\n\nYou can use this wget script attached to download products.\n" + body += "Please rename wget_script.bash to wget_script.sh before running it." 
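+    # Note: wget_script() above writes wget_script.sh, renames it to wget_script.bash
+    # and packages it as wget.tar.gz; the attachment below is only added if that
+    # tarball was actually produced.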
+    if os.path.isfile('wget.tar.gz'):
+        wget_content = open('wget.tar.gz','rb').read()
+        attachments = { 'wget.tar.gz': wget_content }
+    notify_by_email.send_email(getpass.getuser(), cc_recipients, bcc_recipients, subject, body, attachments=attachments)
+
diff --git a/wget.sh b/wget.sh
new file mode 100755
index 0000000..e2dbc99
--- /dev/null
+++ b/wget.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+source $HOME/verdi/bin/activate
+
+BASE_PATH=$(dirname "${BASH_SOURCE}")
+
+# check args
+if [ "$#" -eq 3 ]; then
+  query=$1
+  emails=$2
+  rule_name=$3
+
+else
+  echo "Invalid number of arguments ($#) $*" 1>&2
+  exit 1
+fi
+
+# generate wget script and email it
+echo "##########################################" 1>&2
+echo -n "Generating wget script: " 1>&2
+date 1>&2
+python $BASE_PATH/wget.py "$query" "$emails" "$rule_name" > wget.log 2>&1
+STATUS=$?
+echo -n "Finished with wget script: " 1>&2
+date 1>&2
+if [ $STATUS -ne 0 ]; then
+  echo "Failed to send wget script." 1>&2
+  cat wget.log 1>&2
+  exit $STATUS
+fi
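
For reference, a minimal sketch of how a caller might drive wget.sh (and, analogously, purge.sh) with a JSON-encoded ElasticSearch query. The dataset field name, email address and rule name below are illustrative assumptions, not values defined by this patch:

import json
import subprocess

# Hypothetical dataset filter; "dataset.raw" is only an example field name.
query = {"query": {"term": {"dataset.raw": "S1-IFG"}}}

# Arguments mirror the "$query" "$emails" "$rule_name" check in wget.sh above.
subprocess.check_call([
    "./wget.sh",
    json.dumps(query),      # $1: query JSON handed through to wget.py
    "ops@example.com",      # $2: comma-separated notification emails
    "example_rule",         # $3: rule name used in the email subject
])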