diff --git a/LICENSE b/LICENSE index 634e95e75..ed4390463 100644 --- a/LICENSE +++ b/LICENSE @@ -242,7 +242,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ------------- -Various Kubeflow Pipelines component examples (elyra/pipeline/resources/kfp) +Various Kubeflow Pipelines component examples (etc/config/components/kfp) Copyright 2021 Google LLC @@ -259,3 +259,29 @@ See the License for the specific language governing permissions and limitations under the License. ------------- + +Various Apache Airflow component examples (etc/config/components/airflow) +Originated from: https://github.com/apache/airflow/tree/1.10.15/airflow/operators +and https://github.com/apache/airflow/tree/1.10.15/airflow/contrib/operators + +Copyright 2016-2021 The Apache Software Foundation + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +------------- \ No newline at end of file diff --git a/docs/source/user_guide/pipeline-components.md b/docs/source/user_guide/pipeline-components.md index 2db097398..f7724dfdc 100644 --- a/docs/source/user_guide/pipeline-components.md +++ b/docs/source/user_guide/pipeline-components.md @@ -114,9 +114,9 @@ $ elyra-metadata list component-registries Available metadata instances for component-registries (includes invalid): -Schema Instance Resource ------- -------- -------- -component-registry elyra-airflow-filesystem-preconfig .../jupyter/metadata/component-registries/elyra-airflow-filesystem-preconfig.json +Schema Instance Resource +------ -------- -------- +component-registry elyra-airflow-directory-preconfig .../jupyter/metadata/component-registries/elyra-airflow-directory-preconfig.json ``` #### Adding components to the registry diff --git a/elyra/tests/cli/resources/kfp_3_node_custom.pipeline b/elyra/tests/cli/resources/kfp_3_node_custom.pipeline index 1ade751cb..0ae4c3dae 100644 --- a/elyra/tests/cli/resources/kfp_3_node_custom.pipeline +++ b/elyra/tests/cli/resources/kfp_3_node_custom.pipeline @@ -76,7 +76,7 @@ "output_data": "" }, "label": "", - "component_source": "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml", + "component_source": "{'catalog_type': 'url-catalog', 'component_ref': {'url': 'https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/basics/Calculate_hash/component.yaml'}}", "ui_data": { "label": "Download data", "x_pos": 30, diff --git a/elyra/tests/pipeline/resources/validation_pipelines/kf_inputpath_parameter.pipeline b/elyra/tests/pipeline/resources/validation_pipelines/kf_inputpath_parameter.pipeline index e86bb47ef..7041640c2 100644 --- a/elyra/tests/pipeline/resources/validation_pipelines/kf_inputpath_parameter.pipeline +++ b/elyra/tests/pipeline/resources/validation_pipelines/kf_inputpath_parameter.pipeline @@ -19,7 +19,7 @@ "output_data": "" }, "label": "", - "component_source": "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml", + "component_source": "{'catalog_type': 'url-catalog', 'component_ref': {'url': 'https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml'}}", "ui_data": { "label": "Download data", "image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%20276.93%20274.55%22%3E%3Cg%20id%3D%22Layer_2%22%20data-name%3D%22Layer%202%22%3E%3Cg%20id%3D%22Layer_1-2%22%20data-name%3D%22Layer%201%22%3E%3Cpath%20d%3D%22M95.9%2C62.15%2C100%2C164.25l73.75-94.12a6.79%2C6.79%2C0%2C0%2C1%2C9.6-1.11l46%2C36.92-15-65.61Z%22%20fill%3D%22%234279f4%22%2F%3E%3Cpolygon%20points%3D%22102.55%20182.98%20167.97%20182.98%20127.8%20150.75%20102.55%20182.98%22%20fill%3D%22%230028aa%22%2F%3E%3Cpolygon%20points%3D%22180.18%2083.92%20136.18%20140.06%20183.06%20177.67%20227.53%20121.91%20180.18%2083.92%22%20fill%3D%22%23014bd1%22%2F%3E%3Cpolygon%20points%3D%2283.56%2052.3%2083.57%2052.29%20122.26%203.77%2059.87%2033.82%2044.46%20101.33%2083.56%2052.3%22%20fill%3D%22%23bedcff%22%2F%3E%3Cpolygon%20points%3D%2245.32%20122.05%2086.76%20174.01%2082.81%2075.03%2045.32%20122.05%22%20fill%3D%22%236ca1ff%22%2F%3E%3Cpolygon%20points%3D%22202.31%2028.73%20142.65%200%20105.52%2046.56%20202.31%2028.73%22%20fill%3D%22%23a1c3ff%22%2F%3E%3Cpath%20d%3D%22M1.6%2C272V227.22H7.34v23.41l20.48-23.41h6.4l-17.39%2C19.7%2C19%2C25.07H29.1l-15.92-20.8-5.84%2C6.65V272Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M41.62%2C262.21V240h5.43v22.39a4.67%2C4.67%2C0%2C0%2C0%2C2.35%2C4.19%2C11%2C11%2C0%2C0%2C0%2C11%2C0%2C4.69%2C4.69%2C0%2C0%2C0%2C2.33-4.19V240h5.43v22.19a9.08%2C9.08%2C0%2C0%2C1-4.1%2C7.87%2C16.2%2C16.2%2C0%2C0%2C1-18.37%2C0A9.07%2C9.07%2C0%2C0%2C1%2C41.62%2C262.21Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M77.46%2C272V224h5.43v16.81a29.29%2C29.29%2C0%2C0%2C1%2C9.32-1.73%2C13.1%2C13.1%2C0%2C0%2C1%2C6.2%2C1.41%2C10.71%2C10.71%2C0%2C0%2C1%2C4.18%2C3.74%2C18.07%2C18.07%2C0%2C0%2C1%2C2.23%2C5.06%2C21.26%2C21.26%2C0%2C0%2C1%2C.73%2C5.58q0%2C8.43-4.38%2C12.79T87.35%2C272Zm5.43-4.87h4.55q6.77%2C0%2C9.72-2.95t3-9.51a14.21%2C14.21%2C0%2C0%2C0-2-7.52%2C6.55%2C6.55%2C0%2C0%2C0-6-3.22%2C24.73%2C24.73%2C0%2C0%2C0-9.25%2C1.54Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M112.36%2C255.94q0-7.71%2C4.09-12.3a13.75%2C13.75%2C0%2C0%2C1%2C10.8-4.59q13.35%2C0%2C13.36%2C18.86H117.79a12.3%2C12.3%2C0%2C0%2C0%2C2.9%2C7.07q2.59%2C3.11%2C7.9%2C3.1a24.92%2C24.92%2C0%2C0%2C0%2C10.55-2v5a27.74%2C27.74%2C0%2C0%2C1-9.86%2C1.87%2C19.83%2C19.83%2C0%2C0%2C1-7.7-1.37%2C13.31%2C13.31%2C0%2C0%2C1-5.28-3.76%2C16.21%2C16.21%2C0%2C0%2C1-3-5.38A20.84%2C20.84%2C0%2C0%2C1%2C112.36%2C255.94Zm5.62-2.12h17.26a14.91%2C14.91%2C0%2C0%2C0-2.37-7.12%2C6.44%2C6.44%2C0%2C0%2C0-5.62-2.78%2C8.2%2C8.2%2C0%2C0%2C0-6.21%2C2.72A12.07%2C12.07%2C0%2C0%2C0%2C118%2C253.82Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M147.32%2C244.89V240h5v-7.59a8.14%2C8.14%2C0%2C0%2C1%2C2.31-6.05%2C7.79%2C7.79%2C0%2C0%2C1%2C5.69-2.28h7.86V229h-5c-2.21%2C0-3.67.45-4.37%2C1.34s-1.06%2C2.55-1.06%2C5V240h8.46v4.87h-8.46V272h-5.44v-27.1Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M175.26%2C272V224h5.43v48Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M194.41%2C268.05a17.86%2C17.86%2C0%2C1%2C1%2C12.33%2C4.9A16.57%2C16.57%2C0%2C0%2C1%2C194.41%2C268.05Zm3.84-20.65a13.16%2C13.16%2C0%2C0%2C0%2C0%2C17.2%2C12.07%2C12.07%2C0%2C0%2C0%2C17%2C0%2C13.09%2C13.09%2C0%2C0%2C0%2C0-17.2%2C12.07%2C12.07%2C0%2C0%2C0-17%2C0Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M228.45%2C240h5.75l7.3%2C25.32L248.93%2C240h5.36l7.34%2C25.34L269%2C240h5.74L264.7%2C272h-6.12l-6.83-24.58L245%2C272h-6.47Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3C%2Fg%3E%3C%2Fg%3E%3C%2Fsvg%3E", diff --git a/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_missing_connection.pipeline b/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_missing_connection.pipeline index 75c993cc2..ee74e7203 100644 --- a/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_missing_connection.pipeline +++ b/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_missing_connection.pipeline @@ -79,7 +79,7 @@ "url": "https://raw.githubusercontent.com/elyra-ai/examples/master/pipelines/kubeflow_pipelines_component_examples/filter_file_component/data.csv" }, "label": "", - "component_source": "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml", + "component_source": "{'catalog_type': 'url-catalog', 'component_ref': {'url': 'https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml'}}", "ui_data": { "label": "Download data", "image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%20276.93%20274.55%22%3E%3Cg%20id%3D%22Layer_2%22%20data-name%3D%22Layer%202%22%3E%3Cg%20id%3D%22Layer_1-2%22%20data-name%3D%22Layer%201%22%3E%3Cpath%20d%3D%22M95.9%2C62.15%2C100%2C164.25l73.75-94.12a6.79%2C6.79%2C0%2C0%2C1%2C9.6-1.11l46%2C36.92-15-65.61Z%22%20fill%3D%22%234279f4%22%2F%3E%3Cpolygon%20points%3D%22102.55%20182.98%20167.97%20182.98%20127.8%20150.75%20102.55%20182.98%22%20fill%3D%22%230028aa%22%2F%3E%3Cpolygon%20points%3D%22180.18%2083.92%20136.18%20140.06%20183.06%20177.67%20227.53%20121.91%20180.18%2083.92%22%20fill%3D%22%23014bd1%22%2F%3E%3Cpolygon%20points%3D%2283.56%2052.3%2083.57%2052.29%20122.26%203.77%2059.87%2033.82%2044.46%20101.33%2083.56%2052.3%22%20fill%3D%22%23bedcff%22%2F%3E%3Cpolygon%20points%3D%2245.32%20122.05%2086.76%20174.01%2082.81%2075.03%2045.32%20122.05%22%20fill%3D%22%236ca1ff%22%2F%3E%3Cpolygon%20points%3D%22202.31%2028.73%20142.65%200%20105.52%2046.56%20202.31%2028.73%22%20fill%3D%22%23a1c3ff%22%2F%3E%3Cpath%20d%3D%22M1.6%2C272V227.22H7.34v23.41l20.48-23.41h6.4l-17.39%2C19.7%2C19%2C25.07H29.1l-15.92-20.8-5.84%2C6.65V272Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M41.62%2C262.21V240h5.43v22.39a4.67%2C4.67%2C0%2C0%2C0%2C2.35%2C4.19%2C11%2C11%2C0%2C0%2C0%2C11%2C0%2C4.69%2C4.69%2C0%2C0%2C0%2C2.33-4.19V240h5.43v22.19a9.08%2C9.08%2C0%2C0%2C1-4.1%2C7.87%2C16.2%2C16.2%2C0%2C0%2C1-18.37%2C0A9.07%2C9.07%2C0%2C0%2C1%2C41.62%2C262.21Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M77.46%2C272V224h5.43v16.81a29.29%2C29.29%2C0%2C0%2C1%2C9.32-1.73%2C13.1%2C13.1%2C0%2C0%2C1%2C6.2%2C1.41%2C10.71%2C10.71%2C0%2C0%2C1%2C4.18%2C3.74%2C18.07%2C18.07%2C0%2C0%2C1%2C2.23%2C5.06%2C21.26%2C21.26%2C0%2C0%2C1%2C.73%2C5.58q0%2C8.43-4.38%2C12.79T87.35%2C272Zm5.43-4.87h4.55q6.77%2C0%2C9.72-2.95t3-9.51a14.21%2C14.21%2C0%2C0%2C0-2-7.52%2C6.55%2C6.55%2C0%2C0%2C0-6-3.22%2C24.73%2C24.73%2C0%2C0%2C0-9.25%2C1.54Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M112.36%2C255.94q0-7.71%2C4.09-12.3a13.75%2C13.75%2C0%2C0%2C1%2C10.8-4.59q13.35%2C0%2C13.36%2C18.86H117.79a12.3%2C12.3%2C0%2C0%2C0%2C2.9%2C7.07q2.59%2C3.11%2C7.9%2C3.1a24.92%2C24.92%2C0%2C0%2C0%2C10.55-2v5a27.74%2C27.74%2C0%2C0%2C1-9.86%2C1.87%2C19.83%2C19.83%2C0%2C0%2C1-7.7-1.37%2C13.31%2C13.31%2C0%2C0%2C1-5.28-3.76%2C16.21%2C16.21%2C0%2C0%2C1-3-5.38A20.84%2C20.84%2C0%2C0%2C1%2C112.36%2C255.94Zm5.62-2.12h17.26a14.91%2C14.91%2C0%2C0%2C0-2.37-7.12%2C6.44%2C6.44%2C0%2C0%2C0-5.62-2.78%2C8.2%2C8.2%2C0%2C0%2C0-6.21%2C2.72A12.07%2C12.07%2C0%2C0%2C0%2C118%2C253.82Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M147.32%2C244.89V240h5v-7.59a8.14%2C8.14%2C0%2C0%2C1%2C2.31-6.05%2C7.79%2C7.79%2C0%2C0%2C1%2C5.69-2.28h7.86V229h-5c-2.21%2C0-3.67.45-4.37%2C1.34s-1.06%2C2.55-1.06%2C5V240h8.46v4.87h-8.46V272h-5.44v-27.1Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M175.26%2C272V224h5.43v48Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M194.41%2C268.05a17.86%2C17.86%2C0%2C1%2C1%2C12.33%2C4.9A16.57%2C16.57%2C0%2C0%2C1%2C194.41%2C268.05Zm3.84-20.65a13.16%2C13.16%2C0%2C0%2C0%2C0%2C17.2%2C12.07%2C12.07%2C0%2C0%2C0%2C17%2C0%2C13.09%2C13.09%2C0%2C0%2C0%2C0-17.2%2C12.07%2C12.07%2C0%2C0%2C0-17%2C0Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M228.45%2C240h5.75l7.3%2C25.32L248.93%2C240h5.36l7.34%2C25.34L269%2C240h5.74L264.7%2C272h-6.12l-6.83-24.58L245%2C272h-6.47Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3C%2Fg%3E%3C%2Fg%3E%3C%2Fsvg%3E", diff --git a/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_parameter.pipeline b/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_parameter.pipeline index af12b22c7..7c1f6a5a6 100644 --- a/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_parameter.pipeline +++ b/elyra/tests/pipeline/resources/validation_pipelines/kf_invalid_inputpath_parameter.pipeline @@ -19,7 +19,7 @@ "output_data": "" }, "label": "", - "component_source": "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml", + "component_source": "{'catalog_type': 'url-catalog', 'component_ref': {'url': 'https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml'}}", "ui_data": { "label": "Download data", "image": "data:image/svg+xml;utf8,%3Csvg%20xmlns%3D%22http%3A%2F%2Fwww.w3.org%2F2000%2Fsvg%22%20viewBox%3D%220%200%20276.93%20274.55%22%3E%3Cg%20id%3D%22Layer_2%22%20data-name%3D%22Layer%202%22%3E%3Cg%20id%3D%22Layer_1-2%22%20data-name%3D%22Layer%201%22%3E%3Cpath%20d%3D%22M95.9%2C62.15%2C100%2C164.25l73.75-94.12a6.79%2C6.79%2C0%2C0%2C1%2C9.6-1.11l46%2C36.92-15-65.61Z%22%20fill%3D%22%234279f4%22%2F%3E%3Cpolygon%20points%3D%22102.55%20182.98%20167.97%20182.98%20127.8%20150.75%20102.55%20182.98%22%20fill%3D%22%230028aa%22%2F%3E%3Cpolygon%20points%3D%22180.18%2083.92%20136.18%20140.06%20183.06%20177.67%20227.53%20121.91%20180.18%2083.92%22%20fill%3D%22%23014bd1%22%2F%3E%3Cpolygon%20points%3D%2283.56%2052.3%2083.57%2052.29%20122.26%203.77%2059.87%2033.82%2044.46%20101.33%2083.56%2052.3%22%20fill%3D%22%23bedcff%22%2F%3E%3Cpolygon%20points%3D%2245.32%20122.05%2086.76%20174.01%2082.81%2075.03%2045.32%20122.05%22%20fill%3D%22%236ca1ff%22%2F%3E%3Cpolygon%20points%3D%22202.31%2028.73%20142.65%200%20105.52%2046.56%20202.31%2028.73%22%20fill%3D%22%23a1c3ff%22%2F%3E%3Cpath%20d%3D%22M1.6%2C272V227.22H7.34v23.41l20.48-23.41h6.4l-17.39%2C19.7%2C19%2C25.07H29.1l-15.92-20.8-5.84%2C6.65V272Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M41.62%2C262.21V240h5.43v22.39a4.67%2C4.67%2C0%2C0%2C0%2C2.35%2C4.19%2C11%2C11%2C0%2C0%2C0%2C11%2C0%2C4.69%2C4.69%2C0%2C0%2C0%2C2.33-4.19V240h5.43v22.19a9.08%2C9.08%2C0%2C0%2C1-4.1%2C7.87%2C16.2%2C16.2%2C0%2C0%2C1-18.37%2C0A9.07%2C9.07%2C0%2C0%2C1%2C41.62%2C262.21Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M77.46%2C272V224h5.43v16.81a29.29%2C29.29%2C0%2C0%2C1%2C9.32-1.73%2C13.1%2C13.1%2C0%2C0%2C1%2C6.2%2C1.41%2C10.71%2C10.71%2C0%2C0%2C1%2C4.18%2C3.74%2C18.07%2C18.07%2C0%2C0%2C1%2C2.23%2C5.06%2C21.26%2C21.26%2C0%2C0%2C1%2C.73%2C5.58q0%2C8.43-4.38%2C12.79T87.35%2C272Zm5.43-4.87h4.55q6.77%2C0%2C9.72-2.95t3-9.51a14.21%2C14.21%2C0%2C0%2C0-2-7.52%2C6.55%2C6.55%2C0%2C0%2C0-6-3.22%2C24.73%2C24.73%2C0%2C0%2C0-9.25%2C1.54Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M112.36%2C255.94q0-7.71%2C4.09-12.3a13.75%2C13.75%2C0%2C0%2C1%2C10.8-4.59q13.35%2C0%2C13.36%2C18.86H117.79a12.3%2C12.3%2C0%2C0%2C0%2C2.9%2C7.07q2.59%2C3.11%2C7.9%2C3.1a24.92%2C24.92%2C0%2C0%2C0%2C10.55-2v5a27.74%2C27.74%2C0%2C0%2C1-9.86%2C1.87%2C19.83%2C19.83%2C0%2C0%2C1-7.7-1.37%2C13.31%2C13.31%2C0%2C0%2C1-5.28-3.76%2C16.21%2C16.21%2C0%2C0%2C1-3-5.38A20.84%2C20.84%2C0%2C0%2C1%2C112.36%2C255.94Zm5.62-2.12h17.26a14.91%2C14.91%2C0%2C0%2C0-2.37-7.12%2C6.44%2C6.44%2C0%2C0%2C0-5.62-2.78%2C8.2%2C8.2%2C0%2C0%2C0-6.21%2C2.72A12.07%2C12.07%2C0%2C0%2C0%2C118%2C253.82Z%22%20fill%3D%22%234279f4%22%20stroke%3D%22%234279f4%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M147.32%2C244.89V240h5v-7.59a8.14%2C8.14%2C0%2C0%2C1%2C2.31-6.05%2C7.79%2C7.79%2C0%2C0%2C1%2C5.69-2.28h7.86V229h-5c-2.21%2C0-3.67.45-4.37%2C1.34s-1.06%2C2.55-1.06%2C5V240h8.46v4.87h-8.46V272h-5.44v-27.1Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M175.26%2C272V224h5.43v48Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M194.41%2C268.05a17.86%2C17.86%2C0%2C1%2C1%2C12.33%2C4.9A16.57%2C16.57%2C0%2C0%2C1%2C194.41%2C268.05Zm3.84-20.65a13.16%2C13.16%2C0%2C0%2C0%2C0%2C17.2%2C12.07%2C12.07%2C0%2C0%2C0%2C17%2C0%2C13.09%2C13.09%2C0%2C0%2C0%2C0-17.2%2C12.07%2C12.07%2C0%2C0%2C0-17%2C0Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3Cpath%20d%3D%22M228.45%2C240h5.75l7.3%2C25.32L248.93%2C240h5.36l7.34%2C25.34L269%2C240h5.74L264.7%2C272h-6.12l-6.83-24.58L245%2C272h-6.47Z%22%20fill%3D%22%230028aa%22%20stroke%3D%22%230028aa%22%20stroke-miterlimit%3D%2210%22%20stroke-width%3D%223.2%22%2F%3E%3C%2Fg%3E%3C%2Fg%3E%3C%2Fsvg%3E", diff --git a/etc/config/components/airflow/bash_operator.py b/etc/config/components/airflow/bash_operator.py new file mode 100644 index 000000000..e27e74656 --- /dev/null +++ b/etc/config/components/airflow/bash_operator.py @@ -0,0 +1,173 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import os +import signal +from subprocess import Popen, STDOUT, PIPE +from tempfile import gettempdir, NamedTemporaryFile + +from builtins import bytes + +from airflow.exceptions import AirflowException +from airflow.models import BaseOperator +from airflow.utils.decorators import apply_defaults +from airflow.utils.file import TemporaryDirectory +from airflow.utils.operator_helpers import context_to_airflow_vars + + +class BashOperator(BaseOperator): + r""" + Execute a Bash script, command or set of commands. + + .. seealso:: + For more information on how to use this operator, take a look at the guide: + :ref:`howto/operator:BashOperator` + + :param bash_command: The command, set of commands or reference to a + bash script (must be '.sh') to be executed. (templated) + :type bash_command: str + :param xcom_push: If xcom_push is True, the last line written to stdout + will also be pushed to an XCom when the bash command completes. + :type xcom_push: bool + :param env: If env is not None, it must be a mapping that defines the + environment variables for the new process; these are used instead + of inheriting the current process environment, which is the default + behavior. (templated) + :type env: dict + :param output_encoding: Output encoding of bash command + :type output_encoding: str + + .. warning:: + + Care should be taken with "user" input or when using Jinja templates in the + ``bash_command``, as this bash operator does not perform any escaping or + sanitization of the command. + + This applies mostly to using "dag_run" conf, as that can be submitted via + users in the Web UI. Most of the default template variables are not at + risk. + + For example, do **not** do this: + + .. code-block:: python + + bash_task = BashOperator( + task_id="bash_task", + bash_command='echo "Here is the message: \'{{ dag_run.conf["message"] if dag_run else "" }}\'"', + ) + + Instead, you should pass this via the ``env`` kwarg and use double-quotes + inside the bash_command, as below: + + .. code-block:: python + + bash_task = BashOperator( + task_id="bash_task", + bash_command='echo "here is the message: \'$message\'"', + env={'message': '{{ dag_run.conf["message"] if dag_run else "" }}'}, + ) + + """ + template_fields = ('bash_command', 'env') + template_ext = ('.sh', '.bash',) + ui_color = '#f0ede4' + + @apply_defaults + def __init__( + self, + bash_command, + xcom_push=False, + env=None, + output_encoding='utf-8', + *args, **kwargs): + + super(BashOperator, self).__init__(*args, **kwargs) + self.bash_command = bash_command + self.env = env + self.xcom_push_flag = xcom_push + self.output_encoding = output_encoding + self.sub_process = None + + def execute(self, context): + """ + Execute the bash command in a temporary directory + which will be cleaned afterwards + """ + self.log.info("Tmp dir root location: \n %s", gettempdir()) + + # Prepare env for child process. + env = self.env + if env is None: + env = os.environ.copy() + airflow_context_vars = context_to_airflow_vars(context, in_env_var_format=True) + self.log.debug('Exporting the following env vars:\n%s', + '\n'.join(["{}={}".format(k, v) + for k, v in airflow_context_vars.items()])) + env.update(airflow_context_vars) + + self.lineage_data = self.bash_command + + with TemporaryDirectory(prefix='airflowtmp') as tmp_dir: + with NamedTemporaryFile(dir=tmp_dir, prefix=self.task_id) as f: + + f.write(bytes(self.bash_command, 'utf_8')) + f.flush() + fname = f.name + script_location = os.path.abspath(fname) + self.log.info( + "Temporary script location: %s", + script_location + ) + + def pre_exec(): + # Restore default signal disposition and invoke setsid + for sig in ('SIGPIPE', 'SIGXFZ', 'SIGXFSZ'): + if hasattr(signal, sig): + signal.signal(getattr(signal, sig), signal.SIG_DFL) + os.setsid() + + self.log.info("Running command: %s", self.bash_command) + self.sub_process = Popen( + ['bash', fname], + stdout=PIPE, stderr=STDOUT, + cwd=tmp_dir, env=env, + preexec_fn=pre_exec) + + self.log.info("Output:") + line = '' + for line in iter(self.sub_process.stdout.readline, b''): + line = line.decode(self.output_encoding).rstrip() + self.log.info(line) + self.sub_process.wait() + self.log.info( + "Command exited with return code %s", + self.sub_process.returncode + ) + + if self.sub_process.returncode: + raise AirflowException("Bash command failed") + + if self.xcom_push_flag: + return line + + def on_kill(self): + self.log.info('Sending SIGTERM signal to bash process group') + if self.sub_process and hasattr(self.sub_process, 'pid'): + os.killpg(os.getpgid(self.sub_process.pid), signal.SIGTERM) diff --git a/etc/config/components/airflow/email_operator.py b/etc/config/components/airflow/email_operator.py new file mode 100644 index 000000000..0ea7fc0cc --- /dev/null +++ b/etc/config/components/airflow/email_operator.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from airflow.models import BaseOperator +from airflow.utils.email import send_email +from airflow.utils.decorators import apply_defaults + + +class EmailOperator(BaseOperator): + """ + Sends an email. + + :param to: list of emails to send the email to. (templated) + :type to: list or string (comma or semicolon delimited) + :param subject: subject line for the email. (templated) + :type subject: str + :param html_content: content of the email, html markup + is allowed. (templated) + :type html_content: str + :param files: file names to attach in email + :type files: list + :param cc: list of recipients to be added in CC field + :type cc: list or string (comma or semicolon delimited) + :param bcc: list of recipients to be added in BCC field + :type bcc: list or string (comma or semicolon delimited) + :param mime_subtype: MIME sub content type + :type mime_subtype: str + :param mime_charset: character set parameter added to the Content-Type + header. + :type mime_charset: str + """ + + template_fields = ('to', 'subject', 'html_content') + template_ext = ('.html',) + ui_color = '#e6faf9' + + @apply_defaults + def __init__( + self, + to, + subject, + html_content, + files=None, + cc=None, + bcc=None, + mime_subtype='mixed', + mime_charset='us_ascii', + *args, **kwargs): + super(EmailOperator, self).__init__(*args, **kwargs) + self.to = to + self.subject = subject + self.html_content = html_content + self.files = files or [] + self.cc = cc + self.bcc = bcc + self.mime_subtype = mime_subtype + self.mime_charset = mime_charset + + def execute(self, context): + send_email(self.to, self.subject, self.html_content, + files=self.files, cc=self.cc, bcc=self.bcc, + mime_subtype=self.mime_subtype, mime_charset=self.mime_charset) diff --git a/etc/config/components/airflow/http_operator.py b/etc/config/components/airflow/http_operator.py new file mode 100644 index 000000000..8b8532bff --- /dev/null +++ b/etc/config/components/airflow/http_operator.py @@ -0,0 +1,99 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from airflow.exceptions import AirflowException +from airflow.hooks.http_hook import HttpHook +from airflow.models import BaseOperator +from airflow.utils.decorators import apply_defaults + + +class SimpleHttpOperator(BaseOperator): + """ + Calls an endpoint on an HTTP system to execute an action + + :param http_conn_id: The connection to run the operator against + :type http_conn_id: str + :param endpoint: The relative part of the full url. (templated) + :type endpoint: str + :param method: The HTTP method to use, default = "POST" + :type method: str + :param data: The data to pass. POST-data in POST/PUT and params + in the URL for a GET request. (templated) + :type data: For POST/PUT, depends on the content-type parameter, + for GET a dictionary of key/value string pairs + :param headers: The HTTP headers to be added to the GET request + :type headers: a dictionary of string key/value pairs + :param response_check: A check against the 'requests' response object. + Returns True for 'pass' and False otherwise. + :type response_check: A lambda or defined function. + :param extra_options: Extra options for the 'requests' library, see the + 'requests' documentation (options to modify timeout, ssl, etc.) + :type extra_options: A dictionary of options, where key is string and value + depends on the option that's being modified. + :param xcom_push: Push the response to Xcom (default: False). + If xcom_push is True, response of an HTTP request will also + be pushed to an XCom. + :type xcom_push: bool + :param log_response: Log the response (default: False) + :type log_response: bool + """ + + template_fields = ['endpoint', 'data', 'headers', ] + template_ext = () + ui_color = '#f4a460' + + @apply_defaults + def __init__(self, + endpoint, + method='POST', + data=None, + headers=None, + response_check=None, + extra_options=None, + xcom_push=False, + http_conn_id='http_default', + log_response=False, + *args, **kwargs): + super(SimpleHttpOperator, self).__init__(*args, **kwargs) + self.http_conn_id = http_conn_id + self.method = method + self.endpoint = endpoint + self.headers = headers or {} + self.data = data or {} + self.response_check = response_check + self.extra_options = extra_options or {} + self.xcom_push_flag = xcom_push + self.log_response = log_response + + def execute(self, context): + http = HttpHook(self.method, http_conn_id=self.http_conn_id) + + self.log.info("Calling HTTP method") + + response = http.run(self.endpoint, + self.data, + self.headers, + self.extra_options) + if self.log_response: + self.log.info(response.text) + if self.response_check: + if not self.response_check(response): + raise AirflowException("Response check returned False.") + if self.xcom_push_flag: + return response.text diff --git a/etc/config/components/airflow/spark_sql_operator.py b/etc/config/components/airflow/spark_sql_operator.py new file mode 100644 index 000000000..06a51f7df --- /dev/null +++ b/etc/config/components/airflow/spark_sql_operator.py @@ -0,0 +1,114 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +from airflow.models import BaseOperator +from airflow.utils.decorators import apply_defaults +from airflow.contrib.hooks.spark_sql_hook import SparkSqlHook + + +class SparkSqlOperator(BaseOperator): + """ + Execute Spark SQL query + + :param sql: The SQL query to execute. (templated) + :type sql: str + :param conf: arbitrary Spark configuration property + :type conf: str (format: PROP=VALUE) + :param conn_id: connection_id string + :type conn_id: str + :param total_executor_cores: (Standalone & Mesos only) Total cores for all + executors (Default: all the available cores on the worker) + :type total_executor_cores: int + :param executor_cores: (Standalone & YARN only) Number of cores per + executor (Default: 2) + :type executor_cores: int + :param executor_memory: Memory per executor (e.g. 1000M, 2G) (Default: 1G) + :type executor_memory: str + :param keytab: Full path to the file that contains the keytab + :type keytab: str + :param master: spark://host:port, mesos://host:port, yarn, or local + :type master: str + :param name: Name of the job + :type name: str + :param num_executors: Number of executors to launch + :type num_executors: int + :param verbose: Whether to pass the verbose flag to spark-sql + :type verbose: bool + :param yarn_queue: The YARN queue to submit to (Default: "default") + :type yarn_queue: str + """ + + template_fields = ["_sql"] + template_ext = [".sql", ".hql"] + + @apply_defaults + def __init__(self, + sql, + conf=None, + conn_id='spark_sql_default', + total_executor_cores=None, + executor_cores=None, + executor_memory=None, + keytab=None, + principal=None, + master='yarn', + name='default-name', + num_executors=None, + verbose=True, + yarn_queue='default', + *args, + **kwargs): + super(SparkSqlOperator, self).__init__(*args, **kwargs) + self._sql = sql + self._conf = conf + self._conn_id = conn_id + self._total_executor_cores = total_executor_cores + self._executor_cores = executor_cores + self._executor_memory = executor_memory + self._keytab = keytab + self._principal = principal + self._master = master + self._name = name + self._num_executors = num_executors + self._verbose = verbose + self._yarn_queue = yarn_queue + self._hook = None + + def execute(self, context): + """ + Call the SparkSqlHook to run the provided sql query + """ + self._hook = SparkSqlHook(sql=self._sql, + conf=self._conf, + conn_id=self._conn_id, + total_executor_cores=self._total_executor_cores, + executor_cores=self._executor_cores, + executor_memory=self._executor_memory, + keytab=self._keytab, + principal=self._principal, + name=self._name, + num_executors=self._num_executors, + master=self._master, + verbose=self._verbose, + yarn_queue=self._yarn_queue + ) + self._hook.run_query() + + def on_kill(self): + self._hook.kill() diff --git a/etc/config/components/airflow/spark_submit_operator.py b/etc/config/components/airflow/spark_submit_operator.py new file mode 100644 index 000000000..caee33597 --- /dev/null +++ b/etc/config/components/airflow/spark_submit_operator.py @@ -0,0 +1,190 @@ +# -*- coding: utf-8 -*- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +from airflow.contrib.hooks.spark_submit_hook import SparkSubmitHook +from airflow.models import BaseOperator +from airflow.settings import WEB_COLORS +from airflow.utils.decorators import apply_defaults + + +class SparkSubmitOperator(BaseOperator): + """ + This hook is a wrapper around the spark-submit binary to kick off a spark-submit job. + It requires that the "spark-submit" binary is in the PATH or the spark-home is set + in the extra on the connection. + + :param application: The application that submitted as a job, either jar or py file. (templated) + :type application: str + :param conf: Arbitrary Spark configuration properties (templated) + :type conf: dict + :param conn_id: The connection id as configured in Airflow administration. When an + invalid connection_id is supplied, it will default to yarn. + :type conn_id: str + :param files: Upload additional files to the executor running the job, separated by a + comma. Files will be placed in the working directory of each executor. + For example, serialized objects. (templated) + :type files: str + :param py_files: Additional python files used by the job, can be .zip, .egg or .py. (templated) + :type py_files: str + :param jars: Submit additional jars to upload and place them in executor classpath. (templated) + :type jars: str + :param driver_class_path: Additional, driver-specific, classpath settings. (templated) + :type driver_class_path: str + :param java_class: the main class of the Java application + :type java_class: str + :param packages: Comma-separated list of maven coordinates of jars to include on the + driver and executor classpaths. (templated) + :type packages: str + :param exclude_packages: Comma-separated list of maven coordinates of jars to exclude + while resolving the dependencies provided in 'packages' (templated) + :type exclude_packages: str + :param repositories: Comma-separated list of additional remote repositories to search + for the maven coordinates given with 'packages' + :type repositories: str + :param total_executor_cores: (Standalone & Mesos only) Total cores for all executors + (Default: all the available cores on the worker) + :type total_executor_cores: int + :param executor_cores: (Standalone & YARN only) Number of cores per executor (Default: 2) + :type executor_cores: int + :param executor_memory: Memory per executor (e.g. 1000M, 2G) (Default: 1G) + :type executor_memory: str + :param driver_memory: Memory allocated to the driver (e.g. 1000M, 2G) (Default: 1G) + :type driver_memory: str + :param keytab: Full path to the file that contains the keytab (templated) + :type keytab: str + :param principal: The name of the kerberos principal used for keytab (templated) + :type principal: str + :param proxy_user: User to impersonate when submitting the application (templated) + :type proxy_user: str + :param name: Name of the job (default airflow-spark). (templated) + :type name: str + :param num_executors: Number of executors to launch + :type num_executors: int + :param status_poll_interval: Seconds to wait between polls of driver status in cluster + mode (Default: 1) + :type status_poll_interval: int + :param application_args: Arguments for the application being submitted (templated) + :type application_args: list + :param env_vars: Environment variables for spark-submit. It supports yarn and k8s mode too. (templated) + :type env_vars: dict + :param verbose: Whether to pass the verbose flag to spark-submit process for debugging + :type verbose: bool + :param spark_binary: The command to use for spark submit. + Some distros may use spark2-submit. + :type spark_binary: str + """ + template_fields = ('_application', '_conf', '_files', '_py_files', '_jars', '_driver_class_path', + '_packages', '_exclude_packages', '_keytab', '_principal', '_proxy_user', '_name', + '_application_args', '_env_vars') + ui_color = WEB_COLORS['LIGHTORANGE'] + + @apply_defaults + def __init__(self, + application='', + conf=None, + conn_id='spark_default', + files=None, + py_files=None, + archives=None, + driver_class_path=None, + jars=None, + java_class=None, + packages=None, + exclude_packages=None, + repositories=None, + total_executor_cores=None, + executor_cores=None, + executor_memory=None, + driver_memory=None, + keytab=None, + principal=None, + proxy_user=None, + name='airflow-spark', + num_executors=None, + status_poll_interval=1, + application_args=None, + env_vars=None, + verbose=False, + spark_binary=None, + *args, + **kwargs): + super(SparkSubmitOperator, self).__init__(*args, **kwargs) + self._application = application + self._conf = conf + self._files = files + self._py_files = py_files + self._archives = archives + self._driver_class_path = driver_class_path + self._jars = jars + self._java_class = java_class + self._packages = packages + self._exclude_packages = exclude_packages + self._repositories = repositories + self._total_executor_cores = total_executor_cores + self._executor_cores = executor_cores + self._executor_memory = executor_memory + self._driver_memory = driver_memory + self._keytab = keytab + self._principal = principal + self._proxy_user = proxy_user + self._name = name + self._num_executors = num_executors + self._status_poll_interval = status_poll_interval + self._application_args = application_args + self._env_vars = env_vars + self._verbose = verbose + self._spark_binary = spark_binary + self._hook = None + self._conn_id = conn_id + + def execute(self, context): + """ + Call the SparkSubmitHook to run the provided spark job + """ + self._hook = SparkSubmitHook( + conf=self._conf, + conn_id=self._conn_id, + files=self._files, + py_files=self._py_files, + archives=self._archives, + driver_class_path=self._driver_class_path, + jars=self._jars, + java_class=self._java_class, + packages=self._packages, + exclude_packages=self._exclude_packages, + repositories=self._repositories, + total_executor_cores=self._total_executor_cores, + executor_cores=self._executor_cores, + executor_memory=self._executor_memory, + driver_memory=self._driver_memory, + keytab=self._keytab, + principal=self._principal, + proxy_user=self._proxy_user, + name=self._name, + num_executors=self._num_executors, + status_poll_interval=self._status_poll_interval, + application_args=self._application_args, + env_vars=self._env_vars, + verbose=self._verbose, + spark_binary=self._spark_binary + ) + self._hook.submit(self._application) + + def on_kill(self): + self._hook.on_kill() diff --git a/etc/config/components/kfp/calculate_hash.yaml b/etc/config/components/kfp/calculate_hash.yaml new file mode 100644 index 000000000..16be33be5 --- /dev/null +++ b/etc/config/components/kfp/calculate_hash.yaml @@ -0,0 +1,43 @@ +name: Calculate data hash +inputs: +- {name: Data} +- {name: Hash algorithm, type: String, default: SHA256, description: "Hash algorithm to use. Supported values are MD5, SHA1, SHA256, SHA512, SHA3"} +outputs: +- {name: Hash} +metadata: + annotations: + author: Alexey Volkov +implementation: + container: + image: alpine + command: + - sh + - -exc + - | + data_path="$0" + hash_algorithm="$1" + hash_path="$2" + mkdir -p "$(dirname "$hash_path")" + + hash_algorithm=$(echo "$hash_algorithm" | tr '[:upper:]' '[:lower:]') + case "$hash_algorithm" in + md5|sha1|sha256|sha512|sha3) hash_program="${hash_algorithm}sum";; + *) echo "Unsupported hash algorithm $hash_algorithm"; exit 1;; + esac + + if [ -d "$data_path" ]; then + # Calculating hash for directory + cd "$data_path" + find . -type f -print0 | + sort -z | + xargs -0 "$hash_program" | + "$hash_program" | + cut -d ' ' -f 1 > "$hash_path" + else + # Calculating hash for file + "$hash_program" "$data_path" | + cut -d ' ' -f 1 > "$hash_path" + fi + - {inputPath: Data} + - {inputValue: Hash algorithm} + - {outputPath: Hash} \ No newline at end of file diff --git a/etc/config/components/kfp/download_data.yaml b/etc/config/components/kfp/download_data.yaml new file mode 100644 index 000000000..2ec079923 --- /dev/null +++ b/etc/config/components/kfp/download_data.yaml @@ -0,0 +1,26 @@ +name: Download data +inputs: +- {name: Url, type: URI} +- {name: curl options, type: string, default: '--location', description: 'Additional options given to the curl program. See https://curl.haxx.se/docs/manpage.html'} +outputs: +- {name: Data} +metadata: + annotations: + author: Alexey Volkov +implementation: + container: + # image: curlimages/curl # Sets a non-root user which cannot write to mounted volumes. See https://github.com/curl/curl-docker/issues/22 + image: byrnedo/alpine-curl@sha256:548379d0a4a0c08b9e55d9d87a592b7d35d9ab3037f4936f5ccd09d0b625a342 + command: + - sh + - -exc + - | + url="$0" + output_path="$1" + curl_options="$2" + + mkdir -p "$(dirname "$output_path")" + curl --get "$url" --output "$output_path" $curl_options + - inputValue: Url + - outputPath: Data + - inputValue: curl options \ No newline at end of file diff --git a/etc/config/metadata/component-registries/elyra-airflow-directory-preconfig.json b/etc/config/metadata/component-registries/elyra-airflow-directory-preconfig.json new file mode 100644 index 000000000..f86a3dde7 --- /dev/null +++ b/etc/config/metadata/component-registries/elyra-airflow-directory-preconfig.json @@ -0,0 +1,12 @@ +{ + "display_name": "Airflow Preloaded Components - Directory", + "metadata": { + "description": "Preloaded directory-based components that are supported by Apache Airflow", + "runtime": "airflow", + "location_type": "Directory", + "categories": ["Preloaded Airflow"], + "paths": ["airflow"] + }, + "schema_name": "local-directory-catalog", + "version": 1 +} diff --git a/etc/config/metadata/component-registries/elyra-airflow-filesystem-preconfig.json b/etc/config/metadata/component-registries/elyra-airflow-filesystem-preconfig.json deleted file mode 100644 index cba8576bd..000000000 --- a/etc/config/metadata/component-registries/elyra-airflow-filesystem-preconfig.json +++ /dev/null @@ -1,11 +0,0 @@ -{ - "display_name": "Airflow Preloaded Components - Filesystem", - "metadata": { - "description": "Preloaded filesystem-based components that are supported by Apache Airflow", - "runtime": "airflow", - "categories": ["Preloaded Airflow"], - "paths": ["airflow/slack_operator.py"] - }, - "schema_name": "local-file-catalog", - "version": 1 -} diff --git a/etc/config/metadata/component-registries/elyra-airflow-url-preconfig.json b/etc/config/metadata/component-registries/elyra-airflow-url-preconfig.json deleted file mode 100644 index 947c0b513..000000000 --- a/etc/config/metadata/component-registries/elyra-airflow-url-preconfig.json +++ /dev/null @@ -1,17 +0,0 @@ -{ - "display_name": "Airflow Preloaded Components - URL", - "metadata": { - "description": "Preloaded URL-based components that are supported by Apache Airflow", - "runtime": "airflow", - "categories": ["Preloaded Airflow"], - "paths": [ - "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/bash_operator.py", - "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/email_operator.py", - "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/operators/http_operator.py", - "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/contrib/operators/spark_sql_operator.py", - "https://raw.githubusercontent.com/apache/airflow/1.10.15/airflow/contrib/operators/spark_submit_operator.py" - ] - }, - "schema_name": "url-catalog", - "version": 1 -} diff --git a/etc/config/metadata/component-registries/elyra-kfp-directory-preconfig.json b/etc/config/metadata/component-registries/elyra-kfp-directory-preconfig.json index e18451cc8..04d76826b 100644 --- a/etc/config/metadata/component-registries/elyra-kfp-directory-preconfig.json +++ b/etc/config/metadata/component-registries/elyra-kfp-directory-preconfig.json @@ -3,6 +3,7 @@ "metadata": { "description": "Preloaded directory-based components that are supported by Kubeflow Pipelines", "runtime": "kfp", + "location_type": "Directory", "categories": ["Preloaded KFP"], "paths": ["kfp"] }, diff --git a/etc/config/metadata/component-registries/elyra-kfp-url-preconfig.json b/etc/config/metadata/component-registries/elyra-kfp-url-preconfig.json deleted file mode 100644 index 0d783948c..000000000 --- a/etc/config/metadata/component-registries/elyra-kfp-url-preconfig.json +++ /dev/null @@ -1,14 +0,0 @@ -{ - "display_name": "KFP Preloaded Components - URL", - "metadata": { - "description": "Preloaded URL-based components that are supported by Kubeflow Pipelines", - "runtime": "kfp", - "categories": ["Preloaded KFP"], - "paths": [ - "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/web/Download/component.yaml", - "https://raw.githubusercontent.com/kubeflow/pipelines/1.6.0/components/basics/Calculate_hash/component.yaml" - ] - }, - "schema_name": "url-catalog", - "version": 1 -}