diff --git a/README.md b/README.md index 3790bbf5..f5bff735 100644 --- a/README.md +++ b/README.md @@ -14,11 +14,11 @@ The following aliases are available: * [Alias `spaceflights-pandas`](spaceflights-pandas): The [spaceflights tutorial](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) example code. -* [Alias `spaceflights-pandas-viz`](spaceflights-pandas-viz): The [spaceflights tutorial](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) example code with viz feature examples (experiment tracking, plotting with plotly and matplotlib). +* [Alias `spaceflights-pandas-viz`](spaceflights-pandas-viz): The [spaceflights tutorial](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) example code with viz feature examples (plotting with plotly and matplotlib). * [Alias `spaceflights-pyspark`](spaceflights-pyspark): An alternative Kedro Spaceflights example, using [PySpark](https://docs.kedro.org/en/stable/integrations/pyspark_integration.html). -* [Alias `spaceflights-pyspark-viz`](spaceflights-pyspark-viz): An alternative Kedro Spaceflights example, using [PySpark](https://docs.kedro.org/en/stable/integrations/pyspark_integration.html) with viz feature examples (experiment tracking, plotting with plotly and matplotlib). +* [Alias `spaceflights-pyspark-viz`](spaceflights-pyspark-viz): An alternative Kedro Spaceflights example, using [PySpark](https://docs.kedro.org/en/stable/integrations/pyspark_integration.html) with viz feature examples (plotting with plotly and matplotlib). 
Archived starters which are no longer maintained: diff --git a/spaceflights-pandas-viz/README.md b/spaceflights-pandas-viz/README.md index 95281d9d..54faf34b 100644 --- a/spaceflights-pandas-viz/README.md +++ b/spaceflights-pandas-viz/README.md @@ -2,7 +2,7 @@ ## Overview -This is a completed version of the [spaceflights tutorial project](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) described in the [online Kedro documentation](https://docs.kedro.org) and the extra tutorial sections on [visualisation with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/kedro-viz_visualisation.html) and [experiment tracking with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/experiment_tracking.html). It includes the data required to run the project. +This is a completed version of the [spaceflights tutorial project](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) described in the [online Kedro documentation](https://docs.kedro.org) and the extra tutorial sections on [visualisation with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/kedro-viz_visualisation.html). It includes the data required to run the project. To create a project based on this starter, [ensure you have installed Kedro into a virtual environment](https://docs.kedro.org/en/stable/get_started/install.html). 
Then use the following command: diff --git a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml index d313a970..2ab04249 100644 --- a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +++ b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml @@ -70,14 +70,6 @@ regressor: filepath: data/06_models/regressor.pickle versioned: true -metrics: - type: tracking.MetricsDataset - filepath: data/09_tracking/metrics.json - -companies_columns: - type: tracking.JSONDataset - filepath: data/09_tracking/companies_columns.json - shuttle_passenger_capacity_plot_exp: type: plotly.PlotlyDataset filepath: data/08_reporting/shuttle_passenger_capacity_plot_exp.json diff --git a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py index 9357c8ec..3d1a6579 100755 --- a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +++ b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py @@ -29,7 +29,7 @@ def preprocess_companies(companies: pd.DataFrame) -> tuple[pd.DataFrame, dict]: """ companies["iata_approved"] = _is_true(companies["iata_approved"]) companies["company_rating"] = _parse_percentage(companies["company_rating"]) - return companies, {"columns": companies.columns.tolist(), "data_type": "companies"} + return companies def preprocess_shuttles(shuttles: pd.DataFrame) -> pd.DataFrame: diff --git a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ 
cookiecutter.python_package }}/pipelines/data_processing/pipeline.py index 1da53558..058ad9ea 100755 --- a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +++ b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py @@ -9,7 +9,7 @@ def create_pipeline(**kwargs) -> Pipeline: node( func=preprocess_companies, inputs="companies", - outputs=["preprocessed_companies", "companies_columns"], + outputs="preprocessed_companies", name="preprocess_companies_node", ), node( diff --git a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py index 01f332c0..8fe85735 100755 --- a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py +++ b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py @@ -22,7 +22,7 @@ def create_pipeline(**kwargs) -> Pipeline: func=evaluate_model, inputs=["regressor", "X_test", "y_test"], name="evaluate_model_node", - outputs="metrics", + outputs=None, ), ] ) diff --git a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py index 249125dd..89c69d7c 100644 --- a/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +++ b/spaceflights-pandas-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py @@ -12,19 +12,13 @@ # Installed plugins for which to disable hook auto-registration. 
# DISABLE_HOOKS_FOR_PLUGINS = ("kedro-viz",) -from pathlib import Path # noqa: E402 - -from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore # noqa: E402 - # Class that manages storing KedroSession data. -SESSION_STORE_CLASS = SQLiteStore - -# Setup for Experiment Tracking -# The SQLite DB required for experiment tracking is stored by default -# (supported from python >= 3.9 and Kedro-Viz 9.2.0) in the .viz folder -# of your project. To store it in another directory, provide the keyword argument -# `SESSION_STORE_ARGS` to pass to the `SESSION_STORE_CLASS` constructor. -SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2])} +# from kedro.framework.session.store import BaseSessionStore +# SESSION_STORE_CLASS = BaseSessionStore +# Keyword arguments to pass to the `SESSION_STORE_CLASS` constructor. +# SESSION_STORE_ARGS = { +# "path": "./sessions" +# } # Directory that holds configuration. # CONF_SOURCE = "conf" diff --git a/spaceflights-pyspark-viz/README.md b/spaceflights-pyspark-viz/README.md index a3a00c76..dec83311 100644 --- a/spaceflights-pyspark-viz/README.md +++ b/spaceflights-pyspark-viz/README.md @@ -2,8 +2,7 @@ ## Overview -This is a completed version of the [spaceflights tutorial project](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) and the extra tutorial sections on [visualisation with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/kedro-viz_visualisation.html) and [experiment tracking with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/experiment_tracking.html) with a PySpark setup that originates from the [Kedro documentation about how to work with PySpark](https://docs.kedro.org/en/stable/integrations/pyspark_integration.html). -This project includes the data required to run it. The code in this repository demonstrates best practice when working with Kedro and PySpark. 
+This is a completed version of the [spaceflights tutorial project](https://docs.kedro.org/en/stable/tutorial/spaceflights_tutorial.html) and the extra tutorial sections on [visualisation with Kedro-Viz](https://docs.kedro.org/projects/kedro-viz/en/stable/kedro-viz_visualisation.html) with a PySpark setup that originates from the [Kedro documentation about how to work with PySpark](https://docs.kedro.org/en/stable/integrations/pyspark_integration.html). This project includes the data required to run it. The code in this repository demonstrates best practice when working with Kedro and PySpark. To create a project based on this starter, [ensure you have installed Kedro into a virtual environment](https://docs.kedro.org/en/stable/get_started/install.html). Then use the following command: diff --git a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml index a0755689..91db72d0 100644 --- a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml +++ b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/conf/base/catalog.yml @@ -140,14 +140,6 @@ regressor: filepath: data/06_models/regressor.pickle versioned: true -metrics: - type: tracking.MetricsDataset - filepath: data/09_tracking/metrics.json - -companies_columns: - type: tracking.JSONDataset - filepath: data/09_tracking/companies_columns.json - shuttle_passenger_capacity_plot_exp: type: plotly.PlotlyDataset filepath: data/08_reporting/shuttle_passenger_capacity_plot_exp.json diff --git a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py index 2a791000..230fd8ad 100755 --- a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py +++ b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/nodes.py @@ -37,7 +37,7 @@ def 
preprocess_companies(companies: SparkDataFrame) -> tuple[SparkDataFrame, dic # Drop columns that aren't used for model training companies = companies.drop('company_location', 'total_fleet_count') - return companies, {"columns": companies.columns, "data_type": "companies"} + return companies def load_shuttles_to_csv(shuttles: pd.DataFrame) -> pd.DataFrame: diff --git a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py index fbe37cd2..44e1b21d 100755 --- a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py +++ b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_processing/pipeline.py @@ -21,7 +21,7 @@ def create_pipeline(**kwargs) -> Pipeline: node( func=preprocess_companies, inputs="companies", - outputs=["preprocessed_companies", "companies_columns"], + outputs="preprocessed_companies", name="preprocess_companies_node", ), node( diff --git a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py index 7fb2ab24..98f9fc08 100755 --- a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py +++ b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/pipelines/data_science/pipeline.py @@ -21,7 +21,7 @@ def create_pipeline(**kwargs) -> Pipeline: node( func=evaluate_model, inputs=["regressor", "X_test", "y_test"], - outputs="metrics", + outputs=None, name="evaluate_model_node", ), ] diff --git a/spaceflights-pyspark-viz/{{ 
cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py index b1066ab2..c808a016 100644 --- a/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py +++ b/spaceflights-pyspark-viz/{{ cookiecutter.repo_name }}/src/{{ cookiecutter.python_package }}/settings.py @@ -11,19 +11,13 @@ # Installed plugins for which to disable hook auto-registration. # DISABLE_HOOKS_FOR_PLUGINS = ("kedro-viz",) -from pathlib import Path # noqa: E402 - -from kedro_viz.integrations.kedro.sqlite_store import SQLiteStore # noqa: E402 - # Class that manages storing KedroSession data. -SESSION_STORE_CLASS = SQLiteStore - -# Setup for Experiment Tracking -# The SQLite DB required for experiment tracking is stored by default -# (supported from python >= 3.9 and Kedro-Viz 9.2.0) in the .viz folder -# of your project. To store it in another directory, provide the keyword argument -# `SESSION_STORE_ARGS` to pass to the `SESSION_STORE_CLASS` constructor. -SESSION_STORE_ARGS = {"path": str(Path(__file__).parents[2])} +# from kedro.framework.session.store import BaseSessionStore +# SESSION_STORE_CLASS = BaseSessionStore +# Keyword arguments to pass to the `SESSION_STORE_CLASS` constructor. +# SESSION_STORE_ARGS = { +# "path": "./sessions" +# } # Directory that holds configuration. # CONF_SOURCE = "conf"