Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(telemetry): Implement telemetry message notification #760

Merged
merged 11 commits into from
Jul 16, 2024
106 changes: 53 additions & 53 deletions kedro-datasets/docs/source/api/kedro_datasets.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,56 +11,56 @@ kedro_datasets
:toctree:
:template: autosummary/class.rst

kedro_datasets.api.APIDataset
kedro_datasets.biosequence.BioSequenceDataset
kedro_datasets.dask.CSVDataset
kedro_datasets.dask.ParquetDataset
kedro_datasets.databricks.ManagedTableDataset
kedro_datasets.email.EmailMessageDataset
kedro_datasets.geopandas.GeoJSONDataset
kedro_datasets.holoviews.HoloviewsWriter
kedro_datasets.huggingface.HFDataset
kedro_datasets.huggingface.HFTransformerPipelineDataset
kedro_datasets.ibis.TableDataset
kedro_datasets.json.JSONDataset
kedro_datasets.matlab.MatlabDataset
kedro_datasets.matplotlib.MatplotlibWriter
kedro_datasets.networkx.GMLDataset
kedro_datasets.networkx.GraphMLDataset
kedro_datasets.networkx.JSONDataset
kedro_datasets.pandas.CSVDataset
kedro_datasets.pandas.DeltaTableDataset
kedro_datasets.pandas.ExcelDataset
kedro_datasets.pandas.FeatherDataset
kedro_datasets.pandas.GBQQueryDataset
kedro_datasets.pandas.GBQTableDataset
kedro_datasets.pandas.GenericDataset
kedro_datasets.pandas.HDFDataset
kedro_datasets.pandas.JSONDataset
kedro_datasets.pandas.ParquetDataset
kedro_datasets.pandas.SQLQueryDataset
kedro_datasets.pandas.SQLTableDataset
kedro_datasets.pandas.XMLDataset
kedro_datasets.partitions.IncrementalDataset
kedro_datasets.partitions.PartitionedDataset
kedro_datasets.pickle.PickleDataset
kedro_datasets.pillow.ImageDataset
kedro_datasets.plotly.JSONDataset
kedro_datasets.plotly.PlotlyDataset
kedro_datasets.polars.CSVDataset
kedro_datasets.polars.EagerPolarsDataset
kedro_datasets.polars.LazyPolarsDataset
kedro_datasets.redis.PickleDataset
kedro_datasets.snowflake.SnowparkTableDataset
kedro_datasets.spark.DeltaTableDataset
kedro_datasets.spark.SparkDataset
kedro_datasets.spark.SparkHiveDataset
kedro_datasets.spark.SparkJDBCDataset
kedro_datasets.spark.SparkStreamingDataset
kedro_datasets.svmlight.SVMLightDataset
kedro_datasets.tensorflow.TensorFlowModelDataset
kedro_datasets.text.TextDataset
kedro_datasets.tracking.JSONDataset
kedro_datasets.tracking.MetricsDataset
kedro_datasets.video.VideoDataset
kedro_datasets.yaml.YAMLDataset
api.APIDataset
biosequence.BioSequenceDataset
dask.CSVDataset
dask.ParquetDataset
databricks.ManagedTableDataset
email.EmailMessageDataset
geopandas.GeoJSONDataset
holoviews.HoloviewsWriter
huggingface.HFDataset
huggingface.HFTransformerPipelineDataset
ibis.TableDataset
json.JSONDataset
matlab.MatlabDataset
matplotlib.MatplotlibWriter
networkx.GMLDataset
networkx.GraphMLDataset
networkx.JSONDataset
pandas.CSVDataset
pandas.DeltaTableDataset
pandas.ExcelDataset
pandas.FeatherDataset
pandas.GBQQueryDataset
pandas.GBQTableDataset
pandas.GenericDataset
pandas.HDFDataset
pandas.JSONDataset
pandas.ParquetDataset
pandas.SQLQueryDataset
pandas.SQLTableDataset
pandas.XMLDataset
partitions.IncrementalDataset
partitions.PartitionedDataset
pickle.PickleDataset
pillow.ImageDataset
plotly.JSONDataset
plotly.PlotlyDataset
polars.CSVDataset
polars.EagerPolarsDataset
polars.LazyPolarsDataset
redis.PickleDataset
snowflake.SnowparkTableDataset
spark.DeltaTableDataset
spark.SparkDataset
spark.SparkHiveDataset
spark.SparkJDBCDataset
spark.SparkStreamingDataset
svmlight.SVMLightDataset
tensorflow.TensorFlowModelDataset
text.TextDataset
tracking.JSONDataset
tracking.MetricsDataset
video.VideoDataset
yaml.YAMLDataset
12 changes: 6 additions & 6 deletions kedro-datasets/docs/source/api/kedro_datasets_experimental.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,9 @@ kedro_datasets_experimental
:toctree:
:template: autosummary/class.rst

kedro_datasets_experimental.langchain.ChatAnthropicDataset
kedro_datasets_experimental.langchain.ChatCohereDataset
kedro_datasets_experimental.langchain.ChatOpenAIDataset
kedro_datasets_experimental.langchain.OpenAIEmbeddingsDataset
kedro_datasets_experimental.netcdf.NetCDFDataset
kedro_datasets_experimental.rioxarray.GeoTIFFDataset
langchain.ChatAnthropicDataset
langchain.ChatCohereDataset
langchain.ChatOpenAIDataset
langchain.OpenAIEmbeddingsDataset
netcdf.NetCDFDataset
rioxarray.GeoTIFFDataset
4 changes: 4 additions & 0 deletions kedro-telemetry/kedro_telemetry/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
"""Kedro plugin for collecting Kedro usage data."""

__version__ = "0.5.0"

import logging

logging.getLogger(__name__).setLevel(logging.INFO)
18 changes: 10 additions & 8 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,12 +163,20 @@ def before_command_run(

consent = _check_for_telemetry_consent(project_metadata.project_path)
if not consent:
logger.debug(
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
Expand All @@ -177,7 +185,6 @@ def before_command_run(
)
main_command = masked_command_args[0] if masked_command_args else "kedro"

logger.debug("You have opted into product usage analytics.")
user_uuid = _get_or_create_uuid()
project_properties = _get_project_properties(
user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
Expand Down Expand Up @@ -219,15 +226,10 @@ def after_context_created(self, context):

@hook_impl
def after_catalog_created(self, catalog):
# The user notification message is sent only once per command during the before_command_run hook
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What about runs that do not go through the CLI, e.g. session.run?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, @noklam, good point. As I understand it, in the current PR, telemetry will be sent in the after_catalog_created() hook without user notification. I propose addressing this issue here: GitHub Issue #730.

In the PR that resolves that issue, we can modify the logic to send one Heap event per user command and update the user notifications accordingly:

  • If it's a CLI command and the catalog is created, send one heap event in the after_catalog_created() hook along with a user notification message.
  • If it's a CLI command and the catalog is not created, send one heap event in the before_command_run() hook along with a user notification message.
  • If it's not a CLI command and the catalog is created, send one heap event in the after_catalog_created() hook along with a user notification message.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed, let's address this in #730.

if not self.consent:
logger.debug(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.debug("You have opted into product usage analytics.")

default_pipeline = pipelines.get("__default__") # __default__
user_uuid = _get_or_create_uuid()

Expand Down
47 changes: 39 additions & 8 deletions kedro-telemetry/tests/test_plugin.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import sys
from pathlib import Path

Expand Down Expand Up @@ -121,7 +122,7 @@ def fake_sub_pipeline():


class TestKedroTelemetryCLIHooks:
def test_before_command_run(self, mocker, fake_metadata):
def test_before_command_run(self, mocker, fake_metadata, caplog):
mocker.patch(
"kedro_telemetry.plugin._check_for_telemetry_consent", return_value=True
)
Expand All @@ -139,9 +140,10 @@ def test_before_command_run(self, mocker, fake_metadata):
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
with caplog.at_level(logging.INFO):
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
expected_properties = {
"username": "user_uuid",
"project_id": "digested",
Expand Down Expand Up @@ -170,6 +172,20 @@ def test_before_command_run(self, mocker, fake_metadata):
),
]
assert mocked_heap_call.call_args_list == expected_calls
assert any(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
in record.message
for record in caplog.records
)
assert not any(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected." in record.message
for record in caplog.records
)

def test_before_command_run_with_tools(self, mocker, fake_metadata):
mocker.patch(
Expand Down Expand Up @@ -276,17 +292,32 @@ def test_before_command_run_empty_args(self, mocker, fake_metadata):

assert mocked_heap_call.call_args_list == expected_calls

def test_before_command_run_no_consent_given(self, mocker, fake_metadata):
def test_before_command_run_no_consent_given(self, mocker, fake_metadata, caplog):
mocker.patch(
"kedro_telemetry.plugin._check_for_telemetry_consent", return_value=False
)

mocked_heap_call = mocker.patch("kedro_telemetry.plugin._send_heap_event")
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)
with caplog.at_level(logging.INFO):
telemetry_hook = KedroTelemetryCLIHooks()
command_args = ["--version"]
telemetry_hook.before_command_run(fake_metadata, command_args)

mocked_heap_call.assert_not_called()
assert not any(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/latest/configuration/telemetry.html"
in record.message
for record in caplog.records
)
assert any(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected." in record.message
for record in caplog.records
)

def test_before_command_run_connection_error(self, mocker, fake_metadata, caplog):
mocker.patch(
Expand Down