Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(telemetry): Combine Telemetry hook to send heap event once #766

Merged
merged 15 commits into from
Jul 19, 2024
171 changes: 86 additions & 85 deletions kedro-telemetry/kedro_telemetry/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import os
import sys
import uuid
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Any
Expand Down Expand Up @@ -153,103 +152,119 @@ def _generate_new_uuid(full_path: str) -> str:
return ""


class KedroTelemetryCLIHooks:
class KedroTelemetryHook:
"""Hook to send CLI command data and project statistics to Heap"""

def __init__(self):
self.consent = None
self._sent = False
DimedS marked this conversation as resolved.
Show resolved Hide resolved
self.event_properties = None
self.project_path = None
self.user_uuid = None

@cli_hook_impl
def before_command_run(
self, project_metadata: ProjectMetadata, command_args: list[str]
):
"""Hook implementation to send command run data to Heap"""
try:
if not project_metadata: # in package mode
return

consent = _check_for_telemetry_consent(project_metadata.project_path)
if not consent:
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
return

logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)
main_command = masked_command_args[0] if masked_command_args else "kedro"
if not project_metadata: # in package mode
return

user_uuid = _get_or_create_uuid()
project_properties = _get_project_properties(
user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
)
cli_properties = _format_user_cli_data(
project_properties, masked_command_args
)
self.consent = _check_for_telemetry_consent(project_metadata.project_path)
if not self.consent:
self._opt_out_notification()
return

_send_heap_event(
event_name=f"Command run: {main_command}",
identity=user_uuid,
properties=cli_properties,
)
# get KedroCLI and its structure from actual project root
cli = KedroCLI(project_path=project_metadata.project_path)
cli_struct = _get_cli_structure(cli_obj=cli, get_help=False)
masked_command_args = _mask_kedro_cli(
cli_struct=cli_struct, command_args=command_args
)

# send generic event too, so it's easier in data processing
generic_properties = deepcopy(cli_properties)
generic_properties["main_command"] = main_command
_send_heap_event(
event_name="CLI command",
identity=user_uuid,
properties=generic_properties,
)
except Exception as exc:
logger.warning(
"Something went wrong in hook implementation to send command run data to Heap. "
"Exception: %s",
exc,
)
self.user_uuid = _get_or_create_uuid()

event_properties = _get_project_properties(
self.user_uuid, project_metadata.project_path / PYPROJECT_CONFIG_NAME
)
event_properties["command"] = (
f"kedro {' '.join(command_args)}" if command_args else "kedro"
)
event_properties["main_command"] = (
masked_command_args[0] if masked_command_args else "kedro"
)

class KedroTelemetryProjectHooks:
"""Hook to send project statistics data to Heap"""
self.event_properties = event_properties

@cli_hook_impl
def after_command_run(self):
merelcht marked this conversation as resolved.
Show resolved Hide resolved
if self.consent and not self._sent:
self._send_telemetry_heap_event("CLI command")

@hook_impl
def after_context_created(self, context):
"""Hook implementation to check telemetry consent and record the project path"""
self.consent = _check_for_telemetry_consent(context.project_path)

if self.consent is None:
self.consent = _check_for_telemetry_consent(context.project_path)
if not self.consent:
self._opt_out_notification()
self.project_path = context.project_path

@hook_impl
def after_catalog_created(self, catalog):
# The user notification message is sent only once per command during the before_command_run hook
if not self.consent:
if self.consent is False:
return

default_pipeline = pipelines.get("__default__") # __default__
user_uuid = _get_or_create_uuid()

project_properties = _get_project_properties(
user_uuid, self.project_path / PYPROJECT_CONFIG_NAME
if not self.user_uuid:
self.user_uuid = _get_or_create_uuid()

if not self.event_properties:
self.event_properties = _get_project_properties(
self.user_uuid, self.project_path / PYPROJECT_CONFIG_NAME
)

project_properties = _format_project_statistics_data(
catalog, default_pipeline, pipelines
)
self.event_properties.update(project_properties)

project_statistics_properties = _format_project_statistics_data(
project_properties, catalog, default_pipeline, pipelines
self._send_telemetry_heap_event("Kedro Project Statistics")

def _opt_out_notification(self):
logger.info(
"Kedro-Telemetry is installed, but you have opted out of "
"sharing usage analytics so none will be collected.",
)
_send_heap_event(
event_name="Kedro Project Statistics",
identity=user_uuid,
properties=project_statistics_properties,

def _send_telemetry_heap_event(self, event_name: str):
"""Send the collected event properties to Heap under the given event name"""

logger.info(
"Kedro is sending anonymous usage data with the sole purpose of improving the product. "
"No personal data or IP addresses are stored on our side. "
"If you want to opt out, set the `KEDRO_DISABLE_TELEMETRY` or `DO_NOT_TRACK` environment variables, "
"or create a `.telemetry` file in the current working directory with the contents `consent: false`. "
"Read more at https://docs.kedro.org/en/stable/configuration/telemetry.html"
)

try:
_send_heap_event(
event_name=event_name,
identity=self.user_uuid,
properties=self.event_properties,
)
self._sent = True
except Exception as exc:
logger.warning(
"Something went wrong in hook implementation to send command run data to Heap. "
"Exception: %s",
exc,
)


def _is_known_ci_env(known_ci_env_var_keys: set[str]):
# Most CI tools will set the CI environment variable to true
Expand Down Expand Up @@ -281,33 +296,20 @@ def _get_project_properties(user_uuid: str, pyproject_path: Path) -> dict:
return properties


def _format_user_cli_data(
properties: dict,
command_args: list[str],
):
"""Add format CLI command data to send to Heap."""
cli_properties = properties.copy()
cli_properties["command"] = (
f"kedro {' '.join(command_args)}" if command_args else "kedro"
)
return cli_properties


def _format_project_statistics_data(
properties: dict,
catalog: DataCatalog,
default_pipeline: Pipeline,
project_pipelines: dict,
):
"""Add project statistics to send to Heap."""
project_statistics_properties = properties.copy()
project_statistics_properties = {}
project_statistics_properties["number_of_datasets"] = sum(
1
for c in catalog.list()
if not c.startswith("parameters") and not c.startswith("params:")
)
project_statistics_properties["number_of_nodes"] = (
len(default_pipeline.nodes) if default_pipeline else None
len(default_pipeline.nodes) if default_pipeline else None # type: ignore
)
project_statistics_properties["number_of_pipelines"] = len(project_pipelines.keys())
return project_statistics_properties
Expand Down Expand Up @@ -375,5 +377,4 @@ def _is_valid_syntax(telemetry: Any) -> bool:
)


cli_hooks = KedroTelemetryCLIHooks()
project_hooks = KedroTelemetryProjectHooks()
telemetry_hook = KedroTelemetryHook()
4 changes: 2 additions & 2 deletions kedro-telemetry/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,10 @@ test = [
]

[project.entry-points."kedro.cli_hooks"]
kedro-telemetry = "kedro_telemetry.plugin:cli_hooks"
kedro-telemetry = "kedro_telemetry.plugin:telemetry_hook"

[project.entry-points."kedro.hooks"]
kedro-telemetry = "kedro_telemetry.plugin:project_hooks"
kedro-telemetry = "kedro_telemetry.plugin:telemetry_hook"
merelcht marked this conversation as resolved.
Show resolved Hide resolved

[tool.setuptools]
include-package-data = true
Expand Down
Loading