Skip to content

chore(seer grouping): Add debug logging for old excess frames check #83082

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,8 +200,21 @@ def _circuit_breaker_broken(event: Event, project: Project) -> bool:


def _has_empty_stacktrace_string(event: Event, variants: dict[str, BaseVariant]) -> bool:
# For purposes of a temporary debug log - will be removed as soon as the mysterious behavior
# is sorted
logger_extra = {
"event_id": event.event_id,
"project_id": event.project_id,
"platform": event.platform,
"has_too_many_frames_result": has_too_many_contributing_frames(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just checking that this isn't expensive to run here? I assume this log happens only on group creation

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It runs on every event, but no, it's not expensive - all it does is check a few different pre-computed values (and collect metrics, if record_metrics is true, but in this case it's false, so that's a moot point).

def has_too_many_contributing_frames(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
referrer: ReferrerOptions,
record_metrics: bool = True,
) -> bool:
platform = event.platform
shared_tags = {"referrer": referrer.value, "platform": platform}
contributing_variant, contributing_component = get_contributing_variant_and_component(variants)
# Ideally we're calling this function after we already know the event both has a stacktrace and
# is using it for grouping (in which case none of the below conditions should apply), but still
# worth checking that we have enough information to answer the question just in case
if (
# Fingerprint, checksum, fallback variants
not isinstance(contributing_variant, ComponentVariant)
# Security violations, log-message-based grouping
or contributing_variant.variant_name == "default"
# Any ComponentVariant will have this, but this reassures mypy
or not contributing_component
# Exception-message-based grouping
or not hasattr(contributing_component, "frame_counts")
):
# We don't bother to collect a metric on this outcome, because we shouldn't have called the
# function in the first place
return False
# Certain platforms were backfilled before we added this filter, so to keep new events matching
# with the existing data, we turn off the filter for them (instead their stacktraces will be
# truncated)
if platform in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK:
if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
return False
stacktrace_type = "in_app" if contributing_variant.variant_name == "app" else "system"
key = f"{stacktrace_type}_contributing_frames"
shared_tags["stacktrace_type"] = stacktrace_type
if contributing_component.frame_counts[key] > MAX_FRAME_COUNT:
if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
)
return True
if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "pass"},
)
return False

If you still think it's an issue, I can move the call to where the log is, which would drastically cut down the number of times that it runs. I originally had it that way, but in the end, especially since it's hopefully only going to be in place for a day or two while I debug this, it just seemed easier to pass one metadata blob rather than all the stuff needed to compute said blob's values. LMK if you want me to change it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that as long as it won't cause a performance issue in prod I'm fine with it as is

event, variants, ReferrerOptions.INGEST, record_metrics=False
),
}
stacktrace_string = get_stacktrace_string_with_metrics(
get_grouping_info_from_variants(variants), event.platform, ReferrerOptions.INGEST
get_grouping_info_from_variants(variants),
event.platform,
ReferrerOptions.INGEST,
logger_extra=logger_extra,
)
if not stacktrace_string:
if stacktrace_string == "":
Expand Down
44 changes: 30 additions & 14 deletions src/sentry/seer/similarity/utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
import traceback
from collections.abc import Mapping, Sequence
from enum import StrEnum
from typing import Any, TypedDict, TypeVar
Expand Down Expand Up @@ -269,7 +270,10 @@ def is_base64_encoded_frame(frame_dict: Mapping[str, Any]) -> bool:


def get_stacktrace_string_with_metrics(
data: dict[str, Any], platform: str | None, referrer: ReferrerOptions
data: dict[str, Any],
platform: str | None,
referrer: ReferrerOptions,
logger_extra: dict[str, Any] | None = None,
) -> str | None:
stacktrace_string = None
sample_rate = options.get("seer.similarity.metrics_sample_rate")
Expand All @@ -283,6 +287,14 @@ def get_stacktrace_string_with_metrics(
tags={"platform": platform, "referrer": referrer},
)
if referrer == ReferrerOptions.INGEST:
# Temporary log to debug how we're still landing here, which we shouldn't be anymore
logger_extra = logger_extra or {}
logger_extra.update({"current_stacktrace": "".join(traceback.format_stack())})
logger.info(
"record_did_call_seer_metric.over-threshold-frames",
extra=logger_extra,
)

record_did_call_seer_metric(call_made=False, blocker="over-threshold-frames")
except Exception:
logger.exception("Unexpected exception in stacktrace string formatting")
Expand Down Expand Up @@ -312,6 +324,7 @@ def has_too_many_contributing_frames(
event: Event | GroupEvent,
variants: dict[str, BaseVariant],
referrer: ReferrerOptions,
record_metrics: bool = True,
) -> bool:
platform = event.platform
shared_tags = {"referrer": referrer.value, "platform": platform}
Expand Down Expand Up @@ -339,30 +352,33 @@ def has_too_many_contributing_frames(
# with the existing data, we turn off the filter for them (instead their stacktraces will be
# truncated)
if platform in EVENT_PLATFORMS_BYPASSING_FRAME_COUNT_CHECK:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "bypass"},
)
return False

stacktrace_type = "in_app" if contributing_variant.variant_name == "app" else "system"
key = f"{stacktrace_type}_contributing_frames"
shared_tags["stacktrace_type"] = stacktrace_type

if contributing_component.frame_counts[key] > MAX_FRAME_COUNT:
if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
)
return True

if record_metrics:
metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "block"},
tags={**shared_tags, "outcome": "pass"},
)
return True

metrics.incr(
"grouping.similarity.frame_count_filter",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
tags={**shared_tags, "outcome": "pass"},
)
return False


Expand Down
Loading