Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore(replay): add a generic log sampling filter and sample replay ingest logs #83049

Merged
merged 6 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions src/sentry/logging/handlers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from __future__ import annotations

import logging
import random
import re
from typing import Any

Expand Down Expand Up @@ -164,3 +165,18 @@ def emit(self, record, logger=None):
key = metrics_badchars_re.sub("", key)
key = ".".join(key.split(".")[:3])
metrics.incr(key, skip_internal=False)


class SamplingFilter(logging.Filter):
    """
    A logging filter to sample logs with a fixed probability.

    p     -- probability log is emitted. Float in range [0.0, 1.0].
    level -- optional threshold. When given, sampling applies only to records
             whose level is this value or lower; records above it always pass.
             Defaults to None, which samples records of every level.

    Raises ValueError if p is outside [0.0, 1.0] (NaN included).
    """

    def __init__(self, p: float, level: int | None = None):
        super().__init__()
        # Negated chained comparison so that NaN is rejected as well.
        if not 0.0 <= p <= 1.0:
            raise ValueError(f"p must be in the range [0.0, 1.0], got {p!r}")
        self.sample_probability = p
        self.level = level

    def filter(self, record: logging.LogRecord) -> bool:
        """Return True if the record should be emitted, False to drop it."""
        # Records above the threshold (when one is set) bypass sampling.
        if self.level is not None and record.levelno > self.level:
            return True
        # random.random() is in [0.0, 1.0): p == 0.0 drops every sampled
        # record and p == 1.0 keeps every one.
        return random.random() < self.sample_probability
2 changes: 2 additions & 0 deletions src/sentry/replays/consumers/recording_buffered.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
from sentry_kafka_schemas.schema_types.ingest_replay_recordings_v1 import ReplayRecording

from sentry.conf.types.kafka_definition import Topic, get_topic_codec
from sentry.logging.handlers import SamplingFilter
from sentry.models.project import Project
from sentry.replays.lib.storage import (
RecordingSegmentStorageMeta,
Expand All @@ -72,6 +73,7 @@
from sentry.utils import json, metrics

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Sample this logger's output: each record is emitted with probability 0.1.
# NOTE(review): per the stdlib logging docs, a logger-level filter is not
# consulted for records propagated from descendant loggers — confirm that
# this scope is what is intended.
logger.addFilter(SamplingFilter(0.1)) # TODO: MAKE OPTION

# Codec for the INGEST_REPLAYS_RECORDINGS topic, looked up once at import time.
RECORDINGS_CODEC: Codec[ReplayRecording] = get_topic_codec(Topic.INGEST_REPLAYS_RECORDINGS)

Expand Down
2 changes: 2 additions & 0 deletions src/sentry/replays/usecases/ingest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

from sentry import options
from sentry.constants import DataCategory
from sentry.logging.handlers import SamplingFilter
from sentry.models.project import Project
from sentry.replays.lib.storage import (
RecordingSegmentStorageMeta,
Expand All @@ -26,6 +27,7 @@
from sentry.utils.outcomes import Outcome, track_outcome

# Shared "sentry.replays" logger used by this module.
logger = logging.getLogger("sentry.replays")
# Sample this logger's output: each record is emitted with probability 0.1.
# NOTE(review): per the stdlib logging docs, a logger-level filter is not
# consulted for records propagated from descendant loggers (e.g.
# "sentry.replays.*") — confirm that this scope is what is intended.
logger.addFilter(SamplingFilter(0.1)) # TODO: MAKE OPTION

# NOTE(review): presumably seconds (3600 = one hour) — confirm against usage.
CACHE_TIMEOUT = 3600
# Commit frequency; the name indicates the unit is seconds.
COMMIT_FREQUENCY_SEC = 1
Expand Down
Loading