diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt
index 43fa70b2409c5c..2773ab510295af 100644
--- a/migrations_lockfile.txt
+++ b/migrations_lockfile.txt
@@ -15,7 +15,7 @@ remote_subscriptions: 0003_drop_remote_subscription
 
 replays: 0004_index_together
 
-sentry: 0807_remove_monitor_attachment_id_pt2
+sentry: 0808_change_grouphash_metadata_seer_matched_grouphash_deletion_config
 
 social_auth: 0002_default_auto_field
diff --git a/src/sentry/migrations/0808_change_grouphash_metadata_seer_matched_grouphash_deletion_config.py b/src/sentry/migrations/0808_change_grouphash_metadata_seer_matched_grouphash_deletion_config.py
new file mode 100644
index 00000000000000..fc54cf9e0f7fed
--- /dev/null
+++ b/src/sentry/migrations/0808_change_grouphash_metadata_seer_matched_grouphash_deletion_config.py
@@ -0,0 +1,40 @@
+# Generated by Django 5.1.4 on 2025-01-08 22:51
+
+import django.db.models.deletion
+from django.db import migrations
+
+import sentry.db.models.fields.foreignkey
+from sentry.new_migrations.migrations import CheckedMigration
+
+
+class Migration(CheckedMigration):
+    # This flag is used to mark that a migration shouldn't be automatically run in production.
+    # This should only be used for operations where it's safe to run the migration after your
+    # code has deployed. So this should not be used for most operations that alter the schema
+    # of a table.
+    # Here are some things that make sense to mark as post deployment:
+    # - Large data migrations. Typically we want these to be run manually so that they can be
+    #   monitored and not block the deploy for a long period of time while they run.
+    # - Adding indexes to large tables. Since this can take a long time, we'd generally prefer to
+    #   run this outside deployments so that we don't block them. Note that while adding an index
+    #   is a schema change, it's completely safe to run the operation after the code has deployed.
+    # Once deployed, run these manually via: https://develop.sentry.dev/database-migrations/#migration-deployment
+
+    is_post_deployment = False
+
+    dependencies = [
+        ("sentry", "0807_remove_monitor_attachment_id_pt2"),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name="grouphashmetadata",
+            name="seer_matched_grouphash",
+            field=sentry.db.models.fields.foreignkey.FlexibleForeignKey(
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name="seer_matchees",
+                to="sentry.grouphash",
+            ),
+        ),
+    ]
diff --git a/src/sentry/models/grouphashmetadata.py b/src/sentry/models/grouphashmetadata.py
index f8ad100527d748..c51e03bbd4c15a 100644
--- a/src/sentry/models/grouphashmetadata.py
+++ b/src/sentry/models/grouphashmetadata.py
@@ -78,7 +78,7 @@ class GroupHashMetadata(Model):
     seer_model = models.CharField(null=True)
     # The `GroupHash` record representing the match Seer sent back as a match (if any)
     seer_matched_grouphash = FlexibleForeignKey(
-        "sentry.GroupHash", related_name="seer_matchees", on_delete=models.DO_NOTHING, null=True
+        "sentry.GroupHash", related_name="seer_matchees", on_delete=models.SET_NULL, null=True
     )
     # The similarity between this hash's stacktrace and the parent (matched) hash's stacktrace
     seer_match_distance = models.FloatField(null=True)
diff --git a/tests/sentry/deletions/test_grouphash.py b/tests/sentry/deletions/test_grouphash.py
new file mode 100644
index 00000000000000..b46659b552912f
--- /dev/null
+++ b/tests/sentry/deletions/test_grouphash.py
@@ -0,0 +1,166 @@
+from dataclasses import asdict
+from unittest.mock import patch
+
+from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
+from sentry.deletions.tasks.groups import delete_groups
+from sentry.models.group import Group
+from sentry.models.grouphash import GroupHash
+from sentry.models.grouphashmetadata import GroupHashMetadata
+from sentry.seer.similarity.types import SeerSimilarIssueData
+from sentry.tasks.unmerge import unmerge
+from sentry.testutils.cases import TestCase
+from sentry.testutils.helpers.features import apply_feature_flag_on_cls
+from sentry.testutils.skips import requires_snuba
+
+pytestmark = [requires_snuba]
+
+
+@apply_feature_flag_on_cls("organizations:grouphash-metadata-creation")
+class DeleteGroupHashTest(TestCase):
+    def test_deleting_group_deletes_grouphash_and_metadata(self):
+        event = self.store_event(data={"message": "Dogs are great!"}, project_id=self.project.id)
+        assert event.group
+        group_id = event.group.id
+
+        grouphash = GroupHash.objects.filter(group_id=group_id).first()
+        assert grouphash
+
+        grouphash_metadata = GroupHashMetadata.objects.filter(grouphash_id=grouphash.id).first()
+        assert grouphash_metadata
+
+        with self.tasks():
+            delete_groups(object_ids=[group_id])
+
+        assert not Group.objects.filter(id=group_id).exists()
+        assert not GroupHash.objects.filter(group_id=group_id).exists()
+        assert not GroupHashMetadata.objects.filter(grouphash_id=grouphash.id).exists()
+
+    def test_deleting_grouphash_matched_by_seer(self):
+        existing_event = self.store_event(
+            data={"message": "Dogs are great!"}, project_id=self.project.id
+        )
+        assert existing_event.group
+        existing_group_id = existing_event.group.id
+
+        existing_grouphash = GroupHash.objects.filter(group_id=existing_group_id).first()
+        assert existing_grouphash
+
+        seer_result_data = SeerSimilarIssueData(
+            parent_hash=existing_event.get_primary_hash(),
+            parent_group_id=existing_group_id,
+            stacktrace_distance=0.01,
+            should_group=True,
+        )
+
+        with (
+            patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True),
+            patch(
+                "sentry.grouping.ingest.seer.get_seer_similar_issues",
+                return_value=(
+                    {
+                        "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                        "results": [asdict(seer_result_data)],
+                    },
+                    existing_grouphash,
+                ),
+            ),
+        ):
+            new_event = self.store_event(
+                data={"message": "Adopt, don't shop"}, project_id=self.project.id
+            )
+            new_grouphash = GroupHash.objects.filter(hash=new_event.get_primary_hash()).first()
+            assert new_grouphash and new_grouphash.metadata
+
+            assert new_grouphash != existing_grouphash
+            assert new_event.group_id == existing_group_id
+            assert new_grouphash.metadata.seer_matched_grouphash == existing_grouphash
+
+        with self.tasks():
+            delete_groups(object_ids=[existing_group_id])
+
+        assert not Group.objects.filter(id=existing_group_id).exists()
+        assert not GroupHash.objects.filter(group_id=existing_group_id).exists()
+        assert not GroupHashMetadata.objects.filter(grouphash_id=existing_grouphash.id).exists()
+        assert not GroupHashMetadata.objects.filter(grouphash_id=new_grouphash.id).exists()
+
+    def test_deleting_grouphash_matched_by_seer_after_unmerge(self):
+        """
+        Ensure that `seer_matched_grouphash` references aren't left dangling (and causing integrity
+        errors) when the matched grouphash is deleted.
+        """
+        existing_event = self.store_event(
+            data={"message": "Dogs are great!"}, project_id=self.project.id
+        )
+        assert existing_event.group
+
+        existing_grouphash = GroupHash.objects.filter(
+            hash=existing_event.get_primary_hash()
+        ).first()
+        assert existing_grouphash
+
+        seer_result_data = SeerSimilarIssueData(
+            parent_hash=existing_event.get_primary_hash(),
+            parent_group_id=existing_event.group.id,
+            stacktrace_distance=0.01,
+            should_group=True,
+        )
+
+        with (
+            patch("sentry.grouping.ingest.seer.should_call_seer_for_grouping", return_value=True),
+            patch(
+                "sentry.grouping.ingest.seer.get_seer_similar_issues",
+                return_value=(
+                    {
+                        "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
+                        "results": [asdict(seer_result_data)],
+                    },
+                    existing_grouphash,
+                ),
+            ),
+        ):
+            new_event = self.store_event(
+                data={"message": "Adopt, don't shop"}, project_id=self.project.id
+            )
+            new_grouphash = GroupHash.objects.filter(hash=new_event.get_primary_hash()).first()
+            assert new_grouphash and new_grouphash.metadata
+
+            assert new_grouphash != existing_grouphash
+            assert new_event.group_id == existing_event.group.id
+            assert new_grouphash.metadata.seer_matched_grouphash == existing_grouphash
+
+        with self.tasks():
+            unmerge.delay(
+                self.project.id,
+                existing_event.group.id,
+                None,
+                [new_grouphash.hash],
+                None,
+            )
+
+        # Pull the grouphashes from the DB again to check updated values
+        existing_grouphash = GroupHash.objects.filter(
+            hash=existing_event.get_primary_hash()
+        ).first()
+        new_grouphash = GroupHash.objects.filter(hash=new_event.get_primary_hash()).first()
+        assert existing_grouphash and new_grouphash
+
+        # The grouphashes now point to different groups, but the `seer_matched_grouphash`
+        # link remains
+        assert existing_grouphash.group_id != new_grouphash.group_id
+        assert (
+            new_grouphash.metadata
+            and new_grouphash.metadata.seer_matched_grouphash == existing_grouphash
+        )
+
+        with self.tasks():
+            delete_groups(object_ids=[existing_event.group.id])
+
+        assert not Group.objects.filter(id=existing_event.group.id).exists()
+        assert not GroupHash.objects.filter(group_id=existing_event.group.id).exists()
+        assert not GroupHashMetadata.objects.filter(grouphash_id=existing_grouphash.id).exists()
+
+        # The unmerged grouphash and its metadata remain, but the `seer_matched_grouphash` link has
+        # been broken so as not to cause integrity errors
+        new_grouphash = GroupHash.objects.filter(hash=new_event.get_primary_hash()).first()
+        assert new_grouphash and new_grouphash.metadata
+        assert new_grouphash.metadata.seer_matched_grouphash is None
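
For context on the core change: with on_delete=models.DO_NOTHING, deleting the referenced GroupHash row leaves seer_matched_grouphash_id pointing at a nonexistent row, and a real foreign-key constraint then rejects the delete with an IntegrityError. With on_delete=models.SET_NULL, Django's deletion collector nulls the reference first (which is why the field must also be null=True), so the referenced row can be deleted cleanly. The sketch below illustrates the difference using hypothetical Parent/Child models, not Sentry code; it assumes a configured Django project (e.g. run inside a Django shell).

    # Minimal sketch (hypothetical models and app label, not Sentry code).
    from django.db import models


    class Parent(models.Model):
        class Meta:
            app_label = "example"  # hypothetical app label


    class Child(models.Model):
        # With DO_NOTHING, Django takes no action on delete: if the column has a
        # real FK constraint, deleting the Parent row raises an IntegrityError;
        # without one, child.parent_id is left dangling.
        # parent = models.ForeignKey(Parent, on_delete=models.DO_NOTHING, null=True)

        # With SET_NULL, the deletion collector nulls the reference instead, so
        # the Parent row deletes cleanly and the Child row survives with parent=None.
        parent = models.ForeignKey(Parent, on_delete=models.SET_NULL, null=True)

        class Meta:
            app_label = "example"


    # Expected behavior under SET_NULL:
    #   parent = Parent.objects.create()
    #   child = Child.objects.create(parent=parent)
    #   parent.delete()
    #   child.refresh_from_db()
    #   assert child.parent is None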