From 8d117329563a0ae25fbd3a4f250d0732811a8086 Mon Sep 17 00:00:00 2001 From: David Cain Date: Mon, 24 Jun 2024 09:08:41 -0600 Subject: [PATCH] Fix a bug from trying to scrape all trips I found a scraper (hello, Scrapy user!) trying to scrape all trips from https://mitoc-trips.mit.edu/trips/?after=0001-10-17 The server crashed because I mistakenly assumed you can subtract 365 days from any given year. Let's not do that. Separately, with *thousands* of trips, this view is getting pretty slow. To monitor (and perhaps limit) scrapers, I might try to make the view a bit more locked down. I don't *want* to paginate, but I don't want abuse either. --- ws/tests/views/test_trips.py | 11 +++++++++++ ws/views/trips.py | 10 ++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/ws/tests/views/test_trips.py b/ws/tests/views/test_trips.py index 428076b0..66aac398 100644 --- a/ws/tests/views/test_trips.py +++ b/ws/tests/views/test_trips.py @@ -126,6 +126,17 @@ def test_trips_with_filter(self): self._expect_past_trips(response, [one_week_ago.pk, one_month_ago.pk]) self._expect_link_for_date(soup, "2016-11-15") + def test_trips_with_very_early_date(self): + """You can ask for trips starting after the year 1.""" + trip = factories.TripFactory.create(trip_date="2016-12-23") + + # Filter based on a date in the past + response, soup = self._get("/trips/?after=0001-10-17") + self.assertFalse(response.context["date_invalid"]) + + self._expect_title(soup, "Trips after 1900-01-01") + self._expect_past_trips(response, [trip.pk]) + def test_upcoming_trips_can_be_filtered(self): """If supplying an 'after' date in the future, that still permits filtering!""" _next_week = factories.TripFactory.create(trip_date="2019-02-22") diff --git a/ws/views/trips.py b/ws/views/trips.py index 1669a93a..2a65ae2b 100644 --- a/ws/views/trips.py +++ b/ws/views/trips.py @@ -487,7 +487,7 @@ def get_queryset(self): trips = super().get_queryset() return annotated_for_trip_list(trips) - def _optionally_filter_from_args(self): + def _optionally_filter_from_args(self) -> tuple[date, bool]: """Return the date at which we want to omit previous trips, plus validity boolean. If the user passes an invalid date (for example, because they were @@ -505,7 +505,13 @@ def _optionally_filter_from_args(self): else: start_date_invalid = False - return (start_date, start_date_invalid) + return ( + # Guard against a weird edge case -- we can accept the year 1! + # We'll try to *subtract* time, and end up an exception rather than BCE datetimes + # There were no trips in our database before the 2010s anyway. + start_date and max(start_date, date(1900, 1, 1)), + start_date_invalid, + ) def get_context_data(self, **kwargs): """Sort trips into past and present trips."""