Skip to content

Commit

Permalink
Fix a bug from trying to scrape all trips
Browse files Browse the repository at this point in the history
I found a scraper (hello, Scrapy user!) trying to scrape all trips from
https://mitoc-trips.mit.edu/trips/?after=0001-10-17

The server crashed because I mistakenly assumed you can subtract 365
days from any given year. Let's not do that.

Separately, with *thousands* of trips, this view is getting pretty slow.
To monitor (and perhaps limit) scrapers, I might try to make the view a
bit more locked down. I don't *want* to paginate, but I don't want abuse
either.
  • Loading branch information
DavidCain committed Jun 24, 2024
1 parent 8e62018 commit 8d11732
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
11 changes: 11 additions & 0 deletions ws/tests/views/test_trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,17 @@ def test_trips_with_filter(self):
self._expect_past_trips(response, [one_week_ago.pk, one_month_ago.pk])
self._expect_link_for_date(soup, "2016-11-15")

def test_trips_with_very_early_date(self):
"""You can ask for trips starting after the year 1."""
trip = factories.TripFactory.create(trip_date="2016-12-23")

# Filter based on a date in the past
response, soup = self._get("/trips/?after=0001-10-17")
self.assertFalse(response.context["date_invalid"])

self._expect_title(soup, "Trips after 1900-01-01")
self._expect_past_trips(response, [trip.pk])

def test_upcoming_trips_can_be_filtered(self):
"""If supplying an 'after' date in the future, that still permits filtering!"""
_next_week = factories.TripFactory.create(trip_date="2019-02-22")
Expand Down
10 changes: 8 additions & 2 deletions ws/views/trips.py
Original file line number Diff line number Diff line change
Expand Up @@ -487,7 +487,7 @@ def get_queryset(self):
trips = super().get_queryset()
return annotated_for_trip_list(trips)

def _optionally_filter_from_args(self):
def _optionally_filter_from_args(self) -> tuple[date, bool]:
"""Return the date at which we want to omit previous trips, plus validity boolean.
If the user passes an invalid date (for example, because they were
Expand All @@ -505,7 +505,13 @@ def _optionally_filter_from_args(self):
else:
start_date_invalid = False

return (start_date, start_date_invalid)
return (
# Guard against a weird edge case -- we can accept the year 1!
# We'll try to *subtract* time, and end up an exception rather than BCE datetimes
# There were no trips in our database before the 2010s anyway.
start_date and max(start_date, date(1900, 1, 1)),
start_date_invalid,
)

def get_context_data(self, **kwargs):
"""Sort trips into past and present trips."""
Expand Down

0 comments on commit 8d11732

Please sign in to comment.