Skip to content

Commit e5b80d6

Browse files
authored
Use content-length header in ASGI instead of reading request body (#1646, #1631, #1595, #1573)
* Do not read the request body to determine the content length.
* Make AnnotatedValue easier to understand.
1 parent 067d80c commit e5b80d6

File tree

11 files changed

+87
-80
lines changed

11 files changed

+87
-80
lines changed

sentry_sdk/integrations/_wsgi_common.py

Lines changed: 5 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -64,19 +64,13 @@ def extract_into_event(self, event):
6464
request_info["cookies"] = dict(self.cookies())
6565

6666
if not request_body_within_bounds(client, content_length):
67-
data = AnnotatedValue(
68-
"",
69-
{"rem": [["!config", "x", 0, content_length]], "len": content_length},
70-
)
67+
data = AnnotatedValue.removed_because_over_size_limit()
7168
else:
7269
parsed_body = self.parsed_body()
7370
if parsed_body is not None:
7471
data = parsed_body
7572
elif self.raw_data():
76-
data = AnnotatedValue(
77-
"",
78-
{"rem": [["!raw", "x", 0, content_length]], "len": content_length},
79-
)
73+
data = AnnotatedValue.removed_because_raw_data()
8074
else:
8175
data = None
8276

@@ -110,11 +104,8 @@ def parsed_body(self):
110104
files = self.files()
111105
if form or files:
112106
data = dict(iteritems(form))
113-
for k, v in iteritems(files):
114-
size = self.size_of_file(v)
115-
data[k] = AnnotatedValue(
116-
"", {"len": size, "rem": [["!raw", "x", 0, size]]}
117-
)
107+
for key, _ in iteritems(files):
108+
data[key] = AnnotatedValue.removed_because_raw_data()
118109

119110
return data
120111

@@ -175,7 +166,7 @@ def _filter_headers(headers):
175166
k: (
176167
v
177168
if k.upper().replace("-", "_") not in SENSITIVE_HEADERS
178-
else AnnotatedValue("", {"rem": [["!config", "x", 0, len(v)]]})
169+
else AnnotatedValue.removed_because_over_size_limit()
179170
)
180171
for k, v in iteritems(headers)
181172
}

sentry_sdk/integrations/aiohttp.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,8 @@ def get_aiohttp_request_data(hub, request):
218218
if bytes_body is not None:
219219
# we have body to show
220220
if not request_body_within_bounds(hub.client, len(bytes_body)):
221+
return AnnotatedValue.removed_because_over_size_limit()
221222

222-
return AnnotatedValue(
223-
"",
224-
{"rem": [["!config", "x", 0, len(bytes_body)]], "len": len(bytes_body)},
225-
)
226223
encoding = request.charset or "utf-8"
227224
return bytes_body.decode(encoding, "replace")
228225

sentry_sdk/integrations/aws_lambda.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -377,7 +377,7 @@ def event_processor(sentry_event, hint, start_time=start_time):
377377
if aws_event.get("body", None):
378378
# Unfortunately couldn't find a way to get structured body from AWS
379379
# event. Meaning every body is unstructured to us.
380-
request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]})
380+
request["data"] = AnnotatedValue.removed_because_raw_data()
381381

382382
sentry_event["request"] = request
383383

sentry_sdk/integrations/gcp.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ def event_processor(event, hint):
190190
if hasattr(gcp_event, "data"):
191191
# Unfortunately couldn't find a way to get structured body from GCP
192192
# event. Meaning every body is unstructured to us.
193-
request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]})
193+
request["data"] = AnnotatedValue.removed_because_raw_data()
194194

195195
event["request"] = request
196196

sentry_sdk/integrations/starlette.py

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -438,49 +438,40 @@ async def extract_request_info(self):
438438
if client is None:
439439
return None
440440

441-
data = None # type: Union[Dict[str, Any], AnnotatedValue, None]
442-
443-
content_length = await self.content_length()
444441
request_info = {} # type: Dict[str, Any]
445442

446443
with capture_internal_exceptions():
447444
if _should_send_default_pii():
448445
request_info["cookies"] = self.cookies()
449446

450-
if not request_body_within_bounds(client, content_length):
451-
data = AnnotatedValue(
452-
"",
453-
{
454-
"rem": [["!config", "x", 0, content_length]],
455-
"len": content_length,
456-
},
457-
)
458-
else:
459-
parsed_body = await self.parsed_body()
460-
if parsed_body is not None:
461-
data = parsed_body
462-
elif await self.raw_data():
463-
data = AnnotatedValue(
464-
"",
465-
{
466-
"rem": [["!raw", "x", 0, content_length]],
467-
"len": content_length,
468-
},
469-
)
447+
content_length = await self.content_length()
448+
449+
if content_length:
450+
data = None # type: Union[Dict[str, Any], AnnotatedValue, None]
451+
452+
if not request_body_within_bounds(client, content_length):
453+
data = AnnotatedValue.removed_because_over_size_limit()
454+
470455
else:
471-
data = None
456+
parsed_body = await self.parsed_body()
457+
if parsed_body is not None:
458+
data = parsed_body
459+
elif await self.raw_data():
460+
data = AnnotatedValue.removed_because_raw_data()
461+
else:
462+
data = None
472463

473-
if data is not None:
474-
request_info["data"] = data
464+
if data is not None:
465+
request_info["data"] = data
475466

476467
return request_info
477468

478469
async def content_length(self):
479-
# type: (StarletteRequestExtractor) -> int
480-
raw_data = await self.raw_data()
481-
if raw_data is None:
482-
return 0
483-
return len(raw_data)
470+
# type: (StarletteRequestExtractor) -> Optional[int]
471+
if "content-length" in self.request.headers:
472+
return int(self.request.headers["content-length"])
473+
474+
return None
484475

485476
def cookies(self):
486477
# type: (StarletteRequestExtractor) -> Dict[str, Any]
@@ -525,10 +516,7 @@ async def parsed_body(self):
525516
data = {}
526517
for key, val in iteritems(form):
527518
if isinstance(val, UploadFile):
528-
size = len(await val.read())
529-
data[key] = AnnotatedValue(
530-
"", {"len": size, "rem": [["!raw", "x", 0, size]]}
531-
)
519+
data[key] = AnnotatedValue.removed_because_raw_data()
532520
else:
533521
data[key] = val
534522

sentry_sdk/utils.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -283,13 +283,52 @@ def to_header(self):
283283

284284

285285
class AnnotatedValue(object):
286+
"""
287+
Meta information for a data field in the event payload.
288+
This is to tell Relay that we have tampered with the field's value.
289+
See:
290+
https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423
291+
"""
292+
286293
__slots__ = ("value", "metadata")
287294

288295
def __init__(self, value, metadata):
289296
# type: (Optional[Any], Dict[str, Any]) -> None
290297
self.value = value
291298
self.metadata = metadata
292299

300+
@classmethod
301+
def removed_because_raw_data(cls):
302+
# type: () -> AnnotatedValue
303+
"""The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
304+
return AnnotatedValue(
305+
value="",
306+
metadata={
307+
"rem": [ # Remark
308+
[
309+
"!raw", # Unparsable raw data
310+
"x", # The fields original value was removed
311+
]
312+
]
313+
},
314+
)
315+
316+
@classmethod
317+
def removed_because_over_size_limit(cls):
318+
# type: () -> AnnotatedValue
319+
"""The actual value was removed because the size of the field exceeded the configured maximum size (specified with the request_bodies sdk option)"""
320+
return AnnotatedValue(
321+
value="",
322+
metadata={
323+
"rem": [ # Remark
324+
[
325+
"!config", # Because of configured maximum size
326+
"x", # The fields original value was removed
327+
]
328+
]
329+
},
330+
)
331+
293332

294333
if MYPY:
295334
from typing import TypeVar

tests/integrations/bottle/test_bottle.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -234,9 +234,7 @@ def index():
234234
assert response[1] == "200 OK"
235235

236236
(event,) = events
237-
assert event["_meta"]["request"]["data"] == {
238-
"": {"len": 2000, "rem": [["!config", "x", 0, 2000]]}
239-
}
237+
assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}}
240238
assert not event["request"]["data"]
241239

242240

@@ -271,9 +269,8 @@ def index():
271269

272270
assert event["_meta"]["request"]["data"]["file"] == {
273271
"": {
274-
"len": -1,
275-
"rem": [["!raw", "x", 0, -1]],
276-
} # bottle default content-length is -1
272+
"rem": [["!raw", "x"]],
273+
}
277274
}
278275
assert not event["request"]["data"]["file"]
279276

tests/integrations/django/test_basic.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -520,8 +520,7 @@ def test_request_body(sentry_init, client, capture_events):
520520
assert event["message"] == "hi"
521521
assert event["request"]["data"] == ""
522522
assert event["_meta"]["request"]["data"][""] == {
523-
"len": 6,
524-
"rem": [["!raw", "x", 0, 6]],
523+
"rem": [["!raw", "x"]],
525524
}
526525

527526
del events[:]

tests/integrations/flask/test_flask.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -414,9 +414,7 @@ def index():
414414
assert response.status_code == 200
415415

416416
(event,) = events
417-
assert event["_meta"]["request"]["data"] == {
418-
"": {"len": 2000, "rem": [["!config", "x", 0, 2000]]}
419-
}
417+
assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}}
420418
assert not event["request"]["data"]
421419

422420

@@ -445,9 +443,7 @@ def index():
445443
}
446444
assert len(event["request"]["data"]["foo"]) == 1024
447445

448-
assert event["_meta"]["request"]["data"]["file"] == {
449-
"": {"len": 0, "rem": [["!raw", "x", 0, 0]]}
450-
}
446+
assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}}
451447
assert not event["request"]["data"]["file"]
452448

453449

tests/integrations/pyramid/test_pyramid.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,9 +213,7 @@ def index(request):
213213
}
214214
assert len(event["request"]["data"]["foo"]) == 1024
215215

216-
assert event["_meta"]["request"]["data"]["file"] == {
217-
"": {"len": 0, "rem": [["!raw", "x", 0, 0]]}
218-
}
216+
assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}}
219217
assert not event["request"]["data"]["file"]
220218

221219

tests/integrations/starlette/test_starlette.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,7 @@
5656
PARSED_BODY = {
5757
"username": "Jane",
5858
"password": "hello123",
59-
"photo": AnnotatedValue(
60-
"", {"len": 28023, "rem": [["!raw", "x", 0, 28023]]}
61-
), # size of photo.jpg read above
59+
"photo": AnnotatedValue("", {"rem": [["!raw", "x"]]}),
6260
}
6361

6462
# Dummy ASGI scope for creating mock Starlette requests
@@ -160,7 +158,11 @@ async def test_starlettrequestextractor_content_length(sentry_init):
160158
"starlette.requests.Request.stream",
161159
return_value=AsyncIterator(json.dumps(BODY_JSON)),
162160
):
163-
starlette_request = starlette.requests.Request(SCOPE)
161+
scope = SCOPE.copy()
162+
scope["headers"] = [
163+
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
164+
]
165+
starlette_request = starlette.requests.Request(scope)
164166
extractor = StarletteRequestExtractor(starlette_request)
165167

166168
assert await extractor.content_length() == len(json.dumps(BODY_JSON))
@@ -266,6 +268,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init
266268
scope = SCOPE.copy()
267269
scope["headers"] = [
268270
[b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"],
271+
[b"content-length", str(len(BODY_FORM)).encode()],
269272
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
270273
]
271274
with mock.patch(
@@ -283,10 +286,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init
283286
"yummy_cookie": "choco",
284287
}
285288
# Because request is too big only the AnnotatedValue is extracted.
286-
assert request_info["data"].metadata == {
287-
"rem": [["!config", "x", 0, 28355]],
288-
"len": 28355,
289-
}
289+
assert request_info["data"].metadata == {"rem": [["!config", "x"]]}
290290

291291

292292
@pytest.mark.asyncio
@@ -298,6 +298,7 @@ async def test_starlettrequestextractor_extract_request_info(sentry_init):
298298
scope = SCOPE.copy()
299299
scope["headers"] = [
300300
[b"content-type", b"application/json"],
301+
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
301302
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
302303
]
303304

@@ -327,6 +328,7 @@ async def test_starlettrequestextractor_extract_request_info_no_pii(sentry_init)
327328
scope = SCOPE.copy()
328329
scope["headers"] = [
329330
[b"content-type", b"application/json"],
331+
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
330332
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
331333
]
332334

0 commit comments

Comments
 (0)