Skip to content

Commit

Permalink
Add a more explicit input_spec validation schema, with Cerberus
Browse files Browse the repository at this point in the history
  • Loading branch information
MonsieurV committed Dec 7, 2022
1 parent 84ffb3f commit 4908c82
Show file tree
Hide file tree
Showing 6 changed files with 348 additions and 224 deletions.
1 change: 1 addition & 0 deletions Pipfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ flask-cors = "*"
importlib-metadata = "*"
hyrule = "*"
sentry-sdk = {extras = ["flask"], version = "*"}
cerberus = "*"

[requires]
python_version = "3"
454 changes: 240 additions & 214 deletions Pipfile.lock

Large diffs are not rendered by default.

82 changes: 72 additions & 10 deletions latexonhttp/api/builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
"""
import logging
import pprint
import json
import glom
import cerberus
from flask import Blueprint, request, jsonify, Response
from latexonhttp.compiler import (
latexToPdf,
Expand Down Expand Up @@ -68,6 +70,54 @@
# signature of compilation spec -> in cache? -> directly return.


class JSONInputSpecEncoderForDebug(json.JSONEncoder):
    """JSON encoder used to echo an input spec back in debug/error payloads.

    Binary values are replaced by the ``"<binary-content>"`` placeholder
    instead of being serialized; any other value the standard encoder does
    not know is delegated to ``json.JSONEncoder``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for *obj*.

        Args:
            obj: A value the standard encoder could not serialize by itself.

        Returns:
            The string ``"<binary-content>"`` for bytes-like values.

        Raises:
            TypeError: Raised by the base encoder for any other
                unsupported object.
        """
        # Cover every built-in bytes-like type, so that building the debug
        # payload does not itself fail on binary content.
        if isinstance(obj, (bytes, bytearray, memoryview)):
            return "<binary-content>"
        return super().default(obj)


# Keys a single resource entry is allowed to carry.
# For now, we just check the keys (not the values attached to them).
_RESOURCE_ENTRY_KEYS = [
    "url",
    "file",
    "git",
    "tar",
    "cache",
    "content",
    "main",
    "path",
]

# "resources" is the only mandatory top-level entry: a list of dicts whose
# keys must all belong to the allowed set above.
_resources_rule = {
    "type": "list",
    "required": True,
    "schema": {
        "type": "dict",
        "keysrules": {"type": "string", "allowed": _RESOURCE_ENTRY_KEYS},
    },
}

# Optional "options" entry; only the bibliography command is described.
_bibliography_rule = {
    "type": "dict",
    "schema": {
        "command": {
            "type": "string",
            "allowed": AVAILABLE_BIBLIOGRAPHY_COMMANDS,
        }
    },
}
_options_rule = {
    "type": "dict",
    "schema": {"bibliography": _bibliography_rule},
}

# Cerberus schema describing the shape of a compilation input spec.
input_spec_schema = {
    "compiler": {"type": "string", "allowed": AVAILABLE_LATEX_COMPILERS},
    "resources": _resources_rule,
    "options": _options_rule,
}

# NOTE(review): a single validator instance is shared module-wide, and
# Cerberus keeps the result of the last validation on the instance
# (``.errors``); confirm this is safe under the deployed concurrency model.
input_spec_validator = cerberus.Validator(input_spec_schema)


@builds_app.route("/sync", methods=["GET", "POST"])
def compiler_latex():
input_spec = None
Expand All @@ -76,8 +126,10 @@ def compiler_latex():
# for eg. using GET/param to specify the compiler
# with a POST/json payload (POST:/builds/sync?compiler=xelatex)

input_spec_mode = None
# Support for GET querystring requests.
if request.method == "GET":
input_spec_mode = "querystring"
logger.info(pprint.pformat(request.args.to_dict(False)))
input_spec, error = parse_querystring_resources_spec(
request.args.to_dict(True), request.args.to_dict(False)
Expand All @@ -87,6 +139,7 @@ def compiler_latex():

# Support for multipart/form-data requests.
if request.content_type and "multipart/form-data" in request.content_type:
input_spec_mode = "multipart/form-data"
logger.info(request.content_type)
logger.info(pprint.pformat(request.files))
logger.info(pprint.pformat(request.form))
Expand All @@ -95,6 +148,7 @@ def compiler_latex():
return jsonify(error), 400

if not input_spec:
input_spec_mode = "json"
input_spec, error = parse_json_resources_spec(request.get_json())
if error:
return jsonify(error), 400
Expand All @@ -103,19 +157,27 @@ def compiler_latex():
return jsonify({"error": "MISSING_COMPILATION_SPECIFICATION"}), 400

# Payload validations.
# TODO Use a data validation library to run checks?
# (Write one in Hy?)
if "resources" in input_spec:
if not isinstance(input_spec, list):
return (
jsonify(
logger.info(request.content_type)
logger.info(pprint.pformat(request.files))
logger.info(pprint.pformat(request.form))
logger.info(input_spec)

if not input_spec_validator.validate(input_spec):
return (
Response(
json.dumps(
{
"error": "INVALID_PAYLOAD_SHAPE",
"message": "resources must be a list",
}
"shape_errors": input_spec_validator.errors,
"input_spec_mode": input_spec_mode,
"input_spec": input_spec,
},
cls=JSONInputSpecEncoderForDebug,
),
400,
)
content_type="application/json",
),
400,
)

# High-level normalization.
logger.info(
Expand Down
3 changes: 3 additions & 0 deletions latexonhttp/resources/multipart_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ def parse_multipart_resources_spec(forms, files):
multipart_file = files[resource["multipart"]]
# We uses base64 for encoding file content.
resource["file"] = base64.b64encode(multipart_file.read())
# We can delete the "multipart" entry in the spec,
# to keep it normalized.
del resource["multipart"]
if "path" not in resource:
resource["path"] = multipart_file.filename
return json_spec, None
29 changes: 29 additions & 0 deletions tests/test_api_input_spec.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
"""
tests.test_api_input_spec
~~~~~~~~~~~~~~~~~~~~~~~~~
Tests on LaTeX-on-HTTP input spec.
:copyright: (c) 2022 Yoan Tournade.
:license: AGPL, see LICENSE for more details.
"""
import pytest
import json
import requests


def test_input_spec_must_include_a_list_resources(latex_on_http_api_url):
    """
    Post a JSON spec whose "resources" entry is a string instead of a list,
    and check the API rejects it with a 400 INVALID_PAYLOAD_SHAPE error
    that reports the shape error and echoes back the received input spec.
    """
    # The resources are deliberately sent as a JSON-encoded string,
    # not as a list.
    invalid_resources = '{"main": true, "content": ""}'
    r = requests.post(
        latex_on_http_api_url + "/builds/sync",
        json={"resources": invalid_resources},
    )
    assert r.status_code == 400
    assert r.json() == {
        "error": "INVALID_PAYLOAD_SHAPE",
        "shape_errors": {"resources": ["must be of list type"]},
        "input_spec_mode": "json",
        "input_spec": {"resources": invalid_resources},
    }
3 changes: 3 additions & 0 deletions tests/test_api_multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ def test_multipart_api_full_spec_simple(latex_on_http_api_url):
"resources": '[{"main": "true", "multipart": "file1"}]',
}
r = requests.post(latex_on_http_api_url + "/builds/sync", files=files, data=form)
# import pprint

# pprint.pprint(r.json())
assert r.status_code == 201
snapshot_pdf(r.content, SAMPLE_HELLO_WORLD)

Expand Down

0 comments on commit 4908c82

Please sign in to comment.