Skip to content

Commit 6bf7e0c

Browse files
committed
Rework schema loading and example validation
Add an OasJson class that can manage a mix of JSON Schema and non-JSON Schema objects, enabled by the patches to jschon previously added through pyproject.toml. Add exceptions to report schema reference resolution problems, some of which are caused by mis-configuration of URLs vs URIs, and some of which are due to loading resources before there is a way to determine if they are schemas or not. This can be fixed with a larger reworking, but for now require a command-line argument to flag stand-alone schema documents. Add command-line options: -v for logging verbosity, and -x for managing automatic suffix stripping for URIs vs URLs.
1 parent 14c275a commit 6bf7e0c

File tree

5 files changed

+524
-99
lines changed

5 files changed

+524
-99
lines changed

oascomply/apidescription.py

+206-25
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,14 @@
1919
import yaml_source_map as ymap
2020
from yaml_source_map.errors import InvalidYamlError
2121

22+
from oascomply import schema_catalog
2223
from oascomply.oasgraph import (
2324
OasGraph, OasGraphResult, OUTPUT_FORMATS_LINE, OUTPUT_FORMATS_STRUCTURED,
2425
)
2526
from oascomply.schemaparse import Annotation, SchemaParser
27+
from oascomply.oas30dialect import (
28+
OasJson, OasJsonTypeError, OasJsonRefSuffixError, OAS30_DIALECT_METASCHEMA,
29+
)
2630
import oascomply.resourceid as rid
2731

2832
__all__ = [
@@ -134,6 +138,7 @@ def add_resource(
134138
path: Optional[Path] = None,
135139
url: Optional[str] = None,
136140
sourcemap: Optional[Mapping] = None,
141+
oastype: Optional[str] = None,
137142
) -> None:
138143
"""
139144
Add a resource as part of the API description, and set its URI
@@ -148,14 +153,55 @@ def add_resource(
148153
uri = rid.Iri(uri)
149154
assert uri.fragment is None, "Only complete documenets can be added."
150155

151-
# The jschon.JSON class keeps track of JSON Pointer values for
152-
# every data entry, as well as providing parent links and type
153-
# information.
154-
self._contents[uri] = jschon.JSON(document)
156+
logger.info(f'Adding document "{path}" ...')
157+
logger.info(f'...URL <{url}>')
158+
logger.info(f'...URI <{uri}>')
159+
if oastype and oastype == 'Schema':
160+
logger.info(f'...instantiating JSON Schema <{uri}>')
161+
self._contents[uri] = jschon.JSONSchema(
162+
document,
163+
uri=jschon.URI(str(uri)),
164+
metaschema_uri=jschon.URI(OAS30_DIALECT_METASCHEMA),
165+
)
166+
# assert isinstance(
167+
else:
168+
# The jschon.JSON class keeps track of JSON Pointer values for
169+
# every data entry, as well as providing parent links and type
170+
# information. The OasJson subclass also automatically
171+
# instantiates jschon.JSONSchema classes for Schema Objects
172+
# and (in 3.0) for Reference Objects occupying the place of
173+
# Schema Objects.
174+
logger.info(f'...instantiating OAS Document <{uri}>')
175+
self._contents[uri] = OasJson(
176+
document,
177+
uri=uri,
178+
url=url,
179+
oasversion=self._version[:3],
180+
)
155181
if sourcemap:
156182
self._sources[uri] = sourcemap
157183
self._g.add_resource(url, uri, filename=path.name)
158184

185+
def resolve_references(self):
    """
    Walk every stored document and resolve its JSON Schema references.

    Each value in ``self._contents`` is handled according to its type:

    * ``OasJson`` documents have their ``resolve_references()`` method
      called.
    * ``jschon.JSONSchema`` documents are only logged as already
      resolved; nothing is called on them.
    * Anything else is reported with a warning and otherwise ignored.
    """
    for doc in self._contents.values():
        logger.info(
            f'Checking JSON Schema references in <{doc.uri}>...',
        )

        # OasJson is checked first, so an OasJson document never
        # reaches the jschon.JSONSchema branch below.
        if isinstance(doc, OasJson):
            logger.info(
                '...resolving with OasJson.resolve_references()',
            )
            doc.resolve_references()
            continue

        if isinstance(doc, jschon.JSONSchema):
            logger.info(
                '...already resolved by jschon.JSONSchema()',
            )
            continue

        logger.warning(
            f'Unknown type "{type(doc)}" for document <{doc.uri}>',
        )
204+
159205
def get_resource(self, uri: Union[str, rid.Iri]) -> Optional[Any]:
160206
if not isinstance(uri, rid.IriWithJsonPtr):
161207
# TODO: IRI vs URI
@@ -172,10 +218,11 @@ def get_resource(self, uri: Union[str, rid.Iri]) -> Optional[Any]:
172218
)
173219
except (KeyError, jschon.JSONPointerError):
174220
logger.warning(f"Could not find resource {uri}")
175-
return None, None, None
221+
raise # return None, None, None
176222

177223
def validate(self, resource_uri=None, oastype='OpenAPI'):
178224
sp = SchemaParser.get_parser({}, annotations=ANNOT_ORDER)
225+
errors = []
179226
if resource_uri is None:
180227
assert oastype == 'OpenAPI'
181228
resource_uri = self._primary_uri
@@ -206,17 +253,21 @@ def validate(self, resource_uri=None, oastype='OpenAPI'):
206253
if annot == 'oasExamples':
207254
# By this point we have set up the necessary reference info
208255
for uri, oastype in to_validate.items():
209-
# TODO: Handle fragments vs whole resources
210256
if uri not in self._validated:
211-
self.validate(uri, oastype)
257+
errors.extend(self.validate(uri, oastype))
212258

213259
method_name = f'add_{annot.lower()}'
214260
method_callable = getattr(self._g, method_name)
215261
for args in by_method[method_name]:
216262
graph_result = method_callable(*args)
263+
for err in graph_result.errors:
264+
errors.append(err)
265+
logger.error(json.dumps(err['error'], indent=2))
217266
for uri, oastype in graph_result.refTargets:
218267
to_validate[uri] = oastype
219268

269+
return errors
270+
220271
def serialize(
221272
self,
222273
*args,
@@ -261,7 +312,7 @@ def serialize(
261312
new_kwargs.update(kwargs)
262313

263314
if destination in (sys.stdout, sys.stderr):
264-
# rdflib serializers write bytes, not str # if destination
315+
# rdflib serializers write bytes, not str if destination
265316
# is not None, which doesn't work with sys.stdout / sys.stderr
266317
destination.flush()
267318
with os.fdopen(
@@ -281,36 +332,78 @@ def serialize(
281332
self._g.serialize(*args, destination=destination, **new_kwargs)
282333

283334
@classmethod
284-
def _process_file_arg(cls, filearg, prefixes, create_source_map):
335+
def _process_file_arg(
336+
cls,
337+
filearg,
338+
prefixes,
339+
create_source_map,
340+
strip_suffix,
341+
):
285342
path = Path(filearg[0])
286343
full_path = path.resolve()
344+
oastype = None
345+
uri = None
346+
logger.debug(
347+
f'Processing {full_path!r}, strip_suffix={strip_suffix}...'
348+
)
287349
if len(filearg) > 1:
288-
# TODO: Support semantic type
289-
uri = filearg[1]
290-
else:
291-
uri = full_path.with_suffix('').as_uri()
350+
try:
351+
uri = rid.IriWithJsonPtr(filearg[1])
352+
logger.debug(f'...assigning URI <{uri}> from 2nd arg')
353+
except ValueError:
354+
# TODO: Verify OAS type
355+
oastype = filearg[1]
356+
logger.debug(f'...assigning OAS type "{oastype}" from 2nd arg')
357+
if len(filearg) > 2:
358+
if uri is None:
359+
raise ValueError('2nd of 3 -f args must be URI')
360+
oastype = filearg[2]
361+
logger.debug(f'...assigning OAS type "{oastype}" from 3rd arg')
362+
292363
for p in prefixes:
293364
try:
294365
rel = full_path.relative_to(p.directory)
295366
uri = rid.Iri(str(p.prefix) + str(rel.with_suffix('')))
367+
logger.debug(
368+
f'...assigning URI <{uri}> using prefix <{p.prefix}>',
369+
)
296370
except ValueError:
297371
pass
372+
298373
filetype = path.suffix[1:] or 'yaml'
299374
if filetype == 'yml':
300375
filetype = 'yaml'
376+
logger.debug('...determined filetype={filetype}')
377+
378+
if uri is None:
379+
if strip_suffix:
380+
uri = rid.Iri(full_path.with_suffix('').as_uri())
381+
else:
382+
uri = rid.Iri(full_path.as_uri())
383+
logger.debug(
384+
f'...assigning URI <{uri}> from URL <{full_path.as_uri()}>',
385+
)
301386

302387
content = path.read_text()
303388
sourcemap = None
304389
if filetype == 'json':
305390
data = json.loads(content)
306391
if create_source_map:
392+
logger.info(
393+
f'Creating JSON sourcemap for {path}, '
394+
'(can disable with -n if slow)',
395+
)
307396
sourcemap = jmap.calculate(content)
308397
elif filetype == 'yaml':
309398
data = yaml.safe_load(content)
310399
if create_source_map:
311400
# The YAML source mapper gets confused sometimes,
312401
# just log a warning and work without the map.
313402
try:
403+
logger.info(
404+
f'Creating YAML sourcemap for {path}, '
405+
'(can disable with -n if slow)',
406+
)
314407
sourcemap = ymap.calculate(content)
315408
except InvalidYamlError:
316409
logger.warn(
@@ -325,6 +418,7 @@ def _process_file_arg(cls, filearg, prefixes, create_source_map):
325418
'sourcemap': sourcemap,
326419
'path': path,
327420
'uri': uri,
421+
'oastype': oastype,
328422
}
329423

330424
@classmethod
@@ -364,6 +458,19 @@ def _process_prefix(cls, p):
364458
)
365459
return UriPrefix(prefix=prefix, directory=path)
366460

461+
@classmethod
def _url_for(cls, uri):
    """
    Find the ``file:`` URL of the on-disk document behind ``uri``.

    :param uri: an IRI-like object exposing ``scheme`` and ``path``
    :returns: ``uri`` itself if its path exists on disk; otherwise an
        ``rid.Iri`` for the first existing sibling path carrying one
        of the known document suffixes; ``None`` if the scheme is not
        ``file`` or no candidate file exists.
    """
    if uri.scheme != 'file':
        return None

    path = Path(uri.path)
    if path.exists():
        return uri

    # The URI may have had its file extension stripped, so probe the
    # extensions that documents are loaded with.  Note that
    # with_suffix() replaces any existing suffix rather than appending.
    # '.yml' was previously misspelled '.ym', so such files were
    # never found.
    for suffix in ('.json', '.yaml', '.yml'):
        candidate = path.with_suffix(suffix)
        if candidate.exists():
            return rid.Iri(candidate.as_uri())
    return None
473+
367474
@classmethod
368475
def load(cls):
369476
class CustomArgumentParser(argparse.ArgumentParser):
@@ -433,10 +540,9 @@ def format_help(self):
433540
'-x',
434541
'--strip-suffix',
435542
nargs='?',
436-
type=bool,
437-
default=None,
438-
help="NOT YET IMPLEMENTED "
439-
"Assign URIs to documents by stripping the file extension "
543+
choices=('auto', 'yes', 'no'),
544+
default='auto',
545+
help="Assign URIs to documents by stripping the file extension "
440546
"from their URLs if they have not been assigned URIs by "
441547
"-d or the two-argument form of -f; can be set to false "
442548
"to *disable* prefix-stripping by -d"
@@ -486,6 +592,13 @@ def format_help(self):
486592
help="NOT YET IMPLEMENTED "
487593
"TODO: Support storing to various kinds of databases.",
488594
)
595+
parser.add_argument(
596+
'-v',
597+
'--verbose',
598+
action='count',
599+
default=0,
600+
help="Increase verbosity; can passed twice for full debug output.",
601+
)
489602
parser.add_argument(
490603
'--test-mode',
491604
action='store_true',
@@ -495,6 +608,26 @@ def format_help(self):
495608
"automated testing of the entire system.",
496609
)
497610
args = parser.parse_args()
611+
if args.verbose:
612+
if args.verbose == 1:
613+
logging.basicConfig(level=logging.INFO)
614+
else:
615+
logging.basicConfig(level=logging.DEBUG)
616+
else:
617+
logging.basicConfig(level=logging.WARN)
618+
619+
if args.strip_suffix is None:
620+
# TODO: Write a custom arg action
621+
# For now this simulates '-x' without an arg
622+
# as equivalent to '-x yes' in the debug log.
623+
args.strip_suffix = 'yes'
624+
strip_suffix = {
625+
'auto': None,
626+
'yes': True,
627+
'no': False,
628+
}[args.strip_suffix]
629+
logger.debug(f'Processed arguments:\n{args}')
630+
498631
if args.directories:
499632
raise NotImplementedError('-D option not yet implemented')
500633

@@ -509,18 +642,13 @@ def format_help(self):
509642
filearg,
510643
prefixes,
511644
args.number_lines is True,
645+
strip_suffix,
512646
) for filearg in args.files]
513647

514648
candidates = list(filter(lambda r: 'openapi' in r['data'], resources))
515649
if not candidates:
516650
logger.error("No document contains an 'openapi' field!")
517651
return -1
518-
if len(candidates) > 1:
519-
logger.error(
520-
"Multiple documents with an 'openapi' field "
521-
"not yet supported"
522-
)
523-
return -1
524652
primary = candidates[0]
525653

526654
desc = ApiDescription(
@@ -537,10 +665,63 @@ def format_help(self):
537665
r['uri'],
538666
path=r['path'],
539667
sourcemap=r['sourcemap'],
668+
oastype=r['oastype'],
540669
)
541-
logger.info(f"Adding document {r['path']!r} <{r['uri']}>")
670+
try:
671+
desc.resolve_references()
672+
errors = desc.validate()
673+
if errors:
674+
sys.stderr.write('\nAPI description contains errors\n\n')
675+
sys.exit(-1)
676+
677+
except OasJsonRefSuffixError as e:
678+
path = Path(e.target_resource_uri.path).relative_to(Path.cwd())
679+
logger.error(
680+
f'{e.args[0]}\n\n'
681+
'The above error can be fixed either by using -x:'
682+
f'\n\n\t-x -f {path}\n\n'
683+
'... or by using the two-argument form of -f:'
684+
f'\n\n\t-f {path} {e.ref_resource_uri}\n'
685+
)
686+
sys.exit(-1)
687+
688+
except OasJsonTypeError as e:
689+
url = cls._url_for(e.uri) if e.url is None else e.url
690+
if url is None:
691+
logger.error(
692+
f'Cannot determine URL and path for URI <{e.uri}>, '
693+
f'run with -v and check the logs',
694+
)
695+
url = rid.Iri('about:unknown-url')
696+
path = '<unknown-path>'
697+
else:
698+
path = Path(url.path).relative_to(Path.cwd())
699+
700+
# TODO: This isn't always quite right depending on -d / -D
701+
# when strip_suffix is None
702+
if strip_suffix in (True, None):
703+
uri_len = len(str(e.uri))
704+
truncated_url = str(url)[:uri_len]
705+
missing_suffix = str(url)[uri_len:]
706+
if (
707+
truncated_url == str(e.uri) and
708+
missing_suffix in ('.json', '.yaml', '.yml')
709+
):
710+
path_and_uri = f'-x -f {path}'
711+
712+
if path_and_uri is None:
713+
path_and_uri = (
714+
f'-f {path}' if e.uri == url
715+
else f'-f {path} {e.uri}'
716+
)
717+
718+
logger.error(
719+
f'JSON Schema documents must pass "Schema" (without quotes) '
720+
f'as an additional -f argument:\n\n'
721+
f'\t {path_and_uri} Schema\n'
722+
)
723+
sys.exit(-1)
542724

543-
desc.validate()
544725
if args.output_format is not None or args.test_mode is True:
545726
desc.serialize(output_format=args.output_format)
546727
else:

0 commit comments

Comments
 (0)