Merge pull request #188 from common-workflow-language/track-imports
Parser includes $import and $include tracking
tetron authored Jan 26, 2023
2 parents 77669d4 + 86ca045 commit a1da136
Showing 3 changed files with 150 additions and 78 deletions.
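
The substance of the change is that LoadingOptions now carries imports and includes lists, and load_field records the resolved URL of every $import and $include directive it follows. Below is a minimal sketch of how the new lists might be inspected after parsing; load_document_by_uri and the loadingOptions attribute on the returned object are assumptions based on the surrounding cwl-utils API, and workflow.cwl is a hypothetical document.

# Sketch only: assumes cwl_utils.parser.load_document_by_uri exists and that
# parsed objects keep a reference to their LoadingOptions as `loadingOptions`;
# "workflow.cwl" is a hypothetical document that uses $import / $include.
from cwl_utils.parser import load_document_by_uri

tool = load_document_by_uri("workflow.cwl")

# With this change, the parser records every directive it resolved:
for uri in tool.loadingOptions.imports:
    print("imported:", uri)
for uri in tool.loadingOptions.includes:
    print("included:", uri)
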
76 changes: 50 additions & 26 deletions cwl_utils/parser/cwl_v1_0.py
@@ -47,7 +47,6 @@


class LoadingOptions:

idx: IdxType
fileuri: Optional[str]
baseuri: str
@@ -59,6 +58,8 @@ class LoadingOptions:
vocab: Dict[str, str]
rvocab: Dict[str, str]
cache: CacheType
imports: List[str]
includes: List[str]

def __init__(
self,
@@ -71,9 +72,10 @@ def __init__(
addl_metadata: Optional[Dict[str, str]] = None,
baseuri: Optional[str] = None,
idx: Optional[IdxType] = None,
imports: Optional[List[str]] = None,
includes: Optional[List[str]] = None,
) -> None:
"""Create a LoadingOptions object."""

self.original_doc = original_doc

if idx is not None:
@@ -106,6 +108,16 @@ def __init__(
else:
self.addl_metadata = copyfrom.addl_metadata if copyfrom is not None else {}

if imports is not None:
self.imports = imports
else:
self.imports = copyfrom.imports if copyfrom is not None else []

if includes is not None:
self.includes = includes
else:
self.includes = copyfrom.includes if copyfrom is not None else []

if fetcher is not None:
self.fetcher = fetcher
elif copyfrom is not None:
@@ -151,24 +163,29 @@ def graph(self) -> Graph:
if self.fileuri is not None
else pathlib.Path(schema).resolve().as_uri()
)
try:
if fetchurl not in self.cache or self.cache[fetchurl] is True:
_logger.debug("Getting external schema %s", fetchurl)
if fetchurl not in self.cache or self.cache[fetchurl] is True:
_logger.debug("Getting external schema %s", fetchurl)
try:
content = self.fetcher.fetch_text(fetchurl)
self.cache[fetchurl] = newGraph = Graph()
for fmt in ["xml", "turtle"]:
try:
newGraph.parse(
data=content, format=fmt, publicID=str(fetchurl)
)
break
except (xml.sax.SAXParseException, TypeError, BadSyntax):
pass
graph += self.cache[fetchurl]
except Exception as e:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, str(e)
)
except Exception as e:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, str(e)
)
continue
newGraph = Graph()
err_msg = "unknown error"
for fmt in ["xml", "turtle"]:
try:
newGraph.parse(data=content, format=fmt, publicID=str(fetchurl))
self.cache[fetchurl] = newGraph
graph += newGraph
break
except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
err_msg = str(e)
else:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, err_msg
)
self.cache[key] = graph
return graph
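
A side note on the rewritten schema-fetching block above: it relies on Python's for/else, where the else clause runs only if the loop finished without hitting break, so the warning (carrying the last recorded parse error) fires only when neither "xml" nor "turtle" succeeds. A standalone sketch of that control flow, with a hypothetical try_parse helper standing in for rdflib's Graph.parse:

# Standalone sketch of the for/else pattern used above; try_parse is a
# hypothetical stand-in for Graph.parse.
def try_parse(content: str, fmt: str) -> None:
    if fmt != "turtle":
        raise ValueError(f"cannot parse as {fmt}")


def load(content: str) -> None:
    err_msg = "unknown error"
    for fmt in ["xml", "turtle"]:
        try:
            try_parse(content, fmt)
            print(f"parsed as {fmt}")
            break
        except ValueError as e:
            err_msg = str(e)
    else:
        # Reached only when no format parsed, i.e. the loop never hit break.
        print(f"could not parse: {err_msg}")


load("@prefix ex: <http://example.org/> .")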

@@ -200,18 +217,22 @@ def load_field(val, fieldtype, baseuri, loadingOptions):
if "$import" in val:
if loadingOptions.fileuri is None:
raise SchemaSaladException("Cannot load $import without fileuri")
url = loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"])
result, metadata = _document_load_by_url(
fieldtype,
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"]),
url,
loadingOptions,
)
loadingOptions.imports.append(url)
return result
elif "$include" in val:
if loadingOptions.fileuri is None:
raise SchemaSaladException("Cannot load $import without fileuri")
val = loadingOptions.fetcher.fetch_text(
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$include"])
url = loadingOptions.fetcher.urljoin(
loadingOptions.fileuri, val["$include"]
)
val = loadingOptions.fetcher.fetch_text(url)
loadingOptions.includes.append(url)
return fieldtype.load(val, baseuri, loadingOptions)


@@ -296,7 +317,10 @@ def expand_url(
split = urlsplit(url)

if (
(bool(split.scheme) and split.scheme in ["http", "https", "file"])
(
bool(split.scheme)
and split.scheme in loadingOptions.fetcher.supported_schemes()
)
or url.startswith("$(")
or url.startswith("${")
):
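
The expand_url hunk above drops the hard-coded ["http", "https", "file"] list in favour of the fetcher's supported_schemes(), so a custom fetcher can widen the set of schemes treated as absolute URLs. A rough sketch, assuming schema_salad.fetcher.DefaultFetcher is the fetcher in use and that supported_schemes() returns a list of scheme strings:

# Sketch only: DefaultFetcher and supported_schemes() are assumed from the
# schema-salad fetcher interface that the generated parser calls into.
from typing import List

from schema_salad.fetcher import DefaultFetcher


class S3SchemeFetcher(DefaultFetcher):
    """Hypothetical fetcher that also treats s3:// references as absolute."""

    def supported_schemes(self) -> List[str]:
        return super().supported_schemes() + ["s3"]

Passing such a fetcher via LoadingOptions(fetcher=...) would then let expand_url leave s3:// references untouched.
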
@@ -382,7 +406,7 @@ def __init__(self, items):
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, MutableSequence):
raise ValidationException("Expected a list, was {}".format(type(doc)))
raise ValidationException(f"Expected a list, was {type(doc)}")
r = [] # type: List[Any]
errors = [] # type: List[SchemaSaladException]
for i in range(0, len(doc)):
@@ -504,7 +528,7 @@ def __init__(self, classtype):
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, MutableMapping):
raise ValidationException("Expected a dict, was {}".format(type(doc)))
raise ValidationException(f"Expected a dict, was {type(doc)}")
return self.classtype.fromDoc(doc, baseuri, loadingOptions, docRoot=docRoot)

def __repr__(self): # type: () -> str
@@ -518,7 +542,7 @@ def __init__(self, items: Type[str]) -> None:
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, str):
raise ValidationException("Expected a str, was {}".format(type(doc)))
raise ValidationException(f"Expected a str, was {type(doc)}")
return doc


76 changes: 50 additions & 26 deletions cwl_utils/parser/cwl_v1_1.py
@@ -47,7 +47,6 @@


class LoadingOptions:

idx: IdxType
fileuri: Optional[str]
baseuri: str
@@ -59,6 +58,8 @@ class LoadingOptions:
vocab: Dict[str, str]
rvocab: Dict[str, str]
cache: CacheType
imports: List[str]
includes: List[str]

def __init__(
self,
@@ -71,9 +72,10 @@ def __init__(
addl_metadata: Optional[Dict[str, str]] = None,
baseuri: Optional[str] = None,
idx: Optional[IdxType] = None,
imports: Optional[List[str]] = None,
includes: Optional[List[str]] = None,
) -> None:
"""Create a LoadingOptions object."""

self.original_doc = original_doc

if idx is not None:
@@ -106,6 +108,16 @@ def __init__(
else:
self.addl_metadata = copyfrom.addl_metadata if copyfrom is not None else {}

if imports is not None:
self.imports = imports
else:
self.imports = copyfrom.imports if copyfrom is not None else []

if includes is not None:
self.includes = includes
else:
self.includes = copyfrom.includes if copyfrom is not None else []

if fetcher is not None:
self.fetcher = fetcher
elif copyfrom is not None:
@@ -151,24 +163,29 @@ def graph(self) -> Graph:
if self.fileuri is not None
else pathlib.Path(schema).resolve().as_uri()
)
try:
if fetchurl not in self.cache or self.cache[fetchurl] is True:
_logger.debug("Getting external schema %s", fetchurl)
if fetchurl not in self.cache or self.cache[fetchurl] is True:
_logger.debug("Getting external schema %s", fetchurl)
try:
content = self.fetcher.fetch_text(fetchurl)
self.cache[fetchurl] = newGraph = Graph()
for fmt in ["xml", "turtle"]:
try:
newGraph.parse(
data=content, format=fmt, publicID=str(fetchurl)
)
break
except (xml.sax.SAXParseException, TypeError, BadSyntax):
pass
graph += self.cache[fetchurl]
except Exception as e:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, str(e)
)
except Exception as e:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, str(e)
)
continue
newGraph = Graph()
err_msg = "unknown error"
for fmt in ["xml", "turtle"]:
try:
newGraph.parse(data=content, format=fmt, publicID=str(fetchurl))
self.cache[fetchurl] = newGraph
graph += newGraph
break
except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
err_msg = str(e)
else:
_logger.warning(
"Could not load extension schema %s: %s", fetchurl, err_msg
)
self.cache[key] = graph
return graph

@@ -200,18 +217,22 @@ def load_field(val, fieldtype, baseuri, loadingOptions):
if "$import" in val:
if loadingOptions.fileuri is None:
raise SchemaSaladException("Cannot load $import without fileuri")
url = loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"])
result, metadata = _document_load_by_url(
fieldtype,
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"]),
url,
loadingOptions,
)
loadingOptions.imports.append(url)
return result
elif "$include" in val:
if loadingOptions.fileuri is None:
raise SchemaSaladException("Cannot load $import without fileuri")
val = loadingOptions.fetcher.fetch_text(
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$include"])
url = loadingOptions.fetcher.urljoin(
loadingOptions.fileuri, val["$include"]
)
val = loadingOptions.fetcher.fetch_text(url)
loadingOptions.includes.append(url)
return fieldtype.load(val, baseuri, loadingOptions)


@@ -296,7 +317,10 @@ def expand_url(
split = urlsplit(url)

if (
(bool(split.scheme) and split.scheme in ["http", "https", "file"])
(
bool(split.scheme)
and split.scheme in loadingOptions.fetcher.supported_schemes()
)
or url.startswith("$(")
or url.startswith("${")
):
@@ -382,7 +406,7 @@ def __init__(self, items):
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, MutableSequence):
raise ValidationException("Expected a list, was {}".format(type(doc)))
raise ValidationException(f"Expected a list, was {type(doc)}")
r = [] # type: List[Any]
errors = [] # type: List[SchemaSaladException]
for i in range(0, len(doc)):
@@ -504,7 +528,7 @@ def __init__(self, classtype):
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, MutableMapping):
raise ValidationException("Expected a dict, was {}".format(type(doc)))
raise ValidationException(f"Expected a dict, was {type(doc)}")
return self.classtype.fromDoc(doc, baseuri, loadingOptions, docRoot=docRoot)

def __repr__(self): # type: () -> str
@@ -518,7 +542,7 @@ def __init__(self, items: Type[str]) -> None:
def load(self, doc, baseuri, loadingOptions, docRoot=None):
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
if not isinstance(doc, str):
raise ValidationException("Expected a str, was {}".format(type(doc)))
raise ValidationException(f"Expected a str, was {type(doc)}")
return doc

