Skip to content

Commit a1da136

Browse files
authored
Merge pull request #188 from common-workflow-language/track-imports
Parser includes $import and $include tracking
2 parents 77669d4 + 86ca045 commit a1da136

File tree

3 files changed

+150
-78
lines changed

3 files changed

+150
-78
lines changed

cwl_utils/parser/cwl_v1_0.py

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747

4848

4949
class LoadingOptions:
50-
5150
idx: IdxType
5251
fileuri: Optional[str]
5352
baseuri: str
@@ -59,6 +58,8 @@ class LoadingOptions:
5958
vocab: Dict[str, str]
6059
rvocab: Dict[str, str]
6160
cache: CacheType
61+
imports: List[str]
62+
includes: List[str]
6263

6364
def __init__(
6465
self,
@@ -71,9 +72,10 @@ def __init__(
7172
addl_metadata: Optional[Dict[str, str]] = None,
7273
baseuri: Optional[str] = None,
7374
idx: Optional[IdxType] = None,
75+
imports: Optional[List[str]] = None,
76+
includes: Optional[List[str]] = None,
7477
) -> None:
7578
"""Create a LoadingOptions object."""
76-
7779
self.original_doc = original_doc
7880

7981
if idx is not None:
@@ -106,6 +108,16 @@ def __init__(
106108
else:
107109
self.addl_metadata = copyfrom.addl_metadata if copyfrom is not None else {}
108110

111+
if imports is not None:
112+
self.imports = imports
113+
else:
114+
self.imports = copyfrom.imports if copyfrom is not None else []
115+
116+
if includes is not None:
117+
self.includes = includes
118+
else:
119+
self.includes = copyfrom.includes if copyfrom is not None else []
120+
109121
if fetcher is not None:
110122
self.fetcher = fetcher
111123
elif copyfrom is not None:
@@ -151,24 +163,29 @@ def graph(self) -> Graph:
151163
if self.fileuri is not None
152164
else pathlib.Path(schema).resolve().as_uri()
153165
)
154-
try:
155-
if fetchurl not in self.cache or self.cache[fetchurl] is True:
156-
_logger.debug("Getting external schema %s", fetchurl)
166+
if fetchurl not in self.cache or self.cache[fetchurl] is True:
167+
_logger.debug("Getting external schema %s", fetchurl)
168+
try:
157169
content = self.fetcher.fetch_text(fetchurl)
158-
self.cache[fetchurl] = newGraph = Graph()
159-
for fmt in ["xml", "turtle"]:
160-
try:
161-
newGraph.parse(
162-
data=content, format=fmt, publicID=str(fetchurl)
163-
)
164-
break
165-
except (xml.sax.SAXParseException, TypeError, BadSyntax):
166-
pass
167-
graph += self.cache[fetchurl]
168-
except Exception as e:
169-
_logger.warning(
170-
"Could not load extension schema %s: %s", fetchurl, str(e)
171-
)
170+
except Exception as e:
171+
_logger.warning(
172+
"Could not load extension schema %s: %s", fetchurl, str(e)
173+
)
174+
continue
175+
newGraph = Graph()
176+
err_msg = "unknown error"
177+
for fmt in ["xml", "turtle"]:
178+
try:
179+
newGraph.parse(data=content, format=fmt, publicID=str(fetchurl))
180+
self.cache[fetchurl] = newGraph
181+
graph += newGraph
182+
break
183+
except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
184+
err_msg = str(e)
185+
else:
186+
_logger.warning(
187+
"Could not load extension schema %s: %s", fetchurl, err_msg
188+
)
172189
self.cache[key] = graph
173190
return graph
174191

@@ -200,18 +217,22 @@ def load_field(val, fieldtype, baseuri, loadingOptions):
200217
if "$import" in val:
201218
if loadingOptions.fileuri is None:
202219
raise SchemaSaladException("Cannot load $import without fileuri")
220+
url = loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"])
203221
result, metadata = _document_load_by_url(
204222
fieldtype,
205-
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"]),
223+
url,
206224
loadingOptions,
207225
)
226+
loadingOptions.imports.append(url)
208227
return result
209228
elif "$include" in val:
210229
if loadingOptions.fileuri is None:
211230
raise SchemaSaladException("Cannot load $include without fileuri")
212-
val = loadingOptions.fetcher.fetch_text(
213-
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$include"])
231+
url = loadingOptions.fetcher.urljoin(
232+
loadingOptions.fileuri, val["$include"]
214233
)
234+
val = loadingOptions.fetcher.fetch_text(url)
235+
loadingOptions.includes.append(url)
215236
return fieldtype.load(val, baseuri, loadingOptions)
216237

217238

@@ -296,7 +317,10 @@ def expand_url(
296317
split = urlsplit(url)
297318

298319
if (
299-
(bool(split.scheme) and split.scheme in ["http", "https", "file"])
320+
(
321+
bool(split.scheme)
322+
and split.scheme in loadingOptions.fetcher.supported_schemes()
323+
)
300324
or url.startswith("$(")
301325
or url.startswith("${")
302326
):
@@ -382,7 +406,7 @@ def __init__(self, items):
382406
def load(self, doc, baseuri, loadingOptions, docRoot=None):
383407
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
384408
if not isinstance(doc, MutableSequence):
385-
raise ValidationException("Expected a list, was {}".format(type(doc)))
409+
raise ValidationException(f"Expected a list, was {type(doc)}")
386410
r = [] # type: List[Any]
387411
errors = [] # type: List[SchemaSaladException]
388412
for i in range(0, len(doc)):
@@ -504,7 +528,7 @@ def __init__(self, classtype):
504528
def load(self, doc, baseuri, loadingOptions, docRoot=None):
505529
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
506530
if not isinstance(doc, MutableMapping):
507-
raise ValidationException("Expected a dict, was {}".format(type(doc)))
531+
raise ValidationException(f"Expected a dict, was {type(doc)}")
508532
return self.classtype.fromDoc(doc, baseuri, loadingOptions, docRoot=docRoot)
509533

510534
def __repr__(self): # type: () -> str
@@ -518,7 +542,7 @@ def __init__(self, items: Type[str]) -> None:
518542
def load(self, doc, baseuri, loadingOptions, docRoot=None):
519543
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
520544
if not isinstance(doc, str):
521-
raise ValidationException("Expected a str, was {}".format(type(doc)))
545+
raise ValidationException(f"Expected a str, was {type(doc)}")
522546
return doc
523547

524548

cwl_utils/parser/cwl_v1_1.py

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,6 @@
4747

4848

4949
class LoadingOptions:
50-
5150
idx: IdxType
5251
fileuri: Optional[str]
5352
baseuri: str
@@ -59,6 +58,8 @@ class LoadingOptions:
5958
vocab: Dict[str, str]
6059
rvocab: Dict[str, str]
6160
cache: CacheType
61+
imports: List[str]
62+
includes: List[str]
6263

6364
def __init__(
6465
self,
@@ -71,9 +72,10 @@ def __init__(
7172
addl_metadata: Optional[Dict[str, str]] = None,
7273
baseuri: Optional[str] = None,
7374
idx: Optional[IdxType] = None,
75+
imports: Optional[List[str]] = None,
76+
includes: Optional[List[str]] = None,
7477
) -> None:
7578
"""Create a LoadingOptions object."""
76-
7779
self.original_doc = original_doc
7880

7981
if idx is not None:
@@ -106,6 +108,16 @@ def __init__(
106108
else:
107109
self.addl_metadata = copyfrom.addl_metadata if copyfrom is not None else {}
108110

111+
if imports is not None:
112+
self.imports = imports
113+
else:
114+
self.imports = copyfrom.imports if copyfrom is not None else []
115+
116+
if includes is not None:
117+
self.includes = includes
118+
else:
119+
self.includes = copyfrom.includes if copyfrom is not None else []
120+
109121
if fetcher is not None:
110122
self.fetcher = fetcher
111123
elif copyfrom is not None:
@@ -151,24 +163,29 @@ def graph(self) -> Graph:
151163
if self.fileuri is not None
152164
else pathlib.Path(schema).resolve().as_uri()
153165
)
154-
try:
155-
if fetchurl not in self.cache or self.cache[fetchurl] is True:
156-
_logger.debug("Getting external schema %s", fetchurl)
166+
if fetchurl not in self.cache or self.cache[fetchurl] is True:
167+
_logger.debug("Getting external schema %s", fetchurl)
168+
try:
157169
content = self.fetcher.fetch_text(fetchurl)
158-
self.cache[fetchurl] = newGraph = Graph()
159-
for fmt in ["xml", "turtle"]:
160-
try:
161-
newGraph.parse(
162-
data=content, format=fmt, publicID=str(fetchurl)
163-
)
164-
break
165-
except (xml.sax.SAXParseException, TypeError, BadSyntax):
166-
pass
167-
graph += self.cache[fetchurl]
168-
except Exception as e:
169-
_logger.warning(
170-
"Could not load extension schema %s: %s", fetchurl, str(e)
171-
)
170+
except Exception as e:
171+
_logger.warning(
172+
"Could not load extension schema %s: %s", fetchurl, str(e)
173+
)
174+
continue
175+
newGraph = Graph()
176+
err_msg = "unknown error"
177+
for fmt in ["xml", "turtle"]:
178+
try:
179+
newGraph.parse(data=content, format=fmt, publicID=str(fetchurl))
180+
self.cache[fetchurl] = newGraph
181+
graph += newGraph
182+
break
183+
except (xml.sax.SAXParseException, TypeError, BadSyntax) as e:
184+
err_msg = str(e)
185+
else:
186+
_logger.warning(
187+
"Could not load extension schema %s: %s", fetchurl, err_msg
188+
)
172189
self.cache[key] = graph
173190
return graph
174191

@@ -200,18 +217,22 @@ def load_field(val, fieldtype, baseuri, loadingOptions):
200217
if "$import" in val:
201218
if loadingOptions.fileuri is None:
202219
raise SchemaSaladException("Cannot load $import without fileuri")
220+
url = loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"])
203221
result, metadata = _document_load_by_url(
204222
fieldtype,
205-
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$import"]),
223+
url,
206224
loadingOptions,
207225
)
226+
loadingOptions.imports.append(url)
208227
return result
209228
elif "$include" in val:
210229
if loadingOptions.fileuri is None:
211230
raise SchemaSaladException("Cannot load $include without fileuri")
212-
val = loadingOptions.fetcher.fetch_text(
213-
loadingOptions.fetcher.urljoin(loadingOptions.fileuri, val["$include"])
231+
url = loadingOptions.fetcher.urljoin(
232+
loadingOptions.fileuri, val["$include"]
214233
)
234+
val = loadingOptions.fetcher.fetch_text(url)
235+
loadingOptions.includes.append(url)
215236
return fieldtype.load(val, baseuri, loadingOptions)
216237

217238

@@ -296,7 +317,10 @@ def expand_url(
296317
split = urlsplit(url)
297318

298319
if (
299-
(bool(split.scheme) and split.scheme in ["http", "https", "file"])
320+
(
321+
bool(split.scheme)
322+
and split.scheme in loadingOptions.fetcher.supported_schemes()
323+
)
300324
or url.startswith("$(")
301325
or url.startswith("${")
302326
):
@@ -382,7 +406,7 @@ def __init__(self, items):
382406
def load(self, doc, baseuri, loadingOptions, docRoot=None):
383407
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
384408
if not isinstance(doc, MutableSequence):
385-
raise ValidationException("Expected a list, was {}".format(type(doc)))
409+
raise ValidationException(f"Expected a list, was {type(doc)}")
386410
r = [] # type: List[Any]
387411
errors = [] # type: List[SchemaSaladException]
388412
for i in range(0, len(doc)):
@@ -504,7 +528,7 @@ def __init__(self, classtype):
504528
def load(self, doc, baseuri, loadingOptions, docRoot=None):
505529
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
506530
if not isinstance(doc, MutableMapping):
507-
raise ValidationException("Expected a dict, was {}".format(type(doc)))
531+
raise ValidationException(f"Expected a dict, was {type(doc)}")
508532
return self.classtype.fromDoc(doc, baseuri, loadingOptions, docRoot=docRoot)
509533

510534
def __repr__(self): # type: () -> str
@@ -518,7 +542,7 @@ def __init__(self, items: Type[str]) -> None:
518542
def load(self, doc, baseuri, loadingOptions, docRoot=None):
519543
# type: (Any, str, LoadingOptions, Optional[str]) -> Any
520544
if not isinstance(doc, str):
521-
raise ValidationException("Expected a str, was {}".format(type(doc)))
545+
raise ValidationException(f"Expected a str, was {type(doc)}")
522546
return doc
523547

524548

0 commit comments

Comments (0)