-
Notifications
You must be signed in to change notification settings - Fork 2
/
skolem.py
executable file
·94 lines (75 loc) · 2.61 KB
/
skolem.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#!/usr/bin/env python
# This script replaces all blank nodes with named nodes by skolemizing [1] the input file. It will
# create a new bnode: namespace where the new named nodes are defined.
#
# [1] http://answers.semanticweb.com/questions/8336/what-is-skolemization
#
# Usage:
# -i --input= The input file
# -o --output= The output file (optional)
# -h --help Usage information
#
# Requirements:
# Redland librdf python interface <http://librdf.org/docs/python.html>
# Debian users can install these bindings with "apt-get install python-librdf"
#
# Copyright: (c) 2013 AKSW <http://aksw.org/>
# License: GNU General Public License (GPL) <http://opensource.org/licenses/gpl-license.php>
# Author: Natanael Arndt <http://aksw.org/NatanaelArndt>
import sys
import getopt
import re
import RDF
# Parse the command line.
# NOTE: getopt.getopt() returns (option pairs, leftover arguments) in that
# order, so the names below are swapped relative to their contents: `args`
# holds the (flag, value) option pairs and `opts` holds the leftover
# positional arguments. The names are kept as they are because the option
# loop in this script iterates over `args`.
try:
    args, opts = getopt.getopt(
        sys.argv[1:], "i:o:bh", ["input=", "output=", "backward", "help"]
    )
except getopt.GetoptError as err:
    # Unknown option or missing option value: report it on stderr and exit
    # with a usage-error status instead of dying with a traceback.
    sys.stderr.write("\n" + str(err) + "\n")
    sys.exit(2)
def help():
    """Write the command line usage summary to standard error."""
    usage_lines = (
        "",
        "Usage:",
        "-i --input= The input file",
        "-o --output= The output file (optional)",
        "-b --backward Switch to \"deskolemize\" the turtle file (optional)",
        "-h --help Usage information",
        "",
    )
    # The empty first and last entries yield the leading and trailing newline.
    sys.stderr.write("\n".join(usage_lines))
# Settings, initialised to their defaults; the option loop below may override
# outputUri, inputUri and forward.
outputUri = None  # output file path; stays None when -o/--output is not given
inputUri = None  # input as a "file:" URI; must be supplied via -i/--input
forward = True  # True: skolemize; False (-b/--backward): deskolemize
bnodePrefix = "bnode"  # prefix label declared for the skolem namespace
bnodeNamespace = "http://example.com/bnode/"  # namespace of the skolem nodes

# `args` holds the (flag, value) pairs produced by getopt (see its definition).
for opt, arg in args:
    if opt in ("-i", "--input"):
        # The parser is handed a URI, so the path is prefixed with "file:".
        inputUri = "file:" + arg
    elif opt in ("-o", "--output"):
        outputUri = arg
    elif opt in ("-b", "--backward"):
        forward = False
    elif opt in ("-h", "--help"):
        help()
        sys.exit(0)

# The input file is the only mandatory option.
if inputUri is None:
    sys.stderr.write("\nNo input file given.\n")
    help()
    sys.exit(1)
sys.stderr.write("Input: " + inputUri + "\n")

# Parse the Turtle input and re-serialize it as N-Triples text, so that blank
# nodes and skolem URIs can be rewritten with regular expressions.
ttlParser = RDF.TurtleParser()
inStream = ttlParser.parse_as_stream(inputUri)
# NOTE(review): the namespaces are queried before inStream has been consumed;
# confirm that the parser already knows all prefix declarations at this point.
namespaces = ttlParser.namespaces_seen()
ntrSerializer = RDF.NTriplesSerializer()
string = ntrSerializer.serialize_stream_to_string(inStream)

# NOTE(review): both patterns only match labels made of "r" and digits; labels
# containing other characters are left untouched.
if forward:
    # Skolemize: "_:<label>" becomes "<bnodePrefix>:<label>", and the prefix
    # is declared at the top so the text can be parsed again.
    bnode = re.compile(r'_(:[r0-9]+)')
    string = bnode.sub(bnodePrefix + r'\1', string)
    string = "@prefix " + bnodePrefix + ": <" + bnodeNamespace + "> .\n" + string
else:
    # Deskolemize: "<bnodeNamespace + label>" becomes "_:<label>". The
    # namespace is escaped so that characters such as "." match literally.
    bnode = re.compile(r'<' + re.escape(bnodeNamespace) + r'([r0-9]+)>')
    string = bnode.sub(r'_:\1', string)

# Parse the rewritten text and serialize it back to Turtle, re-registering the
# skolem namespace and the namespaces collected from the input.
outStream = ttlParser.parse_string_as_stream(string, inputUri)
ttlSerializer = RDF.Serializer(name="turtle")
ttlSerializer.set_namespace(bnodePrefix, bnodeNamespace)
for prefix, uri in namespaces.items():
    ttlSerializer.set_namespace(prefix, uri)
string = ttlSerializer.serialize_stream_to_string(outStream)

if outputUri:
    # The context manager closes (and thereby flushes) the output file.
    with open(outputUri, "w") as outFile:
        outFile.write(string)
else:
    outFile = sys.stdout
    outFile.write(string)
sys.stderr.write("done\n")