Skip to content

Commit 3625b76

Browse files
JerkillerNudin and Michael F. Schönitzer authored
Add Italian lexemes query (#82)
* add italian lexemes query
* add it in default query

---------

Co-authored-by: Michael F. Schönitzer <[email protected]>
1 parent de36304 commit 3625b76

File tree

3 files changed

+137
-1
lines changed

3 files changed

+137
-1
lines changed

backend.py

+1
Original file line numberDiff line numberDiff line change
@@ -233,6 +233,7 @@ def get_lexeme_values(self) -> Tuple[int, int, Optional[int]]:
233233
"queries/en.sparql",
234234
"queries/fr.sparql",
235235
"queries/he.sparql",
236+
"queries/it.sparql",
236237
"queries/nb.sparql",
237238
"queries/sv.sparql",
238239
"queries/withoutdescriptions.sparql",

queries/default.sparql

+2-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@ SELECT * WHERE {
1212
?lang != wd:Q25167 && #nb
1313
?lang != wd:Q150 && #fr
1414
?lang != wd:Q9035 && #da
15-
?lang != wd:Q9288 #he
15+
?lang != wd:Q9288 && #he
16+
?lang != wd:Q652 #it
1617
).
1718
?lexeme wikibase:lemma ?lemma.
1819
# Search for potentially matching items with descriptions

queries/it.sparql

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
# Candidate lexeme/item matches for Italian lexemes (language wd:Q652).
# Each result row pairs a lexeme with an item whose rdfs:label is equal to the
# lexeme's lemma and which has a description tagged "it"; the outer WHERE then
# removes items that match the false-positive exclusion lists below.
# ?lang is not looked up: it is the constant wd:Q652 on every row.
SELECT ?lexeme ?item ?lemma ?desc ?cat ?genus (wd:Q652 as ?lang) WITH {
2+
SELECT *
3+
WHERE {
4+
?lexeme wikibase:lemma ?lemma;
5+
dct:language wd:Q652;
6+
wikibase:lexicalCategory ?cat.
7+
# ?genus is optional: it stays unbound when the lexeme has no wdt:P5185 value.
OPTIONAL { ?lexeme wdt:P5185 ?genus. } .
8+
# Drop pairs where one of the lexeme's senses already points at ?item via wdt:P5137.
FILTER NOT EXISTS {
9+
?lexeme ontolex:sense ?sense.
10+
?sense wdt:P5137 ?item.
11+
}.
12+
# Drop items that are themselves wikibase:Property entities.
FILTER NOT EXISTS { ?item a wikibase:Property}.
13+
# Drop lexemes that have any wdt:P5402 value.
FILTER NOT EXISTS { ?lexeme wdt:P5402 ?homograph }.
14+
# The item label must be the same term as the lemma (same variable ?lemma).
?item rdfs:label ?lemma.
15+
?item schema:description ?desc.
16+
# Keep only descriptions whose language tag is "it".
FILTER(LANG(?desc) = "it").
17+
# End of the named subquery; its solutions are pulled in below with `include %i`.
}} AS %i
18+
WHERE {
19+
include %i
20+
# Filter out common false-positives
21+
# anything not having a P31 has to go too (this is the first thing to relax when few positives are left)
22+
# NOTE(review): the disabled filter below is currently inactive. As written it would
# remove items that DO have a P31, which is the opposite of the sentence above — confirm
# the intended polarity before re-enabling it.
#filter not exists {?item wdt:P31 [].} # stuff without P31 which is usually false positives
23+
FILTER NOT EXISTS { ?item wdt:P131 [] }. # admin area items
24+
# Exclude items whose wdt:P31 value is any of the classes listed in ?not.
FILTER NOT EXISTS {
25+
VALUES ?not {
26+
wd:Q101352 # surname
27+
wd:Q10429085 # report
28+
wd:Q105543609 # musical work
29+
wd:Q11424 # movie
30+
wd:Q226730 # silent movie
31+
wd:Q202866 # animation movie
32+
wd:Q11446 # ships
33+
wd:Q1145276 # fictional country (star trek)
34+
wd:Q11668273 # shogi pieces
35+
wd:Q1229071 # motif
36+
wd:Q13406463 # wikimedia list article
37+
wd:Q13417114 # noble family
38+
wd:Q1391494 # technical terminology
39+
wd:Q147276 # names of any kind
40+
wd:Q1505023 # interpellation
41+
wd:Q15056993 # aircraft family
42+
wd:Q15632617 # fictive person
43+
wd:Q15642541 # human settlement definitions
44+
wd:Q15831596 # class of fictional entities e.g. star wars sandcrawler
45+
wd:Q17155032 # type of software
46+
wd:Q1725664 # terminology of any kind
47+
wd:Q17537576 # any creative work incl. music, video, text, etc.
48+
wd:Q17638537 # routine - fix computer terminology
49+
wd:Q1897960 # trotting horse
50+
wd:Q19798642 # WD value
51+
wd:Q19798644 # wikibase model terms
52+
wd:Q19798645 # wikibase datatype
53+
wd:Q20202269 # music terms
54+
wd:Q208569 # album
55+
wd:Q21191270 # tv series episode
56+
wd:Q215380 # bands
57+
wd:Q2235308 # types of ships
58+
wd:Q23786 # eye color
59+
wd:Q3305213 # painting
60+
wd:Q3331189 # version, edition, or translation
61+
wd:Q3491429 # military terms
62+
wd:Q355304 # watercourse (note inherited from the Swedish query, where this class produced many irrelevant false positives; TODO confirm the exclusion is also wanted for Italian lexemes)
63+
wd:Q3744866 # common charge (heraldic)
64+
wd:Q386724 # work
65+
wd:Q395 # math
66+
wd:Q40056 # software
67+
wd:Q4167410 # disambigpage
68+
wd:Q4167836 # wikimedia cat
69+
wd:Q43229 # organization of any kind
70+
wd:Q482994 # album
71+
wd:Q494452 # heraldry
72+
wd:Q5 # human
73+
wd:Q5185279 # poem
74+
wd:Q56876983 # symbol (heraldic)
75+
wd:Q57814795 # domesticated animal
76+
wd:Q58408484 # Wikimedia list of persons by surname
77+
wd:Q7187 # gene
78+
wd:Q7366 # song
79+
wd:Q7397 # software
80+
wd:Q8134 # economics
81+
wd:Q819989 # lunar crater
82+
wd:Q99045339 # written question
83+
wd:Q99281788 # star trek location
84+
wd:Q29168811 # animated film
85+
wd:Q5398426 # tv series
86+
wd:Q3744880 # nature figure (heraldry)
87+
wd:Q6881511 # enterprise
88+
wd:Q3658341 # literary character
89+
wd:Q1475691 # mars crater
90+
wd:Q1348589 # lunar crater
91+
wd:Q20643955 # biblical human figure
92+
wd:Q134556 # single (music)
93+
wd:Q786820 # automobile manuf.
94+
wd:Q11060274 # print
95+
wd:Q18218093 # other type of art
96+
wd:Q7725634 # literary work
97+
wd:Q22808320 # disambig name type
98+
wd:Q3404720 # tincture
99+
wd:Q66050470 # conjugation table
100+
}.
101+
?item wdt:P31 ?not }.
102+
# filter on properties:
# Exclude items that have any value at all for one of the predicates in ?not2.
103+
FILTER NOT EXISTS {
104+
VALUES ?not2 {
105+
wdt:P688 # gene stuff
106+
wdt:P195 # collection (of art)
107+
wdt:P1482 # stack exchange
108+
}
109+
?item ?not2 [] }.
110+
# filter on subclass:
# ?not3 is reused by the next three filters; each FILTER NOT EXISTS has its own
# VALUES list, so the lists do not interact.
111+
FILTER NOT EXISTS {
112+
VALUES ?not3 {
113+
wd:Q783794 # company
114+
wd:Q486839 # member of parliament
115+
}
116+
?item wdt:P279 ?not3 }.
117+
# filter on aspect of:
118+
FILTER NOT EXISTS {
119+
VALUES ?not3 {
120+
# 2021-07-29
121+
wd:Q18336 # heraldry
122+
wd:Q494452 # blazonry (NOTE(review): the same item is commented as "heraldry" in the P31 list above — confirm which label is right)
123+
}
124+
?item wdt:P1269 ?not3 }.
125+
# filter on part of:
126+
FILTER NOT EXISTS {
127+
VALUES ?not3 {
128+
wd:Q18336 # heraldry
129+
wd:Q494452 # blazonry
130+
}
131+
?item wdt:P361 ?not3 }.
132+
# special terminology often match these descriptions
133+
# (disabled) would drop rows whose description contains the substring 'araldica'
# FILTER(!contains(?desc, 'araldica')) .
134+
}

0 commit comments

Comments
 (0)