Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Italian lexemes query #82

Merged
merged 3 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ def get_lexeme_values(self) -> Tuple[int, int, Optional[int]]:
"queries/en.sparql",
"queries/fr.sparql",
"queries/he.sparql",
"queries/it.sparql",
"queries/nb.sparql",
"queries/sv.sparql",
"queries/withoutdescriptions.sparql",
Expand Down
3 changes: 2 additions & 1 deletion queries/default.sparql
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ SELECT * WHERE {
?lang != wd:Q25167 && #nb
?lang != wd:Q150 && #fr
?lang != wd:Q9035 && #da
?lang != wd:Q9288 #he
?lang != wd:Q9288 && #he
?lang != wd:Q652 #it
).
?lexeme wikibase:lemma ?lemma.
# Search for potentially matching items with descriptions
Expand Down
134 changes: 134 additions & 0 deletions queries/it.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
SELECT ?lexeme ?item ?lemma ?desc ?cat ?genus (wd:Q652 as ?lang) WITH {
SELECT *
WHERE {
?lexeme wikibase:lemma ?lemma;
dct:language wd:Q652;
wikibase:lexicalCategory ?cat.
OPTIONAL { ?lexeme wdt:P5185 ?genus. } .
FILTER NOT EXISTS {
?lexeme ontolex:sense ?sense.
?sense wdt:P5137 ?item.
}.
FILTER NOT EXISTS { ?item a wikibase:Property}.
FILTER NOT EXISTS { ?lexeme wdt:P5402 ?homograph }.
?item rdfs:label ?lemma.
?item schema:description ?desc.
FILTER(LANG(?desc) = "it").
}} AS %i
WHERE {
include %i
# Filter out common false-positives
# anything not having a P31 has to go too (this is the first thing to relax when few positives are left)
#filter not exists {?item wdt:P31 [].} # stuff without P31 which is usually false positives
FILTER NOT EXISTS { ?item wdt:P131 [] }. # admin area items
FILTER NOT EXISTS {
VALUES ?not {
wd:Q101352 # surname
wd:Q10429085 # report
wd:Q105543609 # musical work
wd:Q11424 # movie
wd:Q226730 # silent movie
wd:Q202866 # animation movie
wd:Q11446 # ships
wd:Q1145276 # fictional country (star trek)
wd:Q11668273 # shogi pieces
wd:Q1229071 # motif
wd:Q13406463 # wikimedia list article
wd:Q13417114 # noble family
wd:Q1391494 # technical terminology
wd:Q147276 # names of any kind
wd:Q1505023 # interpellation
wd:Q15056993 # aircraft family
wd:Q15632617 # fictive person
wd:Q15642541 # human settlement definitions
wd:Q15831596 # class of fictional entities e.g. star wars sandcrawler
wd:Q17155032 # type of software
wd:Q1725664 # terminology of any kind
wd:Q17537576 # any creative work incl. music, video, text, etc.
wd:Q17638537 # routine - fix computer terminology
wd:Q1897960 # trotting horse
wd:Q19798642 # WD value
wd:Q19798644 # wikibase model terms
wd:Q19798645 # wikibase datatype
wd:Q20202269 # music terms
wd:Q208569 # album
wd:Q21191270 # tv series episode
wd:Q215380 # bands
wd:Q2235308 # types of ships
wd:Q23786 # eye color
wd:Q3305213 # painting
wd:Q3331189 # version, edition, or translation
wd:Q3491429 # military terms
wd:Q355304 # watercourse (this has a lot of false positives that are not relevant in a Swedish lexeme context)
wd:Q3744866 # common charge (heraldic)
wd:Q386724 # work
wd:Q395 # math
wd:Q40056 # software
wd:Q4167410 # disambigpage
wd:Q4167836 # wikimedia cat
wd:Q43229 # organization of any kind
wd:Q482994 # album
wd:Q494452 # heraldry
wd:Q5 # human
wd:Q5185279 # poem
wd:Q56876983 # symbol (heraldic)
wd:Q57814795 # domesticated animal
wd:Q58408484 # Wikimedia list of persons by surname
wd:Q7187 # gene
wd:Q7366 # song
wd:Q7397 # software
wd:Q8134 # economics
wd:Q819989 # lunar crater
wd:Q99045339 # written question
wd:Q99281788 # star trek location
wd:Q29168811 # animated film
wd:Q5398426 # tv series
wd:Q3744880 # nature figure (heraldry)
wd:Q6881511 # enterprise
wd:Q3658341 # literary character
wd:Q1475691 # mars crater
wd:Q1348589 # lunar crater
wd:Q20643955 # biblical human figure
wd:Q134556 # single (music)
wd:Q786820 # automobile manuf.
wd:Q11060274 # print
wd:Q18218093 # other type of art
wd:Q7725634 # literary work
wd:Q22808320 # disambig name type
wd:Q3404720 # tincture
wd:Q66050470 # conjugation table
}.
?item wdt:P31 ?not }.
# filter on properties:
FILTER NOT EXISTS {
VALUES ?not2 {
wdt:P688 # gene stuff
wdt:P195 # collection (of art)
wdt:P1482 # stack exchange
}
?item ?not2 [] }.
# filter on subclass:
FILTER NOT EXISTS {
VALUES ?not3 {
wd:Q783794 # company
wd:Q486839 # member of parliament
}
?item wdt:P279 ?not3 }.
# filter on aspect of:
FILTER NOT EXISTS {
VALUES ?not3 {
# 2021-07-29
wd:Q18336 # heraldry
wd:Q494452 # blazonry
}
?item wdt:P1269 ?not3 }.
# filter on part of:
FILTER NOT EXISTS {
VALUES ?not3 {
wd:Q18336 # heraldry
wd:Q494452 # blazonry
}
?item wdt:P361 ?not3 }.
# special terminology often match these descriptions
# FILTER(!contains(?desc, 'araldica')) .
}
Loading