|
| 1 | +SELECT ?lexeme ?item ?lemma ?desc ?cat ?genus (wd:Q652 AS ?lang) WITH { |
| 2 | + SELECT ?lexeme ?lemma ?cat ?genus ?item ?desc # %i: lexemes paired with items whose label equals the lemma |
| 3 | + WHERE { |
| 4 | + ?lexeme dct:language wd:Q652 ; |
| 5 | + wikibase:lemma ?lemma ; |
| 6 | + wikibase:lexicalCategory ?cat . |
| 7 | + OPTIONAL { ?lexeme wdt:P5185 ?genus } # ?genus stays unbound when the lexeme has no P5185 value |
| 8 | + ?item rdfs:label ?lemma ; # the item label must equal the lemma literal |
| 9 | + schema:description ?desc . |
| 10 | + FILTER (LANG(?desc) = "it") # keep only descriptions tagged "it" |
| 11 | + FILTER NOT EXISTS { ?item a wikibase:Property } # candidate items must not be properties |
| 12 | + FILTER NOT EXISTS { ?lexeme wdt:P5402 ?otherLexeme } # skip lexemes carrying any P5402 statement |
| 13 | + FILTER NOT EXISTS { # skip pairs already connected: a sense of the lexeme points at the item via P5137 |
| 14 | + ?lexeme ontolex:sense ?linkedSense . |
| 15 | + ?linkedSense wdt:P5137 ?item |
| 16 | + } |
| 17 | + }} AS %i |
| 18 | +WHERE { |
| 19 | + include %i |
| 20 | + # Filter out common false positives among the ?item candidates supplied by %i |
| 21 | + # Items without any P31 are meant to be dropped as well (this is the first restriction to relax when few positives are left) |
| 22 | + #filter not exists {?item wdt:P31 [].} # disabled; NOTE(review): as written this would keep only items WITHOUT a P31, the opposite of the stated intent (dropping them needs FILTER EXISTS) - confirm before re-enabling |
| 23 | + FILTER NOT EXISTS { ?item wdt:P131 [] }. # drop items having any P131 value (admin area items) |
| 24 | + FILTER NOT EXISTS { |
| 25 | + VALUES ?not { |
| 26 | + wd:Q101352 # surname |
| 27 | + wd:Q10429085 # report |
| 28 | + wd:Q105543609 # musical work |
| 29 | + wd:Q11424 # movie |
| 30 | + wd:Q226730 # silent movie |
| 31 | + wd:Q202866 # animation movie |
| 32 | + wd:Q11446 # ships |
| 33 | + wd:Q1145276 # fictional country (Star Trek) |
| 34 | + wd:Q11668273 # shogi pieces |
| 35 | + wd:Q1229071 # motif |
| 36 | + wd:Q13406463 # Wikimedia list article |
| 37 | + wd:Q13417114 # noble family |
| 38 | + wd:Q1391494 # technical terminology |
| 39 | + wd:Q147276 # names of any kind |
| 40 | + wd:Q1505023 # interpellation |
| 41 | + wd:Q15056993 # aircraft family |
| 42 | + wd:Q15632617 # fictional person |
| 43 | + wd:Q15642541 # human settlement definitions |
| 44 | + wd:Q15831596 # class of fictional entities, e.g. the Star Wars sandcrawler |
| 45 | + wd:Q17155032 # type of software |
| 46 | + wd:Q1725664 # terminology of any kind |
| 47 | + wd:Q17537576 # any creative work incl. music, video, text, etc. |
| 48 | + wd:Q17638537 # routine - fix computer terminology |
| 49 | + wd:Q1897960 # trotting horse |
| 50 | + wd:Q19798642 # WD value |
| 51 | + wd:Q19798644 # wikibase model terms |
| 52 | + wd:Q19798645 # wikibase datatype |
| 53 | + wd:Q20202269 # music terms |
| 54 | + wd:Q208569 # album (wd:Q482994 below carries the same note - TODO confirm which album class each one is) |
| 55 | + wd:Q21191270 # tv series episode |
| 56 | + wd:Q215380 # bands |
| 57 | + wd:Q2235308 # types of ships |
| 58 | + wd:Q23786 # eye color |
| 59 | + wd:Q3305213 # painting |
| 60 | + wd:Q3331189 # version, edition, or translation |
| 61 | + wd:Q3491429 # military terms |
| 62 | + wd:Q355304 # watercourse (this has a lot of false positives that are not relevant here; NOTE(review): the note originally said "Swedish lexeme context", but this query targets wd:Q652 with "it" descriptions) |
| 63 | + wd:Q3744866 # common charge (heraldic) |
| 64 | + wd:Q386724 # work |
| 65 | + wd:Q395 # math |
| 66 | + wd:Q40056 # software (wd:Q7397 below carries the same note - TODO confirm the distinction) |
| 67 | + wd:Q4167410 # disambiguation page |
| 68 | + wd:Q4167836 # Wikimedia category |
| 69 | + wd:Q43229 # organization of any kind |
| 70 | + wd:Q482994 # album |
| 71 | + wd:Q494452 # heraldry (the same QID is noted as "blazonry" in the aspect-of / part-of filters below - TODO confirm label) |
| 72 | + wd:Q5 # human |
| 73 | + wd:Q5185279 # poem |
| 74 | + wd:Q56876983 # symbol (heraldic) |
| 75 | + wd:Q57814795 # domesticated animal |
| 76 | + wd:Q58408484 # Wikimedia list of persons by surname |
| 77 | + wd:Q7187 # gene |
| 78 | + wd:Q7366 # song |
| 79 | + wd:Q7397 # software |
| 80 | + wd:Q8134 # economics |
| 81 | + wd:Q819989 # lunar crater (wd:Q1348589 below has the same note - TODO confirm) |
| 82 | + wd:Q99045339 # written question |
| 83 | + wd:Q99281788 # Star Trek location |
| 84 | + wd:Q29168811 # animated film |
| 85 | + wd:Q5398426 # tv series |
| 86 | + wd:Q3744880 # nature figure (heraldry) |
| 87 | + wd:Q6881511 # enterprise |
| 88 | + wd:Q3658341 # literary character |
| 89 | + wd:Q1475691 # Mars crater |
| 90 | + wd:Q1348589 # lunar crater |
| 91 | + wd:Q20643955 # biblical human figure |
| 92 | + wd:Q134556 # single (music) |
| 93 | + wd:Q786820 # automobile manufacturer |
| 94 | + wd:Q11060274 # print |
| 95 | + wd:Q18218093 # other type of art |
| 96 | + wd:Q7725634 # literary work |
| 97 | + wd:Q22808320 # disambig name type |
| 98 | + wd:Q3404720 # tincture |
| 99 | + wd:Q66050470 # conjugation table |
| 100 | + }. |
| 101 | + ?item wdt:P31 ?not }. |
| 102 | + # filter on properties: drop items that have any value for one of these predicates |
| 103 | + FILTER NOT EXISTS { |
| 104 | + VALUES ?not2 { |
| 105 | + wdt:P688 # gene stuff |
| 106 | + wdt:P195 # collection (of art) |
| 107 | + wdt:P1482 # Stack Exchange |
| 108 | + } |
| 109 | + ?item ?not2 [] }. |
| 110 | + # filter on subclass: drop items that are a direct wdt:P279 subclass of one of these |
| 111 | + FILTER NOT EXISTS { |
| 112 | + VALUES ?not3 { |
| 113 | + wd:Q783794 # company |
| 114 | + wd:Q486839 # member of parliament |
| 115 | + } |
| 116 | + ?item wdt:P279 ?not3 }. |
| 117 | + # filter on aspect of (wdt:P1269); ?not3 is reused here, but each FILTER NOT EXISTS group matches it independently |
| 118 | + FILTER NOT EXISTS { |
| 119 | + VALUES ?not3 { |
| 120 | + # 2021-07-29 |
| 121 | + wd:Q18336 # heraldry |
| 122 | + wd:Q494452 # blazonry |
| 123 | + } |
| 124 | + ?item wdt:P1269 ?not3 }. |
| 125 | + # filter on part of (wdt:P361): |
| 126 | + FILTER NOT EXISTS { |
| 127 | + VALUES ?not3 { |
| 128 | + wd:Q18336 # heraldry |
| 129 | + wd:Q494452 # blazonry |
| 130 | + } |
| 131 | + ?item wdt:P361 ?not3 }. |
| 132 | + # special terminology often matches these descriptions (the filter below is disabled) |
| 133 | + # FILTER(!contains(?desc, 'araldica')) . |
| 134 | +} |
0 commit comments