User:Ijon/Lexeme

From Wikidata
Jump to navigation Jump to search

This page lists various resources useful for contributing to and using the lexicographical layer of Wikidata, fondly known as Lexeme.

Tutorial[edit]

Useful queries[edit]

Forms needing pronunciation files[edit]

The following query uses these:

  • Properties: pronunciation audio (P443)  View with Reasonator View with SQID
    #title:All forms in Hebrew missing a pronunciation for the form with a representation identical to the lemma of the lexeme
    # So9q 13-01-2021
    # For every lexeme whose language is wd:Q9288 (Hebrew, per the title), list
    # the form whose representation equals the lexeme's lemma, provided that
    # form has no P443 (pronunciation audio) statement.
    SELECT ?l ?lemma ?form ?audio
    WHERE {
      ?l dct:language wd:Q9288 .
      ?l wikibase:lemma ?lemma .
      ?l ontolex:lexicalForm ?form .
      # Keep only the form that is written exactly like the lemma.
      ?form ontolex:representation ?lemma .
      # Remove forms that already have audio. ?audio is bound only inside this
      # MINUS, so the ?audio column is empty in every returned row.
      MINUS { ?form wdt:P443 ?audio . }
    }
    

A query for a batch of words needing pronunciation that can be fed directly to LinguaLibre[edit]

The following query uses these:

  • Properties: pronunciation audio (P443)  View with Reasonator View with SQID
    #title:All forms in Hebrew missing a pronunciation for the form with a representation identical to the lemma of the lexeme
    # So9q 13-01-2021
    # Returns at most 100 forms of wd:Q9288 (Hebrew, per the title) lexemes
    # whose representation equals the lemma and that have no P443
    # (pronunciation audio) statement. The form is exposed as ?id and the
    # lemma as ?label.
    SELECT ?l ?label ?id ?audio
    WHERE {
      ?l dct:language wd:Q9288 .
      ?l wikibase:lemma ?label .
      ?l ontolex:lexicalForm ?id .
      # The form's written representation must match the lemma exactly.
      ?id ontolex:representation ?label .
      # Exclude forms that already have audio; ?audio is never bound in the
      # results because it only occurs inside this MINUS.
      MINUS { ?id wdt:P443 ?audio . }
    }
    # No ORDER BY is given, so which 100 rows come back is up to the endpoint.
    LIMIT 100
    

Grammatical features used in a language[edit]

The following query uses these:

  • Items: French (Q150)  View with Reasonator View with SQID
    #title: List of grammatical features used in French language (Q150) lexemes
    # One row per grammatical feature found on any form of a wd:Q150 lexeme:
    # the number of distinct lexemes using it and one sample lexeme.
    SELECT
      ?grammaticalFeature
      ?grammaticalFeatureLabel
      (COUNT(DISTINCT ?lexeme) AS ?count)
      (SAMPLE(?lexeme) AS ?sample)
    WHERE {
      ?lexeme dct:language wd:Q150 ;
              # lexeme -> form -> grammatical feature of that form
              ontolex:lexicalForm/wikibase:grammaticalFeature ?grammaticalFeature .
      # Fills ?grammaticalFeatureLabel in the UI language, else English.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
    }
    GROUP BY ?grammaticalFeature ?grammaticalFeatureLabel
    # Most-used features first; ties are ordered by label.
    ORDER BY DESC(?count) ?grammaticalFeatureLabel
    

List of external identifiers for lexemes[edit]

The following query uses these:

  • Properties: instance of (P31)  View with Reasonator View with SQID, subclass of (P279)  View with Reasonator View with SQID
    #title: List of external identifier properties for lexemes
    # Properties that are classified both as a lexeme property (directly or
    # through a subclass chain) and as an external identifier.
    SELECT ?prop ?propLabel
    WHERE {
      ?prop wdt:P31/wdt:P279* wd:Q54076056 ;  # lexeme property
            wdt:P31 wd:Q19847637 .            # external identifier
      # Label service: fills ?propLabel in the UI language, falling back to English.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    

More example queries[edit]

The following query uses these:

  • Properties: grammatical gender (P5185)  View with Reasonator View with SQID, item for this sense (P5137)  View with Reasonator View with SQID, instance of (P31)  View with Reasonator View with SQID, subclass of (P279)  View with Reasonator View with SQID
    #title: List of lexemes in French about occupation items with gender
    # wd:Q150 lexemes that carry a grammatical gender (P5185) and have a sense
    # linked (P5137) to an item that is an instance of wd:Q12737077 or of one of
    # its subclasses. Item and gender labels are restricted to French.
    SELECT DISTINCT ?item ?itemLabel ?sense ?lexeme ?lemma ?genre ?genreLabel
    WHERE {
      ?lexeme a ontolex:LexicalEntry .
      ?lexeme dct:language wd:Q150 .
      ?lexeme wikibase:lemma ?lemma .
      ?lexeme ontolex:sense ?sense .
      ?lexeme wdt:P5185 ?genre .

      # The sense points at the item it denotes.
      ?sense wdt:P5137 ?item .
      ?item wdt:P31/wdt:P279* wd:Q12737077 .

      # French label of the item.
      ?item rdfs:label ?itemLabel .
      FILTER(LANG(?itemLabel) = "fr")

      # French label of the gender value.
      ?genre rdfs:label ?genreLabel .
      FILTER(LANG(?genreLabel) = "fr")
    }
    ORDER BY ?item ?lemma ?genre
    

The following query uses these:

  • Properties: item for this sense (P5137)  View with Reasonator View with SQID
    #title: Items with the most lexemes describing them
    # Top 100 items ranked by how many distinct senses point at them via P5137.
    # Note that the count is over senses, not lexemes.
    SELECT ?item ?itemLabel (COUNT(DISTINCT ?sense) AS ?count)
    WHERE {
      ?sense wdt:P5137 ?item .
      # The English label is optional, so items without one are still listed.
      OPTIONAL {
        ?item rdfs:label ?itemLabel .
        FILTER(LANG(?itemLabel) = "en")
      }
    }
    GROUP BY ?item ?itemLabel
    ORDER BY DESC(?count)
    LIMIT 100
    

The following query uses these:

  • Properties: item for this sense (P5137)  View with Reasonator View with SQID
    #title: Language-pairs of connected lexemes with count of lexemes
    # Two lexemes are "connected" when each has a sense linked (P5137) to the
    # same item. The named subquery counts such connections per language pair
    # and keeps the 100 largest; the outer query only adds labels.
    SELECT ?language1 ?language1Label ?language2 ?language2Label ?number_of_lexemes
    WITH {
      SELECT ?language1 ?language2 (COUNT(*) AS ?number_of_lexemes)
      WHERE {
        # Walk backwards from one shared item: item <- sense <- lexeme, twice.
        [] ^wdt:P5137 / ^ontolex:sense ?lexeme1, ?lexeme2 .
        FILTER (?lexeme1 != ?lexeme2)
        ?lexeme1 dct:language ?language1 .
        ?lexeme2 dct:language ?language2 .
        FILTER (?language1 != ?language2)
        # Fix one ordering of the two IRIs so each pair is reported once.
        FILTER (STR(?language1) > STR(?language2))
      }
      GROUP BY ?language1 ?language2
      ORDER BY DESC(?number_of_lexemes)
      LIMIT 100
    } AS %result
    WHERE {
      INCLUDE %result
      # Fills ?language1Label / ?language2Label in the UI language, else English.
      SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
    }
    ORDER BY DESC(?number_of_lexemes)
    

The following query uses these:

  • Properties: grammatical gender (P5185)  View with Reasonator View with SQID
    #title: Ukrainian nouns by gender
    # Lists lemma, English gender label and lexeme. The named subquery collects
    # the distinct lemmas of wd:Q8798 lexemes with lexical category wd:Q1084
    # whose P5185 (grammatical gender) is one of the three VALUES items; the
    # outer query joins those lemmas back to lexemes and their gender labels.
    SELECT ?lemma ?gender_label ?lexeme  WITH {
      SELECT DISTINCT ?lemma WHERE {
        VALUES ?gender { wd:Q499327 wd:Q1775415 wd:Q1775461 }
        ?lexeme dct:language wd:Q8798;
                wikibase:lexicalCategory wd:Q1084;
                wdt:P5185 ?gender;
                wikibase:lemma ?lemma.
      }
      # NOTE(review): presumably meant as a random shuffle. MD5 yields a
      # fixed-length string and NOW() is constant within one execution, so this
      # key appears to sort the same as MD5(?lemma) alone on every run - confirm.
      ORDER BY CONCAT(MD5(?lemma), STR(NOW()))
    } AS %randomLemmas WHERE {
      INCLUDE %randomLemmas.
      # Lexemes are re-matched here by lemma only; the language, category and
      # VALUES constraints of the subquery are not repeated in this pattern.
      ?lexeme wikibase:lemma ?lemma;
              wdt:P5185 ?gender.
         ?gender rdfs:label ?gender_label .
         FILTER(LANG(?gender_label) = 'en')
    }
    GROUP BY ?lemma ?lexeme ?gender_label
    # NOTE(review): ?gender_label is a grouping key, so a group would hold more
    # than one ?gender only if two genders share an English label - verify that
    # this filter excludes what it is meant to.
    HAVING(COUNT(?gender) = 1)