Wikidata:SPARQL-frågetjänst/frågor/exempel/underhåll
Frågor för underhåll
For lexicographic maintenance queries see Wikidata:Lexicographical data/Ideas of queries#Maintenance and repairing
The queries below help to maintain the data quality of Wikidata. Note that some of these queries may return no results, it's OK - this means the data is correct, as such queries are meant to detect errors.
Find labels starting with double quotes (")
SELECT *
WHERE
{
?item rdfs:label ?label.
FILTER(
lang(?label) = "en"
&& REGEX(str(?label),"^[\"].*")
)
}
LIMIT 1
Queries for monitoring Wikidata entities sourced from the Disease Ontology
Disease items with redirected values
This query checks for existing redirects in disease statements. Regularly, the ProteinBoxBot synchronizes Disease items in Wikidata with the disease ontology. This query checks for statement value that got redirected to another Wikidata item.
- Expected results: 0 items. >0 items requires manual inspection.
- Properties used: Disease Ontology ID (P699)
#added before 2016-10
SELECT *
WHERE
{
?disease ?p ?o ;
wdt:P699 ?doid.
?o owl:sameAs ?sa .
}
Disease items with statements capturing Disease Ontology IDs that haven't been update for more then a month
ProteinBoxBot regularly synchronizes disease related items in Wikidata with the disease ontology. This query returns all modification dates ?date_update to less than a month.
- Expected results: All update dates older than one month require manual inspection by a curator.
SELECT DISTINCT ?date_range ?date_update (count(?disease) as ?counts)
(GROUP_CONCAT(?disease ;separator = "; ") as ?diseases)
(GROUP_CONCAT(?do_id ;separator = "; ") as ?doids)
{
?disease p:P699/prov:wasDerivedFrom/pr:P813 ?date_update ;
wdt:P699 ?do_id .
BIND (now() - ?date_update as ?date_range)
FILTER (?date_range > 31)
}
GROUP BY ?date_range ?date_update
ORDER BY DESC(?counts)
Items on human genes that haven't been updated for more than a month
- Expected results: All update dates older then one month require manual inspection by a curator.
#added before 2016-10
SELECT DISTINCT ?date_range ?date_update (count(?gene) as ?counts) (GROUP_CONCAT(?gene ;separator = "; ") as ?genes) (GROUP_CONCAT(?ncbi_gene ;separator = "; ") as ?gene_ids) WHERE {
?gene p:P351 ?ncbigene ;
wdt:P351 ?ncbi_gene .
?ncbigene prov:wasDerivedFrom ?derivedFrom .
?derivedFrom pr:P813 ?date_update .
BIND (now() - ?date_update as ?date_range)
FILTER (?date_range > 31)
}
GROUP BY ?date_range ?date_update
ORDER BY DESC(?counts)
Translation of labels and descriptions
Missing labels for a target language
#Search for labels in other languages that do not exist in a target language
#Can be input for Q29032512
SELECT ?item ?itemLabel ?itemDescription WHERE {
?item wdt:P31 wd:Q5. #instance of human
?item wdt:P27 wd:Q31. #country of citizenship Belgium
SERVICE wikibase:label { bd:serviceParam wikibase:language "nl,fr,en,de,it,es,no,pt". }
FILTER(NOT EXISTS {
?item rdfs:label ?lang_label.
FILTER(LANG(?lang_label) = "it") #with missing Italian label
})
}
ORDER BY ?itemLabel
Missing description for a target language
# Search for missing descriptions in a target language.
# Can be input for Q29032512
SELECT ?item ?itemLabel ?genderLabel ?occupationLabel ?domainLabel ?positionLabel ?itemDescription WHERE {
?item wdt:P31 wd:Q5. # instance of human
?item wdt:P27 wd:Q31. # country of citizenship Belgium
optional{ ?item wdt:P21 ?gender.}
optional{ ?item wdt:P106 ?occupation.}
optional{ ?item wdt:P101 ?domain.}
optional{ ?item wdt:P39 ?position.}
SERVICE wikibase:label { bd:serviceParam wikibase:language "nl,fr,en,de,it,es,pt,no". }
FILTER(NOT EXISTS {
?item schema:description ?itemdesc.
FILTER(LANG(?itemdesc) = "nl") # with missing Dutch description
})
}
order by ?itemLabel
Translated labels and aliases for a collection
#title:Non-English labels for Khalili Collections items
SELECT ?lang ?count WITH {
SELECT ?work ?label WHERE {
?work wdt:P195?/wdt:P361? wd:Q63160499. # Khalili Collections, or individual collections, or objects within collections
?work (rdfs:label|skos:altLabel) ?label
FILTER(lang(?label) != "en") } } AS %core
WHERE { {
SELECT ?lang (COUNT(?label) AS ?count) WHERE {
INCLUDE %core.
BIND(lang(?label) AS ?langcode).
OPTIONAL { ?language wdt:P218 ?langcode; rdfs:label ?langname FILTER(lang(?langname) = "en")}
BIND(IF (BOUND(?langname), ?langname, ?langcode) AS ?lang)
} GROUP BY ?lang
} UNION {
SELECT ("TOTAL" AS ?lang) (COUNT(?label) AS ?count) WHERE {
INCLUDE %core.
} }
} order BY DESC(?count) ?lang
Identifying redirects and their targets in a list of QIDs
# Identify redirects using item property owl:sameAs
SELECT (REPLACE(STR(?item),".*Q","Q") AS ?qid) (REPLACE(STR(?tgt),".*Q","Q") AS ?tgtQid) ?tgtLabel
WHERE {
VALUES (?item) {
(wd:Q6636058) (wd:Q8989284) (wd:Q243634) (wd:Q13383295)
(wd:Q659516) (wd:Q1020776) (wd:Q10124662) (wd:Q9066399)
(wd:Q12309669) (wd:Q16033883) (wd:Q733744) (wd:Q226569)
(wd:Q14210256) (wd:Q408041) (wd:Q750594) (wd:Q11075312)
}
?item owl:sameAs ?tgt .
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
ORDER BY ?qid
Find duplicate IMDB identifiers
# added before 2016-10, updated 2020-07
SELECT ?id (COUNT(?obj) AS ?count) (GROUP_CONCAT(?obj; SEPARATOR = " , ") AS ?items)
WHERE { ?obj wdt:P345 ?id. }
GROUP BY ?id
HAVING(?count > 1)
Administrative entities located within themselves
# added before 2016-10
SELECT ?x WHERE {
?x wdt:P131 ?x .
}
People born after year 1601 and before year 1880 with no death date
This was originally before year 1880 but was returning primarily people born thousands of years BCE, which is not a helpful query for people who want to remediate the data. Updated the query to add an earliest birth date of 1602. Users may wish to edit the date to make it even later, e.g. 1800, to find people about whom there's more information. Note: the date 1601 will return people whose birth dates are "17th century" which may not be very useful, hence the choice of people born after 1601.
- Items used: human (Q5)
- Properties used: instance of (P31), date of birth (P569), date of death (P570)
# updated 2019-09-27
SELECT ?h ?date
WHERE
{
?h wdt:P31 wd:Q5 .
?h wdt:P569 ?date .
OPTIONAL {?h wdt:P570 ?d }
FILTER (?date > "1601-01-01T00:00:00Z"^^xsd:dateTime)
FILTER (?date < "1880-01-01T00:00:00Z"^^xsd:dateTime)
FILTER (!bound(?d))
}
LIMIT 100
Properties that are defined as "qualifier only" but are used in references
- Items used: Wikidata qualifier (Q15720608)
- Properties used: instance of (P31)
# added before 2016-10
SELECT ?entity ?claim ?prop
WHERE
{
?prop wdt:P31 wd:Q15720608 .
?prop wikibase:reference ?propClaim .
?ref ?propClaim ?o .
?statement prov:wasDerivedFrom ?ref .
?entity ?claim ?statement .
}
LIMIT 100
Objects that have globe which does not match their coordinates
- Properties used: located on astronomical body (P376), coordinate location (P625)
#added before 2016-10
SELECT ?item ?body ?globe
WHERE {
?item wdt:P376 ?body .
?item p:P625/psv:P625/wikibase:geoGlobe ?globe .
filter (?globe != ?body) .
} LIMIT 30
Aliases of properties which are used more than once
- Items used: none
- Properties used: all
#added before 2016-10
SELECT ?property ?alias ?occurences
WHERE
{
hint:Query hint:optimizer "None" .
{
SELECT ?alias (COUNT (?alias) as ?occurences) WHERE {
?tmp rdf:type wikibase:Property .
?tmp skos:altLabel ?alias FILTER (LANG (?alias) = "en")
} GROUP BY ?alias
}
?property rdf:type wikibase:Property .
?property skos:altLabel ?alias FILTER (?occurences > 1) .
}
ORDER BY ?alias
Actresses without a description in Spanish
- Använda objekt: actor (Q33999) female (Q6581072) human (Q5)
- Använda egenskaper: occupation (P106) sex or gender (P21) instance of (P31)
#added before 2016-10
SELECT DISTINCT ?item ?itemLabel
WHERE
{
?item wdt:P106 wd:Q33999 .
?item wdt:P21 wd:Q6581072 .
OPTIONAL { ?item schema:description ?itemDescription }
FILTER (!BOUND(?itemDescription))
?item wdt:P31 wd:Q5 .
SERVICE wikibase:label { bd:serviceParam wikibase:language "es"}
}
LIMIT 111
French people with an article on the English Wikipedia but not on the French one
- Använda objekt: human (Q5), France (Q142)
- Använda egenskaper: instance of (P31), country of citizenship (P27)
#added before 2016-10
SELECT ?item ?itemLabel (COUNT(distinct ?sitelink) as ?count)
WHERE
{
?item wdt:P31 wd:Q5 .
?item wdt:P27 wd:Q142 .
?sitelink schema:about ?item .
FILTER EXISTS { ?wen schema:about ?item . ?wen schema:inLanguage "en" }
FILTER NOT EXISTS { ?wfr schema:about ?item . ?wfr schema:inLanguage "fr" }
SERVICE wikibase:label {
bd:serviceParam wikibase:language "fr,en" .
}
}
GROUP BY ?item ?itemLabel
ORDER BY DESC(?count)
Deprecated rank statements
#added before 2016-10
SELECT ?wdLabel ?wdPLabel ?valueLabel ?wd ?wdP ?wds ?value
WHERE
{
hint:Query hint:optimizer "None" .
?wds wikibase:rank wikibase:DeprecatedRank .
?wd ?p ?wds .
?wds ?v ?value .
?wdP wikibase:statementProperty ?v .
?wdP wikibase:claim ?p .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
LIMIT 100
VIAF identifier statements with deprecated rank
SELECT ?item ?value WHERE {
?item p:P214 ?statement.
?statement wikibase:rank wikibase:DeprecatedRank;
ps:P214 ?value.
}
Actors whose Spanish label ends with “ (actor)”
- Använda objekt: human (Q5), actor (Q33999)
- Använda egenskaper: instance of (P31), occupation (P106)
#added before 2016-10
# Actors whose Spanish label ends with “ (actor)”.
# This is almost certainly imported from the Wikipedia label, which needs to be unique (distinguish between John Doe the actor, John Doe the author, etc.).
# Wikidata labels have no such restrictions, and therefore should contain no such disambiguatory additions.
# Most likely, all these “ (actor)” suffixes can be removed.
# (This concept obviously applies equally to other professions and languages.)
SELECT ?person ?label (LANG(?label) AS ?lang)
WHERE
{
?person wdt:P31 wd:Q5;
wdt:P106 wd:Q33999;
rdfs:label ?label.
FILTER(LANG(?label) = "es").
FILTER(STRENDS(?label, " (actor)")).
}
Labels containing HTML escape sequences
SELECT DISTINCT ?item ?itemLabel ?snippet
WHERE {
# Old approach was:
# ?item rdfs:label ?label . FILTER CONTAINS(?label, """) .
# It worked before 2017, but due to database growth it started to timeout, eventually.
# Luckily, there is an alternative way to search in labels and aliases with CirrusSearch.
# See [[mw:Help:Extension:WikibaseCirrusSearch]] and [[mw:Wikidata Query Service/User Manual/MWAPI]] for more details.
SERVICE wikibase:mwapi {
bd:serviceParam wikibase:api "Search";
wikibase:endpoint "www.wikidata.org";
mwapi:srsearch "inlabel:\""\"" .
?snippet wikibase:apiOutput "@snippet".
?item wikibase:apiOutputItem mwapi:title.
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
Person labels containing parentheses
- Använda objekt: human (Q5)
- Använda egenskaper: instance of (P31)
#added before 2016-10
SELECT ?person ?label (LANG(?label) AS ?lang)
WHERE
{
?person wdt:P31 wd:Q5;
rdfs:label ?label.
FILTER(LANG(?label) IN ("en", "es", "fr", "de")). # tweak to taste
FILTER(CONTAINS(?label, "(")).
}
LIMIT 10
Non-integer populations
- Använda egenskaper: population (P1082)
#added before 2016-10
SELECT ?country ?countryLabel ?population
WHERE
{
?country wdt:P1082 ?population.
FILTER(?population - ROUND(?population) != 0).
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?population
Odd countries
- Använda objekt: state (Q7275), territory (Q183366), former entity (Q15893266), political territorial entity (Q1048835)
- Använda egenskaper: country (P17), instance of (P31), subclass of (P279)
#added before 2016-10
SELECT ?item ?country ?itemLabel ?countryLabel
WHERE
{
?item wdt:P17 ?country.
MINUS { ?country wdt:P31/wdt:P279* wd:Q7275. } # excludes normal countries
MINUS { ?country wdt:P31/wdt:P279* wd:Q183366. } # excludes Scotland
MINUS { ?country wdt:P31/wdt:P279* wd:Q15893266. } # excludes Brittany
MINUS { ?country wdt:P31/wdt:P279* wd:Q1048835. } # excludes the European Union
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
LIMIT 1000
Paintings on taxons
- Använda objekt: painting (Q3305213), painting support (Q861259), taxon (Q16521)
- Använda egenskaper: instance of (P31), subclass of (P279), made from material (P186), applies to part (P518)
#added before 2016-10
SELECT ?material ?painting ?materialLabel ?paintingLabel
WHERE
{
?painting wdt:P31/wdt:P279* wd:Q3305213;
p:P186 [ ps:P186 ?material; pq:P518 wd:Q861259 ].
?material wdt:P31 wd:Q16521.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
Descriptions that are just the default Portuguese description
#added before 2016-10
SELECT ?item ?label
WHERE
{
?item schema:description "introduza uma descrição"@pt.
OPTIONAL {
?item rdfs:label ?label.
FILTER(LANG(?label) = "pt").
}
}
Items that are simultaneously instance and subclass of the same class
- Använda egenskaper: instance of (P31), subclass of (P279)
#added before 2016-10
SELECT ?thingLabel ?classLabel ?thing ?class
WHERE
{
?thing wdt:P31 ?class.
?thing wdt:P279 ?class.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
Items with specific picture
- Använda egenskaper: image (P18)
Note that URL of the filename must be URL encoded.
#added before 2016-10
SELECT ?item ?itemLabel
WHERE
{
?item wdt:P18 <http://commons.wikimedia.org/wiki/Special:FilePath/Flag%20of%20the%20Netherlands.svg>
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
LIMIT 100
Language statements that point to a country
These language statements point to a country instead of a language, which is almost certainly a mistake. In certain languages, a country and its language may have the same label.
- Använda objekt: state (Q7275)
- Använda egenskaper: instance of (P31), subclass of (P279), official language (P37), native language (P103), original language of film or TV show (P364), language of work or name (P407), languages spoken, written or signed (P1412)
#added before 2016-10
SELECT DISTINCT ?work ?workLabel ?languageLabel
WHERE
{
?work wdt:P37|wdt:P103|wdt:P364|wdt:P407|wdt:P1412 ?language.
?language wdt:P31/wdt:P279* wd:Q7275.
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?languageLabel
People with statements where start and end time are over 100 years apart
It is rare that a statement for a person is actually valid for over 100 years; most of the results are errors (often simple typos in the year).
- Använda objekt: human (Q5)
- Använda egenskaper: instance of (P31), start time (P580), end time (P582)
#added before 2016-10
SELECT ?person ?personLabel ?duration
WHERE
{
?person wdt:P31 wd:Q5;
?p [ pq:P580 ?start; pq:P582 ?end ].
BIND(ROUND((?end - ?start)/365.2425) AS ?duration).
FILTER(?duration > 100).
FILTER(?p != p:P119) . # "place of burial" can be an exception
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY DESC(?duration)
Human gene items with unreferenced statements
This query returns the number of times a given property exists with in a human gene item without references backing that statement.
- Använda objekt: human (Q5) gene (Q7187)
- Använda egenskaper: subclass of (P279), found in taxon (P703)
#added before 2016-10
SELECT ?prop (count(?prop) as ?counts) WHERE
{SELECT DISTINCT ?item ?prop WHERE {
?item wdt:P279 wd:Q7187 ;
wdt:P703 wd:Q5 ;
?prop ?statement .
FILTER(!(regex(str(?prop), "http://www.wikidata.org/prop/direct/" ) ))
FILTER(regex(str(?prop), "http://www.wikidata.org/prop/" ) )
FILTER NOT EXISTS {
?item ?prop ?statement .
?statement prov:wasDerivedFrom ?derivedFrom .}
}
}
GROUP BY ?prop
ORDER BY desc(?counts)
Huvudstäder som inte är huvudstäder
This query returns items that are instances of capital but not the capital of any item.
- Använda objekt: capital city (Q5119)
- Använda egenskaper: instance of (P31), capital (P36), of (P642), start time (P580), end time (P582)
#added before 2016-10
SELECT ?capital ?capitalLabel ?ofLabel ?start ?end
WHERE
{
?capital p:P31 ?capitalStat.
?capitalStat ps:P31 wd:Q5119.
OPTIONAL { ?capitalStat pq:P642 ?of. }
OPTIONAL { ?capitalStat pq:P580 ?start. }
OPTIONAL { ?capitalStat pq:P582 ?end. }
MINUS { [] wdt:P36 ?capital. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
Chemical compounds with no CAS registry number
- Använda objekt: chemical compound (Q11173)
- Använda egenskaper: CAS Registry Number (P231), chemical formula (P274), InChIKey (P235), ChemSpider ID (P661), PubChem CID (P662)
#added before 2016-10
#All CAS registry numbers in Wikidata
SELECT DISTINCT ?compound ?compoundLabel ?formula ?inchikey ?chemspider ?pubchem WHERE {
?compound wdt:P31 wd:Q11173 .
OPTIONAL { ?compound wdt:P231 ?cas . }
OPTIONAL { ?compound wdt:P274 ?formula . }
OPTIONAL { ?compound wdt:P235 ?inchikey . }
OPTIONAL { ?compound wdt:P661 ?chemspider . }
OPTIONAL { ?compound wdt:P662 ?pubchem . }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
FILTER(!BOUND(?cas))
}
Mänskliga bosättningar utan en artikel på engelskspråkiga Wikipedia
- Använda objekt: town (Q3957), city (Q515), village (Q532), human settlement (Q486972)
- Använda egenskaper: instance of (P31), coordinate location (P625)
# Human settlements without an article in the English Wikipedia
#defaultView:Map
SELECT DISTINCT ?settlement ?settlementLabel ?coor WHERE {
VALUES ?type {wd:Q3957 wd:Q515 wd:Q532 wd:Q486972}
?settlement wdt:P31 wd:Q3957 ;
wdt:P625 ?coor .
?article schema:about ?settlement .
FILTER NOT EXISTS { ?wen schema:about ?settlement ; schema:inLanguage "en" }
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
Mänskliga bosättningar utan en artikel på valfri språkversion av Wikipedia
- Använda objekt: town (Q3957), city (Q515), village (Q532), human settlement (Q486972)
- Använda egenskaper: instance of (P31), coordinate location (P625)
#added before 2016-10
# Human settlements without an article in any language version of Wikipedia
#defaultView:Map
SELECT DISTINCT ?city ?cityLabel ?coor WHERE {
VALUES ?type { wd:Q3957 wd:Q515 wd:Q532 wd:Q486972 } .
?city wdt:P31 wd:Q3957 ;
wdt:P625 ?coor .
FILTER NOT EXISTS {?article schema:about ?city } .
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" } .
}
Files used as “image” in more than 10 items
#added before 2016-10
SELECT ?image (count(?image) AS ?cnt) (group_concat(?item) AS ?items) {
?item wdt:P18 ?image
} group by ?image having (?cnt>10)
Properties, their labels, descriptions and if they exist of which they are a subproperty
Description: This query returns a table with all properties and if they exist their subproperties
# added before 2016-10
SELECT DISTINCT ?property ?propertyLabel ?propertyDescription ?subpropertyOf ?subpropertyOfLabel
WHERE
{
?property rdf:type wikibase:Property .
OPTIONAL {?property wdt:P1647 ?subpropertyOf .}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
Most frequent occupations without a label in a given language
# added before 2016-10
# Most frequent occupations without a label in Latvian (lv)
# by Edgars2007/Jura1, 2016-05-09
SELECT ?occup ?occupLabel ?count
WHERE
{
{
SELECT ?occup (COUNT(?person) as ?count)
WHERE
{
?person wdt:P106 ?occup
}
GROUP BY ?occup
ORDER BY DESC(?count)
LIMIT 1000
}
OPTIONAL {?occup rdfs:label ?label1 filter(lang(?label1) = "lv")}
FILTER(!BOUND(?label1))
SERVICE wikibase:label { bd:serviceParam wikibase:language "en,es,pl,ja,zh,ru" }
}
ORDER BY DESC(?count)
LIMIT 50
Items and properties where the description is the same as the label
#Items and properties where the description is the same as the label
#by YMS, 2016-09-26
select ?item ?label (lang(?label) as ?lang) where {
?item rdfs:label ?label .
?item schema:description ?label .
} limit 50
A batch of entities that were imported from Commons that would need to be merged with pre-existing entities
SELECT ?subject ?subjectLabel ?gallery
WHERE {
?subject wdt:P31 wd:Q5 .
?subject wdt:P935 ?gallery.
FILTER NOT EXISTS { ?subject wdt:P18 ?x }
SERVICE wikibase:label { bd:serviceParam wikibase:language "fr,en" }
FILTER regex(str(?subject), "Q211", "i")
}
LIMIT 100
How many people are there whose "number of children (P1971)" matches with "child (P40)"?
PS: Including the corner case where the number of children equals 0. P1971 is mainly used when full listings of children can't or shouldn't be included.
SELECT (COUNT(DISTINCT(?person)) AS ?countPerson)
WHERE {
{ # non-zero number of children matches
?person wdt:P1971 ?numOfChildren .
{SELECT ?person (COUNT(?child) AS ?numOfChildrenComputed)
WHERE {
?person wdt:P40 ?child
}
GROUP BY ?person}
FILTER (?numOfChildren = ?numOfChildrenComputed)
}
UNION
{ # zero number of children matches
?person wdt:P1971 "0"^^xsd:decimal .
FILTER NOT EXISTS { ?person wdt:P40 ?child }
}
}
Class distribution of a property
PS: In the example, the property used is "child (P40)". This can be changed to any other property.
SELECT ?class (COUNT(?subject) AS ?countSubject) WHERE {
{ SELECT DISTINCT ?subject ?class
WHERE {
?subject wdt:P40 ?object .
?subject wdt:P31 ?class
} }
} GROUP BY ?class ORDER BY DESC(?countSubject)
Disambiguation pages entities used as publishers
SELECT ?publisher ?publisherLabel ?work WHERE {
# entities used as publisher
?work wdt:P123 ?publisher .
# but that are actually a disambiguation page
?publisher wdt:P31 wd:Q4167410 .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en,fr,de" .
}
}
Instances of actual languages
#Instances of actual languages (e.g. "instance of Hindi", often incorrectly set to indicate the language of something or to request a sitelink)
SELECT ?item ?itemLabel
WHERE {
?item wdt:P31/wdt:P31* wd:Q34770 .
FILTER NOT EXISTS {?item wdt:P31 wd:Q34770}
SERVICE wikibase:label { bd:serviceParam wikibase:language "en" }
}
Look up q-codes based on English label
SELECT ?lab ?item WHERE {
?item rdfs:label ?lab .
MINUS {?item wdt:P31 wd:Q4167836 } . # no category items
VALUES ?lab {"Agelenopsis pennsylvanica"@en
"Agelenopsis potteri"@en
"Anelosimus studiosus"@en
} .
}
CONSTRUCT query to get the RDF graph for a Wikidata item (e.g. asthma)
CONSTRUCT {
wd:Q35869 ?p ?o .
?o ?qualifier ?f .
?o prov:wasDerivedFrom ?u .
?u ?a ?b .
}
WHERE {
wd:Q35869 ?p ?o .
optional {?o ?qualifier ?f .}
OPTIONAL {?o prov:wasDerivedFrom ?u .
?u ?a ?b .}
}
Personendaten template equivalent (dewiki)
SELECT
?item
?NAME
(GROUP_CONCAT( DISTINCT ?ALTERNATIVNAMEN_; separator=";") as ?ALTERNATIVNAMEN)
(SAMPLE ( ?KURZBESCHREIBUNG_) as ?KURZBESCHREIBUNG)
(GROUP_CONCAT( DISTINCT ?GEBURTSDATUM_; separator=";") as ?GEBURTSDATUM)
(GROUP_CONCAT( DISTINCT ?GEBURTSORT_; separator=";") as ?GEBURTSORT)
(GROUP_CONCAT( DISTINCT ?STERBEDATUM_; separator=";") as ?STERBEDATUM)
(GROUP_CONCAT( DISTINCT ?STERBEORT_; separator=";") as ?STERBEORT)
?article
WITH { SELECT ?item ?article { ?article schema:about ?item ; schema:isPartOf <https://de.wikipedia.org/> . ?item wdt:P31 wd:Q5 . } LIMIT 200 } AS %SUBQUERY
{
INCLUDE %SUBQUERY .
OPTIONAL { ?item skos:altLabel ?ALTERNATIVNAMEN_ . FILTER(lang(?ALTERNATIVNAMEN_)="de") }.
OPTIONAL { ?item schema:description ?KURZBESCHREIBUNG_ . FILTER(lang(?KURZBESCHREIBUNG_)="de") } .
OPTIONAL { ?item wdt:P569 ?GEBURTSDATUM_ } .
OPTIONAL { ?item wdt:P19 ?pob } .
OPTIONAL { ?item wdt:P570 ?STERBEDATUM_ } .
OPTIONAL { ?item wdt:P20 ?pod } .
SERVICE wikibase:label {bd:serviceParam wikibase:language "de" .
?item rdfs:label ?NAME .
?pob rdfs:label ?GEBURTSORT_ .
?pod rdfs:label ?STERBEORT_ }
}
GROUP BY ?item ?NAME ?article
People that have a label in Hebrew, but their given name doesn't have a label in Hebrew
SELECT ?item ?itemLabelHE ?name WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P735 ?name.
FILTER NOT EXISTS {?name rdfs:label ?nameLabelHE filter (lang(?nameLabelHE) = "he")} .
?item rdfs:label ?itemLabelHE filter (lang(?itemLabelHE) = "he") .
} LIMIT 50
Fathers with non-existent or unusual gender statements
# Fathers with non-existent or unusual gender statements
SELECT
(?father as ?father_id) ?father_name ?father_is_a ?father_gender
(?child as ?child_id) ?child_name ?child_is_a
WHERE {
?child wdt:P22 ?father .
?father wdt:P31 ?f_is_a . #?father wdt:P31 wd:Q5 .
?child wdt:P31 ?c_is_a . #?child wdt:P31 wd:Q5 .
OPTIONAL { ?father wdt:P21 ?f_gender . }
# Aliases for the headings in the results table. Must be done this way because
# SELECT (?var as ?varAlias) works, but (?varLabel as ?varLabelAlias) doesn't.
SERVICE wikibase:label {
bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" .
?father rdfs:label ?father_name .
?f_is_a rdfs:label ?father_is_a .
?f_gender rdfs:label ?father_gender .
?child rdfs:label ?child_name .
?c_is_a rdfs:label ?child_is_a .
}
# Show only results where the father's gender:
FILTER(
# ...is not set
NOT EXISTS { ?father wdt:P21 [] }
# ...or is set to something other than "male" or "male organism", or "unknown value"
|| ?f_gender NOT IN(wd:Q6581097, wd:Q44148) && !wikibase:isSomeValue(?f_gender)
)
}
ORDER BY DESC(?f_gender)
Items created last
Since the item creation date seems not to be available for SPARQL queries, here a query for the 10 latest items of a certain class (lighthouse (Q39715) as an example).
select ?item ?itemLabel
where {
# restrict to some class, or select otherwise
# (does not work with results too large!)
?item wdt:P31 wd:Q39715 .
bind(xsd:integer(strafter(str(?item), 'Q')) as ?id)
service wikibase:label { bd:serviceParam wikibase:language "en" .}
}
order by desc(?id )
limit 10
String search : All items having the string "airport" without having instance of (P31) nor subclass of (P279)
SELECT DISTINCT ?item ?itemLabel
WHERE {
hint:Query hint:optimizer "None".
SERVICE wikibase:mwapi {
bd:serviceParam wikibase:api "Search";
wikibase:endpoint "www.wikidata.org";
mwapi:srsearch "airport -haswbstatement:P31 -haswbstatement:P279" .
?title wikibase:apiOutput mwapi:title.
}
BIND(IRI(CONCAT(STR(wd:), ?title)) AS ?item)
#add any other requirements here
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
Performant way to list 100 scholary articles sorted by linkcount
select ?item ?itemLabel ?itemDescription ?linkCount with {
select ?item where {
?item wdt:P31 wd:Q13442814.
} limit 100
} as %result where {
include %result
?item wikibase:sitelinks ?linkCount.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
} order by desc(?linkCount)
Display the class tree under a known class (subclass of)
# Tree of classes under a specified root, up to 7 levels deep
SELECT ?level1Label ?level2Label ?level3Label ?level4Label ?level5Label ?level6Label ?level7Label ?level1 ?level2 ?level3 ?level4 ?level5 ?level6 ?level7
WHERE
{
# Adjust the class value to display a different branch of the class tree (here the class 'database')
VALUES ?class { wd:Q8513 }
{
?level1 wdt:P279 ?class .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
?level3 wdt:P279 ?level2 .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
?level3 wdt:P279 ?level2 .
?level4 wdt:P279 ?level3 .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
?level3 wdt:P279 ?level2 .
?level4 wdt:P279 ?level3 .
?level5 wdt:P279 ?level4 .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
?level3 wdt:P279 ?level2 .
?level4 wdt:P279 ?level3 .
?level5 wdt:P279 ?level4 .
?level6 wdt:P279 ?level5 .
}
UNION
{
?level1 wdt:P279 ?class .
?level2 wdt:P279 ?level1 .
?level3 wdt:P279 ?level2 .
?level4 wdt:P279 ?level3 .
?level5 wdt:P279 ?level4 .
?level6 wdt:P279 ?level5 .
?level7 wdt:P279 ?level6 .
}
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],fr,en". }
}
ORDER BY ?level1Label ?level2Label ?level3Label ?level4Label ?level5Label ?level6Label ?level7Label
Find items that have a double identical property / value claim
For example: find all humans that have occupation (P106) -> actor (Q33999) twice as a claim.
select ?item where {
?item wdt:P31 wd:Q5;
p:P106 ?a, ?b.
filter(?a != ?b)
?a ps:P106 wd:Q33999.
?b ps:P106 wd:Q33999.
}
Occupations with a German label
SELECT DISTINCT ?occ ?occLabelEN ?occLabelDE {
?occ (wdt:P31/wdt:P31* | wdt:P279/wdt:P279*) wd:Q12737077 .
FILTER NOT EXISTS { ?occ wdt:P31 wd:Q5 }
FILTER NOT EXISTS { ?occ wdt:P31 wd:Q13406463 }
FILTER ( ?occ != wd:Q28640 )
?occ rdfs:label ?occLabelDE filter (lang(?occLabelDE) = "de") .
OPTIONAL {
?occ rdfs:label ?occLabelEN filter (lang(?occLabelEN) = "en")
}
}
Animals owned by people holding any position
SELECT ?animal ?animalLabel ?itemLabel (SAMPLE(?image) as ?image)
(GROUP_CONCAT(DISTINCT(?personLabel); separator=", ") as ?owners) {
?item wdt:P31 wd:Q16521.
?animal wdt:P31 ?item .
?person wdt:P39 ?x .
?animal wdt:P127 ?person .
OPTIONAL { ?animal wdt:P18 ?image }
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
?animal rdfs:label ?animalLabel .
?item rdfs:label ?itemLabel .
?person rdfs:label ?personLabel
}
} GROUP BY ?animal ?animalLabel ?itemLabel
Properties of type external-id
# Make a list of properties of the type external-id
SELECT ?property ?propertyLabel ?propertyDescription WHERE {
?property wikibase:propertyType wikibase:ExternalId .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
}
} ORDER BY ?propertyLabel
Items with a Biografisch Portaal number and fewer than 6 statements
# List items that have a Biografisch Portaal number (P651), but have fewer than 6 statements
# The minimum set of statements expected are:
# * Biografisch Portaal number (P651)
# * instance of (P31) -> human (Q5)
# * sex or gender (P21)
# * place of birth (P19)
# * date of birth (P569)
# The SPARQL database isn't completely up to date for the number of statements, so this query will return fewer or incorrect items.
SELECT ?item ?itemLabel ?itemDescription ?statementcount WHERE {
?item wdt:P651 [] .
?item wikibase:statements ?statementcount .
FILTER (?statementcount < 6 )
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en,nl" .
}
} ORDER BY ?itemLabel
Most specific parent classes of Wikidata classes
# What is or are the lowest common ancestor(s) of a
# church, a mosque, a synagogue and a palace in the
# Wikidata ontology ? Answer : an architectural structure
SELECT ?lcs ?lcsLabel WHERE {
?lcs ^wdt:P279* wd:Q32815, wd:Q34627, wd:Q16970, wd:Q16560 .
filter not exists {
?sublcs ^wdt:P279* wd:Q32815, wd:Q34627, wd:Q16970, wd:Q16560 ;
wdt:P279 ?lcs . }
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE], en" . }
}
Verify women descriptions
SELECT ?item ?itemLabel ?occupationLabel ?itemDescription WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P21 wd:Q6581072.
?item wdt:P27 wd:Q31.
optional{ ?item wdt:P106 ?occupation. }
SERVICE wikibase:label { bd:serviceParam wikibase:language "nl". }
}
If needed you can update inappropriate descriptions.
Tips:
- export to Excel and sort by description
- generate a QuickStatements file
QuickStatements template for adding timestamps to novalue statements
This is needed because PetScan does not support novalue queries. See https://github.com/magnusmanske/petscan_rs/issues/103
#title:QS template for adding timestamps to novalue statements
# 2021-09-04
select ?qid ?P9837 ?qal585 #?lexemeId ?lemma
WHERE {
?lexemeId dct:language wd:Q9027.
?lexemeId a wdno:P9837.
bind(replace(str(?lexemeId), "http://www.wikidata.org/entity/", "") as ?qid)
BIND(str("novalue") AS ?P9837)
BIND("+2021-09-01T00:00:00Z/11" AS ?qal585)
}
QuickStatements-export for main subject on documents from Riksdagen
#author: Jan Ainali
#title: QuickStatements-export för huvudtema
SELECT DISTINCT ?qid ?P921 ?S887 # ?itemLabel ?item # Ta bort den första "#" för att kunna bedöma titlarna
WHERE {
VALUES ?topic { wd:Q61566 } # Ange Wikidataobjektet som du vill ange som huvudtema här
?item wdt:P8433 [] ; # För att bara få med Riksdagsdokument.
rdfs:label ?itemLabel .
MINUS {?item wdt:P921 ?topic } # För att inte råka lägga till dubbla påståenden
# så kontrolleras det här om det redan finns
FILTER(CONTAINS(LCASE(?itemLabel), LCASE('skol')) ) .
#|| CONTAINS(LCASE(?itemLabel), LCASE('Bildemontering'))
# ) . # Ange en lämplig söksträng för huvudtemat här
#FILTER(!CONTAINS(LCASE(?itemLabel), LCASE('gymnasieskol'))) . # Ange en ev. uteslutning här
BIND(REPLACE(STR(?item), 'http://www.wikidata.org/entity/', '' ) AS ?qid)
BIND(REPLACE(STR(?topic), 'http://www.wikidata.org/entity/', '' ) AS ?P921)
BIND('Q69652283' AS ?S887) # ändra inte (ska vara Q69652283)
}
offset 2000 limit 50
#limit 100
Tip:
- download as CSV file and paste into QuickStatements
Articles missing an item for their subject (CRAN R packages)
#author:Azertus
#date:2021-09-07
#title: Articles missing an item for their subject (CRAN R packages)
SELECT ?item ?itemLabel ?rPackage ?rPackageLabel WHERE {
{ SELECT * WHERE {
SERVICE wikibase:mwapi {
bd:serviceParam wikibase:endpoint "www.wikidata.org";
wikibase:api "Search";
mwapi:srsearch 'haswbstatement:P31=Q13442814 inlabel:R@en inlabel:package@en';
mwapi:language "en".
?title wikibase:apiOutput mwapi:title.
}
BIND(URI(CONCAT('http://www.wikidata.org/entity/', ?title)) AS ?item)
} LIMIT 5000 }
MINUS {
?item wdt:P921 ?rPackage .
?rPackage wdt:P31/wd:P279* wd:Q73539779 .
}
SERVICE wikibase:label {
bd:serviceParam wikibase:language "en" .
?item rdfs:label ?itemLabel .
?rPackage rdfs:label ?rPackageLabel .
}
}
limit 10
Items showing two coordinates very distant each to other
SELECT ?item ?itemLabel ?location1 ?location2 ?distance WHERE {
?item wdt:P31/wdt:P279* wd:Q3887 ;
wdt:P625 ?location1 , ?location2 .
BIND(geof:distance(?location1, ?location2) AS ?distance )
FILTER(((geof:longitude(?location1)) < (geof:longitude(?location2))) || (((geof:longitude(?location1)) = (geof:longitude(?location2))) && ((geof:latitude(?location1)) < (geof:latitude(?location2)))))
FILTER(?distance > 80 )
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}order by desc (?distance )