User:TweetsFactsAndQueries/Queries/name phrases

From Wikidata
Jump to navigation Jump to search

Originally posted on Twitter and on Mastodon.

# English-speaking people whose given name is an English adjective and whose family name is an English noun
# (as written; the header result columns are the person item and the two matched name labels).
# Uncomment the %nounGivenNames and %adjectiveFamilyNames parts (the two subquery definitions and the
# first UNION branch in the main WHERE) to also include the opposite, e.g. Guy Standing.
SELECT DISTINCT ?item ?givenNameLabel ?familyNameLabel
# %nounNames: every name item whose P1705 text equals a capitalised English noun lemma.
# Projects the name item (?name) and the capitalised lemma (?nameLabel).
WITH {
  SELECT DISTINCT ?name ?nameLabel WHERE {
    # Optimizer is switched off for this subquery, so keep the patterns below in this order
    # (lexemes first, then the lookup of name items by label).
    hint:SubQuery hint:optimizer "None".
    # Lexemes whose lexical category is Q1084 (noun) or any subclass of it (P279*),
    # restricted to language Q1860 (English); ?_nameLabel is the raw lemma.
    ?noun wikibase:lexicalCategory/wdt:P279* wd:Q1084;
          dct:language wd:Q1860;
          wikibase:lemma ?_nameLabel.
    # Upper-case the first character of the lemma and leave the rest unchanged ("frank" -> "Frank").
    BIND(CONCAT(UCASE(SUBSTR(?_nameLabel, 1, 1)), SUBSTR(?_nameLabel, 2)) AS ?nameLabel)
    # Same text re-tagged with language code "mul", so that labels stored under that tag also match.
    BIND(STRLANG(STR(?nameLabel), "mul") AS ?nameLabel_)
    # Accept a P1705 value under either language tag.
    { ?name wdt:P1705 ?nameLabel. } UNION { ?name wdt:P1705 ?nameLabel_. }
  }
} AS %nounNames
# WITH {
#   SELECT (?name AS ?givenName) (?nameLabel AS ?givenNameLabel) WHERE {
#     INCLUDE %nounNames.
#     FILTER EXISTS { [] wdt:P735 ?name. }
#   }
# } AS %nounGivenNames
# %nounFamilyNames: the subset of %nounNames that at least one item uses as a
# family name (P734), re-exposed as ?familyName / ?familyNameLabel.
WITH {
  SELECT
    (?name AS ?familyName)
    (?nameLabel AS ?familyNameLabel)
  WHERE {
    INCLUDE %nounNames.
    # Keep only names that occur as the object of some P734 statement.
    FILTER EXISTS {
      [] wdt:P734 ?name .
    }
  }
} AS %nounFamilyNames
# %adjectiveNames: every name item whose P1705 text equals a capitalised English adjective lemma.
# Projects the name item (?name) and the capitalised lemma (?nameLabel).
WITH {
  SELECT DISTINCT ?name ?nameLabel WHERE {
    # Optimizer is switched off for this subquery, so keep the patterns below in this order
    # (lexemes first, then the lookup of name items by label).
    hint:SubQuery hint:optimizer "None".
    # NOTE: despite its name, ?noun binds adjective lexemes here: lexical category Q34698
    # (adjective) or any subclass of it (P279*), restricted to language Q1860 (English).
    # The variable is internal to this subquery and is not projected.
    ?noun wikibase:lexicalCategory/wdt:P279* wd:Q34698;
          dct:language wd:Q1860;
          wikibase:lemma ?_nameLabel.
    # Upper-case the first character of the lemma and leave the rest unchanged.
    BIND(CONCAT(UCASE(SUBSTR(?_nameLabel, 1, 1)), SUBSTR(?_nameLabel, 2)) AS ?nameLabel)
    # Same text re-tagged with language code "mul", so that labels stored under that tag also match.
    BIND(STRLANG(STR(?nameLabel), "mul") AS ?nameLabel_)
    # Accept a P1705 value under either language tag.
    { ?name wdt:P1705 ?nameLabel. } UNION { ?name wdt:P1705 ?nameLabel_. }
  }
} AS %adjectiveNames
# %adjectiveGivenNames: the subset of %adjectiveNames that at least one item uses
# as a given name (P735), re-exposed as ?givenName / ?givenNameLabel.
WITH {
  SELECT
    (?name AS ?givenName)
    (?nameLabel AS ?givenNameLabel)
  WHERE {
    INCLUDE %adjectiveNames.
    # Keep only names that occur as the object of some P735 statement.
    FILTER EXISTS {
      [] wdt:P735 ?name .
    }
  }
} AS %adjectiveGivenNames
# WITH {
#   SELECT (?name AS ?familyName) (?nameLabel AS ?familyNameLabel) WHERE {
#     INCLUDE %adjectiveNames.
#     FILTER EXISTS { [] wdt:P734 ?name. }
#   }
# } AS %adjectiveFamilyNames
WHERE {
  # Active branch: adjective as given name, noun as family name.
  {
    INCLUDE %adjectiveGivenNames.
    INCLUDE %nounFamilyNames.
    # ?item is an instance of Q5 with Q1860 (English) as a P103 or P1412 value,
    # and carries both the matched given name and the matched family name.
    ?item wdt:P31 wd:Q5 .
    ?item wdt:P103|wdt:P1412 wd:Q1860 .
    ?item wdt:P735 ?givenName .
    ?item wdt:P734 ?familyName .
  }
  # Opposite branch (noun as given name, adjective as family name).
  # Uncomment it together with the %nounGivenNames and %adjectiveFamilyNames
  # definitions to include results such as Guy Standing.
  # UNION {
  #   INCLUDE %nounGivenNames.
  #   INCLUDE %adjectiveFamilyNames.
  #   ?item wdt:P31 wd:Q5 .
  #   ?item wdt:P103|wdt:P1412 wd:Q1860 .
  #   ?item wdt:P735 ?givenName .
  #   ?item wdt:P734 ?familyName .
  # }
}
LIMIT 50
Try it!

I think I’d like to revisit this in the future, perhaps when we have more data on how commonly a lemma occurs as an adjective or noun versus how often it occurs as a name (all the Franks and Tonys in the results are technically correct, but not especially interesting).

Here’s an earlier version of the query that goes in the other direction (from names to words instead of from words to names) and doesn’t work unless you kill the optimizer:

# English-speaking people whose given and family names are a noun and an adjective (or vice versa)
# This version goes from names to words: it starts at the person, reads the given and family
# name labels, lower-cases them and looks for lexemes with a matching lemma.
SELECT ?item ?givenNameLabel ?familyNameLabel WHERE {
  # Optimizer is switched off for the whole query, so the patterns are meant to stay in this order.
  hint:Query hint:optimizer "None".
  # NOTE(review): ?item is pinned to a single entity here, so the query only checks that one
  # item — presumably a known test case kept to make the query finish; confirm before removing.
  BIND(wd:Q3122156 AS ?item)
  # The item must be an instance of Q5, have a given name (P735) and a family name (P734),
  # and have Q1860 (English) as a P103 or P1412 value.
  ?item wdt:P31 wd:Q5;
        wdt:P735 ?givenName;
        wdt:P734 ?familyName;
        wdt:P103|wdt:P1412 wd:Q1860.
  # Both name items must have Q1860 as P407 value; read their P1705 text.
  ?givenName wdt:P407 wd:Q1860;
             wdt:P1705 ?givenNameLabel_.
  ?familyName wdt:P407 wd:Q1860;
              wdt:P1705 ?familyNameLabel_.
  # original value may have had language code "mul", explicitly change to "en"
  BIND(STRLANG(STR(?givenNameLabel_), "en") AS ?givenNameLabel)
  BIND(STRLANG(STR(?familyNameLabel_), "en") AS ?familyNameLabel)
  # names are typically title case, noun and adjective lemmas are typically lower case
  BIND(LCASE(?givenNameLabel) AS ?_givenNameLabel)
  BIND(LCASE(?familyNameLabel) AS ?_familyNameLabel)
  # First branch: given name is an adjective lemma (category Q34698 or subclass) and
  # family name is a noun lemma (category Q1084 or subclass), both in language Q1860.
  {
    ?adjective wikibase:lemma ?_givenNameLabel;
               dct:language wd:Q1860;
               wikibase:lexicalCategory/wdt:P279* wd:Q34698.
    ?noun wikibase:lemma ?_familyNameLabel;
          dct:language wd:Q1860;
          wikibase:lexicalCategory/wdt:P279* wd:Q1084.
  } UNION {
    # Second branch: the reverse — given name is a noun lemma, family name an adjective lemma.
    ?noun wikibase:lemma ?_givenNameLabel;
          dct:language wd:Q1860;
          wikibase:lexicalCategory/wdt:P279* wd:Q1084.
    ?adjective wikibase:lemma ?_familyNameLabel;
               dct:language wd:Q1860;
               wikibase:lexicalCategory/wdt:P279* wd:Q34698.
  }
}
LIMIT 10
Try it!