# User:Peter F. Patel-Schneider/fixed-level-program
#
# From Wikidata
# Jump to navigation Jump to search
#!/bin/python
import csv
import subprocess
import re


#	     class           first-order c    W instance c     metaclass       Wikidata meta   second-order    third-order     fourth-order    fifth-order     higher-order    fixed-order     variable-ord  
# Top-level ontology classes that are filtered out of query results,
# one Wikidata item per line (names follow the legend comment above).
excluded = [
   'wd:Q16889133',   # class
   'wd:Q104086571',  # first-order class
   'wd:Q21522908',   # Wikidata instance class
   'wd:Q19478619',   # metaclass
   'wd:Q19361238',   # Wikidata metaclass
   'wd:Q24017414',   # second-order
   'wd:Q24017465',   # third-order
   'wd:Q24027474',   # fourth-order
   'wd:Q24027515',   # fifth-order
   'wd:Q24027526',   # higher-order
   'wd:Q23959932',   # fixed-order
   'wd:Q23958852',   # variable-order
]
# The same items rendered as a parenthesised SPARQL list for "NOT IN" filters.
exclude = '({})'.format(', '.join(excluded))

def tablefy(output):
   """Convert tab-separated query output into a list of rows.

   The first non-blank line is treated as the header and dropped.  In every
   remaining row the even-numbered columns are reduced to the first L- or
   Q-identifier they contain, and the odd-numbered columns (labels) have
   their surrounding quotes and language tag removed.

   Returns the list of rows (each a list of strings).  If a row cannot be
   processed the output is assumed to be an error message: the first word
   ending in "Exception" found in it is returned as a string, or a
   descriptive string if no such word exists.  Callers tell the two cases
   apart with isinstance(result, str).
   """
   table = [line.split('\t') for line in output.split('\n') if line.strip()]
   try:
      for row in table[1:]:
         for i in range(0, len(row), 2):
            row[i] = re.search(r"[LQ][0-9]+", row[i]).group(0)
         for i in range(1, len(row), 2):
            # Labels are requested in several languages, so strip the closing
            # quote together with any language tag (not just @en), or the
            # bare closing quote of an untagged literal; [1:] then drops the
            # opening quote.
            row[i] = re.sub(r'.@en$|"(?:@[A-Za-z]+(?:-[A-Za-z0-9]+)*)?$', '', row[i])[1:]
      return table[1:]
   except Exception:
      try:
         return re.search(r'[-a-zA-Z]*Exception', output).group(0)
      except Exception as e:
         return "Unexpected exception " + str(e) + " when trying to extract from " + output

def label(classq):
   """Fetch the label of a Wikidata item from the query service.

   classq is the bare item identifier (it is prefixed with "wd:" here).
   The query is sent with curl; the returned text has its "?classLabel"
   header line removed and is stripped of surrounding whitespace.  A curl
   failure propagates as the exception raised by subprocess.check_output.
   """
   sparql = ''.join([
      'SELECT DISTINCT ?classLabel WHERE {\n',
      '         SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de,fr,it". }\n',
      '         BIND ( wd:', classq, ' AS ?class ) ?class ?p ?v . }',
   ])
   curl_command = [
      '/usr/bin/curl', '--http1.1', '-s',
      '-H', 'Accept: text/tab-separated-values',
      '-G', 'https://query.wikidata.org/sparql',
      '--data-urlencode', 'query=' + sparql,
   ]
   raw = subprocess.check_output(curl_command, text=True)
   without_header = raw.replace('?classLabel\n', '')
   return without_header.strip()

def query_table(query):
   """Run a SPARQL query through curl and return the result as a table.

   The request is attempted at most twice.  On success the tab-separated
   response is handed to tablefy(), whose result (a list of rows, or an
   error string) is returned.  If curl fails both times the string
   "CURL-error-return-Exception" is returned.
   """
   command = ['/usr/bin/curl', '--http1.1', '-s',
              '-H', 'Accept: text/tab-separated-values',
              '-G', 'https://query.wikidata.org/sparql',
              '--data-urlencode', 'query=' + query]
   result = None
   for attempt in range(2):
      try:
         result = subprocess.check_output(command, text=True)
         break
      except Exception:
         # Deliberately broad: any failure to run curl is treated the same way.
         if attempt == 0:
            print("CURL returned error - retrying")
   if result is None:
      # Return the marker directly: tablefy() would take this one-line string
      # for a header and report an empty table, hiding the failure from
      # callers that check isinstance(result, str).
      return "CURL-error-return-Exception"
   return tablefy(result)

def instances(classq, indirect=True):
   """Query instances related to a class, skipping the excluded classes.

   With indirect true, ?sub ranges over the direct subclasses of classq and
   ?class over the instances of ?sub or of any of its subclasses.  With
   indirect false, ?sub is classq itself and ?class ranges over its direct
   instances.  Rows are ordered by ?sub and carry four columns:
   instance, instance label, ?sub, ?sub label.

   Returns whatever query_table() returns for the query.
   """
   item = 'wd:' + classq
   if indirect:
      sub_pattern = '?sub wdt:P279 ' + item + '.'
      type_path = 'wdt:P31/wdt:P279*'
   else:
      sub_pattern = 'BIND (' + item + ' AS ?sub)'
      type_path = 'wdt:P31'
   sparql = '''SELECT DISTINCT ?class ?classLabel ?sub ?subLabel WHERE {
       ''' + sub_pattern + '''
       ?class ''' + type_path + ''' ?sub .
       FILTER (?class NOT IN ''' + exclude + ''')
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       } ORDER BY ?sub'''
   return query_table(sparql)

def subclasses(classq, indirect=True):
   """Query the subclasses of a class, skipping the excluded classes.

   With indirect true the subclass relation is followed one or more steps;
   otherwise only direct subclasses are selected.  Each row has two
   columns: subclass and its label.

   Returns whatever query_table() returns for the query.
   """
   steps = '+' if indirect else ''
   sparql = '''SELECT DISTINCT  ?class ?classLabel WHERE {
       ?class wdt:P279''' + steps + ''' wd:''' + classq + ''' .
       FILTER (?class NOT IN ''' + exclude + ''')
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       }'''
   return query_table(sparql)

def class_instances(classq, indirect=True, labels=True):
   """Query the instances of a class that are themselves classes.

   An item counts as a class when it has a superclass, a subclass or an
   instance, or is an instance of class (Q16889133) or one of its
   subclasses.  With indirect true, instances of subclasses of classq are
   included.  With labels false the label service is left out of the query.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT  ?class ?classLabel WHERE {
       ?class wdt:P31''' + ('/wdt:P279*' if indirect else '') + ''' wd:''' + classq + ''' .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       ''' + ( 'SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }' if labels else '' ) + '''
       }'''
   return query_table(query)

def non_first_order_class_instances(classq, indirect=True, labels=True):
   """Query the instances of a class that have a class among their instances.

   ?nfo ranges over the instances of classq (including instances of its
   subclasses when indirect is true).  It is selected when some ?class is
   an instance of ?nfo (or of a subclass of it) and ?class is itself a
   class: it has a superclass, a subclass or an instance, or is an instance
   of class (Q16889133) or one of its subclasses.  With labels false the
   label service is left out of the query.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT  ?nfo ?nfoLabel WHERE {
       ?nfo wdt:P31''' + ('/wdt:P279*' if indirect else '') + '''  wd:''' + classq + ''' .
       ?class wdt:P31/wdt:P279* ?nfo .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       ''' + ('SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }' if labels else '') + '''
       }'''
   return query_table(query)

def potentially_first_order_class_instances(classq, indirect=True, labels=True):
   """Query the instances of a class that have no class among their instances.

   ?pfo ranges over the instances of classq (including instances of its
   subclasses when indirect is true).  It is selected when there is no
   ?class that is an instance of ?pfo (or of a subclass of it) and is
   itself a class, i.e. has a superclass, a subclass or an instance, or is
   an instance of class (Q16889133) or one of its subclasses.  With labels
   false the label service is left out of the query.

   Returns whatever query_table() returns for the query.
   """
   # Fixes: the path uses P279 (subclass of) like every other query here,
   # the string pieces are joined with a single "+" (a run of "+" applied
   # unary plus to a string and raised TypeError), and the class item is
   # written wd:Q16889133 rather than with the wdt: property prefix.
   query = '''SELECT DISTINCT  ?pfo ?pfoLabel WHERE {
       ?pfo wdt:P31''' + ('/wdt:P279*' if indirect else '') + ''' wd:''' + classq + ''' .
       FILTER NOT EXISTS {
         ?class wdt:P31/wdt:P279* ?pfo .
         { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .} }
       ''' + ('SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }' if labels else '') + '''
       }'''
   return query_table(query)

def potentially_first_order_class_subclasses(classq, indirect=True, labels=True):
   """Query the subclasses of a class that have no class among their instances.

   ?pfo ranges over the subclasses of classq (one or more steps when
   indirect is true, direct only otherwise).  It is selected when there is
   no ?class that is an instance of ?pfo (or of a subclass of it) and is
   itself a class, i.e. has a superclass, a subclass or an instance, or is
   an instance of class (Q16889133) or one of its subclasses.  With labels
   false the label service is left out of the query.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT ?pfo ?pfoLabel WHERE {
       ?pfo wdt:P279''' + ('+' if indirect else '') + ''' wd:''' + classq + ''' .
       FILTER NOT EXISTS {
         ?class wdt:P31/wdt:P279* ?pfo .
         { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .} }
       ''' + ('SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }' if labels else '') + '''
       }'''
   return query_table(query)

def not_first_order(classq, labels=True):
   """Query for evidence that a class is not first-order.

   Selects every instance of classq (or of one of its subclasses) that is
   itself a class: it has a superclass, a subclass or an instance, or is an
   instance of class (Q16889133) or one of its subclasses.  An empty result
   means no such instance was found.  With labels false the label service
   is left out of the query.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT ?class ?classLabel WHERE {
       ?class wdt:P31/wdt:P279* wd:''' + classq + ''' .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       ''' + ('SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }' if labels else '') + '''
       }'''
   return query_table(query)

def not_second_order(classq):
   """Query for evidence that a class is not second-order.

   Selects every item reached from classq by two instance-of steps (each
   optionally followed by subclass-of steps) that is itself a class: it has
   a superclass, a subclass or an instance, or is an instance of class
   (Q16889133) or one of its subclasses.  An empty result means no such
   item was found.  Only the item column is selected (no labels).

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT ?class WHERE {
       ?class wdt:P31/wdt:P279*/wdt:P31/wdt:P279* wd:''' + classq + ''' .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       }'''
   return query_table(query)

def non_second_order_class_subclasses(classq, indirect=True):
   """Query the subclasses of a class that are not second-order.

   ?nso ranges over the subclasses of classq (one or more steps when
   indirect is true, direct only otherwise).  It is selected when it has an
   instance ?nfo that in turn has an instance ?class which is itself a
   class: ?class has a superclass, a subclass or an instance, or is an
   instance of class (Q16889133) or one of its subclasses.  Instance-of
   steps may be followed by subclass-of steps.  Rows are ordered by ?nso.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT ?nso ?nsoLabel WHERE {
       ?nso wdt:P279''' + ('+' if indirect else '') + ''' wd:''' + classq + ''' .
       ?nfo wdt:P31/wdt:P279* ?nso .
       ?class wdt:P31/wdt:P279* ?nfo .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       } ORDER BY ?nso'''
   return query_table(query)

def non_second_order_class_instances(classq, indirect=True):
   """Query the instances of a class that are not second-order.

   ?nso ranges over the instances of classq (including instances of its
   subclasses when indirect is true).  It is selected, together with a
   witness ?nfo, when ?nfo is an instance of ?nso and has an instance
   ?class which is itself a class: ?class has a superclass, a subclass or
   an instance, or is an instance of class (Q16889133) or one of its
   subclasses.  Rows are ordered by ?nso and carry four columns: ?nso, its
   label, ?nfo, its label.

   Returns whatever query_table() returns for the query.
   """
   # Fixes relative to the earlier version of this query:
   #  - instance-of steps are followed by "wdt:P279*" (zero or more
   #    subclass steps) as in the sibling queries; a bare "wdt:P279"
   #    demanded exactly one subclass step and so skipped direct instances;
   #  - the class item is written wd:Q16889133, since the wdt: (property)
   #    prefix can never match;
   #  - the query is sent once, through query_table(); an extra unguarded
   #    curl call whose output was discarded has been removed.
   query = '''SELECT DISTINCT  ?nso ?nsoLabel ?nfo ?nfoLabel WHERE {
       ?nso wdt:P31''' + ('/wdt:P279*' if indirect else '') + ''' wd:''' + classq + ''' .
       ?nfo wdt:P31/wdt:P279* ?nso .
       ?class wdt:P31/wdt:P279* ?nfo .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       } ORDER BY ?nso'''
   return query_table(query)

def non_third_order_class_subclasses(classq, indirect=True):
   """Query the subclasses of a class that are not third-order.

   ?nto ranges over the subclasses of classq (one or more steps when
   indirect is true, direct only otherwise).  It is selected when a chain
   of three instance-of steps (each optionally followed by subclass-of
   steps) leads from some ?class up to ?nto and ?class is itself a class:
   it has a superclass, a subclass or an instance, or is an instance of
   class (Q16889133) or one of its subclasses.  Rows are ordered by ?nto.

   Returns whatever query_table() returns for the query.
   """
   # The last UNION branch must name the item wd:Q16889133; with the wdt:
   # (property) prefix it can never match.
   query = '''SELECT DISTINCT  ?nto ?ntoLabel WHERE {
       ?nto wdt:P279''' + ('+' if indirect else '') + ''' wd:''' + classq + ''' .
       ?nso wdt:P31/wdt:P279* ?nto .
       ?nfo wdt:P31/wdt:P279* ?nso .
       ?class wdt:P31/wdt:P279* ?nfo .
       { ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } UNION { ?s wdt:P31 ?class . } UNION { ?class wdt:P31/wdt:P279* wd:Q16889133 .}
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       } ORDER BY ?nto'''
   return query_table(query)

def show_class_instances(classq, indirect=True):
   """Print the instances of classq that are themselves classes.

   Prints a heading with the item, its label and the number of rows, then
   one tab-separated line (item, label) per row, then a blank line.  If the
   query fails a single "Cannot retrieve" line is printed instead.
   """
   ci = class_instances(classq, indirect=indirect)
   if isinstance(ci, str):
      # class_instances() reports failures as a string; do not treat it as rows.
      print("Cannot retrieve class instances of", classq, label(classq), ci)
   else:
      print("Class instances of", classq, label(classq), len(ci))
      for i in ci:
         print(f'{i[0]}\t{i[1]}')
   print()

def show_nfo_class_instances(classq, indirect=True):
   """Print the instances of classq that are not first-order classes.

   A string result from the query signals failure and produces a single
   "Cannot retrieve" line.  Otherwise a heading with the row count is
   printed, followed by one tab-separated line (item, label) per row.
   A blank line ends the output in both cases.
   """
   rows = non_first_order_class_instances(classq, indirect=indirect)
   if not isinstance(rows, str):
      mode = "indirect" if indirect else "direct"
      print("Non-first-order-class", mode, "instances of", classq, label(classq), len(rows))
      for row in rows:
         print(f'{row[0]}\t{row[1]}')
   else:
      print("Cannot retrieve non-first-order-class instances of", classq, label(classq))
   print()

def show_nfo_class_instances_iterative(classq, indirect=True):
   """Report instances of classq that are not first-order, one query each.

   The instances are fetched first; each one is then checked separately
   with class_instances().  For every instance that has class instances a
   line is printed with the number found, one example, the instance and
   its label, and the subclass (with label) it was found under.  Failures
   of an individual check are reported on their own line.  If the list of
   instances itself cannot be fetched, one "Cannot retrieve" line is
   printed and nothing else is attempted.
   """
   soi = instances(classq, indirect=indirect)
   if isinstance(soi, str):
      # instances() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances of", classq, label(classq), soi)
      print()
      return
   print("Non-first-order-class", "indirect" if indirect else "direct", "instances of", classq, label(classq), "out of", len(soi))
   print("Defects Example      Instance     Label                             Subclass     Label")
   for i in soi:
      table = class_instances(i[0], indirect=True, labels=False)
      if isinstance(table, str):
         print(f'Cannot retrieve class instances of {i[0]} {i[1]} {table}')
      elif table:
         print(f'{len(table)}\t{table[0][0]:12s} {i[0]:12s} {i[1]:30s}    {i[2]:12s} {i[3]}')
   print()

def show_nso_class_subclasses(classq, indirect=True):
   """Print the subclasses of classq that are not second-order classes.

   Prints a heading with the item, its label and the number of rows, then
   one tab-separated line per row, then a blank line.  If the query fails a
   single "Cannot retrieve" line is printed instead.
   """
   ci = non_second_order_class_subclasses(classq, indirect=indirect)
   if isinstance(ci, str):
      # The query helper reports failures as a string; do not treat it as rows.
      print("Cannot retrieve non-second-order-class subclasses of", classq, label(classq), ci)
   else:
      print("Non-second-order-class", 'all' if indirect else 'direct', "subclasses of", classq, label(classq), len(ci))
      for i in ci:
         print('\t'.join(i))
   print()

def show_nso_class_instances(classq, indirect=True):
   """Print the instances of classq that are not second-order classes.

   Prints a heading with the item, its label and the number of rows, then
   one tab-separated line per row.  If the query fails a "Cannot determine"
   line is printed instead.  A blank line ends the output in both cases.
   """
   ci = non_second_order_class_instances(classq, indirect=indirect)
   # Failures come back as a string (never as False), so test for that.
   if not isinstance(ci, str):
      print("Non-second-order-class instances of", classq, label(classq), len(ci))
      for i in ci:
         print('\t'.join(i))
   else:
      print("Cannot determine non-second-order-class instances of", classq, label(classq))
   print()

def show_nso_class_instances_iterative(classq, indirect=True):
   """Report instances of classq that are not second-order, one query each.

   The instances are fetched first; each one is then checked separately
   with non_first_order_class_instances().  When the check over indirect
   instances fails it is retried over direct instances only.  For every
   instance with non-first-order instances a line is printed with the
   number found, one example, the instance and its label and, for the
   full check, the subclass (with label) it was found under.  If the list
   of instances itself cannot be fetched, one "Cannot retrieve" line is
   printed and nothing else is attempted.
   """
   soi = instances(classq, indirect=indirect)
   if isinstance(soi, str):
      # instances() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances of", classq, label(classq), soi)
      print()
      return
   print("Non-second-order-class", 'indirect' if indirect else 'direct', "instances of", classq, label(classq), "out of", len(soi))
   print("Defects Example      Instance     Label                             Subclass     Label")
   for i in soi:
      table = non_first_order_class_instances(i[0], indirect=True, labels=False)
      if isinstance(table, str):
         print(f'Cannot retrieve non-first-order instances of {i[0]} {i[1]} {table}')
         # Fall back to the cheaper query over direct instances only.
         table = non_first_order_class_instances(i[0], indirect=False, labels=False)
         if isinstance(table, str):
            print(f'Cannot retrieve non-first-order direct instances of {i[0]} {i[1]} {table}')
         elif table:
            print(f'{len(table)}\t{table[0][0]:12s} {i[0]:12s} {i[1]} (direct instances only)')
      elif table:
         print(f'{len(table)}\t{table[0][0]:12s} {i[0]:12s} {i[1]:30s}    {i[2]:12s} {i[3]}')
   print()

def show_nto_class_subclasses_iterative(classq, indirect=True):
   """Report subclasses of classq that are not third-order, one query each.

   The subclasses are fetched first; each one is then checked separately
   with non_second_order_class_instances().  For every subclass with
   non-second-order instances a line is printed with the number found, one
   example, the subclass and its label.  Failures of an individual check
   are reported on their own line.  If the list of subclasses itself
   cannot be fetched, one "Cannot retrieve" line is printed and nothing
   else is attempted.
   """
   soi = subclasses(classq, indirect)
   if isinstance(soi, str):
      # subclasses() reports failures as a string; do not iterate over it.
      print("Cannot retrieve subclasses of", classq, label(classq), soi)
      print()
      return
   print('Non-third-order-class ' + ('indirect ' if indirect else 'direct ') + 'subclasses of', classq, label(classq), "out of", len(soi))
   print("Defects Example      Instance     Label")
   for i in soi:
      table = non_second_order_class_instances(i[0], indirect=indirect)
      if isinstance(table, str):
         print(f'Cannot retrieve non-second-order instances of {i[0]} {i[1]} {table}')
      elif table:
         print(f'{len(table)}\t{table[0][0]:12s} {i[0]:12s} {i[1]:30s}')
   print()

def show_nto_class_instances_iterative(classq, indirect=True):
   """Report instances of classq that are not third-order, one query each.

   The instances are fetched first; each one is then checked separately
   with non_second_order_class_instances().  For every instance with
   non-second-order instances a line is printed with the number found, one
   example, the instance and its label, and the subclass (with label) it
   was found under.  Failures of an individual check are reported on their
   own line.  If the list of instances itself cannot be fetched, one
   "Cannot retrieve" line is printed and nothing else is attempted.
   """
   soi = instances(classq, indirect=indirect)
   if isinstance(soi, str):
      # instances() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances of", classq, label(classq), soi)
      print()
      return
   print('Non-third-order-class instances of', classq, label(classq), "out of", len(soi))
   print("Defects Example      Instance     Label                             Subclass     Label")
   for i in soi:
      table = non_second_order_class_instances(i[0], indirect=indirect)
      if isinstance(table, str):
         print(f'Cannot retrieve non-second-order instances of {i[0]} {i[1]} {table}')
      elif table:
         print(f'{len(table)}\t{table[0][0]:12s} {i[0]:12s} {i[1]:30s}    {i[2]:12s} {i[3]}')
   print()

def show_pfo_class_subclasses(classq, indirect=True):
   """Print the subclasses of classq that are potentially first-order.

   The heading is printed before the query is run and is completed with
   the number of rows once the result arrives; one tab-separated line per
   row follows, then a blank line.  If the query fails the heading is
   completed with a "cannot be retrieved" note instead.
   """
   print("Potentially first-order class", "indirect" if indirect else "direct", "subclasses of", classq, label(classq), end=' ')
   ci = potentially_first_order_class_subclasses(classq, indirect=indirect)
   if isinstance(ci, str):
      # The query helper reports failures as a string; do not treat it as rows.
      print("cannot be retrieved:", ci)
   else:
      print(len(ci))
      for i in ci:
         print('\t'.join(i))
   print()

def show_pfo_class_subclasses_iterative(classq, indirect=True):
   """Report subclasses of classq that are potentially first-order.

   The subclasses are fetched first; each one is then checked separately
   with not_first_order().  A subclass is printed (item and label) when
   that check returns no rows.  Failures of an individual check are
   reported on their own line.  If the list of subclasses itself cannot be
   fetched, one "Cannot retrieve" line is printed and nothing else is
   attempted.
   """
   soi = subclasses(classq, indirect=indirect)
   if isinstance(soi, str):
      # subclasses() reports failures as a string; do not iterate over it.
      print("Cannot retrieve subclasses of", classq, label(classq), soi)
      print()
      return
   print("Potentially first-order class", "indirect" if indirect else "direct", "subclasses of", classq, label(classq), "out of", len(soi))
   for i in soi:
      table = not_first_order(i[0], labels=False)
      if isinstance(table, str):
         print(f'Cannot determine first-order status of {i[0]} {i[1]} {table}')
      elif not table:
         print(f'{i[0]:12s} {i[1]}')
   print()

def show_pfo_class_instances_iterative(classq, indirect=True):
   """Report instances of classq that are potentially first-order.

   The instances are fetched first; each one is then checked separately
   with not_first_order().  An instance is printed (item and label) when
   that check returns no rows.  Failures of an individual check are
   reported on their own line.  If the list of instances itself cannot be
   fetched, one "Cannot retrieve" line is printed and nothing else is
   attempted.
   """
   soi = instances(classq, indirect=indirect)
   if isinstance(soi, str):
      # instances() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances of", classq, label(classq), soi)
      print()
      return
   # print() already separates its arguments with a space, so the heading
   # pieces carry no padding of their own (this matches the other reports).
   print("Potentially first-order class", "indirect" if indirect else "direct", "instances of", classq, label(classq), "out of", len(soi))
   for i in soi:
      table = not_first_order(i[0], labels=False)
      if isinstance(table, str):
         print(f'Cannot determine first-order status of {i[0]} {i[1]} {table}')
      elif not table:
         print(f'{i[0]:12s} {i[1]}')
   print()

def show_pso_class_subclasses_iterative(classq, indirect=True):
   """Report subclasses of classq that are potentially second-order.

   The subclasses are fetched first; each one is then checked separately
   with not_second_order().  A subclass is printed (item and label) when
   that check returns no rows.  Failures of an individual check are
   reported on their own line.  If the list of subclasses itself cannot be
   fetched, one "Cannot retrieve" line is printed and nothing else is
   attempted.
   """
   soi = subclasses(classq, indirect=indirect)
   if isinstance(soi, str):
      # subclasses() reports failures as a string; do not iterate over it.
      print("Cannot retrieve subclasses of", classq, label(classq), soi)
      print()
      return
   print("Potentially second-order class", "indirect" if indirect else "direct", "subclasses of", classq, label(classq), "out of", len(soi))
   for i in soi:
      table = not_second_order(i[0])
      if isinstance(table, str):
         print(f'Cannot determine second-order status of {i[0]} {i[1]} {table}')
      elif not table:
         print(f'{i[0]:12s} {i[1]}')
   print()

def show_pso_class_instances_iterative(classq, indirect=True):
   """Report instances of classq that are potentially second-order.

   The instances are fetched first; each one is then checked separately
   with not_second_order().  An instance is printed (item and label) when
   that check returns no rows.  Failures of an individual check are
   reported on their own line.  If the list of instances itself cannot be
   fetched, one "Cannot retrieve" line is printed and nothing else is
   attempted.
   """
   soi = instances(classq, indirect=indirect)
   if isinstance(soi, str):
      # instances() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances of", classq, label(classq), soi)
      print()
      return
   print("Potentially second-order class", "indirect" if indirect else "direct", "instances of", classq, label(classq), "out of", len(soi))
   for i in soi:
      table = not_second_order(i[0])
      if isinstance(table, str):
         print(f'Cannot determine second-order status of {i[0]} {i[1]} {table}')
      elif not table:
         print(f'{i[0]:12s} {i[1]}')
   print()


def show_in_excluded_class_instances(classq, etype, esuper):
   """Print instances of classq that fall under given excluded classes.

   Two tables are printed.  The first lists items, among the instances of
   classq and their subclasses, that are instances of one of the classes
   in etype (directly or through subclasses).  The second lists such items
   that are subclasses (one or more steps) of one of the classes in esuper.
   etype and esuper are sequences of prefixed item names such as
   "wd:Q16889133".  A query that fails is reported with a "Cannot
   retrieve" line in place of its rows.
   """
   print("Instances of", classq, label(classq), " in excluded class")
   print("Instance     Label                             Type     Label")
   query = '''SELECT DISTINCT ?class ?classLabel ?other ?otherLabel WHERE {
       ?class wdt:P279*/wdt:P31/wdt:P279* wd:''' + classq + '''.
       ?class wdt:P31/wdt:P279* ?other .
       FILTER (?other IN (''' + ', '.join(etype) + ''') )
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       }'''
   badis = query_table(query)
   if isinstance(badis, str):
      # query_table() reports failures as a string; do not iterate over it.
      print("Cannot retrieve instances in excluded types", badis)
   else:
      for i in badis:
         if len(i) == 4:
            print(f'{i[0]:12s} {i[1]:30s} {i[2]:12s} {i[3]}')
   print("Instance     Label                             Super     Label")
   query = '''SELECT DISTINCT ?class ?classLabel ?other ?otherLabel WHERE {
       ?class wdt:P279*/wdt:P31/wdt:P279* wd:''' + classq + '''.
       ?class wdt:P279+ ?other .
       FILTER (?other IN (''' + ', '.join(esuper) + ''') )
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       }'''
   badss = query_table(query)
   if isinstance(badss, str):
      print("Cannot retrieve instances under excluded superclasses", badss)
   else:
      for i in badss:
         if len(i) == 4:
            print(f'{i[0]:12s} {i[1]:30s} {i[2]:12s} {i[3]}')


def show_in_excluded_class_subclasses(classq, etype, esuper):
   """Print subclasses of classq that fall under given excluded classes.

   Two tables are printed.  The first lists subclasses of classq (one or
   more steps) that are instances of one of the classes in etype (directly
   or through subclasses).  The second lists subclasses of classq that are
   also subclasses (one or more steps) of one of the classes in esuper.
   etype and esuper are sequences of prefixed item names such as
   "wd:Q16889133".  A query that fails is reported with a "Cannot
   retrieve" line in place of its rows.
   """
   print("Subclasses of", classq, label(classq), " in excluded class")
   print("Subclass     Label                             Type     Label")
   query = '''SELECT DISTINCT ?class ?classLabel ?other ?otherLabel WHERE {
       ?class wdt:P279+ wd:''' + classq + '''.
       ?class wdt:P31/wdt:P279* ?other .
       FILTER (?other IN (''' + ', '.join(etype) + ''') )
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       }'''
   badis = query_table(query)
   if isinstance(badis, str):
      # query_table() reports failures as a string; do not iterate over it.
      print("Cannot retrieve subclasses in excluded types", badis)
   else:
      for i in badis:
         if len(i) == 4:
            print(f'{i[0]:12s} {i[1]:30s} {i[2]:12s} {i[3]}')
   print("Subclass     Label                             Super     Label")
   # The "Super" table follows the subclass-of relation, mirroring the
   # corresponding query in show_in_excluded_class_instances(); the earlier
   # text repeated the instance-of path of the "Type" table.
   query = '''SELECT DISTINCT ?class ?classLabel ?other ?otherLabel WHERE {
       ?class wdt:P279+ wd:''' + classq + '''.
       ?class wdt:P279+ ?other .
       FILTER (?other IN (''' + ', '.join(esuper) + ''') )
       SERVICE wikibase:label { bd:serviceParam wikibase:language "en,fr,de". }
       }'''
   badss = query_table(query)
   if isinstance(badss, str):
      print("Cannot retrieve subclasses under excluded superclasses", badss)
   else:
      for i in badss:
         if len(i) == 4:
            print(f'{i[0]:12s} {i[1]:30s} {i[2]:12s} {i[3]}')


# ---------------------------------------------------------------------------
# Report driver: runs the checks below against the top-level classes of the
# ontology and prints the results wrapped in <nowiki> tags.
# ---------------------------------------------------------------------------
from datetime import datetime
print(' <nowiki>')
print("Report on some classes that do not match the requirements of the class from the top level of the ontology")
print("that they are related to or are suspicious in some way.")
print("Current as of", str(datetime.now()))
print()

# first-order class 
show_nfo_class_instances_iterative('Q104086571', indirect=False)
##NO SUBCLASSES show_nfo_class_instances_iterative('Q104086571')
##NO SUBCLASSES show_nso_class_subclasses('Q104086571', indirect=False)
##NO SUBCLASSES show_nso_class_subclasses('Q104086571')
##NO SUBCLASSES show_pfo_class_subclasses('Q104086571', indirect=False)
##NO SUBCLASSES show_pfo_class_subclasses('Q104086571')
##LATER show_in_excluded_class_instances('Q104086571', excluded[3:-2], excluded[6:-2])
##LATER show_in_excluded_class_subclasses('Q104086571', excluded[6:-2], excluded[7:-2])

# Wikidata instance class
show_nfo_class_instances_iterative('Q21522908', indirect=False) 
show_nfo_class_instances_iterative('Q21522908') 
show_nso_class_subclasses('Q21522908', indirect=False) 
show_nso_class_subclasses('Q21522908') 
show_pfo_class_subclasses_iterative('Q21522908', indirect=False) 
show_pfo_class_subclasses_iterative('Q21522908') 
##LATER show_in_excluded_class_instances('Q21522908', excluded[3:-2], excluded[6:-2])
##LATER show_in_excluded_class_subclasses('Q21522908', excluded[6:-2], excluded[7:-2])

# second-order class
show_nso_class_instances_iterative('Q24017414', indirect=False) 
show_nso_class_instances_iterative('Q24017414')
show_nto_class_subclasses_iterative('Q24017414', indirect=False)
show_nto_class_subclasses_iterative('Q24017414')
show_pfo_class_instances_iterative('Q24017414', indirect=False)
show_pfo_class_instances_iterative('Q24017414')
show_pso_class_subclasses_iterative('Q24017414', indirect=False)
show_pso_class_subclasses_iterative('Q24017414')
##LATER show_in_excluded_class_instances('Q21522908', excluded[6:-2], excluded[7:-2])
##LATER show_in_excluded_class_subclasses('Q21522908', excluded[7:-2], excluded[8:-2])

# third-order class
show_nto_class_instances_iterative('Q24017465', indirect=False) 
## NO SUBCLASSES show_nto_class_instances_iterative('Q24017465')
## NO SUBCLASSES
show_pso_class_instances_iterative('Q24017465', indirect=False)
## NO SUBCLASSES show_pso_class_instances_iterative('Q24017465')
## NO SUBCLASSES

print("\nThe following reports are not necessarily errors, but may show a problem with a class\n")

# metaclass
# Same sequence of checks as for second-order class above.  The direct
# potentially-first-order instance check replaces a line that ran the direct
# potentially-second-order subclass report a second time.
# NOTE(review): confirm the direct instance check is small enough to run here.
show_nso_class_instances_iterative('Q19478619', indirect=False) 
show_nso_class_instances_iterative('Q19478619') 
show_nto_class_subclasses_iterative('Q19478619', indirect=False)
show_nto_class_subclasses_iterative('Q19478619')
show_pfo_class_instances_iterative('Q19478619', indirect=False)
##TOO LARGE show_pfo_class_instances_iterative('Q19478619')
show_pso_class_subclasses_iterative('Q19478619', indirect=False)
show_pso_class_subclasses_iterative('Q19478619')

# Wikidata metaclass
# As above: the direct potentially-first-order instance check replaces a
# repeated direct potentially-second-order subclass report.
show_nso_class_instances_iterative('Q19361238', indirect=False) 
show_nso_class_instances_iterative('Q19361238') 
show_nto_class_subclasses_iterative('Q19361238', indirect=False)
show_nto_class_subclasses_iterative('Q19361238')
show_pfo_class_instances_iterative('Q19361238', indirect=False)
show_pfo_class_instances_iterative('Q19361238')
show_pso_class_subclasses_iterative('Q19361238', indirect=False)
show_pso_class_subclasses_iterative('Q19361238')

# Close the <nowiki> opened at the top of the report.
# NOTE(review): this line read print('') in the copy of the script this was
# taken from; the closing tag was presumably swallowed when the wiki page
# was rendered - confirm against the page source.
print('</nowiki>')