User:Peter F. Patel-Schneider/fixed-level-program
Jump to navigation
Jump to search
#!/bin/python
"""Report Wikidata classes that conflict with the fixed-order class hierarchy.

Every report sends SPARQL queries to the Wikidata Query Service through
``curl`` and prints plain-text tables.  The output is wrapped in a wiki
``<nowiki>`` element so that it can be pasted into a wiki page.
"""

import csv  # not used below; the import is retained from the original script
import re
import subprocess
from datetime import datetime

SPARQL_ENDPOINT = 'https://query.wikidata.org/sparql'

# Classes from the top level of the ontology.  The order matters: the
# (currently disabled) show_in_excluded_* calls in main() slice this list.
excluded = [
    'wd:Q16889133',   # class
    'wd:Q104086571',  # first-order class
    'wd:Q21522908',   # Wikidata instance class
    'wd:Q19478619',   # metaclass
    'wd:Q19361238',   # Wikidata metaclass
    'wd:Q24017414',   # second-order class
    'wd:Q24017465',   # third-order class
    'wd:Q24027474',   # fourth-order class
    'wd:Q24027515',   # fifth-order class
    'wd:Q24027526',   # higher-order class
    'wd:Q23959932',   # fixed-order class
    'wd:Q23958852',   # variable-order class
]
exclude = '(' + ', '.join(excluded) + ')'

_CURL_COMMAND = ['/usr/bin/curl', '--http1.1', '-s',
                 '-H', 'Accept: text/tab-separated-values',
                 '-G', SPARQL_ENDPOINT, '--data-urlencode']
_CURL_ERRORS = (subprocess.SubprocessError, OSError, UnicodeError)

_LABEL_SERVICE = ('SERVICE wikibase:label '
                  '{ bd:serviceParam wikibase:language "en,fr,de". }')

# "instance of some (transitive) subclass of".
_ANY_INSTANCE = 'wdt:P31/wdt:P279*'

# Evidence that ?class is used as a class: it has a superclass, a subclass,
# an instance, or is itself an instance of Q16889133.  Q16889133 is an item,
# so it needs the wd: prefix; the wdt: prefix denotes direct properties and
# would make the last branch unmatchable.
_IS_CLASS = ('{ ?class wdt:P279 ?x . } UNION { ?x wdt:P279 ?class . } '
             'UNION { ?s wdt:P31 ?class . } '
             'UNION { ?class ' + _ANY_INSTANCE + ' wd:Q16889133 . }')

_ENTITY_ID = re.compile(r"[LQ][0-9]+")
_LITERAL = re.compile(r'^"(.*)"(?:@[A-Za-z]+(?:-[A-Za-z0-9]+)*)?$', re.DOTALL)


def _sparql(*lines):
    """Join query fragments into one SPARQL string (blank fragments are harmless)."""
    return '\n'.join(lines)


def _instance_path(indirect):
    """Return the property path for direct or indirect 'instance of'."""
    return 'wdt:P31' + ('/wdt:P279*' if indirect else '')


def _subclass_path(indirect):
    """Return the property path for direct or transitive 'subclass of'."""
    return 'wdt:P279' + ('+' if indirect else '')


def _label_service(labels):
    """Return the label-service clause, or nothing when labels are not wanted."""
    return _LABEL_SERVICE if labels else ''


def _run_sparql(query):
    """Send *query* to the endpoint with curl and return the raw TSV text."""
    return subprocess.check_output(_CURL_COMMAND + ['query=' + query], text=True)


def _error_text(output, cause):
    """Summarise a failed query as a short string naming the exception."""
    match = re.search(r'[-a-zA-Z]*Exception', output)
    if match:
        return match.group(0)
    return ("Unexpected exception " + str(cause)
            + " when trying to extract from " + output)


def _strip_literal(cell):
    """Drop the quotes and any language tag from a TSV literal cell."""
    match = _LITERAL.match(cell)
    return match.group(1) if match else cell


def _clean_row(cells):
    """Reduce entity cells (even columns) to ids and label cells (odd) to text."""
    cleaned = []
    for index, cell in enumerate(cells):
        if index % 2 == 0:
            match = _ENTITY_ID.search(cell)
            if match is None:
                raise ValueError("no entity id in " + repr(cell))
            cleaned.append(match.group(0))
        else:
            cleaned.append(_strip_literal(cell))
    return cleaned


def tablefy(output):
    """Convert the TSV text of a query result into a list of rows.

    Columns are expected to alternate entity, label, entity, label, ...
    Entity cells become bare ids such as ``Q5``; label cells lose their
    quotes and language tag (any language, not only ``@en``).

    Returns the list of rows (without the header), or a ``str`` describing
    the failure when *output* is not a well-formed result.  Output that does
    not start with a ``?variable`` header line is treated as a failure, so a
    one-line error message is no longer mistaken for an empty result.
    """
    lines = [line for line in output.split('\n') if line.strip()]
    if not lines or not lines[0].startswith('?'):
        return _error_text(output, "missing result header")
    try:
        return [_clean_row(line.split('\t')) for line in lines[1:]]
    except ValueError as e:
        return _error_text(output, e)


def label(classq):
    """Return the raw label cell for item *classq*, or '' if curl fails."""
    query = _sparql(
        'SELECT DISTINCT ?classLabel WHERE {',
        'SERVICE wikibase:label { bd:serviceParam wikibase:language "en,de,fr,it". }',
        'BIND ( wd:' + classq + ' AS ?class )',
        '?class ?p ?v .',
        '}',
    )
    try:
        result = _run_sparql(query)
    except _CURL_ERRORS:
        # A missing label should not abort the whole report.
        return ''
    return result.replace('?classLabel\n', '').strip()


def query_table(query):
    """Run *query* (retrying once on a curl failure) and tablefy the result.

    Returns a list of rows, or a ``str`` describing the failure.
    """
    try:
        result = _run_sparql(query)
    except _CURL_ERRORS:
        print("CURL returned error - retrying")
        try:
            result = _run_sparql(query)
        except _CURL_ERRORS:
            result = "CURL-error-return-Exception"
    return tablefy(result)


def instances(classq, indirect=True):
    """Query instances of *classq* (direct) or of its direct subclasses' trees.

    Rows are ``[class, classLabel, sub, subLabel]`` where ``sub`` is the class
    the instance was reached through.  Members of ``excluded`` are left out.
    """
    clss = 'wd:' + classq
    if indirect:
        anchor = '?sub wdt:P279 ' + clss + '.'
    else:
        anchor = 'BIND (' + clss + ' AS ?sub)'
    return query_table(_sparql(
        'SELECT DISTINCT ?class ?classLabel ?sub ?subLabel WHERE {',
        anchor,
        '?class ' + _instance_path(indirect) + ' ?sub .',
        'FILTER (?class NOT IN ' + exclude + ')',
        _LABEL_SERVICE,
        '} ORDER BY ?sub',
    ))


def subclasses(classq, indirect=True):
    """Query direct or transitive subclasses of *classq*, minus ``excluded``."""
    return query_table(_sparql(
        'SELECT DISTINCT ?class ?classLabel WHERE {',
        '?class ' + _subclass_path(indirect) + ' wd:' + classq + ' .',
        'FILTER (?class NOT IN ' + exclude + ')',
        _LABEL_SERVICE,
        '}',
    ))


def class_instances(classq, indirect=True, labels=True):
    """Query instances of *classq* that are themselves used as classes."""
    return query_table(_sparql(
        'SELECT DISTINCT ?class ?classLabel WHERE {',
        '?class ' + _instance_path(indirect) + ' wd:' + classq + ' .',
        _IS_CLASS,
        _label_service(labels),
        '}',
    ))


def non_first_order_class_instances(classq, indirect=True, labels=True):
    """Query instances of *classq* that have an instance which is a class."""
    return query_table(_sparql(
        'SELECT DISTINCT ?nfo ?nfoLabel WHERE {',
        '?nfo ' + _instance_path(indirect) + ' wd:' + classq + ' .',
        '?class ' + _ANY_INSTANCE + ' ?nfo .',
        _IS_CLASS,
        _label_service(labels),
        '}',
    ))


def potentially_first_order_class_instances(classq, indirect=True, labels=True):
    """Query instances of *classq* with no instance that is a class."""
    # Fixed: the path used P269 instead of P279, and a stray '+++' made the
    # string concatenation raise TypeError.
    return query_table(_sparql(
        'SELECT DISTINCT ?pfo ?pfoLabel WHERE {',
        '?pfo ' + _instance_path(indirect) + ' wd:' + classq + ' .',
        'FILTER NOT EXISTS {',
        '?class ' + _ANY_INSTANCE + ' ?pfo .',
        _IS_CLASS,
        '}',
        _label_service(labels),
        '}',
    ))


def potentially_first_order_class_subclasses(classq, indirect=True, labels=True):
    """Query subclasses of *classq* with no instance that is a class."""
    return query_table(_sparql(
        'SELECT ?pfo ?pfoLabel WHERE {',
        '?pfo ' + _subclass_path(indirect) + ' wd:' + classq + ' .',
        'FILTER NOT EXISTS {',
        '?class ' + _ANY_INSTANCE + ' ?pfo .',
        _IS_CLASS,
        '}',
        _label_service(labels),
        '}',
    ))


def not_first_order(classq, labels=True):
    """Query the (indirect) instances of *classq* that are used as classes."""
    # Same query as class_instances() with indirect instances.
    return class_instances(classq, indirect=True, labels=labels)


def not_second_order(classq):
    """Query instances of instances of *classq* that are used as classes."""
    return query_table(_sparql(
        'SELECT DISTINCT ?class WHERE {',
        '?class ' + _ANY_INSTANCE + '/' + _ANY_INSTANCE + ' wd:' + classq + ' .',
        _IS_CLASS,
        '}',
    ))


def non_second_order_class_subclasses(classq, indirect=True):
    """Query subclasses of *classq* having a class two instance levels down."""
    return query_table(_sparql(
        'SELECT DISTINCT ?nso ?nsoLabel WHERE {',
        '?nso ' + _subclass_path(indirect) + ' wd:' + classq + ' .',
        '?nfo ' + _ANY_INSTANCE + ' ?nso .',
        '?class ' + _ANY_INSTANCE + ' ?nfo .',
        _IS_CLASS,
        _LABEL_SERVICE,
        '} ORDER BY ?nso',
    ))


def non_second_order_class_instances(classq, indirect=True):
    """Query instances of *classq* having a class two instance levels down.

    Rows are ``[nso, nsoLabel, nfo, nfoLabel]``.
    """
    # Fixed: the inner paths lacked the '*' used by the sibling queries, and
    # a redundant unguarded curl call sent every query twice.
    return query_table(_sparql(
        'SELECT DISTINCT ?nso ?nsoLabel ?nfo ?nfoLabel WHERE {',
        '?nso ' + _instance_path(indirect) + ' wd:' + classq + ' .',
        '?nfo ' + _ANY_INSTANCE + ' ?nso .',
        '?class ' + _ANY_INSTANCE + ' ?nfo .',
        _IS_CLASS,
        _LABEL_SERVICE,
        '} ORDER BY ?nso',
    ))


def non_third_order_class_subclasses(classq, indirect=True):
    """Query subclasses of *classq* having a class three instance levels down."""
    return query_table(_sparql(
        'SELECT DISTINCT ?nto ?ntoLabel WHERE {',
        '?nto ' + _subclass_path(indirect) + ' wd:' + classq + ' .',
        '?nso ' + _ANY_INSTANCE + ' ?nto .',
        '?nfo ' + _ANY_INSTANCE + ' ?nso .',
        '?class ' + _ANY_INSTANCE + ' ?nfo .',
        _IS_CLASS,
        _LABEL_SERVICE,
        '} ORDER BY ?nto',
    ))


def _unavailable(rows, what, classq):
    """Print a failure notice and return True when *rows* is an error string."""
    if not isinstance(rows, str):
        return False
    print("Cannot retrieve", what, "of", classq, label(classq), rows)
    print()
    return True


def _defect_line(table, row):
    """Format one defect line: count, example, item, label[, subclass, label]."""
    line = f'{len(table)} {table[0][0]:12s} {row[0]:12s} {row[1]:30s}'
    if len(row) >= 4:
        line += f' {row[2]:12s} {row[3]}'
    return line


def _list_defects(rows, probe, what):
    """Print a defect line for every row whose *probe* query finds results."""
    for i in rows:
        table = probe(i[0])
        if isinstance(table, str):
            print(f'Cannot retrieve {what} of {i[0]} {i[1]} {table}')
        elif table:
            print(_defect_line(table, i))
    print()


def _list_unrefuted(rows, probe, order):
    """Print every row for which *probe* finds no counter-example."""
    for i in rows:
        table = probe(i[0])
        if isinstance(table, str):
            print(f'Cannot determine {order}-order status of {i[0]} {i[1]} {table}')
        elif not table:
            print(f'{i[0]:12s} {i[1]}')
    print()


def show_class_instances(classq, indirect=True):
    """Print the instances of *classq* that are used as classes."""
    ci = class_instances(classq, indirect=indirect)
    if _unavailable(ci, "class instances", classq):
        return
    print("Class instances of", classq, label(classq), len(ci))
    for i in ci:
        print(f'{i[0]} {i[1]}')
    print()


def show_nfo_class_instances(classq, indirect=True):
    """Print the non-first-order-class instances of *classq* (single query)."""
    ci = non_first_order_class_instances(classq, indirect=indirect)
    if isinstance(ci, str):
        print("Cannot retrieve non-first-order-class instances of", classq, label(classq))
    else:
        print("Non-first-order-class", "indirect" if indirect else "direct",
              "instances of", classq, label(classq), len(ci))
        for i in ci:
            print(f'{i[0]} {i[1]}')
    print()


def show_nfo_class_instances_iterative(classq, indirect=True):
    """Print, instance by instance, those with instances that are classes."""
    soi = instances(classq, indirect=indirect)
    if _unavailable(soi, "instances", classq):
        return
    print("Non-first-order-class", "indirect" if indirect else "direct",
          "instances of", classq, label(classq), "out of", len(soi))
    print("Defects Example Instance Label Subclass Label")
    _list_defects(
        soi,
        lambda q: class_instances(q, indirect=True, labels=False),
        'class instances',
    )


def show_nso_class_subclasses(classq, indirect=True):
    """Print the non-second-order-class subclasses of *classq*."""
    ci = non_second_order_class_subclasses(classq, indirect=indirect)
    if _unavailable(ci, "non-second-order-class subclasses", classq):
        return
    print("Non-second-order-class", 'all' if indirect else 'direct',
          "subclasses of", classq, label(classq), len(ci))
    for i in ci:
        print(' '.join(i))
    print()


def show_nso_class_instances(classq, indirect=True):
    """Print the non-second-order-class instances of *classq* (single query)."""
    ci = non_second_order_class_instances(classq, indirect=indirect)
    # Failures are reported as a str, never as False.
    if not isinstance(ci, str):
        print("Non-second-order-class instances of", classq, label(classq), len(ci))
        for i in ci:
            print(' '.join(i))
    else:
        print("Cannot determine non-second-order-class instances of", classq, label(classq))
    print()


def show_nso_class_instances_iterative(classq, indirect=True):
    """Print, instance by instance, those that are not second-order.

    When the indirect probe fails for an instance, a cheaper direct-only
    probe is tried before giving up on it.
    """
    soi = instances(classq, indirect=indirect)
    if _unavailable(soi, "instances", classq):
        return
    print("Non-second-order-class", 'indirect' if indirect else 'direct',
          "instances of", classq, label(classq), "out of", len(soi))
    print("Defects Example Instance Label Subclass Label")
    for i in soi:
        table = non_first_order_class_instances(i[0], indirect=True, labels=False)
        if isinstance(table, str):
            print(f'Cannot retrieve non-first-order instances of {i[0]} {i[1]} {table}')
            table = non_first_order_class_instances(i[0], indirect=False, labels=False)
            if isinstance(table, str):
                print(f'Cannot retrieve non-first-order direct instances of {i[0]} {i[1]} {table}')
            elif table:
                print(f'{len(table)} {table[0][0]:12s} {i[0]:12s} {i[1]} (direct instances only)')
        elif table:
            print(_defect_line(table, i))
    print()


def show_nto_class_subclasses_iterative(classq, indirect=True):
    """Print, subclass by subclass, those that are not third-order."""
    soi = subclasses(classq, indirect)
    if _unavailable(soi, "subclasses", classq):
        return
    print('Non-third-order-class ' + ('indirect ' if indirect else 'direct ') + 'subclasses of',
          classq, label(classq), "out of", len(soi))
    print("Defects Example Instance Label")
    _list_defects(
        soi,
        lambda q: non_second_order_class_instances(q, indirect=indirect),
        'non-second-order instances',
    )


def show_nto_class_instances_iterative(classq, indirect=True):
    """Print, instance by instance, those that are not third-order."""
    soi = instances(classq, indirect=indirect)
    if _unavailable(soi, "instances", classq):
        return
    print('Non-third-order-class instances of', classq, label(classq), "out of", len(soi))
    print("Defects Example Instance Label Subclass Label")
    _list_defects(
        soi,
        lambda q: non_second_order_class_instances(q, indirect=indirect),
        'non-second-order instances',
    )


def show_pfo_class_subclasses(classq, indirect=True):
    """Print the potentially first-order subclasses of *classq* (single query)."""
    print("Potentially first-order class", "indirect" if indirect else "direct",
          "subclasses of", classq, label(classq), end=' ')
    ci = potentially_first_order_class_subclasses(classq, indirect=indirect)
    if isinstance(ci, str):
        print("cannot be retrieved:", ci)
        print()
        return
    print(len(ci))
    for i in ci:
        print(' '.join(i))
    print()


def show_pfo_class_subclasses_iterative(classq, indirect=True):
    """Print, subclass by subclass, those with no instance that is a class."""
    soi = subclasses(classq, indirect=indirect)
    if _unavailable(soi, "subclasses", classq):
        return
    print("Potentially first-order class", "indirect" if indirect else "direct",
          "subclasses of", classq, label(classq), "out of", len(soi))
    _list_unrefuted(soi, lambda q: not_first_order(q, labels=False), 'first')


def show_pfo_class_instances_iterative(classq, indirect=True):
    """Print, instance by instance, those with no instance that is a class."""
    soi = instances(classq, indirect=indirect)
    if _unavailable(soi, "instances", classq):
        return
    print("Potentially first-order class ", "indirect" if indirect else "direct",
          " instances of", classq, label(classq), "out of", len(soi))
    _list_unrefuted(soi, lambda q: not_first_order(q, labels=False), 'first')


def show_pso_class_subclasses_iterative(classq, indirect=True):
    """Print, subclass by subclass, those with no class two levels down."""
    soi = subclasses(classq, indirect=indirect)
    if _unavailable(soi, "subclasses", classq):
        return
    print("Potentially second-order class", "indirect" if indirect else "direct",
          "subclasses of", classq, label(classq), "out of", len(soi))
    _list_unrefuted(soi, not_second_order, 'second')


def show_pso_class_instances_iterative(classq, indirect=True):
    """Print, instance by instance, those with no class two levels down."""
    soi = instances(classq, indirect=indirect)
    if _unavailable(soi, "instances", classq):
        return
    print("Potentially second-order class", "indirect" if indirect else "direct",
          "instances of", classq, label(classq), "out of", len(soi))
    _list_unrefuted(soi, not_second_order, 'second')


def _show_excluded(header, member_pattern, relation, others):
    """Print members matching *member_pattern* related to any of *others*."""
    print(header)
    rows = query_table(_sparql(
        'SELECT DISTINCT ?class ?classLabel ?other ?otherLabel WHERE {',
        member_pattern,
        '?class ' + relation + ' ?other .',
        'FILTER (?other IN (' + ', '.join(others) + ') )',
        _LABEL_SERVICE,
        '}',
    ))
    if isinstance(rows, str):
        print("Cannot retrieve members in excluded classes", rows)
        return
    for i in rows:
        if len(i) == 4:
            print(f'{i[0]:12s} {i[1]:30s} {i[2]:12s} {i[3]}')


def show_in_excluded_class_instances(classq, etype, esuper):
    """Print instances of *classq* typed by *etype* or specialising *esuper*.

    *etype* and *esuper* are lists of prefixed item names such as ``wd:Q5``.
    """
    print("Instances of", classq, label(classq), " in excluded class")
    members = '?class wdt:P279*/wdt:P31/wdt:P279* wd:' + classq + '.'
    _show_excluded("Instance Label Type Label", members, _ANY_INSTANCE, etype)
    _show_excluded("Instance Label Super Label", members, 'wdt:P279+', esuper)


def show_in_excluded_class_subclasses(classq, etype, esuper):
    """Print subclasses of *classq* typed by *etype* or specialising *esuper*.

    *etype* and *esuper* are lists of prefixed item names such as ``wd:Q5``.
    """
    print("Subclasses of", classq, label(classq), " in excluded class")
    members = '?class wdt:P279+ wd:' + classq + '.'
    _show_excluded("Subclass Label Type Label", members, _ANY_INSTANCE, etype)
    # Fixed: the "Super" table repeated the type path instead of following
    # subclass-of, unlike show_in_excluded_class_instances.
    _show_excluded("Subclass Label Super Label", members, 'wdt:P279+', esuper)


def main():
    """Print the full report to standard output."""
    print(' <nowiki>')
    print("Report on some classes that do not match the requirements of the class from the top level of the ontology")
    print("that they are related to or are suspicious in some way.")
    print("Current as of", str(datetime.now()))
    print()

    # NOTE(review): the '##'-prefixed lines below are read as disabled calls
    # (like '##LATER' and '##TOO LARGE'); '## NO SUBCLASSES' in the
    # third-order section is read as a trailing remark.  Confirm against the
    # author's copy of the script.

    # first-order class
    show_nfo_class_instances_iterative('Q104086571', indirect=False)
    ##NO SUBCLASSES show_nfo_class_instances_iterative('Q104086571')
    ##NO SUBCLASSES show_nso_class_subclasses('Q104086571', indirect=False)
    ##NO SUBCLASSES show_nso_class_subclasses('Q104086571')
    ##NO SUBCLASSES show_pfo_class_subclasses('Q104086571', indirect=False)
    ##NO SUBCLASSES show_pfo_class_subclasses('Q104086571')
    ##LATER show_in_excluded_class_instances('Q104086571', excluded[3:-2], excluded[6:-2])
    ##LATER show_in_excluded_class_subclasses('Q104086571', excluded[6:-2], excluded[7:-2])

    # Wikidata instance class
    show_nfo_class_instances_iterative('Q21522908', indirect=False)
    show_nfo_class_instances_iterative('Q21522908')
    show_nso_class_subclasses('Q21522908', indirect=False)
    show_nso_class_subclasses('Q21522908')
    show_pfo_class_subclasses_iterative('Q21522908', indirect=False)
    show_pfo_class_subclasses_iterative('Q21522908')
    ##LATER show_in_excluded_class_instances('Q21522908', excluded[3:-2], excluded[6:-2])
    ##LATER show_in_excluded_class_subclasses('Q21522908', excluded[6:-2], excluded[7:-2])

    # second-order class
    show_nso_class_instances_iterative('Q24017414', indirect=False)
    show_nso_class_instances_iterative('Q24017414')
    show_nto_class_subclasses_iterative('Q24017414', indirect=False)
    show_nto_class_subclasses_iterative('Q24017414')
    show_pfo_class_instances_iterative('Q24017414', indirect=False)
    show_pfo_class_instances_iterative('Q24017414')
    show_pso_class_subclasses_iterative('Q24017414', indirect=False)
    show_pso_class_subclasses_iterative('Q24017414')
    ##LATER show_in_excluded_class_instances('Q21522908', excluded[6:-2], excluded[7:-2])
    ##LATER show_in_excluded_class_subclasses('Q21522908', excluded[7:-2], excluded[8:-2])

    # third-order class
    show_nto_class_instances_iterative('Q24017465', indirect=False)  ## NO SUBCLASSES
    show_nto_class_instances_iterative('Q24017465')  ## NO SUBCLASSES
    show_pso_class_instances_iterative('Q24017465', indirect=False)  ## NO SUBCLASSES
    show_pso_class_instances_iterative('Q24017465')  ## NO SUBCLASSES

    print("\nThe following reports are not necessarily errors, but may show a problem with a class\n")

    # metaclass
    show_nso_class_instances_iterative('Q19478619', indirect=False)
    show_nso_class_instances_iterative('Q19478619')
    show_nto_class_subclasses_iterative('Q19478619', indirect=False)
    show_nto_class_subclasses_iterative('Q19478619')
    # Was a duplicate of the direct show_pso_class_subclasses_iterative call
    # below; replaced to follow the second-order section's pattern.
    show_pfo_class_instances_iterative('Q19478619', indirect=False)
    ##TOO LARGE show_pfo_class_instances_iterative('Q19478619')
    show_pso_class_subclasses_iterative('Q19478619', indirect=False)
    show_pso_class_subclasses_iterative('Q19478619')

    # Wikidata metaclass
    show_nso_class_instances_iterative('Q19361238', indirect=False)
    show_nso_class_instances_iterative('Q19361238')
    show_nto_class_subclasses_iterative('Q19361238', indirect=False)
    show_nto_class_subclasses_iterative('Q19361238')
    # Same duplicate-call correction as in the metaclass section.
    show_pfo_class_instances_iterative('Q19361238', indirect=False)
    show_pfo_class_instances_iterative('Q19361238')
    show_pso_class_subclasses_iterative('Q19361238', indirect=False)
    show_pso_class_subclasses_iterative('Q19361238')

    # Close the element opened by the first line of the report.
    print('</nowiki>')


if __name__ == "__main__":
    main()