Wikidata:Database reports/Complex constraint violations/Configuration

From Wikidata
Jump to navigation Jump to search
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

import pywikibot
import requests
import json
import mwparserfromhell as mwparser
import time
import sys
import re

# Site object used by every page read/write below. It is created at import
# time. `repo` is not referenced anywhere else in this file.
site = pywikibot.Site('wikidata', 'wikidata')
repo = site.data_repository()

# Name of the template whose occurrences on talk pages define a constraint
# (compared against template names in onePropertyReport()).
template = 'Complex constraint'

# Entity ids dropped from query results in proceedOne().
# NOTE(review): presumably sandbox/test entities -- confirm.
blacklist = ['Q4115189', 'Q13406268', 'Q15397819', 'Q16943273', 'Q17339402']

# Every constraint dict processed by onePropertyReport() is appended here;
# writeOverview() reads it. NOTE(review): the name shadows the builtin all().
all = []


def dictify(t):
    """Return the parameters of template *t* as a plain dict.

    Keys are the parameter names, stripped and lower-cased; values are the
    parameter values, stripped. If two parameters normalise to the same
    key, the later one wins.
    """
    return {
        str(p.name).strip().lower(): str(p.value).strip()
        for p in t.params
    }


def writeOverview():
    """Write the overview page listing every constraint collected in ``all``.

    Emits a dated ``{{/header}}``, one ``{{TR complex constraint}}`` row per
    entry of the module-level list ``all`` (using its 'property', 'label',
    'description' and 'violations' keys), and a footer with the category,
    then saves the text to
    'Wikidata:Database reports/Complex constraints'.

    An entry that has no 'description' key is rendered with an empty
    description instead of aborting the whole overview with a KeyError.
    """
    row = u'{{{{TR complex constraint|p={property}\n|label={label}\n|description={description}\n|violations={violations}\n}}}}\n'
    parts = [u'{{/header|' + time.strftime('%Y-%m-%d') + '}}\n\n']
    for m in all:
        # dictify() only yields the parameters actually present on the
        # template, so 'description' may be absent; work on a copy so the
        # shared entry is not modified here.
        entry = dict(m)
        entry.setdefault('description', '')
        parts.append(row.format(**entry))
    parts.append(u'{{/footer}}\n[[Category:Database reports|Complex Constraints]]')
    page = pywikibot.Page(site, 'Wikidata:Database reports/Complex constraints')
    page.put(u''.join(parts), comment='upd', minorEdit=False)


def writeText(onePdata, property):
    """Write the violations report page for one entity.

    Parameters:
        onePdata: list of constraint dicts; each needs 'label',
            'violations' (int) and 'result' (list of wikitext lines), and
            may carry 'description'.
        property: entity id used as the sub-page name under
            'Wikidata:Database reports/Complex constraint violations/'.

    For every constraint a section headed by its label is written. At most
    5000 result lines are listed per constraint; when there are more,
    ``m['result']`` is truncated in place to its first 5000 entries. Listed
    lines are sorted by the first run of digits they contain.
    """
    # Raw string: '\d' inside a plain literal is an invalid escape sequence.
    first_number = re.compile(r'(\d+)')

    parts = [u'{{Complex constraint violations report|date=' + time.strftime('%Y-%m-%d %H:%M (%Z)') + '}}\n']
    for m in onePdata:
        parts.append('== ' + m['label'] + ' ==\n')
        # 'description' may be missing when the template did not set it.
        description = m.get('description')
        if description:
            parts.append(description + '\n\n')
        if m['violations'] == 0:
            # An empty result list does not distinguish "no violations"
            # from a failed query, hence the wording.
            parts.append('no results or query error\n\n')
            continue
        parts.append('violations count: ' + str(m['violations']) + '\n\n')
        if m['violations'] > 5000:
            m['result'] = m['result'][:5000]
        # split() with a capturing group returns [before, digits, ...], so
        # index 1 is the first number in the line.
        ordered = sorted(m['result'], key=lambda x: int(first_number.split(x)[1]))
        parts.extend(line + '\n' for line in ordered)

    page = pywikibot.Page(site, 'Wikidata:Database reports/Complex constraint violations/' + property)
    page.put(u''.join(parts), comment='upd', minorEdit=False)


def proceedOne(sparql):
    """Run *sparql* on query.wikidata.org and format the rows as wikitext.

    Each result row must bind ``item``; rows without it are skipped, as are
    rows whose item id is in the module-level ``blacklist``. A row becomes
    one list line ``*{{Q|...}}`` (or ``*{{P|...}}`` when the id starts with
    'P'), followed by ': ' and the comma-separated values of the other
    selected variables that are bound in that row.

    Returns:
        list of str. Best effort: when the request or the parsing fails,
        the lines built so far (possibly none) are returned and the error
        is reported on stderr.
    """
    entity_prefix = 'http://www.wikidata.org/entity/'
    result = []
    try:
        url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
        payload = {
            'query': sparql,
            'format': 'json'
        }
        r = requests.get(url, params=payload)
        data = r.json()
        extra_vars = [v for v in data['head']['vars'] if v != 'item']
        for m in data['results']['bindings']:
            if 'item' not in m:
                continue
            item_id = m['item']['value'].replace(entity_prefix, '')
            if item_id in blacklist:
                continue
            kind = 'P' if item_id.startswith('P') else 'Q'
            line = '*{{' + kind + '|' + item_id + '}}'
            extras = []
            for var in extra_vars:
                # Unbound variables (e.g. from OPTIONAL) are absent from
                # the binding; skip them instead of aborting the result.
                if var not in m:
                    continue
                # Drop the midnight time part so dates print as plain dates.
                val = m[var]['value'].replace('T00:00:00Z', '')
                if entity_prefix + 'P' in val:
                    val = '{{P|' + val.replace(entity_prefix, '') + '}}'
                elif entity_prefix + 'Q' in val:
                    val = '{{Q|' + val.replace(entity_prefix, '') + '}}'
                extras.append(val)
            if extras:
                line += ': ' + ', '.join(extras)
            result.append(line)
    except Exception as exc:
        # Still best effort, but no longer swallows KeyboardInterrupt /
        # SystemExit, and the failure is made visible.
        sys.stderr.write('proceedOne: query failed: %r\n' % (exc,))
    return result


def onePropertyReport(page):
    """Evaluate every constraint template on *page* and write its report.

    *page* is a talk page whose title has the form '<namespace>:<entity id>'.
    Each occurrence of the template named by the module-level ``template``
    that has a non-empty 'label' and 'sparql' is run through proceedOne();
    the resulting dict (template parameters plus 'property', 'result' and
    'violations') is appended to the module-level list ``all`` and included
    in the page written by writeText().

    Templates lacking 'label' or 'sparql' are skipped rather than aborting
    the whole page, and a missing 'description' defaults to ''.
    """
    onePdata = []
    code = mwparser.parse(page.get())
    # The part after the namespace prefix is the entity id.
    entity_id = page.title().split(':')[1]
    for t in code.filter_templates():
        if t.name.strip() != template:
            continue
        data = dictify(t)
        if not data.get('label') or not data.get('sparql'):
            continue
        data['property'] = entity_id
        data.setdefault('description', '')
        # '{{!!}}' stands in for '||' inside the template parameter.
        data['sparql'] = data['sparql'].replace('{{!!}}', '||')
        data['result'] = proceedOne(data['sparql'])
        data['violations'] = len(data['result'])
        onePdata.append(data)
        all.append(data)
    writeText(onePdata, entity_id)


def main():
    """Command-line entry point.

    With the argument 'all', processes every page in namespaces 1 and 121
    that transcludes the constraint template and then writes the overview
    page. With any other argument, treats it as an entity id and processes
    only the matching talk page ('Property_talk:' for ids starting with
    'P', 'Talk:' otherwise); the overview is not written in that mode.

    Exits with a usage message when no argument is given.
    """
    if len(sys.argv) < 2:
        sys.exit('usage: <script> all | <entity id>')
    target = sys.argv[1]

    if target == 'all':
        templatepage = pywikibot.Page(site, 'Template:'+template)
        gen = templatepage.getReferences(onlyTemplateInclusion=True, namespaces=[1, 121], content=True)

        for page in gen:
            try:
                onePropertyReport(page)
            except Exception as exc:
                # One broken page must not stop the run, but report it;
                # KeyboardInterrupt/SystemExit now propagate.
                sys.stderr.write('skipping %s: %r\n' % (page.title(), exc))
        writeOverview()
    else:
        if target.startswith('P'):
            page = pywikibot.Page(site, 'Property_talk:'+target)
        else:
            page = pywikibot.Page(site, 'Talk:'+target)
        onePropertyReport(page)


# Run the report only when this file is executed as a script.
if __name__ == "__main__":
    main()