Wikidata:Database reports/Complex constraint violations/Configuration
Jump to navigation
Jump to search
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0
import pywikibot
import requests
import json
import mwparserfromhell as mwparser
import time
import sys
import re
# Shared pywikibot handle for Wikidata; the functions below create all of
# their pages through it.
site = pywikibot.Site('wikidata', 'wikidata')
# NOTE(review): `repo` is not referenced in the visible part of this file.
repo = site.data_repository()
# Name of the template whose transclusions declare a complex constraint;
# compared against template names and used to build 'Template:<name>'.
template = 'Complex constraint'
# Entity IDs that proceedOne() drops from query results
# (presumably sandbox/test entities -- TODO confirm).
blacklist = ['Q4115189', 'Q13406268', 'Q15397819', 'Q16943273', 'Q17339402']
# Accumulates one dict per processed constraint (appended by
# onePropertyReport(), read by writeOverview()).
# NOTE(review): this name shadows the builtin all(); renaming it requires
# updating every function that uses it.
all = []
def dictify(t):
    """Return the parameters of template *t* as a plain dict.

    Keys are the parameter names, stripped and lower-cased; values are the
    parameter values with surrounding whitespace stripped. If a normalised
    name occurs more than once, the last occurrence wins.
    """
    return {
        str(entry.name).strip().lower(): str(entry.value).strip()
        for entry in t.params
    }
def writeOverview():
    """Publish the overview table of all processed complex constraints.

    Renders one ``{{TR complex constraint}}`` row for every entry collected
    in the module-level ``all`` list, wraps the rows in ``{{/header}}`` and
    ``{{/footer}}``, and saves the text to
    ``Wikidata:Database reports/Complex constraints``.
    """
    row = u'{{{{TR complex constraint|p={property}\n|label={label}\n|description={description}\n|violations={violations}\n}}}}\n'
    parts = [u'{{/header|' + time.strftime('%Y-%m-%d') + '}}\n\n']
    for m in all:
        # dictify() only yields parameters that are present on the template,
        # so an entry may have no 'description'. Supply an empty default
        # instead of letting str.format raise KeyError and abort the whole
        # overview.
        fields = {'description': ''}
        fields.update(m)
        parts.append(row.format(**fields))
    parts.append(u'{{/footer}}\n[[Category:Database reports|Complex Constraints]]')
    page = pywikibot.Page(site, 'Wikidata:Database reports/Complex constraints')
    page.put(u''.join(parts), comment='upd', minorEdit=False)
def _violationSortKey(line):
    """Return the first integer found in *line*, or -1 if it has no digits."""
    match = re.search(r'\d+', line)
    return int(match.group()) if match else -1


def writeText(onePdata, property):
    """Write the violations report page for one entity.

    Args:
        onePdata: list of dicts, one per complex constraint. Each dict must
            provide 'label', 'violations' and 'result' (a list of wikitext
            lines); 'description' is optional.
        property: entity ID used as the subpage name under
            ``Wikidata:Database reports/Complex constraint violations/``.

    At most the first 5000 result lines of each constraint are listed,
    ordered by the first number occurring in the line. The input dicts are
    not modified.
    """
    limit = 5000
    parts = [u'{{Complex constraint violations report|date=' + time.strftime('%Y-%m-%d %H:%M (%Z)') + '}}\n']
    for m in onePdata:
        parts.append('== ' + m['label'] + ' ==\n')
        # 'description' is only present when the template supplied it.
        description = m.get('description')
        if description:
            parts.append(description + '\n\n')
        if m['violations'] == 0:
            parts.append('no results or query error\n\n')
            continue
        parts.append('violations count: ' + str(m['violations']) + '\n\n')
        # Slice into a new list rather than truncating m['result'] in place;
        # the same dict is also held by the caller.
        shown = m['result'][:limit]
        # Lines without any digit sort first instead of raising IndexError
        # and preventing the page from being written.
        for line in sorted(shown, key=_violationSortKey):
            parts.append(line + '\n')
    page = pywikibot.Page(site, 'Wikidata:Database reports/Complex constraint violations/' + property)
    page.put(u''.join(parts), comment='upd', minorEdit=False)
def _formatBinding(binding, variables):
    """Render one SPARQL result row as a wikitext list line.

    Returns None when the row has no ``item`` value or the item is listed
    in the module-level ``blacklist``.
    """
    prefix = 'http://www.wikidata.org/entity/'
    item = binding.get('item')
    if item is None:
        return None
    entity = item['value'].replace(prefix, '')
    if entity in blacklist:
        return None
    if entity.startswith('P'):
        line = '*{{P|' + entity + '}}'
    else:
        line = '*{{Q|' + entity + '}}'
    extras = []
    for var in variables:
        # Rows leave out variables that are unbound (e.g. from OPTIONAL),
        # so skip them instead of failing on the missing key.
        if var == 'item' or var not in binding:
            continue
        val = binding[var]['value'].replace('T00:00:00Z', '')
        if prefix + 'P' in val:
            val = '{{P|' + val.replace(prefix, '') + '}}'
        elif prefix + 'Q' in val:
            val = '{{Q|' + val.replace(prefix, '') + '}}'
        extras.append(val)
    if extras:
        line += ': ' + ', '.join(extras)
    return line


def proceedOne(sparql, timeout=120):
    """Run *sparql* against the Wikidata query service and format the rows.

    Args:
        sparql: the SPARQL query text; rows are keyed on its ``item``
            variable, every other selected variable is appended to the line.
        timeout: seconds to wait for the HTTP response before giving up.

    Returns:
        A list of wikitext lines (``*{{Q|...}}: extra, extra``), one per
        result row. An empty list is returned when the request or the
        decoding of the response fails; the failure is logged as a warning.
    """
    url = 'https://query.wikidata.org/bigdata/namespace/wdq/sparql'
    payload = {
        'query': sparql,
        'format': 'json'
    }
    # NOTE(review): no explicit User-Agent is sent; confirm whether the
    # query service's usage policy requires one for this bot.
    try:
        r = requests.get(url, params=payload, timeout=timeout)
        r.raise_for_status()
        data = r.json()
        bindings = data['results']['bindings']
        variables = data.get('head', {}).get('vars', [])
    except (requests.RequestException, ValueError, KeyError, TypeError, AttributeError) as e:
        # Best effort: a failed query yields an empty report section.
        pywikibot.warning('SPARQL query failed: {0}'.format(e))
        return []
    result = []
    for m in bindings:
        try:
            line = _formatBinding(m, variables)
        except (KeyError, TypeError, AttributeError) as e:
            # A single malformed row must not discard the remaining rows.
            pywikibot.warning('skipping malformed result row: {0}'.format(e))
            continue
        if line is not None:
            result.append(line)
    return result
def onePropertyReport(page):
    """Evaluate every complex-constraint template on *page* and write its report.

    For each transclusion of the configured template that has a non-empty
    'label' and 'sparql' parameter, the query is run through proceedOne() and
    the resulting dict is appended both to this page's report data and to
    the module-level ``all`` list. The per-entity report is then written via
    writeText().

    Args:
        page: a pywikibot page whose title has the form
            ``<namespace>:<entity id>``.
    """
    onePdata = []
    code = mwparser.parse(page.get())
    entity = page.title().split(':')[1]
    for t in code.filter_templates():
        if t.name.strip() != template:
            continue
        data = dictify(t)
        # Check presence *before* touching the values: a template without
        # 'label' or 'sparql' is skipped rather than raising KeyError and
        # aborting the remaining templates on the page.
        if not data.get('label') or not data.get('sparql'):
            continue
        # 'description' is optional; normalise it so that the report
        # writers can rely on the key being there.
        data.setdefault('description', '')
        data['property'] = entity
        data['sparql'] = data['sparql'].replace('{{!!}}', '||')
        data['result'] = proceedOne(data['sparql'])
        data['violations'] = len(data['result'])
        onePdata.append(data)
        all.append(data)
    writeText(onePdata, entity)
def main():
    """Command-line entry point.

    Usage:
        script all     -- process every page transcluding the constraint
                          template, then rewrite the overview page
        script <ID>    -- process the talk page of a single entity
                          (IDs starting with 'P' use 'Property_talk:',
                          everything else 'Talk:')
    """
    if len(sys.argv) < 2:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit('usage: ' + sys.argv[0] + ' all|<entity id>')
    target = sys.argv[1]
    if target == 'all':
        templatepage = pywikibot.Page(site, 'Template:' + template)
        # Namespaces 1 and 121 are presumably Talk and Property talk -- TODO confirm.
        gen = templatepage.getReferences(onlyTemplateInclusion=True, namespaces=[1, 121], content=True)
        for page in gen:
            try:
                onePropertyReport(page)
            except Exception as e:
                # One broken page must not stop the run, but the failure is
                # reported rather than silently dropped. KeyboardInterrupt
                # is no longer swallowed.
                pywikibot.warning('report for {0} failed: {1!r}'.format(page.title(), e))
        writeOverview()
    else:
        if target.startswith('P'):
            page = pywikibot.Page(site, 'Property_talk:' + target)
        else:
            page = pywikibot.Page(site, 'Talk:' + target)
        onePropertyReport(page)
# Run the bot only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()