Wikidata:Database reports/Most sitelinked items/Configuration

From Wikidata
Jump to navigation Jump to search

The most recent version of this script can be found at Github: most_sitelinks. The script is run on Toolforge in the deltabot tool account; Toolforge users usually have read-access to all scripts in that tool account.

A potentially outdated version is stored onwiki at User:DeltaBot/source/most_sitelinks in order to be permanently available and conveniently accessible; it is being displayed on this page with code formatting. Mind that the onwiki source code might have been slightly altered in order to prevent onlyinclude directives from being effective, and that the wikitext parser im some situations messes up the formatted output of the transcluding page content including the source code.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

from os.path import expanduser
from time import strftime

import mariadb
import pywikibot as pwb


HEADER = 'A list of items with the most sitelinks. Data as of <only' + 'include>{update_timestamp}</onlyinclude>.\n\n{{| class="wikitable sortable" style="width:100%; margin:auto;"\n|-\n! Item !! Sitelinks\n'
TABLE_ROW = '|-\n| {{qid}} || {cnt}\n'
FOOTER = '|}\n\n[[Category:Wikidata statistics|Most sitelinked items]] [[Category:Database reports|Most sitelinked items]]'


def make_report() -> str:
    db = mariadb.connect(
        host='wikidatawiki.analytics.db.svc.wikimedia.cloud',
        database='wikidatawiki_p',
        default_file=f'{expanduser("~")}/replica.my.cnf'
    )
    cur = db.cursor(dictionary=True)

    query = 'SELECT ips_item_id, COUNT(*) AS cnt FROM wb_items_per_site GROUP BY ips_item_id ORDER BY cnt DESC LIMIT 100'
    cur.execute(query)

    text = ''
    for row in cur:
        qid = row.get('ips_item_id')
        cnt = row.get('cnt')

        if qid is None or cnt is None:
            continue

        text += TABLE_ROW.format(qid=qid, cnt=cnt)

    cur.close()
    db.close()

    return text


def main() -> None:
    text = HEADER.format(update_timestamp=strftime('%Y-%m-%d %H:%M (%Z)')) + make_report() + FOOTER

    page = pwb.Page(pwb.Site('wikidata', 'wikidata'), 'Wikidata:Database reports/Most sitelinked items')
    page.text = text
    page.save(summary='Bot:Updating database report', minor=False)


if __name__ == '__main__':
    main()