Wikidata:Database reports/Unauthorized bots/Configuration

From Wikidata
Jump to navigation Jump to search

The most recent version of this script can be found at Github: unbot. The script is run on Toolforge in the deltabot tool account; Toolforge users usually have read-access to all scripts in that tool account.

A potentially outdated version is stored onwiki at User:DeltaBot/source/unbot in order to be permanently available and conveniently accessible; it is being displayed on this page with code formatting. Mind that the onwiki source code might have been slightly altered in order to prevent onlyinclude directives from being effective, and that the wikitext parser im some situations messes up the formatted output of the transcluding page content including the source code.

#!/usr/bin/python
# -*- coding: UTF-8 -*-
#licensed under CC-Zero: https://creativecommons.org/publicdomain/zero/1.0

from datetime import date, timedelta
from os.path import expanduser
from time import strftime

import mariadb
import pywikibot


HEADER = 'A list of active bots during the last month without bot flag. Update: <only' + 'include>{update_timestamp}</onlyinclude>\n\n{{| class="wikitable sortable" style="width:100%%; margin:auto;"\n|-\n! User !! Edits¹\n'
TABLE_ROW = '|-\n| {{user_name}} || {cnt}\n'
FOOTER = '|}\n\n¹edits in namespace 0 during the last 30 days\n[[Category:Database reports]]'

USER_NAME_WHITELIST = [
    'Paucabot',
    'Reubot',
]


def make_report() -> str:
    db = mariadb.connect(
        host='wikidatawiki.analytics.db.svc.wikimedia.cloud',
        database='wikidatawiki_p',
        default_file=f'{expanduser("~")}/replica.my.cnf',
    )
    cursor = db.cursor(dictionary=True)

    query = """SELECT
    CONVERT(actor_name USING utf8) AS user_name,
    COUNT(*) AS cnt
FROM
    recentchanges
        JOIN actor_recentchanges ON rc_actor=actor_id
WHERE
    rc_bot=0
    AND rc_namespace=0
    AND rc_timestamp>%(rc_timestamp)s
    AND (actor_name LIKE "%bot" OR actor_name LIKE "%Bot")
GROUP BY
    actor_name
HAVING
    actor_name NOT IN (
        SELECT
            user_name
        FROM
            user
                JOIN user_groups ON user_id=ug_user
        WHERE
            ug_group="bot"
    )"""

    params = { 'rc_timestamp' : (date.today()-timedelta(days=30)).strftime('%Y%m%d000000')}

    cursor.execute(query, params)
    text = ''
    for row in cursor:
        user_name = row.get('user_name')
        cnt = row.get('cnt')

        if user_name is None or cnt is None:
            continue

        if user_name in USER_NAME_WHITELIST:
            continue

        text += TABLE_ROW.format(user_name=user_name, cnt=cnt)

    return text


def main() -> None:
    text = HEADER.format(update_timestamp=strftime('%Y-%m-%d %H:%M (%Z)')) + make_report() + FOOTER

    page = pywikibot.Page(pywikibot.Site('wikidata','wikidata'), 'Wikidata:Database reports/Unauthorized bots')
    page.text = text
    page.save(summary='Bot:Updating database report', minor=False)


if __name__=='__main__':
    main()