User:AkkakkBot/code/07-fix-language-mappings

From Wikidata
Jump to navigation Jump to search

Python code:

        site = pywikibot.Site("wikidata", "wikidata")
        repo = site.data_repository()
        db = MySQLdb.connect(host="wikidatawiki.labsdb", db="wikidatawiki_p", read_default_file="~/replica.my.cnf")

        editmax = 1000
        replace_languages = {"als":"gsw", "de-formal":"de", "crh":"crh-latn", "no":"nb", "simple":"en", "bat-smg":"sgs", "be-x-old":"be-tarask", "fiu-vro":"vro", "roa-rup":"rup", "zh-classical":"lzh", "zh-min-nan":"nan", "zh-yue":"yue"}
        log = ""
        exit_reason = "end of data"
        summary_max_len = 100

        #get items from file
        print("get items")
        sys.stdout.flush()
        items = []
        cur = db.cursor()
        cur.execute('select term_entity_id from wb_terms where (term_language = "als" or term_language = "de-formal" or term_language = "crh" or term_language = "no" or term_language = "bat-smg" or term_language = "be-x-old" or term_language = "fiu-vro" or term_language = "roa-rup" or term_language = "zh-classical" or term_language = "zh-min-nan" or term_language = "zh-yue" or term_language = "simple") limit 50000;')

        lines = cur.fetchall()
        for line in lines:
            for cell in line:
                items.append("Q{}".format(cell))
        
        
        #iterate items
        print("iterate items")
        editcnt = 0
        try:
            for i, q in enumerate(items):
                if(os.path.isfile("bot-07-fix-language-mappings.stop")):
                    print("stop file")
                    exit_reason = "stop file"
                    break

                item = pywikibot.ItemPage(repo, q)
                try:
                    content = item.get()
                    descriptions = content['descriptions']
                    labels = content['labels']
                    aliases = content['aliases']
                    #labels
                    print("checking {} labels:".format(q)),
                    sys.stdout.flush()
                    changed = False
                    summary_text = ""
                    for lang_source in replace_languages:
                        lang_target = replace_languages[lang_source]
                        if lang_source in labels:
                            if lang_target in labels:
                                if labels[lang_source] == labels[lang_target]:
                                    print("  remove redundant "+lang_source+" label"),
                                    summary_text += "- remove redundant "+lang_source+" label"
                                    labels[lang_source] = ""
                                    if(not changed):
                                        changed = True
                                        log += "* [["+q+"]]: "
                                    log += "- remove redundant "+lang_source+" label "
                                else:
                                    print("- "+lang_source+"/"+lang_target+"-labels differ!"),
                            else:
                                print("- "+lang_source+" lab to "+lang_target),
                                labels[lang_target] = labels[lang_source]
                                labels[lang_source] = ""
                                if(not changed):
                                    changed = True
                                    log += "* [["+q+"]]: "
                                log += "- rename "+lang_source+" label to "+lang_target+" "
                                summary_text += "- rename "+lang_source+" label to "+lang_target
                    if(changed):
                        editcnt += 1
                        print("- edit {}...".format(editcnt))
                        log += "\n"
                        summary_text += " (task 7)"
                        if(len(summary_text) > summary_max_len):
                            summary_text = "fix language mappings in labels (task 7)"
                        item.editLabels(summary=summary_text, labels=labels)
                        if(editcnt >= editmax):
                            print("maximum number of edits reached")
                            exit_reason = "maximum number of edits reached"
                            break 
                    else:
                        print("")
                    #descriptions
                    print("checking {} descriptions:".format(q)),
                    sys.stdout.flush()
                    changed = False
                    summary_text = ""
                    for lang_source in replace_languages:
                        lang_target = replace_languages[lang_source]
                        if lang_source in descriptions:
                            if lang_target in descriptions:
                                if descriptions[lang_source] == descriptions[lang_target]:
                                    print("  remove redundant "+lang_source+" description"),
                                    summary_text += "- remove redundant "+lang_source+" description"
                                    descriptions[lang_source] = ""
                                    if(not changed):
                                        changed = True
                                        log += "* [["+q+"]]: "
                                    log += "- remove redundant "+lang_source+" description "
                                else:
                                    print("- "+lang_source+"/"+lang_target+"-descriptions differ!"),
                            else:
                                print("- "+lang_source+" desc to "+lang_target),
                                descriptions[lang_target] = descriptions[lang_source]
                                descriptions[lang_source] = ""
                                if(not changed):
                                    changed = True
                                    log += "* [["+q+"]]: "
                                log += "- rename "+lang_source+" description to "+lang_target+" "
                                summary_text += "- rename "+lang_source+" description to "+lang_target
                    if(changed):
                        editcnt += 1
                        print("- edit {}...".format(editcnt))
                        log += "\n"
                        summary_text += " (task 7)"
                        if(len(summary_text) > summary_max_len):
                            summary_text = "fix language mappings in descriptions (task 7)"
                        item.editDescriptions(summary=summary_text, descriptions=descriptions)
                        if(editcnt >= editmax):
                            print("maximum number of edits reached")
                            exit_reason = "maximum number of edits reached"
                            break 
                    else:
                        print("")
                    #aliases
                    print("checking {} aliases:".format(q)),
                    sys.stdout.flush()
                    changed = False
                    summary_text = ""
                    for lang_source in replace_languages:
                        lang_target = replace_languages[lang_source]
                        if lang_source in aliases:
                            if lang_target in aliases:
                                if aliases_equals(aliases[lang_source], aliases[lang_target]):
                                    print("- remove redundant "+lang_source+" aliases"),
                                    aliases[lang_source] = [""]
                                    if(not changed):
                                        changed = True
                                        log += "* [["+q+"]]: "
                                    log += "- remove redundant "+lang_source+" aliases"
                                    summary_text += "- remove redundant "+lang_source+" aliases"
                                else:
                                    print("- "+lang_source+"/"+lang_target+"-aliases differ!"),
                                    if(not changed):
                                        changed = True
                                        log += "* [["+q+"]]: "
                                    for alias in aliases[lang_source]:
                                        if not alias in aliases[lang_target]:
                                            print("- adding alias "),
                                            log += "- adding "+lang_target+" alias "
                                            aliases[lang_target].append(alias)
                                    summary_text += "- fixing "+lang_source+"/"+lang_target+" aliases"
                                    log += "- removing "+lang_source+" alias(es)"
                                    aliases[lang_source] = [""]
                            else:
                                print("- rename "+lang_source+" aliases to "+lang_target),
                                aliases[lang_target] = aliases[lang_source]
                                aliases[lang_source] = [""]
                                if(not changed):
                                    changed = True
                                    log += "* [["+q+"]]: "
                                #log += "- copy "+lang_source+" aliases to "+lang_target+" "
                                #summary_text += "- copy "+lang_source+" aliases to "+lang_target
                                log += "- rename "+lang_source+" aliases to "+lang_target+" "
                                summary_text += "- rename "+lang_source+" aliases to "+lang_target
                    if(changed):
                        editcnt += 1
                        print("- edit {}...".format(editcnt))
                        log += "\n"
                        summary_text += " (task 7)"
                        if(len(summary_text) > summary_max_len):
                            summary_text = "fix language mappings in descriptions (task 7)"
                        item.editAliases(summary=summary_text, aliases=aliases)
                        if(editcnt >= editmax):
                            print("maximum number of edits reached")
                            exit_reason = "maximum number of edits reached"
                            break 
                    else:
                        print("")
                except pywikibot.exceptions.NoPage:
                    print("  item does not exist")
                except pywikibot.data.api.APIError:
                    print("  api error. trying to continue.")
                except UnicodeEncodeError:
                    print("  UnicodeEncodeError.why?")
                except TypeError:
                    print("  TypeError")
                except ValueError:
                    print("  ValueError")
        except Exception as exc:
            print("exception")
            traceback.print_exc()
            exit_reason = "exception"
                     
        if log != "":
            log += "exit reason:"+exit_reason
            pageobj = pywikibot.Page(site, u"User:AkkakkBot/log")
            pageobj.put(log, u"log for task 7: fix language mappings", minorEdit = False)
        print("end of script")
        return exit_reason

def aliases_equals(old, new):
    """Return True if both alias lists hold the same entries, in any order.

    The comparison is done on sorted copies, so neither argument is
    modified. Duplicates are significant: ["a", "a"] is not equal to ["a"].
    """
    # The original wrote `old.sort` / `new.sort` without calling them, so the
    # lists were never sorted and the comparison depended on element order.
    return sorted(old) == sorted(new)