User:Edoderoobot/Set-nl-description

From Wikidata
Jump to: navigation, search
#!/usr/local/bin/python
# -*- coding: utf-8 -*-


# (C) Edoderoo/Edoderoobot (meta.wikimedia.org), 2016
#
# Distributed under the terms of the CC-BY-SA 3.0 licence.
#

import pywikibot
#from pywikibot import pagegenerators
from pywikibot import pagegenerators as pg
#import pywikibot.data.wikidataquery as wdquery
from pywikibot.data import api
import codecs
import sys
import datetime
from datetime import datetime, date, time

# Wikidata property (P...) and item (Q...) identifiers used by the helpers below.
#demoniem = P1549
# P569: date of birth (see get_property_id).
geboren =     u'P569'
# P570: read as the date of death when building the lifespan in its_a_person.
gestorven=    u'P570'
# P31: "is a" / instance of (see get_property_id).
is_een=       u'P31'
# P21: the claim is_female() inspects.
gender =      u'P21'
female_gender=u'Q6581072'
# NOTE(review): male_gender is not referenced in the portion of the file shown here.
male_gender=  u'Q6581097'
# P131: the "located in" claim used by its_canton_of_France.
gelegen_in=   u'P131'
# Counter that action_one_item decrements once per processed item and prints.
items2do = 0

# Fallback label languages, tried in this order by its_a_generalthing and
# its_a_discography when the linked item has no label in the target language.
# NOTE(review): 'dk' and 'si' may be meant as 'da' / 'sl' -- confirm.
lng_canbeused = ['en','de','fr','it','es','pt','ca','sv','dk','no','pl','hr','cs','si','nl','ro','sh','vi','wa','eo','simple','eu','zea','li','fy','oc','af']
# Two alternative queries; the second assignment overrides the first.
default_query = 'claim[31:5] and claim[106] and claim[27] and link[nlwiki]'  # people with an occupation, a country and a page on nl-wiki
default_query = 'link[nlwiki]'   # just go along all nl-wiki articles, and see by P31 what you can do
# P31 values listed per type; the code that consumes this list is not visible
# in this part of the file -- presumably types whose label may be updated (TODO confirm).
update_label_allowed=['Q5',      #human
                      'Q3863',   #planetoide
                      'Q16521',  #taxon
                      'Q134556', #music single
                      'Q202444', #first name
                      'Q215380', #musicband
                      'Q482994', #music album
                      'Q5633421',#scientific magazine
                      ]
default_language = u'nl'
# Marker returned as the second tuple element by the its_* helpers, e.g. when
# the item already has a description or a required claim is absent.
txt2skip = u'|skip!|'
output2screen = False
prelog = False   #if set to True, it will write an extra logfile containing the item before it is processed, helpful in case of an error on unknown items
skiplog= True


# Only the last assignment of each flag below takes effect; the earlier line
# is kept so the value can be switched by reordering or commenting out.
commit=False    #no changes online, only to the logfiles
commit=True     #write changes to the database

debugedo=True   #use one item, just to test
debugedo=False  #use wd-query, process a lot


def log_premature(itemno):
  """Append *itemno* as one line to NL-omschrijving.prelog.csv (UTF-8).

  The ``with`` block closes the file, so no explicit close is needed.
  """
  with codecs.open("NL-omschrijving.prelog.csv", "a", encoding="utf-8") as logfile:
    # One-element tuple so that a tuple-valued itemno cannot break the format.
    logfile.write('%s\n' % (itemno,))

def log_skipped(itemno):
  """Append *itemno* as one line to NL-omschrijving.skiplog.csv (UTF-8).

  The ``with`` block closes the file, so no explicit close is needed.
  """
  with codecs.open("NL-omschrijving.skiplog.csv", "a", encoding="utf-8") as logfile:
    # One-element tuple so that a tuple-valued itemno cannot break the format.
    logfile.write('%s\n' % (itemno,))
  
  
def logme(verbose, formatstring, *parameters):
  """Append a %-formatted line to NL-omschrijving.log.csv, optionally echoing it.

  verbose      -- when true, the formatted message is also printed
  formatstring -- %-style format string
  parameters   -- values substituted into formatstring

  Formatting and write failures are reported on stdout rather than raised,
  so a faulty log call cannot abort the run.
  """
  try:
    message = formatstring % parameters
  except (TypeError, ValueError, KeyError):
    exctype, value = sys.exc_info()[:2]
    print("1) Error writing to logfile on: [%s] [%s]" % (exctype, value))
    # The arguments do not fit the format string, so formatting again for the
    # echo would only raise a second time; show the raw pieces instead.
    print(u'%s %r' % (formatstring, parameters))
    return

  with codecs.open("NL-omschrijving.log.csv", "a", encoding="utf-8") as logfile:
    try:
      logfile.write(u'%s\n' % (message,))
    except Exception:
      exctype, value = sys.exc_info()[:2]
      print("1) Error writing to logfile on: [%s] [%s]" % (exctype, value))
      verbose = True    # force the echo so the lost line is at least visible
  if verbose:
    print(message)

def log_unknown(verbose, formatstring, *parameters):
  """Append a %-formatted line to NL-omschrijving.missing.csv, optionally echoing it.

  verbose      -- when true, the formatted message is also printed
  formatstring -- %-style format string
  parameters   -- values substituted into formatstring

  Formatting and write failures are reported on stdout rather than raised,
  so a faulty log call cannot abort the run.
  """
  try:
    message = formatstring % parameters
  except (TypeError, ValueError, KeyError):
    print("2) Error writing to logfile on")
    # The arguments do not fit the format string, so formatting again for the
    # echo would only raise a second time; show the raw pieces instead.
    print(u'%s %r' % (formatstring, parameters))
    return

  with codecs.open("NL-omschrijving.missing.csv", "a", encoding="utf-8") as logfile:
    try:
      logfile.write(u'%s\n' % (message,))
    except Exception:
      print("2) Error writing to logfile on")
      verbose = True    # force the echo so the lost line is at least visible
  if verbose:
    print(message)

def get_property_id(findProperty):
  """Translate an English property name into its P-id; 'xxx' when unknown."""
  known_properties = dict([
    ('date of birth', 'P569'),
    ('is a', 'P31'),
    ('occupation', 'P106'),
    ('country of citizenship', 'P27'),
  ])
  try:
    return known_properties[findProperty]
  except KeyError:
    return 'xxx'

def nameofcitizenship(CountryName, Occupation):
  """Combine a country label and an occupation label into a Dutch phrase.

  CountryName -- Dutch label of the country
  Occupation  -- Dutch label of the occupation

  Returns '<demonym> <occupation>' when the country is in the table below,
  otherwise '<occupation> uit <country>'.
  """
  # Every key is a unicode literal so that non-ASCII names (e.g. Argentinië)
  # are the same type as the ASCII ones; each country appears exactly once.
  demonyms = {
    u'Koninkrijk der Nederlanden':     u'Nederlands',
    u'Nederland':                      u'Nederlands',
    u'Verenigde Staten van Amerika':   u'Amerikaans',
    u'Frankrijk':                      u'Frans',
    u'Spanje':                         u'Spaans',
    u'Verenigd Koninkrijk':            u'Brits',
    u'Duitsland':                      u'Duits',
    u'Rusland':                        u'Russisch',
    u'Polen':                          u'Pools',
    u'Luxemburg':                      u'Luxemburgs',
    u'Duitse Democratische Republiek': u'Oost-Duits',
    u'India':                          u'Indiaas',
    u'Turkije':                        u'Turks',
    u'Finland':                        u'Fins',
    u'Volksrepubliek China':           u'Chinees',
    u'Japan':                          u'Japans',
    u'Zweden':                         u'Zweeds',
    u'Canada':                         u'Canadees',
    u'Colombia':                       u'Colombiaans',
    u'Hongarije':                      u'Hongaars',
    u'Argentinië':                     u'Argentijns',
    u'Noorwegen':                      u'Noors',
    u'Sovjet-Unie':                    u'Russisch',
    u'Portugal':                       u'Portugees',
    u'Denemarken':                     u'Deens',
    u'':                               u'',
    u'Italië':                         u'Italiaans',
    u'België':                         u'Belgisch',
  }
  demonym = demonyms.get(CountryName, "")
  if demonym == "":
    # Unknown country: fall back to the "<occupation> from <country>" form.
    return Occupation + ' uit ' + CountryName
  return demonym + ' ' + Occupation

def get_description(language, wikidataitem):
  """Return the item's description in *language*, or '' when it has none."""
  return wikidataitem.descriptions.get(language, '')

def make_yeardatestr(thisdate):
    """Render the year of a date value as text.

    Returns '' for None or when ``precision`` is below 9 (less than a full
    year); negative years are rendered as '<abs year>v Chr'.
    """
    if thisdate is None:
        return ''
    if thisdate.precision < 9:
        # 9 = year only, 11 = full day; anything coarser is not usable
        return ''
    year = thisdate.year
    if year < 0:
        return '%sv Chr' % abs(year)
    return '%s' % year


def get_female_label_form(wditem,lng,male_form):
  """Return the P2521 text of *wditem* in language *lng*, else *male_form*.

  wditem    -- item whose ``claims`` may contain 'P2521' entries; each
               claim's ``target`` is expected to expose ``language`` and ``text``
  lng       -- language code to match
  male_form -- value returned when no matching claim exists

  When several claims match, the last one wins.
  """
  female = male_form
  for tryclaim in wditem.claims.get('P2521', []):
    target = tryclaim.target
    if target is None:
      # claim without a value; nothing to read
      continue
    if target.language == lng:
      female = target.text
  return female
  
def is_female(wditem):
  """Return True when the first ``gender`` claim of *wditem* targets ``female_gender``.

  Returns False when the claim is absent or has no target value.
  """
  if gender not in wditem.claims:
    return False
  LNKgender = wditem.claims.get(gender)[0].getTarget()
  if LNKgender is None:
    # claim present but without a value
    return False
  return LNKgender.title() == female_gender
  
"""
For a person, we take the occupation and country of citizenship, e.g. "association football player from Germany", "presenter from Italy", "accountant from the United States", "politician from Ukraine", etc.
"""             

def its_a_firstname(lng,repo,wd):
  """Pick a Dutch description for a given-name item.

  lng, repo -- unused; kept so the signature matches the other its_* helpers
  wd        -- item exposing ``claims`` and ``descriptions``

  The P31 values are checked first, then known description phrases in other
  languages. Returns a ``(description, '')`` tuple; the description falls
  back to 'voornaam' when nothing more specific is found.
  """
  female = ('vrouwelijke voornaam', '')
  male = ('mannelijke voornaam', '')
  unisex = ('zowel mannelijke als vrouwelijke voornaam', '')

  by_type = {
    'Q3409032': unisex,   # both male/female first name
    'Q11879590': female,
    'Q12308941': male,
  }
  if 'P31' in wd.claims:
    for p31 in wd.claims['P31']:
      target = p31.getTarget()
      if target is None:
        # claim without a value; cannot tell anything from it
        continue
      title = target.title()
      if title in by_type:
        return by_type[title]

  # Languages are consulted in this order. The u'xxx' entries look like
  # placeholders for unisex phrases that are not filled in yet.
  by_description = (
    ('en', {u'female given name': female, u'male given name': male}),
    ('de', {u'weiblicher Vorname': female, u'männlicher Vorname': male}),
    ('fr', {u'prénom féminin': female, u'prénom masculin': male, u'prénom épicène': unisex}),
    ('pl', {u'imię żeńskie': female, u'imię męskie': male, u'xxx': unisex}),
    ('it', {u'prenome femminile': female, u'prenome maschile': male, u'xxx': unisex}),
    ('hy', {u'իգական անձնանուն': female, u'արական անձնանուն': male, u'xxx': unisex}),
    ('nb', {u'kvinnenavn': female, u'mannlig fornavn': male, u'xxx': unisex}),
  )
  for language, phrases in by_description:
    if language in wd.descriptions:
      found = phrases.get(wd.descriptions[language])
      if found is not None:
        return found

  return ('voornaam', '')
  

  
def its_a_person(lng, repo, WDIperson):
    """Build a description for a person from occupation, citizenship and lifespan.

    lng       -- language code of the description to build
    repo      -- repository handed to pywikibot.ItemPage for linked items
    WDIperson -- the person item (``claims``, ``labels``, ``descriptions``)

    Returns ``(description, missing)``. ``missing`` is '' on success,
    ``txt2skip`` when the item should be skipped, or the title of an item
    for which a label or lookup was missing. The first P31 claim is read
    unconditionally, so the caller must ensure the item has one.
    """
    missing = u''
    if lng in WDIperson.descriptions:
        current = WDIperson.descriptions[lng]
        # Substring tests rather than find() > 0, so that a marker at the very
        # start of the description (index 0) is recognised as well.
        is_placeholder = (
            current in ['', 'person', 'persoon']
            or 'n/a' in current
            or 'sporter' in current
            or '(-)' in current
        )
        if not is_placeholder:
            if lng not in WDIperson.labels:
                return (current, '')
            return ('', txt2skip)  # there is already a description

    LNKinstance_of = WDIperson.claims.get(is_een)[0].getTarget()
    if LNKinstance_of is None:
        return ('', txt2skip)
    WDIinstance = pywikibot.ItemPage(repo, LNKinstance_of.title())
    WDIinstance.get(get_redirect=True)
    if lng not in WDIinstance.labels:
        missing = WDIperson.title()

    if 'P106' not in WDIperson.claims:  # occupation
        return (u'', txt2skip)
    LNKoccupation = WDIperson.claims.get('P106')[0].getTarget()
    try:
        WDIoccupation = pywikibot.ItemPage(repo, LNKoccupation.title())
        WDIoccupation.get(get_redirect=True)
        if lng in WDIoccupation.labels:
            if is_female(WDIperson):
                prnOccupation = get_female_label_form(WDIoccupation, lng, WDIoccupation.labels[lng])
            else:
                prnOccupation = WDIoccupation.labels[lng]
        else:
            prnOccupation = u'3 n/a'
            missing = txt2skip
    except Exception:
        # Best effort: covers a claim without value as well as a failed lookup.
        missing = WDIperson.title()
        prnOccupation = u'4 n/a'
        print("Except :-(")

    if 'P27' not in WDIperson.claims:  # country of citizenship
        return ('', txt2skip)
    prnCountry = u'n/a = '
    LNKcountry = WDIperson.claims.get('P27')[0].getTarget()
    try:
        WDIcountry = pywikibot.ItemPage(repo, LNKcountry.title())
        WDIcountry.get(get_redirect=True)
        if lng in WDIcountry.labels:
            prnCountry = WDIcountry.labels[lng]
        else:
            missing = WDIcountry.title()
    except Exception:
        missing = WDIperson.title()

    geboortejaarstr = '?'
    if geboren in WDIperson.claims:
        geboortejaarstr = make_yeardatestr(WDIperson.claims.get(geboren)[0].getTarget())

    # The lifespan suffix is only added when a date-of-death claim exists.
    if gestorven in WDIperson.claims:
        sterfdatum = WDIperson.claims.get(gestorven)[0].getTarget()
        if sterfdatum is not None:
            levenjaarstr = ' (' + geboortejaarstr + '-' + make_yeardatestr(sterfdatum) + ')'
        else:
            levenjaarstr = ' (' + geboortejaarstr + '-?)'
    else:
        levenjaarstr = ''

    my_description = nameofcitizenship(prnCountry, prnOccupation) + levenjaarstr
    print('%s' % my_description)
    return (my_description, missing)

    
def its_something_in_a_country(lng,repo,wdi,something):
    """Build '<something> <country label>' from the item's P17 or P495 claim.

    lng       -- language code of the description to build
    repo      -- repository handed to pywikibot.ItemPage
    wdi       -- the item being described
    something -- prefix phrase such as 'stad in'; trailing blanks are ignored

    Returns ``(description, missing)``. When the item already has a
    description in *lng* that description is returned with ``txt2skip``;
    when neither P17 nor P495 is present the result is ``('', txt2skip)``.
    """
    if lng in wdi.descriptions:
        return (wdi.descriptions[lng], txt2skip)

    # P17 takes precedence over P495; only the first one present is used.
    country_claim = None
    for prop in ('P17', 'P495'):
        if prop in wdi.claims:
            country_claim = prop
            break
    if country_claim is None:
        return ('', txt2skip)

    prnCountry = u'unknown'
    missing = u''
    LNKcountry = wdi.claims.get(country_claim)[0].getTarget()
    try:
        WDIcountry = pywikibot.ItemPage(repo, LNKcountry.title())
        WDIcountry.get(get_redirect=True)
        if lng in WDIcountry.labels:
            prnCountry = WDIcountry.labels[lng]
        else:
            prnCountry = u'n/a = '
            missing = WDIcountry.title()
    except Exception:
        # Best effort: covers a claim without value as well as a failed lookup.
        missing = wdi.title()

    # Some callers pass a prefix that already ends in a blank; strip it so the
    # description never contains a double space.
    return (something.rstrip(' ') + ' ' + prnCountry, missing)
        
def its_canton_of_France(lng,repo,wdi):
  """Describe a French canton, naming the ``gelegen_in`` target when it has a label.

  Returns ``(description, missing)``. ``missing`` is ``txt2skip`` when the
  item already has a non-empty description in *lng*, otherwise ''.
  """
  my_desc = u'Kanton in Frankrijk'
  current_desc = wdi.descriptions[lng] if lng in wdi.descriptions else u''
  if current_desc != '':
    return (my_desc, txt2skip)

  if gelegen_in in wdi.claims:
    LNKcommunity = wdi.claims.get(gelegen_in)[0].getTarget()
    if LNKcommunity is not None:   # a claim may carry no value
      WDIcommunity = pywikibot.ItemPage(repo, LNKcommunity.title())
      WDIcommunity.get(get_redirect=True)
      if lng in WDIcommunity.labels:
        my_desc = u'Kanton in ' + WDIcommunity.labels[lng] + u', Frankrijk'
  return (my_desc, '')
        
def its_a_mountain(lng,repo,wdi):
    """Describe a mountain via its_something_in_a_country with prefix 'berg in'."""
    prefix = 'berg in'
    outcome = its_something_in_a_country(lng, repo, wdi, prefix)
    return outcome
      
def its_disambigue(lng,repo,wdi):
  """Return the default disambiguation-page description unless one exists.

  An item that already has a description in *lng* yields ``('', txt2skip)``.
  """
  already_described = lng in wdi.descriptions
  if not already_described:
    return ('Wikimedia-doorverwijspagina', '')
  return ('', txt2skip)

def its_a_municipality_in_country(lng,repo,wditem,countryname):
    """Describe a municipality as 'gemeente in <P131 label>, <countryname>'.

    Returns ``(description, missing)``. An existing description in *lng* is
    returned with ``txt2skip``. When P131 is absent or has no value, the
    result is ('gemeente in <countryname>', <title of wditem>); when the
    P131 item lacks a label in *lng*, the second element is that item's title.
    """
    if lng in wditem.descriptions:
        return (wditem.descriptions[lng], txt2skip)

    fallback = u'gemeente in ' + countryname
    if 'P131' not in wditem.claims:
        return (fallback, wditem.title())

    LNKcommunity = wditem.claims.get('P131')[0].getTarget()
    if LNKcommunity is None:
        # claim present but without a value: treat like a missing claim
        return (fallback, wditem.title())

    WDIcommunity = pywikibot.ItemPage(repo, LNKcommunity.title())
    WDIcommunity.get(get_redirect=True)
    if lng in WDIcommunity.labels:
        return (u'gemeente in ' + WDIcommunity.labels[lng] + u', ' + countryname, '')
    return (fallback, WDIcommunity.title())
    
def its_an_episode(lng, repo, wditem):
  """Describe an episode as 'aflevering van <P179 label>'.

  Returns ``(description, missing)``. An existing description in *lng* is
  returned with ``txt2skip``; a missing or valueless P179 claim yields
  ``('', txt2skip)``; a P179 item without a label in *lng* yields its title
  in the description together with ``txt2skip``.
  """
  if lng in wditem.descriptions:
    return (wditem.descriptions[lng], txt2skip)
  if 'P179' not in wditem.claims:  # series
    return ('', txt2skip)

  LNKseries = wditem.claims.get('P179')[0].getTarget()
  if LNKseries is None:
    # claim present but without a value: nothing to name the episode after
    return ('', txt2skip)

  WDIseries = pywikibot.ItemPage(repo, LNKseries.title())
  WDIseries.get(get_redirect=True)
  if lng in WDIseries.labels:
    return ('aflevering van ' + WDIseries.labels[lng], '')
  return ('aflevering van ' + WDIseries.title(), txt2skip)
    
def its_a_list(lng,repo,wditem):
  """Return 'Wikimedia-lijst' unless the item already has a description in *lng*.

  An existing description is handed back together with ``txt2skip``.
  """
  if lng not in wditem.descriptions:
    return ('Wikimedia-lijst', '')
  existing = wditem.descriptions[lng]
  return (existing, txt2skip)
    
def its_a_town(lng,repo,wditem):
  """Describe a town via its_something_in_a_country with prefix 'stad in'."""
  prefix = 'stad in'
  outcome = its_something_in_a_country(lng, repo, wditem, prefix)
  return outcome
  
def its_a_football_team(lng, repo, wditem):
  """Describe a football team as 'voetbalteam uit <country>'.

  An existing description in *lng* is returned unchanged with ``txt2skip``.
  """
  existing = wditem.descriptions
  if lng not in existing:
    return its_something_in_a_country(lng, repo, wditem, 'voetbalteam uit')
  return (existing[lng], txt2skip)

def its_a_band(lng,repo,wditem):
    """Describe a music group as 'muziekgroep uit <country>'.

    An existing description in *lng* is returned unchanged with ``txt2skip``.
    """
    if lng in wditem.descriptions:
        return (wditem.descriptions[lng], txt2skip)
    return its_something_in_a_country(lng, repo, wditem, 'muziekgroep uit')
    
  
def its_a_tvseries(lng,repo,wditem):
  """Describe a television series as 'televisieserie uit <country>'.

  An existing description in *lng* is returned unchanged with ``txt2skip``.
  """
  existing = wditem.descriptions
  if lng not in existing:
    return its_something_in_a_country(lng, repo, wditem, 'televisieserie uit')
  return (existing[lng], txt2skip)

def its_a_tvprogram(lng,repo,wditem):
  """Describe a television programme as 'televisieprogramma uit <country>'.

  An existing description in *lng* is returned unchanged with ``txt2skip``.
  """
  existing = wditem.descriptions
  if lng not in existing:
    return its_something_in_a_country(lng, repo, wditem, 'televisieprogramma uit')
  return (existing[lng], txt2skip)

def its_a_musicalbum(lng,repo,wditem):
  """Describe a music album as 'muziekalbum van <P175 label>'.

  Returns ``(description, missing)``. An existing description in *lng* is
  returned with ``txt2skip``. Falls back to plain 'muziekalbum' when P175 is
  absent, has no value, or its item has no label in *lng*.
  """
  if lng in wditem.descriptions:
    return (wditem.descriptions[lng], txt2skip)

  if 'P175' in wditem.claims:
    LNKartist = wditem.claims.get('P175')[0].getTarget()
    if LNKartist is not None:   # a claim may carry no value
      WDIartist = pywikibot.ItemPage(repo, LNKartist.title())
      WDIartist.get(get_redirect=True)
      if lng in WDIartist.labels:
        return ('muziekalbum van ' + WDIartist.labels[lng], '')

  return ('muziekalbum', '')
  
  
def its_a_galaxy(lng,repo,wditem):
  """Describe a galaxy as 'sterrenstelsel in <P59 label>', else 'sterrenstelsel'.

  Delegates to its_a_generalthing, which performs the P59 lookup.
  """
  return its_a_generalthing(lng,repo,wditem,'sterrenstelsel','sterrenstelsel in ','P59')
  
def its_a_book(lng,repo,wditem):
  """Describe a book as 'boek van <P50 label>', else 'boek'.

  Returns ``('', txt2skip)`` when the item already has a description in
  *lng* other than 'boek' or the empty string, matching the checks in
  its_a_single and its_a_carmodel.
  """
  if lng in wditem.descriptions:
    if wditem.descriptions[lng] not in [u'boek', '']:
      return ('', txt2skip)
  return its_a_generalthing(lng,repo,wditem,'boek','boek van ','P50')

def its_a_single(lng,repo,wditem):
  """Describe a single as 'single van <P175 label>', else 'single'.

  Returns ``('', txt2skip)`` when the item already has a description in
  *lng* other than 'single' or the empty string.
  """
  if lng in wditem.descriptions:
    if wditem.descriptions[lng] not in [u'single', '']:
      return ('', txt2skip)
  return its_a_generalthing(lng,repo,wditem,'single','single van ','P175')

def its_a_carmodel(lng,repo,wditem):  #car model by P176
  """Describe a car model as 'automodel van <P176 label>', else 'automodel'.

  Returns ``('', txt2skip)`` when the item already has a description in
  *lng* other than 'automodel' or the empty string.
  """
  if lng in wditem.descriptions:
    if wditem.descriptions[lng] not in [u'automodel', '']:
      return ('', txt2skip)
  return its_a_generalthing(lng,repo,wditem,'automodel','automodel van ','P176')
      
def its_a_generalthing(lng,repo,wditem,shortstr,longdescrstr,myclaim):
  """Build '<longdescrstr><label>' from the first *myclaim* target, else *shortstr*.

  The label is taken in *lng* when available; otherwise the languages in
  ``lng_canbeused`` are tried in order and the first non-empty label is used.
  Always returns ``(description, '')``.
  """
  name = u''
  target = None
  if myclaim in wditem.claims:
    target = wditem.claims.get(myclaim)[0].getTarget()

  if target is not None:
    linked = pywikibot.ItemPage(repo, target.title())
    linked.get(get_redirect=True)
    if lng in linked.labels:
      name = linked.labels[lng]
    else:
      for code in lng_canbeused:
        if code in linked.labels:
          name = linked.labels[code]
          if name != '':
            # first usable fallback label found
            break

  if name != '':
    return (longdescrstr + name, '')
  return (shortstr, '')
     
def its_a_discography(lng,repo,wditem):
  """Describe a discography as 'discografie van <P175 label>', else 'discografie'.

  The artist label is taken in *lng* when present, otherwise from the first
  language of ``lng_canbeused`` that has one. Always returns
  ``(description, '')``.
  """
  plain = ('discografie', '')
  if 'P175' not in wditem.claims:
    return plain
  artistLNK = wditem.claims.get('P175')[0].getTarget()
  if artistLNK is None:
    return plain

  wdArtist = pywikibot.ItemPage(repo, artistLNK.title())
  wdArtist.get(get_redirect=True)
  # Preferred language first, then the fallbacks in their listed order.
  for code in [lng] + list(lng_canbeused):
    if code in wdArtist.labels:
      return ('discografie van ' + wdArtist.labels[code], '')
  return plain
     
def its_a_taxon(lng,repo,wditem):
  """Return the existing description in *lng*, or the generic 'taxon'.

  TODO (from the original note): walk P171 (parent taxon) until the
  taxon rank P105 is Q19970288 / no value, and use that ancestor as the
  first part of the description (e.g. "insect").
  """
  existing = wditem.descriptions
  description = existing[lng] if lng in existing else u'taxon'
  return (description, u'')
  
def action_one_item(repo, wditem, lng):
    global output2screen
    global items2do
    
    items_found=0
    missing = u''
    my_description=u''
    orig_desc = get_description(lng,wditem)
    en_description = u''
    en_description = get_description('en',wditem)
    type_of_item=u''
    placefound=''

    
    items2do -= 1
    
    str1 = '{:>10d}'.format(items2do)
    str2 = '{:>10}'.format(wditem.title())
    
    sys.stdout.write("\r%s%s" % (str1, str2))
                                    
    if (is_een in wditem.claims):
      type_id = wditem.claims.get(is_een)[0].getTarget()
      if type_id != None:
        type_of_item = type_id.title()  #we only use the first one, and assume the first one is most relevant
    if (type_of_item!='') :  
      if (False):
        pass #just to have the same structure below
      elif (type_of_item == 'Q5'): #and (my_description==''):
        my_description,missing = its_a_person(lng, repo, wditem)
        placefound='person'
      elif (orig_desc!=''):
        pass #skip if something already filled in. All below items will never replace an existing description
      elif type_of_item=='Q4167410': #disambiguation-page
        if (orig_desc.lower() in ['','dp','doorverwijzing','doorverwijspagina']):
          my_description,missing = its_disambigue(lng,repo,wditem)
        placefound='disamb'
      elif type_of_item=='Q8502': #a mountain
        my_description,missing = its_a_mountain(lng,repo,wditem)
        placefound='mountain'
      elif type_of_item=='Q16521': #it is a taxon
        if (orig_desc in ['','']): #old one is blank
          my_description,missing=its_a_taxon(lng,repo,wditem) #fix it to "taxon"
        placefound='taxon'
      elif type_of_item=='Q577': #jaar
        my_description='jaar'
        placefound='jaar'
      elif (type_of_item=='Q515') or (type_of_item=='Q5119') or (type_of_item=='Q1549591') or (type_of_item=='Q3957') or (type_of_item=='Q486972'): #stad
        my_description,missing = its_a_town(lng,repo,wditem)
        placefound='town'
      elif type_of_item=='Q747074': #Italian communiity
        my_description,missing = its_a_municipality_in_country(lng,repo,wditem,u'Italië')
        placefound='IT-gemeente'
      elif type_of_item=='Q484170': #Franse gemeente 
        my_description,missing = its_a_municipality_in_country(lng,repo,wditem,u'Frankrijk')
        placefound='FR-gemeente'
      elif type_of_item=='Q262166' or (type_of_item=='Q22865' ): #Duitse gemeente
        my_description,missing = its_a_municipality_in_country(lng,repo,wditem,u'Duitsland')
        placefound='DE-gemeente'
      elif type_of_item=='Q13406463': #wikimedia-lijst
        my_description,missing = its_a_list(lng,repo,wditem)
        placefound='lijst'
      elif type_of_item=='Q476028': #voetbalteam
        my_description,missing = its_a_football_team(lng,repo,wditem)
        placefound='voetbalteam'
      elif type_of_item=='Q11173':
        if (orig_desc in ['','']):               #was blank, it should be, but better double check
          my_description='chemische stof' #then use default description
        placefound='chemische stof'
      elif type_of_item=='Q5398426': #tv_series
        my_description,missing=its_a_tvseries(lng,repo,wditem)
        placefound='tvserie'
      elif type_of_item=='Q1983062': #aflevering/episode
        my_description,missing=its_an_episode(lng,repo,wditem)
        placefound='episode'
      elif type_of_item=='Q21191270': #aflevering/episode van tv-serie
        my_description,missing=its_an_episode(lng,repo,wditem)
        placefound='tv-episode'
      elif type_of_item=='Q184188': #Frans kanton
        my_description,missing=its_canton_of_France(lng,repo,wditem)
        placefound='Frans kanton'
      elif type_of_item=='Q318': #sterrenstelsel
        my_description,missing=its_a_galaxy(lng,repo,wditem)      
        placefound='galaxy'
      elif type_of_item=='Q215380': #muziekband
        my_description,missing = its_a_band(lng,repo,wditem)
        placefound='band'
      elif type_of_item=='Q39367': #hondenras
        my_description=u'hondenras'
        placefound='hondenras'
      elif type_of_item=='Q34770': #taal
         my_description='taal'
         placefound='taal'
      elif type_of_item=='Q482994': #muziekalbum
        if (orig_desc.lower() in ['','muziekalbum','album','cd']):
          my_description,missing=its_a_musicalbum(lng,repo,wditem)
          placefound='muziekalbum'
      elif type_of_item=='Q11266439':
        if (orig_desc.lower() in ['','template','sjabloon']):
          my_description='Wikimedia-sjabloon'
          placefound='template'
      elif type_of_item=='Q310890':  #monotypiscal taxon
        if (orig_desc in ['','']):  
          my_description = 'monotypische taxon'
          placefound='x'
      elif type_of_item=='Q877358': #resolution of the UN
        if (orig_desc in ['','']):
          my_description='resolutie van de Veiligheidsraad van de Verenigde Naties'
          placefound='VN-resolutie'
      elif type_of_item=='Q14752149':  #amateur football club
        if (orig_desc in ['','']):
          my_description,missing=its_something_in_a_country(lng,repo,wditem,'amateurvoetbalclub uit')
          placefound='amateurvoetbalclub'
      elif type_of_item=='Q15416':   #television program
        if (orig_desc in ['','']):
          my_description,missing = its_a_tvprogram(lng,repo,wditem)
          placefound='televisieprogramma'
      elif type_of_item=='Q43229': #organisation
        if (orig_desc in ['','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,'organisatie uit')
          placefound='organisatie'
      elif type_of_item=='Q728937': #railway line
        if (orig_desc in ['','']):      
          my_description,missing=its_something_in_a_country(lng,repo,wditem,'spoorlijn in')
          placefound='railwayline'
      elif type_of_item=='Q7278': #political party
        if (orig_desc in ['','']):      
          my_description,missing=its_something_in_a_country(lng,repo,wditem,'politieke partij uit')
          placefound='political party'
      elif ((type_of_item=='Q783794') or (type_of_item=='Q4830453')):
        pass
        placefound='bedrijf'
      elif type_of_item=='Q11424': #film uit P495 (P577)
        pass
        placefound='film'
      elif type_of_item=='Q18340514': #gebeurtenis in jaar
        pass
        placefound='gebeurtenis'
      elif type_of_item=='Q1539532':
        pass
        placefound='sportseizoen'

      elif type_of_item=='Q3231690': #automodel van P176
        if (orig_desc in ['','']):      
          my_description,missing = its_a_carmodel(lng,repo,wditem)
          placefound='automodel'
      elif type_of_item=='Q3192808': #commune in Madagascar
        if (orig_desc in ['','']):      
          my_description='commune in Madagascar'
          placefound='madagascar'
      elif type_of_item=='Q532':  #dorp in P17
        if (orig_desc.lower in ['dorp','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,'dorp in ')
          placefound='dorp'
      elif type_of_item=='Q1092563': #periodiek in het genre P136
        if (orig_desc in ['periodiek','']):      
          my_description,missing = its_a_generalthing(lng,repo,wditem,'periodiek','periodiek in het genre ','P136')
          placefound='periodiek'
      elif type_of_item=='Q18536594': #sportevenement op de Olympische Spelen
        if (orig_desc in ['','']):      
          my_description = 'sportevenement op de Olympische Spelen'
          placefound='OS-event'
      elif type_of_item=='Q7889':  #computerspel  genre=P136   ontwikkelaar=P178  uitgeverij=P123
        if (orig_desc in ['','']):      
          pass
          placefound='computerspel'
      elif type_of_item=='Q659103':  #gemeente in P131, Roemenië
        if (orig_desc in ['','']):      
          pass
          placefound='Roemenie'
      elif type_of_item=='Q46970': #luchtvaartmaatschappij uit P17
        if (orig_desc in ['luchtvaartmaatschappij','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,'luchtvaartmaatschappij uit')
          placefound='airliner'
      elif type_of_item=='Q15081032': #historisch motorfietsmerk
        if (orig_desc in ['','']):       
          pass
          placefound='motorfiets'
      elif type_of_item=='Q178561': #veldslag in #P17
        if (orig_desc in ['','']):      
          pass
          placefound='veldslag'
      elif type_of_item=='Q618779': #onderscheiding
        if (orig_desc in ['','']):      
          pass
          placefound='awardy'
      elif type_of_item=='Q106259': #polder in P131, P17
        if (orig_desc in ['','']):      
          pass
          placefound='polder'
      elif type_of_item=='Q18127': #platenlabel uit P17
        if (orig_desc in ['','']):      
          pass
          placefound='recordlabel'
      elif type_of_item=='Q3184121': #gemeente in Brazilië
        if (orig_desc in ['','']):      
          my_description = u'gemeente in Brazilië'
          placefound='Brazil'
      elif type_of_item=='Q2635894':  #hoorspel uit de serie P179/uit P495
        if (orig_desc in ['','']):      
          pass
          placefound='hoorspel'
      elif type_of_item=='Q523':  #ster uit het sterrenbeeld P59
        if (orig_desc in ['','']):      
          pass
          placefound='ster'
      elif type_of_item=='Q197': #vliegtuig van P176
        if (orig_desc in ['','']):      
          pass
          placefound='plane'
      elif type_of_item=='Q847017': #sportvereniging uit P17
        if (orig_desc in ['','']):      
          pass
          placefound='x'
      elif type_of_item=='Q2590631': #gemeente in Hongarije
        if (orig_desc in ['','']):      
          my_description = u'gemeente in Hongarije'
          placefound='hungary'
      elif type_of_item=='Q3024240': #historisch land in P30
        if (orig_desc in ['','']):      
          my_description = u'historisch land'
          placefound='histland'
      elif type_of_item=='Q41710': #etniciteit
        if (orig_desc in ['','']):      
          my_description = u'etnische groep'
          placefound='etnic'
      elif type_of_item=='Q11446':  #schip
        if (orig_desc in ['','']):      
          my_description = u'schip'
          placefound='schip'
      elif type_of_item=='Q180684': #conflict
        if (orig_desc in ['','']):      
          pass
          placefound='conflict'
      elif type_of_item=='Q5153359': #cz gemeente
        if (orig_desc in ['','']):      
          my_description = u'gemeente in Tsjechië'
          placefound='CZ-gemeente'
      elif type_of_item=='Q1131296': #portugese bestuurslaag, fregusia
        if (orig_desc in ['','']):      
          my_description = u'freguesia in Portugal'
          placefound='fregusia'
      elif type_of_item=='Q123705':  #wijk in P131, P17
        if (orig_desc in ['','']):      
          pass
          placefound='wijk'
      elif type_of_item=='Q14659': #heraldisch wapen uit P17
        if (orig_desc in ['','']):      
          pass
          placefound='heraldiek'
      elif type_of_item=='Q2831984': #stripalbum uit de serie P179
        if (orig_desc in ['','']):      
          pass
          placefound='strip'
      elif type_of_item=='Q207628': #compositie van P86
        if (orig_desc in ['','']):      
          pass
          placefound='compositie'
      elif type_of_item=='Q95074':  #personage uit P1080
        if (orig_desc in ['','']):      
          pass
          placefound='personage'
      elif type_of_item=='Q42032':  #ccTLD top level domain van P17
        if (orig_desc in ['','']):      
          pass
          placefound='ccTLD'
      elif type_of_item=='Q106658':  #Landkreis in P131
        if (orig_desc in ['','']):      
          pass
          placefound='landkreis'
      elif type_of_item=='Q2912397': #eendagswielerwedstrijd in P17
        if (orig_desc in ['','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,'eendaagse wielerwedstrijd in ')
          placefound='1dagswielerkoers'
      elif type_of_item=='Q571': #boek van P50
        if (orig_desc in ['','boek','book']):      
          my_description,missing = its_a_book(lng,repo,wditem)
          placefound='boek'
      elif type_of_item=='Q134556':  #single van P175
        if (orig_desc in ['','single']):      
          its_a_single(lng,repo,wditem)
          placefound='single'
      elif type_of_item=='Q355304':
        if (orig_desc in ['','watergang']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,u'watergang in ')
          placefound='watergang'
      elif type_of_item=='Q34763':
        if (orig_desc in ['schiereiland','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,u'schiereiland in ')
          placefound='schiereiland'
      elif type_of_item=='Q23442':
        if (orig_desc in ['eiland','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,u'eiland in ')
          placefound='eiland'
      elif type_of_item=='Q165':
        if (orig_desc in ['zee','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,u'zee in ')
          placefound='zee'
      elif type_of_item=='Q814648':
        if (orig_desc.lower() in ['parochie','']):      
          my_description,missing = its_something_in_a_country(lng,repo,wditem,u'parochie in ')
          placefound='parochie'
      elif type_of_item=='Q273057': #discografie
        if (orig_desc in ['','discografie']):      
          my_description,missing = its_a_discography(lng,repo,wditem)
          placefound='discografie'
      elif type_of_item=='Q3966183': #pokemon
        if (orig_desc in [u'Pokemonwezen',u'Pokémon-wezen','Pokemon',u'Pokémon','']):      
          my_description = u'Pokémonwezen'
          missing = ''
          placefound='Pokemonwezen'
      elif type_of_item=='Q5633421':
        if (orig_desc in ['tijdschrift','']):      
          my_description = u'wetenschappelijk tijdschrift'  #van P123
          missing = ''
          placefound='wetenschappelijk tijdschrift'
      elif type_of_item=='Q202444':
        if (orig_desc in ['voornaam','']):      
          my_description,missing=its_a_firstname(lng,repo,wditem)
          placefound='voornaam'
      elif type_of_item=='Q4167836':
        if (orig_desc in ['','categorie','Categorie','category']):      
          my_description='Wikimedia-categorie'
          missing=''
          placefound='categorie'
      elif type_of_item=='Q16970': #kerkgebouw
        if (orig_desc in ['kerkgebouw','']):
          my_description = 'kerkgebouw'
          placefound='kerkgebouw'
      elif type_of_item=='Q3863': #planetoide
        if (orig_desc in ['']):
          my_description=u'planetoïde'
          placefound='planetoide'
      elif type_of_item=='Q':
        if (orig_desc in ['','']):      
          pass
          placefound='x'
      elif my_description=='':
        #log_unknown(False,'%s|%s|%s',wditem.title(),type_of_item,en_description)
        placefound='unknown: %s' %type_of_item

      newlabel = u''
      if (my_description!='') and (missing!=txt2skip) or not(lng in wditem.labels):  
        try :
          data = {}
          if not (lng in wditem.labels):
            if (type_of_item in update_label_allowed):
              if (lng in wditem.sitelinks):  #use link-name of same lng as label 
                newlabel=wditem.sitelinks[lng+u'wiki']
                data.update({'labels': { lng: newlabel }})
              else:
                for trylng in lng_canbeused:
                  if trylng in wditem.labels: #same lng is not there, use en-wiki instead
                    mylabel = wditem.labels[trylng]
                    if (',' in mylabel):
                      pass #don't use this label, there are unacceptable characters in the label
                    elif ('(' in mylabel):
                      mylabel = mylabel[0:mylabel.index('(')]
                    data.update({'labels': { lng: mylabel }})
                    newlabel=mylabel #to put in logfile
                    break  #found, leave for-loop
              
          if (my_description!='') and (missing!=txt2skip):
            data.update( {'descriptions': {lng:my_description}} )
          if commit:  
             wditem.editEntity(data,summary=u'nl-description, [[User:Edoderoobot/Set-nl-description|python code]], logfile on https://goo .gl/BezTim')
             logme(False, '%s|%s|%s|%s|%s|%s|%s',datetime.now().strftime("%Y-%b-%d/%H:%M:%S"),wditem.title(),lng,newlabel,orig_desc,my_description,placefound)
          else:
             print('No commit, item not changed: %s' % wditem.title())          
        except ValueError:
          logme(False, "ValueError occured on %s",wditem.title())
        except :
          logme(False, "Undefined error occured on %s-[%s]",wditem.title(),missing)
        else :
          pass #print("Else:")

        items_found += 1
        
    return items_found
    




def testrun():
  """Debug entry point: exercise the description logic on one hard-coded item.

  Turns on screen output, builds an ItemPage for a single Q-id, prints a few
  progress markers, calls its_a_firstname() on the item when it exists and
  prints the two values it returns, then prints the result of
  action_one_item() for the item.
  """
  global output2screen
  output2screen = True
  site = pywikibot.Site()
  repo = site.data_repository()

  # Test subjects used earlier (only item2get below is live):
  #   'Q2251998'   Sadet Karabulut
  #   'Q6184'
  #   'Q565062'    Anne Bous, birthdate & date of death: only year is specified
  #   'Q23433337'  Chris Niehuis
  #   'Q26261265'
  item2get = 'Q20431412123123'  # redirected page

  print('a)')
  x = pywikibot.ItemPage(repo, item2get)
  # The constructor never returns None; the check is kept only so the
  # 'b)'/'c)' markers stay in the output. Identity is the right test for None.
  if x is None:
    print('b)')
  else:
    print('c)')

  item_exists = x.exists()
  if not item_exists:
    print('1) bestaat niet')
  else:
    print('2) bestaat dus wel?')

  if item_exists:
    # Load the entity only once we know it is there: calling get() on a
    # missing item raises in Pywikibot, which used to abort the run before
    # the 'Does NOT exist!' branch below could ever be reached.
    x.get(get_redirect=True)
    a, b = its_a_firstname('nl', repo, x)
  else:
    print('Does NOT exist!')
    a = b = 0
  print(a)
  print(b)
  # NOTE(review): still called for missing items, as before; whether
  # action_one_item() copes with that is not visible here -- confirm.
  print(action_one_item(repo, x, 'nl'))

  # Scratch snippets for inspecting claims/sources/backlinks of x:
  #
  #   claim = x.claims.get('P2046')  # [0].getTarget()
  #   claim = x.claims               # .get('P1082')  # [0].getTarget()
  #
  #   for findsources in x.claims['P2044'][0].sources:
  #     for onesource in findsources:
  #       for xxx in findsources[onesource]:
  #         print("%s - %s" % (onesource, dir(xxx)))
  #     print("================================")
  #
  #   for ref in x.getReferences():  # alt-shift-J / what-links-here
  #     print(ref)
  #     print("================================")

def wd_all_items(startrange=None, stoprange=26990215):
  """Yield loaded Wikidata items while counting Q-numbers downwards.

  Visits Q<startrange>, Q<startrange-1>, ... down to (but not including)
  Q<stoprange>. Redirects and non-existing numbers are skipped; every other
  item is fetched and yielded.

  startrange -- highest Q-number to visit (must be larger than stoprange).
                The value that used to be hard-coded here was left blank in
                the source, so it has to be supplied by the caller.
  stoprange  -- Q-number at which to stop (exclusive).

  Raises ValueError on first iteration when startrange is not given.

  Ranges used on earlier runs (start, stop):
    (21100000, 21000000)
    (26695727, 26693144)
  """
  if startrange is None:
    # Fail with a clear message instead of the former bare `startrange=`
    # statement, which was a syntax error that broke the whole script.
    raise ValueError('wd_all_items: startrange is not set; '
                     'pass the highest Q-number to visit')

  repo = pywikibot.Site('wikidata', 'wikidata').data_repository()
  # range() drives the countdown on its own; no manual decrement is needed.
  for itemno in range(startrange, stoprange, -1):
    wd = pywikibot.ItemPage(repo, 'Q%d' % itemno)
    if wd.isRedirectPage():
      continue
    if wd.exists():
      wd.get(get_redirect=True)
      yield wd
 
def wd_sparql_query(spq):
  """Yield every item returned by the SPARQL query `spq`, fetched beforehand.

  spq -- SPARQL query text handed to WikidataSPARQLPageGenerator.
  """
  site = pywikibot.Site('wikidata', 'wikidata')
  for item in pg.WikidataSPARQLPageGenerator(spq, site=site):
    # Load the entity data before handing the item to the caller.
    item.get(get_redirect=True)
    yield item
  
def wd_from_file():
  """Yield loaded Wikidata items for the Q-ids listed in query.csv.

  Each line of query.csv is expected to carry a Q-id in its first
  comma-separated field, optionally preceded by other text (e.g. an entity
  URL). The id is everything from the first 'Q' to the end of that field.
  Lines whose first field contains no 'Q' (such as a header row) are skipped.
  """
  repo = pywikibot.Site('wikidata', 'wikidata').data_repository()
  # `with` closes the file once the generator is exhausted or discarded;
  # the handle used to be left open.
  with open('query.csv', 'r') as csvfile:
    for line in csvfile:
      # Split on the first comma instead of slicing up to find(','):
      # find() returns -1 when there is no comma, which silently chopped the
      # last character (a digit, on a final line without a newline).
      first_field = line.split(',', 1)[0].strip()
      qpos = first_field.find('Q')
      if qpos < 0:
        continue
      wditem = pywikibot.ItemPage(repo, first_field[qpos:])
      wditem.get(get_redirect=True)
      yield wditem

def wd_user_edits(username,ucsite,totaledits):
  """Yield loaded Wikidata items for pages a user recently edited.

  username   -- account whose contributions are listed.
  ucsite     -- site object passed to UserContributionsGenerator.
  totaledits -- maximum number of contributions to request.

  Only main-namespace contributions are requested (namespaces=[0]); pages
  or items that do not exist are skipped.
  """
  repo=pywikibot.Site('wikidata','wikidata').data_repository()
  useredits=pg.UserContributionsGenerator(username,site=ucsite,total=totaledits,namespaces=[0])
  for oneedit in useredits:
    # Check existence *before* touching the content. The earlier order
    # fetched the page first, so the exists() test could never protect the
    # fetch. The fetched text itself was never used, so it is not requested.
    if not oneedit.exists():
      continue
    wd=pywikibot.ItemPage(repo,oneedit.title())
    if not wd.exists():
      continue
    # The stray `pimpampet=False` keyword was dropped: it is not a parameter
    # of ItemPage.get(). NOTE(review): Pywikibot's Wikibase get() is
    # documented to reject unknown arguments -- confirm for the installed
    # version.
    wd.get(get_redirect=True)
    yield wd
  
def sparql_nodescription(sparql):
  """Wrap `sparql` in an outer query that keeps rows lacking ?itemDescription.

  The inner query is embedded verbatim between the fixed head and tail below;
  the result is returned as one string.
  """
  head = 'select distinct ?item where {{'
  tail = '}filter (!bound(?itemDescription))}'
  inner = '%s' % sparql
  return head + inner + tail
 
def main(debug=False):
    """Feed a stream of Wikidata items to action_one_item() and count the hits.

    debug -- when true, print an extra progress marker.

    Prints 'main' and 'Start' before processing and 'Klaar: <n>' afterwards,
    where <n> is the sum of the values returned by action_one_item().
    Items are processed in the language given by default_language.
    """
    print ("main")

    site=pywikibot.Site('wikidata','wikidata')
    repo=site.data_repository()

    lng = default_language
    if debug: print("main-1")

    # Item source. Only one generator is live; the long chain of
    # reassignments that used to sit here always ended on wd_all_items().
    # Swap in one of the alternatives below to process a different set:
    #   wd_from_file()
    #   wd_user_edits('Edoderoo',site,10000)
    #   wd_sparql_query('select ?item where {?item wdt:P31 wd:Q3863}')
    #   wd_sparql_query('select ?item where {?item wdt:P31 wd:Q202444 }')
    #   wd_sparql_query('select ?item where {?item wdt:P31 wd:Q5633421 }')
    #   wd_sparql_query(sparql_nodescription(
    #       'select ?item where {?item wdt:P31 wd:Q5633421. '
    #       'OPTIONAL { ?item schema:description ?itemDescription  } }'))
    pigenerator = wd_all_items()

    items_processed=0
    print('Start')
    for wd in pigenerator:
      if prelog: log_premature(wd.title())
      # Pass the configured language rather than a second hard-coded 'nl',
      # so changing default_language actually takes effect.
      thisone = action_one_item(repo,wd,lng)
      items_processed += thisone
      #if (thisone==0) and (skiplog): log_skipped(wd.title())
      #if (items_processed>1): break
    print('Klaar: %d' % items_processed)


    
# Script entry: run either the single-item debug routine or the full job,
# and always shut Pywikibot down afterwards, even when the run fails.
try:
  if not debugedo:
    main()
  else:
    print("debug mode: start")
    testrun()
finally:
  pywikibot.stopme()