Wikipedysta:JarektBot/Dodaj Szablon Kontrola autorytatywna.py

#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
Script for maintenance and automatic improvement of people categories on Wikimedia Commons (called "source"):
* The script goes through all subcategories of the provided category that also belong to [[:Category:People by name]]
* reads the dates of birth and death from categories
* if there are no interwiki links, then it scans through the list of "target" wikis and uses the Commons name to see if there exists an article with the same name
** if an article is found, then we read the dates of birth and death from its categories
** if the name and at least one date match, then we consider it a match and add interwiki links to the Commons category
** if the name matches and the article has a "Commons category" link, then we consider it a match and add interwiki links to the Commons category
* if interwiki links exist, then see if we can copy from another Wikipedia:
** date of birth
** date of death
** DEFAULTSORT
** Authority control template

The script was also adapted for adding {{Kontrola autorytatywna}} templates to Wikipedia articles.

"""
#
# (C) Jarekt, 2011
#
# Distributed under the terms of the MIT license.
#

import sys, os.path, glob, string, codecs, add_text, time
import wikipedia as pywikibot
import config, catlib, time, re, pagegenerators

# === Definitions ======================================================
# Language codes of the Wikipedias that add_interwiki() probes, in order, for
# an article whose title equals the source page title.
interwiki_sites = ['en', 'de', 'fr', 'nl', 'es', 'it', 'pt', 'sv'] # 'pl',

# Per-site regular expressions locating the year of birth in page wikitext.
# Keys are site names in 'family:language' form; group 1 of every pattern
# captures a 3- or 4-digit year, which year_find() converts to an int.
# Most patterns look for a "born in <year>" category link; the Italian one
# reads the AnnoNascita template parameter instead.
birth_category = {
   u'commons:commons': u'\[\[[Cc]ategory:(\d\d\d\d?) births[\]\|]',
   u'wikipedia:de'   : u'\[\[[Kk]ategorie:Geboren (\d\d\d\d?)[\]\|]',
   u'wikipedia:en'   : u'\[\[[Cc]ategory:(\d\d\d\d?) births[\]\|]',
   u'wikipedia:es'   : u'\[\[[Cc]ategoría:Nacidos en (\d\d\d\d?)[\]\|]',
   u'wikipedia:fr'   : u'\[\[[Cc]atégorie:Naissance en (\d\d\d\d?)[\]\|]',
   u'wikipedia:it'   : u'\|AnnoNascita *= *(\d\d\d\d?)',
   u'wikipedia:pl'   : u'\[\[[Kk]ategoria:Urodzeni w (\d\d\d\d?)[\]\|]',
   u'wikipedia:pt'   : u'\[\[[Cc]ategoria:Nascidos em (\d\d\d\d?)[\]\|]',
   u'wikipedia:sv'   : u'\[\[[Kk]ategori:Födda (\d\d\d\d?)[\]\|]',
   u'wikipedia:ru'   : u'\[\[Категория:Родившиеся в (\d\d\d\d?) году[\]\|]',
   u'wikipedia:zh'   : u'\[\[[Cc]ategory:(\d\d\d\d?)年出生[\]\|]',
   u'wikipedia:ja'   : u'\[\[[Cc]ategory:(\d\d\d\d?)年生[\]\|]',
}

# Per-site regular expressions locating the year of death; same layout and
# same set of keys as birth_category (the Italian pattern reads AnnoMorte).
death_category = {
   u'commons:commons': u'\[\[[Cc]ategory:(\d\d\d\d?) deaths[\]\|]',
   u'wikipedia:de'   : u'\[\[[Kk]ategorie:Gestorben (\d\d\d\d?)[\]\|]',
   u'wikipedia:en'   : u'\[\[[Cc]ategory:(\d\d\d\d?) deaths[\]\|]',
   u'wikipedia:es'   : u'\[\[[Cc]ategoría:Fallecidos en (\d\d\d\d?)[\]\|]',
   u'wikipedia:fr'   : u'\[\[[Cc]atégorie:Décès en (\d\d\d\d?)[\]\|]',
   u'wikipedia:it'   : u'\|AnnoMorte *= *(\d\d\d\d?)',
   u'wikipedia:pl'   : u'\[\[[Kk]ategoria:Zmarli w (\d\d\d\d?)[\]\|]',
   u'wikipedia:pt'   : u'\[\[[Cc]ategoria:Mortos em (\d\d\d\d?)[\]\|]',
   u'wikipedia:sv'   : u'\[\[[Kk]ategori:Avlidna (\d\d\d\d?)[\]\|]',
   u'wikipedia:ru'   : u'\[\[Категория:Умершие в (\d\d\d\d?) году[\]\|]',
   u'wikipedia:zh'   : u'\[\[[Cc]ategory:(\d\d\d\d?)年逝世[\]\|]',
   u'wikipedia:ja'   : u'\[\[[Cc]ategory:(\d\d\d\d?)年没[\]\|]',
}

# Name of the sort-key magic word on each site. The code prefixes it with
# '{{' to detect an existing sort key and to extract its value from a page.
DEFAULTSORT = {
   'commons:commons': u'DEFAULTSORT',
   'wikipedia:de'   : u'SORTIERUNG',
   'wikipedia:en'   : u'DEFAULTSORT',
   'wikipedia:es'   : u'DEFAULTSORT',
   'wikipedia:fr'   : u'DEFAULTSORT',
   'wikipedia:it'   : u'DEFAULTSORT',
   'wikipedia:pl'   : u'DEFAULTSORT',
   'wikipedia:pt'   : u'DEFAULTSORT',
   'wikipedia:sv'   : u'STANDARDSORTERING',
}

# Language codes of the Wikipedias scanned for an authority control template;
# the list order is the order in which they are consulted.
authority_control_sites = ['en', 'de', 'fr'] #, 'es', 'fr', 'pt', 'ru']
# Template parameter names that add_authority_control_to_wikipedia() copies;
# the list order is also the order in which they are written out.
authority_control_fields =['VIAF', 'LCCN', 'GND', 'SELIBR', 'SUDOC', 'ULAN', 'BNF', 'TYP']
# Name of the authority control template on each site, keyed by site name.
authority_control_template = {
   'commons:commons': u'Authority control',
   'wikipedia:en'   : u'Authority control',
   'wikipedia:de'   : u'Normdaten',
   'wikipedia:ru'   : u'Библиоинформация',
   'wikipedia:pl'   : u'Kontrola autorytatywna',
   'wikipedia:fr'   : u'Autorité',
   'wikipedia:es'   : u'Normdaten',
   'wikipedia:pt'   : u'Normdaten',
   'wikipedia:it'   : u'Controllo di autorità',
}
# ================================================   
def year_find(pattern, text):
  """
  Return the year that `pattern` captures in `text`, as an int.

  pattern -- regular expression whose group 1 holds the digits of the year
  text    -- wikitext to search

  Returns 0 when the pattern does not occur in the text.
  """
  found = re.search(pattern, text)
  if found is None:
    return 0
  return int(found.group(1).strip())

# ================================================   
class PeoplePageBot:

  def __init__(self, generator, always=False):
    self.generator    = generator
    self.always       = False  # always save without asking: should be false
    self.dry_run      = False  # run the code without saving?
    self.iEditCount   = 0      # initialize edit counter
    self.maxEditCount = -50     # stop after number of edits (used only if >0)
    self.d_year       = 1;

  def run(self):
    skip = False
    for page in self.generator:
      try:
        if 'Memmed Emin Resulzade' in page.title():
          skip=False;
        if not skip:  
          #self.commons_page_maitenance(page)
          self.wikipedia_page_maitenance(page)
      except:
        pywikibot.output(u"   Error: skip %s" % page.title(asLink=True))
        time.sleep(60)


  def load(self, page):
      """
      Loads the given page, does some changes, and saves it.
      """
      if page.isRedirectPage():
        page = page.getRedirectTarget()
        pywikibot.output(u"   Redirect -> %s" % page.title(asLink=True))
      if '#' in page.title():
        return None, page
      try:
        # Load the page
        text = page.get()
      except pywikibot.NoPage:
        pywikibot.output(u"   Page %s does not exist; skipping."
                           % page.title(asLink=True))
      except pywikibot.IsRedirectPage:
        pywikibot.output(u"   Page %s is a redirect; skipping."
                           % page.title(asLink=True))
      else:
        return text, page
      return None, page
    
  # === Save page ======================================================================
  def save(self, text, page, comment, minorEdit=False, botflag=True):
      # only save if something was changed
      if text != page.get():
          # Show the title of the page we're working on.
          # Highlight the title in purple.
          pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                           % page.title())
          # show what was changed
          pywikibot.showDiff(page.get(), text)
          pywikibot.output(u'Comment: %s' %comment)
          if not self.dry_run:
              if not self.always:
                  choice = pywikibot.inputChoice(
                      u'Do you want to accept these changes?',
                      ['Yes', 'No', 'Always', 'Quit'],
                      ['y', 'N', 'a', 'q'], 'N')
                  if choice == 'a':
                      self.always = True
                  elif choice == 'q':
                      import sys
                      sys.exit()
              if self.always or choice == 'y':
                  try:
                      # Save the page
                      page.put(text, comment=comment,
                               minorEdit=minorEdit, botflag=botflag)
                      #time.sleep(1)
                      self.iEditCount+=1
                      if (self.iEditCount==self.maxEditCount):
                        sys.exit()
                  except pywikibot.LockedPage:
                      pywikibot.output(u"Page %s is locked; skipping."
                                       % page.title(asLink=True))
                  except pywikibot.EditConflict:
                      pywikibot.output(
                          u'Skipping %s because of edit conflict'
                          % (page.title()))
                  except pywikibot.SpamfilterError, error:
                      pywikibot.output(
u'Cannot change %s because of spam blacklist entry %s'
                          % (page.title(), error.url))
                  else:
                      return True
      return False

  # =====================================================================================================
  def reportProblemPage(self, page, type):
    """
    Append a line about a problematic page to the 'match_people.txt' log.

    page -- page the problem was found on
    type -- short description of the problem (e.g. 'no interwiki')

    Nothing is written unless config.without_interwiki is set.
    """
    if not config.without_interwiki:
      return
    f = codecs.open(
              pywikibot.config.datafilepath('match_people.txt'),
              'a', 'utf-8')
    try:
      f.write(u"# %s, %s\n" % (page, type))
    finally:
      # Close the log even when formatting or writing fails.
      f.close()

  # === match biography pages without interwikis =========================================================
  def match(self, src_page, trg_page, src_birth_year, src_death_year):
    """
    Decide whether `trg_page` is about the same person as `src_page`.

    src_page       -- source page (a Commons category or a Wikipedia article)
    trg_page       -- candidate article on another wiki
    src_birth_year -- year of birth read from the source page, 0 if unknown
    src_death_year -- year of death read from the source page, 0 if unknown

    Returns True when
    * the birth years are known and equal, or
    * the death years are known and equal, or
    * both birth years are known and the summed absolute difference of birth
      and death years does not exceed self.d_year, or
    * the source is a Commons page and the candidate links to it through a
      {{commons}}, {{commonscat}}/{{commons cat}} or {{commons category}}
      template.
    Returns False otherwise, and also when the candidate's site has no entry
    in birth_category or its text cannot be loaded.
    """
    # The table keys are site *names* such as 'wikipedia:en' (the same test
    # is done on sitename() in add_birth_death_category). Testing the site
    # object itself against those strings rejected every candidate.
    if trg_page.site().sitename() not in birth_category:
      return False
    trg_text, trg_page = self.load(trg_page)
    if trg_text is None:
      return False
    pywikibot.output(u'\n  >>>> %s <<<<' % (trg_page.title(asLink=True)))

    # Read birth / death years from target page
    trg_sitename = trg_page.site().sitename()
    trg_birth_year = year_find(birth_category[trg_sitename], trg_text)
    trg_death_year = year_find(death_category[trg_sitename], trg_text)
    birth_match = (trg_birth_year>0 and trg_birth_year==src_birth_year)
    death_match = (trg_death_year>0 and trg_death_year==src_death_year)
    year_gap = abs(trg_birth_year-src_birth_year) + abs(trg_death_year-src_death_year)
    approx_match = (trg_birth_year*src_birth_year>0 and year_gap<=self.d_year)
    date_match = birth_match or death_match or approx_match
    pywikibot.output(u"   %s birth=%i" % (trg_sitename, trg_birth_year))
    pywikibot.output(u"   %s death=%i" % (trg_sitename, trg_death_year))

    # Commonscat: does the candidate point back at the Commons page?
    Commonscat = False
    if src_page.site().sitename()=='commons:commons':
      trg_text_l = trg_text.lower()
      # The text is lowercased, so the titles must be lowercased as well, and
      # escaped so that characters such as '(' or '.' are taken literally.
      full_title = re.escape(src_page.title().lower())
      bare_title = re.escape(src_page.titleWithoutNamespace().lower())
      patterns = (
        u'\\{\\{commons\\|%s\\}\\}' % full_title,
        u'\\{\\{commons\\s?[ck]at\\|%s\\}\\}' % bare_title,
        u'\\{\\{commons category\\|%s\\}\\}' % bare_title,
      )
      for pattern in patterns:
        if re.search(pattern, trg_text_l) is not None:
          Commonscat = True
          break

    if (Commonscat or date_match):
      pywikibot.output(u"   MATCH")
    else:
      pywikibot.output(u"   NO MATCH")

    return (Commonscat or date_match)

  # === Add Interwiki Links ======================================================================
  def add_interwiki(self, src_page, src_page_title, src_birth_year, src_death_year, new_src_text, comment):
    """
    Look for an article about the same person and copy its interwiki links.

    For each language in interwiki_sites the article titled `src_page_title`
    is fetched (following a redirect). A disambiguation page is expanded into
    the pages it links to. The first candidate accepted by match() supplies
    the interwiki links, which are written into `new_src_text`.

    src_page       -- page being improved
    src_page_title -- title to look up on the other wikis
    src_birth_year -- year of birth of the source page, 0 if unknown
    src_death_year -- year of death of the source page, 0 if unknown
    new_src_text   -- current (possibly already modified) source wikitext
    comment        -- edit summary accumulated so far

    Returns (src_interwiki, new_src_text, comment); src_interwiki is an empty
    list when nothing matched.
    """
    src_interwiki = []
    for siteCode in interwiki_sites:
      trg_site = pywikibot.getSite(siteCode, u'wikipedia')
      trg_page = pywikibot.Page(trg_site, src_page_title)
      if trg_page.isRedirectPage():
        trg_page = trg_page.getRedirectTarget()
        pywikibot.output(u"   Redirect -> %s" % trg_page.title(asLink=True))
      if '#' in trg_page.title():
        continue

      if trg_page.isDisambig():
        # Try every page linked from the disambiguation page. Previously the
        # search returned as soon as the FIRST linked page failed to match,
        # which also abandoned all remaining wikis.
        matched = False
        for page in trg_page.linkedPages():
          if self.match(src_page, page, src_birth_year, src_death_year):
            trg_page = page
            matched = True
            break
      else:
        matched = self.match(src_page, trg_page, src_birth_year, src_death_year)
      if not matched:
        continue   # nothing usable on this wiki, try the next one

      src_interwiki = trg_page.interwiki()
      src_interwiki.append(trg_page)
      interwikis = {}
      for interwikiPage in src_interwiki:
        interwikis[interwikiPage.site()] = interwikiPage
      new_src_text = pywikibot.replaceLanguageLinks(new_src_text, interwikis, site=src_page.site())
      pywikibot.output(u"\03{green}Mathed with %s.\03{default}"% trg_page.title(asLink=True))
      comment = comment + u'copy interwiki from %s, ' % trg_page.title(asLink=True)
      break
    return (src_interwiki, new_src_text, comment)


  # === Add birth/death category ======================================================================
  def add_birth_death_category(self, src_page, src_interwiki, src_birth_year, src_death_year, new_src_text, comment):
    """
    Copy missing birth/death year categories (and a sort key) from interwikis.

    The interwiki pages are scanned in order until both years are known. A
    year missing on the source page is added as an "<year> births" /
    "<year> deaths" category. While scanning, the first sort key found is
    copied too if the source text has none.

    src_page       -- page being improved
    src_interwiki  -- list of linked pages on other wikis
    src_birth_year -- year of birth known so far, 0 if unknown
    src_death_year -- year of death known so far, 0 if unknown
    new_src_text   -- current (possibly already modified) source wikitext
    comment        -- edit summary accumulated so far

    Returns (new_src_text, comment). A lifetime that is negative or longer
    than 120 years is reported through reportProblemPage().
    """
    no_defaultsort = '{{'+DEFAULTSORT[src_page.site().sitename()] not in new_src_text
    src_site = src_page.site()
    src_cats = src_page.categories(get_redirect=False)
    for trg_page in src_interwiki:
      trg_site = trg_page.site().sitename()
      if trg_site not in birth_category:
        continue
      trg_text, trg_page = self.load(trg_page)
      if trg_text is None:
        continue
      # Read birth/death years from target page
      trg_birth_year = year_find(birth_category[trg_page.site().sitename()], trg_text)
      trg_death_year = year_find(death_category[trg_page.site().sitename()], trg_text)

      # add birth_year
      if (src_birth_year==0 and trg_birth_year>0):
        birth_cat = u'%i births' % trg_birth_year
        src_birth_year = trg_birth_year
        category = pywikibot.Page(src_site, birth_cat, defaultNamespace=14)
        if category not in src_cats:
          src_cats.append(category)
          new_src_text = pywikibot.replaceCategoryLinks(new_src_text, src_cats)
          comment = comment + u'copy birth year category from %s, ' % trg_page.title(asLink=True)
      # add death_year
      if (src_death_year==0 and trg_death_year>0):
        death_cat = u'%i deaths' % trg_death_year
        src_death_year = trg_death_year
        category = pywikibot.Page(src_site, death_cat, defaultNamespace=14)
        if category not in src_cats:
          src_cats.append(category)
          new_src_text = pywikibot.replaceCategoryLinks(new_src_text, src_cats)
          comment = comment + u'copy death year category from %s, ' % trg_page.title(asLink=True)
      # add DEFAULTSORT if I happen to find one
      if (no_defaultsort and trg_site in DEFAULTSORT):
        m = re.search(u'\\{\\{'+DEFAULTSORT[trg_page.site().sitename()]+u':([^\\}]*)\\}\\}', trg_text)
        if m is not None:
          new_src_text = '{{DEFAULTSORT:' + m.group(1).strip() + '}}\n' + new_src_text
          comment = comment + u'copy DEFAULTSORT from %s, ' % trg_page.title(asLink=True)
          # Remember that a sort key is now present; otherwise every further
          # interwiki page would prepend another DEFAULTSORT line.
          no_defaultsort = False
      if (src_birth_year*src_death_year>0):
        break   # both years known, no need to look further

    d = src_death_year-src_birth_year
    if (src_birth_year*src_death_year>0) and (d<0 or d>120):
      self.reportProblemPage(src_page, 'long lifetime')

    return (new_src_text, comment)

  # === Add authority data ======================================================================
  def add_authority_control_to_commons(self, src_page, src_interwiki, new_src_text, comment):
     """
     Copy an authority control template from an interwiki page.

     src_page      -- page being improved
     src_interwiki -- list of linked pages on other wikis
     new_src_text  -- current (possibly already modified) source wikitext
     comment       -- edit summary accumulated so far

     If the source is a Commons page whose article list contains a page in
     namespace 100 (a creator page), the template is written into that creator
     page, which is saved right away through self.save(). Otherwise the
     template is prepended to `new_src_text`. Nothing is added when the page
     examined already uses 'Authority control'.

     Returns (new_src_text, comment).

     NOTE(review): the existing-template check looks for 'Authority control'
     while the text inserted below is '{{Kontrola autorytatywna|...}}' --
     presumably a leftover of adapting the script for Polish Wikipedia;
     confirm before using this method on Commons.
     """
     # Is there creator page associated with this category?
     hasCreator = False
     if src_page.site().sitename()=='commons:commons':  
       src_pages  = src_page.articlesList()
       for page in src_pages:
         if page.namespace()==100: # if creator page
           creator_page = page
           hasCreator = True
           break

     # Does Creator or Category already have Authority control template?
     if hasCreator:
       page = creator_page
     else:
       page = src_page
     templates = page.templates()
     hasAuthority = False
     #pywikibot.output('       %s templates:' % page.title(asLink=True))
     for template in templates:                          # get all the templates
       if (template == 'Authority control'): 
          hasAuthority = True;
          pywikibot.output('Authority Control found at %s -> do not add' % page.title(asLink=True))
          break
        
     # If there is no Authority control template at the source page then check
     # the interwiki pages, in the order given by authority_control_sites.
     authority_text = ''
     if not hasAuthority:
       # Index the interwiki pages by site for direct lookup.
       interwikis = {}
       for interwikiPage in src_interwiki:
         interwikis[interwikiPage.site()]=interwikiPage
       for siteCode in authority_control_sites: # go in the order of authority_control_sites
         trg_site = pywikibot.getSite(siteCode, u'wikipedia')
         if trg_site in interwikis.keys():
           trg_page = interwikis[trg_site]
           trg_text, trg_page = self.load(trg_page)
           if (trg_text==None):
             continue
           pywikibot.output('     scan interwiki %s' % trg_page.title(asLink=True))
           # Group 1 captures everything between the template name and '}}'.
           search_str = u'\{\{'+authority_control_template[trg_page.site().sitename()]+u'\|([^\}]*)\}\}'
           m=re.search(search_str, trg_text) 
           if m!=None:
             if trg_page.site().sitename()=='wikipedia:de':
               # German pages: add the article title (spaces replaced by
               # underscores) as TSURL and drop the GNDName, GNDfehlt,
               # GNDCheck and REMARK parameters.
               title = trg_page.title()
               title = title.replace(' ','_')
               authority_text = u'{{Kontrola autorytatywna|' + m.group(1).strip()+'|TSURL='+title+'}}'
               authority_text = re.sub(u'\|GNDName=[^\|\}]*', u'', authority_text)
               authority_text = re.sub(u'\|GNDfehlt=[^\|\}]*', u'', authority_text)
               authority_text = re.sub(u'\|GNDCheck=[^\|\}]*', u'', authority_text)
               authority_text = re.sub(u'\|REMARK=[^\|\}]*', u'', authority_text)
             else:
               authority_text = u'{{Kontrola autorytatywna|' + m.group(1).strip()+'}}'
             #pywikibot.output(authority_text)
             break   # the first site that has the template wins

     # Add template
     if len(authority_text)>0:
       if hasCreator:
         authority_text = authority_text.replace('}}', '|bare=1}}')
         creator_txt  = creator_page.get()
         new_creator_txt = creator_txt
         # Remove commented out Authority control templates
         new_creator_txt = new_creator_txt.replace('<!-- {{Kontrola autorytatywna|PND=|VIAF=|LCCN=|ULAN=|bare=1}} -->', '') # common case
         new_creator_txt = re.sub(u'<!--\s*\{\{Authority control\|[^\}\n]*\}\}\s*-->', u'', new_creator_txt) # more general case
         new_creator_txt = re.sub(u'(\|\s*Authority\s*=)', ur'\1 '+authority_text, new_creator_txt) # if we have Authority field
         if authority_text not in new_creator_txt: # if we do not have Authority field
           # Insert a new Authority line just before the Option field.
           new_creator_txt = re.sub(u'(\n\s*\|\s*)(Option\s*=)', ur'\1Authority         = '+authority_text+ur'\1\2', new_creator_txt)
         comment = 'Copy {{Authority Control}} from %s' % trg_page.title(asLink=True)
         self.save(new_creator_txt, creator_page, comment)
       else:
         new_src_text = authority_text + '\n' + new_src_text
         comment = comment + 'copy {{Authority Control}} from %s, ' % trg_page.title(asLink=True)
     return (new_src_text, comment)

  # === Add authority data ======================================================================
  def add_authority_control_to_wikipedia(self, src_page, src_interwiki, new_src_text, comment):
    """
    Build an authority control template from the data found on other wikis.

    The interwiki pages on the wikis listed in authority_control_sites are
    scanned in that order. For every parameter named in
    authority_control_fields the first non-empty value found is kept. A
    template is only added when at least one of VIAF, LCCN or GND was found.
    It is inserted before {{DEFAULTSORT, else before the first
    [[Kategoria: link, else appended to the end of the text.

    src_page      -- article being improved
    src_interwiki -- list of linked pages on other wikis
    new_src_text  -- current (possibly already modified) source wikitext
    comment       -- edit summary accumulated so far

    Returns (new_src_text, comment); both are returned unchanged when the
    article already has an authority control template or nothing was found.
    """
    src_authority_control = authority_control_template[src_page.site().sitename()]
    for template in src_page.templates():
      if (template == src_authority_control or template == u'Authority control'):
        pywikibot.output('   Authority Control found at %s -> do not add' % src_page.title(asLink=True))
        return (new_src_text, comment)

    # Ordinarily interwikis[interwikiPage.site()] = interwikiPage would be
    # enough, but (per the original author) that does not seem to work, so
    # site and page objects are rebuilt from language code and title.
    interwikis = {}
    for interwikiPage in src_interwiki:
      site = pywikibot.getSite(interwikiPage.site().language(), u'wikipedia')
      interwikis[site] = pywikibot.Page(site, interwikiPage.title())

    AC = dict((field, '') for field in authority_control_fields)
    used_pages = []   # links to the pages that contributed at least one value
    for siteCode in authority_control_sites:
      trg_site = pywikibot.getSite(siteCode, u'wikipedia')
      if trg_site not in interwikis:
        continue
      trg_text, trg_page = self.load(interwikis[trg_site])
      if trg_text is None:
        continue
      pywikibot.output('     scan interwiki %s' % trg_page.title(asLink=True))
      search_str = u'\\{\\{'+authority_control_template[trg_page.site().sitename()]+u'\\|([^\\}]*)\\}\\}'
      m = re.search(search_str, trg_text)
      if m is None:
        continue
      site_used = False
      for param in m.group(1).strip().split('|'):
        name, _, value = param.partition('=')
        # Templates are often written as "| VIAF = 123": strip both sides so
        # such parameters are recognised.
        name = name.strip()
        value = value.strip()
        # pywikibot.output copes with non-ASCII text; a plain print could
        # raise UnicodeEncodeError and make run() skip the page.
        pywikibot.output(u'%s=%s' % (name, value))
        # Credit the site only when it really supplies a value.
        if name in AC and value and not AC[name]:
          AC[name] = value
          site_used = True
      if site_used:
        used_pages.append(trg_page.title(asLink=True))

    if not (AC["VIAF"] or AC["LCCN"] or AC["GND"]):
      pywikibot.output('   Found interwiki but nothing to improve -> Skipping')
      return (new_src_text, comment)

    authority_text = u'{{'+src_authority_control
    for field in authority_control_fields:
      if AC[field]:
        authority_text = u'%s|%s=%s' % (authority_text, field, AC[field])
    authority_text = authority_text + u'}}\n\n'
    pywikibot.output(authority_text)

    summary = u', '.join(used_pages)
    comment = comment+u'Robot skopiował szablon {{[[szablon:Kontrola autorytatywna|Kontrola autorytatywna]]}} z %s.' % summary
    # Plain string replacement (first occurrence only): unlike re.sub it does
    # not interpret backslashes in the inserted template text.
    new_src_text = new_src_text.replace(u'{{DEFAULTSORT', authority_text+u'{{DEFAULTSORT', 1)
    if authority_text not in new_src_text:
      new_src_text = new_src_text.replace(u'[[Kategoria:', authority_text+u'[[Kategoria:', 1)
      if authority_text not in new_src_text:
        new_src_text = new_src_text + u'\n' + authority_text
    return (new_src_text, comment)

    
  #===================================================================================            
  def commons_page_maitenance(self, src_page):
    """
    Improve one Commons people category.

    Steps, each skipped when not applicable:
    1. read the years of birth and death from the category text;
    2. if there are no interwiki links, try to find a matching article on
       other wikis (first under the full title, then with parenthesised parts
       removed) and copy its interwiki links;
    3. copy missing birth/death year categories from the interwiki pages;
    4. copy a sort key from the interwiki pages;
    5. copy authority control data;
    6. save the page if its text changed.

    Pages whose text does not mention 'Category:People by name' are ignored.
    Problems (implausible lifetime, no interwiki, no sort key) are reported
    through reportProblemPage().
    """
    comment = u''
    pywikibot.output(u"\03{lightpurple}Processing page %s.\03{default}"% src_page.title(asLink=True))
    src_text, src_page = self.load(src_page)
    src_sitename = src_page.site().sitename()
    if src_text is None:
      return
    if 'Category:People by name' not in src_text:
      return
    new_src_text = src_text

    # Read birth / death years from source page
    src_birth_year = year_find(birth_category[src_sitename], src_text)
    src_death_year = year_find(death_category[src_sitename], src_text)
    d = src_death_year-src_birth_year
    if (src_death_year*src_birth_year>0 and (d<0 or d>120)):
      self.reportProblemPage(src_page, 'long lifetime')
    pywikibot.output(u"   commons birth=%i" % src_birth_year)
    pywikibot.output(u"   commons death=%i" % src_death_year)

    # === Add Interwiki Links =================================================
    # Check Interwiki: if not found -> try to match people by birth/death dates
    src_interwiki = src_page.interwiki()
    src_page_title = src_page.title(withNamespace=False)
    # Try the original name when at least ONE of the two years is known. The
    # old test added the birth year to itself, so categories with only a
    # death year were never matched.
    if (src_birth_year+src_death_year>0 and len(src_interwiki)==0):
      (src_interwiki, new_src_text, comment) = self.add_interwiki(src_page, src_page_title,
                                                                  src_birth_year, src_death_year,
                                                                  new_src_text, comment)
    # remove non name parts, e.g. "(painter)", and try again
    if (len(src_interwiki)==0 and '(' in src_page_title):
      src_page_title = re.sub('\\([^\\)]*\\)', '', src_page_title)
      (src_interwiki, new_src_text, comment) = self.add_interwiki(src_page, src_page_title,
                                                                  src_birth_year, src_death_year,
                                                                  new_src_text, comment)

    # Check Interwiki again: if still not found -> go to next category
    if len(src_interwiki)==0:
      pywikibot.output('  No interwiki, no maches -> Skipping')
      self.reportProblemPage(src_page, 'no interwiki')
      return

    # === Add birth/death category ============================================
    if (src_birth_year==0 or src_death_year==0):
      (new_src_text, comment) = self.add_birth_death_category(src_page, src_interwiki,
                                  src_birth_year, src_death_year, new_src_text, comment)

    # === Add DEFAULTSORT =====================================================
    no_defaultsort = '{{'+DEFAULTSORT[src_sitename] not in new_src_text
    if no_defaultsort:
      for trg_page in src_interwiki:
        if trg_page.site().sitename() not in DEFAULTSORT:
          continue
        trg_text, trg_page = self.load(trg_page)
        if trg_text is None:
          continue
        m = re.search(u'\\{\\{'+DEFAULTSORT[trg_page.site().sitename()]+u':([^\\}]*)\\}\\}', trg_text)
        if m is not None:
          new_src_text = '{{DEFAULTSORT:' + m.group(1).strip() + '}}\n' + new_src_text
          comment = comment + u'copy DEFAULTSORT from %s, ' % trg_page.title(asLink=True)
          no_defaultsort = False
          break

    # === Add authority data ==================================================
    (new_src_text, comment) = self.add_authority_control_to_commons(src_page, src_interwiki, new_src_text, comment)

    # === save changed text ===================================================
    if src_text != new_src_text:
      self.save(new_src_text, src_page, comment)
    else:
      pywikibot.output('   Found interwiki but nothing to improve -> Skipping')
    if no_defaultsort:
      self.reportProblemPage(src_page, 'no DEFAULTSORT')


  #===================================================================================            
  def wikipedia_page_maitenance(self, src_page):
    """
    Add an authority control template to one Wikipedia article.

    The interwiki links are read from the article's data page
    (pywikibot.DataPage). Articles that cannot be loaded or have no
    interwiki links are skipped. The article is saved only when
    add_authority_control_to_wikipedia() changed its text.
    """
    comment = u''
    pywikibot.output(u"\03{lightpurple}Processing page %s.\03{default}"% src_page.title(asLink=True))
    src_text, src_page = self.load(src_page)
    if src_text is None:
      return
    new_src_text = src_text

    data = pywikibot.DataPage(src_page)
    src_interwiki = []
    try:
      if data.exists():
        src_interwiki = data.interwiki()
        pywikibot.output(u'Number of interwikis = %i\n' % len(src_interwiki))
    except Exception:
      # Best effort: a page whose data item cannot be read is treated as
      # having no interwiki links. "except Exception" (not a bare except)
      # keeps Ctrl-C and sys.exit() working.
      pywikibot.output(u'   Could not read interwiki links for %s' % src_page.title(asLink=True))
      src_interwiki = []

    # No interwiki links -> nothing to copy from, go to the next page
    if len(src_interwiki)==0:
      pywikibot.output('  No interwiki -> Skipping')
      return

    # === Add authority data ==================================================
    (new_src_text, comment) = self.add_authority_control_to_wikipedia(src_page, src_interwiki, new_src_text, comment)

    # === save changed text ===================================================
    if src_text != new_src_text:
      self.save(new_src_text, src_page, comment)

    
# ================================================             
def main():
    """
    Run the bot over the pages listed in the text file 'PL people by name.txt'.

    The pages are resolved on Polish Wikipedia. Alternative page sources used
    in the past (kept for reference):
      site:       pywikibot.getSite(u'commons', u'commons')
      categories: catlib.Category(site, <name>) with .subcategories() or
                  .articles(), for names such as
                    u'Kategoria:Biografie kanonu polskiej Wikipedii'
                    u'Category:People by name'
                    u'Category:Home categories of creator templates without authority control data'
                    u'Category:Person category without DEFAULTSORT'
                    u'Categoria:Morti nel 1850'
    """
    site = pywikibot.getSite(u'pl', u'wikipedia')
    _sitename = site.sitename()  # not used below; call kept from the original flow
    pages = pagegenerators.TextfilePageGenerator('PL people by name.txt', site)
    PeoplePageBot(pages).run()

# Script entry point: run the bot only when the file is executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always runs, also when main() raises or exits.
        # NOTE(review): presumably tells the framework that this bot has
        # finished (throttle bookkeeping) -- confirm against pywikibot docs.
        pywikibot.stopme()