#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Maintenance script that automatically improves people categories on Wikimedia Commons (called the "source"):
* The script goes through all subcategories of the provided category that also belong to [[:Category:People by name]]
* it reads the dates of birth and death from the categories
* if there are no interwiki links, then it scans the list of "target" wikis and uses the Commons name to check whether an article with the same name exists
** if an article is found, then the dates of birth and death are read from its categories
** if the name and at least one date match, then we consider it a match and add interwiki links to the Commons category
** if the name matches and the article has a "Commons category" link, then we consider it a match and add interwiki links to the Commons category
* if interwiki links exist, then see whether we can copy from the other Wikipedias:
** date of birth
** date of death
** DEFAULTSORT
** Authority control template
The script was also adapted for adding {{Kontrola autorytatywna}} templates to Wikipedia articles.
"""
#
# (C) Jarekt, 2011
#
# Distributed under the terms of the MIT license.
#
import sys, os.path, glob, string, codecs, add_text, time
import wikipedia as pywikibot
import config, catlib, time, re, pagegenerators
# === Definitions ======================================================
# Wikipedia language codes searched, in this order, for an article whose title
# equals the Commons category name.
interwiki_sites = ['en', 'de', 'fr', 'nl', 'es', 'it', 'pt', 'sv']  # 'pl',

# One row per wiki: (sitename, birth-year regex, death-year regex).
# Group 1 of every regex captures a 3- or 4-digit year.
_year_patterns = [
    (u'commons:commons',
     u'\[\[[Cc]ategory:(\d\d\d\d?) births[\]\|]',
     u'\[\[[Cc]ategory:(\d\d\d\d?) deaths[\]\|]'),
    (u'wikipedia:de',
     u'\[\[[Kk]ategorie:Geboren (\d\d\d\d?)[\]\|]',
     u'\[\[[Kk]ategorie:Gestorben (\d\d\d\d?)[\]\|]'),
    (u'wikipedia:en',
     u'\[\[[Cc]ategory:(\d\d\d\d?) births[\]\|]',
     u'\[\[[Cc]ategory:(\d\d\d\d?) deaths[\]\|]'),
    (u'wikipedia:es',
     u'\[\[[Cc]ategoría:Nacidos en (\d\d\d\d?)[\]\|]',
     u'\[\[[Cc]ategoría:Fallecidos en (\d\d\d\d?)[\]\|]'),
    (u'wikipedia:fr',
     u'\[\[[Cc]atégorie:Naissance en (\d\d\d\d?)[\]\|]',
     u'\[\[[Cc]atégorie:Décès en (\d\d\d\d?)[\]\|]'),
    # Italian articles carry the years as template parameters, not categories.
    (u'wikipedia:it',
     u'\|AnnoNascita *= *(\d\d\d\d?)',
     u'\|AnnoMorte *= *(\d\d\d\d?)'),
    (u'wikipedia:pl',
     u'\[\[[Kk]ategoria:Urodzeni w (\d\d\d\d?)[\]\|]',
     u'\[\[[Kk]ategoria:Zmarli w (\d\d\d\d?)[\]\|]'),
    (u'wikipedia:pt',
     u'\[\[[Cc]ategoria:Nascidos em (\d\d\d\d?)[\]\|]',
     u'\[\[[Cc]ategoria:Mortos em (\d\d\d\d?)[\]\|]'),
    (u'wikipedia:sv',
     u'\[\[[Kk]ategori:Födda (\d\d\d\d?)[\]\|]',
     u'\[\[[Kk]ategori:Avlidna (\d\d\d\d?)[\]\|]'),
    (u'wikipedia:ru',
     u'\[\[Категория:Родившиеся в (\d\d\d\d?) году[\]\|]',
     u'\[\[Категория:Умершие в (\d\d\d\d?) году[\]\|]'),
    (u'wikipedia:zh',
     u'\[\[[Cc]ategory:(\d\d\d\d?)年出生[\]\|]',
     u'\[\[[Cc]ategory:(\d\d\d\d?)年逝世[\]\|]'),
    (u'wikipedia:ja',
     u'\[\[[Cc]ategory:(\d\d\d\d?)年生[\]\|]',
     u'\[\[[Cc]ategory:(\d\d\d\d?)年没[\]\|]'),
]

# sitename -> regex whose group 1 is the year of birth / year of death.
birth_category = dict((site, born) for site, born, _died in _year_patterns)
death_category = dict((site, died) for site, _born, died in _year_patterns)

# sitename -> name of the magic word that sets the default sort key.
DEFAULTSORT = dict.fromkeys(
    ['commons:commons', 'wikipedia:en', 'wikipedia:es', 'wikipedia:fr',
     'wikipedia:it', 'wikipedia:pl', 'wikipedia:pt'],
    u'DEFAULTSORT')
DEFAULTSORT['wikipedia:de'] = u'SORTIERUNG'
DEFAULTSORT['wikipedia:sv'] = u'STANDARDSORTERING'

# Wikipedias scanned (in this order) for an authority control template.
authority_control_sites = ['en', 'de', 'fr']  # , 'es', 'fr', 'pt', 'ru']

# Template parameters that are copied when an authority template is rebuilt.
authority_control_fields = [
    'VIAF', 'LCCN', 'GND', 'SELIBR', 'SUDOC', 'ULAN', 'BNF', 'TYP',
]

# sitename -> name of the local authority control template.
authority_control_template = {
    'commons:commons': u'Authority control',
    'wikipedia:de': u'Normdaten',
    'wikipedia:en': u'Authority control',
    'wikipedia:es': u'Normdaten',
    'wikipedia:fr': u'Autorité',
    'wikipedia:it': u'Controllo di autorità',
    'wikipedia:pl': u'Kontrola autorytatywna',
    'wikipedia:pt': u'Normdaten',
    'wikipedia:ru': u'Библиоинформация',
}
# ================================================
def year_find(pattern, text):
    """Return the year captured by *pattern* in *text* as an int.

    pattern -- regular expression whose group 1 captures the year digits
    text    -- text to search

    Returns 0 when the pattern does not occur in the text.
    """
    found = re.search(pattern, text)
    if found is None:
        return 0
    return int(found.group(1).strip())
# ================================================
class PeoplePageBot:
    """Bot that copies biographical metadata between wiki pages.

    For every page produced by the generator the bot can copy interwiki
    links, birth/death year categories, the DEFAULTSORT key and an authority
    control template from matching pages on other wikis.

    NOTE(review): the indentation of the original source was lost, so the
    nesting of a few statements had to be reconstructed from the logic; those
    places carry their own NOTE(review) comment.
    """

    def __init__(self, generator, always=False):
        """Store the page generator and initialise the run settings.

        generator -- iterable of page objects to process
        always    -- if True, save every change without asking
        """
        self.generator = generator
        self.always = always      # save without asking (was hard-coded to False)
        self.dry_run = False      # run the code without saving?
        self.iEditCount = 0       # number of successful edits so far
        self.maxEditCount = -50   # stop after this many edits (used only if >0)
        self.d_year = 1           # tolerance (in years) for an approximate date match

    def run(self):
        """Process every page of the generator, logging and skipping failures."""
        # Manual resume switch: start with ``skip = True`` to ignore all pages
        # until the marker title below is reached.
        skip = False
        for page in self.generator:
            try:
                if 'Memmed Emin Resulzade' in page.title():
                    skip = False
                if not skip:
                    # self.commons_page_maitenance(page)
                    self.wikipedia_page_maitenance(page)
            except Exception:
                # ``except Exception`` instead of a bare ``except`` so that the
                # SystemExit raised by sys.exit() in save() and Ctrl-C are not
                # swallowed here.
                pywikibot.output(u" Error: skip %s" % page.title(asLink=True))
                time.sleep(60)

    def load(self, page):
        """Return ``(text, page)`` for *page*, following one redirect.

        ``text`` is None when the page is missing, is (still) a redirect or
        when the redirect target contains a section anchor ('#').  The
        returned page is the redirect target if a redirect was followed.
        """
        if page.isRedirectPage():
            page = page.getRedirectTarget()
            pywikibot.output(u" Redirect -> %s" % page.title(asLink=True))
            # NOTE(review): assumed to apply to redirect targets only.
            if '#' in page.title():
                return None, page
        try:
            text = page.get()
        except pywikibot.NoPage:
            pywikibot.output(u" Page %s does not exist; skipping."
                             % page.title(asLink=True))
        except pywikibot.IsRedirectPage:
            pywikibot.output(u" Page %s is a redirect; skipping."
                             % page.title(asLink=True))
        else:
            return text, page
        return None, page

    # === Save page ===========================================================
    def save(self, text, page, comment, minorEdit=False, botflag=True):
        """Show the diff for *page* and save *text* if the change is accepted.

        Returns True only when the page was actually written; False when
        nothing changed, in dry-run mode, when the user declined or when the
        save failed.  Exits the program (SystemExit) when the user chooses
        'Quit' or the edit counter reaches ``maxEditCount``.
        """
        old_text = page.get()
        # only save if something was changed
        if text == old_text:
            return False
        # Show the title of the page we're working on, highlighted in purple.
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.title())
        pywikibot.showDiff(old_text, text)
        pywikibot.output(u'Comment: %s' % comment)
        if self.dry_run:
            return False
        choice = None
        if not self.always:
            choice = pywikibot.inputChoice(
                u'Do you want to accept these changes?',
                ['Yes', 'No', 'Always', 'Quit'],
                ['y', 'N', 'a', 'q'], 'N')
            if choice == 'a':
                self.always = True
            elif choice == 'q':
                sys.exit()
        if not (self.always or choice == 'y'):
            return False
        try:
            page.put(text, comment=comment,
                     minorEdit=minorEdit, botflag=botflag)
            self.iEditCount += 1
            if self.iEditCount == self.maxEditCount:
                sys.exit()
        except pywikibot.LockedPage:
            pywikibot.output(u"Page %s is locked; skipping."
                             % page.title(asLink=True))
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title()))
        except pywikibot.SpamfilterError as error:
            pywikibot.output(
                u'Cannot change %s because of spam blacklist entry %s'
                % (page.title(), error.url))
        else:
            return True
        return False

    # === Problem log =========================================================
    def reportProblemPage(self, page, type):
        """Append ``# page, type`` to 'match_people.txt' in the data directory.

        Nothing is written unless ``config.without_interwiki`` is set.
        """
        if config.without_interwiki:
            log_path = pywikibot.config.datafilepath('match_people.txt')
            # ``with`` closes the file even if the write fails.
            with codecs.open(log_path, 'a', 'utf-8') as f:
                f.write(u"# %s, %s\n" % (page, type))

    # === DEFAULTSORT helper ==================================================
    def _find_defaultsort(self, trg_page, trg_text):
        """Return the sort key set in *trg_text*, or None if there is none.

        The magic word looked for is the one registered in DEFAULTSORT for
        the site of *trg_page*.
        """
        magic_word = DEFAULTSORT[trg_page.site().sitename()]
        found = re.search(u'\\{\\{' + magic_word + u':([^\\}]*)\\}\\}', trg_text)
        if found is None:
            return None
        return found.group(1).strip()

    # === match biography pages without interwikis ============================
    def match(self, src_page, trg_page, src_birth_year, src_death_year):
        """Decide whether *trg_page* describes the same person as *src_page*.

        The pages match when a birth or death year agrees, when the summed
        difference of both years is at most ``self.d_year``, or (Commons
        source only) when the target text contains a Commons template that
        names the source page.  Returns True or False.
        """
        # Compare the site *name*: the lookup tables are keyed by strings such
        # as 'wikipedia:en', so testing the Site object itself never matched.
        if trg_page.site().sitename() not in birth_category:
            return False
        trg_text, trg_page = self.load(trg_page)
        if trg_text is None:
            return False
        pywikibot.output(u'\n >>>> %s <<<<' % (trg_page.title(asLink=True)))
        trg_sitename = trg_page.site().sitename()
        # Read birth / death years from target page
        trg_birth_year = year_find(birth_category[trg_sitename], trg_text)
        trg_death_year = year_find(death_category[trg_sitename], trg_text)
        birth_match = (trg_birth_year > 0 and trg_birth_year == src_birth_year)
        death_match = (trg_death_year > 0 and trg_death_year == src_death_year)
        approx_match = (trg_birth_year * src_birth_year > 0 and
                        abs(trg_birth_year - src_birth_year) +
                        abs(trg_death_year - src_death_year) <= self.d_year)
        date_match = birth_match or death_match or approx_match
        pywikibot.output(u" %s birth=%i" % (trg_sitename, trg_birth_year))
        pywikibot.output(u" %s death=%i" % (trg_sitename, trg_death_year))
        # Commonscat
        Commonscat = False
        if src_page.site().sitename() == 'commons:commons':
            trg_text_l = trg_text.lower()
            # The text is lower-cased, so the title must be lower-cased too;
            # it is escaped because titles may contain regex metacharacters
            # such as parentheses.
            full_title = re.escape(src_page.title().lower())
            bare_title = re.escape(src_page.titleWithoutNamespace().lower())
            patterns = (
                u'\\{\\{commons\\|%s\\}\\}' % full_title,
                u'\\{\\{commons\\s?[ck]at\\|%s\\}\\}' % bare_title,
                u'\\{\\{commons category\\|%s\\}\\}' % bare_title,
            )
            Commonscat = any(re.search(pattern, trg_text_l) is not None
                             for pattern in patterns)
        if Commonscat or date_match:
            pywikibot.output(u" MATCH")
        else:
            pywikibot.output(u" NO MATCH")
        return (Commonscat or date_match)

    # === Add Interwiki Links =================================================
    def add_interwiki(self, src_page, src_page_title, src_birth_year, src_death_year, new_src_text, comment):
        """Look for an article named *src_page_title* on the big Wikipedias.

        The wikis in ``interwiki_sites`` are tried in order.  On the first
        article accepted by match(), its interwiki links plus the article
        itself are written into *new_src_text*.

        Returns ``(src_interwiki, new_src_text, comment)``; ``src_interwiki``
        is an empty list when nothing matched.
        """
        src_interwiki = []
        for siteCode in interwiki_sites:
            trg_site = pywikibot.getSite(siteCode, u'wikipedia')
            trg_page = pywikibot.Page(trg_site, src_page_title)
            if trg_page.isRedirectPage():
                trg_page = trg_page.getRedirectTarget()
                pywikibot.output(u" Redirect -> %s" % trg_page.title(asLink=True))
                # NOTE(review): assumed to apply to redirect targets only.
                if '#' in trg_page.title():
                    continue
            if trg_page.isDisambig():
                # Try every page the disambiguation page links to.
                matched = False
                for page in trg_page.linkedPages():
                    if self.match(src_page, page, src_birth_year, src_death_year):
                        trg_page = page
                        matched = True
                        break
                if not matched:
                    # NOTE(review): the original ``else: return`` is read as
                    # the for-else, i.e. the whole search stops when no
                    # candidate on a disambiguation page matches.
                    return (src_interwiki, new_src_text, comment)
            else:
                matched = self.match(src_page, trg_page, src_birth_year, src_death_year)
            if matched:
                src_interwiki = trg_page.interwiki()
                src_interwiki.append(trg_page)
                interwikis = {}
                for interwikiPage in src_interwiki:
                    interwikis[interwikiPage.site()] = interwikiPage
                new_src_text = pywikibot.replaceLanguageLinks(
                    new_src_text, interwikis, site=src_page.site())
                pywikibot.output(u"\03{green}Mathed with %s.\03{default}"
                                 % trg_page.title(asLink=True))
                comment = comment + u'copy interwiki from %s, ' % trg_page.title(asLink=True)
                break
        return (src_interwiki, new_src_text, comment)

    # === Add birth/death category ============================================
    def add_birth_death_category(self, src_page, src_interwiki, src_birth_year, src_death_year, new_src_text, comment):
        """Copy missing birth/death year categories (and DEFAULTSORT).

        The interwiki pages are scanned until both years are known.  A year
        that is 0 on the source is taken from the first target that has it.
        A lifetime that is negative or longer than 120 years is reported via
        reportProblemPage().

        Returns ``(new_src_text, comment)``.
        """
        no_defaultsort = '{{' + DEFAULTSORT[src_page.site().sitename()] not in new_src_text
        src_site = src_page.site()
        src_cats = src_page.categories(get_redirect=False)
        for trg_page in src_interwiki:
            trg_site = trg_page.site().sitename()
            if trg_site in birth_category:
                trg_text, trg_page = self.load(trg_page)
                if trg_text is None:
                    continue
                # Read birth/death years from target page
                trg_birth_year = year_find(birth_category[trg_site], trg_text)
                trg_death_year = year_find(death_category[trg_site], trg_text)
                # add birth_year
                if src_birth_year == 0 and trg_birth_year > 0:
                    birth_cat = u'%i births' % trg_birth_year
                    src_birth_year = trg_birth_year
                    category = pywikibot.Page(src_site, birth_cat, defaultNamespace=14)
                    if category not in src_cats:
                        src_cats.append(category)
                        new_src_text = pywikibot.replaceCategoryLinks(new_src_text, src_cats)
                        comment = comment + u'copy birth year category from %s, ' % trg_page.title(asLink=True)
                # add death_year
                if src_death_year == 0 and trg_death_year > 0:
                    death_cat = u'%i deaths' % trg_death_year
                    src_death_year = trg_death_year
                    category = pywikibot.Page(src_site, death_cat, defaultNamespace=14)
                    if category not in src_cats:
                        src_cats.append(category)
                        new_src_text = pywikibot.replaceCategoryLinks(new_src_text, src_cats)
                        comment = comment + u'copy death year category from %s, ' % trg_page.title(asLink=True)
                # add DEFAULTSORT if I happen to find one
                if no_defaultsort and trg_site in DEFAULTSORT:
                    sort_key = self._find_defaultsort(trg_page, trg_text)
                    if sort_key is not None:
                        new_src_text = '{{DEFAULTSORT:' + sort_key + '}}\n' + new_src_text
                        comment = comment + u'copy DEFAULTSORT from %s, ' % trg_page.title(asLink=True)
                        # Without this, one DEFAULTSORT line was prepended for
                        # every further interwiki page that has a sort key.
                        no_defaultsort = False
            if src_birth_year * src_death_year > 0:
                break
        d = src_death_year - src_birth_year
        if (src_birth_year * src_death_year > 0) and (d < 0 or d > 120):
            self.reportProblemPage(src_page, 'long lifetime')
        return (new_src_text, comment)

    # === Add authority data (Commons) ========================================
    def add_authority_control_to_commons(self, src_page, src_interwiki, new_src_text, comment):
        """Copy an authority control template from an interwiki page.

        If the category contains a page in namespace 100 (creator page), the
        template is written to that page and saved immediately; otherwise it
        is prepended to *new_src_text*.

        Returns ``(new_src_text, comment)``.
        """
        # NOTE(review): the emitted template is named 'Kontrola autorytatywna'
        # (the Polish name) while the duplicate check looks for 'Authority
        # control'; this looks like a leftover of adapting the script to
        # pl.wikipedia -- confirm before enabling this routine again.

        # Is there creator page associated with this category?
        creator_page = None
        if src_page.site().sitename() == 'commons:commons':
            for page in src_page.articlesList():
                if page.namespace() == 100:  # if creator page
                    creator_page = page
                    break
        hasCreator = creator_page is not None
        # Does Creator or Category already have Authority control template?
        page = creator_page if hasCreator else src_page
        hasAuthority = False
        for template in page.templates():
            if template == 'Authority control':
                hasAuthority = True
                pywikibot.output('Authority Control found at %s -> do not add'
                                 % page.title(asLink=True))
                break
        # If no Authority control template at source page then check interwiki pages
        authority_text = ''
        if not hasAuthority:
            interwikis = {}
            for interwikiPage in src_interwiki:
                interwikis[interwikiPage.site()] = interwikiPage
            for siteCode in authority_control_sites:
                trg_site = pywikibot.getSite(siteCode, u'wikipedia')
                # Membership is deliberately tested on the key list, exactly
                # as in the original code.
                if trg_site in interwikis.keys():
                    trg_page = interwikis[trg_site]
                    trg_text, trg_page = self.load(trg_page)
                    if trg_text is None:
                        continue
                    pywikibot.output(' scan interwiki %s' % trg_page.title(asLink=True))
                    trg_sitename = trg_page.site().sitename()
                    search_str = (u'\\{\\{' + authority_control_template[trg_sitename] +
                                  u'\\|([^\\}]*)\\}\\}')
                    m = re.search(search_str, trg_text)
                    if m is not None:
                        if trg_sitename == 'wikipedia:de':
                            title = trg_page.title().replace(' ', '_')
                            authority_text = (u'{{Kontrola autorytatywna|' +
                                              m.group(1).strip() + '|TSURL=' + title + '}}')
                            # Drop the parameters that only exist on de.wikipedia.
                            for de_only in (u'GNDName', u'GNDfehlt', u'GNDCheck', u'REMARK'):
                                authority_text = re.sub(u'\\|' + de_only + u'=[^\\|\\}]*',
                                                        u'', authority_text)
                        else:
                            authority_text = u'{{Kontrola autorytatywna|' + m.group(1).strip() + '}}'
                        break
        # Add template
        if len(authority_text) > 0:
            if hasCreator:
                authority_text = authority_text.replace('}}', '|bare=1}}')
                creator_txt = creator_page.get()
                # Remove commented out Authority control templates
                new_creator_txt = creator_txt.replace(
                    '<!-- {{Kontrola autorytatywna|PND=|VIAF=|LCCN=|ULAN=|bare=1}} -->', '')  # common case
                new_creator_txt = re.sub(
                    u'<!--\\s*\\{\\{Authority control\\|[^\\}\n]*\\}\\}\\s*-->',
                    u'', new_creator_txt)  # more general case
                # Function replacements keep backslashes in the copied values
                # from being read as regex group references.
                new_creator_txt = re.sub(
                    u'(\\|\\s*Authority\\s*=)',
                    lambda mo: mo.group(1) + u' ' + authority_text,
                    new_creator_txt)  # if we have Authority field
                if authority_text not in new_creator_txt:  # if we do not have Authority field
                    new_creator_txt = re.sub(
                        u'(\n\\s*\\|\\s*)(Option\\s*=)',
                        lambda mo: (mo.group(1) + u'Authority = ' + authority_text +
                                    mo.group(1) + mo.group(2)),
                        new_creator_txt)
                # Separate summary: the caller's accumulated ``comment`` must
                # not be replaced by the creator-page summary.
                creator_comment = 'Copy {{Authority Control}} from %s' % trg_page.title(asLink=True)
                self.save(new_creator_txt, creator_page, creator_comment)
            else:
                new_src_text = authority_text + '\n' + new_src_text
                comment = comment + 'copy {{Authority Control}} from %s, ' % trg_page.title(asLink=True)
        return (new_src_text, comment)

    # === Add authority data (Wikipedia) ======================================
    def add_authority_control_to_wikipedia(self, src_page, src_interwiki, new_src_text, comment):
        """Build an authority control template from interwiki articles.

        The fields listed in ``authority_control_fields`` are collected from
        the wikis in ``authority_control_sites``; the first non-empty slot
        wins.  The template is only added when VIAF, LCCN or GND was found.
        It is inserted before DEFAULTSORT, else before the first
        '[[Kategoria:' link, else appended.

        Returns ``(new_src_text, comment)``.
        """
        src_authority_control = authority_control_template[src_page.site().sitename()]
        for template in src_page.templates():
            if template == src_authority_control or template == u'Authority control':
                pywikibot.output(' Authority Control found at %s -> do not add'
                                 % src_page.title(asLink=True))
                return (new_src_text, comment)
        # Ordinarily only interwikis[interwikiPage.site()] = interwikiPage is
        # needed, but that did not seem to work, so the sites are rebuilt.
        interwikis = {}
        for interwikiPage in src_interwiki:
            site = pywikibot.getSite(interwikiPage.site().language(), u'wikipedia')
            interwikis[site] = pywikibot.Page(site, interwikiPage.title())
        AC = dict.fromkeys(authority_control_fields, '')
        used_pages = []  # links of the pages that contributed a field
        for siteCode in authority_control_sites:
            trg_site = pywikibot.getSite(siteCode, u'wikipedia')
            if trg_site not in interwikis.keys():
                continue
            trg_text, trg_page = self.load(interwikis[trg_site])
            if trg_text is None:
                continue
            pywikibot.output(' scan interwiki %s' % trg_page.title(asLink=True))
            search_str = (u'\\{\\{' + authority_control_template[trg_page.site().sitename()] +
                          u'\\|([^\\}]*)\\}\\}')
            m = re.search(search_str, trg_text)
            if m is None:
                continue
            site_used = False
            for param in m.group(1).strip().split('|'):
                name, _sep, value = param.partition('=')
                # Strip so that '| VIAF = 123' is recognised as well.
                name = name.strip()
                pywikibot.output(u'%s=%s' % (name, value))
                if name in authority_control_fields and len(AC[name]) == 0:
                    AC[name] = value.strip()
                    site_used = True
            if site_used:
                used_pages.append(trg_page.title(asLink=True))
        summary = u', '.join(used_pages)
        authority_text = ''
        if len(AC["VIAF"]) > 0 or len(AC["LCCN"]) > 0 or len(AC["GND"]) > 0:
            pieces = [u'{{' + src_authority_control]
            for field in authority_control_fields:
                if len(AC[field]) > 0:
                    pieces.append(u'%s=%s' % (field, AC[field]))
            authority_text = u'|'.join(pieces) + u'}}\n\n'
            pywikibot.output(authority_text)
        # Add template
        if len(authority_text) > 0:
            comment = comment + u'Robot skopiował szablon {{[[szablon:Kontrola autorytatywna|Kontrola autorytatywna]]}} z %s.' % summary
            # Plain string replacement: the template text comes from wiki
            # pages and must not be interpreted as a regex replacement.
            new_src_text = new_src_text.replace(
                u'{{DEFAULTSORT', authority_text + u'{{DEFAULTSORT', 1)
            if authority_text not in new_src_text:
                new_src_text = new_src_text.replace(
                    u'[[Kategoria:', authority_text + u'[[Kategoria:', 1)
            if authority_text not in new_src_text:
                new_src_text = new_src_text + u'\n' + authority_text
        else:
            pywikibot.output(' Found interwiki but nothing to improve -> Skipping')
        return (new_src_text, comment)

    # =========================================================================
    def commons_page_maitenance(self, src_page):
        """Improve one people category whose text names 'Category:People by name'.

        Steps: read the years from the category, find interwiki links if
        there are none, copy missing year categories, copy DEFAULTSORT, copy
        authority control, then save when the text changed.
        """
        comment = u''
        pywikibot.output(u"\03{lightpurple}Processing page %s.\03{default}"
                         % src_page.title(asLink=True))
        src_text, src_page = self.load(src_page)
        if src_text is None:
            return
        if 'Category:People by name' not in src_text:
            return
        src_sitename = src_page.site().sitename()
        new_src_text = src_text
        # Read birth / death years from source page
        src_birth_year = year_find(birth_category[src_sitename], src_text)
        src_death_year = year_find(death_category[src_sitename], src_text)
        d = src_death_year - src_birth_year
        if src_death_year * src_birth_year > 0 and (d < 0 or d > 120):
            self.reportProblemPage(src_page, 'long lifetime')
        pywikibot.output(u" commons birth=%i" % src_birth_year)
        pywikibot.output(u" commons death=%i" % src_death_year)
        # === Add Interwiki Links ===
        # Check Interwiki: if not found -> try to match people by birth/death dates
        src_interwiki = src_page.interwiki()
        src_page_title = src_page.title(withNamespace=False)
        # At least one of the two years must be known (the original summed the
        # birth year with itself, ignoring the death year).
        if src_birth_year + src_death_year > 0 and len(src_interwiki) == 0:
            # try original name
            (src_interwiki, new_src_text, comment) = self.add_interwiki(
                src_page, src_page_title, src_birth_year, src_death_year,
                new_src_text, comment)
            # remove non name parts
            # NOTE(review): assumed to be nested under the condition above.
            if len(src_interwiki) == 0 and '(' in src_page_title:
                src_page_title = re.sub('\\([^\\)]*\\)', '', src_page_title).strip()
                (src_interwiki, new_src_text, comment) = self.add_interwiki(
                    src_page, src_page_title, src_birth_year, src_death_year,
                    new_src_text, comment)
        # Check Interwiki again: if still not found -> go to next category
        if len(src_interwiki) == 0:
            pywikibot.output(' No interwiki, no maches -> Skipping')
            self.reportProblemPage(src_page, 'no interwiki')
            return
        # === Add birth/death category ===
        if src_birth_year == 0 or src_death_year == 0:
            (new_src_text, comment) = self.add_birth_death_category(
                src_page, src_interwiki, src_birth_year, src_death_year,
                new_src_text, comment)
        # === Add DEFAULTSORT ===
        no_defaultsort = '{{' + DEFAULTSORT[src_sitename] not in new_src_text
        if no_defaultsort:
            for trg_page in src_interwiki:
                if trg_page.site().sitename() not in DEFAULTSORT:
                    continue
                trg_text, trg_page = self.load(trg_page)
                if trg_text is None:
                    continue
                sort_key = self._find_defaultsort(trg_page, trg_text)
                if sort_key is not None:
                    new_src_text = '{{DEFAULTSORT:' + sort_key + '}}\n' + new_src_text
                    comment = comment + u'copy DEFAULTSORT from %s, ' % trg_page.title(asLink=True)
                    no_defaultsort = False
                    break
        # === Add authority data ===
        (new_src_text, comment) = self.add_authority_control_to_commons(
            src_page, src_interwiki, new_src_text, comment)
        # === save changed text ===
        if src_text != new_src_text:
            self.save(new_src_text, src_page, comment)
        else:
            pywikibot.output(' Found interwiki but nothing to improve -> Skipping')
        if no_defaultsort:
            self.reportProblemPage(src_page, 'no DEFAULTSORT')

    # =========================================================================
    def wikipedia_page_maitenance(self, src_page):
        """Add an authority control template to one Wikipedia article.

        The interwiki links are read through ``pywikibot.DataPage``; the page
        is skipped when none are available.
        """
        comment = u''
        pywikibot.output(u"\03{lightpurple}Processing page %s.\03{default}"
                         % src_page.title(asLink=True))
        src_text, src_page = self.load(src_page)
        if src_text is None:
            return
        new_src_text = src_text
        data = pywikibot.DataPage(src_page)
        src_interwiki = []
        try:
            if data.exists():
                src_interwiki = data.interwiki()
                pywikibot.output(u'Number of interwikis = %i\n' % len(src_interwiki))
        except Exception:
            # Best effort: a failing data lookup is treated as "no interwiki".
            # ``except Exception`` keeps Ctrl-C and sys.exit() working.
            src_interwiki = []
        # src_interwiki = src_page.interwiki()
        if len(src_interwiki) == 0:
            pywikibot.output(' No interwiki -> Skipping')
            # self.reportProblemPage(src_page, 'no interwiki')
            return
        # === Add authority data ===
        (new_src_text, comment) = self.add_authority_control_to_wikipedia(
            src_page, src_interwiki, new_src_text, comment)
        # === save changed text ===
        if src_text != new_src_text:
            self.save(new_src_text, src_page, comment)
# ================================================
def main():
    """Run PeoplePageBot over the pages listed in 'PL people by name.txt'.

    The page titles are resolved against Polish Wikipedia.
    """
    # Alternative source sites / page sources used in other runs:
    #src_site = pywikibot.getSite(u'commons', u'commons')
    #people_cat = catlib.Category(src_site, u'Kategoria:Biografie kanonu polskiej Wikipedii')
    #people_cat = catlib.Category(src_site, u'Category:People by name')
    #people_cat = catlib.Category(src_site, u'Category:Home categories of creator templates without authority control data')
    #people_cat = catlib.Category(src_site, u'Category:Person category without DEFAULTSORT')
    #people_cat = catlib.Category(src_site, u'Categoria:Morti nel 1850')
    #pregenerator = people_cat.subcategories()
    #pregenerator = people_cat.articles()
    src_site = pywikibot.getSite(u'pl', u'wikipedia')
    # The site name is looked up as in the original; the value is not used.
    src_sitename = src_site.sitename()
    page_source = pagegenerators.TextfilePageGenerator(
        'PL people by name.txt', src_site)
    PeoplePageBot(page_source).run()
# Run the bot only when this file is executed as a script.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Always call pywikibot.stopme(), even when main() raises or exits.
        pywikibot.stopme()