Discussioni utente:Wisbot/coordbot.py
Vai alla navigazione
Vai alla ricerca
Hi, we worked on your bot on the Persian (Farsi) Wikipedia (fa.wiki). We modified it, fixed some of its bugs and added a few further solutions. I am sharing my changes here; feel free to use or change them if you want. Thank you for sharing your bot. Reza1615 (msg)
# -*- coding: utf-8 -*-
"""
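This bot copies geographic coordinates from the English Wikipedia. For every
page it works on, it follows the interwiki link to the English article, looks
there for a coordinate template or coordinate infobox fields, and prepends the
resulting {{Coord|...|display=title}} template to the local page.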
You can run the bot with the following commandline parameters:
-file - Work on all pages given in a local text file.
Will read any [[wiki link]] and use these articles.
Argument can also be given as "-file:filename".
-cat - Work on all pages which are in a specific category.
Argument can also be given as "-cat:categoryname".
-page - Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can give this
parameter multiple times to edit multiple pages.
-ref - Work on all pages that link to a certain page.
Argument can also be given as "-ref:referredpagetitle".
-filelinks - Works on all pages that link to a certain image.
Argument can also be given as "-filelinks:ImageName".
-links - Work on all pages that are linked to from a certain page.
Argument can also be given as "-links:linkingpagetitle".
-start - Work on all pages in the wiki, starting at a given page. Choose
"-start:!" to start at the beginning.
NOTE: You are advised to use -xml instead of this option; this is
meant for cases where there is no recent XML dump.
-except:XYZ - Ignore pages which contain XYZ. If the -regex argument is given,
XYZ will be regarded as a regular expression.
-summary:XYZ - Set the summary message text for the edit to XYZ, bypassing the
predefined message texts with original and replacements inserted.
-template:XYZ-
-namespace:n - Number of namespace to process. The parameter can be used
multiple times. It works in combination with all other
parameters, except for the -start parameter. If you e.g. want to
iterate over all user pages starting at User:M, use
-start:User:M.
-always - Don't prompt you for each replacement
other: -
NOTE: Only use either -xml or -file or -page, but don't mix them.
Examples:
"""
#
# [[Utente:Wiso]] 2007
#
# Distributed under the terms of the GPL licence
#
from __future__ import generators
import sys,re,pprint
import wikipedia,pagegenerators,catlib,config
__version__ = '$Id: coordbot.py,v 0.1 $'
# Edit summary used when a page is saved.  The %s placeholder is filled with
# a link to the article the coordinates were taken from.
# The text is Persian ("ربات اضافه‌کنندهٔ مختصات", roughly "coordinate-adding
# bot").  It is written with \u escapes so that the literal survives
# re-encoding of this file; the previous literal was mojibake (UTF-8 bytes
# decoded as Windows-1256).
msg = (u'\u0631\u0628\u0627\u062a '
       u'\u0627\u0636\u0627\u0641\u0647\u200c\u06a9\u0646\u0646\u062f\u0647\u0654 '
       u'\u0645\u062e\u062a\u0635\u0627\u062a %s')
# Conversion rules, grouped by name.  Every entry is a pair
# (search pattern, replacement): the pattern is searched in the text of the
# English article and the replacement is applied to the matched text to build
# the {{Coord|...|display=title}} call that is prepended to the local page.
# CoordRobot.compileregex() compiles every pattern of every group with
# re.UNICODE; CoordRobot.run() only iterates over the 'safe' group.
# The rules are tried in list order and a later match overrides an earlier
# one, so the order of the entries matters.
templates = {
'safe': [
# Rules for templates found on every wiki: {{Coord}}, {{coor title d/dm/dms}},
# {{coor d}} and generic latd/longd or lat_deg/lat_min infobox fields.
( r'\{\{ ?[Cc]oord(.*?)\}\}',r"{{Coord\1|display=title}}\n" ),
( r'{{coor[_ ]title[_ ]d\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|display=title}}\n" ),
( r'{{coor[_ ]title[_ ]dm\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^\}]*?)\}\}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|display=title}}\n" ),
( r'{{coor[_ ]title[_ ]dms\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([NS])\|([0-9\.-]+)\|([0-9\.-]+)\|([0-9\.-]+)\|([EW])\|?([^}]*?)}}',r"{{coord|\1|\2|\3|\4|\5|\6|\7|\8|\9|display=title}}\n" ),
# NOTE(review): the second group below has no '+' quantifier, so it matches a
# single character only -- confirm whether ([0-9\.+-]+) was intended.
( r'\{\{ ?[Cc]oor[ _]d\|([0-9\.+-]+)\|([0-9\.+-])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2\3|display=title}}\n" ),
( r'\{\{.*latd *= *([0-9\.]+).*longd ?= ?([0-9\.]+)',r"{{Coord|\1|\2|display=title}}\n" ),
# The degree/minute rule below hard-codes the N and E hemispheres.
( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
# Infobox field names used on the English Wikipedia:
( r'.*\|lat_deg *= *([0-9\.]+).*\|lat_min *= *([0-9\.]+).*\|lat_sec *= *([0-9\.]+).*\n.*\|lon_deg *= *([0-9\.]+).*\|lon_min *= *([0-9\.]+).*\|lon_sec *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
( r'.*\|latd *= *([0-9\.]+).*\|*latm *= *([0-9\.]+).*\|*lats *= *([0-9\.]+).*\|*latNS *= (.*?[NS])\n.*\|longd *= *([0-9\.]+).*\|*longm *= *([0-9\.]+).*\|*longs *= *([0-9\.]+).*\| longEW = (.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
( r'.*\|*source_lat_d *= *([0-9\.]+).*\|*source_lat_m *= *([0-9\.]+).*\|*source_lat_s *= *([0-9\.]+).*\|*source_lat_NS *=*(.*?[NS])\n.*\| source_long_d *= *([0-9\.]+).*\|*source_long_m *= *([0-9\.]+).*\|*source_long_s *= *([0-9\.]+).*\| source_long_EW =*(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
# Infobox field names used on the Italian Wikipedia (one field per line):
( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|*',r"{{Coord|\1|\2|N|\3|\4|E|display=title}}\n" ),
( r'.*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*',r"{{Coord|\1|\2|\3|N|\4|\5|\6|E|display=title}}\n" ),
( r'..*\|latitudineGradi *= *([0-9\.]+).*\n.*\|latitudinePrimi *= *([0-9\.]+).*\n.*\|latSecondi *= *([0-9\.]+).*\n.*\|latitudineNS *=(.*?[NS])\n.*\|longitudineGradi *= *([0-9\.]+).*\n.*\|longitudinePrimi *= *([0-9\.]+).*\n.*\|longSecondi *= *([0-9\.]+).*\n.*\|longitudineEW *=(.*?[EW])*',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8|display=title}}\n" ),
],
# The 'notsafe' rules are compiled together with the others but are not
# applied anywhere in the code visible in this file.
'notsafe': [
( r'\{\{ ?[Cc]oord[ _]dm\|([0-9]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6\7|display=title}}\n" ),
( r'\{\{ ?[Cc]oor[ _]dms\|([0-9]+)\|([0-9\.]+)\|([0-9\.]+)\|([NS])\|([0-9\.]+)\|([0-9\.]+)\|([0-9\.]+)\|([EW])(\|?[^\|]*)\}\}',r"{{Coord|\1|\2|\3|\4|\5|\6|\7|\8\9|display=title}}\n" ),
]
}
# Exceptions: a page whose text matches any of these patterns is skipped,
# either because it already carries a Coord template or because it uses a
# template for which no coordinates should be added.
# Non-ASCII letters are written as regex escapes (\xe0 is "a" with a grave
# accent) so that the patterns stay pure ASCII and match Unicode page text
# regardless of the encoding this file is saved in.
exceptions = [
    r'\{\{ *?Geobox',
    r'\{\{ ?[Cc]oord',
    r'\{\{ ?Template:[Cc]oord',
    r'\{\{ ?[mM]ontagna',
    r'\{\{ ?(Template:)?[cC]omune',
    r'\{\{ ?[cC]itt\xe0',
    r'\{\{ ?[mM]unicipalit\xe0',
    r'\{\{ ?[aA]eroporto\|',
    r'\{\{ ?[Mm]unicipi',
    r'\{\{ ?[iI]nfobox[ _]Azienda\|',
    r'\{\{ ?[Ss]\|aziende',
    r'\{\{ ?[Dd]isambigua\|',
    r'\{\{ ?[Ff]razione',
    r'\{\{ ?[Ss]quadra',
    r'\{\{ ?[Pp]asso ?(\||\n)',
    r'\{\{ ?[Bb]undesland[ _]tedesco',
]
class CoordRobot:
    """
    A bot that imports coordinates from another Wikipedia.

    For every page yielded by the generator, run() follows the interwiki
    link to the English Wikipedia, searches that article for one of the
    coordinate notations listed in templates['safe'] and prepends the
    resulting {{Coord|...|display=title}} call to the local page text.
    """

    def __init__( self,generator,autoTitle = False,autoText = False ):
        """
        generator -- iterable of the pages to work on.
        autoTitle, autoText -- accepted for interface compatibility; they
        are not used by this class.
        """
        self.generator = generator
        self.compileregex()

    def compileregex( self ):
        """
        Compile the module-level rule tables into per-instance copies.

        The compiled patterns are stored in self.templates and
        self.exceptions.  The module-level tables are left untouched so that
        building a second robot does not try to re-compile already compiled
        patterns (re.compile rejects a compiled pattern combined with flags).
        """
        self.templates = {}
        for key, rules in templates.items():
            self.templates[key] = [( re.compile( old,re.UNICODE ),new )
                                   for old, new in rules]
        self.exceptions = [re.compile( pattern ) for pattern in exceptions]

    def checkExceptions( self,text ):
        """
        Return the first piece of text matched by an exception pattern,
        or None when no exception pattern matches.
        """
        for exception in self.exceptions:
            hit = exception.search( text )
            if hit:
                return hit.group( 0 )
        return None

    def change( self,page,new_text ):
        """
        Save new_text to page; edit conflicts and spam-blacklist hits are
        reported and the page is skipped.
        """
        try:
            page.put( new_text )
        except wikipedia.EditConflict:
            wikipedia.output( u'Skipping %s because of edit conflict' % ( page.title() ) )
        except wikipedia.SpamfilterError as url:
            wikipedia.output( u'Cannot change %s because of blacklist entry %s' % ( page.title(),url ) )

    @staticmethod
    def _cleanSourceText( text ):
        """
        Strip '{{Coord missing' markers and any display= parameter from the
        source article text, so that the rules add exactly one
        display=title of their own.
        """
        text = text.replace( '{{Coord missing','' )
        text = re.sub( r"\|\s*display\s*\=\s*(inline,)?title(,inline)?","",text )
        return text.replace( '|display=inline','' )

    def _findCoordTemplates( self,text ):
        """
        Apply every 'safe' rule to text.

        Returns a list of (matched source text, converted template) pairs,
        one per rule that matched, in rule order.
        """
        found = []
        for old, new in self.templates['safe']:
            match = old.search( text )
            if not match:
                continue
            source = match.group( 0 )
            # Optional groups that matched nothing leave empty parameters.
            converted = old.sub( new,source ).replace( u'||',u'|' )
            if '{{Coord missing' in converted:
                continue
            found.append( ( source,converted ) )
        return found

    # Specify the wiki you want to get the coordinates from (here: English).
    def run( self ):
        """
        Process every page of the generator.

        A page is skipped when it is locked, missing, a redirect, matches
        one of the exception patterns, has no interwiki link to the source
        wiki, or when no coordinate notation is found in the source article.
        """
        sen = wikipedia.Site( 'en' )
        for page in self.generator:
            try:
                if not page.canBeEdited():
                    wikipedia.output( u'Skipping locked page %s' % page.title() )
                    continue
                text_it = page.get()
                match = self.checkExceptions( text_it )
                # skip all pages that contain certain texts
                if match:
                    wikipedia.output( u'Skipping %s because it contains %s' % ( page.title(),match ) )
                    continue
                interwiki_list = page.interwiki()
            except wikipedia.NoPage:
                wikipedia.output( u'Page %s not found' % page.title() )
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output( u'Page %s is a redirect, skip' % page.title() )
                continue

            # Pick the interwiki link that points to the source wiki.
            page_en = None
            for candidate in interwiki_list:
                if candidate.site() == sen:
                    page_en = candidate
                    break
            if page_en is None:
                continue

            wikipedia.output( page.title() )
            wikipedia.output( u'en: %s' % page_en.title() )
            try:
                text_en = page_en.get()
            except wikipedia.NoPage:
                wikipedia.output( u'Page %s not found' % page_en.title() )
                continue
            except wikipedia.IsRedirectPage:
                wikipedia.output( u'Page %s is a redirect, follow redirect' % page_en.title() )
                # NOTE(review): this re-reads the same page object with
                # get_redirect=True; confirm that it yields the target text.
                text_en = page_en.get( get_redirect = True )

            conversions = self._findCoordTemplates( self._cleanSourceText( text_en ) )
            for source, converted in conversions:
                wikipedia.output( u'Coord %s: ' % source )
                wikipedia.output( converted )
            if not conversions:
                # Nothing found: do not save the page unchanged.
                continue
            if '{{Coord missing' in text_it:
                # Local pages carrying this marker are left alone.
                continue
            # When several rules match, the last one wins.
            new_text_it = conversions[-1][1] + text_it
            wikipedia.setAction( msg % page_en.aslink() )
            self.change( page,new_text_it )
def main():
    """
    Parse the command line, build the page generator and run the bot.

    Arguments handled here are -autotitle, -autotext, -page, -except,
    -template, -namespace and -summary; every other argument is handed to
    the pagegenerators factory.  When no page source is given, the help
    text is shown and the script exits.
    """
    gen = None
    # Which namespaces should be processed?
    # default to [] which means all namespaces will be processed
    namespaces = []
    PageTitles = []
    autoText = False
    autoTitle = False
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # Load default summary message.
    # BUG WARNING: This is probably incompatible with the -lang parameter.
    wikipedia.setAction( msg )
    # Read commandline parameters.
    for arg in wikipedia.handleArgs():
        if arg == '-autotitle':
            autoTitle = True
        elif arg == '-autotext':
            autoText = True
        elif arg.startswith( '-page' ):
            if len( arg ) == 5:
                PageTitles.append( wikipedia.input( u'Which page do you want to chage?' ) )
            else:
                PageTitles.append( arg[6:] )
        elif arg.startswith( '-except:' ):
            # Don't edit pages which contain this text.  The value is added,
            # as a literal, to the module-level exception patterns so that it
            # is compiled and honoured when the robot is created below.
            exceptions.append( re.escape( arg[8:] ) )
        elif arg.startswith( '-template:' ):
            # Accepted so that it is not passed on to the generator factory;
            # the value is not used.
            pass
        elif arg.startswith( '-namespace:' ):
            namespaces.append( int( arg[11:] ) )
        elif arg.startswith( '-summary:' ):
            wikipedia.setAction( arg[9:] )
        else:
            generator = genFactory.handleArg( arg )
            if generator:
                gen = generator
    # Pages named explicitly with -page take precedence over any generator.
    if PageTitles:
        pages = [wikipedia.Page( wikipedia.getSite(),PageTitle ) for PageTitle in PageTitles]
        gen = iter( pages )
    if not gen:
        # syntax error, show help text from the top of this file
        wikipedia.showHelp( 'coordbot' )
        wikipedia.stopme()
        sys.exit()
    if namespaces:
        gen = pagegenerators.NamespaceFilterPageGenerator( gen,namespaces )
    preloadingGen = pagegenerators.PreloadingGenerator( gen,pageNumber = 120 )
    bot = CoordRobot( preloadingGen,autoTitle,autoText )
    bot.run()
# Script entry point: run the bot and always call wikipedia.stopme()
# afterwards, whether main() returned normally or raised.
if __name__ == '__main__':
    try:
        main()
    finally:
        wikipedia.stopme()