- fixed a few bugs with the parser for subtypes - fixed a bug for plague rats - fixed a bug for P/T parser - First release for the following sets: Mirrodin, Mirage, Lorwyn
513 lines
16 KiB
Python
513 lines
16 KiB
Python
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
|
import re
|
|
import codecs
|
|
import sys
|
|
|
|
sets ={'BE':{'name':'Beta',
|
|
'dir':'BE',
|
|
'abbrev':'BE',
|
|
'gathabbrev': '2E',
|
|
'gathname':'LimitedEditionBeta',
|
|
},
|
|
'RV':{'name':'Revised',
|
|
'dir':'RV',
|
|
'abbrev':'RV',
|
|
'gathdirs':['3E/en-us'],
|
|
'gathabbrev': '3E',
|
|
'gathname':'RevisedEdition'
|
|
},
|
|
'4E':{'name':'4th Edition',
|
|
'dir':'4E',
|
|
'abbrev':'4E',
|
|
'gathdirs':['4E/en-us'],
|
|
'gathabbrev': '4E',
|
|
'gathname':'FourthEdition',
|
|
},
|
|
'5E':{'name':'5th Edition',
|
|
'dir':'5E',
|
|
'abbrev':'5E',
|
|
'gathdirs':['5E/en-us'],
|
|
'gathabbrev': '5E',
|
|
'gathname':'FifthEdition'
|
|
},
|
|
'6E':{'name':'6th Edition',
|
|
'dir':'6E',
|
|
'abbrev':'6E',
|
|
'gathdirs':['6E/en-us'],
|
|
'gathabbrev': '6E',
|
|
'gathname':'ClassicSixthEdition',
|
|
},
|
|
'7E':{'name':'7th Edition',
|
|
'dir':'7E',
|
|
'abbrev':'7E',
|
|
'gathdirs':['7E/en-us'],
|
|
'gathabbrev': '7E',
|
|
'gathname':'SeventhEdition',
|
|
},
|
|
'8E':{'name':'8th Edition',
|
|
'dir':'8E',
|
|
'abbrev':'8E',
|
|
'gathdirs':['8ED/en-us'],
|
|
'gathabbrev': '8ED',
|
|
'gathname':'EighthEdition',
|
|
},
|
|
'9E':{'name':'9th Edition',
|
|
'dir':'9E',
|
|
'abbrev':'9E',
|
|
'gathdirs':['9ED/en-us'],
|
|
'gathabbrev': '9ED',
|
|
'gathname':'NinthEdition',
|
|
},
|
|
'10E':{'name':'10th Edition',
|
|
'dir':'10E',
|
|
'abbrev':'10E',
|
|
'gathdirs':['10E/EN'],
|
|
'gathabbrev': '10E',
|
|
'gathname':'TenthEdition',
|
|
},
|
|
'EVE':{'name':'Eventide',
|
|
'dir':'EVE',
|
|
'abbrev':'EVE',
|
|
'gathdirs':['EVE/EN'],
|
|
'gathabbrev': 'EVE',
|
|
'gathname':'Eventide',
|
|
},
|
|
'SHM':{'name':'Shadowmoor',
|
|
'dir':'SHM',
|
|
'abbrev':'SHM',
|
|
'gathdirs':['SHM/EN'],
|
|
'gathabbrev': 'SHM',
|
|
'gathname':'Shadowmoor',
|
|
},
|
|
'ALA':{'name':'Shards of Alara',
|
|
'dir':'ALA',
|
|
'abbrev':'ALA',
|
|
'gathdirs':['ALA/EN'],
|
|
'gathabbrev': 'ALA',
|
|
'gathname':'ShardsOfAlara',
|
|
},
|
|
'CFX':{'name':'Conflux',
|
|
'dir':'CFX',
|
|
'abbrev':'CFX',
|
|
'gathdirs':['CFX/EN'],
|
|
'gathabbrev': 'CFX',
|
|
'gathname':'Conflux'
|
|
},
|
|
'LRW':{'name':'Lorwyn',
|
|
'dir':'LRW',
|
|
'abbrev':'LRW',
|
|
'gathdirs':['LRW/EN'],
|
|
'gathabbrev': 'LRW',
|
|
'gathname':'Lorwyn',
|
|
},
|
|
'UH':{'name':'Unhinged',
|
|
'dir':'UH',
|
|
'abbrev':'UH',
|
|
'gathdirs':['UNH/en-us'],
|
|
'gathabbrev': 'UNH',
|
|
'gathname':'Unhinged',
|
|
},
|
|
'UG':{'name':'Unglued',
|
|
'dir':'UG',
|
|
'abbrev':'UG',
|
|
'gathdirs':['UG/en-us'],
|
|
'gathabbrev':'UG',
|
|
'gathname':'Unglued',
|
|
},
|
|
'P1':{'name':'Portal',
|
|
'dir':'PO',
|
|
'abbrev':'PO',
|
|
'gathdirs':['PO/en-us'],
|
|
'gathabbrev':'PO',
|
|
'gathname':'Portal',
|
|
},
|
|
'P2':{'name':'Portal Second Age',
|
|
'dir':'P2',
|
|
'abbrev':'P2',
|
|
'gathdirs':['P2/en-us'],
|
|
'gathabbrev':'P2',
|
|
'gathname':'PortalSecondAge',
|
|
},
|
|
'P3':{'name':'Portal Three Kingdoms',
|
|
'dir':'P3',
|
|
'abbrev':'P3',
|
|
'gathdirs':['PK/en-us'],
|
|
'gathabbrev': 'PK',
|
|
'gathname':'PortalThreeKingdoms',
|
|
},
|
|
'AN':{'name':'Arabian Nights',
|
|
'dir':'AN',
|
|
'abbrev':'AN',
|
|
'gathdirs':['AN/en-us'],
|
|
'gathabbrev': 'AN',
|
|
'gathname':'ArabianNights'
|
|
},
|
|
'AQ':{'name':'Antiquities',
|
|
'dir':'AQ',
|
|
'abbrev':'AQ',
|
|
'gathdirs':['AQ/en-us'],
|
|
'gathabbrev': 'AQ',
|
|
'gathname':'Antiquities',
|
|
},
|
|
'LG':{'name':'Legends',
|
|
'dir':'LG',
|
|
'abbrev':'LG',
|
|
'gathdirs':['LG/en-us'],
|
|
'gathabbrev': 'LE',
|
|
'gathname':'Legends',
|
|
},
|
|
'DK':{'name':'The Dark',
|
|
'dir':'DK',
|
|
'abbrev':'DK',
|
|
'gathdirs':['DK/en-us'],
|
|
'gathabbrev': 'DK',
|
|
'gathname':'TheDark',
|
|
},
|
|
'FE':{'name':'Fallen Empires',
|
|
'dir':'FE',
|
|
'abbrev':'FE',
|
|
'gathdirs':['FE/en-us'],
|
|
'gathabbrev': 'FE',
|
|
'gathname':'FallenEmpires',
|
|
},
|
|
'IA':{'name':'Ice Age',
|
|
'dir':'IA',
|
|
'abbrev':'IA',
|
|
'gathdirs':['IA/en-us'],
|
|
'gathabbrev': 'IA',
|
|
'gathname':'IceAge',
|
|
},
|
|
'HL':{'name':'Homelands',
|
|
'dir':'HL',
|
|
'abbrev':'HL',
|
|
'gathdirs':['HM/en-us'],
|
|
'gathabbrev': 'HM',
|
|
'gathname':'Homelands'
|
|
},
|
|
'AL':{'name':'Alliances',
|
|
'dir':'AL',
|
|
'abbrev':'AL',
|
|
'gathdirs':['AL/en-us'],
|
|
'gathabbrev': 'AL',
|
|
'gathname':'Alliances',
|
|
},
|
|
'MIR':{'name':'Mirage',
|
|
'dir':'MIR',
|
|
'abbrev':'MI',
|
|
'gathdirs':['MI/en-us'],
|
|
'gathabbrev': 'MI',
|
|
'gathname':'Mirage',
|
|
},
|
|
'VI':{'name':'Visions',
|
|
'dir':'VI',
|
|
'abbrev':'VI',
|
|
'gathabbrev': 'VI',
|
|
'gathname':'Visions',
|
|
},
|
|
'WL':{'name':'Weatherlight',
|
|
'dir':'WL',
|
|
'abbrev':'WL',
|
|
'gathabbrev': 'WL',
|
|
'gathname':'Weatherlight',
|
|
},
|
|
'TE':{'name':'Tempest',
|
|
'dir':'TE',
|
|
'gathdirs':['TE/en-us'],
|
|
'abbrev':'TE',
|
|
'gathabbrev': 'TE',
|
|
'gathname':'Tempest',
|
|
},
|
|
'SH':{'name':'Stronghold',
|
|
'dir':'SH',
|
|
'abbrev':'SH',
|
|
'gathabbrev': 'ST',
|
|
'gathname':'Stronghold',
|
|
},
|
|
'EX':{'name':'Exodus',
|
|
'dir':'EX',
|
|
'abbrev':'EX',
|
|
'gathabbrev': 'EX',
|
|
'gathname':'Exodus',
|
|
},
|
|
'US':{'name':'Urza\'s Saga',
|
|
'dir':'US',
|
|
'abbrev':'US',
|
|
'gathabbrev': 'UZ',
|
|
'gathname':'UrzasSaga',
|
|
},
|
|
'UL':{'name':'Urza\'s Legacy',
|
|
'dir':'UL',
|
|
'abbrev':'UL',
|
|
'gathabbrev': 'GU',
|
|
'gathname':'UrzasDestiny',
|
|
},
|
|
'UD':{'name':'Urza\'s Destiny',
|
|
'dir':'UD',
|
|
'abbrev':'UD',
|
|
'gathabbrev': 'CG',
|
|
'gathname':'UrzasLegacy',
|
|
},
|
|
'MM':{'name':'Mercadian Masques',
|
|
'dir':'MM',
|
|
'abbrev':'MM',
|
|
'gathabbrev': 'MM',
|
|
'gathname':'MercadianMasques',
|
|
},
|
|
'NE':{'name':'Nemesis',
|
|
'dir':'NE',
|
|
'abbrev':'NE',
|
|
'gathabbrev': 'NE',
|
|
'gathname':'Nemesis',
|
|
},
|
|
'PY':{'name':'Prophecy',
|
|
'dir':'PY',
|
|
'abbrev':'PY',
|
|
'gathabbrev': 'PR',
|
|
'gathname':'Prophecy',
|
|
},
|
|
'IN':{'name':'Invasion',
|
|
'dir':'IN',
|
|
'abbrev':'IN',
|
|
'gathabbrev': 'IN',
|
|
'gathname':'Invasion',
|
|
},
|
|
'PS':{'name':'Planeshift',
|
|
'dir':'PS',
|
|
'abbrev':'PS',
|
|
'gathabbrev': 'PS',
|
|
'gathname':'Planeshift',
|
|
},
|
|
'AP':{'name':'Apocalypse',
|
|
'dir':'AP',
|
|
'abbrev':'AP',
|
|
'gathabbrev': 'AP',
|
|
'gathname':'Apocalypse',
|
|
},
|
|
'OD':{'name':'Odyssey',
|
|
'dir':'OD',
|
|
'abbrev':'OD',
|
|
'gathabbrev': 'OD',
|
|
'gathname':'Odyssey',
|
|
},
|
|
'TO':{'name':'Torment',
|
|
'dir':'TO',
|
|
'abbrev':'TO',
|
|
'gathabbrev': 'TOR',
|
|
'gathname':'Torment',
|
|
},
|
|
'JD':{'name':'Judgment',
|
|
'dir':'JD',
|
|
'abbrev':'JD',
|
|
'gathabbrev': 'JUD',
|
|
'gathname':'Judgment',
|
|
},
|
|
'ON':{'name':'Onslaught',
|
|
'dir':'ON',
|
|
'abbrev':'ON',
|
|
'gathabbrev': 'ONS',
|
|
'gathname':'Onslaught',
|
|
},
|
|
'LE':{'name':'Legions',
|
|
'dir':'LE',
|
|
'abbrev':'LE',
|
|
'gathabbrev': 'LGN',
|
|
'gathname':'Legions',
|
|
},
|
|
'SC':{'name':'Scourge',
|
|
'dir':'SC',
|
|
'abbrev':'SC',
|
|
'gathabbrev': 'SCG',
|
|
'gathname':'Scourge',
|
|
},
|
|
'MRD':{'name':'Mirrodin',
|
|
'dir':'MRD',
|
|
'abbrev':'MRD',
|
|
'gathabbrev': 'MRD',
|
|
'gathdirs':['MRD/en-us'],
|
|
'gathname':'Mirrodin',
|
|
},
|
|
'DS':{'name':'Darksteel',
|
|
'dir':'DS',
|
|
'abbrev':'DS',
|
|
'gathabbrev': 'DST',
|
|
'gathname':'Darksteel',
|
|
},
|
|
'FD':{'name':'Fifth Dawn',
|
|
'dir':'FD',
|
|
'abbrev':'FD',
|
|
'gathabbrev': '5DN',
|
|
'gathname':'FifthDawn',
|
|
},
|
|
'CK':{'name':'Champions of Kamigawa',
|
|
'dir':'CK',
|
|
'abbrev':'CK',
|
|
'gathdirs':['CHK/en-us'],
|
|
'gathabbrev': 'CHK',
|
|
'gathname':'ChampionsofKamigawa',
|
|
},
|
|
'BK':{'name':'Betrayers of Kamigawa',
|
|
'dir':'BK',
|
|
'abbrev':'BK',
|
|
'gathabbrev': 'BOK',
|
|
'gathname':'BetrayersofKamigawa',
|
|
},
|
|
'SK':{'name':'Saviors of Kamigawa',
|
|
'dir':'SK',
|
|
'abbrev':'SK',
|
|
'gathabbrev': 'SOK',
|
|
'gathname':'SaviorsofKamigawa',
|
|
},
|
|
'RA':{'name':'Ravnica: City of Guilds',
|
|
'dir':'RA',
|
|
'abbrev':'RA',
|
|
'gathabbrev': 'RAV',
|
|
'gathname':'RavnicaCityofGuilds',
|
|
},
|
|
'GP':{'name':'Guildpact',
|
|
'dir':'GP',
|
|
'abbrev':'GP',
|
|
'gathabbrev': 'GPT',
|
|
'gathname':'Guildpact',
|
|
},
|
|
'DI':{'name':'Dissension',
|
|
'dir':'DI',
|
|
'abbrev':'DI',
|
|
'gathabbrev': 'DIS',
|
|
'gathname':'Dissension',
|
|
},
|
|
'CS':{'name':'Coldsnap',
|
|
'dir':'CS',
|
|
'abbrev':'CS',
|
|
'gathabbrev':'CSP',
|
|
'gathname':'Coldsnap',
|
|
},
|
|
'TS':{'name':'Time Spiral',
|
|
'gathname':'TimeSpiralBlock',
|
|
'gathabbrev':'(?:(?:TSP)|(?:TSB))',
|
|
'dir':'TS',
|
|
'abbrev':'TS',
|
|
'gathdirs' : ('TSP','TSB'),
|
|
},
|
|
'PC':{'name':'Planar Chaos',
|
|
'gathname':'Planar%20Chaos',
|
|
'gathabbrev':'PLC',
|
|
'dir':'PC',
|
|
'abbrev':'PC',
|
|
},
|
|
'S1':{'name':'Starter 1999',
|
|
'gathname':'Starter%201999',
|
|
'gathabbrev':'P3',
|
|
'gathdirs':['P3'],
|
|
'dir':'S1',
|
|
'abbrev':'S1'
|
|
},
|
|
'S2':{'name':'Starter 2000',
|
|
'gathname':'Starter%202000',
|
|
'gathabbrev':'P4',
|
|
'dir':'S1',
|
|
'abbrev':'S1'
|
|
},
|
|
'FS':{'name':'Future Sight',
|
|
'gathname':'Future%20Sight',
|
|
'gathabbrev':'FUT',
|
|
'gathdirs':['FUT'],
|
|
'dir':'FS',
|
|
'abbrev':'FS'
|
|
},
|
|
|
|
}
|
|
|
|
def maketransU(s1, s2, todel=""):
|
|
trans_tab = dict( zip( map(ord, s1), map(ord, s2) ) )
|
|
trans_tab.update( (ord(c),None) for c in todel )
|
|
return trans_tab
|
|
|
|
imagetrans = maketransU(u'\xe2\xea\xee\xf4\xfb\xe1\xe9\xed\xf3\xfa\xfd\xe4\xeb\xef\xf6\xfc\xff\xe5\xc2\xca\xce\xd4\xdb\xc1\xc9\xcd\xd3\xda\xdd\xc4\xcb\xcf\xd6\xdc\xc5',u'aeiouaeiouyaeiouyaAEIOUAEIOUYAEIOUA',u"'/,. &;!")
|
|
imagetrans[198]=u'AE'
|
|
|
|
nametrans = maketransU(u'\xe2\xea\xee\xf4\xfb\xe1\xe9\xed\xf3\xfa\xfd\xe4\xeb\xef\xf6\xfc\xff\xe5\xc2\xca\xce\xd4\xdb\xc1\xc9\xcd\xd3\xda\xdd\xc4\xcb\xcf\xd6\xdc\xc5',u'aeiouaeiouyaeiouyaAEIOUAEIOUYAEIOUA')
|
|
nametrans[198]=u'AE'
|
|
|
|
cleanuptrans = {ord(u'\r'):u' ',
|
|
ord(u'"'):u'"',
|
|
ord(u'\u2018'):ord(u'\''),
|
|
ord(u'&'):u'&',
|
|
}
|
|
|
|
colorSymbols = {'red':'R',
|
|
'green' : 'G',
|
|
'blue':'U',
|
|
'black':'B',
|
|
'white':'W'}
|
|
symbolColors = dict([reversed(a) for a in colorSymbols.items()])
|
|
basic_lands = ('Mountain','Forest','Island','Swamp','Plains')
|
|
color_re = re.compile(".*(?P<color>[Rr]ed|[Gg]reen|[Bb]lue|[Bb]lack|[Ww]hite).*")
|
|
mana_re = re.compile(".*Symbol_(?P<type>.*)_mana\.gif.*")
|
|
tap_re = re.compile(".*tap.gif.*")
|
|
basicLand_re = re.compile("\[(?P<mana>.)\]")
|
|
split_re = re.compile("(?P<t1>.*) // (?P<name2>.*) (?P<mana2>\{.*\}) (?P<type2>.*) (?P<t2>.*)")
|
|
id_re = re.compile(".*id=(?P<id>\d*).*")
|
|
reminder_re = re.compile('(\A[^\(]*)|((?<=\))[^\(]*)')
|
|
|
|
_stripReminderText = True;
|
|
|
|
def replaceSymbols(soup):
|
|
for symbol in soup.findAll('img'):
|
|
m = color_re.match(str(symbol['src']))
|
|
if m:
|
|
s = colorSymbols[m.group('color').lower()]
|
|
symbol.replaceWith('{' + s + '}')
|
|
|
|
m = mana_re.match(str(symbol))
|
|
if m:
|
|
if m.group('type') == "Snow":
|
|
symbol.replaceWith('{S}')
|
|
else:
|
|
symbol.replaceWith('{' + m.group('type') + '}')
|
|
|
|
m = tap_re.match(str(symbol))
|
|
if m:
|
|
symbol.replaceWith('{T}')
|
|
|
|
return soup
|
|
|
|
|
|
def getCardTypes(soup):
|
|
types = [t.strip()
|
|
for t in soup('td')[2]('font')[0].string.split('-',1)]
|
|
if (len(types) == 2):
|
|
supertype = types[0]
|
|
subtype = types[1]
|
|
else:
|
|
supertype = types[0]
|
|
subtype = ''
|
|
# replace entities, since gccg doesn't undertand them in attributes
|
|
subtype = subtype.replace("â", u"\342")
|
|
|
|
return supertype, subtype
|
|
|
|
|
|
def cleanupHTMLText(htmlText, stripReminder = _stripReminderText):
|
|
for i in htmlText.findAll('br'):
|
|
i.replaceWith(' ')
|
|
for i in htmlText.findAll('i'):
|
|
i.replaceWith(''.join(i.contents))
|
|
for i in htmlText.findAll('b'):
|
|
i.replaceWith(''.join(i.contents))
|
|
|
|
text = htmlText('font')[0].renderContents(None)
|
|
# add text for Basic Land
|
|
m = basicLand_re.match(text)
|
|
if m:
|
|
text = u"{T}: Add {" + m.group('mana') + u"} to your mana pool."
|
|
if text == u" ":
|
|
text = u""
|
|
|
|
text = text.translate(cleanuptrans)
|
|
|
|
if stripReminder:
|
|
text = ''.join([''.join(m) for m in reminder_re.findall(text)])
|
|
|
|
return text
|
|
|