This commit is contained in:
1767
projects/mtg/tools/gatherer/BeautifulSoup.py
Normal file
1767
projects/mtg/tools/gatherer/BeautifulSoup.py
Normal file
File diff suppressed because it is too large
Load Diff
BIN
projects/mtg/tools/gatherer/BeautifulSoup.pyc
Normal file
BIN
projects/mtg/tools/gatherer/BeautifulSoup.pyc
Normal file
Binary file not shown.
188
projects/mtg/tools/gatherer/gatherer-builder.py
Normal file
188
projects/mtg/tools/gatherer/gatherer-builder.py
Normal file
@@ -0,0 +1,188 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# Requires BeautifulSoup version 3, available from
|
||||
# http://www.crummy.com/software/BeautifulSoup/
|
||||
#
|
||||
# Usage: python gatherer-builder.py <output file>
|
||||
#
|
||||
# Copyright 2006: Nathan Callahan
|
||||
# Feel free to do what you want with this file, but give credit
|
||||
# where it's due.
|
||||
|
||||
|
||||
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
||||
import re
|
||||
import codecs
|
||||
import sys
|
||||
import urllib
|
||||
import os
|
||||
import os.path
|
||||
from mtgCommon import *
|
||||
|
||||
# Set to build; the key selects an entry of the `sets` table from mtgCommon.
setinfo=sets['EVE']
# Passed to cleanupHTMLText for every card's rules text.
stripReminderText = False
# Per-set "<dir>.conf" file that receives one [card] section per printing.
conffile = open(setinfo['dir'] + ".conf", 'w')

# FETCH_IMAGES = False
# When true, fetchImage is called for every printing found.
FETCH_IMAGES = True

# Spoiler listing for the chosen set, fetched and parsed once at start-up.
url = "http://ww2.wizards.com/gatherer/Index.aspx?setfilter=%s\\&output=Spoiler" % setinfo['gathname']
gatherer = urllib.urlopen(url)
soup = BeautifulSoup(gatherer.read(), smartQuotesTo=None)

# Empty gccg set document; <card> elements are inserted under <cards> later.
xml = BeautifulStoneSoup('<?xml version=\'1.0\' encoding=\'latin1\'?><!DOCTYPE ccg-setinfo SYSTEM "../gccg-set.dtd"><ccg-setinfo name="%s" dir="%s" abbrev="%s" game="Magic The Gathering"><cards></cards></ccg-setinfo>' % (setinfo['name'], setinfo['dir'], setinfo['abbrev']),selfClosingTags=['attr'])

# Matches "<gathabbrev>_<rarity letter>.gif" and captures the rarity letter.
rarity_re=re.compile(".*%s_(?P<rarity>.)\.gif.*" % setinfo['gathabbrev'])
|
||||
|
||||
|
||||
def fetchImage(id, filename):
    """Download the image of card *id* into the set directory as *filename*.

    Nothing is done when the target file already exists.  Each directory in
    setinfo['gathdirs'] is tried in order and the first one that answers
    successfully is used.

    Raises IOError when no directory yields an image (including the case
    where the set defines no 'gathdirs' at all).
    """
    target = os.path.join(setinfo['abbrev'], filename)
    if os.path.exists(target):
        return

    pic = None
    for gathdir in setinfo.get('gathdirs', ()):
        url = "http://resources.wizards.com/Magic/Cards/%s/Card%s.jpg" % (gathdir, id)
        print(url)
        try:
            candidate = urllib.urlopen(url)
        except IOError:
            # Connection-level failure: try the next directory.
            continue
        # urlopen hands back the error page for HTTP errors instead of
        # raising, so the status code has to be checked explicitly.
        # getcode() is missing on very old Pythons and returns None for
        # non-HTTP responses; both cases are treated as success.
        status = None
        if hasattr(candidate, 'getcode'):
            status = candidate.getcode()
        if status in (None, 200):
            pic = candidate
            break
        candidate.close()

    if pic is None:
        raise IOError("no image found for card %s" % id)

    try:
        if not os.path.isdir(setinfo['abbrev']):
            os.mkdir(setinfo['abbrev'])
        # JPEG data is binary; text mode would corrupt it on some platforms.
        f = open(target, 'wb')
        try:
            f.write(pic.read())
        finally:
            f.close()
    finally:
        pic.close()
|
||||
|
||||
|
||||
|
||||
def _appendAttr(card, key, value):
    """Append an <attr key=... value=...> child to *card*."""
    attr = Tag(xml, 'attr')
    attr['key'] = key
    attr['value'] = value
    card.insert(len(card), attr)


# Fail before doing any work if the output file was not given.
if len(sys.argv) < 2:
    conffile.close()
    sys.exit("Usage: python gatherer-builder.py <output file>")

try:
    # One spoiler table row per card.
    for cardRow in soup.find(id="_gridResults").findAll('tr',onmouseover="this.style.backgroundColor='#F5DEB3';"):
        name = cardRow('b')[0].string
        name = name.replace('"','')
        name = name.replace(u'\xe2', 'a')

        manaCost = replaceSymbols(cardRow('td')[1]('font')[0])
        manaCost = ''.join(manaCost.contents)
        print(manaCost)
        if manaCost == " ":
            manaCost = ""

        htmlText = cardRow('td')[3]
        htmlText = replaceSymbols(htmlText)
        text = cleanupHTMLText(htmlText, stripReminderText)

        supertype, subtype = getCardTypes(cardRow)

        # Split cards carry the second half's cost and type in the text.
        splitCard = split_re.match(text)
        if splitCard:
            text = splitCard.group('t1') + " // " + splitCard.group('t2')
            manaCost = manaCost + " // " + splitCard.group('mana2')
            supertype = supertype + " // " + splitCard.group('type2')

        power = cardRow('td')[4]('font')[0].string
        if power == " ":
            power = None

        toughness = cardRow('td')[5]('font')[0].string
        if toughness == " ":
            toughness = None

        # Colours are derived from the coloured symbols in the mana cost.
        colors = set()
        for c in manaCost:
            if c in symbolColors:
                colors.add(symbolColors[c].capitalize())
        color = ' '.join(sorted(colors))
        if not color:
            if supertype.find("Artifact") != -1:
                color = "Artifact"
            elif supertype.find("Land") != -1:
                color = "Land"
            else:
                # Fall back to a "<name> is <color>." sentence in the text.
                # When there is no such sentence the colour stays empty
                # instead of being sliced from an arbitrary position.
                ss = "%s is " % name
                found = text.find(ss)
                if found != -1:
                    start = found + len(ss)
                    end = text.find('.', start)
                    if end == -1:
                        end = len(text)
                    color = text[start:end].capitalize()

        # One <card> element and one [card] section per printing in the set.
        printings = 1
        for printing in cardRow('td')[6].findAll(src=rarity_re):
            print(name)
            if name in basic_lands:
                rarity = 'L'
            else:
                rarity = rarity_re.match(str(printing)).group('rarity')

            card = Tag(xml, 'card')
            cards = xml('ccg-setinfo')[0]('cards')[0]

            cards.insert(len(cards), card)
            card = cards('card')[-1]

            card['name'] = name

            # Later printings get a numeric suffix on the image name.
            if printings > 1:
                card['graphics'] = name.translate(imagetrans) + str(printings) + ".jpg"
            else:
                card['graphics'] = name.translate(imagetrans) + ".jpg"

            cardId = id_re.match(printing.parent['onclick']).group('id')
            if FETCH_IMAGES:
                fetchImage(cardId, cardId + ".jpg")

            text = text.replace(u'\xe2', 'a')
            card['text'] = text

            _appendAttr(card, 'rarity', rarity)
            _appendAttr(card, 'color', color)

            conffile.write("[card]")
            # conffile.write("\nimage=" + card['graphics'])
            conffile.write("\ntext=" + text)
            conffile.write("\nid=" + cardId)
            conffile.write("\nname=" + name)
            conffile.write("\nrarity=" + rarity)
            # conffile.write("\ncolor=" + color)
            conffile.write("\ntype=" + supertype)

            if manaCost:
                _appendAttr(card, 'cost', manaCost)
                conffile.write("\nmana=" + manaCost)
            if power:
                _appendAttr(card, 'power', power)
                conffile.write("\npower=" + power)
            if subtype:
                subtype = subtype.replace(u'\xe2', 'a')
                _appendAttr(card, 'subtype', subtype)
                conffile.write("\nsubtype=" + subtype)
            if toughness:
                _appendAttr(card, 'toughness', toughness)
                conffile.write("\ntoughness=" + toughness)
            _appendAttr(card, 'type', supertype)

            printings += 1
            conffile.write("\n[/card]\n")

    # Write the assembled set document to the file named on the command line.
    f = open(sys.argv[1], 'w')
    try:
        f.write(xml.prettify('latin1'))
    finally:
        f.close()
finally:
    conffile.close()
|
||||
|
||||
487
projects/mtg/tools/gatherer/mtgCommon.py
Normal file
487
projects/mtg/tools/gatherer/mtgCommon.py
Normal file
@@ -0,0 +1,487 @@
|
||||
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
||||
import re
|
||||
import codecs
|
||||
import sys
|
||||
|
||||
# Per-set metadata, keyed by the local set abbreviation.
#   name       - display name written into the set XML
#   dir        - value of the set XML's "dir" attribute; also the basename
#                of the generated .conf file
#   abbrev     - local abbreviation (same as the key; soup-parser looks sets
#                up by the abbrev attribute stored in the XML)
#   gathdirs   - image directories tried by the image fetcher (optional;
#                sets without it have no known image location)
#   gathabbrev - set code used in Gatherer's rarity icon file names
#                (interpolated into a regular expression)
#   gathname   - value for Gatherer's "setfilter" query parameter
sets = {
    'BE': {'name': 'Beta', 'dir': 'BE', 'abbrev': 'BE',
           'gathabbrev': '2E', 'gathname': 'LimitedEditionBeta'},
    'RV': {'name': 'Revised', 'dir': 'RV', 'abbrev': 'RV',
           'gathdirs': ['3E/en-us'],
           'gathabbrev': '3E', 'gathname': 'RevisedEdition'},
    '4E': {'name': '4th Edition', 'dir': '4E', 'abbrev': '4E',
           'gathdirs': ['4E/en-us'],
           'gathabbrev': '4E', 'gathname': 'FourthEdition'},
    '5E': {'name': '5th Edition', 'dir': '5E', 'abbrev': '5E',
           'gathdirs': ['5E/en-us'],
           'gathabbrev': '5E', 'gathname': 'FifthEdition'},
    '6E': {'name': '6th Edition', 'dir': '6E', 'abbrev': '6E',
           'gathdirs': ['6E/en-us'],
           'gathabbrev': '6E', 'gathname': 'ClassicSixthEdition'},
    '7E': {'name': '7th Edition', 'dir': '7E', 'abbrev': '7E',
           'gathdirs': ['7E/en-us'],
           'gathabbrev': '7E', 'gathname': 'SeventhEdition'},
    '8E': {'name': '8th Edition', 'dir': '8E', 'abbrev': '8E',
           'gathdirs': ['8ED/en-us'],
           'gathabbrev': '8ED', 'gathname': 'EighthEdition'},
    '9E': {'name': '9th Edition', 'dir': '9E', 'abbrev': '9E',
           'gathdirs': ['9ED/en-us'],
           'gathabbrev': '9ED', 'gathname': 'NinthEdition'},
    '10E': {'name': '10th Edition', 'dir': '10E', 'abbrev': '10E',
            'gathdirs': ['10E/EN'],
            'gathabbrev': '10E', 'gathname': 'TenthEdition'},
    'EVE': {'name': 'Eventide', 'dir': 'EVE', 'abbrev': 'EVE',
            'gathdirs': ['EVE/EN'],
            'gathabbrev': 'EVE', 'gathname': 'Eventide'},
    'ALA': {'name': 'Shards of Alara', 'dir': 'ALA', 'abbrev': 'ALA',
            'gathdirs': ['ALA/EN'],
            'gathabbrev': 'ALA', 'gathname': 'ShardsOfAlara'},
    'UH': {'name': 'Unhinged', 'dir': 'UH', 'abbrev': 'UH',
           'gathdirs': ['UNH/en-us'],
           'gathabbrev': 'UNH', 'gathname': 'Unhinged'},
    'UG': {'name': 'Unglued', 'dir': 'UG', 'abbrev': 'UG',
           'gathdirs': ['UG/en-us'],
           'gathabbrev': 'UG', 'gathname': 'Unglued'},
    'P1': {'name': 'Portal', 'dir': 'P1', 'abbrev': 'P1',
           'gathdirs': ['P1/en-us'],
           'gathabbrev': 'P1', 'gathname': 'Portal'},
    'P2': {'name': 'Portal Second Age', 'dir': 'P2', 'abbrev': 'P2',
           'gathdirs': ['P2/en-us'],
           'gathabbrev': 'P2', 'gathname': 'PortalSecondAge'},
    'P3': {'name': 'Portal Three Kingdoms', 'dir': 'P3', 'abbrev': 'P3',
           'gathdirs': ['PK/en-us'],
           'gathabbrev': 'PK', 'gathname': 'PortalThreeKingdoms'},
    'AN': {'name': 'Arabian Nights', 'dir': 'AN', 'abbrev': 'AN',
           'gathdirs': ['AN/en-us'],
           'gathabbrev': 'AN', 'gathname': 'ArabianNights'},
    'AQ': {'name': 'Antiquities', 'dir': 'AQ', 'abbrev': 'AQ',
           'gathdirs': ['AQ/en-us'],
           'gathabbrev': 'AQ', 'gathname': 'Antiquities'},
    'LG': {'name': 'Legends', 'dir': 'LG', 'abbrev': 'LG',
           'gathdirs': ['LG/en-us'],
           'gathabbrev': 'LE', 'gathname': 'Legends'},
    'DK': {'name': 'The Dark', 'dir': 'DK', 'abbrev': 'DK',
           'gathdirs': ['DK/en-us'],
           'gathabbrev': 'DK', 'gathname': 'TheDark'},
    'FE': {'name': 'Fallen Empires', 'dir': 'FE', 'abbrev': 'FE',
           'gathdirs': ['FE/en-us'],
           'gathabbrev': 'FE', 'gathname': 'FallenEmpires'},
    'IA': {'name': 'Ice Age', 'dir': 'IA', 'abbrev': 'IA',
           'gathdirs': ['IA/en-us'],
           'gathabbrev': 'IA', 'gathname': 'IceAge'},
    'HL': {'name': 'Homelands', 'dir': 'HL', 'abbrev': 'HL',
           'gathdirs': ['HM/en-us'],
           'gathabbrev': 'HM', 'gathname': 'Homelands'},
    'AL': {'name': 'Alliances', 'dir': 'AL', 'abbrev': 'AL',
           'gathdirs': ['AL/en-us'],
           'gathabbrev': 'AL', 'gathname': 'Alliances'},
    'MI': {'name': 'Mirage', 'dir': 'MI', 'abbrev': 'MI',
           'gathdirs': ['MI/en-us'],
           'gathabbrev': 'MI', 'gathname': 'Mirage'},
    'VI': {'name': 'Visions', 'dir': 'VI', 'abbrev': 'VI',
           'gathabbrev': 'VI', 'gathname': 'Visions'},
    'WL': {'name': 'Weatherlight', 'dir': 'WL', 'abbrev': 'WL',
           'gathabbrev': 'WL', 'gathname': 'Weatherlight'},
    'TE': {'name': 'Tempest', 'dir': 'TE', 'abbrev': 'TE',
           'gathabbrev': 'TE', 'gathname': 'Tempest'},
    'SH': {'name': 'Stronghold', 'dir': 'SH', 'abbrev': 'SH',
           'gathabbrev': 'ST', 'gathname': 'Stronghold'},
    'EX': {'name': 'Exodus', 'dir': 'EX', 'abbrev': 'EX',
           'gathabbrev': 'EX', 'gathname': 'Exodus'},
    'US': {'name': 'Urza\'s Saga', 'dir': 'US', 'abbrev': 'US',
           'gathabbrev': 'UZ', 'gathname': 'UrzasSaga'},
    # The gathname values of UL and UD used to be swapped.
    'UL': {'name': 'Urza\'s Legacy', 'dir': 'UL', 'abbrev': 'UL',
           'gathabbrev': 'GU', 'gathname': 'UrzasLegacy'},
    'UD': {'name': 'Urza\'s Destiny', 'dir': 'UD', 'abbrev': 'UD',
           'gathabbrev': 'CG', 'gathname': 'UrzasDestiny'},
    'MM': {'name': 'Mercadian Masques', 'dir': 'MM', 'abbrev': 'MM',
           'gathabbrev': 'MM', 'gathname': 'MercadianMasques'},
    'NE': {'name': 'Nemesis', 'dir': 'NE', 'abbrev': 'NE',
           'gathabbrev': 'NE', 'gathname': 'Nemesis'},
    'PY': {'name': 'Prophecy', 'dir': 'PY', 'abbrev': 'PY',
           'gathabbrev': 'PR', 'gathname': 'Prophecy'},
    'IN': {'name': 'Invasion', 'dir': 'IN', 'abbrev': 'IN',
           'gathabbrev': 'IN', 'gathname': 'Invasion'},
    'PS': {'name': 'Planeshift', 'dir': 'PS', 'abbrev': 'PS',
           'gathabbrev': 'PS', 'gathname': 'Planeshift'},
    'AP': {'name': 'Apocalypse', 'dir': 'AP', 'abbrev': 'AP',
           'gathabbrev': 'AP', 'gathname': 'Apocalypse'},
    'OD': {'name': 'Odyssey', 'dir': 'OD', 'abbrev': 'OD',
           'gathabbrev': 'OD', 'gathname': 'Odyssey'},
    'TO': {'name': 'Torment', 'dir': 'TO', 'abbrev': 'TO',
           'gathabbrev': 'TOR', 'gathname': 'Torment'},
    'JD': {'name': 'Judgment', 'dir': 'JD', 'abbrev': 'JD',
           'gathabbrev': 'JUD', 'gathname': 'Judgment'},
    'ON': {'name': 'Onslaught', 'dir': 'ON', 'abbrev': 'ON',
           'gathabbrev': 'ONS', 'gathname': 'Onslaught'},
    'LE': {'name': 'Legions', 'dir': 'LE', 'abbrev': 'LE',
           'gathabbrev': 'LGN', 'gathname': 'Legions'},
    'SC': {'name': 'Scourge', 'dir': 'SC', 'abbrev': 'SC',
           'gathabbrev': 'SCG', 'gathname': 'Scourge'},
    'MR': {'name': 'Mirrodin', 'dir': 'MR', 'abbrev': 'MR',
           'gathabbrev': 'MRD', 'gathname': 'Mirrodin'},
    'DS': {'name': 'Darksteel', 'dir': 'DS', 'abbrev': 'DS',
           'gathabbrev': 'DST', 'gathname': 'Darksteel'},
    'FD': {'name': 'Fifth Dawn', 'dir': 'FD', 'abbrev': 'FD',
           'gathabbrev': '5DN', 'gathname': 'FifthDawn'},
    'CK': {'name': 'Champions of Kamigawa', 'dir': 'CK', 'abbrev': 'CK',
           'gathabbrev': 'CHK', 'gathname': 'ChampionsofKamigawa'},
    'BK': {'name': 'Betrayers of Kamigawa', 'dir': 'BK', 'abbrev': 'BK',
           'gathabbrev': 'BOK', 'gathname': 'BetrayersofKamigawa'},
    'SK': {'name': 'Saviors of Kamigawa', 'dir': 'SK', 'abbrev': 'SK',
           'gathabbrev': 'SOK', 'gathname': 'SaviorsofKamigawa'},
    'RA': {'name': 'Ravnica: City of Guilds', 'dir': 'RA', 'abbrev': 'RA',
           'gathabbrev': 'RAV', 'gathname': 'RavnicaCityofGuilds'},
    'GP': {'name': 'Guildpact', 'dir': 'GP', 'abbrev': 'GP',
           'gathabbrev': 'GPT', 'gathname': 'Guildpact'},
    'DI': {'name': 'Dissension', 'dir': 'DI', 'abbrev': 'DI',
           'gathabbrev': 'DIS', 'gathname': 'Dissension'},
    'CS': {'name': 'Coldsnap', 'dir': 'CS', 'abbrev': 'CS',
           'gathabbrev': 'CSP', 'gathname': 'Coldsnap'},
    # gathabbrev is a regex fragment here: the block uses two icon codes.
    'TS': {'name': 'Time Spiral', 'dir': 'TS', 'abbrev': 'TS',
           'gathdirs': ('TSP', 'TSB'),
           'gathabbrev': '(?:(?:TSP)|(?:TSB))',
           'gathname': 'TimeSpiralBlock'},
    'PC': {'name': 'Planar Chaos', 'dir': 'PC', 'abbrev': 'PC',
           'gathabbrev': 'PLC', 'gathname': 'Planar%20Chaos'},
    'S1': {'name': 'Starter 1999', 'dir': 'S1', 'abbrev': 'S1',
           'gathdirs': ['P3'],
           'gathabbrev': 'P3', 'gathname': 'Starter%201999'},
    # dir/abbrev used to be a copy-paste of the S1 entry.
    'S2': {'name': 'Starter 2000', 'dir': 'S2', 'abbrev': 'S2',
           'gathabbrev': 'P4', 'gathname': 'Starter%202000'},
    'FS': {'name': 'Future Sight', 'dir': 'FS', 'abbrev': 'FS',
           'gathdirs': ['FUT'],
           'gathabbrev': 'FUT', 'gathname': 'Future%20Sight'},
}
|
||||
|
||||
def maketransU(s1, s2, todel=""):
    """Build a translation table usable with unicode.translate().

    Characters of *s1* map to the characters at the same position in *s2*
    (the longer string's surplus is ignored); every character of *todel*
    maps to None, i.e. is deleted.  Entries from *todel* override mappings
    from *s1*.  Returns a dict keyed by code point.
    """
    table = {}
    for source, target in zip(s1, s2):
        table[ord(source)] = ord(target)
    for doomed in todel:
        table[ord(doomed)] = None
    return table
|
||||
|
||||
# Accented Latin-1 letters and, position by position, their plain ASCII
# replacements.
_accented = u'\xe2\xea\xee\xf4\xfb\xe1\xe9\xed\xf3\xfa\xfd\xe4\xeb\xef\xf6\xfc\xff\xe5\xc2\xca\xce\xd4\xdb\xc1\xc9\xcd\xd3\xda\xdd\xc4\xcb\xcf\xd6\xdc\xc5'
_unaccented = u'aeiouaeiouyaeiouyaAEIOUAEIOUYAEIOUA'

# Card name -> image file name: accents removed, punctuation and spaces
# dropped, and the AE ligature (code point 198) spelled out.
imagetrans = maketransU(_accented, _unaccented, u"'/,. &;!")
imagetrans[ord(u'\xc6')] = u'AE'

# Card name normalisation for comparisons: same as above but nothing deleted.
nametrans = maketransU(_accented, _unaccented)
nametrans[ord(u'\xc6')] = u'AE'
|
||||
|
||||
cleanuptrans = {ord(u'\r'):u' ',
|
||||
ord(u'"'):u'"',
|
||||
ord(u'\u2018'):ord(u'\''),
|
||||
ord(u'&'):u'&',
|
||||
}
|
||||
|
||||
colorSymbols = {'red':'R',
|
||||
'green' : 'G',
|
||||
'blue':'U',
|
||||
'black':'B',
|
||||
'white':'W'}
|
||||
symbolColors = dict([reversed(a) for a in colorSymbols.items()])
|
||||
basic_lands = ('Mountain','Forest','Island','Swamp','Plains')
|
||||
color_re = re.compile(".*(?P<color>[Rr]ed|[Gg]reen|[Bb]lue|[Bb]lack|[Ww]hite).*")
|
||||
mana_re = re.compile(".*Symbol_(?P<type>.*)_mana\.gif.*")
|
||||
tap_re = re.compile(".*tap.gif.*")
|
||||
basicLand_re = re.compile("\[(?P<mana>.)\]")
|
||||
split_re = re.compile("(?P<t1>.*) // (?P<name2>.*) (?P<mana2>\{.*\}) (?P<type2>.*) (?P<t2>.*)")
|
||||
id_re = re.compile(".*id=(?P<id>\d*).*")
|
||||
reminder_re = re.compile('(\A[^\(]*)|((?<=\))[^\(]*)')
|
||||
|
||||
_stripReminderText = True;
|
||||
|
||||
def replaceSymbols(soup):
    """Replace symbol images inside *soup* with text codes, in place.

    Every <img> is checked, in this order, for a colour word in its src
    (replaced by '{R}', '{G}', '{U}', '{B}' or '{W}'), for a
    'Symbol_<type>_mana.gif' name (replaced by '{<type>}', with 'Snow'
    shortened to '{S}') and for the tap symbol (replaced by '{T}').  Only
    the first matching rule is applied, so a tag is never replaced twice.
    Images matching none of the rules are left untouched.

    Returns the same *soup* object.
    """
    for symbol in soup.findAll('img'):
        colorMatch = color_re.match(str(symbol['src']))
        if colorMatch:
            code = colorSymbols[colorMatch.group('color').lower()]
            symbol.replaceWith('{' + code + '}')
            continue

        markup = str(symbol)
        manaMatch = mana_re.match(markup)
        if manaMatch:
            manaType = manaMatch.group('type')
            if manaType == "Snow":
                manaType = 'S'
            symbol.replaceWith('{' + manaType + '}')
            continue

        if tap_re.match(markup):
            symbol.replaceWith('{T}')

    return soup
|
||||
|
||||
|
||||
def getCardTypes(soup):
    """Return (supertype, subtype) for a spoiler table row.

    The type line is read from the first <font> of the row's third <td>
    and split at the first '-'.  Both parts are stripped of surrounding
    whitespace; subtype is '' when the line has no '-'.
    """
    typeLine = soup('td')[2]('font')[0].string
    parts = [part.strip() for part in typeLine.split('-', 1)]

    supertype = parts[0]
    if len(parts) == 2:
        subtype = parts[1]
    else:
        subtype = ''

    # replace entities, since gccg doesn't understand them in attributes
    # NOTE(review): the search string was a bare a-circumflex (a no-op, and
    # a non-ASCII byte in a file without a coding line); the entity form is
    # presumably what was meant - confirm against the upstream file.
    subtype = subtype.replace(u"&acirc;", u"\xe2")

    return supertype, subtype
|
||||
|
||||
|
||||
def cleanupHTMLText(htmlText, stripReminder = _stripReminderText):
    """Flatten a rules-text table cell into a plain unicode string.

    *htmlText* is modified in place: <br> tags become spaces and <i>/<b>
    tags are replaced by their joined contents.  The contents of the
    cell's first <font> are then rendered, a leading '[X]' is expanded
    into the corresponding mana ability sentence, a text consisting of a
    single space becomes empty, and the cleanuptrans table is applied.
    When *stripReminder* is true, only the pieces matched by reminder_re
    (text outside parentheses) are kept.
    """
    for lineBreak in htmlText.findAll('br'):
        lineBreak.replaceWith(' ')
    # Italic first, then bold, exactly in that order.
    for tagName in ('i', 'b'):
        for styled in htmlText.findAll(tagName):
            styled.replaceWith(''.join(styled.contents))

    text = htmlText('font')[0].renderContents(None)

    # add text for Basic Land
    landMatch = basicLand_re.match(text)
    if landMatch:
        text = u"{T}: Add {" + landMatch.group('mana') + u"} to your mana pool."
    if text == u" ":
        text = u""

    text = text.translate(cleanuptrans)

    if not stripReminder:
        return text

    kept = [''.join(groups) for groups in reminder_re.findall(text)]
    return ''.join(kept)
|
||||
|
||||
BIN
projects/mtg/tools/gatherer/mtgCommon.pyc
Normal file
BIN
projects/mtg/tools/gatherer/mtgCommon.pyc
Normal file
Binary file not shown.
110
projects/mtg/tools/gatherer/soup-parser.py
Normal file
110
projects/mtg/tools/gatherer/soup-parser.py
Normal file
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# Requires BeautifulSoup version 3, available from
|
||||
# http://www.crummy.com/software/BeautifulSoup/
|
||||
#
|
||||
# Usage soup-parser.py <xml file> [...]
|
||||
#
|
||||
# Updates text, subtype and name for all cards in the given xml file/s
|
||||
#
|
||||
# *** Overwrites the file/s given... use copies ***
|
||||
#
|
||||
# Copyright 2006: Nathan Callahan
|
||||
# Feel free to do what you want with this file, but give credit
|
||||
# where it's due.
|
||||
|
||||
|
||||
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
||||
import re
|
||||
import codecs
|
||||
import sys
|
||||
import urllib
|
||||
from mtgCommon import *
|
||||
|
||||
stripReminderText = True
|
||||
|
||||
def matchNames(tag, name):
    """Tell whether *tag* is a <card> whose name equals *name*.

    Both names are passed through the nametrans table before comparing.
    Tags that are not <card> elements never match.
    """
    if tag.name != 'card':
        return False
    wanted = name.translate(nametrans)
    return tag['name'].translate(nametrans) == wanted
|
||||
|
||||
def doGathererUpdate(xml, soup):
    """Update name, text, type and subtype of the cards in *xml*.

    *soup* is the parsed spoiler page; each of its card rows is matched
    against the <card> elements of *xml* by normalised name (matchNames).
    Matching cards are updated in place, and empty 'cost' / 'subtype'
    attrs are removed from them.  At the end, the cards present on only
    one side are printed.
    """
    # Names from the file that no spoiler row has matched yet.
    xmlCards = [c['name'] for c in xml.findAll('card')]
    gathererCards = []

    for cardRow in soup.find(id="_gridResults").findAll('tr',onmouseover="this.style.backgroundColor='#F5DEB3';"):
        name = cardRow('b')[0].string
        # NOTE(review): this replace mapped '"' to itself (a no-op);
        # decoding the quote entity is presumably what was meant - confirm.
        name = name.replace('&quot;', '"')

        htmlText = cardRow('td')[3]
        htmlText = replaceSymbols(htmlText)
        text = cleanupHTMLText(htmlText, stripReminderText)

        supertype, subtype = getCardTypes(cardRow)

        splitCard = split_re.match(text)
        if splitCard:
            text = splitCard.group('t1') + " // " + splitCard.group('t2')
            supertype = supertype + " // " + splitCard.group('type2')

        cards = xml.findAll(lambda tag: matchNames(tag, name))
        if not cards:
            gathererCards.append(name)
            continue

        for card in cards:
            card['name'] = name
            card['text'] = text
            card.find('attr', key='type')['value'] = supertype
            if subtype:
                s = card.find('attr', key='subtype')
                if s is None:
                    # Fill in the new tag through its own reference rather
                    # than looking it up again, so an existing attr can
                    # never be overwritten by mistake.
                    s = Tag(xml, "attr")
                    s['key'] = 'subtype'
                    card.insert(-1, s)
                s['value'] = subtype

            # Remove some useless attributes from previous versions
            costTag = card.find('attr', key='cost')
            if costTag and not costTag['value']:
                costTag.extract()
            subtypeTag = card.find('attr', key='subtype')
            if subtypeTag and not subtypeTag['value']:
                subtypeTag.extract()

        # Drop every file entry this row accounts for.  Comparing the
        # normalised names covers spelling differences and reprints, where
        # list.remove(name) raised ValueError or left duplicates behind.
        matched = name.translate(nametrans)
        xmlCards = [n for n in xmlCards if n.translate(nametrans) != matched]

    if xmlCards:
        print("Cards in file not found in gatherer:")
        print(' ' + '\n '.join(xmlCards))
    if gathererCards:
        print("Cards in gatherer not found in file:")
        print(' ' + '\n '.join(gathererCards))
|
||||
|
||||
|
||||
# Update every XML file named on the command line, in place.
for arg in sys.argv[1:]:
    print('Reading: %s' % arg)
    f = open(arg)
    try:
        xml = BeautifulStoneSoup(f.read(),selfClosingTags=['attr'])
    finally:
        f.close()

    # The set is identified by the abbrev attribute of <ccg-setinfo>.
    setTag = xml.find('ccg-setinfo')
    if setTag is None or setTag.get('abbrev') not in sets:
        print('Skipping %s: no known set abbreviation found' % arg)
        continue
    setinfo = sets[setTag['abbrev']]

    url = "http://ww2.wizards.com/gatherer/Index.aspx?setfilter=%s\\&output=Spoiler" % setinfo['gathname']
    print('Fetching/Parsing: %s' % url)
    gatherer = urllib.urlopen(url)
    try:
        soup = BeautifulSoup(gatherer.read(), smartQuotesTo=None)
    finally:
        gatherer.close()

    print('Processing:' + arg)
    doGathererUpdate(xml,soup)

    print('Writing: %s' % arg)
    f = open(arg,'w')
    try:
        f.write(xml.prettify('ISO-8859-1'))
    finally:
        f.close()
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user