This commit is contained in:
@@ -0,0 +1,110 @@
|
||||
#!/usr/bin/python
|
||||
#
|
||||
# Requires BeautifulSoup version 3, available from
|
||||
# http://www.crummy.com/software/BeautifulSoup/
|
||||
#
|
||||
# Usage soup-parser.py <xml file> [...]
|
||||
#
|
||||
# Updates text, subtype and name for all cards in the given xml file/s
|
||||
#
|
||||
# *** Overwrites the file/s given... use copies ***
|
||||
#
|
||||
# Copyright 2006: Nathan Callahan
|
||||
# Feel free to do what you want with this file, but give credit
|
||||
# where it's due.
|
||||
|
||||
|
||||
from BeautifulSoup import BeautifulSoup, BeautifulStoneSoup, Tag
|
||||
import re
|
||||
import codecs
|
||||
import sys
|
||||
import urllib
|
||||
from mtgCommon import *
|
||||
|
||||
# Flag handed to cleanupHTMLText() for every card's rules text.
# NOTE(review): presumably True means reminder text is removed from the
# stored card text -- confirm against cleanupHTMLText in mtgCommon.
stripReminderText = True
|
||||
|
||||
def matchNames(tag, name):
    """Tell whether *tag* is a ``card`` element whose name matches *name*.

    Both the tag's ``name`` attribute and *name* are passed through
    ``translate(nametrans)`` before being compared.  Any element that is
    not a ``card`` never matches.
    """
    if tag.name != 'card':
        return False
    return tag['name'].translate(nametrans) == name.translate(nametrans)
|
||||
|
||||
def _updateCard(xml, card, name, text, supertype, subtype):
    """Write one Gatherer row's data onto *card* and prune empty attrs.

    *xml* is the parsed document that owns *card*; it is only needed to
    construct a new ``attr`` element when the card has no subtype yet.
    """
    card['name'] = name
    card['text'] = text
    card.find('attr', key='type')['value'] = supertype
    if subtype:
        s = card.find('attr', key='subtype')
        if s is None:
            # Keep a reference to the tag we create instead of looking it
            # up again with card('attr')[-1]: the new tag is inserted
            # *before* the last child, so when that last child is itself
            # an attr element the lookup would return the wrong tag.
            s = Tag(xml, "attr")
            s['key'] = 'subtype'
            card.insert(-1, s)
        s['value'] = subtype

    # Remove some useless attributes from previous versions
    for key in ('cost', 'subtype'):
        staleTag = card.find('attr', key=key)
        if staleTag is not None and not staleTag['value']:
            staleTag.extract()


def doGathererUpdate(xml, soup):
    """Update the cards in *xml* from a parsed Gatherer spoiler page.

    For every result row found in *soup* the card name, text, type and
    subtype are extracted and written onto each ``card`` element of
    *xml* whose name matches (see matchNames).  Empty ``cost`` and
    ``subtype`` attr elements are removed from the updated cards.  *xml*
    is modified in place and nothing is returned.

    Two reports are printed afterwards: cards in the file that no
    Gatherer row matched, and Gatherer cards with no counterpart in the
    file.
    """
    xmlCards = [c['name'] for c in xml.findAll('card')]
    # Names, as originally spelled in the file, of every card that was
    # matched by some Gatherer row.
    matchedNames = set()
    gathererCards = []

    rows = soup.find(id="_gridResults").findAll(
        'tr', onmouseover="this.style.backgroundColor='#F5DEB3';")
    for cardRow in rows:
        name = cardRow('b')[0].string
        # NOTE(review): the original replaced '"' with '"' (a no-op);
        # decoding the quote entity is the presumed intent -- confirm.
        name = name.replace('&quot;', '"')

        htmlText = replaceSymbols(cardRow('td')[3])
        text = cleanupHTMLText(htmlText, stripReminderText)

        supertype, subtype = getCardTypes(cardRow)

        # When split_re matches, join the two text groups and append the
        # second type group with " // ".
        splitCard = split_re.match(text)
        if splitCard:
            text = splitCard.group('t1') + " // " + splitCard.group('t2')
            supertype = supertype + " // " + splitCard.group('type2')

        cards = xml.findAll(lambda tag: matchNames(tag, name))
        if not cards:
            gathererCards.append(name)
            continue

        for card in cards:
            # Record the file's own spelling before it is overwritten.
            # matchNames compares translated names, so this can differ
            # from Gatherer's spelling; removing Gatherer's name from
            # xmlCards raised ValueError in that case (and for repeated
            # Gatherer rows).
            matchedNames.add(card['name'])
            _updateCard(xml, card, name, text, supertype, subtype)

    missing = [n for n in xmlCards if n not in matchedNames]
    if missing:
        print("Cards in file not found in gatherer:")
        print(' ' + '\n '.join(missing))
    if gathererCards:
        print("Cards in gatherer not found in file:")
        print(' ' + '\n '.join(gathererCards))
|
||||
|
||||
|
||||
# Process every file named on the command line: read it, fetch the
# matching Gatherer spoiler page, update the cards, and overwrite the file.
for arg in sys.argv[1:]:
    print('Reading: %s' % arg)
    # 'with' guarantees the handle is closed even if parsing fails.
    with open(arg) as f:
        xml = BeautifulStoneSoup(f.read(), selfClosingTags=['attr'])

    setinfo = sets[xml.find('ccg-setinfo')['abbrev']]
    # NOTE(review): the "\\&" puts a literal backslash into the URL; it
    # looks like a leftover shell escape.  Kept as-is -- confirm whether
    # the server actually needs it.
    url = "http://ww2.wizards.com/gatherer/Index.aspx?setfilter=%s\\&output=Spoiler" % setinfo['gathname']
    print('Fetching/Parsing: %s' % url)
    gatherer = urllib.urlopen(url)
    try:
        soup = BeautifulSoup(gatherer.read(), smartQuotesTo=None)
    finally:
        # Release the connection whether or not parsing succeeded.
        gatherer.close()

    print('Processing:' + arg)
    doGathererUpdate(xml, soup)

    print('Writing: %s' % arg)
    # Serialise *before* opening the file for writing: opening with 'w'
    # truncates it, so a failure inside prettify() would otherwise leave
    # the user's file empty.
    output = xml.prettify('ISO-8859-1')
    with open(arg, 'w') as f:
        f.write(output)
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user