pathfinder-2-sqlite-MIRROR/data/yaml/deprecated/clean-up-monsters.py

490 lines
17 KiB
Python
Raw Normal View History

import yaml
2020-02-19 00:30:31 -05:00
import re
def main():
2020-02-20 16:41:40 -05:00
cleanuphtmlstuff()
with open("tmp-monsters-html-cleanup.yaml", 'r') as content_file:
data = yaml.full_load(content_file)
counter = 0
for i in data:
2020-02-18 22:48:59 -05:00
counter += 1
2020-02-19 01:11:21 -05:00
#print("{}\t{}".format(counter, i['name']))
2020-02-18 22:48:59 -05:00
# Set some data points before iteration
i['saves_special'] = {}
2020-02-18 22:48:59 -05:00
# clean up ability mods
for k, v in i['ability_mods'].items():
i['ability_mods'][k] = int(v)
# print(k, i['ability_mods'][k])
2020-02-18 22:48:59 -05:00
# clean up skills
for k, v in i['skills'].items():
i['skills'][k] = int(v)
# print(k, i['ability_mods'][k])
2020-02-18 22:48:59 -05:00
# clean up saves
for k, v in i['saves'].items():
# print(k, v)
i['saves_special'][k] = None
if ',' in v:
# print("WE GOT A COMMA")
i['saves'][k] = v.replace(',', '')
if ';' in v:
# print("WE GOT A SPECIAL")
a = v.split('; ')
# print(a)
i['saves_special'][k] = a[1]
i['saves'][k] = a[0]
elif '(' in v:
2020-02-19 00:30:31 -05:00
#print("we got a parentheses")
#print(v)
a = v.split(' (')
# print(a)
i['saves_special'][k] = a[1]
i['saves'][k] = a[0]
i['saves'][k] = int(i['saves'][k])
# print(k, i['saves'][k])
2020-02-18 22:48:59 -05:00
# clean up immunities
if i['immunities'] == "None":
i['immunities'] = None
else:
tmp = i['immunities']
splits = tmp.split(',')
res = []
for z in splits:
res.append(z.strip())
i['immunities'] = res
2020-02-19 00:30:31 -05:00
# clean up traits with trailing or leading whitespace
traitslist = []
2020-02-19 01:11:21 -05:00
#print(i['traits'])
2020-02-19 00:30:31 -05:00
for x in i['traits']:
traitslist.append(x.strip())
2020-02-19 01:11:21 -05:00
#print(x.strip())
#print(traitslist)
2020-02-19 00:30:31 -05:00
i['traits'] = traitslist
# clean up traits underneath proactive actions with trailing or leading whitespace
2020-02-19 01:11:21 -05:00
#print(i['proactive_abilities'])
2020-02-19 00:30:31 -05:00
for x in i['proactive_abilities']:
traitslist = []
if x['traits'] != None:
if len(x['traits']) == 0:
x['traits'] = None
else:
for y in x['traits']:
traitslist.append(y.strip())
2020-02-19 01:11:21 -05:00
#print(y.strip())
2020-02-19 00:30:31 -05:00
x['traits'] = traitslist
# clean up traits underneath automatic_abilities with trailing or leading whitespace
for x in i['automatic_abilities']:
# this one line fixes leading and trailing whitespace
x['description'] = x['description'].strip()
traitslist = []
if x['traits'] != None:
if len(x['traits']) == 0:
x['traits'] = None
else:
for y in x['traits']:
traitslist.append(y.strip())
#print(y.strip())
x['traits'] = traitslist
# clean up traits underneath melee with trailing or leading whitespace
for x in i['melee']:
traitslist = []
if x['traits'] != None:
if len(x['traits']) == 0:
x['traits'] = None
else:
for y in x['traits']:
traitslist.append(y.strip())
#print(y.strip())
x['traits'] = traitslist
# clean up traits underneath ranged with trailing or leading whitespace
for x in i['ranged']:
traitslist = []
if x['traits'] != None:
if len(x['traits']) == 0:
x['traits'] = None
else:
for y in x['traits']:
traitslist.append(y.strip())
#print(y.strip())
x['traits'] = traitslist
2020-02-19 00:30:31 -05:00
# clean up senses
senseslist = []
if i['senses'] != None:
if len(i['senses']) == 0:
i['senses'] = None
else:
for x in i['senses']:
senseslist.append(x.strip())
i['senses'] = senseslist
# clean up spell DC
if i['spell_dc'] == "None":
i['spell_dc'] = None
else:
i['spell_dc'] = int(i['spell_dc'])
# clean up innate_spell levels
if 'innate_spells' not in i:
i['innate_spells'] = None
elif i['innate_spells'] == "None":
i['innate_spells'] = None
else:
for x in i['innate_spells']:
x['level'] = int(x['level'])
2020-02-19 01:11:21 -05:00
# clean up level
i['level'] = int(i['level'])
# clean up AC and HP and Perception
###print("AC:\t{}".format(i['ac']))
###print("HP:\t{}".format(i['hp']))
res = re.match('(\d+)', i['ac'].strip())
i['ac'] = int(res.group(1))
res = re.match('(\d+)', i['hp'].strip())
i['hp'] = int(res.group(1))
# clean up melee to hit
if 'melee' not in i:
i['melee'] = None
elif i['melee'] == "None":
i['melee'] = None
else:
for x in i['melee']:
if 'to_hit' not in x:
x['to_hit'] = None
else:
res = re.search('(\d+)', x['to_hit'])
#print("{}\t{}".format(counter, i['name']))
if res:
x['to_hit'] = int(res.group(1))
# clean up ranged to hit
if 'ranged' not in i:
i['ranged'] = None
elif i['ranged'] == "None":
i['ranged'] = None
else:
for x in i['ranged']:
if 'to_hit' not in x:
x['to_hit'] = None
else:
res = re.search('(\d+)', x['to_hit'])
#print("{}\t{}".format(counter, i['name']))
if res:
x['to_hit'] = int(res.group(1))
2020-02-19 01:11:21 -05:00
# clean up spell attack to hit
if 'spell_attack_to_hit' not in i:
i['spell_attack_to_hit'] = None
elif i['spell_attack_to_hit'] == "None":
i['spell_attack_to_hit'] = None
else:
res = re.search('(\d+)', i['spell_attack_to_hit'])
#print("{}\t{}".format(counter, i['name']))
2020-02-19 01:11:21 -05:00
if res:
i['spell_attack_to_hit'] = int(res.group(1))
###if i['perception'] == None or i['perception'] == "":
### print("{}\t{}".format(counter, i['name']))
### print("\t\t\t\t{}".format(i['perception']))
if type(i['perception']) is str:
#print("{}\t{}".format(counter, i['name']))
res = re.match('(\d+)', i['perception'])
#print(res.group(1))
i['perception'] = int(res.group(1))
# clean up nulls for empty lists
if i['automatic_abilities'] == []:
i['automatic_abilities'] = None
if i['ranged'] == []:
i['ranged'] = None
if i['melee'] == []:
i['melee'] = None
2020-02-19 00:30:31 -05:00
# clean up resistances
# set Nones to null
if i['resistances'] == "None":
i['resistances'] = None
# remove trailing ;
elif i['resistances'].endswith(";"):
i['resistances'] = i['resistances'][:-1]
#print("\t{}".format(i['resistances']))
if i['resistances'] != None:
#print("{}\t{}".format(counter, i['name']))
#print("\t{}".format(i['resistances']))
2020-02-20 15:23:14 -05:00
# print("{}\t{}".format(counter, i['name']))
2020-02-19 00:30:31 -05:00
res = processResistances(i['resistances'])
i['resistances'] = res
#print(res)
2020-02-18 22:48:59 -05:00
2020-02-20 15:23:14 -05:00
#if type(i['resistances']) == str:
# print("{}\t{}".format(counter, i['name']))
# print("\t\t\t\t{}".format(i['resistances']))
2020-02-20 00:39:33 -05:00
# clean up speeds
newspeed = []
if 'land' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['land'])
amt = int(res.group(1))
newspeed.append({"type": "Land", "amount": amt })
if 'fly' in i['speed']:
if 'from fly' in i['speed']['fly']:
# do the from fly version
res = re.match('(\d+)', i['speed']['fly'])
amt = int(res.group(1))
newspeed.append({"type": "Fly (from Fly)", "amount": amt })
else:
# get number
res = re.match('(\d+)', i['speed']['fly'])
amt = int(res.group(1))
newspeed.append({"type": "Fly", "amount": amt })
if 'swim' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['swim'])
amt = int(res.group(1))
newspeed.append({"type": "Swim", "amount": amt })
if 'burrow' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['burrow'])
amt = int(res.group(1))
newspeed.append({"type": "Burrow", "amount": amt })
if 'climb' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['climb'])
amt = int(res.group(1))
newspeed.append({"type": "Climb", "amount": amt })
if 'iceclimb' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['iceclimb'])
amt = int(res.group(1))
newspeed.append({"type": "Ice Climb", "amount": amt })
if 'climbstone' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['climbstone'])
amt = int(res.group(1))
newspeed.append({"type": "Climb Stone", "amount": amt })
if 'burrowsnowonly' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['burrowsnowonly'])
amt = int(res.group(1))
newspeed.append({"type": "Burrow (snow only)", "amount": amt })
if 'burrowsandonly' in i['speed']:
# get number
res = re.match('(\d+)', i['speed']['burrowsandonly'])
amt = int(res.group(1))
newspeed.append({"type": "Burrow (sand only)", "amount": amt })
if 'special' in i['speed']:
for x in i['speed']['special']:
if x == "ice stride":
newspeed.append({"type": "Ice Stride", "amount": None })
if x == "trickster's step":
newspeed.append({"type": "Trickster's Step", "amount": None })
if x == "earth glide":
newspeed.append({"type": "Earth Glide", "amount": None })
if x == "sand glide":
newspeed.append({"type": "Sand Glide", "amount": None })
if x == "glide":
newspeed.append({"type": "Glide", "amount": None })
if x == "compression":
newspeed.append({"type": "Compression", "amount": None })
if x == "suction":
newspeed.append({"type": "Suction", "amount": None })
if x == "swamp stride":
newspeed.append({"type": "Swamp Stride", "amount": None })
if x == "powerful jumper":
newspeed.append({"type": "Powerful Jumper", "amount": None })
if x == "woodland stride":
newspeed.append({"type": "Woodland Stride", "amount": None })
if x == "trackless step":
newspeed.append({"type": "Trackless Step", "amount": None })
if x == "cloud walk":
newspeed.append({"type": "Cloud Walk", "amount": None })
if x == "swiftness":
newspeed.append({"type": "Swiftness", "amount": None })
if x == "freedom of movement (constant)":
newspeed.append({"type": "Freedom of Movement (constant)", "amount": None })
if x == "spider climb (constant)":
newspeed.append({"type": "Spider Climb (constant)", "amount": None })
if x == "unstoppable burrow":
newspeed.append({"type": "Unstoppable Burrow", "amount": None })
if x == "walk in shadow":
newspeed.append({"type": "Walk in Shadow", "amount": None })
if x == "magma swim":
newspeed.append({"type": "Magma Swim", "amount": None })
if x == "Can't Move":
newspeed.append({"type": "Can't Move", "amount": None })
if x == "Air Walk (constant)":
newspeed.append({"type": "Air Walk (constant)", "amount": None })
i['speed'] = newspeed
final = yaml.safe_dump(data, allow_unicode=True)
with open("tmp-monsters.yaml", 'w') as f:
f.write(final)
2020-02-20 16:41:40 -05:00
def cleanuphtmlstuff():
with open("monsters.yaml", 'r') as f:
lines = f.readlines()
for num, _ in enumerate(lines):
lines[num] = cleanupalinks(lines[num])
lines[num] = cleanupilinks(lines[num])
lines[num] = cleanupblinks(lines[num])
lines[num] = cleanupbrlinks(lines[num])
lines[num] = cleanupulinks(lines[num])
lines[num] = cleanupspellslinks(lines[num])
lines[num] = cleanupimglinks(lines[num])
lines[num] = cleanuptlinks(lines[num])
with open("tmp-monsters-html-cleanup.yaml", 'w') as f:
f.writelines(lines)
def cleanupalinks(x):
tmp = re.sub('<a.+?>', '', x)
tmp2 = re.sub('</a>', '', tmp)
return tmp2
def cleanuptlinks(x):
tmp = re.sub('<t>', '', x)
return tmp
def cleanupimglinks(x):
res = re.search('(<img.+?>)', x)
if res != None:
print(res)
if res:
if 'Single Action' in res.group(0):
print("SingleAction")
tmp = re.sub('<img.+?>', '|1|', x)
tmp = re.sub('\|1\|\|1\|', '|1|', tmp)
elif 'Two Actions' in res.group(0):
print("TwoActions")
tmp = re.sub('<img.+?>', '|2|', x)
tmp = re.sub('\|2\|\|2\|', '|2|', tmp)
elif 'Three Actions' in res.group(0):
print("ThreeActions")
tmp = re.sub('<img.+?>', '|3|', x)
tmp = re.sub('\|3\|\|3\|', '|3|', tmp)
elif 'Free Action' in res.group(0):
print("FreeAction")
tmp = re.sub('<img.+?>', '|F|', x)
tmp = re.sub('\|F\|\|F\|', '|F|', tmp)
print(tmp)
return tmp
else:
return x
def cleanupspellslinks(x):
tmp = re.sub('<spells.+?>', '', x)
tmp2 = re.sub('</spells.+?>', '', tmp)
return tmp2
def cleanupilinks(x):
tmp = re.sub('<i>', '*', x)
tmp2 = re.sub('</i>', '*', tmp)
return tmp2
def cleanupulinks(x):
tmp = re.sub('<u>', '', x)
tmp2 = re.sub('</u>', '', tmp)
return tmp2
def cleanupblinks(x):
tmp = re.sub('<b>', '**', x)
tmp2 = re.sub('</b>', '**', tmp)
return tmp2
def cleanupbrlinks(x):
return re.sub('<br>', '\n', x)
2020-02-19 00:30:31 -05:00
def processResistances(r):
if '(' in r:
# TODO This is what needs to be done
#print("\t\tTODO: Need to process with parentheses")
2020-02-20 00:39:33 -05:00
# this tree does the simple ones without multiple items
if ',' not in r:
rr = re.split('(\d+)', r)
return [{"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])},]
elif ',' in r:
2020-02-20 15:23:14 -05:00
# FIGURE OUT BRANCH FOR THE COMMA IS NOT INSIDE THE PARENTHETICAL
test = re.search('(\(.+\))', r)
tstr = test.group(0)
if ',' in tstr:
# TODO here's the case for comma IS inside the parenthetical
print("\n")
print(r)
if 'haunted form' not in r:
# DO the majority of them
rr = re.split('(\d+)', r)
print(rr)
return [{"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])},]
elif 'haunted form' in r:
# DO the single haunted form one
##### NOTE I'm hard coding this in as it will be faster than writing regex
result = [{"type": "all (except force, ghost touch, or positive; double resistance against non-magical)", "amount": 8},
{"type": "haunted form", "amount": 12}]
print(result)
return result
else:
# TODO Here's the case for comma is NOT inside the parenthetical
splits = re.split(',', r)
mysplitlist = []
for x in splits:
rr = re.split('(\d+)', x)
mysplitlist.append({"type": rr[0].strip() + ' ' + rr[2].strip(), "amount": int(rr[1])})
return mysplitlist
2020-02-20 00:39:33 -05:00
else:
2020-02-20 15:23:14 -05:00
print("else has been run")
2020-02-20 00:39:33 -05:00
return None
2020-02-19 00:30:31 -05:00
else:
#print("\t\tNo parentheses")
# split on commas
res = r.split(',')
#print(r.split(','))
#print(res)
results_list = []
for i in res:
#print(i)
#tmp = re.search('(\d+)', i)
#print(tmp.groups())
tmp = re.split('(\d+)', i)
#print(tmp)
tmpres = {"type": tmp[0].strip(), "amount": int(tmp[1])}
results_list.append(tmpres)
return results_list
return "Something went wrong"
if __name__ == "__main__":
main()