# Recovered from git patch 70f0b7cb3be2714201c00f319bc6c18b2ca8d70c
# Author: James Miller, Mon, 17 Feb 2020 21:45:02 -0600
# Subject: wrote python script to decode html entities in scraped text
# Path in repository: scripts/utilities/monsters-html-escaper.py
"""Decode HTML entities in the scraped monsters YAML file."""

import html

# Paths are relative to the current working directory, as in the original
# script.
_DEFAULT_SOURCE = "../data/yaml/monsters.yaml"
_DEFAULT_OUTPUT = "../data/yaml/monsters-fixed.yaml"


def main(source_path=_DEFAULT_SOURCE, output_path=_DEFAULT_OUTPUT):
    """Write a copy of *source_path* with HTML entities decoded.

    The whole file is read as text, passed through ``html.unescape`` (so
    ``&amp;`` becomes ``&``, ``&#8212;`` becomes an em dash, and so on) and
    written to *output_path*. The source file itself is left untouched.

    Args:
        source_path: Text file to read. Defaults to the scraped monsters
            YAML file.
        output_path: File to create or overwrite with the decoded text.
            Defaults to the "-fixed" sibling of the source file.

    Raises:
        OSError: If the source cannot be read or the output cannot be
            written.
        UnicodeDecodeError: If the source is not valid UTF-8.
    """
    # Pin UTF-8 on both sides: unescaping produces non-ASCII characters that
    # a locale-dependent default encoding may be unable to write.
    with open(source_path, "r", encoding="utf-8") as content_file:
        content = content_file.read()

    with open(output_path, "w", encoding="utf-8") as outfile:
        outfile.write(html.unescape(content))


if __name__ == "__main__":
    main()