Game Files/Scripts/parse-wiki-data.py

From The Alchemist Code Wiki
Jump to: navigation, search

Parses data from a generated wiki page into a JSON file. Used by get-wikified-data.py to retain data between edits.

Usage[edit | edit source]

parse-wiki-data.py PAGE_FILE JSON_OUTPUT

  • PAGE_FILE: The file containing the wiki page source to parse the data from
  • JSON_OUTPUT: The JSON file that will be generated

Code[edit | edit source]

#!/usr/bin/python
import sys
import os
import re
import json

# Exactly two arguments are required: the wiki page source to read and the
# JSON file to write. Anything else prints the usage line and exits.
if len(sys.argv) != 3:
    sys.exit("Usage: parse-wiki-data.py PAGE_FILE JSON_OUTPUT")

page_file, json_output = sys.argv[1:3]

# Parses an attribute from a section into the data object
def parse_attr(section, data):
    """Store one attribute row from ``section`` under the current type.

    ``section`` is the text collected between two row tokens. Its first
    three lines are read as the attribute name, description and notes;
    the first character of each of those lines is dropped (presumably the
    leading ``|`` cell marker of the wiki table markup). Lines after the
    third are ignored.

    The entry is written to ``data[current_type]['attrs'][name]`` as a
    dict with the keys ``'desc'`` and ``'notes'``. ``current_type`` is the
    module-level name of the type currently being parsed.

    A section containing only whitespace (for example a row separator that
    is immediately followed by the end of the table) is skipped, and cells
    missing from a short row default to the empty string instead of
    raising ``IndexError``.
    """
    if not section.strip():
        # Empty row: there is nothing to record.
        return

    cells = section.split('\n')
    # Pad short rows so name, description and notes are always present.
    cells += [''] * (3 - len(cells))
    name, desc, notes = (cell[1:] for cell in cells[:3])
    data[current_type]['attrs'][name] = {'desc': desc, 'notes': notes}

# Line markers the parser reacts to. A line is tested against the heading
# pattern first, then against each token as a prefix.
attr_start_token = '|-'      # begins a new attribute row
attrlist_end_token = '|}'    # closes the attribute list of the current type
attrlist_start_token = '{|'  # opens the attribute list of the current type
type_pattern = re.compile('== ([^=]+) ==')  # heading naming an object type

# Parser state, updated while the page is read line by line.
parsing_attr = False  # True once an attribute row has been started
current_type = ''     # Name of the object type whose section is being read
section = ''          # Text accumulated since the last token
data = {}             # The final data object that will get written

# Open the page file and start parsing. The file is read line by line; lines
# that are not a heading or a table token are accumulated in `section` until
# the next token decides what that text was. The `with` block closes the page
# file as soon as parsing is done.
with open(page_file, 'r') as page:
    for line in page:

        # Start of a new object type
        typematch = type_pattern.match(line)
        if typematch:
            # Text collected before the very first heading describes the file
            if current_type == '':
                data['filedesc'] = section.strip()
            section = ''
            current_type = typematch.group(1)
            data[current_type] = { 'desc': '', 'attrs': {} }
            continue

        # Start of the current type's attributes: the text between the
        # heading and the table is the type's description.
        # NOTE(review): a table that appears before any heading raises
        # KeyError here, because no entry exists for the empty type name.
        if line.startswith(attrlist_start_token):
            data[current_type]['desc'] = section.strip()
            section = ''
            parsing_attr = False
            continue

        # End of current type's attributes: flush the row in progress
        if line.startswith(attrlist_end_token):
            if parsing_attr:
                parse_attr(section, data)
            section = ''
            continue

        # Start of a new attribute: flush the previous row, if any. The
        # text before the first row token of a table is discarded.
        if line.startswith(attr_start_token):
            if parsing_attr:
                parse_attr(section, data)
            section = ''
            parsing_attr = True
            continue

        # If no tokens were found, then append the line to the section
        section += line

# Write the file; the `with` block guarantees the output is flushed and closed
with open(json_output, 'w') as output:
    json.dump(data, output)