Want to get involved? We're always looking for ideas and content for Weekly Challenges.
SUBMIT YOUR IDEAFirst time really using the XML tool. Stumbled on to a solution and it worked!
Got there in the end with a little assistance
Did it without the xml parse because I remembered about it too late!
I had some trouble with the XML tool in Alteryx. It kept saying columns weren't there when they were, so I decided to do this challenge in Python. Took me a while to figure out that there are occasional NULL values for certain ship-to tags (i.e., phone and email). By the way, I put each customer into their own XML file. There are 10 XML files that are read by the script.
import xml.etree.ElementTree as ET
import pandas as pd
import os
class XML:
def __init__(self, dir: str, data: list, columns: list, output_path: str) -> None:
self.dir = dir
self.data = data
self.columns = columns
self.output_path = output_path
def parse_xml(self):
for file in os.listdir(self.dir):
if file.endswith('.xml'):
mini_list = list()
tree = ET.parse(os.path.join(self.dir, file))
root = tree.getroot()
for child in root:
for item in child:
for thing in item:
if file == 'test1.xml':
name = child.tag + '_' + thing.tag
self.columns.append(name)
value = thing.text.upper()
mini_list.append(value)
self.data.append(mini_list)
def check_ship_to(self):
for item in self.data:
if len(item) == 18:
item.insert(12, 'NULL')
item.insert(13, 'NULL')
def to_pandas(self):
df = pd.DataFrame(data=self.data, columns=self.columns)
df.to_excel(excel_writer=self.output_path, index=False, freeze_panes=(1,0))
if __name__ == '__main__':
path_to_xml_files = input('Please input the path to the folder containing the XML files:\n')
output_path = input('Please input the output filepath:\n')
c = XML(dir=path_to_xml_files, data=list(), columns=list(), output_path=output_path)
c.parse_xml()
c.check_ship_to()
c.to_pandas()
Output: