# %% [markdown] # ## Alteryx Weekly Challenge 410 # This is Jupyter notebook exported as Python script. Original Alteryx input was imported as CSV file and used within this script. # %% # Import pandas library used for data transformation import pandas as pd # %% # Load Alteryx input as CSV file df = pd.read_csv(r"C:\Users\rpospisil002\Downloads\awc410\awc410.csv") display(df.head(5)) # %% # Create column 'Product_ID' f1_a = df['Field1'].dropna().reset_index() f1 = f1_a[f1_a['Field1'] != 'Product ID'] f1 = f1.rename(columns={'Field1':'Product_ID'}) f1['id1'] = range(len(f1)) f1 = f1[['id1','Product_ID']] display(f1.head(5)) # %% # Create column 'Launch_Date' f2_a = df['Field2'].dropna().reset_index() f2 = f2_a[f2_a['Field2'] != 'Launch Date'] f2 = f2.rename(columns={'Field2':'Launch_Date'}) f2['id2'] = range(len(f2)) f2 = f2[['id2','Launch_Date']] display(f2.head(5)) # %% # Create column 'Units_Produced' f3_a = df['Field4'].dropna().reset_index() f3_b = f3_a.reset_index() f3_b = f3_b.rename(columns={'level_0':'id_for_modulo'}) f3_c = f3_b[(f3_b['Field4'] != 'Units Produced') & (f3_b['id_for_modulo'] % 2 == 0)] f3_c = f3_c.rename(columns={'Field4':'Units_Produced'}) f3_d = pd.DataFrame(f3_c['Units_Produced']).reset_index(drop=True) f3_d['id3'] = range(len(f3_d)) f3 = f3_d[['id3','Units_Produced']] display(f3.head(5)) # %% # Create column 'Spoilage' f4_a = df['Field4'].dropna().reset_index() f4_b = f4_a.reset_index() f4_b = f4_b.rename(columns={'level_0':'id_for_modulo'}) f4_c = f4_b[(f4_b['Field4'] != 'Spoilage') & (f4_b['id_for_modulo'] % 2 != 0)] f4_c = f4_c.rename(columns={'Field4':'Spoilage'}) f4_d = pd.DataFrame(f4_c['Spoilage']).reset_index(drop=True) f4_d['id4'] = range(len(f4_d)) f4 = f4_d[['id4','Spoilage']] display(f4.head(5)) # %% # Create column 'Units_Sold' f5_a = df['Field5'].dropna().reset_index() f5_b = f5_a.reset_index() f5_b = f5_b.rename(columns={'level_0':'id_for_modulo'}) f5_c = f5_b[(f5_b['Field5'] != 'Units Sold') & (f5_b['id_for_modulo'] % 2 == 0)] f5_c = f5_c.rename(columns={'Field5':'Units_Sold'}) f5_d = pd.DataFrame(f5_c['Units_Sold']).reset_index(drop=True) f5_d['id5'] = range(len(f5_d)) f5 = f5_d[['id5','Units_Sold']] display(f5.head(5)) # %% # Create column 'Sales_USD' f6_a = df['Field5'].dropna().reset_index() f6_b = f6_a.reset_index() f6_b = f6_b.rename(columns={'level_0':'id_for_modulo'}) f6_c = f6_b[(f6_b['Field5'] != 'Sales (USD)') & (f6_b['id_for_modulo'] % 2 != 0)] f6_c = f6_c.rename(columns={'Field5':'Sales_USD'}) f6_d = pd.DataFrame(f6_c['Sales_USD']).reset_index(drop=True) f6_d['id6'] = range(len(f6_d)) f6 = f6_d[['id6','Sales_USD']] display(f6.head(5)) # %% # Combine all columns together and create df_transformed df_f1_f2 = f1.merge(f2,left_on='id1',right_on='id2') df_f1_f2_f3 = df_f1_f2.merge(f3,left_on='id1',right_on='id3') df_f1_f2_f3_f4 = df_f1_f2_f3.merge(f4,left_on='id1',right_on='id4') df_f1_f2_f3_f4_f5 = df_f1_f2_f3_f4.merge(f5,left_on='id1',right_on='id5') df_f1_f2_f3_f4_f5_f6 = df_f1_f2_f3_f4_f5.merge(f6,left_on='id1',right_on='id6') df_transformed = df_f1_f2_f3_f4_f5_f6[['Product_ID','Launch_Date','Units_Produced','Spoilage','Units_Sold','Sales_USD']] df_transformed.loc[:,'Spoilage'] = df_transformed['Spoilage'].astype('Int64') df_transformed.loc[:,'Units_Produced'] = df_transformed['Units_Produced'].astype('Int64') df_transformed.loc[:,'Sales_USD'] = df_transformed['Sales_USD'].astype('Float64') df_transformed.loc[:,'Sale_per_unit'] = df_transformed.loc[:,'Sales_USD'] / df_transformed.loc[:,'Units_Produced'] display(df_transformed.head(5)) # %% # Result 1 - TOP5 Spoilage result_1 = df_transformed.sort_values('Spoilage',ascending=False) display(result_1.head(5)) # %% # Result 2 - bottom 5 Sale_per_unit result_2 = df_transformed.sort_values('Sale_per_unit') display(result_2.head(5))