# %% import pandas as pd # %% # Construct Alteryx input records = [ "29-01-2020,dd.mm.yyyy", "03/26/1920,mm/dd/yyyy", "11/16/2008,mm-dd-yyyy", "15/12/2005,dd/mm/yyyy", "1996-15-03,yyyy-dd-mm", ] input = pd.DataFrame({"Date_field":records}) display(input) # %% # Create index column import numpy as np input['Row_number'] = np.arange(5) # %% # Split the fields into separate columns with information locate_separator = input['Date_field'].str.find(",").max() input['Date_unformatted'] = input['Date_field'].apply(lambda x: x[:locate_separator]) input['Date_format'] = input['Date_field'].apply(lambda x: x[locate_separator+1:]) display(input) # %% # Crate function which loops over every unformatted date and formats it # Function only accommodates the dates variations in input def parseDate(date_string,date_format): if date_format[1] == 'd': day = date_string[:2] month = date_string[3:5] year = date_string[6:] return year+"-"+month+"-"+day elif date_format[1] == 'm': month = date_string[:2] day = date_string[3:5] year = date_string[6:] return year+"-"+month+"-"+day else: year = date_string[:4] day = date_string[5:7] month = date_string[8:] return year+"-"+month+"-"+day # Create lists to store column values date_unformatted = [] date_format = [] date_formatted = [] # Store column values in lists for line in input['Date_unformatted'].to_numpy(): date_unformatted.append(line) for line in input['Date_format'].to_numpy(): date_format.append(line) print(date_unformatted) print(date_format) # Reformat the date strings for i in range(5): date_formatted.append(parseDate(date_unformatted[i],date_format[i])) print(date_formatted) # Create output DF with formatted date strings output = pd.DataFrame({'Date_formatted':date_formatted}) output['Row_number'] = np.arange(5) display(output) # %% # Add the output DF data to input DF and present the final result result = input.merge(output, on='Row_number') result['Date_formatted'] = result['Date_formatted'].astype('datetime64[ns]') display(result.sort_values('Date_formatted'))