# %% import pandas as pd # %% # Define excel sheet index and initial year to be used years = [0,1,2,3,4] initial_year = 2023 # %% # Define main dataframe years_df = pd.DataFrame() # %% # Load all individual sheets in for year in years: # First sheet is year 2022 and it keeps decreasing with increased sheet index initial_year -= 1 # Header starts on the second line individual_year = pd.read_excel(r"C:\Users\rpospisil002\Downloads\Challenge_412_start_file\Fantasty Football Stats.xlsx", sheet_name=year,header=1) # Each newly loaded sheet populates new column with year reference individual_year['Year'] = initial_year # All sheets are combined together years_df = pd.concat([individual_year,years_df]) # %% # Check the resulted number of rows display(years_df.shape) # %% # Aggregate and sort by average rank years_df_agg = years_df.groupby('POS').aggregate({'RK':'mean'}).reset_index() years_df_agg = years_df_agg.sort_values('RK') # Limit to TOP 5 years_df_agg_top5 = years_df_agg.iloc[:5] # Present result 1 display(years_df_agg_top5) # %% # Limit main dataframe to TOP5 positions only years_df_top5 = years_df.merge(years_df_agg_top5, on='POS') display(years_df_top5) # %% # Create pivot table (partial result 2) years_df_top5_pivot = years_df_top5.pivot_table(values='RK_x',aggfunc='mean',columns='Year',index='POS').reset_index() display(years_df_top5_pivot) # %% # Aggregate (by Year and POS) and sort by average rank years_df_agg_years = years_df.groupby(['Year','POS']).aggregate({'RK':'mean'}).reset_index() years_df_agg_years = years_df_agg_years.sort_values('RK') display(years_df_agg_years) # %% import matplotlib.pyplot as plt # %% # Create pivot table (data for partial result 2) years_df_top5_pivot_2 = years_df_top5.pivot_table(values='RK_x',aggfunc='mean',columns='POS',index='Year').reset_index() display(years_df_top5_pivot_2) # %% # Plot a line chart (part of result 2) import numpy as np years_df_top5_pivot_2.plot(x='Year',y=['ILB','LB','QB','RB','WR'],xlabel='Year',ylabel='Avg_Rank',xticks=np.arange(2018,2023, step=1))