# %% [markdown]
# # Alteryx Weekly Challenge 409
# The Python code below is a Jupyter notebook exported from Databricks. As a
# pre-condition, the CSV files of the Alteryx inputs must be stored in external
# storage linked to Databricks.

# %%
from databricks_sdk_v2.databricks_ws import *  # for the previous SDK version use: from databricks_sdk.databricks_ws import *

db_ws = DatabricksWS(dbutils, spark, display, storage_zone="TRUSTED_ZONE", storage_protocol="abfss")

# %%
db_ws.list_files()

# %%
# Load all inputs as separate DataFrames (all are Alteryx inputs converted to CSV files)
# inferSchema=True so numeric columns are read as numbers instead of strings
die1 = spark.read.csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/Die_1.csv', header=True, inferSchema=True)
die2 = spark.read.csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/Die_2.csv', header=True, inferSchema=True)
die3 = spark.read.csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/Die_3.csv', header=True, inferSchema=True)
wr_mapping = spark.read.csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/WR mapping and multiplier.csv', header=True, inferSchema=True)
player_rating = spark.read.csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/player_rating.csv', header=True, inferSchema=True)

# %%
import pyspark.sql.functions as F
import random

# Combine the three dice to create a DataFrame with all possible die-roll combinations
d1_d2_combinations = die1.crossJoin(die2)  # First two dice
die_roll_combinations = d1_d2_combinations.crossJoin(die3)  # Add the third die

# Add an index as a random-row filter helper; coalesce to a single partition first so
# that monotonically_increasing_id() yields the contiguous range 0..215 (across
# multiple partitions the IDs are unique but not contiguous, and the filter below
# could then match nothing)
die_roll_combinations_id = die_roll_combinations.coalesce(1).withColumn('Index', F.monotonically_increasing_id())

# Filter to one random combination of the three dice (6^3 = 216 combinations)
random_die_roll = die_roll_combinations_id.where(F.col('Index') == random.randint(0, 215))

# Add up the three dice to get the roll result
random_die_roll_result = random_die_roll.withColumn('Roll', F.col('Die1') + F.col('Die2') + F.col('Die3'))
display(random_die_roll_result)  # Display result

# %%
# Obtain the WR mapping based on the random roll
random_wr_mapping = wr_mapping.join(random_die_roll_result, on='Roll')  # Join on the Roll result
display(random_wr_mapping)  # Display result

# %%
# Get the first two teams
teams = player_rating.select('Team').distinct().sort('Team').limit(2)
display(teams)  # Display result

# %%
# Split the two team names into two separate DataFrames for the player joins below
team_1 = teams.limit(1)
team_2 = spark.createDataFrame(teams.tail(1))

# Display results
display(team_1)
display(team_2)

# %%
# Get the best WR from team_1
team_1_wr = (player_rating.where(F.col('Position') == 'WR')
             .join(team_1, on='Team')
             .sort('Overall', ascending=False)
             .limit(1)
             .withColumn('MatchupPosition', (F.col('Catching') + F.col('Catch In Traffic') + F.col('Spec Catch')) / 3))
display(team_1_wr)

# %%
# Get the best QB from team_1
team_1_qb = (player_rating.where(F.col('Position') == 'QB')
             .join(team_1, on='Team')
             .sort('Overall', ascending=False)
             .limit(1)
             .withColumn('MatchupPosition', (F.col('Throw Power') + F.col('Throw Acc Mid') + F.col('Throw Under Pressure') + F.col('Throw On The Run')) / 4))
display(team_1_qb)

# %%
# Get the best CB from team_2
team_2_cb = (player_rating.where(F.col('Position') == 'CB')
             .join(team_2, on='Team')
             .sort('Overall', ascending=False)
             .limit(1)
             .withColumn('MatchupPosition', (F.col('Pursuit') + F.col('Man Coverage') + F.col('Zone Coverage') + F.col('Press') + F.col('Play Recognition')) / 5))
display(team_2_cb)
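# %% [markdown]
# The three selection cells above repeat the same pattern: filter by position, join to
# a team, keep the top-rated player, and average a set of skill columns. As a sketch,
# the hypothetical helper `best_player` below factors this out, assuming the column
# names used above:

# %%
from functools import reduce

def best_player(ratings, team, position, skill_cols):
    # Sum the given skill columns, then average them into MatchupPosition
    skill_sum = reduce(lambda a, b: a + b, [F.col(c) for c in skill_cols])
    return (ratings.where(F.col('Position') == position)
            .join(team, on='Team')
            .sort('Overall', ascending=False)
            .limit(1)
            .withColumn('MatchupPosition', skill_sum / len(skill_cols)))

# Example: equivalent to the team_1_wr cell above
# team_1_wr = best_player(player_rating, team_1, 'WR', ['Catching', 'Catch In Traffic', 'Spec Catch'])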
# %%
# Combine all three players together
qb_wr = team_1_qb.union(team_1_wr)
qb_wr_cb = qb_wr.union(team_2_cb)
display(qb_wr_cb)

# %%
# Add random_wr_mapping to the three players to allow the modifier calculation
qb_wr_cb_mapping = qb_wr_cb.crossJoin(random_wr_mapping)
display(qb_wr_cb_mapping)

# %%
# Calculate the Modifier column
from pyspark.sql import Window

w = Window.orderBy('Team')  # Create a Window to allow the window function
qb_wr_cb_mapping_lag = qb_wr_cb_mapping.withColumn('MatchupPositionLag', F.lag('MatchupPosition', 1, 0).over(w))  # Use the LAG window function
qb_wr_cb_mapping_modifier = qb_wr_cb_mapping_lag.withColumn('Modifier', F.col('MatchupPosition') - F.col('MatchupPositionLag'))  # Use the lagged MatchupPosition to calculate the Modifier
display(qb_wr_cb_mapping_modifier)  # Display results

# %%
# Calculate the Score and the final result
qb_wr_cb_mapping_score = (qb_wr_cb_mapping_modifier
                          .withColumn('Score', F.col('Modifier') * F.col('Multiplier')))

# Window function used for the Result calculation (same ordering as above, so reuse w)
qb_wr_cb_mapping_score_lag = qb_wr_cb_mapping_score.withColumn('Score_LAG', F.lag('Score', 1, 0).over(w))
qb_wr_cb_mapping_score_result = qb_wr_cb_mapping_score_lag.select(
    '*',
    F.when(F.col('Score') > F.col('Score_LAG'), F.col('Outcome1')).otherwise(F.col('Outcome2')).alias('Result'))

# The Result of the CB row is cross-joined back onto the DataFrame to create the 'Result_Final' column
final_result = spark.createDataFrame(qb_wr_cb_mapping_score_result
                                     .select('Result')
                                     .withColumnRenamed('Result', 'Result_Final')
                                     .tail(1))
qb_wr_cb_mapping_score_result_final = qb_wr_cb_mapping_score_result.crossJoin(final_result)

# Display the final result (column named 'Result_Final')
display(qb_wr_cb_mapping_score_result_final)
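# %% [markdown]
# Optionally, the final DataFrame could be written back to the same external storage
# as the inputs. A minimal sketch, assuming write access to the container; the output
# folder name `Challenge_409_result` is a hypothetical choice:

# %%
(qb_wr_cb_mapping_score_result_final
 .coalesce(1)  # Small result set, so a single output file is fine
 .write.mode('overwrite')
 .csv('abfss://68ce2fe2-2134-4ca1-8dfc-aec04d64078f@pzigxpwesrbluzp001.dfs.core.windows.net/Challenge_409_result', header=True))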