Want to get involved? We're always looking for ideas and content for Weekly Challenges.
SUBMIT YOUR IDEA
Here is a Python-based alternative that leverages some of the built-in functions from NLTK.
#################################
# List all non-standard packages to be imported by your
# script here (only missing packages will be installed)
from ayx import Package, Alteryx
Package.installPackages(['nltk'])
#################################
import urllib
import urllib.request

import nltk
import pandas as pd
#################################
# Load data (from url)
# The response is opened as a context manager so the HTTP connection is
# released as soon as the body has been read.
with urllib.request.urlopen('https://norvig.com/big.txt') as response:
    data = response.read().decode('utf-8')
# Load data (from canvas)
#canvasInput = Alteryx.read("#1")
#data = canvasInput.Field_1.str.cat(sep=' ')
# Split into words
# word_tokenize needs NLTK's Punkt tokenizer data, which is a separate
# download from the nltk package itself. If it is missing NLTK raises
# LookupError; fetch the data once and retry. Older NLTK releases name the
# resource 'punkt', newer ones 'punkt_tab', so both are requested (a name the
# installed version does not know is reported by the downloader and skipped).
try:
    tokens = nltk.word_tokenize(data)
except LookupError:
    for resource in ('punkt', 'punkt_tab'):
        nltk.download(resource, quiet=True)
    tokens = nltk.word_tokenize(data)
# Convert tokens to lowercase
tokens = [w.lower() for w in tokens]
# Remove punctuation (keeps only purely alphabetic tokens, so numbers and
# tokens containing apostrophes/hyphens are dropped as well)
words = [word for word in tokens if word.isalpha()]
# Take a quick look
print(words[:50])
# Calculate our frequency distribution
fd = nltk.FreqDist(words)
#################################
# Prepare output (sorted by descending frequency)
# most_common() with no argument returns every (word, count) pair,
# highest count first.
ranked_pairs = fd.most_common()
ayx_output = pd.DataFrame(ranked_pairs, columns=["Word", "Frequency"])
# Append total word count (the same value is repeated on every row)
total_words = fd.N()
ayx_output.loc[:, 'Total'] = total_words
# Send the result to output anchor 1 of the Python tool
Alteryx.write(ayx_output, 1)