Weekly Challenges

binu_acs · ‎06-01-2022

solution attached

JamesCharnley · ‎06-04-2022

Spoiler

Wish I had the intelligence suite to try out the text analytics but I'll settle for the fun parse!

davidhardister · ‎07-08-2022

CHarrison · ‎07-21-2022

My solution :)

James_Driver9 · ‎07-22-2022

Spoiler

ARussell34 · ‎07-27-2022

Yay to comments and charting!

mgaronson · ‎08-17-2022

Spoiler

Chose poorly in parsing out subject instead of topic view href. Switched it and all fell into place.

Regex is useful in parsing data. Learning more about regex challenge-by-challenge.

acarter881 · ‎09-02-2022

Here's my solution. I'm uploading the Alteryx workflow, but the parsing happens in Python.

Spoiler

import pandas as pd
import warnings
from bs4 import BeautifulSoup
from tqdm import tqdm

# Suppress every UserWarning that originates from the bs4 package for the rest of the run.
# NOTE(review): presumably aimed at BeautifulSoup's "markup resembles a URL/filename"
# warning, which short cell values can trigger — confirm before narrowing the filter.
warnings.filterwarnings("ignore", category=UserWarning, module='bs4')

class Challenges:
    """Strip HTML from the ``body`` column of an Excel sheet and re-export it.

    Intended call order is ``parse_xl()`` followed by ``to_pandas()``;
    ``self.df`` only exists once ``parse_xl()`` has run.
    """

    def __init__(self, xl_path: str) -> None:
        """Store the workbook path; nothing is read until ``parse_xl``.

        Args:
            xl_path: Path of the Excel workbook passed to ``pd.read_excel``.
        """
        # Parser name handed to BeautifulSoup as its ``features`` argument.
        self.features = 'html.parser'
        self.xl_path = xl_path
        # Plain-text bodies produced by the most recent ``parse_xl`` call.
        self.body_parsed = list()

    @staticmethod
    def _html_to_text(markup, features: str) -> str:
        """Return the text content of *markup*, or ``''`` when it is not text."""
        # Blank Excel cells arrive as NaN (a float); skip them up front
        # instead of paying for an exception on every empty row.
        if not isinstance(markup, (str, bytes)):
            return ''
        try:
            return BeautifulSoup(markup=markup, features=features).text
        except TypeError:
            # Kept from the original best-effort behaviour: unparseable -> ''.
            return ''

    @staticmethod
    def _escape_cell(value):
        """Backslash-escape a string cell; return any other value unchanged."""
        if isinstance(value, str):
            # NOTE(review): presumably escapes control / non-ASCII characters
            # so the Excel writer does not reject them — confirm intent.
            return value.encode('unicode_escape').decode('utf-8')
        return value

    def parse_xl(self) -> pd.DataFrame:
        """Load the workbook and replace the HTML ``body`` with plain text.

        Reads ``self.xl_path``, adds a ``body_parsed`` column holding the text
        of each ``body`` cell, then drops the ``topic view href`` and ``body``
        columns. The result is stored on ``self.df`` and returned.

        Returns:
            The transformed DataFrame (the same object as ``self.df``).

        Raises:
            KeyError: If the sheet lacks a ``body`` or ``topic view href``
                column.
        """
        self.df = pd.read_excel(io=self.xl_path)

        # Rebuild the list on every call: appending to the previous run's
        # results would make it longer than the frame and break the column
        # assignment below when parse_xl() is called twice.
        self.body_parsed = [
            self._html_to_text(markup, self.features)
            for markup in tqdm(self.df['body'].tolist())
        ]

        self.df['body_parsed'] = self.body_parsed
        self.df = self.df.drop(columns=['topic view href', 'body'])

        return self.df

    def to_pandas(self, out_path: str = 'my_test_227.xlsx', sheet_name: str = '227') -> None:
        """Write the parsed frame to an Excel file with escaped string cells.

        Args:
            out_path: Destination workbook; defaults to the original
                hard-coded ``'my_test_227.xlsx'``.
            sheet_name: Name of the sheet to write; defaults to ``'227'``.

        Raises:
            AttributeError: If ``parse_xl()`` has not been called, because
                ``self.df`` does not exist yet.
        """
        # DataFrame.map replaced the deprecated applymap in pandas 2.1; look it
        # up on the class so a column named "map" cannot shadow the method.
        cellwise = self.df.map if hasattr(pd.DataFrame, 'map') else self.df.applymap

        # Escape into a separate frame: writing the result back to self.df
        # would double every backslash on a second export.
        escaped = cellwise(self._escape_cell)

        escaped.to_excel(
            excel_writer=out_path,
            sheet_name=sheet_name,
            freeze_panes=(1, 0),  # keep the header row visible while scrolling
            index=False,
        )

# Script entry point: read ./challenge_227.xlsx, strip the HTML bodies,
# then write the cleaned sheet back out.
if __name__ == '__main__':
    challenge = Challenges(xl_path='./challenge_227.xlsx')
    challenge.parse_xl()
    challenge.to_pandas()

import pandas as pd import warnings from bs4 import BeautifulSoup from tqdm import tqdm warnings.filterwarnings("ignore", category=UserWarning, module='bs4') class Challenges: def __init__(self, xl_path: str) -> None: self.features = 'html.parser' self.xl_path = xl_path self.body_parsed = list() def parse_xl(self) -> pd.DataFrame: self.df = pd.read_excel(io=self.xl_path) for i in tqdm(range(len(self.df['body']))): try: soup = BeautifulSoup(markup=self.df['body'][i], features=self.features) self.body_parsed.append(soup.text) except TypeError: self.body_parsed.append('') self.df['body_parsed'] = self.body_parsed self.df = self.df.drop(columns=['topic view href', 'body']) return self.df def to_pandas(self) -> None: self.df = self.df.applymap(lambda x: x.encode('unicode_escape').decode('utf-8') if isinstance(x, str) else x) df = pd.DataFrame(data=self.df, index=None) df.to_excel( excel_writer='my_test_227.xlsx', sheet_name='227', freeze_panes=(1,0), index=False ) # Instantiate class and call functions if __name__ == '__main__': c = Challenges(xl_path='./challenge_227.xlsx') c.parse_xl() c.to_pandas()

ahsanaali · ‎09-16-2022

See solution attached.

Brian32 · ‎09-22-2022

Challenge Solution:

Spoiler

Weekly Challenges

IDEAS WANTED

Challenge #227: /comments/

Weekly Challenges

IDEAS WANTED

Challenge #227: /*comments*/

Challenge #227: /comments/