I'm relatively new to Python, and for an assignment I had to write a program that fetches a webpage, uses BeautifulSoup to extract all paragraphs from it, extracts all words ending with "ing", and finally saves them to a file in the format "Word" + tab + "wordcount" + "newline".
This is my code so far. Is there a more Pythonic way to handle this? Or, more generally, are there ways to improve the code?
import re
from collections import Counter
from operator import itemgetter
from typing import Dict, Mapping

import requests
from bs4 import BeautifulSoup

# Page analysed and report file written when main() is called without arguments.
DEFAULT_SITE = "https://en.wikipedia.org/wiki/Data_science"
DEFAULT_OUTPUT = r"Q1_Part1.txt"

# Whole words ending in "ing"; compiled once instead of on every call.
ING_WORD = re.compile(r'\b(\w*ing)\b')


def main(site: str = DEFAULT_SITE, output_path: str = DEFAULT_OUTPUT) -> None:
    """Fetch *site*, count its "-ing" words and write the report to *output_path*.

    Only text inside ``<p>`` elements is considered.  The report has one
    ``word<TAB>count`` line per distinct word, most frequent first.

    Raises:
        requests.RequestException: if the page cannot be fetched or the
            server answers with an HTTP error status.
        OSError: if the report file cannot be written.
    """
    # A timeout keeps the script from hanging forever on a stalled
    # connection; raise_for_status() stops us from counting words on an
    # error page.
    response = requests.get(site, timeout=30)
    response.raise_for_status()

    # Name the parser explicitly: otherwise bs4 picks whichever one happens
    # to be installed, so results can differ between machines.
    soup = BeautifulSoup(response.content, "html.parser")

    # Join with a newline so the last word of one paragraph can never fuse
    # with the first word of the next one.
    fulltext = "\n".join(p.get_text() for p in soup.find_all("p"))

    formatted_words = sort_and_format(match_words(fulltext))

    # \w matches non-ASCII letters too, so fix the encoding rather than
    # relying on the platform default.
    with open(output_path, "w", encoding="utf-8") as out:
        out.write(formatted_words)


def match_words(string: str) -> Dict[str, int]:
    """Return how often each word ending in "ing" occurs in *string*.

    Matching is case-insensitive: the text is lower-cased first, so every
    key of the result is lower case.  The result is a ``Counter`` (a
    ``dict`` subclass) whose keys are in order of first appearance.
    """
    return Counter(ING_WORD.findall(string.lower()))


# NOTE: the parameter name shadows the builtin ``dict``; it is kept so that
# existing keyword callers (``sort_and_format(dict=...)``) keep working.
def sort_and_format(dict: Mapping[str, int]) -> str:
    """Render a word -> count mapping as ``word<TAB>count`` lines.

    Lines are ordered by descending count.  The sort is stable, so words
    with equal counts keep the mapping's own iteration order.  An empty
    mapping yields an empty string.
    """
    ranked = sorted(dict.items(), key=itemgetter(1), reverse=True)
    return "".join(f"{word}\t{count}\n" for word, count in ranked)


if __name__ == "__main__":
    main()