You can download this code by clicking the button below.
This code is now available for download.
This function extracts the meaningful words from a given text: it strips punctuation, filters out common English stop words, and returns each remaining distinct word once.
Technology Stack : collections.Counter, string.punctuation, nltk.corpus.stopwords
Code Type : Function
Code Difficulty : Intermediate
def get_meaningful_words(text, language='en'):
    """Return the distinct non-stop-word tokens of *text* in first-seen order.

    The text is split on whitespace and every ASCII punctuation character
    (``string.punctuation``) is removed from each token. Tokens that end up
    empty are dropped. The original casing of the surviving words is kept,
    so "Cat" and "cat" are reported as two separate words.

    Stop-word matching is case-insensitive, and a token is also checked in
    its apostrophe-preserving form, so "The" and "Don't" are filtered just
    like "the" and "don't".

    Args:
        text: The string to analyse.
        language: ``'en'`` filters with NLTK's English stop-word list (this
            imports ``nltk`` and needs its stopwords corpus to be
            available). Any other value disables stop-word filtering.

    Returns:
        A list of the unique remaining words, ordered by first occurrence.
    """
    from collections import Counter
    from string import punctuation

    if language == 'en':
        # Imported lazily so the no-filter path works without nltk installed.
        from nltk.corpus import stopwords
        stop_words = set(stopwords.words('english'))
    else:
        stop_words = set()

    strip_punctuation = str.maketrans('', '', punctuation)

    # Counter is a dict subclass, so its keys keep first-insertion order.
    word_counts = Counter()
    for token in text.split():
        word = token.translate(strip_punctuation)
        if not word:
            # The token consisted solely of punctuation (e.g. "-" or "...").
            continue
        # Check both the fully stripped form and the form that only loses
        # surrounding punctuation: the stop-word list stores contractions
        # with their apostrophe ("don't"), which the stripped form loses.
        candidates = (word.lower(), token.strip(punctuation).lower())
        if any(candidate in stop_words for candidate in candidates):
            continue
        word_counts[word] += 1

    return list(word_counts)