Meaningful Word Extraction from Text

  • Share this:

Code introduction


This function extracts the meaningful words from a given text: it strips punctuation, splits the text into words, and filters out common English stopwords, returning each remaining word once in the order it first appears.


Technology Stack : collections.Counter, string.punctuation, nltk.corpus.stopwords

Code Type : Function

Code Difficulty : Intermediate


                
                    
def get_meaningful_words(text, language='en'):
    """Return the distinct non-stopword words of *text* in first-seen order.

    Punctuation (every character in ``string.punctuation``) is removed, the
    text is split on whitespace, and duplicate words are dropped. When
    ``language`` is ``'en'``, words found in NLTK's English stopword list
    are filtered out. The stopword comparison is case-insensitive, so a
    capitalised stopword such as "The" is removed as well; words that are
    kept retain their original casing.

    Args:
        text: The input string to process.
        language: Language code. Only ``'en'`` enables stopword filtering;
            any other value returns every distinct word unfiltered.

    Returns:
        A list of unique words, ordered by first appearance in ``text``.

    Raises:
        ImportError: If ``language`` is ``'en'`` and ``nltk`` is not
            installed.

    Note:
        With ``language='en'`` the NLTK ``stopwords`` corpus must also be
        available locally (presumably via ``nltk.download('stopwords')``
        -- confirm for your environment).
    """
    from collections import Counter
    from string import punctuation

    # Remove punctuation in a single pass over the text.
    text = text.translate(str.maketrans('', '', punctuation))

    # Split the text into words on any run of whitespace.
    words = text.split()

    # Only the Counter's keys are used below: they give the distinct words
    # in insertion (first-seen) order. The counts themselves are not needed.
    word_counts = Counter(words)

    # Load stop words lazily so other languages work without NLTK installed.
    if language == 'en':
        from nltk.corpus import stopwords
        stop_words = set(stopwords.words('english'))
    else:
        stop_words = set()

    # The stopword list is lowercase, so compare case-insensitively;
    # otherwise sentence-initial words like "The" slip through the filter.
    meaningful_words = [
        word for word in word_counts if word.lower() not in stop_words
    ]

    return meaningful_words