You can download this code by clicking the button below.
This code is now available for download.
This code defines a function that uses the NLTK library to pick one sentence at random from the input text and analyze its words: it reports the selected sentence, the number of words left after removing stopwords and lemmatizing, and the number of unique words among them.
Technology Stack : The code uses the NLTK library for text processing, including sentence tokenization, word tokenization, stopword removal, and lemmatization.
Code Type : Function
Code Difficulty : Intermediate
import random
from nltk import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
def random_sentence_analysis(text):
    """
    Analyse one randomly chosen sentence of ``text`` using NLTK.

    The text is split into sentences with ``sent_tokenize``, one sentence is
    picked with ``random.choice``, and that sentence is split into tokens with
    ``word_tokenize``. English stopwords (compared case-insensitively) and
    tokens that contain no letter or digit (pure punctuation such as ``.``)
    are discarded; the remaining words are lemmatized with
    ``WordNetLemmatizer`` before being counted.

    NOTE: the NLTK tokenizer, stopword and WordNet data used by these calls
    must be installed locally (see ``nltk.download``).

    Args:
        text: The text to analyse. Empty, whitespace-only or ``None`` input
            is accepted and yields an all-zero result.

    Returns:
        A dict with three keys:
            'sentence':     the selected sentence ('' if there was none),
            'word_count':   number of lemmatized words that were kept,
            'unique_words': number of distinct lemmatized words.
    """
    empty_result = {'sentence': '', 'word_count': 0, 'unique_words': 0}

    # Nothing to analyse: bail out before random.choice() gets an empty
    # sequence, which would raise IndexError.
    if not text or not text.strip():
        return empty_result

    # Tokenize the text into sentences
    sentences = sent_tokenize(text)
    if not sentences:
        return empty_result

    # Select a random sentence
    random_sentence = random.choice(sentences)

    # Tokenize the sentence, dropping stopwords and punctuation-only tokens
    # so that 'word_count' really counts words.
    stop_words = set(stopwords.words('english'))
    filtered_words = [
        word
        for word in word_tokenize(random_sentence)
        if word.lower() not in stop_words and any(ch.isalnum() for ch in word)
    ]

    # Lemmatize the words
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]

    # Return the analysis
    return {
        'sentence': random_sentence,
        'word_count': len(lemmatized_words),
        'unique_words': len(set(lemmatized_words)),
    }
# Demonstration: run the analysis on a one-sentence sample and print the result.
text_example = (
    "NLTK is a leading platform for building Python programs to work with human language data."
)
analysis = random_sentence_analysis(text=text_example)
print(analysis)