This function uses Non-negative Matrix Factorization (NMF) to extract topics from a list of texts and explains each topic with the eli5 library. It first builds a TF-IDF matrix, applies NMF to it, and then renders each topic's top term weights as an eli5 explanation.
Technology Stack : Python, eli5, scikit-learn (TfidfVectorizer, NMF)
Code Type : Python Function
Code Difficulty : Intermediate
import eli5
from eli5.base import Explanation, TargetExplanation, FeatureWeights, FeatureWeight
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import NMF

def explain_nmf_topics(texts, n_topics=5, top_k=10):
    """
    Apply Non-negative Matrix Factorization (NMF) to a list of texts to find topics,
    then explain each topic with eli5 by rendering its top term weights.
    Returns one eli5 Explanation object per topic.
    """
    # Step 1: Create a TF-IDF matrix
    tfidf_vectorizer = TfidfVectorizer()
    tfidf = tfidf_vectorizer.fit_transform(texts)
    feature_names = tfidf_vectorizer.get_feature_names_out()
    # Step 2: Apply NMF to the TF-IDF matrix
    nmf = NMF(n_components=n_topics, random_state=1).fit(tfidf)
    # Step 3: Explain each topic using eli5.
    # Note: eli5.explain_weights() does not support NMF models directly, so each
    # topic's component weights are wrapped in an eli5 Explanation and rendered
    # with eli5.format_as_text().
    explanations = []
    for i, component in enumerate(nmf.components_):
        # Indices of the top_k highest-weighted terms for this topic
        top_idx = component.argsort()[::-1][:top_k]
        feature_weights = FeatureWeights(
            pos=[FeatureWeight(str(feature_names[j]), float(component[j])) for j in top_idx],
            neg=[],  # NMF components are non-negative, so there are no negative weights
        )
        explanation = Explanation(
            estimator=repr(nmf),
            method="NMF topic-term weights",
            targets=[TargetExplanation(target=f"Topic {i + 1}",
                                       feature_weights=feature_weights)],
        )
        explanations.append(explanation)
        print(f"Topic {i + 1}: {', '.join(feature_names[top_idx])}")
        print(eli5.format_as_text(explanation))
    return explanations
# Example usage
texts = [
"The quick brown fox jumps over the lazy dog",
"Never jump over the lazy dog quickly",
"A quick brown dog outpaces a quick fox",
"A lazy dog chases a quick fox",
"The dog is quick, but the fox is quicker"
]
explanations = explain_nmf_topics(texts)
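Because explain_nmf_topics returns the eli5 Explanation objects, the same topics can also be rendered in eli5's other output formats. The lines below are a minimal follow-up sketch, not part of the original function, and assume pandas is installed (eli5.format_as_dataframe depends on it):

# Render the first topic's term weights as a pandas DataFrame (requires pandas)
topic_1_df = eli5.format_as_dataframe(explanations[0])
print(topic_1_df)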