You can download this code by clicking the button below.
This code is now available for download.
This code implements a text classification model using CountVectorizer for text vectorization, MultinomialNB for classification, and PermutationImportance from Eli5 to display feature importance.
Technology Stack : CountVectorizer, train_test_split, MultinomialNB, PermutationImportance, eli5
Code Type : Function definition with example usage
Code Difficulty : Intermediate
import random
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import eli5
from eli5.sklearn import PermutationImportance
def random_text_classification_model(texts, labels):
    """Train a bag-of-words Naive Bayes classifier and explain its features.

    Splits *texts*/*labels* 80/20, fits a CountVectorizer + MultinomialNB
    model on the training split, computes permutation importance on the
    held-out split, and returns an eli5 explanation of the importances.

    Args:
        texts: sequence of raw document strings.
        labels: sequence of class labels aligned with *texts*.

    Returns:
        An eli5 Explanation object describing per-feature permutation weights.
    """
    # Hold out 20% of the data so importance is measured on unseen samples.
    texts_train, texts_test, labels_train, labels_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )
    # Learn the vocabulary on the training split only; reuse it for the test split.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(texts_train)
    X_test = vectorizer.transform(texts_test)
    # Train the Multinomial Naive Bayes classifier.
    clf = MultinomialNB()
    clf.fit(X_train, labels_train)
    # FIX: measure permutation importance on the held-out split — scoring it
    # on the training data (as before) overstates memorized features.
    perm_importance = PermutationImportance(clf, random_state=42).fit(X_test, labels_test)
    # FIX: the original call passed the feature-name array into the `top`
    # positional parameter and the PermutationImportance object into
    # `target_names`. The estimator to explain is the PermutationImportance
    # wrapper, and feature names must go through the `feature_names` keyword.
    return eli5.explain_weights(
        perm_importance,
        feature_names=vectorizer.get_feature_names_out().tolist(),
    )
# Example usage: four short reviews labelled 1 (positive) / 0 (negative).
sample_texts = [
    "This is a great product",
    "I love this item",
    "Bad quality",
    "Not what I expected",
]
sample_labels = [1, 1, 0, 0]
print(random_text_classification_model(sample_texts, sample_labels))