Text Classification Model with Feature Importance Analysis

  • Share this:

Code introduction


This code implements a text classification model using CountVectorizer for text vectorization, MultinomialNB for classification, and PermutationImportance from Eli5 to display feature importance.


Technology Stack : CountVectorizer, train_test_split, MultinomialNB, PermutationImportance, eli5

Code Type : Function definition

Code Difficulty : Intermediate


                
                    
import random
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
import eli5
from eli5.sklearn import PermutationImportance

def random_text_classification_model(texts, labels):
    """Train a Multinomial Naive Bayes text classifier and explain its features.

    Vectorizes ``texts`` with a bag-of-words CountVectorizer, fits a
    MultinomialNB classifier on an 80/20 train/test split, computes
    permutation importance for the fitted model, and returns an eli5
    explanation of the feature importances.

    Args:
        texts: Iterable of document strings to classify.
        labels: Iterable of class labels aligned with ``texts``.

    Returns:
        An eli5 ``Explanation`` object describing per-feature permutation
        importances.
    """
    # Hold out 20% of the data; fixed seed keeps the split reproducible.
    texts_train, texts_test, labels_train, labels_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    # Fit the vectorizer on the training split only so no vocabulary
    # information leaks from the held-out documents.
    vectorizer = CountVectorizer()
    X_train = vectorizer.fit_transform(texts_train)

    # Train the Multinomial Naive Bayes classifier on the term counts.
    clf = MultinomialNB()
    clf.fit(X_train, labels_train)

    # Permutation importance: shuffle each feature column and measure the
    # score drop. NOTE(review): importance is measured on the training data
    # here; held-out data would give a less optimistic estimate — confirm
    # whether that is intended.
    perm_importance = PermutationImportance(clf, random_state=42).fit(
        X_train, labels_train
    )

    # BUG FIX: the original call was
    #   eli5.explain_weights(clf, vectorizer.get_feature_names_out(), perm_importance)
    # which passes the feature-name array as the positional `top` parameter
    # and the PermutationImportance object as `target_names`. The estimator
    # to explain is the fitted PermutationImportance object, and feature
    # names go through the `feature_names` keyword.
    feature_importances = eli5.explain_weights(
        perm_importance,
        feature_names=list(vectorizer.get_feature_names_out()),
    )

    return feature_importances

# Example usage: tiny toy dataset with binary sentiment labels.
# Guarded so importing this module does not trigger training as a side effect.
if __name__ == "__main__":
    texts = ["This is a great product", "I love this item", "Bad quality", "Not what I expected"]
    labels = [1, 1, 0, 0]

    feature_importances = random_text_classification_model(texts, labels)
    print(feature_importances)