Randomize DataFrame Column Names

  • Share this:

Code introduction


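This function accepts a pandas DataFrame and an integer, and returns a copy of the DataFrame in which randomly selected columns have been renamed to random five-character strings of uppercase letters and digits. The integer specifies how many columns to rename; the original DataFrame is left unmodified.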


Technology Stack : pandas, numpy, random, string

Code Type : Function

Code Difficulty : Intermediate


                
                    
import pandas as pd
import numpy as np

def randomize_dataframe_columns(df, num_cols_to_change):
    """
    Return a copy of a DataFrame with some column names replaced by random strings.

    Exactly ``num_cols_to_change`` distinct columns are chosen at random (by
    position, so duplicate column labels are handled one column at a time) and
    each is given a new 5-character name made of uppercase ASCII letters and
    digits. New names never collide with each other or with any label already
    present in ``df``. The input DataFrame is not modified.

    Intended for DataFrames with a flat (non-MultiIndex) column index.

    :param df: The pandas DataFrame whose columns need to be randomized.
    :param num_cols_to_change: The number of columns to rename. Zero or a
        negative value renames nothing and simply returns a copy.
    :return: A new DataFrame with randomized column names.
    :raises ValueError: If ``df`` is not a DataFrame, or if
        ``num_cols_to_change`` is not an integer or exceeds the column count.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")

    if not isinstance(num_cols_to_change, int) or num_cols_to_change > df.shape[1]:
        raise ValueError("Number of columns to change must be an integer less than or equal to the number of columns in the DataFrame.")

    import random
    import string

    alphabet = string.ascii_uppercase + string.digits
    name_length = 5

    new_columns = list(df.columns)

    # Sample positions WITHOUT replacement so that each selected column is
    # renamed exactly once and the requested count is always honoured.
    # Negative counts are clamped to zero (random.sample rejects them).
    positions = random.sample(range(len(new_columns)), max(num_cols_to_change, 0))

    # Track every label in use so a freshly generated name can never duplicate
    # an existing column or a name generated earlier in this call.
    taken = set(new_columns)
    for pos in positions:
        candidate = ''.join(random.choices(alphabet, k=name_length))
        while candidate in taken:
            candidate = ''.join(random.choices(alphabet, k=name_length))
        taken.add(candidate)
        new_columns[pos] = candidate

    randomized_df = df.copy()
    if positions:
        # Assign by position rather than rename-by-label: a label-based rename
        # would change every column sharing a duplicated label.
        randomized_df.columns = pd.Index(new_columns, name=df.columns.name)

    return randomized_df