You can download this code by clicking the button below.
This code is now available for download.
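This function accepts a pandas DataFrame and an integer, and returns a copy of the DataFrame in which randomly chosen columns are renamed to random 5-character strings of uppercase letters and digits; the integer sets how many renames are performed.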
Technology Stack: pandas, random, string (numpy is imported but not used in this snippet)
Code Type : Function
Code Difficulty : Intermediate
import pandas as pd
import numpy as np
def randomize_dataframe_columns(df, num_cols_to_change):
    """
    Return a copy of a DataFrame with some columns renamed to random strings.

    Exactly ``num_cols_to_change`` distinct columns are picked at random and
    each one receives a new 5-character name made of uppercase ASCII letters
    and digits. Generated names never collide with each other or with any
    label already present in the DataFrame. The input DataFrame is not
    modified.

    :param df: The pandas DataFrame whose columns need to be randomized.
    :param num_cols_to_change: The number of columns to rename. Values of
        zero or below leave every column name unchanged.
    :return: A new DataFrame with randomized column names.
    :raises ValueError: If ``df`` is not a DataFrame, or if
        ``num_cols_to_change`` is not an integer or exceeds the number of
        columns.
    """
    # Kept function-local because the file does not import these at the top.
    import random
    import string

    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame.")
    if not isinstance(num_cols_to_change, int) or num_cols_to_change > df.shape[1]:
        raise ValueError("Number of columns to change must be an integer less than or equal to the number of columns in the DataFrame.")

    randomized_df = df.copy()
    if num_cols_to_change <= 0:
        return randomized_df

    alphabet = string.ascii_uppercase + string.digits
    name_length = 5

    new_labels = list(randomized_df.columns)
    taken = set(new_labels)

    # Sample positions WITHOUT replacement so no column is picked twice, and
    # work by position so duplicated labels are renamed one column at a time.
    for position in random.sample(range(len(new_labels)), num_cols_to_change):
        candidate = ''.join(random.choices(alphabet, k=name_length))
        # Regenerate on the (rare) collision with an existing or new label.
        while candidate in taken:
            candidate = ''.join(random.choices(alphabet, k=name_length))
        taken.add(candidate)
        new_labels[position] = candidate

    # Rebuild the Index explicitly so the columns' axis name is preserved.
    randomized_df.columns = pd.Index(new_labels, name=df.columns.name)
    return randomized_df