Luigi Task for Generating Random DataFrame and Saving as CSV

  • Share this:

Code introduction


This code defines a function that declares a Luigi task and runs it; the task generates a 100-row, 5-column DataFrame of random values and saves it as the CSV file `random_data.csv`.


Technology Stack : Luigi, Pandas, NumPy

Code Type : Function (wrapping a Luigi task class)

Code Difficulty : Advanced


                
                    
import random
import luigi
import pandas as pd
import numpy as np

def random_data_processing():
    """Generate a random DataFrame and save it as ``random_data.csv`` via Luigi.

    Declares a small Luigi task and runs it in-process using Luigi's local
    scheduler, so no central scheduler daemon is required.  As with any Luigi
    task, the work is skipped when the output target already exists.

    Returns:
        bool: The value returned by ``luigi.build`` -- ``True`` when
        scheduling and execution succeeded, ``False`` otherwise.
    """

    def generate_random_dataframe(rows, cols):
        """Return a ``rows`` x ``cols`` DataFrame filled by ``np.random.rand``.

        Columns are named ``Column_0`` ... ``Column_{cols - 1}``.
        """
        data = np.random.rand(rows, cols)
        return pd.DataFrame(data, columns=[f'Column_{i}' for i in range(cols)])

    class RandomDataProcessingTask(luigi.Task):
        """Luigi task that writes a 100 x 5 random DataFrame to a CSV file."""

        def output(self):
            """Return the local file target that marks this task as complete."""
            return luigi.LocalTarget('random_data.csv')

        def run(self):
            """Generate the random data and write it to the output target."""
            df = generate_random_dataframe(100, 5)
            # Write via a temporary path that Luigi renames onto the target
            # only if the block exits cleanly.  Writing straight to the final
            # path could leave a truncated file behind on failure, which Luigi
            # would then mistake for a completed task.
            with self.output().temporary_path() as tmp_path:
                df.to_csv(tmp_path, index=False)

    task = RandomDataProcessingTask()
    # luigi.run() is the deprecated command-line entry point and expects a
    # list of argument strings; task *instances* must be executed with
    # luigi.build().  local_scheduler=True avoids needing a running luigid.
    return luigi.build([task], local_scheduler=True)