CSV Data Analysis: Min, Max, Mean, Median, and Standard Deviation Calculation

  • Share this:

Code introduction


This function takes a path to a CSV file, reads the data from the file, and calculates the minimum, maximum, mean, median, and standard deviation for each column.


Technology Stack : csv, math, os, re, random, statistics, sys, time

Code Type : Function

Code Difficulty : Intermediate


                
                    
import csv
import math
import os
import re
import random
import statistics
import sys
import time

def _to_finite_float(cell):
    """Return *cell* parsed as a finite float, or None if it is not one."""
    try:
        value = float(cell)
    except (TypeError, ValueError):
        return None
    # NaN and infinities would poison min/max/median, so treat them as
    # non-numeric cells.
    return value if math.isfinite(value) else None


def analyze_csv_file(file_path):
    """
    Compute per-column statistics for the numeric cells of a CSV file.

    The number of columns is taken from the first row. Cells that cannot be
    parsed as a finite number (header text, blanks, NaN/inf) are ignored, as
    are cells missing from rows shorter than the first row.

    Args:
        file_path (str): Path of the CSV file to read.

    Returns:
        dict: Maps "min_values", "max_values", "mean_values",
        "median_values" and "std_dev_values" to lists holding one entry per
        column. An entry is None when the statistic is undefined: every
        statistic for a column without numeric cells, and the sample
        standard deviation for a column with fewer than two numeric cells.
        All lists are empty when the file contains no rows.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module handle line endings itself, including
    # newlines embedded in quoted fields.
    with open(file_path, mode='r', newline='') as file:
        data = list(csv.reader(file))

    stats = {
        "min_values": [],
        "max_values": [],
        "mean_values": [],
        "median_values": [],
        "std_dev_values": []
    }

    # An empty file has no first row to take the column count from.
    if not data:
        return stats

    num_columns = len(data[0])

    for col_index in range(num_columns):
        # Collect the numeric cells of this column, tolerating ragged rows.
        column_data = []
        for row in data:
            if col_index >= len(row):
                continue
            value = _to_finite_float(row[col_index])
            if value is not None:
                column_data.append(value)

        if not column_data:
            # Keep the lists aligned with the column index.
            for values in stats.values():
                values.append(None)
            continue

        stats["min_values"].append(min(column_data))
        stats["max_values"].append(max(column_data))
        stats["mean_values"].append(statistics.mean(column_data))
        stats["median_values"].append(statistics.median(column_data))
        # The sample standard deviation needs at least two data points.
        stats["std_dev_values"].append(
            statistics.stdev(column_data) if len(column_data) > 1 else None
        )

    return stats