"""Exploratory Data Analysis with Pandas.

Comprehensive EDA workflow with data loading, exploration, missing value
analysis, and visualization.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Load data
# The 'date' column is parsed into datetimes while the CSV is being read.
df = pd.read_csv('data.csv', parse_dates=['date'])

# Basic exploration
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would add a stray "None" line.
df.info()
print(df.describe())
print(df.head())
# Missing values analysis
# Fraction of missing entries per column, scaled to a percentage.
missing_pct = df.isna().mean().mul(100)
# Report only the columns that actually contain missing entries.
has_missing = missing_pct.gt(0)
print(f"Missing values:\n{missing_pct.loc[has_missing]}")
# Data type optimization
# Build both converted columns first, then write them back onto the frame.
category_as_categorical = df['category'].astype('category')
date_as_datetime = pd.to_datetime(df['date'])

df['category'] = category_as_categorical
df['date'] = date_as_datetime
# Statistical summary by group
# The aggregated frame is stored and printed: left as a bare expression
# statement, its result is discarded when this file runs as a script.
# observed=True is passed explicitly because 'category' is a categorical
# column and the default for that parameter differs between pandas versions;
# the categories here are derived from the data itself, so every category is
# observed and the result is the same under either setting.
group_summary = df.groupby('category', observed=True).agg({
    'value': ['mean', 'std', 'min', 'max'],
    'count': 'sum',
})
print(group_summary)
# Correlation analysis
# Restrict the frame to its numeric columns before computing pairwise
# correlations (DataFrame.corr() with its default settings).
numeric_cols = df.select_dtypes(include=[np.number]).columns
numeric_frame = df.loc[:, numeric_cols]
correlation_matrix = numeric_frame.corr()

# Draw the matrix as an annotated heatmap and title the axes it was drawn on.
heatmap_axes = sns.heatmap(
    data=correlation_matrix,
    annot=True,
    cmap='coolwarm',
)
heatmap_axes.set_title('Correlation Matrix')
plt.show()