I have a dataframe df, I want to do the following:
- run two stats tests on all the numeric columns (column_1 to column_84)
  to check whether there is a statistical difference between Types X, Y and Z
The stats tests are the Kruskal-Wallis test and Dunn's test.
The groups to compare are:
X vs Y, Y vs Z and X vs Z
- Export the results to an Excel spreadsheet (see screenshot below)
# copy & paste
## generate dataframe "df"
import pandas as pd
import numpy as np

# 84 numeric columns named column_1 ... column_84.
numeric_columns = [f'column_{i}' for i in range(1, 85)]

# 60 rows of uniformly distributed random values, one column per name above.
random_values = np.random.uniform(low=5.5, high=30.75, size=(60, 84))
df = pd.DataFrame(data=random_values, columns=numeric_columns)

# Group label as the first column: 20 consecutive rows each of X, Y and Z.
type_labels = np.repeat(['X', 'Y', 'Z'], 20, axis=0)
df.insert(loc=0, column='Type', value=type_labels)
df
I want to run the Kruskal-Wallis test and Dunn's test for each column, column_1 to column_84
# copy and paste the libraries below
# NOTE: `pip install` is a shell command, not a Python statement. Install the
# Dunn-test package once from a terminal (or with `%pip install` in a notebook):
#     pip install scikit-posthocs
from scipy.stats import kruskal
import scikit_posthocs as sp
# filtering for each Type X,Y and Z
# for column_1
# Extract values for each group
group_x_values = df[df['Type'] == 'X']['column_1'].values
group_y_values = df[df['Type'] == 'Y']['column_1'].values
group_z_values = df[df['Type'] == 'Z']['column_1'].values
# 1st stats test : Kruskal wallis
h_statistic, p_value = kruskal(group_x_values, group_y_values, group_z_values)
# Print the results
print(f"H-statistic: {h_statistic}")
print(f"P-value: {p_value}")
# 2nd stats test: Dunn test
# The grouping column is 'Type' (df has no 'Group' column, so filtering on it
# raises KeyError); reuse the per-group arrays already extracted above.
data = [group_x_values, group_y_values, group_z_values]
p_values = sp.posthoc_dunn(data, p_adjust='bonferroni')
print(p_values)
# for column_2
# Extract values for each group
group_x_values = df[df['Type'] == 'X']['column_2'].values
group_y_values = df[df['Type'] == 'Y']['column_2'].values
group_z_values = df[df['Type'] == 'Z']['column_2'].values
# 1st stats test : Kruskal wallis
h_statistic, p_value = kruskal(group_x_values, group_y_values, group_z_values)
# Print the results
print(f"H-statistic: {h_statistic}")
print(f"P-value: {p_value}")
# 2nd stats test: Dunn test
# The grouping column is 'Type' (df has no 'Group' column, so filtering on it
# raises KeyError); reuse the per-group arrays already extracted above.
data = [group_x_values, group_y_values, group_z_values]
p_values = sp.posthoc_dunn(data, p_adjust='bonferroni')
print(p_values)
.
.
.
# for column_84
# Extract values for each group
group_x_values = df[df['Type'] == 'X']['column_84'].values
group_y_values = df[df['Type'] == 'Y']['column_84'].values
group_z_values = df[df['Type'] == 'Z']['column_84'].values
# 1st stats test : Kruskal wallis
h_statistic, p_value = kruskal(group_x_values, group_y_values, group_z_values)
# Print the results
print(f"H-statistic: {h_statistic}")
print(f"P-value: {p_value}")
# 2nd stats test: Dunn test
# The grouping column is 'Type' (df has no 'Group' column, so filtering on it
# raises KeyError); reuse the per-group arrays already extracted above.
data = [group_x_values, group_y_values, group_z_values]
p_values = sp.posthoc_dunn(data, p_adjust='bonferroni')
print(p_values)
I want to export both sets of results to Excel worksheets, something like this:
Kruskal Worksheet
Dunn Worksheet



