Article Categories
- All Categories
-
Data Structure
-
Networking
-
RDBMS
-
Operating System
-
Java
-
MS Excel
-
iOS
-
HTML
-
CSS
-
Android
-
Python
-
C Programming
-
C++
-
C#
-
MongoDB
-
MySQL
-
Javascript
-
PHP
-
Economics & Finance
Indicating the statistically significant difference in bar graph (Matplotlib)
To indicate statistically significant differences in bar graphs using Matplotlib, we need to add statistical annotations that show which groups differ significantly from each other. This involves creating error bars and adding significance indicators like asterisks or brackets.
Basic Bar Plot with Error Bars
First, let's create a bar plot with error bars to show the variability in our data:
import numpy as np
import matplotlib.pyplot as plt
# Set figure parameters (rcParams are global, so they apply to every
# figure created after this point)
plt.rcParams["figure.figsize"] = [8, 5]
plt.rcParams["figure.autolayout"] = True

# Sample data: one mean and one standard deviation per group
means = [5, 15, 30, 40]
std = [2, 3, 4, 5]
index = np.arange(len(means))
width = 0.7
labels = ['Group A', 'Group B', 'Group C', 'Group D']

# Create the plot
fig, ax = plt.subplots()

# Draw the bars. No `capsize` here: ax.bar only uses it for error bars it
# draws itself (when yerr/xerr is given), and this call passes neither.
ax.bar(index, means, width=width, color="lightblue",
       linewidth=2, alpha=0.7)

# Overlay the error bars; fmt='none' draws only the error bars and caps,
# without markers or a connecting line at the means.
ax.errorbar(index, means, yerr=std, fmt='none', color='black', capsize=5)

# Customize the plot
ax.set_ylim(0, 50)
ax.set_xticks(index)
ax.set_xticklabels(labels)
ax.set_ylabel('Values')
ax.set_title('Bar Graph with Error Bars')
plt.show()
[Display shows a bar chart with 4 bars representing groups A-D with error bars]
Adding Statistical Significance Indicators
Now let's add statistical significance indicators using brackets and asterisks to show which groups differ significantly:
import numpy as np
import matplotlib.pyplot as plt
# Sample data: one mean and one standard deviation per group
means = [5, 15, 30, 40]
std = [2, 3, 4, 5]
index = np.arange(len(means))
labels = ['Group A', 'Group B', 'Group C', 'Group D']

fig, ax = plt.subplots(figsize=(8, 6))

# Create bar plot, one color per group. No `capsize` here: ax.bar only uses
# it for error bars it draws itself (when yerr/xerr is given).
ax.bar(index, means, width=0.6,
       color=['lightcoral', 'lightblue', 'lightgreen', 'gold'],
       alpha=0.8)

# Overlay the error bars; fmt='none' suppresses markers and connecting lines.
ax.errorbar(index, means, yerr=std, fmt='none', color='black', capsize=5)
# Add significance brackets and asterisks
def add_significance_bar(ax, x1, x2, y, p_value, tick_height=1, text_offset=0.5):
    """
    Add a significance bracket and its symbol between two bars

    Parameters:
    ax: axes to draw on (uses its ``plot`` and ``text`` methods)
    x1: x position of the left end of the bracket
    x2: x position of the right end of the bracket
    y: height of the horizontal part of the bracket, in data units
    p_value: p-value of the comparison; selects the symbol
        ('***' below 0.001, '**' below 0.01, '*' below 0.05, otherwise 'ns')
    tick_height: length of the downward end ticks, in data units
    text_offset: gap between the bracket and the symbol, in data units
    """
    # Determine significance level (thresholds are strict: p == 0.05 is 'ns')
    if p_value < 0.001:
        sig_symbol = '***'
    elif p_value < 0.01:
        sig_symbol = '**'
    elif p_value < 0.05:
        sig_symbol = '*'
    else:
        sig_symbol = 'ns'

    # Draw the bracket as a single polyline: up the left tick, across the
    # top, and down the right tick.
    ax.plot([x1, x1, x2, x2],
            [y - tick_height, y, y, y - tick_height],
            color='black', linewidth=1)

    # Add significance symbol, centered above the bracket
    ax.text((x1 + x2) / 2, y + text_offset, sig_symbol,
            ha='center', va='bottom', fontweight='bold')
# Add significance comparisons (example p-values). The thresholds are
# strict, so a p-value must be below 0.001 (not equal to it) to get '***'.
add_significance_bar(ax, 0, 1, 20, 0.03)    # A vs B: p < 0.05  -> '*'
add_significance_bar(ax, 1, 2, 37, 0.0005)  # B vs C: p < 0.001 -> '***'
add_significance_bar(ax, 2, 3, 47, 0.15)    # C vs D: not significant -> 'ns'

# Customize the plot; the y-limit leaves headroom above the highest bracket
ax.set_ylim(0, 55)
ax.set_xticks(index)
ax.set_xticklabels(labels)
ax.set_ylabel('Mean Values')
ax.set_title('Statistical Significance in Bar Graph')
plt.tight_layout()
plt.show()
[Display shows a bar chart with significance brackets and asterisks indicating statistical differences between groups]
Creating a Reusable Function
Here's a more comprehensive approach with a reusable function for adding significance indicators:
import numpy as np
import matplotlib.pyplot as plt
def _significance_symbol(p_val):
    """Return the annotation for a p-value: '***', '**', '*' or 'ns'."""
    # Thresholds are strict and checked from most to least stringent.
    for threshold, symbol in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_val < threshold:
            return symbol
    return 'ns'


def create_significance_plot(means, errors, labels, comparisons, colors=None):
    """
    Create a bar plot with statistical significance indicators

    Parameters:
    means: list of mean values (must not be empty)
    errors: list of error values
    labels: list of group labels
    comparisons: list of tuples (group1_idx, group2_idx, p_value)
    colors: optional list of bar colors; defaults to a built-in
        four-color palette

    Returns:
    (fig, ax): the created figure and axes

    Raises:
    ValueError: if means is empty
    """
    # Validate before creating the figure so a bad call does not leave an
    # empty figure behind. len() is used so array inputs work as well.
    if len(means) == 0:
        raise ValueError("means must contain at least one value")

    if colors is None:
        colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4']

    fig, ax = plt.subplots(figsize=(10, 6))

    # Create bar plot with the error bars drawn by ax.bar itself
    x_pos = np.arange(len(means))
    ax.bar(x_pos, means, yerr=errors, capsize=5,
           color=colors,
           alpha=0.8, error_kw={'color': 'black', 'capthick': 2})

    # Calculate maximum height (bar plus error) so brackets clear every bar
    max_height = max(m + e for m, e in zip(means, errors))

    # Add significance indicators; each comparison gets its own level,
    # stacked upwards in steps of 5% of the maximum height
    y_offset = max_height * 0.05
    for i, (idx1, idx2, p_val) in enumerate(comparisons):
        y_pos = max_height + y_offset * (i + 1)
        symbol = _significance_symbol(p_val)

        # Draw bracket: horizontal line plus a short tick at each end
        ax.plot([idx1, idx2], [y_pos, y_pos], 'k-', linewidth=1.5)
        ax.plot([idx1, idx1], [y_pos - y_offset*0.3, y_pos], 'k-', linewidth=1.5)
        ax.plot([idx2, idx2], [y_pos - y_offset*0.3, y_pos], 'k-', linewidth=1.5)

        # Add significance text, centered just above the bracket
        ax.text((idx1 + idx2) / 2, y_pos + y_offset*0.2, symbol,
                ha='center', va='bottom', fontweight='bold', fontsize=12)

    # Customize plot
    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels)
    ax.set_ylabel('Mean ± SEM', fontsize=12)
    ax.set_title('Statistical Comparison Between Groups', fontsize=14, fontweight='bold')
    # Leave two extra steps of headroom above the highest bracket
    ax.set_ylim(0, max_height + y_offset * (len(comparisons) + 2))

    # Add legend for significance levels (placed in axes coordinates)
    ax.text(0.02, 0.98, '* p<0.05\n** p<0.01\n*** p<0.001',
            transform=ax.transAxes, va='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    return fig, ax
# Example usage: four groups and the pairwise comparisons to annotate
labels = ['Control', 'Treatment A', 'Treatment B', 'Treatment C']
means = [12, 18, 25, 32]
errors = [2, 2.5, 3, 2.8]

# Each entry is (first group index, second group index, p-value)
comparisons = [
    (0, 1, 0.02),
    (1, 2, 0.008),
    (2, 3, 0.15),
    (0, 3, 0.0001),
]

fig, ax = create_significance_plot(means, errors, labels, comparisons)
plt.show()
[Display shows a professional bar chart with multiple significance brackets and a legend explaining significance levels]
Key Elements for Statistical Significance
| Symbol | P-value Range | Interpretation |
|---|---|---|
| * | p < 0.05 | Significant |
| ** | p < 0.01 | Highly significant |
| *** | p < 0.001 | Very highly significant |
| ns | p ≥ 0.05 | Not significant |
Conclusion
Adding statistical significance indicators to bar graphs requires combining error bars with bracket annotations and appropriate symbols. Use the reusable function approach for consistent formatting across multiple plots. Always include a legend explaining the significance levels for clarity.
