Indicating Statistically Significant Differences in a Bar Graph (Matplotlib)

To indicate statistically significant differences in bar graphs using Matplotlib, we need to add statistical annotations that show which groups differ significantly from each other. This involves creating error bars and adding significance indicators like asterisks or brackets.

Basic Bar Plot with Error Bars

First, let's create a bar plot with error bars to show the variability in our data:

import numpy as np
import matplotlib.pyplot as plt

# Set figure parameters (global defaults for figures created below)
plt.rcParams["figure.figsize"] = [8, 5]
plt.rcParams["figure.autolayout"] = True

# Sample data: one mean and one standard deviation per group
means = [5, 15, 30, 40]
std = [2, 3, 4, 5]
index = np.arange(len(means))  # x position of each bar
width = 0.7
labels = ['Group A', 'Group B', 'Group C', 'Group D']

# Create the plot
fig, ax = plt.subplots()

# Draw the bars. `capsize` is deliberately not passed here: Axes.bar only
# uses it when it draws the error bars itself (via xerr/yerr), and this
# example draws them with a separate errorbar() call below.
ax.bar(index, means, width=width, color="lightblue",
       linewidth=2, alpha=0.7)

# Draw the error bars; fmt='none' suppresses the data markers and connecting
# line so only the error bars and their caps are shown.
ax.errorbar(index, means, yerr=std, fmt='none', color='black', capsize=5)

# Customize the plot
ax.set_ylim(0, 50)
ax.set_xticks(index)
ax.set_xticklabels(labels)
ax.set_ylabel('Values')
ax.set_title('Bar Graph with Error Bars')

plt.show()
[Display shows a bar chart with 4 bars representing groups A-D with error bars]

Adding Statistical Significance Indicators

Now let's add statistical significance indicators using brackets and asterisks to show which groups differ significantly:

import numpy as np
import matplotlib.pyplot as plt

# Sample data: one mean and one standard deviation per group
means = [5, 15, 30, 40]
std = [2, 3, 4, 5]
index = np.arange(len(means))  # x position of each bar
labels = ['Group A', 'Group B', 'Group C', 'Group D']

fig, ax = plt.subplots(figsize=(8, 6))

# Create bar plot. `capsize` is not passed to bar(): it only takes effect
# when bar() draws the error bars itself (xerr/yerr), and here they are
# drawn by the separate errorbar() call.
ax.bar(index, means, width=0.6,
       color=['lightcoral', 'lightblue', 'lightgreen', 'gold'],
       alpha=0.8)
ax.errorbar(index, means, yerr=std, fmt='none', color='black', capsize=5)

# Add significance brackets and asterisks
def add_significance_bar(ax, x1, x2, y, p_value, *, tick_height=1, text_offset=0.5):
    """Draw a significance bracket between two x positions and label it.

    Parameters:
    ax: axes-like object providing ``plot`` and ``text`` (e.g. a Matplotlib Axes)
    x1, x2: x positions of the two bars being compared
    y: height of the horizontal part of the bracket
    p_value: p-value of the comparison; selects the label
        ('***' if p < 0.001, '**' if p < 0.01, '*' if p < 0.05, else 'ns')
    tick_height: length of the two downward ticks at the bracket ends,
        in the same units as ``y`` (default 1)
    text_offset: gap between the bracket and the bottom of the label,
        in the same units as ``y`` (default 0.5)

    Returns:
    None. The bracket and label are added to ``ax``.
    """
    # Determine significance level. Bounds are strict, so a p-value exactly
    # equal to a threshold falls into the next (weaker) category.
    if p_value < 0.001:
        sig_symbol = '***'
    elif p_value < 0.01:
        sig_symbol = '**'
    elif p_value < 0.05:
        sig_symbol = '*'
    else:
        sig_symbol = 'ns'

    # Draw the bracket as a single polyline: left tick, horizontal bar,
    # right tick. One line object keeps the corners joined.
    ax.plot([x1, x1, x2, x2],
            [y - tick_height, y, y, y - tick_height],
            color='black', linewidth=1)

    # Centre the significance symbol just above the bracket
    ax.text((x1 + x2) / 2, y + text_offset, sig_symbol,
            ha='center', va='bottom', fontweight='bold')

# Add significance comparisons (example p-values).
# The thresholds in add_significance_bar are strict (p < 0.001 for '***'),
# so the B vs C example must be below 0.001; exactly 0.001 would be
# labelled '**'.
add_significance_bar(ax, 0, 1, 20, 0.03)    # A vs B: p < 0.05  -> '*'
add_significance_bar(ax, 1, 2, 37, 0.0005)  # B vs C: p < 0.001 -> '***'
add_significance_bar(ax, 2, 3, 47, 0.15)    # C vs D: not significant -> 'ns'

# Customize the plot
ax.set_ylim(0, 55)  # leave headroom above the highest bracket (y=47) and its label
ax.set_xticks(index)
ax.set_xticklabels(labels)
ax.set_ylabel('Mean Values')
ax.set_title('Statistical Significance in Bar Graph')

plt.tight_layout()
plt.show()
[Display shows a bar chart with significance brackets and asterisks indicating statistical differences between groups]

Creating a Reusable Function

Here's a more comprehensive approach with a reusable function for adding significance indicators:

import numpy as np
import matplotlib.pyplot as plt

def _significance_symbol(p_val):
    """Return the annotation for a p-value: '***', '**', '*' or 'ns'."""
    # Bounds are strict: a p-value equal to a threshold gets the weaker label.
    if p_val < 0.001:
        return '***'
    if p_val < 0.01:
        return '**'
    if p_val < 0.05:
        return '*'
    return 'ns'


def create_significance_plot(means, errors, labels, comparisons):
    """
    Create a bar plot with statistical significance indicators

    Parameters:
    means: list of mean values (one per group, at least one)
    errors: list of error values, same length as means
    labels: list of group labels
    comparisons: list of tuples (group1_idx, group2_idx, p_value);
        brackets are stacked upwards in the order given

    Returns:
    (fig, ax): the created figure and axes

    Raises:
    ValueError: if means is empty or means and errors differ in length
    """
    # Validate before creating the figure so a bad call fails with a clear
    # message and does not leave an empty figure behind.
    if len(means) == 0:
        raise ValueError("means must contain at least one value")
    if len(errors) != len(means):
        raise ValueError(
            "means and errors must have the same length "
            f"(got {len(means)} and {len(errors)})"
        )

    fig, ax = plt.subplots(figsize=(10, 6))

    # Create bar plot; bar() draws the error bars itself because yerr is given
    x_pos = np.arange(len(means))
    ax.bar(x_pos, means, yerr=errors, capsize=5,
           color=['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4'],
           alpha=0.8, error_kw={'color': 'black', 'capthick': 2})

    # Top of the tallest bar including its error bar; brackets start above it
    max_height = max(m + e for m, e in zip(means, errors))

    # Add significance indicators. Each comparison is placed one y_offset
    # (5% of max_height) above the previous one.
    y_offset = max_height * 0.05
    for i, (idx1, idx2, p_val) in enumerate(comparisons):
        y_pos = max_height + y_offset * (i + 1)

        # Draw bracket: horizontal bar plus a short tick at each end
        ax.plot([idx1, idx2], [y_pos, y_pos], 'k-', linewidth=1.5)
        ax.plot([idx1, idx1], [y_pos - y_offset*0.3, y_pos], 'k-', linewidth=1.5)
        ax.plot([idx2, idx2], [y_pos - y_offset*0.3, y_pos], 'k-', linewidth=1.5)

        # Add significance text centred just above the bracket
        ax.text((idx1 + idx2) / 2, y_pos + y_offset*0.2,
                _significance_symbol(p_val),
                ha='center', va='bottom', fontweight='bold', fontsize=12)

    # Customize plot
    ax.set_xticks(x_pos)
    ax.set_xticklabels(labels)
    ax.set_ylabel('Mean ± SEM', fontsize=12)
    ax.set_title('Statistical Comparison Between Groups', fontsize=14, fontweight='bold')
    # Upper limit leaves room for every bracket plus one extra offset for the
    # top label
    ax.set_ylim(0, max_height + y_offset * (len(comparisons) + 2))

    # Add legend for significance levels (axes coordinates, top-left corner)
    ax.text(0.02, 0.98, '* p<0.05\n** p<0.01\n*** p<0.001',
            transform=ax.transAxes, va='top', fontsize=10,
            bbox=dict(boxstyle='round', facecolor='white', alpha=0.8))

    plt.tight_layout()
    return fig, ax

# Example usage: four groups, each comparison given as
# (first group index, second group index, p-value)
means = [12, 18, 25, 32]
errors = [2, 2.5, 3, 2.8]
labels = ['Control', 'Treatment A', 'Treatment B', 'Treatment C']
comparisons = [
    (0, 1, 0.02),    # Control vs Treatment A
    (1, 2, 0.008),   # Treatment A vs Treatment B
    (2, 3, 0.15),    # Treatment B vs Treatment C
    (0, 3, 0.0001),  # Control vs Treatment C
]

fig, ax = create_significance_plot(
    means=means,
    errors=errors,
    labels=labels,
    comparisons=comparisons,
)
plt.show()
[Display shows a professional bar chart with multiple significance brackets and a legend explaining significance levels]

Key Elements for Statistical Significance

| Symbol | P-value Range | Interpretation |
|--------|---------------|----------------|
| * | p < 0.05 | Significant |
| ** | p < 0.01 | Highly significant |
| *** | p < 0.001 | Very highly significant |
| ns | p ≥ 0.05 | Not significant |

Conclusion

Adding statistical significance indicators to bar graphs requires combining error bars with bracket annotations and appropriate symbols. Use the reusable function approach for consistent formatting across multiple plots. Always include a legend explaining the significance levels for clarity.

Updated on: 2026-03-25T22:30:41+05:30

643 Views

Kickstart Your Career

Get certified by completing the course

Get Started
Advertisements