How to find the frequency of a particular word in a cell of an excel table using Python?

Finding the frequency of a specific word in Excel cells is useful for data analysis and text processing. Python's pandas library provides an efficient way to read Excel files and count word occurrences.

We'll demonstrate using a sample Excel file sampleTutorialsPoint.xlsx containing cricket player data.

Sample Excel Data

Player Name Age Type Country Team Runs Wickets
Virat Kohli 33 Batsman India Royal Challengers Bangalore 6500 20
Mahendra Singh Dhoni 39 Batsman India Chennai Super Kings 4600 0
Rashid Khan 24 Bowler Afghanistan Gujarat Titans 500 130
Hardik Pandya 29 All rounder India Gujarat Titans 2400 85
David Warner 34 Batsman Australia Delhi Capitals 5500 12

Using Pandas to Count Word Frequency

The most efficient approach uses pandas to read the Excel file and count occurrences across all cells ?

import pandas as pd

# Create sample data for demonstration
data = {
    'Player Name': ['Virat Kohli', 'MS Dhoni', 'Rashid Khan', 'Hardik Pandya'],
    'Country': ['India', 'India', 'Afghanistan', 'India'],
    'Type': ['Batsman', 'Batsman', 'Bowler', 'All rounder'],
    'Team': ['RCB', 'CSK', 'Gujarat Titans', 'Gujarat Titans']
}

df = pd.DataFrame(data)
print("Sample DataFrame:")
print(df)

# Word to search for
search_word = 'India'

# Count frequency across all cells
frequency = 0
for column in df.columns:
    frequency += df[column].astype(str).str.count(search_word).sum()

print(f"\nFrequency of '{search_word}': {frequency}")
Sample DataFrame:
    Player Name    Country        Type             Team
0   Virat Kohli      India     Batsman              RCB
1      MS Dhoni      India     Batsman              CSK
2   Rashid Khan  Afghanistan      Bowler  Gujarat Titans
3  Hardik Pandya      India  All rounder  Gujarat Titans

Frequency of 'India': 3

Reading from Excel File

For actual Excel files, use pandas read_excel() function ?

import pandas as pd

def count_word_in_excel(file_path, search_word):
    # Read Excel file
    df = pd.read_excel(file_path)
    
    # Initialize counter
    frequency = 0
    
    # Search in all columns
    for column in df.columns:
        frequency += df[column].astype(str).str.count(search_word).sum()
    
    return frequency

# Usage
file_path = 'sampleTutorialsPoint.xlsx'
word = 'India'
result = count_word_in_excel(file_path, word)
print(f"Frequency of '{word}': {result}")

Counting in Specific Column

To count word frequency in a specific column only ?

import pandas as pd

# Sample data
data = {
    'Country': ['India', 'India', 'Afghanistan', 'India', 'Australia'],
    'Team': ['RCB', 'CSK', 'Gujarat', 'Mumbai', 'Delhi']
}

df = pd.DataFrame(data)

# Count 'India' in Country column only
search_word = 'India'
column_name = 'Country'

frequency = df[column_name].astype(str).str.count(search_word).sum()

print(f"Frequency of '{search_word}' in '{column_name}' column: {frequency}")
Frequency of 'India' in 'Country' column: 3

Case-Insensitive Search

For case-insensitive word counting ?

import pandas as pd

# Sample data with mixed case
data = {
    'Country': ['india', 'INDIA', 'India', 'Afghanistan']
}

df = pd.DataFrame(data)

search_word = 'india'

# Case-insensitive search
frequency = df['Country'].astype(str).str.lower().str.count(search_word.lower()).sum()

print(f"Case-insensitive frequency of '{search_word}': {frequency}")
Case-insensitive frequency of 'india': 3

Complete Function

Here's a comprehensive function that handles various scenarios ?

import pandas as pd

def find_word_frequency(data, search_word, column=None, case_sensitive=True):
    """
    Find frequency of a word in DataFrame
    
    Parameters:
    data: DataFrame or dict
    search_word: word to search
    column: specific column name (None for all columns)
    case_sensitive: whether search is case sensitive
    """
    
    if isinstance(data, dict):
        df = pd.DataFrame(data)
    else:
        df = data
    
    frequency = 0
    search_term = search_word if case_sensitive else search_word.lower()
    
    columns_to_search = [column] if column else df.columns
    
    for col in columns_to_search:
        if col in df.columns:
            text_data = df[col].astype(str)
            if not case_sensitive:
                text_data = text_data.str.lower()
            frequency += text_data.str.count(search_term).sum()
    
    return frequency

# Example usage
sample_data = {
    'Player': ['Virat Kohli', 'MS Dhoni', 'Rohit Sharma'],
    'Country': ['India', 'India', 'India'],
    'Team': ['RCB', 'CSK', 'Mumbai']
}

result = find_word_frequency(sample_data, 'India')
print(f"Total frequency: {result}")

result_specific = find_word_frequency(sample_data, 'India', column='Country')
print(f"Frequency in Country column: {result_specific}")
Total frequency: 3
Frequency in Country column: 3

Conclusion

Use pandas for efficient word frequency counting in Excel files. The str.count() method provides flexible text searching capabilities. Consider case sensitivity and specific column requirements based on your analysis needs.

Updated on: 2026-03-26T21:24:28+05:30

852 Views

Kickstart Your Career

Get certified by completing the course

Get Started
Advertisements