How to save HTML Tables data to CSV in Python

Extracting data from HTML tables and saving it to CSV format is a common task in data science and web scraping. When dealing with multiple tables on a webpage, manual copy-paste becomes impractical, making automated extraction essential.

This tutorial demonstrates how to use Python libraries like csv, urllib, and BeautifulSoup to scrape HTML table data and convert it to CSV format efficiently.

Required Libraries

Before starting, ensure you have the necessary libraries installed −

pip install beautifulsoup4

The csv and urllib modules are part of Python's standard library.

Creating a Simple CSV File

Let's start with the basics of creating a CSV file using Python's built-in csv module −

import csv

# Open the file with a context manager so it is always closed, even if an
# error occurs while writing. newline='' is required by the csv module to
# avoid blank lines between rows on Windows. Mode 'w' creates the file if
# it does not exist (the original 'w+' read capability was never used).
with open('test.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)

    # Header row
    writer.writerow(('Column1', 'Column2', 'Column3'))

    # Five data rows: each row is (i, i+1, i+2)
    for i in range(5):
        writer.writerow((i, i + 1, i + 2))

print("CSV file 'test.csv' created successfully!")

The output of the above code creates a test.csv file in the same directory −

CSV file 'test.csv' created successfully!

Contents of test.csv:
Column1,Column2,Column3
0,1,2
1,2,3
2,3,4
3,4,5
4,5,6

Scraping HTML Table Data

Now let's extract HTML table data from a webpage and save it to CSV format. We'll use BeautifulSoup to parse HTML and extract table content.

Step 1: Import Required Modules

import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

Step 2: Fetch and Parse HTML

# Fetch the page over HTTP and parse the returned HTML into a DOM tree.
url = 'https://www.tutorialspoint.com/python/python_dictionary.htm'
html = urlopen(url)  # NOTE(review): response is never closed explicitly here
soup = BeautifulSoup(html, 'html.parser')  # 'html.parser' = stdlib parser, no extra dependency

Step 3: Extract Table Data

# Find all tables and select the first one
table = soup.find_all('table')[0]
rows = table.find_all('tr')

Step 4: Process and Save Data

# Open CSV file for writing
# Write each table row to the CSV file. A `with` block replaces the manual
# open/try/finally/close dance and guarantees the file is closed on error.
# newline='' prevents the csv module from emitting blank lines on Windows,
# and utf-8 safely encodes any text found in the page.
with open('html_table_data.csv', 'w', newline='', encoding='utf-8') as csv_file:
    writer = csv.writer(csv_file)
    for row in rows:
        # Cleaned text of every header (<th>) or data (<td>) cell in this row
        cells = [cell.get_text().strip() for cell in row.find_all(['td', 'th'])]
        writer.writerow(cells)

Complete Example

Here's the complete script that extracts HTML table data and saves it to a CSV file −

import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup


def scrape_table_to_csv(url, output_filename, table_index=0):
    """
    Scrape HTML table data and save it to a CSV file.

    Args:
        url: URL of the webpage containing the table
        output_filename: Name of the output CSV file
        table_index: Index of the table to extract (default: 0)
    """
    try:
        # Fetch the page; the context manager closes the HTTP response even
        # if parsing fails (the original version leaked the connection).
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html.parser')

        # Locate the requested table, with friendly messages on failure
        tables = soup.find_all('table')
        if not tables:
            print("No tables found on the webpage")
            return

        if table_index >= len(tables):
            print(f"Table index {table_index} not found. Only {len(tables)} table(s) available.")
            return

        table = tables[table_index]
        rows = table.find_all('tr')

        # newline='' avoids blank lines on Windows; utf-8 handles any page text
        with open(output_filename, 'w', newline='', encoding='utf-8') as csv_file:
            writer = csv.writer(csv_file)

            for row in rows:
                # Cleaned, single-line text for every header/data cell
                filtered_row = [
                    cell.get_text().strip().replace('\n', ' ')
                    for cell in row.find_all(['td', 'th'])
                ]

                # Only write non-empty rows (skips decorative/spacer rows)
                if filtered_row:
                    writer.writerow(filtered_row)

        print(f"Table data successfully saved to '{output_filename}'")
        # Note: this counts all <tr> elements found, including any skipped empty rows
        print(f"Total rows extracted: {len(rows)}")

    except Exception as e:
        # Broad catch is deliberate in this tutorial script: report and exit
        print(f"Error occurred: {e}")


# Example usage
url = 'https://www.tutorialspoint.com/python/python_basic_syntax.htm'
scrape_table_to_csv(url, 'python_syntax_table.csv', 0)

The output shows successful extraction of table data −

Table data successfully saved to 'python_syntax_table.csv'
Total rows extracted: 5

Extracting Multiple Tables

To extract all tables from a webpage, you can modify the script to loop through all tables −

import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup


def scrape_all_tables_to_csv(url, base_filename):
    """
    Scrape all HTML tables from a webpage and save each to a separate CSV file.

    Args:
        url: URL of the webpage containing the tables
        base_filename: Prefix for the output files; table N is written to
            "<base_filename>_table_N.csv"
    """
    try:
        # Close the HTTP response as soon as parsing finishes
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html.parser')
        tables = soup.find_all('table')

        if not tables:
            print("No tables found on the webpage")
            return

        for i, table in enumerate(tables):
            rows = table.find_all('tr')
            filename = f"{base_filename}_table_{i+1}.csv"

            # newline='' avoids blank lines on Windows; utf-8 handles any page text
            with open(filename, 'w', newline='', encoding='utf-8') as csv_file:
                writer = csv.writer(csv_file)

                for row in rows:
                    # Cleaned, single-line text for every header/data cell
                    filtered_row = [
                        cell.get_text().strip().replace('\n', ' ')
                        for cell in row.find_all(['td', 'th'])
                    ]

                    if filtered_row:
                        writer.writerow(filtered_row)

            # BUG FIX: the original printed the literal string '(unknown)'
            # instead of the generated filename.
            print(f"Table {i+1} saved to '{filename}' ({len(rows)} rows)")

        print(f"Total tables extracted: {len(tables)}")

    except Exception as e:
        # Broad catch is deliberate in this tutorial script: report and exit
        print(f"Error occurred: {e}")


# Example usage
url = 'https://www.tutorialspoint.com/python/python_basic_operators.htm'
scrape_all_tables_to_csv(url, 'operators')

This script creates separate CSV files for each table found on the webpage −

Table 1 saved to 'operators_table_1.csv' (8 rows)
Table 2 saved to 'operators_table_2.csv' (6 rows)
Table 3 saved to 'operators_table_3.csv' (4 rows)
Total tables extracted: 3
The diagram above summarizes the HTML table to CSV conversion process: (1) fetch the HTML page, (2) parse the DOM with BeautifulSoup, (3) extract the table's rows and cells, and (4) save the data to a CSV file — using the urllib.request, BeautifulSoup, and csv libraries.

Error Handling and Best Practices

When scraping web data, it's important to implement proper error handling and follow best practices −

import csv
import time
from urllib.request import urlopen
from urllib.error import URLError, HTTPError
from bs4 import BeautifulSoup


def robust_table_scraper(url, output_filename, table_index=0, delay=1):
    """
    Robust table scraper with error handling and rate limiting.

    Args:
        url: URL of the webpage containing the table
        output_filename: Name of the output CSV file
        table_index: Index of the table to extract (default: 0)
        delay: Seconds to sleep before the request, to rate-limit scraping

    Returns:
        A human-readable status string (success or error description).
    """
    try:
        # Add delay to be respectful to the server
        time.sleep(delay)

        # Fetch and parse; the context manager closes the HTTP response
        # even when parsing raises (the original leaked the connection).
        with urlopen(url) as response:
            soup = BeautifulSoup(response, 'html.parser')

        # Find tables; raise targeted exceptions the generic handler reports
        tables = soup.find_all('table')
        if not tables:
            raise ValueError("No tables found on the webpage")

        if table_index >= len(tables):
            raise IndexError(f"Table index {table_index} out of range. Found {len(tables)} tables.")

        table = tables[table_index]
        rows = table.find_all('tr')

        # Save to CSV with proper encoding; newline='' avoids blank lines on Windows
        with open(output_filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)

            for row in rows:
                # Collapse all internal whitespace runs to single spaces
                row_data = [
                    ' '.join(cell.get_text().split())
                    for cell in row.find_all(['td', 'th'])
                ]

                if row_data:  # Only write non-empty rows
                    writer.writerow(row_data)

        return f"Successfully saved {len(rows)} rows to {output_filename}"

    except (URLError, HTTPError) as e:
        # HTTPError is a subclass of URLError; listed for documentation value
        return f"Network error: {e}"
    except Exception as e:
        return f"Error: {e}"


# Example usage with error handling
result = robust_table_scraper(
    'https://www.tutorialspoint.com/python/python_dictionary.htm',
    'dictionary_methods.csv'
)
print(result)

The enhanced scraper includes proper error handling and rate limiting −

Successfully saved 5 rows to dictionary_methods.csv

Key Features

The HTML table to CSV conversion process offers several advantages −

  • Automation − Eliminates manual copy-paste for multiple tables

  • Data cleaning − Automatically removes HTML tags and extra whitespace

Updated on: 2026-03-16T21:38:54+05:30

2K+ Views

Kickstart Your Career

Get certified by completing the course

Get Started
Advertisements