Article Categories
- All Categories
-
Data Structure
-
Networking
-
RDBMS
-
Operating System
-
Java
-
MS Excel
-
iOS
-
HTML
-
CSS
-
Android
-
Python
-
C Programming
-
C++
-
C#
-
MongoDB
-
MySQL
-
Javascript
-
PHP
-
Economics & Finance
How to scrape data from google maps using Python?
Google Maps is a powerful tool that provides a vast amount of geospatial data, including locations, addresses, reviews, ratings, and more. Being able to extract this data programmatically can be immensely useful for various applications such as business analysis, research, and data-driven decision-making. In this article, we will explore how to scrape data from Google Maps using Python.
Important Note: Web scraping Google Maps may violate their Terms of Service. For production applications, consider using the Google Places API instead, which is the official and recommended approach.
Installing Required Libraries
To begin with, we need to install the necessary Python libraries that will facilitate the web scraping process:
pip install requests beautifulsoup4 selenium
The requests library helps send HTTP requests, beautifulsoup4 parses HTML content, and selenium handles JavaScript-rendered content.
Setting Up the Target URL
To scrape data from Google Maps, we need to determine the specific URL that contains the desired data. For example, let's extract information about restaurants in a particular area:
# Imports for the basic (non-JavaScript) scraping approach.
import requests
from bs4 import BeautifulSoup
import time  # used by the later rate-limiting/scrolling examples — unused in this snippet

# Example URL for restaurants in New York
url = "https://www.google.com/maps/search/restaurants+in+New+York"

# Add headers to mimic a real browser; without a User-Agent, Google is far
# more likely to serve a blocked/consent page instead of results.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
Basic Web Scraping Approach
Using the requests library, we can send an HTTP GET request to retrieve the HTML content:
import requests
from bs4 import BeautifulSoup


def scrape_google_maps_basic(search_query, location, timeout=10):
    """Fetch and parse the Google Maps search results page.

    Args:
        search_query: What to search for, e.g. "restaurants".
        location: Location string with spaces "+"-encoded, e.g. "New+York".
        timeout: Seconds to wait for the HTTP response (new, defaults to 10).

    Returns:
        A BeautifulSoup of the page on HTTP 200, otherwise None.
    """
    url = f"https://www.google.com/maps/search/{search_query}+in+{location}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # requests has NO default timeout: without one, a stalled connection
    # would hang this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        print("HTML content retrieved successfully")
        return soup
    else:
        print(f"Failed to retrieve content: {response.status_code}")
        return None


# Example usage
soup = scrape_google_maps_basic("restaurants", "New+York")
Using Selenium for Dynamic Content
Since Google Maps loads content dynamically with JavaScript, using Selenium provides better results:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup  # BUGFIX: was missing, but page_source is parsed below
import time


def scrape_with_selenium(search_term, location):
    """Load a Google Maps search in headless Chrome and return the parsed page.

    Args:
        search_term: What to search for, e.g. "restaurants".
        location: Location string with spaces "+"-encoded, e.g. "New+York".

    Returns:
        A BeautifulSoup of the rendered page, or None on any error.
    """
    # Configure Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in background
    # Hardening flags commonly required inside containers/CI environments.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Initialize the driver
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Navigate to Google Maps
        url = f"https://www.google.com/maps/search/{search_term}+in+{location}"
        driver.get(url)

        # Wait (up to 10s) for at least one result card to appear; Maps
        # renders listings as elements with role="article".
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[role='article']"))
        )

        # Get page source after JavaScript execution
        html_content = driver.page_source
        return BeautifulSoup(html_content, 'html.parser')
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Always release the browser process, even on failure.
        driver.quit()


# Example usage
soup = scrape_with_selenium("restaurants", "New+York")
Extracting Business Information
Once we have the HTML content, we can extract various business details using BeautifulSoup:
def extract_business_info(soup):
    """Extract name, rating, address and type for up to five listings.

    Args:
        soup: BeautifulSoup of a rendered Google Maps search results page.

    Returns:
        A list of dicts with keys 'name', 'rating', 'address', 'type';
        missing fields are reported as "N/A".

    NOTE(review): the class names (qBF1Pd, MW4etd, W4Efsd) are obfuscated
    Google CSS classes and may change without notice — verify periodically.
    """
    businesses = []
    # Find all business listings
    articles = soup.find_all('div', {'role': 'article'})
    for article in articles[:5]:  # Limit to first 5 results
        try:
            # Extract business name
            name_element = article.find('div', class_='qBF1Pd fontHeadlineSmall')
            name = name_element.get_text(strip=True) if name_element else "N/A"

            # Extract rating
            rating_element = article.find('span', class_='MW4etd')
            rating = rating_element.get_text(strip=True) if rating_element else "N/A"

            # Address and business type both live in W4Efsd divs — query the
            # DOM once instead of running the identical find_all twice.
            detail_elements = article.find_all('div', class_='W4Efsd')
            address = detail_elements[0].get_text(strip=True) if detail_elements else "N/A"
            business_type = detail_elements[1].get_text(strip=True) if len(detail_elements) > 1 else "N/A"

            businesses.append({
                'name': name,
                'rating': rating,
                'address': address,
                'type': business_type
            })
        except Exception as e:
            # Skip a malformed listing but keep processing the rest.
            print(f"Error extracting business info: {e}")
            continue
    return businesses
# Extract and display results
if soup:
    # Walk the extracted listings and print each field on its own line.
    for idx, info in enumerate(extract_business_info(soup), start=1):
        print(f"Business {idx}:")
        for label, key in (("Name", 'name'), ("Rating", 'rating'),
                           ("Address", 'address'), ("Type", 'type')):
            print(f"{label}: {info[key]}")
        print("-" * 50)
Complete Example
Here's a complete working example that combines all the techniques:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
def scrape_google_maps(search_query, location, max_results=10):
    """End-to-end scrape: render, scroll, and extract Maps listings.

    Args:
        search_query: What to search for, e.g. "coffee shops".
        location: Location string, e.g. "San Francisco".
        max_results: Maximum number of listings to return (default 10).

    Returns:
        A list of dicts with keys 'name', 'rating', 'address';
        an empty list on any scraping error.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # Same hardening flags as scrape_with_selenium — keeps the two
    # examples consistent and avoids crashes in containers/CI.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        url = f"https://www.google.com/maps/search/{search_query}+{location}"
        driver.get(url)

        # Wait for results to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[role='article']"))
        )

        # Scroll the results panel to trigger lazy loading of more listings.
        results_panel = driver.find_element(By.CSS_SELECTOR, "[role='main']")
        for _ in range(3):
            driver.execute_script(
                "arguments[0].scrollTop = arguments[0].scrollHeight", results_panel
            )
            time.sleep(2)  # give newly loaded results time to render

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        businesses = []
        articles = soup.find_all('div', {'role': 'article'})[:max_results]
        for article in articles:
            try:
                name = article.find('div', class_='qBF1Pd fontHeadlineSmall')
                name = name.get_text(strip=True) if name else "N/A"
                rating = article.find('span', class_='MW4etd')
                rating = rating.get_text(strip=True) if rating else "N/A"
                address_divs = article.find_all('div', class_='W4Efsd')
                address = address_divs[0].get_text(strip=True) if address_divs else "N/A"
                businesses.append({
                    'name': name,
                    'rating': rating,
                    'address': address
                })
            except Exception as e:
                # Report the skipped listing instead of swallowing silently
                # (the original bound `e` but never used it).
                print(f"Skipping a malformed listing: {e}")
                continue
        return businesses
    except Exception as e:
        print(f"Error during scraping: {e}")
        return []
    finally:
        driver.quit()
# Usage example: scrape up to 10 coffee shops and print a numbered summary.
results = scrape_google_maps("coffee shops", "San Francisco")
print(f"Found {len(results)} businesses:")
for i, business in enumerate(results, 1):
    print(f"{i}. {business['name']}")
    print(f" Rating: {business['rating']}")
    print(f" Address: {business['address']}")
    print()  # blank line between businesses
Best Practices and Considerations
When scraping Google Maps, consider these important points:
- Rate Limiting: Add delays between requests to avoid being blocked
- User Agents: Rotate user agent strings to appear more human-like
- Error Handling: Implement robust error handling for network issues
- Data Validation: Validate extracted data before processing
- Legal Compliance: Ensure your scraping complies with terms of service
Alternative: Google Places API
For production applications, consider using the Google Places API instead:
import googlemaps


def get_places_with_api(query, location, api_key, place_type='restaurant'):
    """Look up places via the official Google Places API (text search).

    Args:
        query: What to search for, e.g. "restaurants".
        location: Location string, e.g. "New York".
        api_key: A valid Google Maps Platform API key.
        place_type: Place type filter passed to the API. Previously
            hard-coded to 'restaurant'; now a parameter (same default)
            so the function works for other place types too.

    Returns:
        A list of dicts with keys 'name', 'rating', 'address', 'place_id'.
    """
    gmaps = googlemaps.Client(key=api_key)

    # Text search
    places_result = gmaps.places(
        query=f"{query} in {location}",
        type=place_type
    )

    businesses = []
    for place in places_result['results']:
        businesses.append({
            'name': place['name'],
            # rating/address can be absent from a result — default to "N/A".
            'rating': place.get('rating', 'N/A'),
            'address': place.get('formatted_address', 'N/A'),
            'place_id': place['place_id']
        })
    return businesses


# Usage (requires valid API key)
# results = get_places_with_api("restaurants", "New York", "YOUR_API_KEY")
Conclusion
While web scraping Google Maps is technically possible using Python with Selenium and BeautifulSoup, it comes with significant challenges and legal considerations. For production applications, the Google Places API provides a more reliable, legal, and feature-rich alternative that should be preferred over web scraping.
