Article Categories
- All Categories
-
Data Structure
-
Networking
-
RDBMS
-
Operating System
-
Java
-
MS Excel
-
iOS
-
HTML
-
CSS
-
Android
-
Python
-
C Programming
-
C++
-
C#
-
MongoDB
-
MySQL
-
Javascript
-
PHP
-
Economics & Finance
How to scrape data from google maps using Python?
Google Maps is a powerful tool that provides a vast amount of geospatial data, including locations, addresses, reviews, ratings, and more. Being able to extract this data programmatically can be immensely useful for various applications such as business analysis, research, and data-driven decision-making. In this article, we will explore how to scrape data from Google Maps using Python.
Important Note: Web scraping Google Maps may violate their Terms of Service. For production applications, consider using the Google Places API instead, which is the official and recommended approach.
Installing Required Libraries
To begin with, we need to install the necessary Python libraries that will facilitate the web scraping process:
pip install requests beautifulsoup4 selenium
The requests library helps send HTTP requests, beautifulsoup4 parses HTML content, and selenium handles JavaScript-rendered content.
Setting Up the Target URL
To scrape data from Google Maps, we need to determine the specific URL that contains the desired data. For example, let's extract information about restaurants in a particular area:
# Imports for the basic (non-JavaScript) scraping approach.
import requests
from bs4 import BeautifulSoup
import time  # used by the later rate-limiting/scrolling examples — unused in this snippet

# Example URL for restaurants in New York
url = "https://www.google.com/maps/search/restaurants+in+New+York"

# Add headers to mimic a real browser; without a User-Agent, Google is far
# more likely to serve a blocked/consent page instead of results.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
Basic Web Scraping Approach
Using the requests library, we can send an HTTP GET request to retrieve the HTML content:
import requests
from bs4 import BeautifulSoup


def scrape_google_maps_basic(search_query, location, timeout=10):
    """Fetch and parse the Google Maps search results page.

    Args:
        search_query: What to search for, e.g. "restaurants".
        location: Location string with spaces "+"-encoded, e.g. "New+York".
        timeout: Seconds to wait for the HTTP response (new, defaults to 10).

    Returns:
        A BeautifulSoup of the page on HTTP 200, otherwise None.
    """
    url = f"https://www.google.com/maps/search/{search_query}+in+{location}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
    }
    # requests has NO default timeout: without one, a stalled connection
    # would hang this call forever.
    response = requests.get(url, headers=headers, timeout=timeout)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        print("HTML content retrieved successfully")
        return soup
    else:
        print(f"Failed to retrieve content: {response.status_code}")
        return None


# Example usage
soup = scrape_google_maps_basic("restaurants", "New+York")
Using Selenium for Dynamic Content
Since Google Maps loads content dynamically with JavaScript, using Selenium provides better results:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup  # BUGFIX: was missing, but page_source is parsed below
import time


def scrape_with_selenium(search_term, location):
    """Load a Google Maps search in headless Chrome and return the parsed page.

    Args:
        search_term: What to search for, e.g. "restaurants".
        location: Location string with spaces "+"-encoded, e.g. "New+York".

    Returns:
        A BeautifulSoup of the rendered page, or None on any error.
    """
    # Configure Chrome options
    chrome_options = Options()
    chrome_options.add_argument("--headless")  # Run in background
    # Hardening flags commonly required inside containers/CI environments.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")

    # Initialize the driver
    driver = webdriver.Chrome(options=chrome_options)
    try:
        # Navigate to Google Maps
        url = f"https://www.google.com/maps/search/{search_term}+in+{location}"
        driver.get(url)

        # Wait (up to 10s) for at least one result card to appear; Maps
        # renders listings as elements with role="article".
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[role='article']"))
        )

        # Get page source after JavaScript execution
        html_content = driver.page_source
        return BeautifulSoup(html_content, 'html.parser')
    except Exception as e:
        print(f"Error: {e}")
        return None
    finally:
        # Always release the browser process, even on failure.
        driver.quit()


# Example usage
soup = scrape_with_selenium("restaurants", "New+York")
Extracting Business Information
Once we have the HTML content, we can extract various business details using BeautifulSoup:
def extract_business_info(soup):
    """Extract name, rating, address and type for up to five listings.

    Args:
        soup: BeautifulSoup of a rendered Google Maps search results page.

    Returns:
        A list of dicts with keys 'name', 'rating', 'address', 'type';
        missing fields are reported as "N/A".

    NOTE(review): the class names (qBF1Pd, MW4etd, W4Efsd) are obfuscated
    Google CSS classes and may change without notice — verify periodically.
    """
    businesses = []
    # Find all business listings
    articles = soup.find_all('div', {'role': 'article'})
    for article in articles[:5]:  # Limit to first 5 results
        try:
            # Extract business name
            name_element = article.find('div', class_='qBF1Pd fontHeadlineSmall')
            name = name_element.get_text(strip=True) if name_element else "N/A"

            # Extract rating
            rating_element = article.find('span', class_='MW4etd')
            rating = rating_element.get_text(strip=True) if rating_element else "N/A"

            # Address and business type both live in W4Efsd divs — query the
            # DOM once instead of running the identical find_all twice.
            detail_elements = article.find_all('div', class_='W4Efsd')
            address = detail_elements[0].get_text(strip=True) if detail_elements else "N/A"
            business_type = detail_elements[1].get_text(strip=True) if len(detail_elements) > 1 else "N/A"

            businesses.append({
                'name': name,
                'rating': rating,
                'address': address,
                'type': business_type
            })
        except Exception as e:
            # Skip a malformed listing but keep processing the rest.
            print(f"Error extracting business info: {e}")
            continue
    return businesses
# Extract and display results
if soup:
    # Walk the extracted listings and print each field on its own line.
    for idx, info in enumerate(extract_business_info(soup), start=1):
        print(f"Business {idx}:")
        for label, key in (("Name", 'name'), ("Rating", 'rating'),
                           ("Address", 'address'), ("Type", 'type')):
            print(f"{label}: {info[key]}")
        print("-" * 50)
Complete Example
Here's a complete working example that combines all the techniques:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import time
def scrape_google_maps(search_query, location, max_results=10):
    """End-to-end scrape: render, scroll, and extract Maps listings.

    Args:
        search_query: What to search for, e.g. "coffee shops".
        location: Location string, e.g. "San Francisco".
        max_results: Maximum number of listings to return (default 10).

    Returns:
        A list of dicts with keys 'name', 'rating', 'address';
        an empty list on any scraping error.
    """
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    # Same hardening flags as scrape_with_selenium — keeps the two
    # examples consistent and avoids crashes in containers/CI.
    chrome_options.add_argument("--no-sandbox")
    chrome_options.add_argument("--disable-dev-shm-usage")
    driver = webdriver.Chrome(options=chrome_options)
    try:
        url = f"https://www.google.com/maps/search/{search_query}+{location}"
        driver.get(url)

        # Wait for results to load
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, "[role='article']"))
        )

        # Scroll the results panel to trigger lazy loading of more listings.
        results_panel = driver.find_element(By.CSS_SELECTOR, "[role='main']")
        for _ in range(3):
            driver.execute_script(
                "arguments[0].scrollTop = arguments[0].scrollHeight", results_panel
            )
            time.sleep(2)  # give newly loaded results time to render

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        businesses = []
        articles = soup.find_all('div', {'role': 'article'})[:max_results]
        for article in articles:
            try:
                name = article.find('div', class_='qBF1Pd fontHeadlineSmall')
                name = name.get_text(strip=True) if name else "N/A"
                rating = article.find('span', class_='MW4etd')
                rating = rating.get_text(strip=True) if rating else "N/A"
                address_divs = article.find_all('div', class_='W4Efsd')
                address = address_divs[0].get_text(strip=True) if address_divs else "N/A"
                businesses.append({
                    'name': name,
                    'rating': rating,
                    'address': address
                })
            except Exception as e:
                # Report the skipped listing instead of swallowing silently
                # (the original bound `e` but never used it).
                print(f"Skipping a malformed listing: {e}")
                continue
        return businesses
    except Exception as e:
        print(f"Error during scraping: {e}")
        return []
    finally:
        driver.quit()
# Usage example: scrape up to 10 coffee shops and print a numbered summary.
results = scrape_google_maps("coffee shops", "San Francisco")
print(f"Found {len(results)} businesses:")
for i, business in enumerate(results, 1):
    print(f"{i}. {business['name']}")
    print(f" Rating: {business['rating']}")
    print(f" Address: {business['address']}")
    print()  # blank line between businesses
Best Practices and Considerations
When scraping Google Maps, consider these important points:
- Rate Limiting: Add delays between requests to avoid being blocked
- User Agents: Rotate user agent strings to appear more human-like
- Error Handling: Implement robust error handling for network issues
- Data Validation: Validate extracted data before processing
- Legal Compliance: Ensure your scraping complies with terms of service
Alternative: Google Places API
For production applications, consider using the Google Places API instead:
import googlemaps


def get_places_with_api(query, location, api_key, place_type='restaurant'):
    """Look up places via the official Google Places API (text search).

    Args:
        query: What to search for, e.g. "restaurants".
        location: Location string, e.g. "New York".
        api_key: A valid Google Maps Platform API key.
        place_type: Place type filter passed to the API. Previously
            hard-coded to 'restaurant'; now a parameter (same default)
            so the function works for other place types too.

    Returns:
        A list of dicts with keys 'name', 'rating', 'address', 'place_id'.
    """
    gmaps = googlemaps.Client(key=api_key)

    # Text search
    places_result = gmaps.places(
        query=f"{query} in {location}",
        type=place_type
    )

    businesses = []
    for place in places_result['results']:
        businesses.append({
            'name': place['name'],
            # rating/address can be absent from a result — default to "N/A".
            'rating': place.get('rating', 'N/A'),
            'address': place.get('formatted_address', 'N/A'),
            'place_id': place['place_id']
        })
    return businesses


# Usage (requires valid API key)
# results = get_places_with_api("restaurants", "New York", "YOUR_API_KEY")
Conclusion
While web scraping Google Maps is technically possible using Python with Selenium and BeautifulSoup, it comes with significant challenges and legal considerations. For production applications, the Google Places API provides a more reliable, legal, and feature-rich alternative that should be preferred over web scraping.
