# 🤖 Generated with [Claude Code](https://claude.com/claude-code)
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
# Listing metadata: Python, 410 lines, 13 KiB
"""
Google Search Console Client
============================
Purpose: Interact with Google Search Console API for SEO data
Python: 3.10+
Usage:
    from gsc_client import SearchConsoleClient
    client = SearchConsoleClient()
    data = client.get_search_analytics("sc-domain:example.com")
"""

import argparse
import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any

from google.oauth2 import service_account
from googleapiclient.discovery import build

from base_client import config

# Runs at import time: configures the root logger (INFO level, timestamped
# "time - level - message" format) and creates this module's logger.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


@dataclass
class SearchAnalyticsResult:
    """Search analytics query result.

    Attributes:
        rows: Row dicts taken from the API response's ``rows`` list.
        total_clicks: Sum of ``clicks`` over ``rows``.
        total_impressions: Sum of ``impressions`` over ``rows``.
        average_ctr: Average click-through rate for the result set.
        average_position: Average search position for the result set.
    """

    # default_factory gives every instance its own list (no shared mutable default).
    rows: list[dict] = field(default_factory=list)
    total_clicks: int = 0
    total_impressions: int = 0
    average_ctr: float = 0.0
    average_position: float = 0.0


@dataclass
|
|
class SitemapInfo:
|
|
"""Sitemap information from Search Console."""
|
|
|
|
path: str
|
|
last_submitted: str | None = None
|
|
last_downloaded: str | None = None
|
|
is_pending: bool = False
|
|
is_sitemaps_index: bool = False
|
|
warnings: int = 0
|
|
errors: int = 0
|
|
|
|
|
|
class SearchConsoleClient:
|
|
"""Client for Google Search Console API."""
|
|
|
|
SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]
|
|
|
|
def __init__(self, credentials_path: str | None = None):
|
|
"""
|
|
Initialize Search Console client.
|
|
|
|
Args:
|
|
credentials_path: Path to service account JSON key
|
|
"""
|
|
self.credentials_path = credentials_path or config.google_credentials_path
|
|
self._service = None
|
|
|
|
@property
|
|
def service(self):
|
|
"""Get or create Search Console service."""
|
|
if self._service is None:
|
|
if not self.credentials_path:
|
|
raise ValueError(
|
|
"Google credentials not configured. "
|
|
"Set GOOGLE_APPLICATION_CREDENTIALS environment variable."
|
|
)
|
|
|
|
credentials = service_account.Credentials.from_service_account_file(
|
|
self.credentials_path,
|
|
scopes=self.SCOPES,
|
|
)
|
|
self._service = build("searchconsole", "v1", credentials=credentials)
|
|
|
|
return self._service
|
|
|
|
def list_sites(self) -> list[dict]:
|
|
"""List all sites accessible to the service account."""
|
|
response = self.service.sites().list().execute()
|
|
return response.get("siteEntry", [])
|
|
|
|
def get_search_analytics(
|
|
self,
|
|
site_url: str,
|
|
start_date: str | None = None,
|
|
end_date: str | None = None,
|
|
dimensions: list[str] | None = None,
|
|
row_limit: int = 25000,
|
|
filters: list[dict] | None = None,
|
|
) -> SearchAnalyticsResult:
|
|
"""
|
|
Get search analytics data.
|
|
|
|
Args:
|
|
site_url: Site URL (e.g., "sc-domain:example.com" or "https://example.com/")
|
|
start_date: Start date (YYYY-MM-DD), defaults to 30 days ago
|
|
end_date: End date (YYYY-MM-DD), defaults to yesterday
|
|
dimensions: List of dimensions (query, page, country, device, date)
|
|
row_limit: Maximum rows to return
|
|
filters: Dimension filters
|
|
|
|
Returns:
|
|
SearchAnalyticsResult with rows and summary stats
|
|
"""
|
|
# Default date range: last 30 days
|
|
if not end_date:
|
|
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
if not start_date:
|
|
start_date = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")
|
|
|
|
# Default dimensions
|
|
if dimensions is None:
|
|
dimensions = ["query", "page"]
|
|
|
|
request_body = {
|
|
"startDate": start_date,
|
|
"endDate": end_date,
|
|
"dimensions": dimensions,
|
|
"rowLimit": row_limit,
|
|
}
|
|
|
|
if filters:
|
|
request_body["dimensionFilterGroups"] = [{"filters": filters}]
|
|
|
|
try:
|
|
response = self.service.searchanalytics().query(
|
|
siteUrl=site_url,
|
|
body=request_body,
|
|
).execute()
|
|
except Exception as e:
|
|
logger.error(f"Failed to query search analytics: {e}")
|
|
raise
|
|
|
|
rows = response.get("rows", [])
|
|
|
|
# Calculate totals
|
|
total_clicks = sum(row.get("clicks", 0) for row in rows)
|
|
total_impressions = sum(row.get("impressions", 0) for row in rows)
|
|
total_ctr = sum(row.get("ctr", 0) for row in rows)
|
|
total_position = sum(row.get("position", 0) for row in rows)
|
|
|
|
avg_ctr = total_ctr / len(rows) if rows else 0
|
|
avg_position = total_position / len(rows) if rows else 0
|
|
|
|
return SearchAnalyticsResult(
|
|
rows=rows,
|
|
total_clicks=total_clicks,
|
|
total_impressions=total_impressions,
|
|
average_ctr=avg_ctr,
|
|
average_position=avg_position,
|
|
)
|
|
|
|
def get_top_queries(
|
|
self,
|
|
site_url: str,
|
|
limit: int = 100,
|
|
start_date: str | None = None,
|
|
end_date: str | None = None,
|
|
) -> list[dict]:
|
|
"""Get top search queries by clicks."""
|
|
result = self.get_search_analytics(
|
|
site_url=site_url,
|
|
dimensions=["query"],
|
|
row_limit=limit,
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
# Sort by clicks
|
|
sorted_rows = sorted(
|
|
result.rows,
|
|
key=lambda x: x.get("clicks", 0),
|
|
reverse=True,
|
|
)
|
|
|
|
return [
|
|
{
|
|
"query": row["keys"][0],
|
|
"clicks": row.get("clicks", 0),
|
|
"impressions": row.get("impressions", 0),
|
|
"ctr": row.get("ctr", 0),
|
|
"position": row.get("position", 0),
|
|
}
|
|
for row in sorted_rows[:limit]
|
|
]
|
|
|
|
def get_top_pages(
|
|
self,
|
|
site_url: str,
|
|
limit: int = 100,
|
|
start_date: str | None = None,
|
|
end_date: str | None = None,
|
|
) -> list[dict]:
|
|
"""Get top pages by clicks."""
|
|
result = self.get_search_analytics(
|
|
site_url=site_url,
|
|
dimensions=["page"],
|
|
row_limit=limit,
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
sorted_rows = sorted(
|
|
result.rows,
|
|
key=lambda x: x.get("clicks", 0),
|
|
reverse=True,
|
|
)
|
|
|
|
return [
|
|
{
|
|
"page": row["keys"][0],
|
|
"clicks": row.get("clicks", 0),
|
|
"impressions": row.get("impressions", 0),
|
|
"ctr": row.get("ctr", 0),
|
|
"position": row.get("position", 0),
|
|
}
|
|
for row in sorted_rows[:limit]
|
|
]
|
|
|
|
def get_sitemaps(self, site_url: str) -> list[SitemapInfo]:
|
|
"""Get list of sitemaps for a site."""
|
|
try:
|
|
response = self.service.sitemaps().list(siteUrl=site_url).execute()
|
|
except Exception as e:
|
|
logger.error(f"Failed to get sitemaps: {e}")
|
|
raise
|
|
|
|
sitemaps = []
|
|
for sm in response.get("sitemap", []):
|
|
sitemaps.append(SitemapInfo(
|
|
path=sm.get("path", ""),
|
|
last_submitted=sm.get("lastSubmitted"),
|
|
last_downloaded=sm.get("lastDownloaded"),
|
|
is_pending=sm.get("isPending", False),
|
|
is_sitemaps_index=sm.get("isSitemapsIndex", False),
|
|
warnings=sm.get("warnings", 0),
|
|
errors=sm.get("errors", 0),
|
|
))
|
|
|
|
return sitemaps
|
|
|
|
def submit_sitemap(self, site_url: str, sitemap_url: str) -> bool:
|
|
"""Submit a sitemap for indexing."""
|
|
try:
|
|
self.service.sitemaps().submit(
|
|
siteUrl=site_url,
|
|
feedpath=sitemap_url,
|
|
).execute()
|
|
logger.info(f"Submitted sitemap: {sitemap_url}")
|
|
return True
|
|
except Exception as e:
|
|
logger.error(f"Failed to submit sitemap: {e}")
|
|
return False
|
|
|
|
def inspect_url(self, site_url: str, inspection_url: str) -> dict:
|
|
"""
|
|
Inspect a URL's indexing status.
|
|
|
|
Note: This uses the URL Inspection API which may have different quotas.
|
|
"""
|
|
try:
|
|
response = self.service.urlInspection().index().inspect(
|
|
body={
|
|
"inspectionUrl": inspection_url,
|
|
"siteUrl": site_url,
|
|
}
|
|
).execute()
|
|
|
|
result = response.get("inspectionResult", {})
|
|
|
|
return {
|
|
"url": inspection_url,
|
|
"indexing_state": result.get("indexStatusResult", {}).get(
|
|
"coverageState", "Unknown"
|
|
),
|
|
"last_crawl_time": result.get("indexStatusResult", {}).get(
|
|
"lastCrawlTime"
|
|
),
|
|
"crawled_as": result.get("indexStatusResult", {}).get("crawledAs"),
|
|
"robots_txt_state": result.get("indexStatusResult", {}).get(
|
|
"robotsTxtState"
|
|
),
|
|
"mobile_usability": result.get("mobileUsabilityResult", {}).get(
|
|
"verdict", "Unknown"
|
|
),
|
|
"rich_results": result.get("richResultsResult", {}).get(
|
|
"verdict", "Unknown"
|
|
),
|
|
}
|
|
except Exception as e:
|
|
logger.error(f"Failed to inspect URL: {e}")
|
|
raise
|
|
|
|
def get_performance_summary(
|
|
self,
|
|
site_url: str,
|
|
days: int = 30,
|
|
) -> dict:
|
|
"""Get a summary of search performance."""
|
|
end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
|
|
start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
|
|
|
|
# Get overall stats
|
|
overall = self.get_search_analytics(
|
|
site_url=site_url,
|
|
dimensions=[],
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
# Get top queries
|
|
top_queries = self.get_top_queries(
|
|
site_url=site_url,
|
|
limit=10,
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
# Get top pages
|
|
top_pages = self.get_top_pages(
|
|
site_url=site_url,
|
|
limit=10,
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
# Get by device
|
|
by_device = self.get_search_analytics(
|
|
site_url=site_url,
|
|
dimensions=["device"],
|
|
start_date=start_date,
|
|
end_date=end_date,
|
|
)
|
|
|
|
device_breakdown = {}
|
|
for row in by_device.rows:
|
|
device = row["keys"][0]
|
|
device_breakdown[device] = {
|
|
"clicks": row.get("clicks", 0),
|
|
"impressions": row.get("impressions", 0),
|
|
"ctr": row.get("ctr", 0),
|
|
"position": row.get("position", 0),
|
|
}
|
|
|
|
return {
|
|
"period": f"{start_date} to {end_date}",
|
|
"total_clicks": overall.total_clicks,
|
|
"total_impressions": overall.total_impressions,
|
|
"average_ctr": overall.average_ctr,
|
|
"average_position": overall.average_position,
|
|
"top_queries": top_queries,
|
|
"top_pages": top_pages,
|
|
"by_device": device_breakdown,
|
|
}
|
|
|
|
|
|
def main():
    """Command-line entry point for trying out the Search Console client.

    Parses ``--site``, ``--action``, ``--url`` and ``--days`` and prints the
    result of the chosen action. ``--days`` applies to the summary, queries
    and pages actions. ``--action inspect`` requires ``--url``; omitting it is
    reported as a usage error instead of silently doing nothing.
    """
    parser = argparse.ArgumentParser(description="Google Search Console Client")
    parser.add_argument("--site", "-s", required=True, help="Site URL")
    parser.add_argument(
        "--action",
        "-a",
        default="summary",
        choices=["summary", "queries", "pages", "sitemaps", "inspect"],
        help="Action to perform",
    )
    parser.add_argument("--url", help="URL to inspect")
    parser.add_argument("--days", type=int, default=30, help="Days of data")

    args = parser.parse_args()

    # Validate before creating the client so a bad invocation fails fast.
    if args.action == "inspect" and not args.url:
        parser.error("--url is required when --action is inspect")

    client = SearchConsoleClient()

    if args.action == "summary":
        summary = client.get_performance_summary(args.site, args.days)
        print(json.dumps(summary, indent=2, default=str))

    elif args.action in ("queries", "pages"):
        # Same window the summary uses: `days` ago through yesterday.
        now = datetime.now()
        end_date = (now - timedelta(days=1)).strftime("%Y-%m-%d")
        start_date = (now - timedelta(days=args.days)).strftime("%Y-%m-%d")

        if args.action == "queries":
            queries = client.get_top_queries(
                args.site, start_date=start_date, end_date=end_date
            )
            for q in queries[:20]:
                print(f"{q['query']}: {q['clicks']} clicks, pos {q['position']:.1f}")
        else:
            pages = client.get_top_pages(
                args.site, start_date=start_date, end_date=end_date
            )
            for p in pages[:20]:
                print(f"{p['page']}: {p['clicks']} clicks, pos {p['position']:.1f}")

    elif args.action == "sitemaps":
        sitemaps = client.get_sitemaps(args.site)
        for sm in sitemaps:
            print(f"{sm.path}: errors={sm.errors}, warnings={sm.warnings}")

    elif args.action == "inspect":
        result = client.inspect_url(args.site, args.url)
        print(json.dumps(result, indent=2))


# Run the CLI only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()