"""
Google Search Console Client
============================

Purpose: Interact with Google Search Console API for SEO data
Python: 3.10+

Usage:
    from gsc_client import SearchConsoleClient
    client = SearchConsoleClient()
    data = client.get_search_analytics("sc-domain:example.com")
"""

import json
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any

from google.oauth2 import service_account
from googleapiclient.discovery import build

from base_client import config

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)


@dataclass
class SearchAnalyticsResult:
    """Search analytics query result.

    Holds the raw API rows plus summary statistics computed client-side.
    """

    rows: list[dict] = field(default_factory=list)
    total_clicks: int = 0
    total_impressions: int = 0
    # NOTE(review): average_ctr/average_position are unweighted means over the
    # returned rows (see get_search_analytics), NOT clicks/impressions — they
    # will differ from the Search Console UI totals when rows vary in volume.
    average_ctr: float = 0.0
    average_position: float = 0.0


@dataclass
class SitemapInfo:
    """Sitemap information from Search Console."""

    path: str
    last_submitted: str | None = None
    last_downloaded: str | None = None
    is_pending: bool = False
    is_sitemaps_index: bool = False
    warnings: int = 0
    errors: int = 0


class SearchConsoleClient:
    """Client for Google Search Console API.

    Authenticates with a service-account key (read-only webmasters scope)
    and lazily builds the underlying Google API service on first use.
    """

    SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]

    def __init__(self, credentials_path: str | None = None):
        """
        Initialize Search Console client.

        Args:
            credentials_path: Path to service account JSON key.
                Falls back to config.google_credentials_path when omitted.
        """
        self.credentials_path = credentials_path or config.google_credentials_path
        self._service = None  # built lazily by the `service` property

    @property
    def service(self):
        """Get or create Search Console service.

        Raises:
            ValueError: If no credentials path is configured.
        """
        if self._service is None:
            if not self.credentials_path:
                raise ValueError(
                    "Google credentials not configured. "
                    "Set GOOGLE_APPLICATION_CREDENTIALS environment variable."
                )
            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=self.SCOPES,
            )
            self._service = build("searchconsole", "v1", credentials=credentials)
        return self._service

    def list_sites(self) -> list[dict]:
        """List all sites accessible to the service account."""
        response = self.service.sites().list().execute()
        return response.get("siteEntry", [])

    def get_search_analytics(
        self,
        site_url: str,
        start_date: str | None = None,
        end_date: str | None = None,
        dimensions: list[str] | None = None,
        row_limit: int = 25000,
        filters: list[dict] | None = None,
    ) -> SearchAnalyticsResult:
        """
        Get search analytics data.

        Args:
            site_url: Site URL (e.g., "sc-domain:example.com" or "https://example.com/")
            start_date: Start date (YYYY-MM-DD), defaults to 30 days ago
            end_date: End date (YYYY-MM-DD), defaults to yesterday
            dimensions: List of dimensions (query, page, country, device, date).
                Pass [] explicitly for overall totals with no breakdown.
            row_limit: Maximum rows to return
            filters: Dimension filters

        Returns:
            SearchAnalyticsResult with rows and summary stats

        Raises:
            Exception: Re-raises any API error after logging it.
        """
        # Default date range: last 30 days, ending yesterday (GSC data lags).
        if not end_date:
            end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        if not start_date:
            start_date = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")

        # Default dimensions; note `is None` so an explicit [] is preserved.
        if dimensions is None:
            dimensions = ["query", "page"]

        request_body = {
            "startDate": start_date,
            "endDate": end_date,
            "dimensions": dimensions,
            "rowLimit": row_limit,
        }

        if filters:
            request_body["dimensionFilterGroups"] = [{"filters": filters}]

        try:
            response = self.service.searchanalytics().query(
                siteUrl=site_url,
                body=request_body,
            ).execute()
        except Exception as e:
            logger.error(f"Failed to query search analytics: {e}")
            raise

        rows = response.get("rows", [])

        # Calculate totals. CTR/position are simple per-row means (unweighted
        # by impressions) — see note on SearchAnalyticsResult.
        total_clicks = sum(row.get("clicks", 0) for row in rows)
        total_impressions = sum(row.get("impressions", 0) for row in rows)
        total_ctr = sum(row.get("ctr", 0) for row in rows)
        total_position = sum(row.get("position", 0) for row in rows)

        avg_ctr = total_ctr / len(rows) if rows else 0
        avg_position = total_position / len(rows) if rows else 0

        return SearchAnalyticsResult(
            rows=rows,
            total_clicks=total_clicks,
            total_impressions=total_impressions,
            average_ctr=avg_ctr,
            average_position=avg_position,
        )

    def get_top_queries(
        self,
        site_url: str,
        limit: int = 100,
        start_date: str | None = None,
        end_date: str | None = None,
    ) -> list[dict]:
        """Get top search queries by clicks.

        Returns at most `limit` dicts with keys: query, clicks, impressions,
        ctr, position.
        """
        result = self.get_search_analytics(
            site_url=site_url,
            dimensions=["query"],
            row_limit=limit,
            start_date=start_date,
            end_date=end_date,
        )

        # Sort by clicks (defensive — the API already orders by clicks).
        sorted_rows = sorted(
            result.rows,
            key=lambda x: x.get("clicks", 0),
            reverse=True,
        )

        return [
            {
                "query": row["keys"][0],
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0),
                "position": row.get("position", 0),
            }
            for row in sorted_rows[:limit]
        ]

    def get_top_pages(
        self,
        site_url: str,
        limit: int = 100,
        start_date: str | None = None,
        end_date: str | None = None,
    ) -> list[dict]:
        """Get top pages by clicks.

        Returns at most `limit` dicts with keys: page, clicks, impressions,
        ctr, position.
        """
        result = self.get_search_analytics(
            site_url=site_url,
            dimensions=["page"],
            row_limit=limit,
            start_date=start_date,
            end_date=end_date,
        )

        sorted_rows = sorted(
            result.rows,
            key=lambda x: x.get("clicks", 0),
            reverse=True,
        )

        return [
            {
                "page": row["keys"][0],
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0),
                "position": row.get("position", 0),
            }
            for row in sorted_rows[:limit]
        ]

    def get_sitemaps(self, site_url: str) -> list[SitemapInfo]:
        """Get list of sitemaps for a site.

        Raises:
            Exception: Re-raises any API error after logging it.
        """
        try:
            response = self.service.sitemaps().list(siteUrl=site_url).execute()
        except Exception as e:
            logger.error(f"Failed to get sitemaps: {e}")
            raise

        sitemaps = []
        for sm in response.get("sitemap", []):
            sitemaps.append(SitemapInfo(
                path=sm.get("path", ""),
                last_submitted=sm.get("lastSubmitted"),
                last_downloaded=sm.get("lastDownloaded"),
                is_pending=sm.get("isPending", False),
                is_sitemaps_index=sm.get("isSitemapsIndex", False),
                warnings=sm.get("warnings", 0),
                errors=sm.get("errors", 0),
            ))

        return sitemaps

    def submit_sitemap(self, site_url: str, sitemap_url: str) -> bool:
        """Submit a sitemap for indexing.

        Returns:
            True on success, False on any API error (best-effort; never raises).
        """
        try:
            self.service.sitemaps().submit(
                siteUrl=site_url,
                feedpath=sitemap_url,
            ).execute()
            logger.info(f"Submitted sitemap: {sitemap_url}")
            return True
        except Exception as e:
            logger.error(f"Failed to submit sitemap: {e}")
            return False

    def inspect_url(self, site_url: str, inspection_url: str) -> dict:
        """
        Inspect a URL's indexing status.

        Note: This uses the URL Inspection API which may have different quotas.

        Returns:
            Flattened dict with indexing state, crawl info, mobile usability,
            and rich-results verdicts.

        Raises:
            Exception: Re-raises any API error after logging it.
        """
        try:
            response = self.service.urlInspection().index().inspect(
                body={
                    "inspectionUrl": inspection_url,
                    "siteUrl": site_url,
                }
            ).execute()

            result = response.get("inspectionResult", {})
            return {
                "url": inspection_url,
                "indexing_state": result.get("indexStatusResult", {}).get(
                    "coverageState", "Unknown"
                ),
                "last_crawl_time": result.get("indexStatusResult", {}).get(
                    "lastCrawlTime"
                ),
                "crawled_as": result.get("indexStatusResult", {}).get("crawledAs"),
                "robots_txt_state": result.get("indexStatusResult", {}).get(
                    "robotsTxtState"
                ),
                "mobile_usability": result.get("mobileUsabilityResult", {}).get(
                    "verdict", "Unknown"
                ),
                "rich_results": result.get("richResultsResult", {}).get(
                    "verdict", "Unknown"
                ),
            }
        except Exception as e:
            logger.error(f"Failed to inspect URL: {e}")
            raise

    def get_performance_summary(
        self,
        site_url: str,
        days: int = 30,
    ) -> dict:
        """Get a summary of search performance.

        Combines overall totals, top-10 queries, top-10 pages, and a
        per-device breakdown for the last `days` days (ending yesterday).
        """
        end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
        start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")

        # Get overall stats (empty dimensions list = totals only)
        overall = self.get_search_analytics(
            site_url=site_url,
            dimensions=[],
            start_date=start_date,
            end_date=end_date,
        )

        # Get top queries
        top_queries = self.get_top_queries(
            site_url=site_url,
            limit=10,
            start_date=start_date,
            end_date=end_date,
        )

        # Get top pages
        top_pages = self.get_top_pages(
            site_url=site_url,
            limit=10,
            start_date=start_date,
            end_date=end_date,
        )

        # Get by device
        by_device = self.get_search_analytics(
            site_url=site_url,
            dimensions=["device"],
            start_date=start_date,
            end_date=end_date,
        )

        device_breakdown = {}
        for row in by_device.rows:
            device = row["keys"][0]
            device_breakdown[device] = {
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0),
                "position": row.get("position", 0),
            }

        return {
            "period": f"{start_date} to {end_date}",
            "total_clicks": overall.total_clicks,
            "total_impressions": overall.total_impressions,
            "average_ctr": overall.average_ctr,
            "average_position": overall.average_position,
            "top_queries": top_queries,
            "top_pages": top_pages,
            "by_device": device_breakdown,
        }


def main():
    """Test the Search Console client."""
    import argparse

    parser = argparse.ArgumentParser(description="Google Search Console Client")
    parser.add_argument("--site", "-s", required=True, help="Site URL")
    parser.add_argument("--action", "-a", default="summary",
                        choices=["summary", "queries", "pages", "sitemaps", "inspect"],
                        help="Action to perform")
    parser.add_argument("--url", help="URL to inspect")
    parser.add_argument("--days", type=int, default=30, help="Days of data")

    args = parser.parse_args()

    client = SearchConsoleClient()

    if args.action == "summary":
        summary = client.get_performance_summary(args.site, args.days)
        print(json.dumps(summary, indent=2, default=str))
    elif args.action == "queries":
        queries = client.get_top_queries(args.site)
        for q in queries[:20]:
            print(f"{q['query']}: {q['clicks']} clicks, pos {q['position']:.1f}")
    elif args.action == "pages":
        pages = client.get_top_pages(args.site)
        for p in pages[:20]:
            print(f"{p['page']}: {p['clicks']} clicks, pos {p['position']:.1f}")
    elif args.action == "sitemaps":
        sitemaps = client.get_sitemaps(args.site)
        for sm in sitemaps:
            print(f"{sm.path}: errors={sm.errors}, warnings={sm.warnings}")
    elif args.action == "inspect":
        # BUGFIX: previously `action == "inspect" and args.url` exited
        # silently with status 0 when --url was omitted; fail loudly instead.
        if not args.url:
            parser.error("--url is required for the inspect action")
        result = client.inspect_url(args.site, args.url)
        print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()