Files
our-claude-skills/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/gsc_client.py
Andrew Yim eea49f9f8c refactor(skills): Restructure skills to dual-platform architecture
Major refactoring of ourdigital-custom-skills with new numbering system:

## Structure Changes
- Each skill now has code/ (Claude Code) and desktop/ (Claude Desktop) versions
- New progressive numbering: 01-09 General, 10-19 SEO, 20-29 GTM, 30-39 OurDigital, 40-49 Jamie

## Skill Reorganization
- 01-notion-organizer (from 02)
- 10-18: SEO tools split into focused skills (technical, on-page, local, schema, vitals, gsc, gateway)
- 20-21: GTM audit and manager
- 30-32: OurDigital designer, research, presentation
- 40-41: Jamie brand editor and audit

## New Files
- .claude/commands/: Slash command definitions for all skills
- CLAUDE.md: Updated with new skill structure documentation
- REFACTORING_PLAN.md: Migration documentation
- COMPATIBILITY_REPORT.md, SKILLS_COMPARISON.md: Analysis docs

## Removed
- Old skill directories (02-05, 10-14, 20-21 old numbering)
- Consolidated into new structure with _archive/ for reference

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 01:58:24 +09:00

410 lines
13 KiB
Python

"""
Google Search Console Client
============================
Purpose: Interact with Google Search Console API for SEO data
Python: 3.10+
Usage:
    from gsc_client import SearchConsoleClient
    client = SearchConsoleClient()
    data = client.get_search_analytics("sc-domain:example.com")
"""
import logging
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from typing import Any
from google.oauth2 import service_account
from googleapiclient.discovery import build
from base_client import config
# Configure the root logger as soon as this module is imported: INFO level,
# one "timestamp - level - message" line per record.
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger, named after this module, used for error/info reporting
# by the client code in this file.
logger = logging.getLogger(__name__)
@dataclass
class SearchAnalyticsResult:
    """Container for the outcome of one Search Analytics query.

    Attributes:
        rows: Row dicts belonging to the result; every instance gets its
            own fresh empty list when none is supplied.
        total_clicks: Click count for the result as a whole.
        total_impressions: Impression count for the result as a whole.
        average_ctr: Average click-through rate for the result.
        average_position: Average search position for the result.
    """

    rows: list[dict] = field(default_factory=list)
    total_clicks: int = 0
    total_impressions: int = 0
    average_ctr: float = 0.0
    average_position: float = 0.0
@dataclass
class SitemapInfo:
"""Sitemap information from Search Console."""
path: str
last_submitted: str | None = None
last_downloaded: str | None = None
is_pending: bool = False
is_sitemaps_index: bool = False
warnings: int = 0
errors: int = 0
class SearchConsoleClient:
    """Client for the Google Search Console API.

    The API service object is built lazily on first access to ``service``
    from a service-account key file, so constructing the client itself does
    not read the credentials file.
    """

    # Default OAuth scope: read-only. Sufficient for every query method here,
    # but NOT for ``submit_sitemap`` (see that method).
    SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]

    def __init__(
        self,
        credentials_path: str | None = None,
        *,
        scopes: list[str] | None = None,
    ):
        """
        Initialize Search Console client.

        Args:
            credentials_path: Path to service account JSON key. Falls back to
                ``config.google_credentials_path`` when omitted.
            scopes: OAuth scopes to request. Defaults to ``SCOPES``
                (read-only). Pass ``["https://www.googleapis.com/auth/webmasters"]``
                when write operations such as ``submit_sitemap`` are needed.
        """
        self.credentials_path = credentials_path or config.google_credentials_path
        self.scopes = list(scopes) if scopes else self.SCOPES
        self._service = None

    @property
    def service(self):
        """Get or create the Search Console service (cached after first use).

        Raises:
            ValueError: If no credentials path is configured.
        """
        if self._service is None:
            if not self.credentials_path:
                raise ValueError(
                    "Google credentials not configured. "
                    "Set GOOGLE_APPLICATION_CREDENTIALS environment variable."
                )
            credentials = service_account.Credentials.from_service_account_file(
                self.credentials_path,
                scopes=self.scopes,
            )
            self._service = build("searchconsole", "v1", credentials=credentials)
        return self._service

    def list_sites(self) -> list[dict]:
        """List all sites accessible to the service account."""
        response = self.service.sites().list().execute()
        return response.get("siteEntry", [])

    @staticmethod
    def _summarize_rows(rows: list[dict]) -> tuple[float, float, float, float]:
        """Return (clicks, impressions, ctr, position) aggregated over rows.

        CTR is total clicks / total impressions and position is weighted by
        impressions, so low-traffic rows do not skew the aggregate the way a
        plain per-row mean would. With no impressions both averages are 0.0.
        """
        total_clicks = sum(row.get("clicks", 0) for row in rows)
        total_impressions = sum(row.get("impressions", 0) for row in rows)
        if not total_impressions:
            return total_clicks, total_impressions, 0.0, 0.0
        weighted_position = sum(
            row.get("position", 0) * row.get("impressions", 0) for row in rows
        )
        return (
            total_clicks,
            total_impressions,
            total_clicks / total_impressions,
            weighted_position / total_impressions,
        )

    def get_search_analytics(
        self,
        site_url: str,
        start_date: str | None = None,
        end_date: str | None = None,
        dimensions: list[str] | None = None,
        row_limit: int = 25000,
        filters: list[dict] | None = None,
    ) -> SearchAnalyticsResult:
        """
        Get search analytics data.

        Args:
            site_url: Site URL (e.g., "sc-domain:example.com" or "https://example.com/")
            start_date: Start date (YYYY-MM-DD), defaults to 30 days ago
            end_date: End date (YYYY-MM-DD), defaults to yesterday
            dimensions: List of dimensions (query, page, country, device, date);
                defaults to ["query", "page"]. An empty list is sent as-is.
            row_limit: Maximum rows to return
            filters: Dimension filters, sent as a single filter group

        Returns:
            SearchAnalyticsResult with rows and summary stats. The summary
            covers only the rows returned by this single request.

        Raises:
            Exception: Any error raised by the API call is logged and re-raised.
        """
        # Sample the clock once so both defaults derive from the same instant.
        today = datetime.now()
        if not end_date:
            end_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
        if not start_date:
            start_date = (today - timedelta(days=30)).strftime("%Y-%m-%d")
        if dimensions is None:
            dimensions = ["query", "page"]

        request_body: dict[str, Any] = {
            "startDate": start_date,
            "endDate": end_date,
            "dimensions": dimensions,
            "rowLimit": row_limit,
        }
        if filters:
            request_body["dimensionFilterGroups"] = [{"filters": filters}]

        try:
            response = (
                self.service.searchanalytics()
                .query(siteUrl=site_url, body=request_body)
                .execute()
            )
        except Exception as e:
            logger.error("Failed to query search analytics: %s", e)
            raise

        rows = response.get("rows", [])
        total_clicks, total_impressions, avg_ctr, avg_position = self._summarize_rows(rows)
        return SearchAnalyticsResult(
            rows=rows,
            total_clicks=total_clicks,
            total_impressions=total_impressions,
            average_ctr=avg_ctr,
            average_position=avg_position,
        )

    def _top_rows_by_clicks(
        self,
        site_url: str,
        dimension: str,
        limit: int,
        start_date: str | None,
        end_date: str | None,
    ) -> list[dict]:
        """Query one dimension and return its rows flattened, most clicks first.

        Each returned dict uses the dimension name as the key for the row's
        first (only) key value, plus clicks/impressions/ctr/position.
        """
        result = self.get_search_analytics(
            site_url=site_url,
            dimensions=[dimension],
            row_limit=limit,
            start_date=start_date,
            end_date=end_date,
        )
        ranked = sorted(
            result.rows,
            key=lambda row: row.get("clicks", 0),
            reverse=True,
        )
        return [
            {
                dimension: row["keys"][0],
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0),
                "position": row.get("position", 0),
            }
            for row in ranked[:limit]
        ]

    def get_top_queries(
        self,
        site_url: str,
        limit: int = 100,
        start_date: str | None = None,
        end_date: str | None = None,
    ) -> list[dict]:
        """Get top search queries by clicks.

        Returns:
            Up to ``limit`` dicts with keys query, clicks, impressions, ctr
            and position, sorted by clicks descending.
        """
        return self._top_rows_by_clicks(site_url, "query", limit, start_date, end_date)

    def get_top_pages(
        self,
        site_url: str,
        limit: int = 100,
        start_date: str | None = None,
        end_date: str | None = None,
    ) -> list[dict]:
        """Get top pages by clicks.

        Returns:
            Up to ``limit`` dicts with keys page, clicks, impressions, ctr
            and position, sorted by clicks descending.
        """
        return self._top_rows_by_clicks(site_url, "page", limit, start_date, end_date)

    def get_sitemaps(self, site_url: str) -> list[SitemapInfo]:
        """Get list of sitemaps for a site.

        Raises:
            Exception: Any error raised by the API call is logged and re-raised.
        """
        try:
            response = self.service.sitemaps().list(siteUrl=site_url).execute()
        except Exception as e:
            logger.error("Failed to get sitemaps: %s", e)
            raise

        return [
            SitemapInfo(
                path=sm.get("path", ""),
                last_submitted=sm.get("lastSubmitted"),
                last_downloaded=sm.get("lastDownloaded"),
                is_pending=sm.get("isPending", False),
                is_sitemaps_index=sm.get("isSitemapsIndex", False),
                # The API serializes these int64 counters as strings;
                # convert so they match SitemapInfo's declared int fields.
                warnings=int(sm.get("warnings", 0) or 0),
                errors=int(sm.get("errors", 0) or 0),
            )
            for sm in response.get("sitemap", [])
        ]

    def submit_sitemap(self, site_url: str, sitemap_url: str) -> bool:
        """Submit a sitemap for indexing.

        Note: submission is a write operation and is rejected under the
        default read-only scope; construct the client with
        ``scopes=["https://www.googleapis.com/auth/webmasters"]`` to use it.

        Returns:
            True on success; False if the API call failed (the error is
            logged rather than raised).
        """
        try:
            self.service.sitemaps().submit(
                siteUrl=site_url,
                feedpath=sitemap_url,
            ).execute()
        except Exception as e:
            logger.error("Failed to submit sitemap: %s", e)
            return False
        logger.info("Submitted sitemap: %s", sitemap_url)
        return True

    def inspect_url(self, site_url: str, inspection_url: str) -> dict:
        """
        Inspect a URL's indexing status.

        Note: This uses the URL Inspection API which may have different quotas.

        Returns:
            Dict with keys url, indexing_state, last_crawl_time, crawled_as,
            robots_txt_state, mobile_usability and rich_results. Verdict-like
            fields fall back to "Unknown" when absent from the response.

        Raises:
            Exception: Any error raised by the API call is logged and re-raised.
        """
        try:
            response = self.service.urlInspection().index().inspect(
                body={
                    "inspectionUrl": inspection_url,
                    "siteUrl": site_url,
                }
            ).execute()
        except Exception as e:
            logger.error("Failed to inspect URL: %s", e)
            raise

        result = response.get("inspectionResult", {})
        index_status = result.get("indexStatusResult", {})
        return {
            "url": inspection_url,
            "indexing_state": index_status.get("coverageState", "Unknown"),
            "last_crawl_time": index_status.get("lastCrawlTime"),
            "crawled_as": index_status.get("crawledAs"),
            "robots_txt_state": index_status.get("robotsTxtState"),
            "mobile_usability": result.get("mobileUsabilityResult", {}).get(
                "verdict", "Unknown"
            ),
            "rich_results": result.get("richResultsResult", {}).get(
                "verdict", "Unknown"
            ),
        }

    def get_performance_summary(
        self,
        site_url: str,
        days: int = 30,
    ) -> dict:
        """Get a summary of search performance.

        Issues four queries over the same period (``days`` days ago through
        yesterday): overall totals, top 10 queries, top 10 pages and a
        per-device breakdown.

        Returns:
            Dict with keys period, total_clicks, total_impressions,
            average_ctr, average_position, top_queries, top_pages, by_device.
        """
        # Sample the clock once so start and end derive from the same instant.
        today = datetime.now()
        end_date = (today - timedelta(days=1)).strftime("%Y-%m-%d")
        start_date = (today - timedelta(days=days)).strftime("%Y-%m-%d")

        # No dimensions: the totals are not broken down by any dimension.
        overall = self.get_search_analytics(
            site_url=site_url,
            dimensions=[],
            start_date=start_date,
            end_date=end_date,
        )
        top_queries = self.get_top_queries(
            site_url=site_url,
            limit=10,
            start_date=start_date,
            end_date=end_date,
        )
        top_pages = self.get_top_pages(
            site_url=site_url,
            limit=10,
            start_date=start_date,
            end_date=end_date,
        )
        by_device = self.get_search_analytics(
            site_url=site_url,
            dimensions=["device"],
            start_date=start_date,
            end_date=end_date,
        )
        device_breakdown = {
            row["keys"][0]: {
                "clicks": row.get("clicks", 0),
                "impressions": row.get("impressions", 0),
                "ctr": row.get("ctr", 0),
                "position": row.get("position", 0),
            }
            for row in by_device.rows
        }

        return {
            "period": f"{start_date} to {end_date}",
            "total_clicks": overall.total_clicks,
            "total_impressions": overall.total_impressions,
            "average_ctr": overall.average_ctr,
            "average_position": overall.average_position,
            "top_queries": top_queries,
            "top_pages": top_pages,
            "by_device": device_breakdown,
        }
def main():
    """Command-line entry point for exercising the Search Console client.

    Parses ``--site``, ``--action``, ``--url`` and ``--days`` and prints the
    result of the chosen action. ``--days`` sets the start of the reporting
    window for the summary, queries and pages actions. ``--action inspect``
    requires ``--url``; omitting it is reported as a usage error instead of
    silently doing nothing.
    """
    import argparse
    import json

    parser = argparse.ArgumentParser(description="Google Search Console Client")
    parser.add_argument("--site", "-s", required=True, help="Site URL")
    parser.add_argument(
        "--action",
        "-a",
        default="summary",
        choices=["summary", "queries", "pages", "sitemaps", "inspect"],
        help="Action to perform",
    )
    parser.add_argument("--url", help="URL to inspect")
    parser.add_argument("--days", type=int, default=30, help="Days of data")
    args = parser.parse_args()

    # Validate before creating the client so a bad invocation fails fast.
    if args.action == "inspect" and not args.url:
        parser.error("--url is required when --action is inspect")

    client = SearchConsoleClient()

    if args.action == "summary":
        summary = client.get_performance_summary(args.site, args.days)
        print(json.dumps(summary, indent=2, default=str))
    elif args.action in ("queries", "pages"):
        # Honor --days here too, using the same window start as the summary.
        start_date = (datetime.now() - timedelta(days=args.days)).strftime("%Y-%m-%d")
        if args.action == "queries":
            queries = client.get_top_queries(args.site, start_date=start_date)
            for q in queries[:20]:
                print(f"{q['query']}: {q['clicks']} clicks, pos {q['position']:.1f}")
        else:
            pages = client.get_top_pages(args.site, start_date=start_date)
            for p in pages[:20]:
                print(f"{p['page']}: {p['clicks']} clicks, pos {p['position']:.1f}")
    elif args.action == "sitemaps":
        sitemaps = client.get_sitemaps(args.site)
        for sm in sitemaps:
            print(f"{sm.path}: errors={sm.errors}, warnings={sm.warnings}")
    elif args.action == "inspect":
        result = client.inspect_url(args.site, args.url)
        print(json.dumps(result, indent=2))


if __name__ == "__main__":
    main()