directory changes and restructuring

🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 02:01:41 +09:00
parent eea49f9f8c
commit 236be6c580
598 changed files with 0 additions and 0 deletions
--- a/custom-skills/16-seo-search-console/code/CLAUDE.md
+++ b/custom-skills/16-seo-search-console/code/CLAUDE.md
@@ -0,0 +1,122 @@
+# CLAUDE.md
+
+## Overview
+
+Google Search Console data retriever: search analytics (rankings, CTR, impressions), sitemap status, and index coverage.
+
+## Quick Start
+
+```bash
+pip install -r scripts/requirements.txt
+
+# Requires service account credentials
+# ~/.credential/ourdigital-seo-agent.json
+
+python scripts/gsc_client.py --site sc-domain:example.com --action summary
+```
+
+## Scripts
+
+| Script | Purpose |
+|--------|---------|
+| `gsc_client.py` | Search Console API client |
+| `base_client.py` | Shared utilities |
+
+## Configuration
+
+Service account setup:
+```bash
+# Credentials file location
+~/.credential/ourdigital-seo-agent.json
+
+# Add service account email to GSC property as user
+ourdigital-seo-agent@ourdigital-insights.iam.gserviceaccount.com
+```
+
+## Usage
+
+```bash
+# Performance summary (last 30 days by default; change the window with --days)
+python scripts/gsc_client.py --site sc-domain:example.com --action summary
+
+# Query-level data (prints the top 20 queries by clicks; the CLI has no --limit option)
+python scripts/gsc_client.py --site sc-domain:example.com --action queries
+
+# Page-level data
+python scripts/gsc_client.py --site sc-domain:example.com --action pages
+
+# Custom window (the CLI takes a day count via --days, not explicit start/end dates)
+python scripts/gsc_client.py --site sc-domain:example.com --action summary \
+  --days 90
+
+# Sitemap status
+python scripts/gsc_client.py --site sc-domain:example.com --action sitemaps
+
+# JSON output (the summary action always prints JSON; there is no --json flag)
+python scripts/gsc_client.py --site sc-domain:example.com --action summary
+```
+
+## Actions
+
+| Action | Description |
+|--------|-------------|
+| `summary` | Overview metrics (clicks, impressions, CTR, position) |
+| `queries` | Top search queries |
+| `pages` | Top pages by clicks |
+| `sitemaps` | Sitemap submission status |
+| `inspect` | Index status of a single URL (requires `--url`) |
+
+## Output: Summary
+
+```json
+{
+  "site": "sc-domain:example.com",
+  "date_range": "2024-01-01 to 2024-01-28",
+  "totals": {
+    "clicks": 15000,
+    "impressions": 500000,
+    "ctr": 3.0,
+    "position": 12.5
+  }
+}
+```
+
+## Output: Queries
+
+```json
+{
+  "queries": [
+    {
+      "query": "keyword",
+      "clicks": 500,
+      "impressions": 10000,
+      "ctr": 5.0,
+      "position": 3.2
+    }
+  ]
+}
+```
+
+## Rate Limits
+
+| Limit | Value |
+|-------|-------|
+| Queries per minute | 1,200 |
+| Rows per request | 25,000 |
+
+## Site Property Formats
+
+| Format | Example |
+|--------|---------|
+| Domain property | `sc-domain:example.com` |
+| URL prefix | `https://www.example.com/` |
+
+## Dependencies
+
+```
+google-api-python-client>=2.100.0
+google-auth>=2.23.0
+python-dotenv>=1.0.0
+rich>=13.7.0
+pandas>=2.1.0
+tenacity>=8.2.0
+```
--- a/custom-skills/16-seo-search-console/code/scripts/base_client.py
+++ b/custom-skills/16-seo-search-console/code/scripts/base_client.py
@@ -0,0 +1,207 @@
+"""
+Base Client - Shared async client utilities
+===========================================
+Purpose: Rate-limited async operations for API clients
+Python: 3.10+
+"""
+
+import asyncio
+import logging
+import os
+from asyncio import Semaphore
+from datetime import datetime
+from typing import Any, Callable, TypeVar
+
+from dotenv import load_dotenv
+from tenacity import (
+    retry,
+    stop_after_attempt,
+    wait_exponential,
+    retry_if_exception_type,
+)
+
+# Load variables from a .env file (if one is found) into the process
+# environment. Runs at import time.
+load_dotenv()
+
+# Configure root logging at import time: INFO level, with
+# "timestamp - level - message" formatted lines.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+
+# Generic type variable; not referenced by any definition in this module.
+T = TypeVar("T")
+
+
+class RateLimiter:
+    """Rate limiter using token bucket algorithm."""
+
+    def __init__(self, rate: float, per: float = 1.0):
+        """
+        Initialize rate limiter.
+
+        Args:
+            rate: Number of requests allowed
+            per: Time period in seconds (default: 1 second)
+        """
+        self.rate = rate
+        self.per = per
+        self.tokens = rate
+        self.last_update = datetime.now()
+        self._lock = asyncio.Lock()
+
+    async def acquire(self) -> None:
+        """Acquire a token, waiting if necessary."""
+        async with self._lock:
+            now = datetime.now()
+            elapsed = (now - self.last_update).total_seconds()
+            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
+            self.last_update = now
+
+            if self.tokens < 1:
+                wait_time = (1 - self.tokens) * (self.per / self.rate)
+                await asyncio.sleep(wait_time)
+                self.tokens = 0
+            else:
+                self.tokens -= 1
+
+
+class BaseAsyncClient:
+    """Base class for async API clients with rate limiting."""
+
+    def __init__(
+        self,
+        max_concurrent: int = 5,
+        requests_per_second: float = 3.0,
+        logger: logging.Logger | None = None,
+    ):
+        """
+        Initialize base client.
+
+        Args:
+            max_concurrent: Maximum concurrent requests
+            requests_per_second: Rate limit
+            logger: Logger instance
+        """
+        self.semaphore = Semaphore(max_concurrent)
+        self.rate_limiter = RateLimiter(requests_per_second)
+        self.logger = logger or logging.getLogger(self.__class__.__name__)
+        self.stats = {
+            "requests": 0,
+            "success": 0,
+            "errors": 0,
+            "retries": 0,
+        }
+
+    @retry(
+        stop=stop_after_attempt(3),
+        wait=wait_exponential(multiplier=1, min=2, max=10),
+        retry=retry_if_exception_type(Exception),
+    )
+    async def _rate_limited_request(
+        self,
+        coro: Callable[[], Any],
+    ) -> Any:
+        """Execute a request with rate limiting and retry."""
+        async with self.semaphore:
+            await self.rate_limiter.acquire()
+            self.stats["requests"] += 1
+            try:
+                result = await coro()
+                self.stats["success"] += 1
+                return result
+            except Exception as e:
+                self.stats["errors"] += 1
+                self.logger.error(f"Request failed: {e}")
+                raise
+
+    async def batch_requests(
+        self,
+        requests: list[Callable[[], Any]],
+        desc: str = "Processing",
+    ) -> list[Any]:
+        """Execute multiple requests concurrently."""
+        try:
+            from tqdm.asyncio import tqdm
+            has_tqdm = True
+        except ImportError:
+            has_tqdm = False
+
+        async def execute(req: Callable) -> Any:
+            try:
+                return await self._rate_limited_request(req)
+            except Exception as e:
+                return {"error": str(e)}
+
+        tasks = [execute(req) for req in requests]
+
+        if has_tqdm:
+            results = []
+            for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
+                result = await coro
+                results.append(result)
+            return results
+        else:
+            return await asyncio.gather(*tasks, return_exceptions=True)
+
+    def print_stats(self) -> None:
+        """Print request statistics."""
+        self.logger.info("=" * 40)
+        self.logger.info("Request Statistics:")
+        self.logger.info(f"  Total Requests: {self.stats['requests']}")
+        self.logger.info(f"  Successful: {self.stats['success']}")
+        self.logger.info(f"  Errors: {self.stats['errors']}")
+        self.logger.info("=" * 40)
+
+
+class ConfigManager:
+    """Manage API configuration and credentials."""
+
+    def __init__(self):
+        load_dotenv()
+
+    @property
+    def google_credentials_path(self) -> str | None:
+        """Get Google service account credentials path."""
+        # Prefer SEO-specific credentials, fallback to general credentials
+        seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
+        if os.path.exists(seo_creds):
+            return seo_creds
+        return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
+
+    @property
+    def pagespeed_api_key(self) -> str | None:
+        """Get PageSpeed Insights API key."""
+        return os.getenv("PAGESPEED_API_KEY")
+
+    @property
+    def custom_search_api_key(self) -> str | None:
+        """Get Custom Search API key."""
+        return os.getenv("CUSTOM_SEARCH_API_KEY")
+
+    @property
+    def custom_search_engine_id(self) -> str | None:
+        """Get Custom Search Engine ID."""
+        return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
+
+    @property
+    def notion_token(self) -> str | None:
+        """Get Notion API token."""
+        return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
+
+    def validate_google_credentials(self) -> bool:
+        """Validate Google credentials are configured."""
+        creds_path = self.google_credentials_path
+        if not creds_path:
+            return False
+        return os.path.exists(creds_path)
+
+    def get_required(self, key: str) -> str:
+        """Get required environment variable or raise error."""
+        value = os.getenv(key)
+        if not value:
+            raise ValueError(f"Missing required environment variable: {key}")
+        return value
+
+
+# Shared module-level ConfigManager instance, created at import time
+# (gsc_client.py imports it as `config`).
+config = ConfigManager()
--- a/custom-skills/16-seo-search-console/code/scripts/gsc_client.py
+++ b/custom-skills/16-seo-search-console/code/scripts/gsc_client.py
@@ -0,0 +1,409 @@
+"""
+Google Search Console Client
+============================
+Purpose: Interact with Google Search Console API for SEO data
+Python: 3.10+
+Usage:
+    from gsc_client import SearchConsoleClient
+    client = SearchConsoleClient()
+    data = client.get_search_analytics("sc-domain:example.com")
+"""
+
+import logging
+from dataclasses import dataclass, field
+from datetime import datetime, timedelta
+from typing import Any
+
+from google.oauth2 import service_account
+from googleapiclient.discovery import build
+
+from base_client import config
+
+# Request INFO-level root logging with "timestamp - level - message" lines,
+# then create the logger used throughout this module.
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(message)s",
+)
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SearchAnalyticsResult:
+    """Search analytics query result.
+
+    Built by SearchConsoleClient.get_search_analytics from the rows of a
+    single API response.
+    """
+
+    # Raw row dicts taken from the response's "rows" list.
+    rows: list[dict] = field(default_factory=list)
+    # Sum of the "clicks" values over the returned rows.
+    total_clicks: int = 0
+    # Sum of the "impressions" values over the returned rows.
+    total_impressions: int = 0
+    # Average CTR over the returned rows, as computed by get_search_analytics.
+    average_ctr: float = 0.0
+    # Average position over the returned rows, as computed by get_search_analytics.
+    average_position: float = 0.0
+
+
+@dataclass
+class SitemapInfo:
+    """Sitemap information from Search Console.
+
+    Populated by SearchConsoleClient.get_sitemaps, one instance per entry of
+    the response's "sitemap" list.
+    """
+
+    # Value of the entry's "path" field ("" when absent).
+    path: str
+    # Value of "lastSubmitted", passed through unparsed; None when absent.
+    last_submitted: str | None = None
+    # Value of "lastDownloaded", passed through unparsed; None when absent.
+    last_downloaded: str | None = None
+    # Value of "isPending".
+    is_pending: bool = False
+    # Value of "isSitemapsIndex".
+    is_sitemaps_index: bool = False
+    # Value of "warnings" (0 when absent).
+    warnings: int = 0
+    # Value of "errors" (0 when absent).
+    errors: int = 0
+
+
+class SearchConsoleClient:
+    """Client for Google Search Console API."""
+
+    SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]
+
+    def __init__(self, credentials_path: str | None = None):
+        """
+        Initialize Search Console client.
+
+        Args:
+            credentials_path: Path to service account JSON key
+        """
+        self.credentials_path = credentials_path or config.google_credentials_path
+        self._service = None
+
+    @property
+    def service(self):
+        """Get or create Search Console service."""
+        if self._service is None:
+            if not self.credentials_path:
+                raise ValueError(
+                    "Google credentials not configured. "
+                    "Set GOOGLE_APPLICATION_CREDENTIALS environment variable."
+                )
+
+            credentials = service_account.Credentials.from_service_account_file(
+                self.credentials_path,
+                scopes=self.SCOPES,
+            )
+            self._service = build("searchconsole", "v1", credentials=credentials)
+
+        return self._service
+
+    def list_sites(self) -> list[dict]:
+        """List all sites accessible to the service account."""
+        response = self.service.sites().list().execute()
+        return response.get("siteEntry", [])
+
+    def get_search_analytics(
+        self,
+        site_url: str,
+        start_date: str | None = None,
+        end_date: str | None = None,
+        dimensions: list[str] | None = None,
+        row_limit: int = 25000,
+        filters: list[dict] | None = None,
+    ) -> SearchAnalyticsResult:
+        """
+        Get search analytics data.
+
+        Args:
+            site_url: Site URL (e.g., "sc-domain:example.com" or "https://example.com/")
+            start_date: Start date (YYYY-MM-DD), defaults to 30 days ago
+            end_date: End date (YYYY-MM-DD), defaults to yesterday
+            dimensions: List of dimensions (query, page, country, device, date)
+            row_limit: Maximum rows to return
+            filters: Dimension filters
+
+        Returns:
+            SearchAnalyticsResult with rows and summary stats
+        """
+        # Default date range: last 30 days
+        if not end_date:
+            end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
+        if not start_date:
+            start_date = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d")
+
+        # Default dimensions
+        if dimensions is None:
+            dimensions = ["query", "page"]
+
+        request_body = {
+            "startDate": start_date,
+            "endDate": end_date,
+            "dimensions": dimensions,
+            "rowLimit": row_limit,
+        }
+
+        if filters:
+            request_body["dimensionFilterGroups"] = [{"filters": filters}]
+
+        try:
+            response = self.service.searchanalytics().query(
+                siteUrl=site_url,
+                body=request_body,
+            ).execute()
+        except Exception as e:
+            logger.error(f"Failed to query search analytics: {e}")
+            raise
+
+        rows = response.get("rows", [])
+
+        # Calculate totals
+        total_clicks = sum(row.get("clicks", 0) for row in rows)
+        total_impressions = sum(row.get("impressions", 0) for row in rows)
+        total_ctr = sum(row.get("ctr", 0) for row in rows)
+        total_position = sum(row.get("position", 0) for row in rows)
+
+        avg_ctr = total_ctr / len(rows) if rows else 0
+        avg_position = total_position / len(rows) if rows else 0
+
+        return SearchAnalyticsResult(
+            rows=rows,
+            total_clicks=total_clicks,
+            total_impressions=total_impressions,
+            average_ctr=avg_ctr,
+            average_position=avg_position,
+        )
+
+    def get_top_queries(
+        self,
+        site_url: str,
+        limit: int = 100,
+        start_date: str | None = None,
+        end_date: str | None = None,
+    ) -> list[dict]:
+        """Get top search queries by clicks."""
+        result = self.get_search_analytics(
+            site_url=site_url,
+            dimensions=["query"],
+            row_limit=limit,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Sort by clicks
+        sorted_rows = sorted(
+            result.rows,
+            key=lambda x: x.get("clicks", 0),
+            reverse=True,
+        )
+
+        return [
+            {
+                "query": row["keys"][0],
+                "clicks": row.get("clicks", 0),
+                "impressions": row.get("impressions", 0),
+                "ctr": row.get("ctr", 0),
+                "position": row.get("position", 0),
+            }
+            for row in sorted_rows[:limit]
+        ]
+
+    def get_top_pages(
+        self,
+        site_url: str,
+        limit: int = 100,
+        start_date: str | None = None,
+        end_date: str | None = None,
+    ) -> list[dict]:
+        """Get top pages by clicks."""
+        result = self.get_search_analytics(
+            site_url=site_url,
+            dimensions=["page"],
+            row_limit=limit,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        sorted_rows = sorted(
+            result.rows,
+            key=lambda x: x.get("clicks", 0),
+            reverse=True,
+        )
+
+        return [
+            {
+                "page": row["keys"][0],
+                "clicks": row.get("clicks", 0),
+                "impressions": row.get("impressions", 0),
+                "ctr": row.get("ctr", 0),
+                "position": row.get("position", 0),
+            }
+            for row in sorted_rows[:limit]
+        ]
+
+    def get_sitemaps(self, site_url: str) -> list[SitemapInfo]:
+        """Get list of sitemaps for a site."""
+        try:
+            response = self.service.sitemaps().list(siteUrl=site_url).execute()
+        except Exception as e:
+            logger.error(f"Failed to get sitemaps: {e}")
+            raise
+
+        sitemaps = []
+        for sm in response.get("sitemap", []):
+            sitemaps.append(SitemapInfo(
+                path=sm.get("path", ""),
+                last_submitted=sm.get("lastSubmitted"),
+                last_downloaded=sm.get("lastDownloaded"),
+                is_pending=sm.get("isPending", False),
+                is_sitemaps_index=sm.get("isSitemapsIndex", False),
+                warnings=sm.get("warnings", 0),
+                errors=sm.get("errors", 0),
+            ))
+
+        return sitemaps
+
+    def submit_sitemap(self, site_url: str, sitemap_url: str) -> bool:
+        """Submit a sitemap for indexing."""
+        try:
+            self.service.sitemaps().submit(
+                siteUrl=site_url,
+                feedpath=sitemap_url,
+            ).execute()
+            logger.info(f"Submitted sitemap: {sitemap_url}")
+            return True
+        except Exception as e:
+            logger.error(f"Failed to submit sitemap: {e}")
+            return False
+
+    def inspect_url(self, site_url: str, inspection_url: str) -> dict:
+        """
+        Inspect a URL's indexing status.
+
+        Note: This uses the URL Inspection API which may have different quotas.
+        """
+        try:
+            response = self.service.urlInspection().index().inspect(
+                body={
+                    "inspectionUrl": inspection_url,
+                    "siteUrl": site_url,
+                }
+            ).execute()
+
+            result = response.get("inspectionResult", {})
+
+            return {
+                "url": inspection_url,
+                "indexing_state": result.get("indexStatusResult", {}).get(
+                    "coverageState", "Unknown"
+                ),
+                "last_crawl_time": result.get("indexStatusResult", {}).get(
+                    "lastCrawlTime"
+                ),
+                "crawled_as": result.get("indexStatusResult", {}).get("crawledAs"),
+                "robots_txt_state": result.get("indexStatusResult", {}).get(
+                    "robotsTxtState"
+                ),
+                "mobile_usability": result.get("mobileUsabilityResult", {}).get(
+                    "verdict", "Unknown"
+                ),
+                "rich_results": result.get("richResultsResult", {}).get(
+                    "verdict", "Unknown"
+                ),
+            }
+        except Exception as e:
+            logger.error(f"Failed to inspect URL: {e}")
+            raise
+
+    def get_performance_summary(
+        self,
+        site_url: str,
+        days: int = 30,
+    ) -> dict:
+        """Get a summary of search performance."""
+        end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d")
+        start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d")
+
+        # Get overall stats
+        overall = self.get_search_analytics(
+            site_url=site_url,
+            dimensions=[],
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Get top queries
+        top_queries = self.get_top_queries(
+            site_url=site_url,
+            limit=10,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Get top pages
+        top_pages = self.get_top_pages(
+            site_url=site_url,
+            limit=10,
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        # Get by device
+        by_device = self.get_search_analytics(
+            site_url=site_url,
+            dimensions=["device"],
+            start_date=start_date,
+            end_date=end_date,
+        )
+
+        device_breakdown = {}
+        for row in by_device.rows:
+            device = row["keys"][0]
+            device_breakdown[device] = {
+                "clicks": row.get("clicks", 0),
+                "impressions": row.get("impressions", 0),
+                "ctr": row.get("ctr", 0),
+                "position": row.get("position", 0),
+            }
+
+        return {
+            "period": f"{start_date} to {end_date}",
+            "total_clicks": overall.total_clicks,
+            "total_impressions": overall.total_impressions,
+            "average_ctr": overall.average_ctr,
+            "average_position": overall.average_position,
+            "top_queries": top_queries,
+            "top_pages": top_pages,
+            "by_device": device_breakdown,
+        }
+
+
+def main():
+    """Test the Search Console client."""
+    import argparse
+
+    parser = argparse.ArgumentParser(description="Google Search Console Client")
+    parser.add_argument("--site", "-s", required=True, help="Site URL")
+    parser.add_argument("--action", "-a", default="summary",
+                       choices=["summary", "queries", "pages", "sitemaps", "inspect"],
+                       help="Action to perform")
+    parser.add_argument("--url", help="URL to inspect")
+    parser.add_argument("--days", type=int, default=30, help="Days of data")
+
+    args = parser.parse_args()
+
+    client = SearchConsoleClient()
+
+    if args.action == "summary":
+        summary = client.get_performance_summary(args.site, args.days)
+        import json
+        print(json.dumps(summary, indent=2, default=str))
+
+    elif args.action == "queries":
+        queries = client.get_top_queries(args.site)
+        for q in queries[:20]:
+            print(f"{q['query']}: {q['clicks']} clicks, pos {q['position']:.1f}")
+
+    elif args.action == "pages":
+        pages = client.get_top_pages(args.site)
+        for p in pages[:20]:
+            print(f"{p['page']}: {p['clicks']} clicks, pos {p['position']:.1f}")
+
+    elif args.action == "sitemaps":
+        sitemaps = client.get_sitemaps(args.site)
+        for sm in sitemaps:
+            print(f"{sm.path}: errors={sm.errors}, warnings={sm.warnings}")
+
+    elif args.action == "inspect" and args.url:
+        result = client.inspect_url(args.site, args.url)
+        import json
+        print(json.dumps(result, indent=2))
+
+
+if __name__ == "__main__":
+    main()
--- a/custom-skills/16-seo-search-console/code/scripts/requirements.txt
+++ b/custom-skills/16-seo-search-console/code/scripts/requirements.txt
@@ -0,0 +1,7 @@
+# 16-seo-search-console dependencies
+google-api-python-client>=2.100.0
+google-auth>=2.23.0
+pandas>=2.1.0
+python-dotenv>=1.0.0
+rich>=13.7.0
+tenacity>=8.2.0
+typer>=0.9.0