Files
our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/kpi_aggregator.py
Andrew Yim a3ff965b87 Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking,
Link Building, Content Strategy, E-Commerce SEO, KPI Framework,
International SEO, AI Visibility, Knowledge Graph, Competitor Intel,
and Crawl Budget. ~20K lines of Python across 25 domain scripts.
Updated skill 11 pipeline table and repo CLAUDE.md.
Enhanced skill 18 local SEO workflow from jamie.clinic audit.

Note: Skill 26 hreflang_validator.py pending (content filter block).

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 12:05:59 +09:00

759 lines
28 KiB
Python

"""
KPI Aggregator - Unified SEO KPI aggregation across all dimensions
==================================================================
Purpose: Aggregate KPIs from Ahrefs and other sources into a unified
dashboard with health scores, baselines, targets, and ROI.
Python: 3.10+
"""
import argparse
import asyncio
import json
import logging
import sys
from dataclasses import dataclass, field, asdict
from datetime import datetime
from pathlib import Path
from typing import Any
import aiohttp
from base_client import BaseAsyncClient, config
logger = logging.getLogger(__name__)
# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
@dataclass
class KpiMetric:
"""Single KPI metric with trend and target information."""
name: str
value: float
previous_value: float | None = None
change_pct: float | None = None
trend: str = "stable" # up, down, stable
target_30d: float | None = None
target_60d: float | None = None
target_90d: float | None = None
def compute_trend(self) -> None:
"""Compute trend direction and change percentage."""
if self.previous_value is not None and self.previous_value != 0:
self.change_pct = round(
((self.value - self.previous_value) / abs(self.previous_value)) * 100, 2
)
if self.change_pct > 2.0:
self.trend = "up"
elif self.change_pct < -2.0:
self.trend = "down"
else:
self.trend = "stable"
@dataclass
class KpiDimension:
    """A dimension grouping multiple KPI metrics."""

    name: str
    metrics: list[KpiMetric] = field(default_factory=list)
    weight: float = 0.0
    score: float = 0.0

    def compute_score(self) -> float:
        """Compute dimension score (0-100) based on metrics health.

        Each metric earns base points by trend (up=80, stable=60,
        anything else=35) plus a +10 bonus (capped at 100) when its value
        is positive; the dimension score is the rounded average.
        """
        if not self.metrics:
            self.score = 0.0
            return self.score
        trend_points = {"up": 80.0, "stable": 60.0}
        per_metric = []
        for metric in self.metrics:
            points = trend_points.get(metric.trend, 35.0)
            # Reward metrics that are positive and non-zero.
            if metric.value and metric.value > 0:
                points = min(100.0, points + 10.0)
            per_metric.append(points)
        self.score = round(sum(per_metric) / len(per_metric), 1)
        return self.score
@dataclass
class HealthScore:
    """Overall SEO health score."""

    # Weighted 0-100 score across all dimensions.
    overall: float = 0.0
    # Per-dimension scores keyed by dimension name (e.g. "traffic").
    dimensions: dict[str, float] = field(default_factory=dict)
    # "improving", "declining", or "stable", from metric trend counts.
    trend: str = "stable"
@dataclass
class RoiEstimate:
    """ROI estimation from Ahrefs traffic cost."""

    # Current organic traffic value in USD (raw Ahrefs "cost" / 100).
    traffic_value_usd: float = 0.0
    # Absolute USD change vs the baseline, when a baseline exists.
    traffic_value_change: float = 0.0
    # Mirrors traffic_value_usd, presented as a monthly estimate.
    estimated_monthly_value: float = 0.0
@dataclass
class KpiResult:
    """Complete KPI aggregation result."""

    # Target URL/domain that was analyzed.
    url: str = ""
    # Weighted overall health score (0-100).
    health_score: float = 0.0
    # "improving", "declining", or "stable".
    health_trend: str = "stable"
    # Per-dimension payload: {name: {"score", "weight", "metrics": {...}}}.
    kpis: dict[str, Any] = field(default_factory=dict)
    # Growth targets keyed "30_day"/"60_day"/"90_day".
    targets: dict[str, Any] = field(default_factory=dict)
    # Populated only when ROI estimation was requested.
    roi: RoiEstimate | None = None
    # Populated only when a baseline file was supplied and loaded.
    baseline_comparison: dict[str, Any] | None = None
    # Wins / concerns / recommendations summary for stakeholders.
    executive_summary: dict[str, Any] = field(default_factory=dict)
    # ISO-8601 timestamp of the aggregation run.
    timestamp: str = ""
    # Non-fatal errors collected while aggregating.
    errors: list[str] = field(default_factory=list)
# ---------------------------------------------------------------------------
# Dimension weights
# ---------------------------------------------------------------------------
# Relative weight of each dimension in the overall health score. Values sum
# to 1.0; calculate_health_score divides by the total weight of the
# dimensions actually present, so a missing dimension does not skew scores.
DIMENSION_WEIGHTS = {
    "traffic": 0.25,
    "rankings": 0.20,
    "technical": 0.20,
    "content": 0.15,
    "links": 0.15,
    "local": 0.05,
}
# ---------------------------------------------------------------------------
# KPI Aggregator
# ---------------------------------------------------------------------------
class KpiAggregator(BaseAsyncClient):
    """Aggregate SEO KPIs across all dimensions from Ahrefs data.

    Each ``get_*_kpis`` method builds one KpiDimension; ``aggregate`` runs
    them, scores the results, and assembles a KpiResult. All Ahrefs
    failures are downgraded to zero-valued metrics rather than raised, so
    a partial outage still yields a (degraded) dashboard.
    """

    # Ahrefs REST API v3 root; endpoint paths are appended in _ahrefs_get.
    AHREFS_BASE = "https://api.ahrefs.com/v3"

    def __init__(self, api_token: str | None = None):
        """Initialize with an API token.

        Falls back to the AHREFS_API_TOKEN config value when no token is
        passed; config.get_required presumably raises when it is absent —
        verify in base_client.
        """
        # Conservative limits — presumably sized for Ahrefs rate limiting;
        # TODO confirm against the account plan's quota.
        super().__init__(max_concurrent=3, requests_per_second=2.0)
        self.api_token = api_token or config.get_required("AHREFS_API_TOKEN")
        self.headers = {
            "Authorization": f"Bearer {self.api_token}",
            "Accept": "application/json",
        }

    # ----- Ahrefs API helpers -----
    async def _ahrefs_get(
        self, session: aiohttp.ClientSession, endpoint: str, params: dict
    ) -> dict:
        """Make an authenticated GET request to Ahrefs API.

        Never raises on HTTP errors: any non-200 status is logged and
        returned as ``{"error": "HTTP <status>", "detail": <body>}`` so
        collectors can fall back to zero-valued metrics.
        """
        url = f"{self.AHREFS_BASE}/{endpoint}"
        async with session.get(url, headers=self.headers, params=params) as resp:
            if resp.status != 200:
                text = await resp.text()
                # self.logger is assumed to be provided by BaseAsyncClient.
                self.logger.warning(f"Ahrefs {endpoint} returned {resp.status}: {text}")
                return {"error": f"HTTP {resp.status}", "detail": text}
            return await resp.json()

    # ----- Dimension collectors -----
    # NOTE(review): the four API-backed collectors below each fetch the same
    # site-explorer/metrics payload with identical params; caching one
    # response per aggregate() run would cut API usage 4x — confirm before
    # changing, since the collectors currently run concurrently.
    async def get_traffic_kpis(
        self, session: aiohttp.ClientSession, url: str
    ) -> KpiDimension:
        """Collect traffic KPIs via site-explorer-metrics.

        Produces organic_traffic and traffic_value_usd metrics; both fall
        back to 0.0 on API errors, and only organic_traffic is appended on
        an unexpected exception.
        """
        dim = KpiDimension(name="traffic", weight=DIMENSION_WEIGHTS["traffic"])
        try:
            data = await self._ahrefs_get(
                session,
                "site-explorer/metrics",
                {"target": url, "mode": "domain"},
            )
            if "error" not in data:
                # Tolerate both a nested "metrics" envelope and a flat body.
                metrics = data.get("metrics", data)
                organic = metrics.get("organic", {})
                organic_traffic = organic.get("traffic", 0)
                traffic_value_raw = organic.get("cost", 0)
                # cost / 100 — presumably Ahrefs reports cents; TODO confirm
                # against the Ahrefs API documentation.
                traffic_value_usd = traffic_value_raw / 100.0 if traffic_value_raw else 0.0
                dim.metrics.append(
                    KpiMetric(name="organic_traffic", value=float(organic_traffic))
                )
                dim.metrics.append(
                    KpiMetric(name="traffic_value_usd", value=round(traffic_value_usd, 2))
                )
            else:
                dim.metrics.append(KpiMetric(name="organic_traffic", value=0.0))
                dim.metrics.append(KpiMetric(name="traffic_value_usd", value=0.0))
        except Exception as exc:
            self.logger.error(f"Traffic KPI error: {exc}")
            dim.metrics.append(KpiMetric(name="organic_traffic", value=0.0))
        dim.compute_score()
        return dim

    async def get_ranking_kpis(
        self, session: aiohttp.ClientSession, url: str
    ) -> KpiDimension:
        """Collect ranking KPIs via site-explorer-metrics.

        visibility_score and top10_keywords are heuristics derived from the
        organic traffic/keyword counts, not API-reported values.
        """
        dim = KpiDimension(name="rankings", weight=DIMENSION_WEIGHTS["rankings"])
        try:
            data = await self._ahrefs_get(
                session,
                "site-explorer/metrics",
                {"target": url, "mode": "domain"},
            )
            if "error" not in data:
                metrics = data.get("metrics", data)
                organic = metrics.get("organic", {})
                keywords_total = organic.get("keywords", 0)
                # Estimate top10 as ~20% of total keywords
                top10_estimate = int(keywords_total * 0.20)
                # Visibility score heuristic: based on traffic relative to keywords
                traffic = organic.get("traffic", 0)
                # max(..., 1) guards the division when keywords_total is 0.
                visibility = min(100.0, (traffic / max(keywords_total, 1)) * 10)
                dim.metrics.append(
                    KpiMetric(name="visibility_score", value=round(visibility, 1))
                )
                dim.metrics.append(
                    KpiMetric(name="top10_keywords", value=float(top10_estimate))
                )
                dim.metrics.append(
                    KpiMetric(name="total_keywords", value=float(keywords_total))
                )
            else:
                dim.metrics.append(KpiMetric(name="visibility_score", value=0.0))
                dim.metrics.append(KpiMetric(name="top10_keywords", value=0.0))
        except Exception as exc:
            self.logger.error(f"Ranking KPI error: {exc}")
            dim.metrics.append(KpiMetric(name="visibility_score", value=0.0))
        dim.compute_score()
        return dim

    async def get_link_kpis(
        self, session: aiohttp.ClientSession, url: str
    ) -> KpiDimension:
        """Collect link KPIs via domain-rating and metrics.

        Makes two sequential API calls: domain rating first, then referring
        domains from the shared metrics endpoint.
        """
        dim = KpiDimension(name="links", weight=DIMENSION_WEIGHTS["links"])
        try:
            # Domain rating
            dr_data = await self._ahrefs_get(
                session,
                "site-explorer/domain-rating",
                {"target": url},
            )
            domain_rating = 0.0
            if "error" not in dr_data:
                # Accept either snake_case or camelCase response keys.
                domain_rating = float(
                    dr_data.get("domain_rating", dr_data.get("domainRating", 0))
                )
            dim.metrics.append(
                KpiMetric(name="domain_rating", value=round(domain_rating, 1))
            )
            # Referring domains from metrics
            metrics_data = await self._ahrefs_get(
                session,
                "site-explorer/metrics",
                {"target": url, "mode": "domain"},
            )
            ref_domains = 0
            if "error" not in metrics_data:
                metrics = metrics_data.get("metrics", metrics_data)
                ref_domains = metrics.get("refdomains", 0)
            dim.metrics.append(
                KpiMetric(name="referring_domains", value=float(ref_domains))
            )
        except Exception as exc:
            self.logger.error(f"Link KPI error: {exc}")
            dim.metrics.append(KpiMetric(name="domain_rating", value=0.0))
            dim.metrics.append(KpiMetric(name="referring_domains", value=0.0))
        dim.compute_score()
        return dim

    async def get_technical_kpis(
        self, session: aiohttp.ClientSession, url: str
    ) -> KpiDimension:
        """Collect technical KPIs (estimated from available data).

        No crawler data is available here, so technical_health_score is a
        coarse heuristic: 50 base, +25 if any organic traffic, +25 if any
        pages are reported.
        """
        dim = KpiDimension(name="technical", weight=DIMENSION_WEIGHTS["technical"])
        try:
            data = await self._ahrefs_get(
                session,
                "site-explorer/metrics",
                {"target": url, "mode": "domain"},
            )
            if "error" not in data:
                metrics = data.get("metrics", data)
                organic = metrics.get("organic", {})
                # "pages" may live at the top level or under "organic".
                pages_crawled = metrics.get("pages", organic.get("pages", 0))
                # Heuristic: technical health score from available data
                has_traffic = organic.get("traffic", 0) > 0
                has_pages = pages_crawled > 0
                tech_score = 50.0
                if has_traffic:
                    tech_score += 25.0
                if has_pages:
                    tech_score += 25.0
                dim.metrics.append(
                    KpiMetric(name="technical_health_score", value=round(tech_score, 1))
                )
                dim.metrics.append(
                    KpiMetric(name="pages_crawled", value=float(pages_crawled))
                )
            else:
                # Neutral 50.0 rather than 0.0: an API error is not evidence
                # of a technical problem on the site.
                dim.metrics.append(KpiMetric(name="technical_health_score", value=50.0))
                dim.metrics.append(KpiMetric(name="pages_crawled", value=0.0))
        except Exception as exc:
            self.logger.error(f"Technical KPI error: {exc}")
            dim.metrics.append(KpiMetric(name="technical_health_score", value=50.0))
        dim.compute_score()
        return dim

    async def get_content_kpis(
        self, session: aiohttp.ClientSession, url: str
    ) -> KpiDimension:
        """Collect content KPIs from available metrics.

        content_freshness_score is a heuristic from keyword density per
        page, not an actual recency measurement.
        """
        dim = KpiDimension(name="content", weight=DIMENSION_WEIGHTS["content"])
        try:
            data = await self._ahrefs_get(
                session,
                "site-explorer/metrics",
                {"target": url, "mode": "domain"},
            )
            if "error" not in data:
                metrics = data.get("metrics", data)
                organic = metrics.get("organic", {})
                pages = metrics.get("pages", organic.get("pages", 0))
                keywords = organic.get("keywords", 0)
                # Content freshness heuristic
                freshness = min(100.0, (keywords / max(pages, 1)) * 5) if pages else 0.0
                dim.metrics.append(
                    KpiMetric(name="indexed_pages", value=float(pages))
                )
                dim.metrics.append(
                    KpiMetric(name="content_freshness_score", value=round(freshness, 1))
                )
                dim.metrics.append(
                    KpiMetric(name="keywords_per_page", value=round(keywords / max(pages, 1), 2))
                )
            else:
                dim.metrics.append(KpiMetric(name="indexed_pages", value=0.0))
                dim.metrics.append(KpiMetric(name="content_freshness_score", value=0.0))
        except Exception as exc:
            self.logger.error(f"Content KPI error: {exc}")
            dim.metrics.append(KpiMetric(name="indexed_pages", value=0.0))
        dim.compute_score()
        return dim

    async def get_local_kpis(self, url: str) -> KpiDimension:
        """Placeholder for local KPIs (requires external data).

        Always returns zeroed metrics; the dimension still participates in
        the weighted health score with its (small) configured weight.
        """
        dim = KpiDimension(name="local", weight=DIMENSION_WEIGHTS["local"])
        dim.metrics.append(KpiMetric(name="gbp_visibility", value=0.0))
        dim.metrics.append(KpiMetric(name="review_score", value=0.0))
        dim.metrics.append(KpiMetric(name="citation_accuracy", value=0.0))
        dim.compute_score()
        return dim

    # ----- Health score -----
    def calculate_health_score(self, dimensions: list[KpiDimension]) -> HealthScore:
        """Calculate weighted health score across all dimensions.

        Normalizes by the total weight of the dimensions actually supplied,
        so a missing dimension does not drag the overall score down. The
        overall trend is decided by majority vote of metric trends.
        """
        health = HealthScore()
        total_weight = 0.0
        weighted_sum = 0.0
        for dim in dimensions:
            # Re-score in case metric trends changed since collection
            # (e.g. after a baseline comparison updated them).
            dim.compute_score()
            health.dimensions[dim.name] = dim.score
            weighted_sum += dim.score * dim.weight
            total_weight += dim.weight
        if total_weight > 0:
            health.overall = round(weighted_sum / total_weight, 1)
        else:
            health.overall = 0.0
        # Determine trend from dimension trends
        up_count = sum(
            1 for d in dimensions
            for m in d.metrics if m.trend == "up"
        )
        down_count = sum(
            1 for d in dimensions
            for m in d.metrics if m.trend == "down"
        )
        if up_count > down_count:
            health.trend = "improving"
        elif down_count > up_count:
            health.trend = "declining"
        else:
            health.trend = "stable"
        return health

    # ----- Targets -----
    def set_targets(self, dimensions: list[KpiDimension]) -> dict[str, Any]:
        """Calculate 30/60/90 day targets (5%/10%/20% improvement).

        Writes target_30d/60d/90d back onto each metric and returns a
        flat {period: {"dim.metric": value}} mapping. Metrics whose value
        is zero or negative get no targets (their target fields stay None).
        """
        targets = {"30_day": {}, "60_day": {}, "90_day": {}}
        growth_rates = {"30_day": 0.05, "60_day": 0.10, "90_day": 0.20}
        for dim in dimensions:
            for metric in dim.metrics:
                if metric.value and metric.value > 0:
                    for period, rate in growth_rates.items():
                        key = f"{dim.name}.{metric.name}"
                        # For metrics where lower is better (e.g. bounce rate),
                        # improvement means decrease
                        if metric.name in ("bounce_rate", "crawl_errors", "thin_content_ratio"):
                            target_val = metric.value * (1 - rate)
                        else:
                            target_val = metric.value * (1 + rate)
                        targets[period][key] = round(target_val, 2)
                    metric.target_30d = targets["30_day"].get(f"{dim.name}.{metric.name}")
                    metric.target_60d = targets["60_day"].get(f"{dim.name}.{metric.name}")
                    metric.target_90d = targets["90_day"].get(f"{dim.name}.{metric.name}")
        return targets

    # ----- ROI estimation -----
    def estimate_roi(self, traffic_dim: KpiDimension) -> RoiEstimate:
        """Estimate ROI from Ahrefs traffic cost data.

        Copies the traffic_value_usd metric into the RoiEstimate; the
        change figure is only available after a baseline comparison has
        populated previous_value.
        """
        roi = RoiEstimate()
        for metric in traffic_dim.metrics:
            if metric.name == "traffic_value_usd":
                roi.traffic_value_usd = metric.value
                roi.estimated_monthly_value = metric.value
                if metric.previous_value is not None:
                    roi.traffic_value_change = round(
                        metric.value - metric.previous_value, 2
                    )
        return roi

    # ----- Baseline comparison -----
    def compare_baseline(
        self, current: list[KpiDimension], baseline: dict[str, Any]
    ) -> dict[str, Any]:
        """Compare current KPIs against a stored baseline.

        Side effect: sets previous_value and recomputes trend on each
        matched metric, which later feeds scores, ROI change, and the
        executive summary. Unmatched metrics get trend "no_baseline".
        """
        comparison = {}
        baseline_kpis = baseline.get("kpis", {})
        for dim in current:
            dim_baseline = baseline_kpis.get(dim.name, {})
            dim_comparison = {}
            for metric in dim.metrics:
                baseline_val = None
                if isinstance(dim_baseline, dict):
                    baseline_val = dim_baseline.get(metric.name)
                if baseline_val is not None:
                    metric.previous_value = float(baseline_val)
                    metric.compute_trend()
                    dim_comparison[metric.name] = {
                        "current": metric.value,
                        "baseline": baseline_val,
                        "change_pct": metric.change_pct,
                        "trend": metric.trend,
                    }
                else:
                    dim_comparison[metric.name] = {
                        "current": metric.value,
                        "baseline": None,
                        "change_pct": None,
                        "trend": "no_baseline",
                    }
            comparison[dim.name] = dim_comparison
        return comparison

    # ----- Executive summary -----
    def generate_executive_summary(
        self, dimensions: list[KpiDimension], health: HealthScore
    ) -> dict[str, Any]:
        """Generate executive summary with wins, concerns, recommendations.

        Wins/concerns require a baseline (they key off change_pct beyond
        +/-5%); recommendations key off dimension scores (<50 priority,
        <70 monitor). Each list is capped at five entries and padded with
        a neutral message when empty.
        """
        wins = []
        concerns = []
        recommendations = []
        for dim in dimensions:
            for metric in dim.metrics:
                if metric.trend == "up" and metric.change_pct and metric.change_pct > 5:
                    wins.append(
                        f"{dim.name}/{metric.name}: +{metric.change_pct}% improvement"
                    )
                elif metric.trend == "down" and metric.change_pct and metric.change_pct < -5:
                    concerns.append(
                        f"{dim.name}/{metric.name}: {metric.change_pct}% decline"
                    )
        # Generate recommendations based on dimension scores
        for dim in dimensions:
            if dim.score < 50:
                recommendations.append(
                    f"Priority: Improve {dim.name} dimension (score: {dim.score}/100)"
                )
            elif dim.score < 70:
                recommendations.append(
                    f"Monitor: {dim.name} dimension needs attention (score: {dim.score}/100)"
                )
        if not wins:
            wins.append("No significant improvements detected in this period")
        if not concerns:
            concerns.append("No significant declines detected in this period")
        if not recommendations:
            recommendations.append("All dimensions performing well - maintain current strategy")
        return {
            "health_score": health.overall,
            "health_trend": health.trend,
            "top_wins": wins[:5],
            "top_concerns": concerns[:5],
            "recommendations": recommendations[:5],
        }

    # ----- Main orchestration -----
    async def aggregate(
        self,
        url: str,
        include_roi: bool = False,
        baseline_path: str | None = None,
        set_baseline: bool = False,
    ) -> KpiResult:
        """Orchestrate full KPI aggregation across all dimensions.

        Order matters: dimensions are collected first, then the baseline
        comparison mutates metric trends, and only then are health score,
        targets, ROI, and the summary computed. Failures of individual
        dimensions are recorded in result.errors rather than raised.
        """
        result = KpiResult(url=url, timestamp=datetime.now().isoformat())
        dimensions: list[KpiDimension] = []
        async with aiohttp.ClientSession() as session:
            # Collect all dimensions concurrently
            tasks = [
                self.get_traffic_kpis(session, url),
                self.get_ranking_kpis(session, url),
                self.get_link_kpis(session, url),
                self.get_technical_kpis(session, url),
                self.get_content_kpis(session, url),
            ]
            # return_exceptions=True keeps one failed dimension from
            # cancelling the others.
            gathered = await asyncio.gather(*tasks, return_exceptions=True)
            for item in gathered:
                if isinstance(item, Exception):
                    result.errors.append(str(item))
                    self.logger.error(f"Dimension error: {item}")
                else:
                    dimensions.append(item)
            # Local KPIs (no API call needed)
            local_dim = await self.get_local_kpis(url)
            dimensions.append(local_dim)
        # Load baseline if provided
        if baseline_path:
            try:
                baseline_data = json.loads(Path(baseline_path).read_text())
                result.baseline_comparison = self.compare_baseline(dimensions, baseline_data)
            except Exception as exc:
                result.errors.append(f"Baseline load error: {exc}")
        # Calculate health score
        health = self.calculate_health_score(dimensions)
        result.health_score = health.overall
        result.health_trend = health.trend
        # Build KPI dictionary
        for dim in dimensions:
            result.kpis[dim.name] = {
                "score": dim.score,
                "weight": dim.weight,
                "metrics": {m.name: asdict(m) for m in dim.metrics},
            }
        # Set targets
        targets = self.set_targets(dimensions)
        result.targets = targets
        # ROI estimation
        if include_roi:
            traffic_dim = next((d for d in dimensions if d.name == "traffic"), None)
            if traffic_dim:
                roi = self.estimate_roi(traffic_dim)
                result.roi = roi
        # Executive summary
        result.executive_summary = self.generate_executive_summary(dimensions, health)
        # Save baseline if requested
        if set_baseline:
            baseline_out = {
                "url": url,
                "timestamp": result.timestamp,
                "kpis": {},
            }
            for dim in dimensions:
                baseline_out["kpis"][dim.name] = {
                    m.name: m.value for m in dim.metrics
                }
            # Written to the current working directory.
            baseline_file = f"baseline_{url.replace('https://', '').replace('/', '_')}.json"
            Path(baseline_file).write_text(json.dumps(baseline_out, indent=2))
            self.logger.info(f"Baseline saved to {baseline_file}")
        return result
# ---------------------------------------------------------------------------
# Output formatting
# ---------------------------------------------------------------------------
def format_text_report(result: KpiResult) -> str:
    """Render a KpiResult as a human-readable text dashboard."""
    arrows = {"up": "^", "down": "v", "stable": "=", "no_baseline": "?"}
    out: list[str] = [
        "=" * 60,
        f"SEO KPI Dashboard: {result.url}",
        f"Timestamp: {result.timestamp}",
        "=" * 60,
        "",
        f"Overall Health Score: {result.health_score}/100 ({result.health_trend})",
        "-" * 40,
    ]
    # One section per dimension, one line per metric.
    for dim_name, dim_data in result.kpis.items():
        out.append(f"\n[{dim_name.upper()}] Score: {dim_data['score']}/100 (weight: {dim_data['weight']})")
        for m_name, m_data in dim_data.get("metrics", {}).items():
            trend_arrow = arrows.get(m_data.get("trend", "stable"), "=")
            val = m_data.get("value", 0)
            change = m_data.get("change_pct")
            change_str = "" if change is None else f" ({change:+.1f}%)"
            out.append(f" {trend_arrow} {m_name}: {val}{change_str}")
    # Targets: at most ten entries per period.
    if result.targets:
        out.append("\n" + "-" * 40)
        out.append("TARGETS")
        for period, targets in result.targets.items():
            if targets:
                out.append(f"\n {period}:")
                for key, val in list(targets.items())[:10]:
                    out.append(f" {key}: {val}")
    if result.roi:
        out.append("\n" + "-" * 40)
        out.append("ROI ESTIMATE")
        out.append(f" Traffic Value (USD): ${result.roi.traffic_value_usd:,.2f}")
        out.append(f" Monthly Value: ${result.roi.estimated_monthly_value:,.2f}")
        out.append(f" Value Change: ${result.roi.traffic_value_change:,.2f}")
    summary = result.executive_summary
    if summary:
        out.append("\n" + "-" * 40)
        out.append("EXECUTIVE SUMMARY")
        out.append(f" Health: {summary.get('health_score', 0)}/100")
        out.append(f" Trend: {summary.get('health_trend', 'stable')}")
        out.append("\n Top Wins:")
        out.extend(f" + {win}" for win in summary.get("top_wins", []))
        out.append("\n Top Concerns:")
        out.extend(f" - {concern}" for concern in summary.get("top_concerns", []))
        out.append("\n Recommendations:")
        out.extend(f" > {rec}" for rec in summary.get("recommendations", []))
    if result.errors:
        out.append("\n" + "-" * 40)
        out.append("ERRORS:")
        out.extend(f" ! {err}" for err in result.errors)
    out.append("\n" + "=" * 60)
    return "\n".join(out)
def serialize_result(result: KpiResult) -> dict:
    """Convert a KpiResult into a JSON-safe dictionary."""
    always_present = (
        "url", "health_score", "health_trend", "kpis", "targets",
        "executive_summary", "timestamp", "errors",
    )
    payload = {name: getattr(result, name) for name in always_present}
    # Optional sections are emitted only when populated.
    if result.roi:
        payload["roi"] = asdict(result.roi)
    if result.baseline_comparison:
        payload["baseline_comparison"] = result.baseline_comparison
    return payload
# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------
def parse_args() -> argparse.Namespace:
    """Build the CLI parser and parse sys.argv."""
    cli = argparse.ArgumentParser(
        description="SEO KPI Aggregator - Unified metrics dashboard"
    )
    cli.add_argument("--url", required=True, help="Target URL or domain to analyze")
    cli.add_argument(
        "--set-baseline",
        action="store_true",
        help="Save current KPIs as baseline file",
    )
    cli.add_argument(
        "--baseline",
        type=str,
        default=None,
        help="Path to baseline JSON file for comparison",
    )
    cli.add_argument(
        "--roi",
        action="store_true",
        help="Include ROI estimation from traffic cost",
    )
    cli.add_argument("--json", action="store_true", help="Output results as JSON")
    cli.add_argument(
        "--output",
        type=str,
        default=None,
        help="Save output to file path",
    )
    return cli.parse_args()
async def main() -> None:
    """CLI entry point: aggregate KPIs and emit the chosen report format."""
    args = parse_args()
    aggregator = KpiAggregator()
    result = await aggregator.aggregate(
        url=args.url,
        include_roi=args.roi,
        baseline_path=args.baseline,
        set_baseline=args.set_baseline,
    )
    rendered = (
        json.dumps(serialize_result(result), indent=2, ensure_ascii=False)
        if args.json
        else format_text_report(result)
    )
    if args.output:
        Path(args.output).write_text(rendered, encoding="utf-8")
        logger.info(f"Output saved to {args.output}")
    else:
        print(rendered)
    aggregator.print_stats()


if __name__ == "__main__":
    asyncio.run(main())