""" KPI Aggregator - Unified SEO KPI aggregation across all dimensions ================================================================== Purpose: Aggregate KPIs from Ahrefs and other sources into a unified dashboard with health scores, baselines, targets, and ROI. Python: 3.10+ """ import argparse import asyncio import json import logging import sys from dataclasses import dataclass, field, asdict from datetime import datetime from pathlib import Path from typing import Any import aiohttp from base_client import BaseAsyncClient, config logger = logging.getLogger(__name__) # --------------------------------------------------------------------------- # Data classes # --------------------------------------------------------------------------- @dataclass class KpiMetric: """Single KPI metric with trend and target information.""" name: str value: float previous_value: float | None = None change_pct: float | None = None trend: str = "stable" # up, down, stable target_30d: float | None = None target_60d: float | None = None target_90d: float | None = None def compute_trend(self) -> None: """Compute trend direction and change percentage.""" if self.previous_value is not None and self.previous_value != 0: self.change_pct = round( ((self.value - self.previous_value) / abs(self.previous_value)) * 100, 2 ) if self.change_pct > 2.0: self.trend = "up" elif self.change_pct < -2.0: self.trend = "down" else: self.trend = "stable" @dataclass class KpiDimension: """A dimension grouping multiple KPI metrics.""" name: str metrics: list[KpiMetric] = field(default_factory=list) weight: float = 0.0 score: float = 0.0 def compute_score(self) -> float: """Compute dimension score (0-100) based on metrics health.""" if not self.metrics: self.score = 0.0 return self.score metric_scores = [] for m in self.metrics: if m.trend == "up": metric_scores.append(80.0) elif m.trend == "stable": metric_scores.append(60.0) else: metric_scores.append(35.0) # Boost score if value is positive and non-zero if m.value and m.value > 0: metric_scores[-1] = min(100.0, metric_scores[-1] + 10.0) self.score = round(sum(metric_scores) / len(metric_scores), 1) return self.score @dataclass class HealthScore: """Overall SEO health score.""" overall: float = 0.0 dimensions: dict[str, float] = field(default_factory=dict) trend: str = "stable" @dataclass class RoiEstimate: """ROI estimation from Ahrefs traffic cost.""" traffic_value_usd: float = 0.0 traffic_value_change: float = 0.0 estimated_monthly_value: float = 0.0 @dataclass class KpiResult: """Complete KPI aggregation result.""" url: str = "" health_score: float = 0.0 health_trend: str = "stable" kpis: dict[str, Any] = field(default_factory=dict) targets: dict[str, Any] = field(default_factory=dict) roi: RoiEstimate | None = None baseline_comparison: dict[str, Any] | None = None executive_summary: dict[str, Any] = field(default_factory=dict) timestamp: str = "" errors: list[str] = field(default_factory=list) # --------------------------------------------------------------------------- # Dimension weights # --------------------------------------------------------------------------- DIMENSION_WEIGHTS = { "traffic": 0.25, "rankings": 0.20, "technical": 0.20, "content": 0.15, "links": 0.15, "local": 0.05, } # --------------------------------------------------------------------------- # KPI Aggregator # --------------------------------------------------------------------------- class KpiAggregator(BaseAsyncClient): """Aggregate SEO KPIs across all dimensions from Ahrefs data.""" AHREFS_BASE = "https://api.ahrefs.com/v3" def __init__(self, api_token: str | None = None): super().__init__(max_concurrent=3, requests_per_second=2.0) self.api_token = api_token or config.get_required("AHREFS_API_TOKEN") self.headers = { "Authorization": f"Bearer {self.api_token}", "Accept": "application/json", } # ----- Ahrefs API helpers ----- async def _ahrefs_get( self, session: aiohttp.ClientSession, endpoint: str, params: dict ) -> dict: """Make an authenticated GET request to Ahrefs API.""" url = f"{self.AHREFS_BASE}/{endpoint}" async with session.get(url, headers=self.headers, params=params) as resp: if resp.status != 200: text = await resp.text() self.logger.warning(f"Ahrefs {endpoint} returned {resp.status}: {text}") return {"error": f"HTTP {resp.status}", "detail": text} return await resp.json() # ----- Dimension collectors ----- async def get_traffic_kpis( self, session: aiohttp.ClientSession, url: str ) -> KpiDimension: """Collect traffic KPIs via site-explorer-metrics.""" dim = KpiDimension(name="traffic", weight=DIMENSION_WEIGHTS["traffic"]) try: data = await self._ahrefs_get( session, "site-explorer/metrics", {"target": url, "mode": "domain"}, ) if "error" not in data: metrics = data.get("metrics", data) organic = metrics.get("organic", {}) organic_traffic = organic.get("traffic", 0) traffic_value_raw = organic.get("cost", 0) traffic_value_usd = traffic_value_raw / 100.0 if traffic_value_raw else 0.0 dim.metrics.append( KpiMetric(name="organic_traffic", value=float(organic_traffic)) ) dim.metrics.append( KpiMetric(name="traffic_value_usd", value=round(traffic_value_usd, 2)) ) else: dim.metrics.append(KpiMetric(name="organic_traffic", value=0.0)) dim.metrics.append(KpiMetric(name="traffic_value_usd", value=0.0)) except Exception as exc: self.logger.error(f"Traffic KPI error: {exc}") dim.metrics.append(KpiMetric(name="organic_traffic", value=0.0)) dim.compute_score() return dim async def get_ranking_kpis( self, session: aiohttp.ClientSession, url: str ) -> KpiDimension: """Collect ranking KPIs via site-explorer-metrics.""" dim = KpiDimension(name="rankings", weight=DIMENSION_WEIGHTS["rankings"]) try: data = await self._ahrefs_get( session, "site-explorer/metrics", {"target": url, "mode": "domain"}, ) if "error" not in data: metrics = data.get("metrics", data) organic = metrics.get("organic", {}) keywords_total = organic.get("keywords", 0) # Estimate top10 as ~20% of total keywords top10_estimate = int(keywords_total * 0.20) # Visibility score heuristic: based on traffic relative to keywords traffic = organic.get("traffic", 0) visibility = min(100.0, (traffic / max(keywords_total, 1)) * 10) dim.metrics.append( KpiMetric(name="visibility_score", value=round(visibility, 1)) ) dim.metrics.append( KpiMetric(name="top10_keywords", value=float(top10_estimate)) ) dim.metrics.append( KpiMetric(name="total_keywords", value=float(keywords_total)) ) else: dim.metrics.append(KpiMetric(name="visibility_score", value=0.0)) dim.metrics.append(KpiMetric(name="top10_keywords", value=0.0)) except Exception as exc: self.logger.error(f"Ranking KPI error: {exc}") dim.metrics.append(KpiMetric(name="visibility_score", value=0.0)) dim.compute_score() return dim async def get_link_kpis( self, session: aiohttp.ClientSession, url: str ) -> KpiDimension: """Collect link KPIs via domain-rating and metrics.""" dim = KpiDimension(name="links", weight=DIMENSION_WEIGHTS["links"]) try: # Domain rating dr_data = await self._ahrefs_get( session, "site-explorer/domain-rating", {"target": url}, ) domain_rating = 0.0 if "error" not in dr_data: domain_rating = float( dr_data.get("domain_rating", dr_data.get("domainRating", 0)) ) dim.metrics.append( KpiMetric(name="domain_rating", value=round(domain_rating, 1)) ) # Referring domains from metrics metrics_data = await self._ahrefs_get( session, "site-explorer/metrics", {"target": url, "mode": "domain"}, ) ref_domains = 0 if "error" not in metrics_data: metrics = metrics_data.get("metrics", metrics_data) ref_domains = metrics.get("refdomains", 0) dim.metrics.append( KpiMetric(name="referring_domains", value=float(ref_domains)) ) except Exception as exc: self.logger.error(f"Link KPI error: {exc}") dim.metrics.append(KpiMetric(name="domain_rating", value=0.0)) dim.metrics.append(KpiMetric(name="referring_domains", value=0.0)) dim.compute_score() return dim async def get_technical_kpis( self, session: aiohttp.ClientSession, url: str ) -> KpiDimension: """Collect technical KPIs (estimated from available data).""" dim = KpiDimension(name="technical", weight=DIMENSION_WEIGHTS["technical"]) try: data = await self._ahrefs_get( session, "site-explorer/metrics", {"target": url, "mode": "domain"}, ) if "error" not in data: metrics = data.get("metrics", data) organic = metrics.get("organic", {}) pages_crawled = metrics.get("pages", organic.get("pages", 0)) # Heuristic: technical health score from available data has_traffic = organic.get("traffic", 0) > 0 has_pages = pages_crawled > 0 tech_score = 50.0 if has_traffic: tech_score += 25.0 if has_pages: tech_score += 25.0 dim.metrics.append( KpiMetric(name="technical_health_score", value=round(tech_score, 1)) ) dim.metrics.append( KpiMetric(name="pages_crawled", value=float(pages_crawled)) ) else: dim.metrics.append(KpiMetric(name="technical_health_score", value=50.0)) dim.metrics.append(KpiMetric(name="pages_crawled", value=0.0)) except Exception as exc: self.logger.error(f"Technical KPI error: {exc}") dim.metrics.append(KpiMetric(name="technical_health_score", value=50.0)) dim.compute_score() return dim async def get_content_kpis( self, session: aiohttp.ClientSession, url: str ) -> KpiDimension: """Collect content KPIs from available metrics.""" dim = KpiDimension(name="content", weight=DIMENSION_WEIGHTS["content"]) try: data = await self._ahrefs_get( session, "site-explorer/metrics", {"target": url, "mode": "domain"}, ) if "error" not in data: metrics = data.get("metrics", data) organic = metrics.get("organic", {}) pages = metrics.get("pages", organic.get("pages", 0)) keywords = organic.get("keywords", 0) # Content freshness heuristic freshness = min(100.0, (keywords / max(pages, 1)) * 5) if pages else 0.0 dim.metrics.append( KpiMetric(name="indexed_pages", value=float(pages)) ) dim.metrics.append( KpiMetric(name="content_freshness_score", value=round(freshness, 1)) ) dim.metrics.append( KpiMetric(name="keywords_per_page", value=round(keywords / max(pages, 1), 2)) ) else: dim.metrics.append(KpiMetric(name="indexed_pages", value=0.0)) dim.metrics.append(KpiMetric(name="content_freshness_score", value=0.0)) except Exception as exc: self.logger.error(f"Content KPI error: {exc}") dim.metrics.append(KpiMetric(name="indexed_pages", value=0.0)) dim.compute_score() return dim async def get_local_kpis(self, url: str) -> KpiDimension: """Placeholder for local KPIs (requires external data).""" dim = KpiDimension(name="local", weight=DIMENSION_WEIGHTS["local"]) dim.metrics.append(KpiMetric(name="gbp_visibility", value=0.0)) dim.metrics.append(KpiMetric(name="review_score", value=0.0)) dim.metrics.append(KpiMetric(name="citation_accuracy", value=0.0)) dim.compute_score() return dim # ----- Health score ----- def calculate_health_score(self, dimensions: list[KpiDimension]) -> HealthScore: """Calculate weighted health score across all dimensions.""" health = HealthScore() total_weight = 0.0 weighted_sum = 0.0 for dim in dimensions: dim.compute_score() health.dimensions[dim.name] = dim.score weighted_sum += dim.score * dim.weight total_weight += dim.weight if total_weight > 0: health.overall = round(weighted_sum / total_weight, 1) else: health.overall = 0.0 # Determine trend from dimension trends up_count = sum( 1 for d in dimensions for m in d.metrics if m.trend == "up" ) down_count = sum( 1 for d in dimensions for m in d.metrics if m.trend == "down" ) if up_count > down_count: health.trend = "improving" elif down_count > up_count: health.trend = "declining" else: health.trend = "stable" return health # ----- Targets ----- def set_targets(self, dimensions: list[KpiDimension]) -> dict[str, Any]: """Calculate 30/60/90 day targets (5%/10%/20% improvement).""" targets = {"30_day": {}, "60_day": {}, "90_day": {}} growth_rates = {"30_day": 0.05, "60_day": 0.10, "90_day": 0.20} for dim in dimensions: for metric in dim.metrics: if metric.value and metric.value > 0: for period, rate in growth_rates.items(): key = f"{dim.name}.{metric.name}" # For metrics where lower is better (e.g. bounce rate), # improvement means decrease if metric.name in ("bounce_rate", "crawl_errors", "thin_content_ratio"): target_val = metric.value * (1 - rate) else: target_val = metric.value * (1 + rate) targets[period][key] = round(target_val, 2) metric.target_30d = targets["30_day"].get(f"{dim.name}.{metric.name}") metric.target_60d = targets["60_day"].get(f"{dim.name}.{metric.name}") metric.target_90d = targets["90_day"].get(f"{dim.name}.{metric.name}") return targets # ----- ROI estimation ----- def estimate_roi(self, traffic_dim: KpiDimension) -> RoiEstimate: """Estimate ROI from Ahrefs traffic cost data.""" roi = RoiEstimate() for metric in traffic_dim.metrics: if metric.name == "traffic_value_usd": roi.traffic_value_usd = metric.value roi.estimated_monthly_value = metric.value if metric.previous_value is not None: roi.traffic_value_change = round( metric.value - metric.previous_value, 2 ) return roi # ----- Baseline comparison ----- def compare_baseline( self, current: list[KpiDimension], baseline: dict[str, Any] ) -> dict[str, Any]: """Compare current KPIs against a stored baseline.""" comparison = {} baseline_kpis = baseline.get("kpis", {}) for dim in current: dim_baseline = baseline_kpis.get(dim.name, {}) dim_comparison = {} for metric in dim.metrics: baseline_val = None if isinstance(dim_baseline, dict): baseline_val = dim_baseline.get(metric.name) if baseline_val is not None: metric.previous_value = float(baseline_val) metric.compute_trend() dim_comparison[metric.name] = { "current": metric.value, "baseline": baseline_val, "change_pct": metric.change_pct, "trend": metric.trend, } else: dim_comparison[metric.name] = { "current": metric.value, "baseline": None, "change_pct": None, "trend": "no_baseline", } comparison[dim.name] = dim_comparison return comparison # ----- Executive summary ----- def generate_executive_summary( self, dimensions: list[KpiDimension], health: HealthScore ) -> dict[str, Any]: """Generate executive summary with wins, concerns, recommendations.""" wins = [] concerns = [] recommendations = [] for dim in dimensions: for metric in dim.metrics: if metric.trend == "up" and metric.change_pct and metric.change_pct > 5: wins.append( f"{dim.name}/{metric.name}: +{metric.change_pct}% improvement" ) elif metric.trend == "down" and metric.change_pct and metric.change_pct < -5: concerns.append( f"{dim.name}/{metric.name}: {metric.change_pct}% decline" ) # Generate recommendations based on dimension scores for dim in dimensions: if dim.score < 50: recommendations.append( f"Priority: Improve {dim.name} dimension (score: {dim.score}/100)" ) elif dim.score < 70: recommendations.append( f"Monitor: {dim.name} dimension needs attention (score: {dim.score}/100)" ) if not wins: wins.append("No significant improvements detected in this period") if not concerns: concerns.append("No significant declines detected in this period") if not recommendations: recommendations.append("All dimensions performing well - maintain current strategy") return { "health_score": health.overall, "health_trend": health.trend, "top_wins": wins[:5], "top_concerns": concerns[:5], "recommendations": recommendations[:5], } # ----- Main orchestration ----- async def aggregate( self, url: str, include_roi: bool = False, baseline_path: str | None = None, set_baseline: bool = False, ) -> KpiResult: """Orchestrate full KPI aggregation across all dimensions.""" result = KpiResult(url=url, timestamp=datetime.now().isoformat()) dimensions: list[KpiDimension] = [] async with aiohttp.ClientSession() as session: # Collect all dimensions concurrently tasks = [ self.get_traffic_kpis(session, url), self.get_ranking_kpis(session, url), self.get_link_kpis(session, url), self.get_technical_kpis(session, url), self.get_content_kpis(session, url), ] gathered = await asyncio.gather(*tasks, return_exceptions=True) for item in gathered: if isinstance(item, Exception): result.errors.append(str(item)) self.logger.error(f"Dimension error: {item}") else: dimensions.append(item) # Local KPIs (no API call needed) local_dim = await self.get_local_kpis(url) dimensions.append(local_dim) # Load baseline if provided if baseline_path: try: baseline_data = json.loads(Path(baseline_path).read_text()) result.baseline_comparison = self.compare_baseline(dimensions, baseline_data) except Exception as exc: result.errors.append(f"Baseline load error: {exc}") # Calculate health score health = self.calculate_health_score(dimensions) result.health_score = health.overall result.health_trend = health.trend # Build KPI dictionary for dim in dimensions: result.kpis[dim.name] = { "score": dim.score, "weight": dim.weight, "metrics": {m.name: asdict(m) for m in dim.metrics}, } # Set targets targets = self.set_targets(dimensions) result.targets = targets # ROI estimation if include_roi: traffic_dim = next((d for d in dimensions if d.name == "traffic"), None) if traffic_dim: roi = self.estimate_roi(traffic_dim) result.roi = roi # Executive summary result.executive_summary = self.generate_executive_summary(dimensions, health) # Save baseline if requested if set_baseline: baseline_out = { "url": url, "timestamp": result.timestamp, "kpis": {}, } for dim in dimensions: baseline_out["kpis"][dim.name] = { m.name: m.value for m in dim.metrics } baseline_file = f"baseline_{url.replace('https://', '').replace('/', '_')}.json" Path(baseline_file).write_text(json.dumps(baseline_out, indent=2)) self.logger.info(f"Baseline saved to {baseline_file}") return result # --------------------------------------------------------------------------- # Output formatting # --------------------------------------------------------------------------- def format_text_report(result: KpiResult) -> str: """Format KPI result as human-readable text report.""" lines = [] lines.append("=" * 60) lines.append(f"SEO KPI Dashboard: {result.url}") lines.append(f"Timestamp: {result.timestamp}") lines.append("=" * 60) lines.append("") # Health score lines.append(f"Overall Health Score: {result.health_score}/100 ({result.health_trend})") lines.append("-" * 40) # Dimensions for dim_name, dim_data in result.kpis.items(): lines.append(f"\n[{dim_name.upper()}] Score: {dim_data['score']}/100 (weight: {dim_data['weight']})") metrics = dim_data.get("metrics", {}) for m_name, m_data in metrics.items(): trend_arrow = {"up": "^", "down": "v", "stable": "=", "no_baseline": "?"}.get( m_data.get("trend", "stable"), "=" ) val = m_data.get("value", 0) change = m_data.get("change_pct") change_str = f" ({change:+.1f}%)" if change is not None else "" lines.append(f" {trend_arrow} {m_name}: {val}{change_str}") # Targets if result.targets: lines.append("\n" + "-" * 40) lines.append("TARGETS") for period, targets in result.targets.items(): if targets: lines.append(f"\n {period}:") for key, val in list(targets.items())[:10]: lines.append(f" {key}: {val}") # ROI if result.roi: lines.append("\n" + "-" * 40) lines.append("ROI ESTIMATE") lines.append(f" Traffic Value (USD): ${result.roi.traffic_value_usd:,.2f}") lines.append(f" Monthly Value: ${result.roi.estimated_monthly_value:,.2f}") lines.append(f" Value Change: ${result.roi.traffic_value_change:,.2f}") # Executive summary if result.executive_summary: lines.append("\n" + "-" * 40) lines.append("EXECUTIVE SUMMARY") lines.append(f" Health: {result.executive_summary.get('health_score', 0)}/100") lines.append(f" Trend: {result.executive_summary.get('health_trend', 'stable')}") lines.append("\n Top Wins:") for win in result.executive_summary.get("top_wins", []): lines.append(f" + {win}") lines.append("\n Top Concerns:") for concern in result.executive_summary.get("top_concerns", []): lines.append(f" - {concern}") lines.append("\n Recommendations:") for rec in result.executive_summary.get("recommendations", []): lines.append(f" > {rec}") # Errors if result.errors: lines.append("\n" + "-" * 40) lines.append("ERRORS:") for err in result.errors: lines.append(f" ! {err}") lines.append("\n" + "=" * 60) return "\n".join(lines) def serialize_result(result: KpiResult) -> dict: """Serialize KpiResult to JSON-safe dictionary.""" data = { "url": result.url, "health_score": result.health_score, "health_trend": result.health_trend, "kpis": result.kpis, "targets": result.targets, "executive_summary": result.executive_summary, "timestamp": result.timestamp, "errors": result.errors, } if result.roi: data["roi"] = asdict(result.roi) if result.baseline_comparison: data["baseline_comparison"] = result.baseline_comparison return data # --------------------------------------------------------------------------- # CLI entry point # --------------------------------------------------------------------------- def parse_args() -> argparse.Namespace: """Parse command-line arguments.""" parser = argparse.ArgumentParser( description="SEO KPI Aggregator - Unified metrics dashboard" ) parser.add_argument( "--url", required=True, help="Target URL or domain to analyze" ) parser.add_argument( "--set-baseline", action="store_true", help="Save current KPIs as baseline file" ) parser.add_argument( "--baseline", type=str, default=None, help="Path to baseline JSON file for comparison" ) parser.add_argument( "--roi", action="store_true", help="Include ROI estimation from traffic cost" ) parser.add_argument( "--json", action="store_true", help="Output results as JSON" ) parser.add_argument( "--output", type=str, default=None, help="Save output to file path" ) return parser.parse_args() async def main() -> None: """Main entry point.""" args = parse_args() aggregator = KpiAggregator() result = await aggregator.aggregate( url=args.url, include_roi=args.roi, baseline_path=args.baseline, set_baseline=args.set_baseline, ) if args.json: output = json.dumps(serialize_result(result), indent=2, ensure_ascii=False) else: output = format_text_report(result) if args.output: Path(args.output).write_text(output, encoding="utf-8") logger.info(f"Output saved to {args.output}") else: print(output) aggregator.print_stats() if __name__ == "__main__": asyncio.run(main())