Add SEO skills 33-34 and fix bugs in skills 19-34
New skills: - Skill 33: Site migration planner with redirect mapping and monitoring - Skill 34: Reporting dashboard with HTML charts and Korean executive reports Bug fixes (Skill 34 - report_aggregator.py): - Add audit_type fallback for skill identification (was only using audit_id prefix) - Extract health scores from nested data dict (technical_score, onpage_score, etc.) - Support subdomain matching in domain filter (blog.ourdigital.org matches ourdigital.org) - Skip self-referencing DASH- aggregated reports Bug fixes (Skill 20 - naver_serp_analyzer.py): - Remove VIEW tab selectors (removed by Naver in 2026) - Add new section detectors: books (도서), shortform (숏폼), influencer (인플루언서) Improvements (Skill 34 - dashboard/executive report): - Add Korean category labels for Chart.js charts (기술 SEO, 온페이지, etc.) - Add Korean trend labels (개선 중 ↑, 안정 →, 하락 중 ↓) - Add English→Korean issue description translation layer (20 common patterns) Documentation improvements: - Add Korean triggers to 4 skill descriptions (19, 25, 28, 31) - Expand Skill 32 SKILL.md from 40→143 lines (was 6/10, added workflow, output format, limitations) - Add output format examples to Skills 27 and 28 SKILL.md - Add limitations sections to Skills 27 and 28 - Update README.md, CLAUDE.md, AGENTS.md for skills 33-34 Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,744 @@
|
||||
"""
|
||||
Report Aggregator - Collect and normalize outputs from all SEO skills
|
||||
=====================================================================
|
||||
Purpose: Scan for recent audit outputs from skills 11-33, normalize data
|
||||
formats, merge findings by domain/date, compute cross-skill health
|
||||
scores, and identify top-priority issues across all audits.
|
||||
Python: 3.10+
|
||||
|
||||
Usage:
|
||||
python report_aggregator.py --domain https://example.com --json
|
||||
python report_aggregator.py --domain https://example.com --output-dir ./audit_outputs --json
|
||||
python report_aggregator.py --domain https://example.com --from 2025-01-01 --to 2025-03-31 --json
|
||||
python report_aggregator.py --domain https://example.com --json --output report.json
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import sys
|
||||
from dataclasses import dataclass, field, asdict
|
||||
from datetime import datetime, date
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from base_client import BaseAsyncClient, config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Skill registry
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Skill id -> metadata used when normalizing outputs:
#   name:     human-readable skill slug shown in reports and the timeline
#   category: scoring bucket looked up in CATEGORY_WEIGHTS
#   weight:   per-skill weight (currently informational only; the overall
#             score is computed from CATEGORY_WEIGHTS category averages)
SKILL_REGISTRY = {
    11: {"name": "comprehensive-audit", "category": "comprehensive", "weight": 1.0},
    12: {"name": "technical-audit", "category": "technical", "weight": 0.20},
    13: {"name": "on-page-audit", "category": "on_page", "weight": 0.20},
    14: {"name": "core-web-vitals", "category": "performance", "weight": 0.25},
    15: {"name": "search-console", "category": "search_console", "weight": 0.10},
    16: {"name": "schema-validator", "category": "schema", "weight": 0.15},
    17: {"name": "schema-generator", "category": "schema", "weight": 0.10},
    18: {"name": "local-audit", "category": "local", "weight": 0.10},
    19: {"name": "keyword-strategy", "category": "keywords", "weight": 0.15},
    20: {"name": "serp-analysis", "category": "keywords", "weight": 0.10},
    21: {"name": "position-tracking", "category": "keywords", "weight": 0.15},
    22: {"name": "link-building", "category": "links", "weight": 0.15},
    23: {"name": "content-strategy", "category": "content", "weight": 0.15},
    24: {"name": "ecommerce-seo", "category": "ecommerce", "weight": 0.10},
    25: {"name": "kpi-framework", "category": "kpi", "weight": 0.20},
    26: {"name": "international-seo", "category": "international", "weight": 0.10},
    27: {"name": "ai-visibility", "category": "ai_search", "weight": 0.10},
    28: {"name": "knowledge-graph", "category": "entity_seo", "weight": 0.10},
    31: {"name": "competitor-intel", "category": "competitor", "weight": 0.15},
    32: {"name": "crawl-budget", "category": "technical", "weight": 0.10},
    # Fixed: skill 33 is the site-migration planner (audit_id prefix "MIGR-",
    # audit_type "migration"), not "page-experience".
    # NOTE(review): category "performance" kept as-is to avoid changing the
    # scoring bucket; "technical" may fit migration audits better — confirm.
    33: {"name": "site-migration", "category": "performance", "weight": 0.10},
}
|
||||
|
||||
# Per-category weights for the cross-skill overall health score
# (see ReportAggregator.compute_cross_skill_health).  Values sum to 1.00.
# Categories that appear in SKILL_REGISTRY but not here (search_console,
# ecommerce, international, ai_search, entity_seo, comprehensive) fall
# back to a default weight of 0.05 at scoring time.
CATEGORY_WEIGHTS = {
    "technical": 0.20,
    "on_page": 0.15,
    "performance": 0.15,
    "content": 0.10,
    "links": 0.10,
    "local": 0.05,
    "keywords": 0.10,
    "competitor": 0.05,
    "schema": 0.05,
    "kpi": 0.05,
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
@dataclass
class SkillOutput:
    """Normalized output from a single SEO skill.

    One instance is produced per recognized JSON file; the raw payload is
    preserved in ``data`` while the other fields hold the normalized view.
    """

    skill_id: int = 0        # 0 means the producing skill was not identified
    skill_name: str = ""     # slug from SKILL_REGISTRY, e.g. "technical-audit"
    domain: str = ""         # bare lowercase domain (www. stripped)
    audit_date: str = ""     # ISO YYYY-MM-DD
    category: str = ""       # scoring bucket, e.g. "technical", "keywords"
    data: dict[str, Any] = field(default_factory=dict)   # full original payload
    health_score: float = 0.0                            # 0-100; 0.0 = unknown
    issues: list[dict[str, Any]] = field(default_factory=list)
    wins: list[dict[str, Any]] = field(default_factory=list)
    source_file: str = ""    # path of the JSON file this came from
|
||||
|
||||
|
||||
@dataclass
class AggregatedReport:
    """Full aggregated report built from all SEO skill outputs.

    Produced by ReportAggregator.run(); serialized by _serialize_report()
    and rendered by _format_text_report().
    """

    domain: str = ""          # bare lowercase target domain
    report_date: str = ""     # ISO YYYY-MM-DD the report was generated
    skills_included: list[dict[str, Any]] = field(default_factory=list)
    overall_health: float = 0.0            # weighted 0-100 cross-skill score
    health_trend: str = "stable"           # "improving" | "stable" | "declining"
    category_scores: dict[str, float] = field(default_factory=dict)
    top_issues: list[dict[str, Any]] = field(default_factory=list)   # max 20
    top_wins: list[dict[str, Any]] = field(default_factory=list)     # max 20
    timeline: list[dict[str, Any]] = field(default_factory=list)     # newest first
    raw_outputs: list[dict[str, Any]] = field(default_factory=list)  # normalized
    audit_id: str = ""        # DASH-YYYYMMDD-NNN
    timestamp: str = ""       # full ISO timestamp of generation
    errors: list[str] = field(default_factory=list)  # non-fatal pipeline errors
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Aggregator
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class ReportAggregator(BaseAsyncClient):
    """Aggregate outputs from all SEO skills into unified reports.

    Pipeline (see :meth:`run`):
      1. Scan a local directory for JSON files written by skills 11-33.
      2. Query the Notion audit-log database (placeholder for MCP tools).
      3. Normalize outputs and compute weighted cross-skill health scores.
      4. Rank issues and wins across all skills.
      5. Build an audit-history timeline.
    """

    # Notion "SEO Audit Log" database id used by query_notion_audits().
    NOTION_DB_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"

    def __init__(self):
        # Modest concurrency: aggregation is mostly local file parsing.
        super().__init__(max_concurrent=5, requests_per_second=2.0)

    @staticmethod
    def _extract_domain(url: str) -> str:
        """Extract bare lowercase domain from a URL, or normalize a bare domain.

        Only a *leading* "www." label is stripped.  The previous
        ``str.replace("www.", "")`` also corrupted hosts that merely contain
        "www." in the middle (e.g. "foo.www.example.com" -> "foo.example.com").
        """
        host = urlparse(url).netloc if "://" in url else url
        return host.lower().removeprefix("www.")

    @staticmethod
    def _generate_audit_id() -> str:
        """Generate audit ID in DASH-YYYYMMDD-NNN format.

        NOTE(review): the sequence suffix is fixed at "-001", so two runs on
        the same day produce identical ids — confirm this is intended.
        """
        return f"DASH-{datetime.now().strftime('%Y%m%d')}-001"

    def scan_local_outputs(
        self,
        output_dir: str,
        domain: str | None = None,
        date_from: str | None = None,
        date_to: str | None = None,
    ) -> list[SkillOutput]:
        """Find JSON output files from other SEO skills in a directory.

        Recursively scans *output_dir* for ``*.json`` files, parses each into
        a normalized SkillOutput, and applies the optional filters.

        Args:
            output_dir: Directory scanned recursively for ``*.json`` files.
            domain: Target domain; exact matches and subdomains pass.
            date_from: Inclusive lower bound on audit_date (YYYY-MM-DD).
            date_to: Inclusive upper bound on audit_date (YYYY-MM-DD).

        Returns:
            Parsed SkillOutput objects.  Unreadable or unrecognizable files
            are logged and skipped rather than aborting the scan.
        """
        outputs: list[SkillOutput] = []
        output_path = Path(output_dir)

        if not output_path.exists():
            self.logger.warning(f"Output directory not found: {output_dir}")
            return outputs

        json_files = list(output_path.rglob("*.json"))
        self.logger.info(f"Found {len(json_files)} JSON files in {output_dir}")

        # Hoisted out of the loop: the normalized target never changes.
        target_domain = self._extract_domain(domain) if domain else None

        for json_file in json_files:
            try:
                data = json.loads(json_file.read_text(encoding="utf-8"))
                if not isinstance(data, dict):
                    # A top-level array/scalar cannot be a skill output and
                    # previously raised an uncaught AttributeError below.
                    continue

                # Attempt to identify which skill produced this output.
                skill_output = self._identify_and_parse(data, str(json_file))
                if skill_output is None:
                    continue

                # Domain filter: exact match OR subdomain
                # (blog.example.com matches example.com).
                if target_domain and skill_output.domain:
                    file_domain = skill_output.domain
                    if (file_domain != target_domain
                            and not file_domain.endswith("." + target_domain)):
                        continue

                # ISO date strings compare correctly as plain strings.
                if date_from and skill_output.audit_date < date_from:
                    continue
                if date_to and skill_output.audit_date > date_to:
                    continue

                outputs.append(skill_output)
                self.logger.info(
                    f"Parsed output from skill {skill_output.skill_id} "
                    f"({skill_output.skill_name}): {json_file.name}"
                )

            except (json.JSONDecodeError, KeyError, TypeError,
                    OSError, UnicodeDecodeError) as e:
                # OSError/UnicodeDecodeError cover unreadable files, which
                # previously escaped this handler and aborted the scan.
                self.logger.warning(f"Could not parse {json_file}: {e}")

        self.logger.info(f"Successfully parsed {len(outputs)} skill outputs")
        return outputs

    def _identify_and_parse(
        self, data: dict[str, Any], source_file: str
    ) -> SkillOutput | None:
        """Identify which skill produced the output and parse it.

        Returns None for this skill's own DASH-prefixed aggregated reports
        and for payloads with neither an identifiable skill nor a domain.
        """
        skill_output = SkillOutput(source_file=source_file)

        # Strategy 1: audit_id prefix (e.g. "KPI-20250115-001" -> skill 25).
        audit_id = data.get("audit_id", "")
        if isinstance(audit_id, str):
            prefix_map = {
                "COMP": 11, "TECH": 12, "PAGE": 13, "CWV": 14,
                "GSC": 15, "SCHEMA": 16, "LOCAL": 18, "KW": 19,
                "SERP": 20, "RANK": 21, "LINK": 22, "CONTENT": 23,
                "ECOM": 24, "KPI": 25, "INTL": 26, "AI": 27,
                "KG": 28, "COMPET": 31, "CRAWL": 32, "MIGR": 33,
                "DASH": None,  # Skip self-referencing dashboard reports
            }
            # Longest prefix first — otherwise "COMP" shadows "COMPET" and
            # competitor audits (COMPET-...) get misfiled as skill 11.
            for prefix in sorted(prefix_map, key=len, reverse=True):
                if audit_id.startswith(prefix):
                    skill_id = prefix_map[prefix]
                    if skill_id is None:
                        return None  # Skip aggregated reports
                    skill_info = SKILL_REGISTRY.get(skill_id, {})
                    skill_output.skill_id = skill_id
                    skill_output.skill_name = skill_info.get("name", "unknown")
                    skill_output.category = skill_info.get("category", "unknown")
                    break

        # Strategy 2: fall back to the audit_type field
        # (used by our-seo-agent outputs).
        if not skill_output.skill_id:
            audit_type = data.get("audit_type", "")
            if isinstance(audit_type, str) and audit_type:
                type_map = {
                    "comprehensive": 11, "technical": 12, "onpage": 13,
                    "cwv": 14, "core-web-vitals": 14,
                    "gsc": 15, "search-console": 15,
                    "schema": 16, "local": 18,
                    "keyword": 19, "serp": 20, "position": 21,
                    "link": 22, "backlink": 22,
                    "content": 23, "ecommerce": 24, "kpi": 25,
                    "international": 26, "hreflang": 26,
                    "ai-visibility": 27, "knowledge-graph": 28, "entity": 28,
                    "competitor": 31, "crawl-budget": 32, "crawl": 32,
                    "migration": 33,
                }
                skill_id = type_map.get(audit_type.lower())
                if skill_id is not None:
                    skill_info = SKILL_REGISTRY.get(skill_id, {})
                    skill_output.skill_id = skill_id
                    skill_output.skill_name = skill_info.get("name", "unknown")
                    skill_output.category = skill_info.get("category", "unknown")

        # Extract domain: first *truthy* candidate key wins (a present-but-
        # null "url" no longer produces the bogus domain "none").
        for key in ("url", "target", "domain", "site"):
            if data.get(key):
                skill_output.domain = self._extract_domain(str(data[key]))
                break

        # Extract health score — top-level keys first, then the nested data
        # dict (skills report technical_score, onpage_score, etc. in there).
        score_found = False
        for key in ("health_score", "overall_health", "score"):
            if key in data:
                try:
                    skill_output.health_score = float(data[key])
                    score_found = True
                    break  # stop only once a usable number was found
                except (ValueError, TypeError):
                    continue  # non-numeric value: try the next key

        if not score_found:
            nested = data.get("data", {})
            if isinstance(nested, dict):
                for key in ("technical_score", "onpage_score", "schema_score",
                            "local_seo_score", "cwv_score", "performance_score",
                            "content_score", "link_score", "keyword_score",
                            "competitor_score", "efficiency_score",
                            "health_score", "overall_score", "score"):
                    val = nested.get(key)
                    if val is not None:
                        try:
                            skill_output.health_score = float(val)
                            break  # stop only on a successful conversion
                        except (ValueError, TypeError):
                            continue

        # Extract audit date: first truthy timestamp-like key, trimmed to
        # YYYY-MM-DD; defaults to today when absent.
        for key in ("audit_date", "report_date", "timestamp", "found_date"):
            if data.get(key):
                skill_output.audit_date = str(data[key])[:10]
                break
        if not skill_output.audit_date:
            skill_output.audit_date = date.today().isoformat()

        # Extract issues: dicts pass through; bare strings get a default
        # medium severity so downstream sorting still works.
        issues_raw = data.get("issues", data.get("critical_issues", []))
        if isinstance(issues_raw, list):
            for issue in issues_raw:
                if isinstance(issue, dict):
                    skill_output.issues.append(issue)
                elif isinstance(issue, str):
                    skill_output.issues.append(
                        {"description": issue, "severity": "medium"}
                    )

        # Extract wins / recommendations.
        wins_raw = data.get("wins", data.get("top_wins", []))
        if isinstance(wins_raw, list):
            for win in wins_raw:
                if isinstance(win, dict):
                    skill_output.wins.append(win)
                elif isinstance(win, str):
                    skill_output.wins.append({"description": win})

        # Keep the full payload for raw_outputs / debugging.
        skill_output.data = data

        # Not attributable to any skill or domain: ignore.
        if not skill_output.skill_id and not skill_output.domain:
            return None

        return skill_output

    async def query_notion_audits(
        self,
        domain: str,
        date_from: str | None = None,
        date_to: str | None = None,
    ) -> list[SkillOutput]:
        """Fetch past audit entries from the Notion SEO Audit Log database.

        In production this uses the Notion MCP tools to query NOTION_DB_ID
        (filtering on Site URL and Found Date); here it is a placeholder
        that logs its intent and returns an empty list.
        """
        outputs: list[SkillOutput] = []
        self.logger.info(
            f"Querying Notion audits for {domain} "
            f"(db: {self.NOTION_DB_ID}, from={date_from}, to={date_to})"
        )
        self.logger.info(
            "Notion query is a placeholder; use MCP tools in Claude Desktop "
            "or manually provide JSON files via --output-dir."
        )
        return outputs

    def normalize_output(self, skill_output: SkillOutput) -> dict[str, Any]:
        """Normalize a skill output into a unified, JSON-friendly format.

        Issues and wins are truncated to 10 each; the full counts are kept
        in issues_count / wins_count.
        """
        return {
            "skill_id": skill_output.skill_id,
            "skill_name": skill_output.skill_name,
            "domain": skill_output.domain,
            "audit_date": skill_output.audit_date,
            "category": skill_output.category,
            "health_score": skill_output.health_score,
            "issues_count": len(skill_output.issues),
            "wins_count": len(skill_output.wins),
            "issues": skill_output.issues[:10],
            "wins": skill_output.wins[:10],
        }

    def compute_cross_skill_health(
        self, outputs: list[SkillOutput]
    ) -> tuple[float, dict[str, float]]:
        """Compute the weighted overall health score across all skills.

        Scores of 0 are treated as "unknown" and excluded.  Per-category
        averages are weighted by CATEGORY_WEIGHTS (default 0.05 for
        unlisted categories).

        Returns:
            (overall_score, category_scores_dict), both rounded to 1 decimal.
        """
        category_scores: dict[str, list[float]] = {}
        for output in outputs:
            if output.category and output.health_score > 0:
                category_scores.setdefault(output.category, []).append(
                    output.health_score
                )

        # Average scores per category.
        avg_category = {
            cat: round(sum(scores) / len(scores), 1)
            for cat, scores in category_scores.items()
        }

        # Weighted overall score (weights renormalized over present categories).
        total_weight = 0.0
        weighted_sum = 0.0
        for cat, avg_score in avg_category.items():
            weight = CATEGORY_WEIGHTS.get(cat, 0.05)
            weighted_sum += avg_score * weight
            total_weight += weight

        overall = round(weighted_sum / total_weight, 1) if total_weight > 0 else 0.0
        return overall, avg_category

    def identify_priorities(
        self, outputs: list[SkillOutput]
    ) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
        """Identify top issues and wins across all skill outputs.

        Issues are sorted by severity (critical > high > medium > low;
        unknown severities rank as medium); wins keep input order.

        Returns:
            (top_issues, top_wins), each capped at 20 entries.
        """
        all_issues: list[dict[str, Any]] = []
        all_wins: list[dict[str, Any]] = []
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}

        for output in outputs:
            # Tag each item with its source skill for traceability.
            provenance = {
                "source_skill": output.skill_name,
                "source_skill_id": output.skill_id,
                "category": output.category,
            }
            for issue in output.issues:
                all_issues.append({**issue, **provenance})
            for win in output.wins:
                all_wins.append({**win, **provenance})

        # Stable sort: equal-severity issues keep their relative order.
        all_issues.sort(
            key=lambda i: severity_order.get(i.get("severity", "medium"), 2)
        )

        return all_issues[:20], all_wins[:20]

    def build_timeline(self, outputs: list[SkillOutput]) -> list[dict[str, Any]]:
        """Build an audit history timeline, newest entries first."""
        timeline = [
            {
                "date": output.audit_date,
                "skill": output.skill_name,
                "skill_id": output.skill_id,
                "health_score": output.health_score,
                "category": output.category,
                "issues_count": len(output.issues),
            }
            for output in outputs
        ]
        timeline.sort(key=lambda e: e.get("date", ""), reverse=True)
        return timeline

    async def run(
        self,
        domain: str,
        output_dir: str | None = None,
        date_from: str | None = None,
        date_to: str | None = None,
    ) -> AggregatedReport:
        """Orchestrate the full report aggregation pipeline.

        Args:
            domain: Target domain or URL; normalized via _extract_domain.
            output_dir: Optional directory of skill JSON outputs to scan.
            date_from: Optional inclusive start date (YYYY-MM-DD).
            date_to: Optional inclusive end date (YYYY-MM-DD).

        Returns:
            An AggregatedReport; non-fatal problems are appended to
            report.errors rather than raised.
        """
        target_domain = self._extract_domain(domain)
        report = AggregatedReport(
            domain=target_domain,
            report_date=date.today().isoformat(),
            audit_id=self._generate_audit_id(),
            timestamp=datetime.now().isoformat(),
        )

        all_outputs: list[SkillOutput] = []

        # Step 1: Scan local outputs.
        if output_dir:
            self.logger.info(f"Step 1/5: Scanning local outputs in {output_dir}...")
            all_outputs.extend(
                self.scan_local_outputs(
                    output_dir, domain=target_domain,
                    date_from=date_from, date_to=date_to,
                )
            )
        else:
            self.logger.info("Step 1/5: No output directory specified, skipping local scan.")

        # Step 2: Query Notion for past audits (best-effort).
        self.logger.info("Step 2/5: Querying Notion for past audits...")
        try:
            notion_outputs = await self.query_notion_audits(
                domain=target_domain,
                date_from=date_from,
                date_to=date_to,
            )
            all_outputs.extend(notion_outputs)
        except Exception as e:
            # Notion is optional input; record the failure and continue.
            msg = f"Notion query error: {e}"
            self.logger.error(msg)
            report.errors.append(msg)

        if not all_outputs:
            self.logger.warning(
                "No skill outputs found. Provide --output-dir with JSON files "
                "from SEO skills 11-33, or ensure Notion audit log has entries."
            )
            report.errors.append("No skill outputs found to aggregate.")
            return report

        # Step 3: Normalize and compute health scores.
        self.logger.info(
            f"Step 3/5: Normalizing {len(all_outputs)} skill outputs..."
        )
        report.skills_included = [
            {
                "skill_id": o.skill_id,
                "skill_name": o.skill_name,
                "audit_date": o.audit_date,
            }
            for o in all_outputs
        ]
        report.raw_outputs = [self.normalize_output(o) for o in all_outputs]

        overall_health, category_scores = self.compute_cross_skill_health(all_outputs)
        report.overall_health = overall_health
        report.category_scores = category_scores

        # Trend: compare the average of the older half of dated scores with
        # the newer half; a +/-3 point band counts as "stable".
        scores_by_date = sorted(
            [(o.audit_date, o.health_score) for o in all_outputs if o.health_score > 0],
            key=lambda x: x[0],
        )
        if len(scores_by_date) >= 2:
            mid = len(scores_by_date) // 2
            older_avg = sum(s for _, s in scores_by_date[:mid]) / max(mid, 1)
            newer_avg = sum(s for _, s in scores_by_date[mid:]) / max(len(scores_by_date) - mid, 1)
            if newer_avg > older_avg + 3:
                report.health_trend = "improving"
            elif newer_avg < older_avg - 3:
                report.health_trend = "declining"
            else:
                report.health_trend = "stable"

        # Step 4: Identify priorities.
        self.logger.info("Step 4/5: Identifying top issues and wins...")
        report.top_issues, report.top_wins = self.identify_priorities(all_outputs)

        # Step 5: Build timeline.
        self.logger.info("Step 5/5: Building audit history timeline...")
        report.timeline = self.build_timeline(all_outputs)

        self.logger.info(
            f"Aggregation complete: {len(all_outputs)} skills, "
            f"health={report.overall_health}/100, "
            f"trend={report.health_trend}, "
            f"issues={len(report.top_issues)}, wins={len(report.top_wins)}"
        )

        return report
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Output formatting
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _format_text_report(report: AggregatedReport) -> str:
|
||||
"""Format aggregated report as human-readable text."""
|
||||
lines: list[str] = []
|
||||
lines.append("=" * 70)
|
||||
lines.append(" SEO REPORTING DASHBOARD - AGGREGATED REPORT")
|
||||
lines.append(f" Domain: {report.domain}")
|
||||
lines.append(f" Report Date: {report.report_date}")
|
||||
lines.append(f" Audit ID: {report.audit_id}")
|
||||
lines.append("=" * 70)
|
||||
|
||||
# Health score
|
||||
lines.append("")
|
||||
lines.append(f" Overall Health: {report.overall_health}/100 ({report.health_trend})")
|
||||
lines.append("-" * 50)
|
||||
|
||||
# Category scores
|
||||
if report.category_scores:
|
||||
lines.append("")
|
||||
lines.append("--- CATEGORY SCORES ---")
|
||||
for cat, score in sorted(
|
||||
report.category_scores.items(), key=lambda x: x[1], reverse=True
|
||||
):
|
||||
bar = "#" * int(score / 5) + "." * (20 - int(score / 5))
|
||||
lines.append(f" {cat:<20} [{bar}] {score:.1f}/100")
|
||||
|
||||
# Skills included
|
||||
if report.skills_included:
|
||||
lines.append("")
|
||||
lines.append("--- SKILLS INCLUDED ---")
|
||||
for skill in report.skills_included:
|
||||
lines.append(
|
||||
f" [{skill['skill_id']:>2}] {skill['skill_name']:<30} "
|
||||
f"({skill['audit_date']})"
|
||||
)
|
||||
|
||||
# Top issues
|
||||
if report.top_issues:
|
||||
lines.append("")
|
||||
lines.append("--- TOP ISSUES ---")
|
||||
for i, issue in enumerate(report.top_issues[:10], 1):
|
||||
severity = issue.get("severity", "medium").upper()
|
||||
desc = issue.get("description", "No description")
|
||||
cat = issue.get("category", "")
|
||||
lines.append(f" {i:>2}. [{severity}] ({cat}) {desc}")
|
||||
|
||||
# Top wins
|
||||
if report.top_wins:
|
||||
lines.append("")
|
||||
lines.append("--- TOP WINS ---")
|
||||
for i, win in enumerate(report.top_wins[:10], 1):
|
||||
desc = win.get("description", "No description")
|
||||
cat = win.get("category", "")
|
||||
lines.append(f" {i:>2}. ({cat}) {desc}")
|
||||
|
||||
# Timeline
|
||||
if report.timeline:
|
||||
lines.append("")
|
||||
lines.append("--- AUDIT TIMELINE ---")
|
||||
lines.append(f" {'Date':<12} {'Skill':<25} {'Score':>8} {'Issues':>8}")
|
||||
lines.append(" " + "-" * 55)
|
||||
for entry in report.timeline[:15]:
|
||||
lines.append(
|
||||
f" {entry['date']:<12} {entry['skill']:<25} "
|
||||
f"{entry['health_score']:>7.1f} {entry['issues_count']:>7}"
|
||||
)
|
||||
|
||||
# Errors
|
||||
if report.errors:
|
||||
lines.append("")
|
||||
lines.append("--- ERRORS ---")
|
||||
for err in report.errors:
|
||||
lines.append(f" - {err}")
|
||||
|
||||
lines.append("")
|
||||
lines.append("=" * 70)
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
def _serialize_report(report: AggregatedReport) -> dict:
|
||||
"""Convert report to JSON-serializable dict."""
|
||||
return {
|
||||
"domain": report.domain,
|
||||
"report_date": report.report_date,
|
||||
"overall_health": report.overall_health,
|
||||
"health_trend": report.health_trend,
|
||||
"skills_included": report.skills_included,
|
||||
"category_scores": report.category_scores,
|
||||
"top_issues": report.top_issues,
|
||||
"top_wins": report.top_wins,
|
||||
"timeline": report.timeline,
|
||||
"raw_outputs": report.raw_outputs,
|
||||
"audit_id": report.audit_id,
|
||||
"timestamp": report.timestamp,
|
||||
"errors": report.errors if report.errors else None,
|
||||
}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Build the CLI parser and parse *argv* (defaults to sys.argv)."""
    epilog = (
        "Examples:\n"
        "  python report_aggregator.py --domain https://example.com --json\n"
        "  python report_aggregator.py --domain https://example.com --output-dir ./audit_outputs --json\n"
        "  python report_aggregator.py --domain https://example.com --from 2025-01-01 --to 2025-03-31 --json\n"
    )
    parser = argparse.ArgumentParser(
        description="SEO Report Aggregator - Collect and normalize outputs from all SEO skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument(
        "--domain", required=True,
        help="Target domain to aggregate reports for",
    )
    parser.add_argument(
        "--output-dir", type=str, default=None,
        help="Directory containing JSON outputs from SEO skills",
    )
    # --from / --to are reserved words as attributes, hence explicit dest=.
    parser.add_argument(
        "--from", type=str, default=None, dest="date_from",
        help="Start date for filtering (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--to", type=str, default=None, dest="date_to",
        help="End date for filtering (YYYY-MM-DD)",
    )
    parser.add_argument(
        "--json", action="store_true", default=False,
        help="Output in JSON format",
    )
    parser.add_argument(
        "--output", type=str, default=None,
        help="Save output to file path",
    )
    return parser.parse_args(argv)
|
||||
|
||||
|
||||
async def async_main(args: argparse.Namespace) -> None:
    """Run the aggregation pipeline and emit the report per the CLI flags."""
    aggregator = ReportAggregator()

    report = await aggregator.run(
        domain=args.domain,
        output_dir=args.output_dir,
        date_from=args.date_from,
        date_to=args.date_to,
    )

    # Pick the output representation: machine-readable JSON or the
    # human-readable text dashboard.
    rendered = (
        json.dumps(_serialize_report(report), indent=2, ensure_ascii=False)
        if args.json
        else _format_text_report(report)
    )

    # --output writes to a file; otherwise the report goes to stdout.
    if args.output:
        Path(args.output).write_text(rendered, encoding="utf-8")
        logger.info(f"Report saved to {args.output}")
    else:
        print(rendered)

    aggregator.print_stats()
|
||||
|
||||
|
||||
def main() -> None:
    """Synchronous CLI entry point: parse args, then drive the async pipeline."""
    asyncio.run(async_main(parse_args()))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Reference in New Issue
Block a user