feat(seo-audit): Add comprehensive SEO audit skill

Add ourdigital-seo-audit skill with:
- Full site audit orchestrator (full_audit.py)
- Google Search Console and PageSpeed API clients
- Schema.org JSON-LD validation and generation
- XML sitemap and robots.txt validation
- Notion database integration for findings export
- Core Web Vitals measurement and analysis
- 7 schema templates (article, faq, product, etc.)

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2025-12-14 02:30:02 +09:00
parent b3ceebdf63
commit 9426787ba6
28 changed files with 9279 additions and 0 deletions

View File

@@ -0,0 +1,951 @@
"""
Notion Reporter - Create SEO audit findings in Notion
=====================================================
Purpose: Output SEO audit findings to Notion databases
Python: 3.10+
Usage:
    from notion_reporter import NotionReporter, SEOFinding, AuditReport

    reporter = NotionReporter()

    # Create audit report with checklist table
    report = AuditReport(site="https://example.com")
    report.add_finding(SEOFinding(...))
    reporter.create_audit_report(report)
"""
import json
import logging
import os
from dataclasses import dataclass, field
from datetime import datetime
from pathlib import Path
from typing import Any
from notion_client import Client
from base_client import config
# Root logging setup: INFO and above, prefixed with timestamp and level name.
logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s", level=logging.INFO)
logger = logging.getLogger(__name__)

# "templates" folder that sits next to the directory containing this module.
TEMPLATE_DIR = Path(__file__).parent.parent.joinpath("templates")

# Notion database that receives findings when a caller supplies no database ID
# (the OurDigital SEO Audit Log).
DEFAULT_DATABASE_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"

# Parent page for audit reports; currently the same ID as the audit log database.
DEFAULT_AUDIT_REPORTS_PAGE_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"
@dataclass
class SEOFinding:
"""Represents an SEO audit finding."""
issue: str
category: str
priority: str
status: str = "To Fix"
url: str | None = None
description: str | None = None
impact: str | None = None
recommendation: str | None = None
site: str | None = None # The audited site URL
audit_id: str | None = None # Groups findings from same audit session
affected_urls: list[str] = field(default_factory=list) # List of all affected URLs
@dataclass
class AuditReport:
    """A complete SEO audit: the audited site, its findings and check results.

    Findings are normally added through ``add_finding``, which stamps each
    one with the report's site and a combined ``<domain>-<audit_id>``
    identifier and keeps ``total_issues`` in sync.
    """

    site: str
    # Timestamp-based session identifier, e.g. "20251214-023002".
    audit_id: str = field(default_factory=lambda: datetime.now().strftime("%Y%m%d-%H%M%S"))
    audit_date: datetime = field(default_factory=datetime.now)
    findings: list["SEOFinding"] = field(default_factory=list)

    # Audit check results
    robots_txt_status: str = "Not checked"
    sitemap_status: str = "Not checked"
    schema_status: str = "Not checked"
    performance_status: str = "Not checked"

    # Summary statistics
    total_urls_checked: int = 0
    total_issues: int = 0

    def __post_init__(self) -> None:
        """Count findings that were handed straight to the constructor.

        Such findings bypass ``add_finding``, which would otherwise leave
        ``total_issues`` at 0. An explicitly supplied non-zero count is kept.
        """
        if self.findings and not self.total_issues:
            self.total_issues = len(self.findings)

    def add_finding(self, finding: "SEOFinding") -> None:
        """Attach *finding* to this report.

        Sets ``finding.site`` and ``finding.audit_id`` (mutating the passed
        object), appends it to ``findings`` and refreshes ``total_issues``.
        """
        # Host part of the site URL: drop the scheme, keep text before the first "/".
        domain = self.site.replace("https://", "").replace("http://", "").split("/")[0]
        finding.site = self.site
        finding.audit_id = f"{domain}-{self.audit_id}"
        self.findings.append(finding)
        self.total_issues = len(self.findings)

    def get_findings_by_priority(self) -> dict[str, list["SEOFinding"]]:
        """Group findings by priority.

        Returns:
            A dict with exactly the keys "Critical", "High", "Medium" and
            "Low" (in that order). Findings whose priority is not one of
            these labels are left out.
        """
        grouped: dict[str, list] = {"Critical": [], "High": [], "Medium": [], "Low": []}
        for finding in self.findings:
            bucket = grouped.get(finding.priority)
            if bucket is not None:
                bucket.append(finding)
        return grouped

    def get_findings_by_category(self) -> dict[str, list["SEOFinding"]]:
        """Group findings by category, in order of first appearance."""
        grouped: dict[str, list] = {}
        for finding in self.findings:
            grouped.setdefault(finding.category, []).append(finding)
        return grouped
class NotionReporter:
"""Create and manage SEO audit findings in Notion."""
CATEGORIES = [
"Technical SEO",
"On-page SEO",
"Content",
"Local SEO",
"Performance",
"Schema/Structured Data",
"Sitemap",
"Robots.txt",
]
PRIORITIES = ["Critical", "High", "Medium", "Low"]
STATUSES = ["To Fix", "In Progress", "Fixed", "Monitoring"]
CATEGORY_COLORS = {
"Technical SEO": "blue",
"On-page SEO": "green",
"Content": "purple",
"Local SEO": "orange",
"Performance": "red",
"Schema/Structured Data": "yellow",
"Sitemap": "pink",
"Robots.txt": "gray",
}
PRIORITY_COLORS = {
"Critical": "red",
"High": "orange",
"Medium": "yellow",
"Low": "gray",
}
def __init__(self, token: str | None = None):
"""
Initialize Notion reporter.
Args:
token: Notion API token
"""
self.token = token or config.notion_token
if not self.token:
raise ValueError(
"Notion token not configured. "
"Set NOTION_TOKEN or NOTION_API_KEY environment variable."
)
self.client = Client(auth=self.token)
def create_findings_database(
self,
parent_page_id: str,
title: str = "SEO Audit Findings",
) -> str:
"""
Create a new SEO findings database.
Args:
parent_page_id: Parent page ID for the database
title: Database title
Returns:
Database ID
"""
# Build database schema
properties = {
"Issue": {"title": {}},
"Category": {
"select": {
"options": [
{"name": cat, "color": self.CATEGORY_COLORS.get(cat, "default")}
for cat in self.CATEGORIES
]
}
},
"Priority": {
"select": {
"options": [
{"name": pri, "color": self.PRIORITY_COLORS.get(pri, "default")}
for pri in self.PRIORITIES
]
}
},
"Status": {
"status": {
"options": [
{"name": "To Fix", "color": "red"},
{"name": "In Progress", "color": "yellow"},
{"name": "Fixed", "color": "green"},
{"name": "Monitoring", "color": "blue"},
],
"groups": [
{"name": "To-do", "option_ids": [], "color": "gray"},
{"name": "In progress", "option_ids": [], "color": "blue"},
{"name": "Complete", "option_ids": [], "color": "green"},
],
}
},
"URL": {"url": {}},
"Description": {"rich_text": {}},
"Impact": {"rich_text": {}},
"Recommendation": {"rich_text": {}},
"Found Date": {"date": {}},
}
try:
response = self.client.databases.create(
parent={"page_id": parent_page_id},
title=[{"type": "text", "text": {"content": title}}],
properties=properties,
)
database_id = response["id"]
logger.info(f"Created database: {database_id}")
return database_id
except Exception as e:
logger.error(f"Failed to create database: {e}")
raise
def add_finding(
self,
finding: SEOFinding,
database_id: str | None = None,
) -> str:
"""
Add a finding to the database with page content.
Args:
finding: SEOFinding object
database_id: Target database ID (defaults to OurDigital SEO Audit Log)
Returns:
Page ID of created entry
"""
db_id = database_id or DEFAULT_DATABASE_ID
# Database properties (metadata)
properties = {
"Issue": {"title": [{"text": {"content": finding.issue}}]},
"Category": {"select": {"name": finding.category}},
"Priority": {"select": {"name": finding.priority}},
"Found Date": {"date": {"start": datetime.now().strftime("%Y-%m-%d")}},
}
if finding.url:
properties["URL"] = {"url": finding.url}
if finding.site:
properties["Site"] = {"url": finding.site}
if finding.audit_id:
properties["Audit ID"] = {
"rich_text": [{"text": {"content": finding.audit_id}}]
}
# Page content blocks (Description, Impact, Recommendation)
children = []
if finding.description:
children.extend([
{
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Description"}}]
}
},
{
"object": "block",
"type": "paragraph",
"paragraph": {
"rich_text": [{"type": "text", "text": {"content": finding.description}}]
}
}
])
if finding.impact:
children.extend([
{
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Impact"}}]
}
},
{
"object": "block",
"type": "callout",
"callout": {
"rich_text": [{"type": "text", "text": {"content": finding.impact}}],
"icon": {"type": "emoji", "emoji": "⚠️"}
}
}
])
if finding.recommendation:
children.extend([
{
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Recommendation"}}]
}
},
{
"object": "block",
"type": "callout",
"callout": {
"rich_text": [{"type": "text", "text": {"content": finding.recommendation}}],
"icon": {"type": "emoji", "emoji": "💡"}
}
}
])
try:
response = self.client.pages.create(
parent={"database_id": db_id},
properties=properties,
children=children if children else None,
)
return response["id"]
except Exception as e:
logger.error(f"Failed to add finding: {e}")
raise
def add_findings_batch(
self,
findings: list[SEOFinding],
database_id: str | None = None,
) -> list[str]:
"""
Add multiple findings to the database.
Args:
findings: List of SEOFinding objects
database_id: Target database ID (defaults to OurDigital SEO Audit Log)
Returns:
List of created page IDs
"""
page_ids = []
for finding in findings:
try:
page_id = self.add_finding(finding, database_id)
page_ids.append(page_id)
except Exception as e:
logger.error(f"Failed to add finding '{finding.issue}': {e}")
return page_ids
def create_audit_summary_page(
self,
parent_page_id: str,
url: str,
summary: dict,
) -> str:
"""
Create a summary page for the audit.
Args:
parent_page_id: Parent page ID
url: Audited URL
summary: Audit summary data
Returns:
Page ID
"""
# Build page content
children = [
{
"object": "block",
"type": "heading_1",
"heading_1": {
"rich_text": [{"type": "text", "text": {"content": f"SEO Audit: {url}"}}]
},
},
{
"object": "block",
"type": "paragraph",
"paragraph": {
"rich_text": [
{
"type": "text",
"text": {"content": f"Audit Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}"},
}
]
},
},
{
"object": "block",
"type": "divider",
"divider": {},
},
{
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Summary"}}]
},
},
]
# Add summary statistics
if "stats" in summary:
stats = summary["stats"]
stats_text = "\n".join([f"{k}: {v}" for k, v in stats.items()])
children.append({
"object": "block",
"type": "paragraph",
"paragraph": {
"rich_text": [{"type": "text", "text": {"content": stats_text}}]
},
})
# Add findings by priority
if "findings_by_priority" in summary:
children.append({
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Findings by Priority"}}]
},
})
for priority, count in summary["findings_by_priority"].items():
children.append({
"object": "block",
"type": "bulleted_list_item",
"bulleted_list_item": {
"rich_text": [{"type": "text", "text": {"content": f"{priority}: {count}"}}]
},
})
try:
response = self.client.pages.create(
parent={"page_id": parent_page_id},
properties={
"title": {"title": [{"text": {"content": f"SEO Audit - {url}"}}]}
},
children=children,
)
return response["id"]
except Exception as e:
logger.error(f"Failed to create summary page: {e}")
raise
def query_findings(
self,
database_id: str,
category: str | None = None,
priority: str | None = None,
status: str | None = None,
) -> list[dict]:
"""
Query findings from database.
Args:
database_id: Database ID
category: Filter by category
priority: Filter by priority
status: Filter by status
Returns:
List of finding records
"""
filters = []
if category:
filters.append({
"property": "Category",
"select": {"equals": category},
})
if priority:
filters.append({
"property": "Priority",
"select": {"equals": priority},
})
if status:
filters.append({
"property": "Status",
"status": {"equals": status},
})
query_params = {"database_id": database_id}
if filters:
if len(filters) == 1:
query_params["filter"] = filters[0]
else:
query_params["filter"] = {"and": filters}
try:
response = self.client.databases.query(**query_params)
return response.get("results", [])
except Exception as e:
logger.error(f"Failed to query findings: {e}")
raise
def update_finding_status(
self,
page_id: str,
status: str,
) -> None:
"""Update the status of a finding."""
if status not in self.STATUSES:
raise ValueError(f"Invalid status: {status}")
try:
self.client.pages.update(
page_id=page_id,
properties={"Status": {"status": {"name": status}}},
)
logger.info(f"Updated finding {page_id} to {status}")
except Exception as e:
logger.error(f"Failed to update status: {e}")
raise
def create_audit_report(
self,
report: "AuditReport",
database_id: str | None = None,
) -> dict:
"""
Create a comprehensive audit report page with checklist table.
This creates:
1. Individual finding pages in the database
2. A summary page with all findings in table format for checklist tracking
Args:
report: AuditReport object with all findings
database_id: Target database ID (defaults to OurDigital SEO Audit Log)
Returns:
Dict with summary_page_id and finding_page_ids
"""
db_id = database_id or DEFAULT_DATABASE_ID
# Generate full audit ID
site_domain = report.site.replace('https://', '').replace('http://', '').split('/')[0]
full_audit_id = f"{site_domain}-{report.audit_id}"
result = {
"audit_id": full_audit_id,
"site": report.site,
"summary_page_id": None,
"finding_page_ids": [],
}
# 1. Create individual finding pages in database
logger.info(f"Creating {len(report.findings)} finding pages...")
for finding in report.findings:
finding.audit_id = full_audit_id
finding.site = report.site
try:
page_id = self.add_finding(finding, db_id)
result["finding_page_ids"].append(page_id)
except Exception as e:
logger.error(f"Failed to add finding '{finding.issue}': {e}")
# 2. Create summary page with checklist table
logger.info("Creating audit summary page with checklist...")
summary_page_id = self._create_audit_summary_with_table(report, full_audit_id, db_id)
result["summary_page_id"] = summary_page_id
logger.info(f"Audit report created: {full_audit_id}")
return result
def _create_audit_summary_with_table(
self,
report: "AuditReport",
audit_id: str,
database_id: str,
) -> str:
"""
Create audit summary page with checklist table format.
Args:
report: AuditReport object
audit_id: Full audit ID
database_id: Parent database ID
Returns:
Summary page ID
"""
site_domain = report.site.replace('https://', '').replace('http://', '').split('/')[0]
# Build page content blocks
children = []
# Header with audit info
children.append({
"object": "block",
"type": "callout",
"callout": {
"rich_text": [
{"type": "text", "text": {"content": f"Audit ID: {audit_id}\n"}},
{"type": "text", "text": {"content": f"Date: {report.audit_date.strftime('%Y-%m-%d %H:%M')}\n"}},
{"type": "text", "text": {"content": f"Total Issues: {report.total_issues}"}},
],
"icon": {"type": "emoji", "emoji": "📋"},
"color": "blue_background",
}
})
# Audit Status Summary
children.append({
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Audit Status"}}]
}
})
# Status table
status_table = {
"object": "block",
"type": "table",
"table": {
"table_width": 2,
"has_column_header": True,
"has_row_header": False,
"children": [
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "Check"}}],
[{"type": "text", "text": {"content": "Status"}}],
]
}
},
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "Robots.txt"}}],
[{"type": "text", "text": {"content": report.robots_txt_status}}],
]
}
},
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "Sitemap"}}],
[{"type": "text", "text": {"content": report.sitemap_status}}],
]
}
},
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "Schema Markup"}}],
[{"type": "text", "text": {"content": report.schema_status}}],
]
}
},
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "Performance"}}],
[{"type": "text", "text": {"content": report.performance_status}}],
]
}
},
]
}
}
children.append(status_table)
# Divider
children.append({"object": "block", "type": "divider", "divider": {}})
# Findings Checklist Header
children.append({
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Findings Checklist"}}]
}
})
children.append({
"object": "block",
"type": "paragraph",
"paragraph": {
"rich_text": [{"type": "text", "text": {"content": "Use this checklist to track fixes. Check off items as you complete them."}}]
}
})
# Create findings table with checklist format
if report.findings:
# Build table rows - Header row
table_rows = [
{
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": "#"}, "annotations": {"bold": True}}],
[{"type": "text", "text": {"content": "Priority"}, "annotations": {"bold": True}}],
[{"type": "text", "text": {"content": "Category"}, "annotations": {"bold": True}}],
[{"type": "text", "text": {"content": "Issue"}, "annotations": {"bold": True}}],
[{"type": "text", "text": {"content": "URL"}, "annotations": {"bold": True}}],
]
}
}
]
# Add finding rows
for idx, finding in enumerate(report.findings, 1):
# Truncate long text for table cells
issue_text = finding.issue[:50] + "..." if len(finding.issue) > 50 else finding.issue
url_text = finding.url[:40] + "..." if finding.url and len(finding.url) > 40 else (finding.url or "-")
table_rows.append({
"type": "table_row",
"table_row": {
"cells": [
[{"type": "text", "text": {"content": str(idx)}}],
[{"type": "text", "text": {"content": finding.priority}}],
[{"type": "text", "text": {"content": finding.category}}],
[{"type": "text", "text": {"content": issue_text}}],
[{"type": "text", "text": {"content": url_text}}],
]
}
})
findings_table = {
"object": "block",
"type": "table",
"table": {
"table_width": 5,
"has_column_header": True,
"has_row_header": False,
"children": table_rows
}
}
children.append(findings_table)
# Divider
children.append({"object": "block", "type": "divider", "divider": {}})
# Detailed Findings with To-Do checkboxes
children.append({
"object": "block",
"type": "heading_2",
"heading_2": {
"rich_text": [{"type": "text", "text": {"content": "Detailed Findings & Actions"}}]
}
})
# Group findings by priority and add as to-do items
for priority in ["Critical", "High", "Medium", "Low"]:
priority_findings = [f for f in report.findings if f.priority == priority]
if not priority_findings:
continue
# Priority header with emoji
priority_emoji = {"Critical": "🔴", "High": "🟠", "Medium": "🟡", "Low": ""}
children.append({
"object": "block",
"type": "heading_3",
"heading_3": {
"rich_text": [{"type": "text", "text": {"content": f"{priority_emoji.get(priority, '')} {priority} Priority ({len(priority_findings)})"}}]
}
})
# Add each finding as a to-do item with details
for finding in priority_findings:
# Main to-do item
children.append({
"object": "block",
"type": "to_do",
"to_do": {
"rich_text": [
{"type": "text", "text": {"content": f"[{finding.category}] "}, "annotations": {"bold": True}},
{"type": "text", "text": {"content": finding.issue}},
],
"checked": False,
}
})
# URL if available
if finding.url:
children.append({
"object": "block",
"type": "bulleted_list_item",
"bulleted_list_item": {
"rich_text": [
{"type": "text", "text": {"content": "URL: "}},
{"type": "text", "text": {"content": finding.url, "link": {"url": finding.url}}},
]
}
})
# Affected URLs list if available
if finding.affected_urls:
children.append({
"object": "block",
"type": "toggle",
"toggle": {
"rich_text": [{"type": "text", "text": {"content": f"Affected URLs ({len(finding.affected_urls)})"}}],
"children": [
{
"object": "block",
"type": "bulleted_list_item",
"bulleted_list_item": {
"rich_text": [{"type": "text", "text": {"content": url, "link": {"url": url} if url.startswith("http") else None}}]
}
}
for url in finding.affected_urls[:20] # Limit to 20 URLs
] + ([{
"object": "block",
"type": "paragraph",
"paragraph": {
"rich_text": [{"type": "text", "text": {"content": f"... and {len(finding.affected_urls) - 20} more URLs"}}]
}
}] if len(finding.affected_urls) > 20 else [])
}
})
# Recommendation as sub-item
if finding.recommendation:
children.append({
"object": "block",
"type": "bulleted_list_item",
"bulleted_list_item": {
"rich_text": [
{"type": "text", "text": {"content": "💡 "}, "annotations": {"bold": True}},
{"type": "text", "text": {"content": finding.recommendation}},
]
}
})
# Create the summary page
try:
response = self.client.pages.create(
parent={"database_id": database_id},
properties={
"Issue": {"title": [{"text": {"content": f"📊 Audit Report: {site_domain}"}}]},
"Category": {"select": {"name": "Technical SEO"}},
"Priority": {"select": {"name": "High"}},
"Site": {"url": report.site},
"Audit ID": {"rich_text": [{"text": {"content": audit_id}}]},
"Found Date": {"date": {"start": report.audit_date.strftime("%Y-%m-%d")}},
},
children=children,
)
logger.info(f"Created audit summary page: {response['id']}")
return response["id"]
except Exception as e:
logger.error(f"Failed to create audit summary page: {e}")
raise
def create_quick_audit_report(
self,
site: str,
findings: list[SEOFinding],
robots_status: str = "Not checked",
sitemap_status: str = "Not checked",
schema_status: str = "Not checked",
performance_status: str = "Not checked",
database_id: str | None = None,
) -> dict:
"""
Quick method to create audit report from a list of findings.
Args:
site: Site URL
findings: List of SEOFinding objects
robots_status: Robots.txt check result
sitemap_status: Sitemap check result
schema_status: Schema check result
performance_status: Performance check result
database_id: Target database ID
Returns:
Dict with audit results
"""
report = AuditReport(site=site)
report.robots_txt_status = robots_status
report.sitemap_status = sitemap_status
report.schema_status = schema_status
report.performance_status = performance_status
for finding in findings:
report.add_finding(finding)
return self.create_audit_report(report, database_id)
def main() -> None:
    """CLI entry point for testing.

    Supports three actions: ``create-db`` (needs ``--parent-id``),
    ``add-finding`` (needs ``--database-id``; uploads a fixed example
    finding) and ``query`` (needs ``--database-id``; prints the titles of
    up to five findings).
    """
    import argparse

    parser = argparse.ArgumentParser(description="Notion SEO Reporter")
    parser.add_argument("--action", "-a", required=True,
                        choices=["create-db", "add-finding", "query"],
                        help="Action to perform")
    parser.add_argument("--parent-id", "-p", help="Parent page ID")
    parser.add_argument("--database-id", "-d", help="Database ID")
    parser.add_argument("--title", "-t", default="SEO Audit Findings",
                        help="Database title")
    args = parser.parse_args()

    # Check per-action arguments first so a usage error is reported before
    # the reporter looks up the Notion token.
    if args.action == "create-db" and not args.parent_id:
        parser.error("--parent-id required for create-db")
    if args.action in ("add-finding", "query") and not args.database_id:
        parser.error(f"--database-id required for {args.action}")

    reporter = NotionReporter()

    if args.action == "create-db":
        db_id = reporter.create_findings_database(args.parent_id, args.title)
        print(f"Created database: {db_id}")
    elif args.action == "add-finding":
        # Example finding
        finding = SEOFinding(
            issue="Missing meta description",
            category="On-page SEO",
            priority="Medium",
            url="https://example.com/page",
            description="Page is missing meta description tag",
            impact="May affect CTR in search results",
            recommendation="Add unique meta description under 160 characters",
        )
        # add_finding takes the finding first and the database ID second.
        page_id = reporter.add_finding(finding, args.database_id)
        print(f"Created finding: {page_id}")
    elif args.action == "query":
        findings = reporter.query_findings(args.database_id)
        print(f"Found {len(findings)} findings")
        for f in findings[:5]:
            title = f["properties"]["Issue"]["title"]
            if title:
                print(f"  - {title[0]['plain_text']}")


if __name__ == "__main__":
    main()