""" Notion Reporter - Create SEO audit findings in Notion ===================================================== Purpose: Output SEO audit findings to Notion databases Python: 3.10+ Usage: from notion_reporter import NotionReporter, SEOFinding, AuditReport reporter = NotionReporter() # Create audit report with checklist table report = AuditReport(site="https://example.com") report.add_finding(SEOFinding(...)) reporter.create_audit_report(report) """ import json import logging import os from dataclasses import dataclass, field from datetime import datetime from pathlib import Path from typing import Any from notion_client import Client from base_client import config logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) # Template directory TEMPLATE_DIR = Path(__file__).parent.parent / "templates" # Default OurDigital SEO Audit Log database DEFAULT_DATABASE_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef" # Default parent page for audit reports (OurDigital SEO Audit Log) DEFAULT_AUDIT_REPORTS_PAGE_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef" @dataclass class SEOFinding: """Represents an SEO audit finding.""" issue: str category: str priority: str status: str = "To Fix" url: str | None = None description: str | None = None impact: str | None = None recommendation: str | None = None site: str | None = None # The audited site URL audit_id: str | None = None # Groups findings from same audit session affected_urls: list[str] = field(default_factory=list) # List of all affected URLs @dataclass class AuditReport: """Represents a complete SEO audit report with checklist.""" site: str audit_id: str = field(default_factory=lambda: datetime.now().strftime("%Y%m%d-%H%M%S")) audit_date: datetime = field(default_factory=datetime.now) findings: list[SEOFinding] = field(default_factory=list) # Audit check results robots_txt_status: str = "Not checked" sitemap_status: str = "Not checked" schema_status: str = "Not checked" performance_status: str = "Not checked" # Summary statistics total_urls_checked: int = 0 total_issues: int = 0 def add_finding(self, finding: SEOFinding) -> None: """Add a finding to the report.""" finding.site = self.site finding.audit_id = f"{self.site.replace('https://', '').replace('http://', '').split('/')[0]}-{self.audit_id}" self.findings.append(finding) self.total_issues = len(self.findings) def get_findings_by_priority(self) -> dict[str, list[SEOFinding]]: """Group findings by priority.""" result = {"Critical": [], "High": [], "Medium": [], "Low": []} for f in self.findings: if f.priority in result: result[f.priority].append(f) return result def get_findings_by_category(self) -> dict[str, list[SEOFinding]]: """Group findings by category.""" result = {} for f in self.findings: if f.category not in result: result[f.category] = [] result[f.category].append(f) return result class NotionReporter: """Create and manage SEO audit findings in Notion.""" CATEGORIES = [ "Technical SEO", "On-page SEO", "Content", "Local SEO", "Performance", "Schema/Structured Data", "Sitemap", "Robots.txt", ] PRIORITIES = ["Critical", "High", "Medium", "Low"] STATUSES = ["To Fix", "In Progress", "Fixed", "Monitoring"] CATEGORY_COLORS = { "Technical SEO": "blue", "On-page SEO": "green", "Content": "purple", "Local SEO": "orange", "Performance": "red", "Schema/Structured Data": "yellow", "Sitemap": "pink", "Robots.txt": "gray", } PRIORITY_COLORS = { "Critical": "red", "High": "orange", "Medium": "yellow", "Low": "gray", } def __init__(self, token: str | None = None): """ Initialize Notion reporter. Args: token: Notion API token """ self.token = token or config.notion_token if not self.token: raise ValueError( "Notion token not configured. " "Set NOTION_TOKEN or NOTION_API_KEY environment variable." ) self.client = Client(auth=self.token) def create_findings_database( self, parent_page_id: str, title: str = "SEO Audit Findings", ) -> str: """ Create a new SEO findings database. Args: parent_page_id: Parent page ID for the database title: Database title Returns: Database ID """ # Build database schema properties = { "Issue": {"title": {}}, "Category": { "select": { "options": [ {"name": cat, "color": self.CATEGORY_COLORS.get(cat, "default")} for cat in self.CATEGORIES ] } }, "Priority": { "select": { "options": [ {"name": pri, "color": self.PRIORITY_COLORS.get(pri, "default")} for pri in self.PRIORITIES ] } }, "Status": { "status": { "options": [ {"name": "To Fix", "color": "red"}, {"name": "In Progress", "color": "yellow"}, {"name": "Fixed", "color": "green"}, {"name": "Monitoring", "color": "blue"}, ], "groups": [ {"name": "To-do", "option_ids": [], "color": "gray"}, {"name": "In progress", "option_ids": [], "color": "blue"}, {"name": "Complete", "option_ids": [], "color": "green"}, ], } }, "URL": {"url": {}}, "Description": {"rich_text": {}}, "Impact": {"rich_text": {}}, "Recommendation": {"rich_text": {}}, "Found Date": {"date": {}}, } try: response = self.client.databases.create( parent={"page_id": parent_page_id}, title=[{"type": "text", "text": {"content": title}}], properties=properties, ) database_id = response["id"] logger.info(f"Created database: {database_id}") return database_id except Exception as e: logger.error(f"Failed to create database: {e}") raise def add_finding( self, finding: SEOFinding, database_id: str | None = None, ) -> str: """ Add a finding to the database with page content. Args: finding: SEOFinding object database_id: Target database ID (defaults to OurDigital SEO Audit Log) Returns: Page ID of created entry """ db_id = database_id or DEFAULT_DATABASE_ID # Database properties (metadata) properties = { "Issue": {"title": [{"text": {"content": finding.issue}}]}, "Category": {"select": {"name": finding.category}}, "Priority": {"select": {"name": finding.priority}}, "Found Date": {"date": {"start": datetime.now().strftime("%Y-%m-%d")}}, } if finding.url: properties["URL"] = {"url": finding.url} if finding.site: properties["Site"] = {"url": finding.site} if finding.audit_id: properties["Audit ID"] = { "rich_text": [{"text": {"content": finding.audit_id}}] } # Page content blocks (Description, Impact, Recommendation) children = [] if finding.description: children.extend([ { "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Description"}}] } }, { "object": "block", "type": "paragraph", "paragraph": { "rich_text": [{"type": "text", "text": {"content": finding.description}}] } } ]) if finding.impact: children.extend([ { "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Impact"}}] } }, { "object": "block", "type": "callout", "callout": { "rich_text": [{"type": "text", "text": {"content": finding.impact}}], "icon": {"type": "emoji", "emoji": "⚠️"} } } ]) if finding.recommendation: children.extend([ { "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Recommendation"}}] } }, { "object": "block", "type": "callout", "callout": { "rich_text": [{"type": "text", "text": {"content": finding.recommendation}}], "icon": {"type": "emoji", "emoji": "💡"} } } ]) try: response = self.client.pages.create( parent={"database_id": db_id}, properties=properties, children=children if children else None, ) return response["id"] except Exception as e: logger.error(f"Failed to add finding: {e}") raise def add_findings_batch( self, findings: list[SEOFinding], database_id: str | None = None, ) -> list[str]: """ Add multiple findings to the database. Args: findings: List of SEOFinding objects database_id: Target database ID (defaults to OurDigital SEO Audit Log) Returns: List of created page IDs """ page_ids = [] for finding in findings: try: page_id = self.add_finding(finding, database_id) page_ids.append(page_id) except Exception as e: logger.error(f"Failed to add finding '{finding.issue}': {e}") return page_ids def create_audit_summary_page( self, parent_page_id: str, url: str, summary: dict, ) -> str: """ Create a summary page for the audit. Args: parent_page_id: Parent page ID url: Audited URL summary: Audit summary data Returns: Page ID """ # Build page content children = [ { "object": "block", "type": "heading_1", "heading_1": { "rich_text": [{"type": "text", "text": {"content": f"SEO Audit: {url}"}}] }, }, { "object": "block", "type": "paragraph", "paragraph": { "rich_text": [ { "type": "text", "text": {"content": f"Audit Date: {datetime.now().strftime('%Y-%m-%d %H:%M')}"}, } ] }, }, { "object": "block", "type": "divider", "divider": {}, }, { "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Summary"}}] }, }, ] # Add summary statistics if "stats" in summary: stats = summary["stats"] stats_text = "\n".join([f"• {k}: {v}" for k, v in stats.items()]) children.append({ "object": "block", "type": "paragraph", "paragraph": { "rich_text": [{"type": "text", "text": {"content": stats_text}}] }, }) # Add findings by priority if "findings_by_priority" in summary: children.append({ "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Findings by Priority"}}] }, }) for priority, count in summary["findings_by_priority"].items(): children.append({ "object": "block", "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [{"type": "text", "text": {"content": f"{priority}: {count}"}}] }, }) try: response = self.client.pages.create( parent={"page_id": parent_page_id}, properties={ "title": {"title": [{"text": {"content": f"SEO Audit - {url}"}}]} }, children=children, ) return response["id"] except Exception as e: logger.error(f"Failed to create summary page: {e}") raise def query_findings( self, database_id: str, category: str | None = None, priority: str | None = None, status: str | None = None, ) -> list[dict]: """ Query findings from database. Args: database_id: Database ID category: Filter by category priority: Filter by priority status: Filter by status Returns: List of finding records """ filters = [] if category: filters.append({ "property": "Category", "select": {"equals": category}, }) if priority: filters.append({ "property": "Priority", "select": {"equals": priority}, }) if status: filters.append({ "property": "Status", "status": {"equals": status}, }) query_params = {"database_id": database_id} if filters: if len(filters) == 1: query_params["filter"] = filters[0] else: query_params["filter"] = {"and": filters} try: response = self.client.databases.query(**query_params) return response.get("results", []) except Exception as e: logger.error(f"Failed to query findings: {e}") raise def update_finding_status( self, page_id: str, status: str, ) -> None: """Update the status of a finding.""" if status not in self.STATUSES: raise ValueError(f"Invalid status: {status}") try: self.client.pages.update( page_id=page_id, properties={"Status": {"status": {"name": status}}}, ) logger.info(f"Updated finding {page_id} to {status}") except Exception as e: logger.error(f"Failed to update status: {e}") raise def create_audit_report( self, report: "AuditReport", database_id: str | None = None, ) -> dict: """ Create a comprehensive audit report page with checklist table. This creates: 1. Individual finding pages in the database 2. A summary page with all findings in table format for checklist tracking Args: report: AuditReport object with all findings database_id: Target database ID (defaults to OurDigital SEO Audit Log) Returns: Dict with summary_page_id and finding_page_ids """ db_id = database_id or DEFAULT_DATABASE_ID # Generate full audit ID site_domain = report.site.replace('https://', '').replace('http://', '').split('/')[0] full_audit_id = f"{site_domain}-{report.audit_id}" result = { "audit_id": full_audit_id, "site": report.site, "summary_page_id": None, "finding_page_ids": [], } # 1. Create individual finding pages in database logger.info(f"Creating {len(report.findings)} finding pages...") for finding in report.findings: finding.audit_id = full_audit_id finding.site = report.site try: page_id = self.add_finding(finding, db_id) result["finding_page_ids"].append(page_id) except Exception as e: logger.error(f"Failed to add finding '{finding.issue}': {e}") # 2. Create summary page with checklist table logger.info("Creating audit summary page with checklist...") summary_page_id = self._create_audit_summary_with_table(report, full_audit_id, db_id) result["summary_page_id"] = summary_page_id logger.info(f"Audit report created: {full_audit_id}") return result def _create_audit_summary_with_table( self, report: "AuditReport", audit_id: str, database_id: str, ) -> str: """ Create audit summary page with checklist table format. Args: report: AuditReport object audit_id: Full audit ID database_id: Parent database ID Returns: Summary page ID """ site_domain = report.site.replace('https://', '').replace('http://', '').split('/')[0] # Build page content blocks children = [] # Header with audit info children.append({ "object": "block", "type": "callout", "callout": { "rich_text": [ {"type": "text", "text": {"content": f"Audit ID: {audit_id}\n"}}, {"type": "text", "text": {"content": f"Date: {report.audit_date.strftime('%Y-%m-%d %H:%M')}\n"}}, {"type": "text", "text": {"content": f"Total Issues: {report.total_issues}"}}, ], "icon": {"type": "emoji", "emoji": "📋"}, "color": "blue_background", } }) # Audit Status Summary children.append({ "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Audit Status"}}] } }) # Status table status_table = { "object": "block", "type": "table", "table": { "table_width": 2, "has_column_header": True, "has_row_header": False, "children": [ { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "Check"}}], [{"type": "text", "text": {"content": "Status"}}], ] } }, { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "Robots.txt"}}], [{"type": "text", "text": {"content": report.robots_txt_status}}], ] } }, { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "Sitemap"}}], [{"type": "text", "text": {"content": report.sitemap_status}}], ] } }, { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "Schema Markup"}}], [{"type": "text", "text": {"content": report.schema_status}}], ] } }, { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "Performance"}}], [{"type": "text", "text": {"content": report.performance_status}}], ] } }, ] } } children.append(status_table) # Divider children.append({"object": "block", "type": "divider", "divider": {}}) # Findings Checklist Header children.append({ "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Findings Checklist"}}] } }) children.append({ "object": "block", "type": "paragraph", "paragraph": { "rich_text": [{"type": "text", "text": {"content": "Use this checklist to track fixes. Check off items as you complete them."}}] } }) # Create findings table with checklist format if report.findings: # Build table rows - Header row table_rows = [ { "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": "#"}, "annotations": {"bold": True}}], [{"type": "text", "text": {"content": "Priority"}, "annotations": {"bold": True}}], [{"type": "text", "text": {"content": "Category"}, "annotations": {"bold": True}}], [{"type": "text", "text": {"content": "Issue"}, "annotations": {"bold": True}}], [{"type": "text", "text": {"content": "URL"}, "annotations": {"bold": True}}], ] } } ] # Add finding rows for idx, finding in enumerate(report.findings, 1): # Truncate long text for table cells issue_text = finding.issue[:50] + "..." if len(finding.issue) > 50 else finding.issue url_text = finding.url[:40] + "..." if finding.url and len(finding.url) > 40 else (finding.url or "-") table_rows.append({ "type": "table_row", "table_row": { "cells": [ [{"type": "text", "text": {"content": str(idx)}}], [{"type": "text", "text": {"content": finding.priority}}], [{"type": "text", "text": {"content": finding.category}}], [{"type": "text", "text": {"content": issue_text}}], [{"type": "text", "text": {"content": url_text}}], ] } }) findings_table = { "object": "block", "type": "table", "table": { "table_width": 5, "has_column_header": True, "has_row_header": False, "children": table_rows } } children.append(findings_table) # Divider children.append({"object": "block", "type": "divider", "divider": {}}) # Detailed Findings with To-Do checkboxes children.append({ "object": "block", "type": "heading_2", "heading_2": { "rich_text": [{"type": "text", "text": {"content": "Detailed Findings & Actions"}}] } }) # Group findings by priority and add as to-do items for priority in ["Critical", "High", "Medium", "Low"]: priority_findings = [f for f in report.findings if f.priority == priority] if not priority_findings: continue # Priority header with emoji priority_emoji = {"Critical": "🔴", "High": "🟠", "Medium": "🟡", "Low": "⚪"} children.append({ "object": "block", "type": "heading_3", "heading_3": { "rich_text": [{"type": "text", "text": {"content": f"{priority_emoji.get(priority, '')} {priority} Priority ({len(priority_findings)})"}}] } }) # Add each finding as a to-do item with details for finding in priority_findings: # Main to-do item children.append({ "object": "block", "type": "to_do", "to_do": { "rich_text": [ {"type": "text", "text": {"content": f"[{finding.category}] "}, "annotations": {"bold": True}}, {"type": "text", "text": {"content": finding.issue}}, ], "checked": False, } }) # URL if available if finding.url: children.append({ "object": "block", "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [ {"type": "text", "text": {"content": "URL: "}}, {"type": "text", "text": {"content": finding.url, "link": {"url": finding.url}}}, ] } }) # Affected URLs list if available if finding.affected_urls: children.append({ "object": "block", "type": "toggle", "toggle": { "rich_text": [{"type": "text", "text": {"content": f"Affected URLs ({len(finding.affected_urls)})"}}], "children": [ { "object": "block", "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [{"type": "text", "text": {"content": url, "link": {"url": url} if url.startswith("http") else None}}] } } for url in finding.affected_urls[:20] # Limit to 20 URLs ] + ([{ "object": "block", "type": "paragraph", "paragraph": { "rich_text": [{"type": "text", "text": {"content": f"... and {len(finding.affected_urls) - 20} more URLs"}}] } }] if len(finding.affected_urls) > 20 else []) } }) # Recommendation as sub-item if finding.recommendation: children.append({ "object": "block", "type": "bulleted_list_item", "bulleted_list_item": { "rich_text": [ {"type": "text", "text": {"content": "💡 "}, "annotations": {"bold": True}}, {"type": "text", "text": {"content": finding.recommendation}}, ] } }) # Create the summary page try: response = self.client.pages.create( parent={"database_id": database_id}, properties={ "Issue": {"title": [{"text": {"content": f"📊 Audit Report: {site_domain}"}}]}, "Category": {"select": {"name": "Technical SEO"}}, "Priority": {"select": {"name": "High"}}, "Site": {"url": report.site}, "Audit ID": {"rich_text": [{"text": {"content": audit_id}}]}, "Found Date": {"date": {"start": report.audit_date.strftime("%Y-%m-%d")}}, }, children=children, ) logger.info(f"Created audit summary page: {response['id']}") return response["id"] except Exception as e: logger.error(f"Failed to create audit summary page: {e}") raise def create_quick_audit_report( self, site: str, findings: list[SEOFinding], robots_status: str = "Not checked", sitemap_status: str = "Not checked", schema_status: str = "Not checked", performance_status: str = "Not checked", database_id: str | None = None, ) -> dict: """ Quick method to create audit report from a list of findings. Args: site: Site URL findings: List of SEOFinding objects robots_status: Robots.txt check result sitemap_status: Sitemap check result schema_status: Schema check result performance_status: Performance check result database_id: Target database ID Returns: Dict with audit results """ report = AuditReport(site=site) report.robots_txt_status = robots_status report.sitemap_status = sitemap_status report.schema_status = schema_status report.performance_status = performance_status for finding in findings: report.add_finding(finding) return self.create_audit_report(report, database_id) def main(): """CLI entry point for testing.""" import argparse parser = argparse.ArgumentParser(description="Notion SEO Reporter") parser.add_argument("--action", "-a", required=True, choices=["create-db", "add-finding", "query"], help="Action to perform") parser.add_argument("--parent-id", "-p", help="Parent page ID") parser.add_argument("--database-id", "-d", help="Database ID") parser.add_argument("--title", "-t", default="SEO Audit Findings", help="Database title") args = parser.parse_args() reporter = NotionReporter() if args.action == "create-db": if not args.parent_id: parser.error("--parent-id required for create-db") db_id = reporter.create_findings_database(args.parent_id, args.title) print(f"Created database: {db_id}") elif args.action == "add-finding": if not args.database_id: parser.error("--database-id required for add-finding") # Example finding finding = SEOFinding( issue="Missing meta description", category="On-page SEO", priority="Medium", url="https://example.com/page", description="Page is missing meta description tag", impact="May affect CTR in search results", recommendation="Add unique meta description under 160 characters", ) page_id = reporter.add_finding(args.database_id, finding) print(f"Created finding: {page_id}") elif args.action == "query": if not args.database_id: parser.error("--database-id required for query") findings = reporter.query_findings(args.database_id) print(f"Found {len(findings)} findings") for f in findings[:5]: title = f["properties"]["Issue"]["title"] if title: print(f" - {title[0]['plain_text']}") if __name__ == "__main__": main()