refactor: Reorganize skill numbering and update documentation
Skill Numbering Changes: - 01-03: OurDigital core (was 30-32) - 31-32: Notion tools (was 01-02) - 99_archive: Renamed from _archive for sorting New Files: - AGENTS.md: Claude Code agent routing guide - requirements.txt for 00-claude-code-setting, 32-notion-writer, 43-jamie-youtube-manager Documentation Updates: - CLAUDE.md: Updated skill inventory (23 skills) - AUDIT_REPORT.md: Current completion status (91%) - Archived REFACTORING_PLAN.md (most tasks complete) Removed: - ga-agent-skills/ (moved to separate repo ~/Project/dintel-ga4-agent) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,203 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Extract research content from Notion pages and databases
|
||||
Outputs structured JSON for downstream processing
|
||||
"""
|
||||
|
||||
import json
|
||||
import argparse
|
||||
from typing import Dict, List, Any
|
||||
from datetime import datetime
|
||||
|
||||
def extract_notion_content(notion_url: str) -> Dict[str, Any]:
    """Extract and structure content from Notion.

    This function would integrate with Notion MCP tools:
    - notion-search for finding related pages
    - notion-fetch for getting full content

    Args:
        notion_url: URL of Notion page or database

    Returns:
        Structured research data
    """
    # Resolve the page/database ID embedded in the URL.
    page_id = parse_notion_url(notion_url)

    # NOTE: placeholder payload — production code would call the actual
    # Notion MCP tools instead of returning this canned structure.
    source_info = {
        "url": notion_url,
        "id": page_id,
        "type": "page",  # or "database"
        "extracted_at": datetime.now().isoformat()
    }

    metadata = {
        "title": "Q4 Research Summary",
        "last_edited": "2024-12-15T10:30:00Z",
        "created_by": "user@company.com",
        "tags": ["research", "Q4", "strategy"]
    }

    exec_summary = {
        "title": "Executive Summary",
        "content": "Key findings from Q4 research indicate...",
        "level": 1,
        "data_points": [
            {"metric": "Growth Rate", "value": "25%"},
            {"metric": "User Satisfaction", "value": "4.5/5"}
        ]
    }

    market_analysis = {
        "title": "Market Analysis",
        "content": "The market landscape shows...",
        "level": 1,
        "subsections": [
            {
                "title": "Competitive Landscape",
                "content": "Our position relative to competitors...",
                "level": 2
            },
            {
                "title": "Growth Opportunities",
                "content": "Identified opportunities include...",
                "level": 2
            }
        ]
    }

    customer_insights = {
        "title": "Customer Insights",
        "content": "Customer feedback reveals...",
        "level": 1,
        "data_points": [
            {"metric": "NPS Score", "value": "72"},
            {"metric": "Retention Rate", "value": "89%"}
        ]
    }

    recommendations = {
        "title": "Recommendations",
        "content": "Based on the research, we recommend...",
        "level": 1,
        "action_items": [
            "Expand into new market segments",
            "Enhance product features based on feedback",
            "Increase investment in customer success"
        ]
    }

    linked_pages = [
        {
            "title": "Detailed Customer Survey Results",
            "url": "notion://page/survey-results-id",
            "relevance": "high"
        },
        {
            "title": "Competitor Analysis Deep Dive",
            "url": "notion://page/competitor-analysis-id",
            "relevance": "medium"
        }
    ]

    attachments = [
        {
            "type": "spreadsheet",
            "title": "Q4 Metrics Dashboard",
            "url": "notion://attachment/metrics-id"
        }
    ]

    # Assemble the full research document from the named parts above.
    return {
        "source": source_info,
        "metadata": metadata,
        "content": {
            "sections": [
                exec_summary,
                market_analysis,
                customer_insights,
                recommendations
            ]
        },
        "linked_pages": linked_pages,
        "attachments": attachments
    }
|
||||
|
||||
def parse_notion_url(url: str) -> str:
    """Extract the page/database ID from a Notion URL.

    Handles both web URLs (``notion.so/...``) and internal
    ``notion://`` links. Query strings, fragments, and trailing
    slashes are stripped so the bare last path segment is returned.

    Args:
        url: Notion page or database URL.

    Returns:
        The last path segment of the URL with query string and
        fragment removed, or the input unchanged when it is not a
        recognized Notion URL.
    """
    # Simplified URL parsing
    if "notion.so/" in url or "notion://" in url:
        # rstrip("/") fixes the bug where a trailing slash made the
        # last segment empty; also strip "#" fragments, not just "?".
        last_segment = url.rstrip("/").split("/")[-1]
        return last_segment.split("?")[0].split("#")[0]
    return url
|
||||
|
||||
def fetch_linked_content(linked_pages: List[Dict], depth: int = 1) -> List[Dict]:
    """Recursively fetch linked page content.

    Only pages marked with "high" or "medium" relevance are expanded;
    other links are skipped.

    Args:
        linked_pages: List of linked page references
        depth: How deep to follow links

    Returns:
        Expanded content from linked pages
    """
    # Non-positive depth means "do not follow any links".
    if depth <= 0:
        return []

    # Placeholder expansion: a real implementation would fetch the
    # actual page content for each relevant link here.
    return [
        {
            "source": entry["url"],
            "title": entry["title"],
            "content": f"Content from {entry['title']}..."
        }
        for entry in linked_pages
        if entry.get("relevance") in ["high", "medium"]
    ]
|
||||
|
||||
def main():
    """Command-line entry point.

    Parses arguments, extracts the Notion content, optionally expands
    linked pages, and writes the result to a JSON file.
    """
    parser = argparse.ArgumentParser(
        description="Extract research content from Notion"
    )
    parser.add_argument("notion_url", help="URL of Notion page or database")
    parser.add_argument(
        "--output",
        default="research.json",
        help="Output JSON file (default: research.json)"
    )
    parser.add_argument(
        "--include-linked",
        action="store_true",
        help="Include content from linked pages"
    )
    parser.add_argument(
        "--depth",
        type=int,
        default=1,
        help="Link following depth (default: 1)"
    )
    args = parser.parse_args()

    print(f"📚 Extracting content from: {args.notion_url}")

    # Pull the main page content first.
    research_data = extract_notion_content(args.notion_url)

    # Optionally expand pages linked from the main document.
    if args.include_linked and research_data.get("linked_pages"):
        print("📎 Fetching linked pages...")
        research_data["linked_content"] = fetch_linked_content(
            research_data["linked_pages"], args.depth
        )

    # Persist the structured result as UTF-8 JSON.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(research_data, f, indent=2, ensure_ascii=False)

    print(f"✅ Research data saved to: {args.output}")
    section_count = len(research_data['content']['sections'])
    print(f"📊 Extracted {section_count} sections")

    linked = research_data.get("linked_pages")
    if linked:
        print(f"🔗 Found {len(linked)} linked pages")


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user