Custom Skills (ourdigital-custom-skills/): - 00-ourdigital-visual-storytelling: Blog featured image prompt generator - 01-ourdigital-research-publisher: Research-to-publication workflow - 02-notion-organizer: Notion workspace management - 03-research-to-presentation: Notion research to PPT/Figma - 04-seo-gateway-strategist: SEO gateway page strategy planning - 05-gateway-page-content-builder: Gateway page content generation - 20-jamie-brand-editor: Jamie Clinic branded content GENERATION - 21-jamie-brand-guardian: Jamie Clinic content REVIEW & evaluation Refinements applied: - All skills converted to SKILL.md format with YAML frontmatter - Added version fields to all skills - Flattened nested folder structures - Removed packaging artifacts (.zip, .skill files) - Reorganized file structures (scripts/, references/, etc.) - Differentiated Jamie skills with clear roles 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
204 lines
6.4 KiB
Python
204 lines
6.4 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Extract research content from Notion pages and databases
|
|
Outputs structured JSON for downstream processing
|
|
"""
|
|
|
|
import json
|
|
import argparse
|
|
from typing import Dict, List, Any
|
|
from datetime import datetime
|
|
|
|
def extract_notion_content(notion_url: str) -> Dict[str, Any]:
    """
    Pull research content out of a Notion page/database and structure it.

    In production this would be backed by the Notion MCP tools
    (notion-search to locate related pages, notion-fetch to pull full
    content); for now the returned payload is a simulated stand-in with
    the same structure a real extraction would produce.

    Args:
        notion_url: URL of the Notion page or database to extract.

    Returns:
        Structured research data: source info, metadata, content
        sections, linked pages, and attachments.
    """
    # Resolve the page/database ID embedded in the URL.
    source_id = parse_notion_url(notion_url)

    source_info = {
        "url": notion_url,
        "id": source_id,
        "type": "page",  # or "database"
        "extracted_at": datetime.now().isoformat(),
    }

    metadata = {
        "title": "Q4 Research Summary",
        "last_edited": "2024-12-15T10:30:00Z",
        "created_by": "user@company.com",
        "tags": ["research", "Q4", "strategy"],
    }

    sections = [
        {
            "title": "Executive Summary",
            "content": "Key findings from Q4 research indicate...",
            "level": 1,
            "data_points": [
                {"metric": "Growth Rate", "value": "25%"},
                {"metric": "User Satisfaction", "value": "4.5/5"},
            ],
        },
        {
            "title": "Market Analysis",
            "content": "The market landscape shows...",
            "level": 1,
            "subsections": [
                {
                    "title": "Competitive Landscape",
                    "content": "Our position relative to competitors...",
                    "level": 2,
                },
                {
                    "title": "Growth Opportunities",
                    "content": "Identified opportunities include...",
                    "level": 2,
                },
            ],
        },
        {
            "title": "Customer Insights",
            "content": "Customer feedback reveals...",
            "level": 1,
            "data_points": [
                {"metric": "NPS Score", "value": "72"},
                {"metric": "Retention Rate", "value": "89%"},
            ],
        },
        {
            "title": "Recommendations",
            "content": "Based on the research, we recommend...",
            "level": 1,
            "action_items": [
                "Expand into new market segments",
                "Enhance product features based on feedback",
                "Increase investment in customer success",
            ],
        },
    ]

    linked_pages = [
        {
            "title": "Detailed Customer Survey Results",
            "url": "notion://page/survey-results-id",
            "relevance": "high",
        },
        {
            "title": "Competitor Analysis Deep Dive",
            "url": "notion://page/competitor-analysis-id",
            "relevance": "medium",
        },
    ]

    attachments = [
        {
            "type": "spreadsheet",
            "title": "Q4 Metrics Dashboard",
            "url": "notion://attachment/metrics-id",
        },
    ]

    # This would use actual Notion MCP tools in production; the pieces
    # above simulate that result for now.
    return {
        "source": source_info,
        "metadata": metadata,
        "content": {"sections": sections},
        "linked_pages": linked_pages,
        "attachments": attachments,
    }
|
|
|
|
def parse_notion_url(url: str) -> str:
    """
    Extract the page/database ID from a Notion URL.

    Handles both web URLs (notion.so/...) and notion:// URIs, stripping
    any query string or fragment. Input that does not look like a
    Notion URL is returned unchanged, so callers may pass a bare ID.

    Args:
        url: Notion URL, notion:// URI, or bare ID.

    Returns:
        The last path segment of the URL (the ID), or the input itself.
    """
    # Simplified URL parsing.
    if "notion.so/" in url or "notion://" in url:
        # Drop query string and fragment first; then strip any trailing
        # slash so the last path segment is never empty.
        path = url.split("?")[0].split("#")[0].rstrip("/")
        return path.rsplit("/", 1)[-1]
    return url
|
|
|
|
def fetch_linked_content(linked_pages: List[Dict], depth: int = 1) -> List[Dict]:
    """
    Follow references to linked pages and collect their content.

    Only pages tagged with "high" or "medium" relevance are expanded.
    The actual fetch is stubbed: each record carries a placeholder
    content string derived from the page title.

    Args:
        linked_pages: Linked page references (url/title/relevance dicts).
        depth: How deep to follow links; a depth <= 0 fetches nothing.

    Returns:
        Expanded content records, one per relevant linked page.
    """
    if depth <= 0:
        return []

    relevant_levels = ("high", "medium")
    # Would fetch actual page content here; stub each relevant link.
    return [
        {
            "source": page["url"],
            "title": page["title"],
            "content": f"Content from {page['title']}...",
        }
        for page in linked_pages
        if page.get("relevance") in relevant_levels
    ]
|
|
|
|
def main():
    """CLI entry point: extract Notion research content into a JSON file."""
    parser = argparse.ArgumentParser(
        description="Extract research content from Notion"
    )
    parser.add_argument(
        "notion_url",
        help="URL of Notion page or database",
    )
    parser.add_argument(
        "--output",
        default="research.json",
        help="Output JSON file (default: research.json)",
    )
    parser.add_argument(
        "--include-linked",
        action="store_true",
        help="Include content from linked pages",
    )
    parser.add_argument(
        "--depth",
        type=int,
        default=1,
        help="Link following depth (default: 1)",
    )
    args = parser.parse_args()

    print(f"📚 Extracting content from: {args.notion_url}")

    # Extract main content.
    research_data = extract_notion_content(args.notion_url)

    # Follow linked pages only when explicitly requested.
    if args.include_linked and research_data.get("linked_pages"):
        print("📎 Fetching linked pages...")
        research_data["linked_content"] = fetch_linked_content(
            research_data["linked_pages"],
            args.depth,
        )

    # Persist the structured result for downstream processing.
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(research_data, f, indent=2, ensure_ascii=False)

    print(f"✅ Research data saved to: {args.output}")
    print(f"📊 Extracted {len(research_data['content']['sections'])} sections")
    if research_data.get("linked_pages"):
        print(f"🔗 Found {len(research_data['linked_pages'])} linked pages")


if __name__ == "__main__":
    main()
|