diff --git a/.claude/commands/gtm-audit.md b/.claude/commands/gtm-audit.md new file mode 100644 index 0000000..d0f6394 --- /dev/null +++ b/.claude/commands/gtm-audit.md @@ -0,0 +1,42 @@ +# GTM Audit + +Lightweight Google Tag Manager audit tool. + +## Triggers +- "audit GTM", "check dataLayer", "GTM 검사" + +## Capabilities + +1. **Container Analysis** - Tags, triggers, variables inventory +2. **DataLayer Validation** - Check event structure +3. **Form Tracking** - Verify form submission events +4. **E-commerce Check** - Validate purchase/cart events + +## Scripts + +```bash +# Audit GTM container +python ourdigital-custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \ + --url https://example.com + +# With detailed dataLayer check +python ourdigital-custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \ + --url https://example.com --check-datalayer --output report.json +``` + +## Audit Checklist + +### Container Health +- [ ] GTM container loads correctly +- [ ] No JavaScript errors from GTM +- [ ] Container ID matches expected + +### DataLayer Events +- [ ] `page_view` fires on all pages +- [ ] `purchase` event has required fields +- [ ] Form submissions tracked + +### Common Issues +- Missing ecommerce object +- Incorrect event names (GA4 format) +- Duplicate event firing diff --git a/.claude/commands/gtm-manager.md b/.claude/commands/gtm-manager.md new file mode 100644 index 0000000..1bfad63 --- /dev/null +++ b/.claude/commands/gtm-manager.md @@ -0,0 +1,49 @@ +# GTM Manager + +Full GTM management with dataLayer injection and tag generation. + +## Triggers +- "GTM manager", "generate dataLayer tag", "dataLayer 태그 생성" + +## Capabilities + +1. **Full Audit** - Everything in gtm-audit plus more +2. **DataLayer Injector** - Generate custom HTML tags +3. **Event Mapping** - Map site actions to GA4 events +4. **Notion Export** - Save audit results to Notion + +## Scripts + +```bash +# Full GTM management +python ourdigital-custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \ + --url https://example.com --full-audit + +# Generate dataLayer tag +python ourdigital-custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \ + --generate-tag purchase --output purchase_tag.html + +# Export to Notion +python ourdigital-custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \ + --url https://example.com --notion-export --database DATABASE_ID +``` + +## DataLayer Tag Templates + +### Purchase Event +```html + +``` + +## Environment +- `NOTION_TOKEN` - For Notion export (optional) diff --git a/.claude/commands/jamie-audit.md b/.claude/commands/jamie-audit.md new file mode 100644 index 0000000..89aed1a --- /dev/null +++ b/.claude/commands/jamie-audit.md @@ -0,0 +1,60 @@ +# Jamie Brand Audit + +Jamie Clinic content **review and evaluation** tool. + +## Triggers +- "review Jamie content", "브랜드 검토", "audit brand compliance" + +## Capabilities (Guidance-based) + +1. **Voice & Tone Check** - 격식체 ratio, honorifics +2. **Brand Alignment** - Slogan, values, no competitors +3. **Regulatory Compliance** - Medical advertising laws +4. **Technical Accuracy** - Procedure facts, recovery times + +## Review Checklist + +### Voice & Tone +- [ ] 90% 격식체 ratio maintained +- [ ] Correct honorifics (환자분/고객님) +- [ ] Jamie personality traits present + +### Brand Alignment +- [ ] Slogan consistency +- [ ] Core values reflected +- [ ] No competitor mentions + +### Regulatory Compliance +- [ ] No exaggerated claims +- [ ] No guarantee language +- [ ] Proper disclosures included + +### Technical Accuracy +- [ ] Procedure facts correct +- [ ] Medical terms accurate +- [ ] Recovery times realistic + +## Output Format + +```markdown +# Brand Audit Report + +## Overall Score: 85/100 + +## Issues Found +1. Line 23: "최고의" → Remove superlative claim +2. Line 45: Missing disclosure for before/after image + +## Recommendations +- Adjust tone in paragraph 3 +- Add required disclaimers + +## Verdict: REVISION REQUIRED +``` + +## Workflow +1. Receive content for review +2. Check against brand guidelines +3. Verify regulatory compliance +4. Provide structured feedback +5. Recommend approval/revision diff --git a/.claude/commands/jamie-editor.md b/.claude/commands/jamie-editor.md new file mode 100644 index 0000000..f4cb1e2 --- /dev/null +++ b/.claude/commands/jamie-editor.md @@ -0,0 +1,47 @@ +# Jamie Brand Editor + +Jamie Clinic content **generation** toolkit. + +## Triggers +- "write Jamie blog", "제이미 콘텐츠 생성", "create Jamie content" + +## Capabilities + +1. **Blog Posts** - 블로그 포스팅 +2. **Procedure Pages** - 시술 페이지 +3. **Ad Copy** - 광고 카피 +4. **Social Media** - SNS 콘텐츠 +5. **Compliance Check** - Korean medical ad regulations + +## Scripts + +```bash +# Check content compliance +python ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \ + --input draft.md + +# With detailed report +python ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \ + --input draft.md --verbose --output report.json + +# Batch check +python ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \ + --dir ./drafts --output compliance_report.json +``` + +## Brand Voice Requirements + +| Rule | Requirement | +|------|-------------| +| 격식체 ratio | 90% (~습니다/~입니다) | +| Patient reference | "환자분" for medical contexts | +| Key descriptor | "자연스러운" (natural) | +| Tone | No exaggeration, realistic expectations | + +## Compliance Rules + +- ❌ No exaggerated claims +- ❌ No before/after comparison violations +- ❌ No guarantee language +- ❌ No competitor comparisons +- ✅ Proper disclosure requirements diff --git a/.claude/commands/notion-organizer.md b/.claude/commands/notion-organizer.md new file mode 100644 index 0000000..d89844a --- /dev/null +++ b/.claude/commands/notion-organizer.md @@ -0,0 +1,32 @@ +# Notion Organizer + +Notion workspace management agent for organizing, restructuring, and maintaining databases. + +## Triggers +- "organize Notion", "노션 정리", "database cleanup" + +## Capabilities + +1. **Database Schema Analysis** - Analyze and document database structures +2. **Property Cleanup** - Remove unused properties, standardize types +3. **Data Migration** - Move data between databases with mapping +4. **Bulk Operations** - Archive, tag, or update multiple pages + +## Scripts + +```bash +# Analyze database schema +python ourdigital-custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \ + --source-db DATABASE_ID --analyze + +# Migrate with mapping +python ourdigital-custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \ + --source-db SOURCE_ID --target-db TARGET_ID --mapping mapping.json + +# Async bulk operations +python ourdigital-custom-skills/01-notion-organizer/code/scripts/async_organizer.py \ + --database DATABASE_ID --operation archive --filter "Status=Done" +``` + +## Environment +- `NOTION_TOKEN` - Notion integration token (required) diff --git a/.claude/commands/ourdigital-designer.md b/.claude/commands/ourdigital-designer.md new file mode 100644 index 0000000..ac66ee9 --- /dev/null +++ b/.claude/commands/ourdigital-designer.md @@ -0,0 +1,43 @@ +# OurDigital Designer + +Visual storytelling toolkit for blog featured images. + +## Triggers +- "create image prompt", "블로그 이미지", "featured image" + +## Capabilities + +1. **Concept Extraction** - Extract visual themes from essay text +2. **Prompt Generation** - Create AI image prompts +3. **Mood Calibration** - Fine-tune emotional parameters +4. **Style Consistency** - OurDigital brand visual language + +## Scripts + +```bash +# Generate image prompt +python ourdigital-custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \ + --topic "AI identity" --mood "contemplative" + +# From essay text +python ourdigital-custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \ + --input essay.txt --auto-extract + +# Calibrate mood +python ourdigital-custom-skills/30-ourdigital-designer/code/scripts/mood_calibrator.py \ + --input "essay excerpt" --style "minimalist" +``` + +## Visual Style Guide + +| Essay Type | Strategy | Colors | +|------------|----------|--------| +| Technology | Organic-digital hybrids | Cool blues → warm accents | +| Social | Network patterns | Desaturated → hope spots | +| Philosophy | Zen space, symbols | Monochrome + single accent | + +## Output Format +- 1200x630px (OG image standard) +- Minimalist vector + subtle textures +- 60-30-10 color rule +- 20%+ negative space diff --git a/.claude/commands/ourdigital-presentation.md b/.claude/commands/ourdigital-presentation.md new file mode 100644 index 0000000..bac7a03 --- /dev/null +++ b/.claude/commands/ourdigital-presentation.md @@ -0,0 +1,42 @@ +# OurDigital Presentation + +Notion-to-presentation workflow for branded slides. + +## Triggers +- "create presentation", "Notion to PPT", "프레젠테이션 만들기" + +## Capabilities + +1. **Notion Extraction** - Pull content from Notion pages +2. **Content Synthesis** - Structure into slide format +3. **Brand Application** - Apply corporate styling +4. **Multi-format Output** - PowerPoint, Figma, HTML + +## Scripts + +```bash +# Full automated workflow +python ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/run_workflow.py \ + --notion-url [NOTION_URL] --output presentation.pptx + +# Step-by-step +python ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/extract_notion.py [URL] > research.json +python ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/synthesize_content.py research.json > synthesis.json +python ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/apply_brand.py synthesis.json --output presentation.pptx +``` + +## Pipeline + +``` +extract_notion.py → synthesize_content.py → apply_brand.py + ↓ ↓ ↓ + research.json synthesis.json presentation.pptx +``` + +## Output Formats +- PowerPoint (.pptx) +- Figma (via API) +- HTML preview + +## Environment +- `NOTION_TOKEN` - Notion API token (required) diff --git a/.claude/commands/ourdigital-research.md b/.claude/commands/ourdigital-research.md new file mode 100644 index 0000000..f2e33a9 --- /dev/null +++ b/.claude/commands/ourdigital-research.md @@ -0,0 +1,45 @@ +# OurDigital Research + +Research-to-publication workflow for OurDigital blogs. + +## Triggers +- "export to Ulysses", "publish research", "블로그 발행" + +## Capabilities + +1. **Markdown Export** - Format research for publishing +2. **Ulysses Integration** - Direct export to Ulysses app +3. **Publishing Checklist** - Pre-publish verification +4. **Multi-target** - blog.ourdigital.org, journal, ourstory.day + +## Scripts + +```bash +# Export to Ulysses +python ourdigital-custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \ + --input research.md --group "Blog Drafts" + +# With tags +python ourdigital-custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \ + --input research.md \ + --group "Blog Drafts" \ + --tags "AI,research,draft" + +# From Notion export +python ourdigital-custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \ + --notion-export notion_export.zip \ + --group "From Notion" +``` + +## Workflow + +1. Complete research in Claude/Notion +2. Export to markdown +3. Run export script → Ulysses +4. Edit and polish in Ulysses +5. Publish to Ghost/OurDigital + +## Output Targets +- **blog.ourdigital.org** - Main blog +- **journal.ourdigital.org** - Long-form essays +- **ourstory.day** - Personal narratives diff --git a/.claude/commands/seo-gateway-architect.md b/.claude/commands/seo-gateway-architect.md new file mode 100644 index 0000000..733a810 --- /dev/null +++ b/.claude/commands/seo-gateway-architect.md @@ -0,0 +1,51 @@ +# SEO Gateway Architect + +Keyword strategy and content architecture for gateway pages. + +## Triggers +- "keyword strategy", "SEO planning", "게이트웨이 전략" + +## Capabilities + +1. **Keyword Analysis** - Volume, difficulty, intent +2. **LSI Keywords** - Related semantic keywords +3. **Long-tail Opportunities** - Location + service combinations +4. **Content Architecture** - Recommended H1-H3 structure + +## Scripts + +```bash +# Analyze keyword +python ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py \ + --topic "눈 성형" + +# With location targeting +python ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py \ + --topic "눈 성형" --market "강남" --output strategy.json +``` + +## Output + +``` +# Keyword Analysis Report + +## Primary Keyword: 강남 눈 성형 +- Search Volume: 12,000 +- Difficulty: 65/100 +- Intent: Informational + +## LSI Keywords +1. 쌍꺼풀 수술 - Volume: 8,000 +2. 눈매교정 - Volume: 5,500 +... + +## Recommendations +1. Focus on educational content +2. Include FAQ schema markup +3. Target long-tail keywords for quick wins +``` + +## Workflow +1. Run keyword analysis +2. Review strategy output +3. Hand off to `/seo-gateway-builder` for content diff --git a/.claude/commands/seo-gateway-builder.md b/.claude/commands/seo-gateway-builder.md new file mode 100644 index 0000000..d699335 --- /dev/null +++ b/.claude/commands/seo-gateway-builder.md @@ -0,0 +1,55 @@ +# SEO Gateway Builder + +Generate SEO-optimized gateway pages from templates. + +## Triggers +- "build gateway page", "generate landing pages", "게이트웨이 페이지 생성" + +## Capabilities + +1. **Template-based Generation** - Medical, service, local templates +2. **Batch Creation** - Multiple location × service combinations +3. **SEO Optimization** - Meta tags, schema, internal links +4. **Localization** - Korean/English content support + +## Scripts + +```bash +# Generate with sample data +python ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py + +# Custom configuration +python ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py \ + --config config/services.json \ + --locations config/locations.json \ + --output ./pages +``` + +## Configuration + +### services.json +```json +{ + "services": [{ + "id": "laser_hair_removal", + "korean": "레이저 제모", + "keywords": ["permanent hair removal"] + }] +} +``` + +### locations.json +```json +{ + "locations": [{ + "id": "gangnam", + "korean": "강남", + "landmarks": ["COEX", "Gangnam Station"] + }] +} +``` + +## Output +- Markdown files with SEO meta +- Schema markup included +- Internal linking suggestions diff --git a/.claude/commands/seo-gsc.md b/.claude/commands/seo-gsc.md new file mode 100644 index 0000000..d72fe27 --- /dev/null +++ b/.claude/commands/seo-gsc.md @@ -0,0 +1,37 @@ +# SEO Search Console + +Google Search Console data retrieval and analysis. + +## Triggers +- "get GSC data", "Search Console report", "search performance" + +## Capabilities + +1. **Search Performance** - Clicks, impressions, CTR, position +2. **Query Analysis** - Top queries, trending keywords +3. **Page Performance** - Best/worst performing pages +4. **Index Coverage** - Indexed pages, errors, warnings + +## Scripts + +```bash +# Get search performance +python ourdigital-custom-skills/16-seo-search-console/code/scripts/gsc_client.py \ + --site https://example.com --days 28 + +# Query analysis +python ourdigital-custom-skills/16-seo-search-console/code/scripts/gsc_client.py \ + --site https://example.com --report queries --limit 100 + +# Page performance +python ourdigital-custom-skills/16-seo-search-console/code/scripts/gsc_client.py \ + --site https://example.com --report pages --output pages_report.json +``` + +## Environment +- `GOOGLE_APPLICATION_CREDENTIALS` - Service account JSON path (required) + +## Output +- CSV/JSON performance data +- Trend analysis +- Actionable insights diff --git a/.claude/commands/seo-local.md b/.claude/commands/seo-local.md new file mode 100644 index 0000000..2d46329 --- /dev/null +++ b/.claude/commands/seo-local.md @@ -0,0 +1,38 @@ +# SEO Local Audit + +Local SEO audit for NAP consistency, Google Business Profile, and citations. + +## Triggers +- "local SEO audit", "check NAP", "GBP audit" + +## Capabilities (Guidance-based) + +1. **NAP Consistency** - Name, Address, Phone verification across web +2. **GBP Optimization** - Google Business Profile completeness +3. **Citation Audit** - Directory listings verification +4. **Local Schema** - LocalBusiness markup validation + +## Audit Checklist + +### NAP Consistency +- [ ] Business name matches exactly across all platforms +- [ ] Address format is consistent (Suite vs Ste, etc.) +- [ ] Phone number format matches (with/without country code) + +### Google Business Profile +- [ ] All categories properly selected +- [ ] Business hours accurate and complete +- [ ] Photos uploaded (logo, cover, interior, exterior) +- [ ] Q&A section monitored +- [ ] Reviews responded to + +### Citations +- [ ] Major directories (Yelp, Yellow Pages, etc.) +- [ ] Industry-specific directories +- [ ] Local chamber of commerce +- [ ] Social media profiles + +## Tools to Use +- Google Business Profile Manager +- Moz Local / BrightLocal for citation audit +- Schema.org validator for LocalBusiness markup diff --git a/.claude/commands/seo-on-page.md b/.claude/commands/seo-on-page.md new file mode 100644 index 0000000..f01283c --- /dev/null +++ b/.claude/commands/seo-on-page.md @@ -0,0 +1,31 @@ +# SEO On-Page Audit + +On-page SEO analysis for meta tags, headings, content, and links. + +## Triggers +- "analyze page SEO", "check meta tags", "on-page audit" + +## Capabilities + +1. **Meta Tag Analysis** - Title, description, OG tags, canonical +2. **Heading Structure** - H1-H6 hierarchy validation +3. **Content Analysis** - Word count, keyword density +4. **Link Audit** - Internal/external links, broken links + +## Scripts + +```bash +# Full page analysis +python ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py \ + --url https://example.com/page + +# Multiple pages +python ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py \ + --urls urls.txt --output report.json +``` + +## Output +- Meta tag completeness score +- Heading structure report +- Content quality metrics +- Link health status diff --git a/.claude/commands/seo-schema-generator.md b/.claude/commands/seo-schema-generator.md new file mode 100644 index 0000000..3118446 --- /dev/null +++ b/.claude/commands/seo-schema-generator.md @@ -0,0 +1,42 @@ +# SEO Schema Generator + +Generate JSON-LD structured data markup from templates. + +## Triggers +- "generate schema", "create structured data", "make JSON-LD" + +## Capabilities + +1. **Template-based Generation** - Use pre-built templates +2. **Custom Schema** - Build schema from specifications +3. **Multi-type Support** - Combine multiple schema types + +## Scripts + +```bash +# Generate from template +python ourdigital-custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py \ + --type LocalBusiness --output schema.json + +# With custom data +python ourdigital-custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py \ + --type Article \ + --data '{"headline": "My Article", "author": "John Doe"}' \ + --output article-schema.json +``` + +## Available Templates + +| Type | Use Case | +|------|----------| +| `Article` | Blog posts, news articles | +| `LocalBusiness` | Local business pages | +| `Product` | E-commerce product pages | +| `FAQPage` | FAQ sections | +| `BreadcrumbList` | Navigation breadcrumbs | +| `Organization` | Company/about pages | +| `WebSite` | Homepage with sitelinks search | + +## Output +- Valid JSON-LD ready for embedding +- HTML script tag format option diff --git a/.claude/commands/seo-schema-validator.md b/.claude/commands/seo-schema-validator.md new file mode 100644 index 0000000..4bd0372 --- /dev/null +++ b/.claude/commands/seo-schema-validator.md @@ -0,0 +1,36 @@ +# SEO Schema Validator + +JSON-LD structured data validation and analysis. + +## Triggers +- "validate schema", "check structured data", "JSON-LD audit" + +## Capabilities + +1. **Schema Extraction** - Extract all JSON-LD from page +2. **Syntax Validation** - Check JSON structure +3. **Schema.org Compliance** - Validate against schema.org specs +4. **Google Rich Results** - Check eligibility for rich snippets + +## Scripts + +```bash +# Validate page schema +python ourdigital-custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \ + --url https://example.com + +# Validate local file +python ourdigital-custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \ + --file schema.json + +# Batch validation +python ourdigital-custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \ + --urls urls.txt --output validation_report.json +``` + +## Supported Schema Types +- Article, BlogPosting, NewsArticle +- Product, Offer, AggregateRating +- LocalBusiness, Organization +- FAQPage, HowTo, Recipe +- BreadcrumbList, WebSite diff --git a/.claude/commands/seo-technical.md b/.claude/commands/seo-technical.md new file mode 100644 index 0000000..ef4427d --- /dev/null +++ b/.claude/commands/seo-technical.md @@ -0,0 +1,33 @@ +# SEO Technical Audit + +Technical SEO audit for robots.txt and sitemap validation. + +## Triggers +- "check robots.txt", "validate sitemap", "technical SEO" + +## Capabilities + +1. **Robots.txt Analysis** - Parse and validate robots.txt rules +2. **Sitemap Validation** - Check XML sitemap structure and URLs +3. **Sitemap Crawling** - Crawl all URLs in sitemap for issues + +## Scripts + +```bash +# Check robots.txt +python ourdigital-custom-skills/10-seo-technical-audit/code/scripts/robots_checker.py \ + --url https://example.com + +# Validate sitemap +python ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_validator.py \ + --url https://example.com/sitemap.xml + +# Crawl sitemap URLs +python ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_crawler.py \ + --sitemap https://example.com/sitemap.xml --output report.json +``` + +## Output +- Robots.txt rule analysis +- Sitemap structure validation +- URL accessibility report diff --git a/.claude/commands/seo-vitals.md b/.claude/commands/seo-vitals.md new file mode 100644 index 0000000..76faecf --- /dev/null +++ b/.claude/commands/seo-vitals.md @@ -0,0 +1,40 @@ +# SEO Core Web Vitals + +Google PageSpeed Insights and Core Web Vitals analysis. + +## Triggers +- "check page speed", "Core Web Vitals", "PageSpeed audit" + +## Capabilities + +1. **Performance Metrics** - LCP, FID, CLS scores +2. **Mobile/Desktop** - Separate analysis for each +3. **Optimization Tips** - Actionable recommendations +4. **Historical Tracking** - Compare over time + +## Scripts + +```bash +# Analyze single URL +python ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \ + --url https://example.com + +# Mobile and desktop +python ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \ + --url https://example.com --strategy both + +# Batch analysis +python ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \ + --urls urls.txt --output vitals_report.json +``` + +## Environment +- `PAGESPEED_API_KEY` - Google API key (optional, higher quota) + +## Metrics Explained + +| Metric | Good | Needs Improvement | Poor | +|--------|------|-------------------|------| +| LCP | ≤2.5s | 2.5-4s | >4s | +| FID | ≤100ms | 100-300ms | >300ms | +| CLS | ≤0.1 | 0.1-0.25 | >0.25 | diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 7cbed5c..9c7e1b9 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -4,7 +4,11 @@ "Bash(find:*)", "Bash(git init:*)", "Bash(unzip:*)", - "Bash(git add:*)" + "Bash(git add:*)", + "Skill(notion-organizer)", + "Skill(ourdigital-seo-audit)", + "WebFetch(domain:les.josunhotel.com)", + "WebFetch(domain:josunhotel.com)" ] } } diff --git a/.gitignore b/.gitignore index 70b93ac..0bd6715 100644 --- a/.gitignore +++ b/.gitignore @@ -78,3 +78,7 @@ npm-debug.log* *.pem credentials.json secrets.json + +# Temporary files +output/ +keyword_analysis_*.json diff --git a/CLAUDE.md b/CLAUDE.md index bdc8713..81e4f3e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -7,59 +7,129 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co **GitHub**: https://github.com/ourdigital/claude-skills-factory This is a Claude Skills collection repository containing: -- **ourdigital-custom-skills/**: 11 custom skills for OurDigital workflows, Jamie Brand, SEO/GTM tools +- **ourdigital-custom-skills/**: 18 custom skills for OurDigital workflows, SEO, GTM, and Jamie Brand - **claude-skills-examples/**: Reference examples from Anthropic's official skills repository - **official-skils-collection/**: Notion integration skills (3rd party) - **reference/**: Skill format requirements documentation ## Custom Skills Summary +### General Automation (01-09) + | # | Skill | Purpose | Trigger | |---|-------|---------|---------| -| 02 | notion-organizer | Notion workspace management | "organize Notion", "노션 정리" | -| 03 | research-to-presentation | Notion → PPT/Figma | "create presentation from Notion" | -| 04 | seo-gateway-strategist | SEO gateway page strategy | "SEO strategy", "게이트웨이 전략" | -| 05 | gateway-page-content-builder | Gateway page content generation | "build gateway page" | -| 10 | ourdigital-visual-storytelling | Blog featured image prompts | "create image prompt", "블로그 이미지" | -| 11 | ourdigital-research-publisher | Research → Blog workflow | "research this", "블로그 작성" | -| 12 | ourdigital-seo-audit | Comprehensive SEO audit | "SEO audit", "사이트 SEO 분석" | -| 13 | ourdigital-gtm-audit | Lightweight GTM audit | "audit GTM", "GTM 검사" | -| 14 | ourdigital-gtm-manager | GTM management + dataLayer injection | "GTM manager", "dataLayer 태그 생성" | -| 20 | jamie-brand-editor | Jamie content **generation** | "write Jamie blog", "제이미 콘텐츠 생성" | -| 21 | jamie-brand-guardian | Jamie content **review/evaluation** | "review content", "브랜드 검토" | +| 01 | notion-organizer | Notion workspace management | "organize Notion", "노션 정리" | +| 02 | notion-data-migration | Database migration tools | "migrate Notion data" | -### Jamie Skills Role Separation -- **jamie-brand-editor (20)**: Creates NEW branded content from scratch -- **jamie-brand-guardian (21)**: Reviews, corrects, and evaluates EXISTING content +### SEO Tools (10-19) -### GTM Skills Role Separation -- **ourdigital-gtm-audit (13)**: Lightweight audit-only (container, dataLayer, forms, checkout) -- **ourdigital-gtm-manager (14)**: Comprehensive management (audit + dataLayer tag generation + Notion export) +| # | Skill | Purpose | Trigger | +|---|-------|---------|---------| +| 10 | seo-technical-audit | Robots.txt, sitemap, crawlability | "crawlability", "robots.txt", "sitemap" | +| 11 | seo-on-page-audit | Meta tags, headings, links | "on-page SEO", "meta tags" | +| 12 | seo-local-audit | NAP, GBP, citations | "local SEO", "Google Business Profile" | +| 13 | seo-schema-validator | Structured data validation | "validate schema", "JSON-LD" | +| 14 | seo-schema-generator | Schema markup creation | "generate schema", "create JSON-LD" | +| 15 | seo-core-web-vitals | LCP, CLS, FID, INP metrics | "Core Web Vitals", "page speed" | +| 16 | seo-search-console | GSC data analysis | "Search Console", "rankings" | +| 17 | seo-gateway-architect | Gateway page strategy | "SEO strategy", "게이트웨이 전략" | +| 18 | seo-gateway-builder | Gateway page content | "build gateway page" | -## Skill Structure +### GTM/GA Tools (20-29) + +| # | Skill | Purpose | Trigger | +|---|-------|---------|---------| +| 20 | gtm-audit | GTM container audit | "audit GTM", "GTM 검사" | +| 21 | gtm-manager | GTM management + dataLayer | "GTM manager", "dataLayer" | + +### OurDigital Channel (30-39) + +| # | Skill | Purpose | Trigger | +|---|-------|---------|---------| +| 30 | ourdigital-designer | Visual storytelling, image prompts | "create image prompt", "블로그 이미지" | +| 31 | ourdigital-research | Research → Blog workflow | "research this", "블로그 작성" | +| 32 | ourdigital-presentation | Notion → PPT/Figma | "create presentation" | + +### Jamie Clinic (40-49) + +| # | Skill | Purpose | Trigger | +|---|-------|---------|---------| +| 40 | jamie-brand-editor | Content **generation** | "write Jamie blog", "제이미 콘텐츠" | +| 41 | jamie-brand-audit | Content **review/evaluation** | "review content", "브랜드 검토" | + +## Dual-Platform Skill Structure + +Each skill has two independent versions: -Every skill must follow this structure: ``` -skill-name/ -├── SKILL.md (required) # YAML frontmatter + instructions -├── scripts/ # Executable code (Python/Bash) -├── references/ # Documentation loaded as needed -├── assets/ # Templates, images, fonts -├── templates/ # Output templates (HTML, MD) -└── examples/ # Usage examples +XX-skill-name/ +├── code/ # Claude Code version +│ ├── CLAUDE.md # Action-oriented directive +│ ├── scripts/ # Executable Python/Bash +│ └── references/ # Documentation +│ +├── desktop/ # Claude Desktop version +│ ├── SKILL.md # MCP-focused directive (YAML frontmatter) +│ ├── references/ # Guidance docs +│ └── examples/ # Usage examples +│ +└── README.md # Overview (optional) ``` -### SKILL.md Format Requirements +### Platform Differences -All SKILL.md files MUST start with YAML frontmatter: -```yaml ---- -name: skill-name-here # lowercase with hyphens, required -version: 1.0.0 # semantic versioning, required -description: Description # when Claude should use this skill, required -license: MIT # or "Internal-use Only" -allowed-tools: Tool1, Tool2 # optional, restrict tool access ---- +| Aspect | `code/` | `desktop/` | +|--------|---------|------------| +| Directive | CLAUDE.md | SKILL.md (YAML) | +| Execution | Direct Bash/Python | MCP tools only | +| Scripts | Required | Reference only | + +### Development Workflow + +1. **Build Claude Code version first** - Full automation with scripts +2. **Refactor to Desktop** - Extract guidance, use MCP tools + +## Skill Design Principles + +1. **One thing done well** - Each skill focuses on a single capability +2. **Directives under 1,500 words** - Concise, actionable +3. **Self-contained** - Each platform version is fully independent +4. **Code-first development** - Build Claude Code version first +5. **Progressive numbering** - Logical grouping by domain + +## Directory Layout + +``` +claude-skills-factory/ +├── ourdigital-custom-skills/ +│ ├── 01-notion-organizer/ +│ ├── 02-notion-data-migration/ +│ │ +│ ├── 10-seo-technical-audit/ +│ ├── 11-seo-on-page-audit/ +│ ├── 12-seo-local-audit/ +│ ├── 13-seo-schema-validator/ +│ ├── 14-seo-schema-generator/ +│ ├── 15-seo-core-web-vitals/ +│ ├── 16-seo-search-console/ +│ ├── 17-seo-gateway-architect/ +│ ├── 18-seo-gateway-builder/ +│ │ +│ ├── 20-gtm-audit/ +│ ├── 21-gtm-manager/ +│ │ +│ ├── 30-ourdigital-designer/ +│ ├── 31-ourdigital-research/ +│ ├── 32-ourdigital-presentation/ +│ │ +│ ├── 40-jamie-brand-editor/ +│ ├── 41-jamie-brand-audit/ +│ │ +│ └── _archive/ +│ +├── claude-skills-examples/skills-main/ +├── official-skils-collection/ +└── reference/ ``` ## Creating New Skills @@ -69,54 +139,8 @@ Use the skill creator initialization script: python claude-skills-examples/skills-main/skill-creator/scripts/init_skill.py --path ourdigital-custom-skills/ ``` -Package a skill for distribution: -```bash -python claude-skills-examples/skills-main/skill-creator/scripts/package_skill.py -``` - -## Skill Design Principles - -1. **Progressive Disclosure**: Skills use three-level loading: - - Metadata (name + description) - always in context (~100 words) - - SKILL.md body - when skill triggers (<5k words) - - Bundled resources - as needed by Claude - -2. **Writing Style**: Use imperative/infinitive form (verb-first), not second person. Write for AI consumption. - -3. **Resource Organization**: - - `scripts/` - Executable code (Python/Bash/JS) - - `references/` - Documentation Claude reads while working - - `templates/` - Output templates (HTML, MD, CSS) - - `assets/` - Resources (images, fonts) not loaded into context - - `examples/` - Usage examples and sample outputs - -## Directory Layout - -``` -claude-skills-factory/ -├── ourdigital-custom-skills/ # 11 custom skills -│ ├── 02-notion-organizer/ -│ ├── 03-research-to-presentation/ -│ ├── 04-seo-gateway-strategist/ -│ ├── 05-gateway-page-content-builder/ -│ ├── 10-ourdigital-visual-storytelling/ -│ ├── 11-ourdigital-research-publisher/ -│ ├── 12-ourdigital-seo-audit/ -│ ├── 13-ourdigital-gtm-audit/ # Lightweight GTM audit -│ ├── 14-ourdigital-gtm-manager/ # GTM management + injection -│ ├── 20-jamie-brand-editor/ # Content GENERATION -│ └── 21-jamie-brand-guardian/ # Content REVIEW -├── claude-skills-examples/skills-main/ # Anthropic examples -│ ├── skill-creator/ -│ ├── document-skills/ -│ ├── algorithmic-art/ -│ └── ... -├── official-skils-collection/ # 3rd party Notion skills -└── reference/ # Format documentation -``` - ## Key Reference Files - `reference/SKILL-FORMAT-REQUIREMENTS.md` - Format specification - `claude-skills-examples/skills-main/skill-creator/SKILL.md` - Skill creation guide -- `claude-skills-examples/skills-main/README.md` - Official skills documentation +- `ourdigital-custom-skills/REFACTORING_PLAN.md` - Current refactoring plan diff --git a/COMPATIBILITY_REPORT.md b/COMPATIBILITY_REPORT.md new file mode 100644 index 0000000..ea082b7 --- /dev/null +++ b/COMPATIBILITY_REPORT.md @@ -0,0 +1,253 @@ +# Claude Code Compatibility Report + +**Date**: 2025-12-21 +**Tested Platform**: Claude Code (CLI) + +## Executive Summary + +| Category | Total | ✅ Ready | ⚠️ Issues | ❌ Broken | +|----------|-------|----------|-----------|-----------| +| 01-09 General Automation | 1 | 1 | 0 | 0 | +| 10-19 SEO Skills | 9 | 9 | 0 | 0 | +| 20-29 GTM/GA Skills | 2 | 2 | 0 | 0 | +| 30-39 OurDigital Skills | 3 | 3 | 0 | 0 | +| 40-49 Jamie Skills | 2 | 2 | 0 | 0 | +| **Total** | **17** | **17** | **0** | **0** | + +--- + +## Detailed Results + +### 01-09 General Automation Skills + +#### 01-notion-organizer ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `schema_migrator.py` | ✅ Works | Proper --help, argparse | +| `async_organizer.py` | ✅ Works | Proper --help, argparse | + +**Dependencies**: notion-client, python-dotenv +**Authentication**: NOTION_TOKEN environment variable + +--- + +### 10-19 SEO Skills + +#### 10-seo-technical-audit ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `robots_checker.py` | ✅ Works | Standalone | +| `sitemap_validator.py` | ✅ Works | Requires aiohttp | +| `sitemap_crawler.py` | ✅ Works | Uses page_analyzer | +| `page_analyzer.py` | ✅ Works | Shared utility | + +**Dependencies**: aiohttp, beautifulsoup4, requests, lxml + +#### 11-seo-on-page-audit ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `page_analyzer.py` | ✅ Works | Full on-page analysis | + +**Dependencies**: beautifulsoup4, requests + +#### 12-seo-local-audit ✅ READY (Guidance-only) + +No scripts required. Uses reference materials for NAP/GBP auditing guidance. + +#### 13-seo-schema-validator ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `schema_validator.py` | ✅ Works | JSON-LD validation | + +**Dependencies**: beautifulsoup4, requests + +#### 14-seo-schema-generator ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `schema_generator.py` | ✅ Works | Template-based generation | + +**Dependencies**: None (uses JSON templates) + +#### 15-seo-core-web-vitals ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `pagespeed_client.py` | ✅ Works | Google PageSpeed API | + +**Dependencies**: requests +**Authentication**: PAGESPEED_API_KEY (optional, higher quota) + +#### 16-seo-search-console ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `gsc_client.py` | ✅ Works | Google Search Console API | + +**Dependencies**: google-api-python-client, google-auth +**Authentication**: Service account JSON file + +#### 17-seo-gateway-architect ✅ READY (Fixed) + +| Script | Status | Notes | +|--------|--------|-------| +| `keyword_analyzer.py` | ✅ Works | Proper argparse CLI with --topic, --market, --output flags | + +**Fix Applied**: Added argparse with proper argument handling. + +#### 18-seo-gateway-builder ✅ READY (Fixed) + +| Script | Status | Notes | +|--------|--------|-------| +| `generate_pages.py` | ✅ Works | Template path resolved relative to script directory | + +**Fix Applied**: Uses `Path(__file__).parent.parent` for template resolution. + +--- + +### 20-29 GTM/GA Skills + +#### 20-gtm-audit-tool ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `gtm_audit.py` | ✅ Works | Container analysis | + +**Dependencies**: requests, beautifulsoup4 + +#### 21-gtm-manager ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `gtm_manager.py` | ✅ Works | Full GTM management | + +**Dependencies**: requests, beautifulsoup4, notion-client + +--- + +### 30-39 OurDigital Skills + +#### 30-ourdigital-designer ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `generate_prompt.py` | ✅ Works | Image prompt generation | +| `mood_calibrator.py` | ✅ Works | Mood parameter tuning | + +**Dependencies**: None (pure Python) + +#### 31-ourdigital-research ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `export_to_ulysses.py` | ✅ Works | Ulysses x-callback-url | + +**Dependencies**: None (uses macOS URL schemes) +**Platform**: macOS only (Ulysses app required) + +#### 32-ourdigital-presentation ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `run_workflow.py` | ✅ Works | Full pipeline orchestration | +| `extract_notion.py` | ✅ Works | Notion content extraction | +| `synthesize_content.py` | ✅ Works | Content structuring | +| `apply_brand.py` | ✅ Works | Brand styling application | + +**Dependencies**: notion-client, python-pptx, requests + +--- + +### 40-49 Jamie Skills + +#### 40-jamie-brand-editor ✅ READY + +| Script | Status | Notes | +|--------|--------|-------| +| `compliance_checker.py` | ✅ Works | Korean medical ad compliance | + +**Dependencies**: None (regex-based checking) + +#### 41-jamie-brand-audit ✅ READY (Guidance-only) + +No scripts required. Uses desktop reference materials for brand compliance auditing. + +--- + +## Issues Fixed + +### ✅ 18-seo-gateway-builder Template Path (RESOLVED) + +**File**: `ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py` + +**Applied Fix**: +```python +if template_path is None: + script_dir = Path(__file__).parent.parent + self.template_path = script_dir / "templates" +``` + +### ✅ 17-seo-gateway-architect Help Handling (RESOLVED) + +**File**: `ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py` + +**Applied Fix**: Full argparse implementation with --topic, --market, --output, --competitors flags. + +--- + +## Environment Setup + +### Required Environment Variables + +| Variable | Used By | Required | +|----------|---------|----------| +| `NOTION_TOKEN` | 01-notion-organizer, 32-ourdigital-presentation | Yes | +| `PAGESPEED_API_KEY` | 15-seo-core-web-vitals | Optional | +| `GSC_CREDENTIALS_PATH` | 16-seo-search-console | Yes | + +### Python Dependencies Summary + +```bash +# Core dependencies (most skills) +pip install requests beautifulsoup4 lxml + +# Notion integration +pip install notion-client python-dotenv + +# Async sitemap crawling +pip install aiohttp + +# Google APIs +pip install google-api-python-client google-auth + +# PowerPoint generation +pip install python-pptx +``` + +--- + +## Recommendations + +1. **Fix Priority Issues**: Apply the two fixes listed above +2. **Add requirements.txt**: Ensure all skills have proper dependency files +3. **Standardize CLI**: All scripts should use argparse for consistent --help behavior +4. **Add Unit Tests**: Consider adding pytest tests for critical scripts +5. **Document Authentication**: Create setup guides for API key configuration + +--- + +## Conclusion + +**All 17 skills (100%)** are fully functional and ready for Claude Code usage. All identified issues have been fixed. + +The refactored skill collection follows the "one thing done well" principle effectively, with clear separation between: +- General automation (01-09) +- Technical SEO (10-16) +- Content strategy (17-18) +- Analytics/tracking (20-21) +- Content management (30-39) +- Brand compliance (40-49) diff --git a/SKILLS_COMPARISON.md b/SKILLS_COMPARISON.md new file mode 100644 index 0000000..0bb682e --- /dev/null +++ b/SKILLS_COMPARISON.md @@ -0,0 +1,244 @@ +# Skills Comparison: Current vs Refactored + +**Date**: 2025-12-21 + +## Summary + +| Metric | Current | Refactored | Change | +|--------|---------|------------|--------| +| Total Skills | 8 | 18 | +125% | +| Monolithic Skills | 2 | 0 | -100% | +| Single-purpose Skills | 6 | 18 | +200% | +| SEO Skills | 1 (6,049 LOC) | 9 (decomposed) | Modular | +| GTM Skills | 1 | 2 | Separated | + +--- + +## Current Active Skills (Claude Code) + +These skills are currently registered and accessible via `/skill-name`: + +| # | Skill Name | Purpose | Issues | +|---|------------|---------|--------| +| 1 | `doc-generator` | PDF/PPT generation | OK | +| 2 | `notion-organizer` | Notion workspace management | OK | +| 3 | `ourdigital-gtm-manager` | GTM management + dataLayer injection | Monolithic | +| 4 | `ourdigital-seo-audit` | Comprehensive SEO audit | **Monolithic (6,049 LOC)** | +| 5 | `seo-manager` | SEO management agent | OK | +| 6 | `skill-creator` | Claude skill creation wizard | OK | +| 7 | `test` | Python test runner | Generic utility | +| 8 | `lint` | Python linter | Generic utility | + +### Problems with Current Skills + +1. **ourdigital-seo-audit**: 6,049 lines across 11 scripts - too heavy, does too many things +2. **ourdigital-gtm-manager**: Combines audit + tag generation - should be split +3. **No clear separation**: Hard to know which skill to use for specific tasks + +--- + +## Refactored Skills (New Structure) + +### 01-09: General Automation + +| # | Skill | Purpose | LOC | Status | +|---|-------|---------|-----|--------| +| 01 | `notion-organizer` | Notion workspace management | ~600 | ✅ Ready | +| 02 | `notion-data-migration` | Database schema migration | ~400 | ✅ Ready | + +### 10-19: SEO Skills (Decomposed from seo-audit-agent) + +| # | Skill | Purpose | Source Scripts | Status | +|---|-------|---------|----------------|--------| +| 10 | `seo-technical-audit` | robots.txt, sitemap validation | robots_checker, sitemap_* | ✅ Ready | +| 11 | `seo-on-page-audit` | Meta tags, headings, links | page_analyzer | ✅ Ready | +| 12 | `seo-local-audit` | NAP, GBP, citations | Guidance-only | ✅ Ready | +| 13 | `seo-schema-validator` | JSON-LD validation | schema_validator | ✅ Ready | +| 14 | `seo-schema-generator` | Schema markup generation | schema_generator | ✅ Ready | +| 15 | `seo-core-web-vitals` | PageSpeed metrics | pagespeed_client | ✅ Ready | +| 16 | `seo-search-console` | GSC data retrieval | gsc_client | ✅ Ready | +| 17 | `seo-gateway-architect` | Keyword strategy planning | keyword_analyzer | ✅ Ready | +| 18 | `seo-gateway-builder` | Gateway page generation | generate_pages | ✅ Ready | + +### 20-29: GTM/GA Skills + +| # | Skill | Purpose | Status | +|---|-------|---------|--------| +| 20 | `gtm-audit` | Lightweight GTM audit only | ✅ Ready | +| 21 | `gtm-manager` | Full GTM management + dataLayer injection | ✅ Ready | + +### 30-39: OurDigital Skills + +| # | Skill | Purpose | Status | +|---|-------|---------|--------| +| 30 | `ourdigital-designer` | Blog featured image prompts | ✅ Ready | +| 31 | `ourdigital-research` | Research → Blog export | ✅ Ready | +| 32 | `ourdigital-presentation` | Notion → PowerPoint workflow | ✅ Ready | + +### 40-49: Jamie Clinic Skills + +| # | Skill | Purpose | Status | +|---|-------|---------|--------| +| 40 | `jamie-brand-editor` | Content **generation** | ✅ Ready | +| 41 | `jamie-brand-audit` | Content **review/evaluation** | ✅ Ready | + +--- + +## Key Improvements + +### 1. SEO Decomposition + +**Before (Monolithic)**: +``` +seo-audit-agent/ +├── scripts/ +│ ├── base_client.py (207 LOC) +│ ├── full_audit.py (497 LOC) +│ ├── gsc_client.py (409 LOC) +│ ├── notion_reporter.py (951 LOC) +│ ├── page_analyzer.py (569 LOC) +│ ├── pagespeed_client.py (452 LOC) +│ ├── robots_checker.py (540 LOC) +│ ├── schema_generator.py (490 LOC) +│ ├── schema_validator.py (498 LOC) +│ ├── sitemap_crawler.py (969 LOC) +│ └── sitemap_validator.py (467 LOC) +└── Total: 6,049 LOC in ONE skill +``` + +**After (Modular)**: +``` +10-seo-technical-audit/ → robots + sitemap +11-seo-on-page-audit/ → page analysis +12-seo-local-audit/ → local SEO (guidance) +13-seo-schema-validator/ → schema validation +14-seo-schema-generator/ → schema generation +15-seo-core-web-vitals/ → PageSpeed +16-seo-search-console/ → GSC data +17-seo-gateway-architect/ → keyword strategy +18-seo-gateway-builder/ → page generation + +→ 9 focused skills, each ~400-600 LOC max +``` + +### 2. GTM Separation + +**Before**: +``` +ourdigital-gtm-manager/ → Everything in one +``` + +**After**: +``` +20-gtm-audit/ → Audit only (lightweight) +21-gtm-manager/ → Full management + injection +``` + +### 3. Jamie Clinic Clarity + +**Before**: +``` +jamie-brand-editor/ → Unclear if create or review +jamie-brand-guardian/ → Confusing name +``` + +**After**: +``` +40-jamie-brand-editor/ → Content GENERATION +41-jamie-brand-audit/ → Content REVIEW +``` + +### 4. Dual-Platform Support + +Each skill now has: +``` +skill-name/ +├── code/ → Claude Code (CLI) +│ ├── CLAUDE.md +│ └── scripts/ +└── desktop/ → Claude Desktop + ├── SKILL.md + └── references/ +``` + +--- + +## Migration Path + +### Skills to Keep (No Change) + +| Current | Status | +|---------|--------| +| `doc-generator` | Keep as-is | +| `skill-creator` | Keep as-is | +| `test` | Keep as-is | +| `lint` | Keep as-is | + +### Skills to Replace + +| Current | Replace With | Notes | +|---------|--------------|-------| +| `ourdigital-seo-audit` | `10-16` (7 skills) | Full decomposition | +| `ourdigital-gtm-manager` | `20-gtm-audit` + `21-gtm-manager` | Separated roles | +| `notion-organizer` | `01-notion-organizer` | Refactored structure | +| `seo-manager` | `17-seo-gateway-architect` | More focused | + +### New Skills to Add + +| New Skill | Purpose | +|-----------|---------| +| `02-notion-data-migration` | Schema migration | +| `18-seo-gateway-builder` | Content generation | +| `30-ourdigital-designer` | Image prompts | +| `31-ourdigital-research` | Research export | +| `32-ourdigital-presentation` | Slides generation | +| `40-jamie-brand-editor` | Content creation | +| `41-jamie-brand-audit` | Content review | + +--- + +## Recommended Actions + +1. **Backup current skills**: Copy current ~/.claude/commands/ before changes + +2. **Install refactored skills**: Link or copy CLAUDE.md files to project + +3. **Update skill references**: Update any automation scripts that reference old skill names + +4. **Test each skill**: Run `python script.py --help` for each script + +5. **Archive old skills**: Move deprecated skills to `_archive/` + +--- + +## Directory Structure After Migration + +``` +~/.claude/commands/ +├── lint.md (keep) +└── test.md (keep) + +project/.claude/commands/ +├── 01-notion-organizer.md +├── 10-seo-technical-audit.md +├── 11-seo-on-page-audit.md +├── ... +├── 20-gtm-audit.md +├── 21-gtm-manager.md +├── 30-ourdigital-designer.md +├── ... +├── 40-jamie-brand-editor.md +└── 41-jamie-brand-audit.md +``` + +--- + +## Conclusion + +The refactoring achieves: + +1. **"One thing done well"**: Each skill has a single clear purpose +2. **Reduced complexity**: Max ~600 LOC per skill vs 6,049 LOC monolith +3. **Clear naming**: `audit` vs `manager`, `editor` vs `audit` +4. **Better discoverability**: Numbered categories (10-19 = SEO, 20-29 = GTM, etc.) +5. **Platform separation**: `code/` for CLI, `desktop/` for Desktop app diff --git a/ourdigital-custom-skills/01-notion-organizer/code/CLAUDE.md b/ourdigital-custom-skills/01-notion-organizer/code/CLAUDE.md new file mode 100644 index 0000000..b5b8e20 --- /dev/null +++ b/ourdigital-custom-skills/01-notion-organizer/code/CLAUDE.md @@ -0,0 +1,89 @@ +# CLAUDE.md + +## Overview + +Notion workspace management toolkit for database organization, schema migration, and bulk operations. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt + +# Schema migration +python scripts/schema_migrator.py --source [DB_ID] --target [DB_ID] --dry-run + +# Async bulk operations +python scripts/async_organizer.py --database [DB_ID] --action cleanup +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `schema_migrator.py` | Migrate data between databases with property mapping | +| `async_organizer.py` | Async bulk operations (cleanup, restructure, archive) | + +## Schema Migrator + +```bash +# Dry run (preview changes) +python scripts/schema_migrator.py \ + --source abc123 \ + --target def456 \ + --mapping mapping.json \ + --dry-run + +# Execute migration +python scripts/schema_migrator.py \ + --source abc123 \ + --target def456 \ + --mapping mapping.json +``` + +### Mapping File Format + +```json +{ + "properties": { + "OldName": "NewName", + "Status": "Status" + }, + "transforms": { + "Date": "date_to_iso" + } +} +``` + +## Async Organizer + +```bash +# Cleanup empty/stale pages +python scripts/async_organizer.py --database [ID] --action cleanup + +# Archive old pages +python scripts/async_organizer.py --database [ID] --action archive --days 90 + +# Restructure hierarchy +python scripts/async_organizer.py --database [ID] --action restructure +``` + +## Rate Limits + +| Limit | Value | +|-------|-------| +| Requests/second | 3 max | +| Items per request | 100 max | +| Retry on 429 | Exponential backoff | + +## Configuration + +Environment variables: +```bash +NOTION_TOKEN=secret_xxx +``` + +## Notes + +- Always use `--dry-run` first for destructive operations +- Large operations (1000+ pages) use async with progress reporting +- Scripts implement automatic rate limiting diff --git a/ourdigital-custom-skills/02-notion-organizer/references/reference.md b/ourdigital-custom-skills/01-notion-organizer/code/references/reference.md similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/references/reference.md rename to ourdigital-custom-skills/01-notion-organizer/code/references/reference.md diff --git a/ourdigital-custom-skills/02-notion-organizer/scripts/async_organizer.py b/ourdigital-custom-skills/01-notion-organizer/code/scripts/async_organizer.py similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/scripts/async_organizer.py rename to ourdigital-custom-skills/01-notion-organizer/code/scripts/async_organizer.py diff --git a/ourdigital-custom-skills/02-notion-organizer/scripts/requirements.txt b/ourdigital-custom-skills/01-notion-organizer/code/scripts/requirements.txt similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/scripts/requirements.txt rename to ourdigital-custom-skills/01-notion-organizer/code/scripts/requirements.txt diff --git a/ourdigital-custom-skills/02-notion-organizer/scripts/schema_migrator.py b/ourdigital-custom-skills/01-notion-organizer/code/scripts/schema_migrator.py similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/scripts/schema_migrator.py rename to ourdigital-custom-skills/01-notion-organizer/code/scripts/schema_migrator.py diff --git a/ourdigital-custom-skills/02-notion-organizer/templates/mapping_example.json b/ourdigital-custom-skills/01-notion-organizer/code/templates/mapping_example.json similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/templates/mapping_example.json rename to ourdigital-custom-skills/01-notion-organizer/code/templates/mapping_example.json diff --git a/ourdigital-custom-skills/02-notion-organizer/SKILL.md b/ourdigital-custom-skills/01-notion-organizer/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/SKILL.md rename to ourdigital-custom-skills/01-notion-organizer/desktop/SKILL.md diff --git a/ourdigital-custom-skills/02-notion-organizer/examples/examples.md b/ourdigital-custom-skills/01-notion-organizer/desktop/examples/examples.md similarity index 100% rename from ourdigital-custom-skills/02-notion-organizer/examples/examples.md rename to ourdigital-custom-skills/01-notion-organizer/desktop/examples/examples.md diff --git a/ourdigital-custom-skills/10-seo-technical-audit/code/CLAUDE.md b/ourdigital-custom-skills/10-seo-technical-audit/code/CLAUDE.md new file mode 100644 index 0000000..432dda4 --- /dev/null +++ b/ourdigital-custom-skills/10-seo-technical-audit/code/CLAUDE.md @@ -0,0 +1,127 @@ +# CLAUDE.md + +## Overview + +Technical SEO auditor for crawlability fundamentals: robots.txt validation, XML sitemap analysis, and URL accessibility checking. + +## Quick Start + +```bash +# Install dependencies +pip install -r scripts/requirements.txt + +# Robots.txt analysis +python scripts/robots_checker.py --url https://example.com + +# Sitemap validation +python scripts/sitemap_validator.py --url https://example.com/sitemap.xml + +# Async URL crawl (check sitemap URLs accessibility) +python scripts/sitemap_crawler.py --sitemap https://example.com/sitemap.xml +``` + +## Scripts + +| Script | Purpose | Key Output | +|--------|---------|------------| +| `robots_checker.py` | Parse and validate robots.txt | User-agent rules, disallow patterns, sitemap declarations | +| `sitemap_validator.py` | Validate XML sitemap structure | URL count, lastmod dates, size limits, syntax errors | +| `sitemap_crawler.py` | Async check URL accessibility | HTTP status codes, response times, broken links | +| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient | + +## Robots.txt Checker + +```bash +# Basic analysis +python scripts/robots_checker.py --url https://example.com + +# Test specific URL against rules +python scripts/robots_checker.py --url https://example.com --test-url /admin/ + +# Output JSON +python scripts/robots_checker.py --url https://example.com --json +``` + +**Checks performed**: +- Syntax validation +- User-agent rule parsing +- Disallow/Allow pattern analysis +- Sitemap declarations +- Critical resource access (CSS/JS/images) + +## Sitemap Validator + +```bash +# Validate sitemap +python scripts/sitemap_validator.py --url https://example.com/sitemap.xml + +# Include sitemap index parsing +python scripts/sitemap_validator.py --url https://example.com/sitemap_index.xml --follow-index +``` + +**Validation rules**: +- XML syntax correctness +- URL count limit (50,000 max per sitemap) +- File size limit (50MB max uncompressed) +- Lastmod date format validation +- Sitemap index structure + +## Sitemap Crawler + +```bash +# Crawl all URLs in sitemap +python scripts/sitemap_crawler.py --sitemap https://example.com/sitemap.xml + +# Limit concurrent requests +python scripts/sitemap_crawler.py --sitemap https://example.com/sitemap.xml --concurrency 10 + +# Sample mode (check subset) +python scripts/sitemap_crawler.py --sitemap https://example.com/sitemap.xml --sample 100 +``` + +**Output includes**: +- HTTP status codes per URL +- Response times +- Redirect chains +- Broken links (4xx, 5xx) + +## Output Format + +All scripts support `--json` flag for structured output: + +```json +{ + "url": "https://example.com", + "status": "valid|invalid|warning", + "issues": [ + { + "type": "error|warning|info", + "message": "Description", + "location": "Line or URL" + } + ], + "summary": {} +} +``` + +## Common Issues Detected + +| Category | Issue | Severity | +|----------|-------|----------| +| Robots.txt | Missing sitemap declaration | Medium | +| Robots.txt | Blocking CSS/JS resources | High | +| Robots.txt | Overly broad disallow rules | Medium | +| Sitemap | URLs returning 404 | High | +| Sitemap | Missing lastmod dates | Low | +| Sitemap | Exceeds 50,000 URL limit | High | +| Sitemap | Non-canonical URLs included | Medium | + +## Configuration + +Environment variables (optional): +```bash +# Rate limiting +CRAWL_DELAY=1.0 # Seconds between requests +MAX_CONCURRENT=20 # Async concurrency limit +REQUEST_TIMEOUT=30 # Request timeout seconds +``` diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/base_client.py b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/base_client.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/base_client.py rename to ourdigital-custom-skills/10-seo-technical-audit/code/scripts/base_client.py diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/page_analyzer.py b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/page_analyzer.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/page_analyzer.py rename to ourdigital-custom-skills/10-seo-technical-audit/code/scripts/page_analyzer.py diff --git a/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/requirements.txt b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/requirements.txt new file mode 100644 index 0000000..c12048e --- /dev/null +++ b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/requirements.txt @@ -0,0 +1,17 @@ +# 10-seo-technical-audit dependencies +# Install: pip install -r requirements.txt + +# Web Scraping & Parsing +lxml>=5.1.0 +beautifulsoup4>=4.12.0 +requests>=2.31.0 +aiohttp>=3.9.0 + +# Async & Retry +tenacity>=8.2.0 +tqdm>=4.66.0 + +# Environment & CLI +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/robots_checker.py b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/robots_checker.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/robots_checker.py rename to ourdigital-custom-skills/10-seo-technical-audit/code/scripts/robots_checker.py diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/sitemap_crawler.py b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_crawler.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/sitemap_crawler.py rename to ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_crawler.py diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/sitemap_validator.py b/ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_validator.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/sitemap_validator.py rename to ourdigital-custom-skills/10-seo-technical-audit/code/scripts/sitemap_validator.py diff --git a/ourdigital-custom-skills/10-seo-technical-audit/desktop/SKILL.md b/ourdigital-custom-skills/10-seo-technical-audit/desktop/SKILL.md new file mode 100644 index 0000000..f750b6f --- /dev/null +++ b/ourdigital-custom-skills/10-seo-technical-audit/desktop/SKILL.md @@ -0,0 +1,94 @@ +--- +name: seo-technical-audit +version: 1.0.0 +description: Technical SEO auditor for crawlability fundamentals. Triggers: robots.txt, sitemap validation, crawlability, indexing check, technical SEO. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__*, mcp__notion__* +--- + +# SEO Technical Audit + +## Purpose + +Analyze crawlability fundamentals: robots.txt rules, XML sitemap structure, and URL accessibility. Identify issues blocking search engine crawlers. + +## Core Capabilities + +1. **Robots.txt Analysis** - Parse rules, check blocked resources +2. **Sitemap Validation** - Verify XML structure, URL limits, dates +3. **URL Accessibility** - Check HTTP status, redirects, broken links + +## MCP Tool Usage + +### Firecrawl for Page Data +``` +mcp__firecrawl__scrape: Fetch robots.txt and sitemap content +mcp__firecrawl__crawl: Check multiple URLs accessibility +``` + +### Perplexity for Best Practices +``` +mcp__perplexity__search: Research current SEO recommendations +``` + +## Workflow + +### 1. Robots.txt Check +1. Fetch `[domain]/robots.txt` using Firecrawl +2. Parse User-agent rules and Disallow patterns +3. Identify blocked resources (CSS, JS, images) +4. Check for Sitemap declarations +5. Report critical issues + +### 2. Sitemap Validation +1. Locate sitemap (from robots.txt or `/sitemap.xml`) +2. Validate XML syntax +3. Check URL count (max 50,000) +4. Verify lastmod date formats +5. For sitemap index: parse child sitemaps + +### 3. URL Accessibility Sampling +1. Extract URLs from sitemap +2. Sample 50-100 URLs for large sites +3. Check HTTP status codes +4. Identify redirects and broken links +5. Report 4xx/5xx errors + +## Output Format + +```markdown +## Technical SEO Audit: [domain] + +### Robots.txt Analysis +- Status: [Valid/Invalid/Missing] +- Sitemap declared: [Yes/No] +- Critical blocks: [List] + +### Sitemap Validation +- URLs found: [count] +- Syntax: [Valid/Errors] +- Issues: [List] + +### URL Accessibility (sampled) +- Checked: [count] URLs +- Success (2xx): [count] +- Redirects (3xx): [count] +- Errors (4xx/5xx): [count] + +### Recommendations +1. [Priority fixes] +``` + +## Common Issues + +| Issue | Impact | Fix | +|-------|--------|-----| +| No sitemap in robots.txt | Medium | Add `Sitemap:` directive | +| Blocking CSS/JS | High | Allow Googlebot access | +| 404s in sitemap | High | Remove or fix URLs | +| Missing lastmod | Low | Add dates for freshness signals | + +## Limitations + +- Cannot access password-protected sitemaps +- Large sitemaps (10,000+ URLs) require sampling +- Does not check render-blocking issues (use Core Web Vitals skill) diff --git a/ourdigital-custom-skills/11-seo-on-page-audit/code/CLAUDE.md b/ourdigital-custom-skills/11-seo-on-page-audit/code/CLAUDE.md new file mode 100644 index 0000000..0afa126 --- /dev/null +++ b/ourdigital-custom-skills/11-seo-on-page-audit/code/CLAUDE.md @@ -0,0 +1,107 @@ +# CLAUDE.md + +## Overview + +On-page SEO analyzer for single-page optimization: meta tags, headings, links, images, and Open Graph data. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt +python scripts/page_analyzer.py --url https://example.com +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `page_analyzer.py` | Analyze on-page SEO elements | +| `base_client.py` | Shared utilities | + +## Usage + +```bash +# Full page analysis +python scripts/page_analyzer.py --url https://example.com + +# JSON output +python scripts/page_analyzer.py --url https://example.com --json + +# Analyze multiple pages +python scripts/page_analyzer.py --urls urls.txt +``` + +## Analysis Categories + +### Meta Tags +- Title tag (length, keywords) +- Meta description (length, call-to-action) +- Canonical URL +- Robots meta tag + +### Heading Structure +- H1 presence and count +- Heading hierarchy (H1→H6) +- Keyword placement in headings + +### Links +- Internal link count +- External link count +- Broken links (4xx/5xx) +- Nofollow distribution + +### Images +- Alt attribute presence +- Image file sizes +- Lazy loading implementation + +### Open Graph / Social +- OG title, description, image +- Twitter Card tags +- Social sharing preview + +## Output + +```json +{ + "url": "https://example.com", + "meta": { + "title": "Page Title", + "title_length": 55, + "description": "...", + "description_length": 150, + "canonical": "https://example.com" + }, + "headings": { + "h1_count": 1, + "h1_text": ["Main Heading"], + "hierarchy_valid": true + }, + "links": { + "internal": 25, + "external": 5, + "broken": [] + }, + "issues": [] +} +``` + +## Common Issues + +| Issue | Severity | Recommendation | +|-------|----------|----------------| +| Missing H1 | High | Add single H1 tag | +| Title too long (>60) | Medium | Shorten to 50-60 chars | +| No meta description | High | Add compelling description | +| Images without alt | Medium | Add descriptive alt text | +| Multiple H1 tags | Medium | Use single H1 only | + +## Dependencies + +``` +lxml>=5.1.0 +beautifulsoup4>=4.12.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +``` diff --git a/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/base_client.py b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/base_client.py new file mode 100644 index 0000000..ac5715b --- /dev/null +++ b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/base_client.py @@ -0,0 +1,207 @@ +""" +Base Client - Shared async client utilities +=========================================== +Purpose: Rate-limited async operations for API clients +Python: 3.10+ +""" + +import asyncio +import logging +import os +from asyncio import Semaphore +from datetime import datetime +from typing import Any, Callable, TypeVar + +from dotenv import load_dotenv +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +# Load environment variables +load_dotenv() + +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +T = TypeVar("T") + + +class RateLimiter: + """Rate limiter using token bucket algorithm.""" + + def __init__(self, rate: float, per: float = 1.0): + """ + Initialize rate limiter. + + Args: + rate: Number of requests allowed + per: Time period in seconds (default: 1 second) + """ + self.rate = rate + self.per = per + self.tokens = rate + self.last_update = datetime.now() + self._lock = asyncio.Lock() + + async def acquire(self) -> None: + """Acquire a token, waiting if necessary.""" + async with self._lock: + now = datetime.now() + elapsed = (now - self.last_update).total_seconds() + self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per)) + self.last_update = now + + if self.tokens < 1: + wait_time = (1 - self.tokens) * (self.per / self.rate) + await asyncio.sleep(wait_time) + self.tokens = 0 + else: + self.tokens -= 1 + + +class BaseAsyncClient: + """Base class for async API clients with rate limiting.""" + + def __init__( + self, + max_concurrent: int = 5, + requests_per_second: float = 3.0, + logger: logging.Logger | None = None, + ): + """ + Initialize base client. + + Args: + max_concurrent: Maximum concurrent requests + requests_per_second: Rate limit + logger: Logger instance + """ + self.semaphore = Semaphore(max_concurrent) + self.rate_limiter = RateLimiter(requests_per_second) + self.logger = logger or logging.getLogger(self.__class__.__name__) + self.stats = { + "requests": 0, + "success": 0, + "errors": 0, + "retries": 0, + } + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + async def _rate_limited_request( + self, + coro: Callable[[], Any], + ) -> Any: + """Execute a request with rate limiting and retry.""" + async with self.semaphore: + await self.rate_limiter.acquire() + self.stats["requests"] += 1 + try: + result = await coro() + self.stats["success"] += 1 + return result + except Exception as e: + self.stats["errors"] += 1 + self.logger.error(f"Request failed: {e}") + raise + + async def batch_requests( + self, + requests: list[Callable[[], Any]], + desc: str = "Processing", + ) -> list[Any]: + """Execute multiple requests concurrently.""" + try: + from tqdm.asyncio import tqdm + has_tqdm = True + except ImportError: + has_tqdm = False + + async def execute(req: Callable) -> Any: + try: + return await self._rate_limited_request(req) + except Exception as e: + return {"error": str(e)} + + tasks = [execute(req) for req in requests] + + if has_tqdm: + results = [] + for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc): + result = await coro + results.append(result) + return results + else: + return await asyncio.gather(*tasks, return_exceptions=True) + + def print_stats(self) -> None: + """Print request statistics.""" + self.logger.info("=" * 40) + self.logger.info("Request Statistics:") + self.logger.info(f" Total Requests: {self.stats['requests']}") + self.logger.info(f" Successful: {self.stats['success']}") + self.logger.info(f" Errors: {self.stats['errors']}") + self.logger.info("=" * 40) + + +class ConfigManager: + """Manage API configuration and credentials.""" + + def __init__(self): + load_dotenv() + + @property + def google_credentials_path(self) -> str | None: + """Get Google service account credentials path.""" + # Prefer SEO-specific credentials, fallback to general credentials + seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json") + if os.path.exists(seo_creds): + return seo_creds + return os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + @property + def pagespeed_api_key(self) -> str | None: + """Get PageSpeed Insights API key.""" + return os.getenv("PAGESPEED_API_KEY") + + @property + def custom_search_api_key(self) -> str | None: + """Get Custom Search API key.""" + return os.getenv("CUSTOM_SEARCH_API_KEY") + + @property + def custom_search_engine_id(self) -> str | None: + """Get Custom Search Engine ID.""" + return os.getenv("CUSTOM_SEARCH_ENGINE_ID") + + @property + def notion_token(self) -> str | None: + """Get Notion API token.""" + return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY") + + def validate_google_credentials(self) -> bool: + """Validate Google credentials are configured.""" + creds_path = self.google_credentials_path + if not creds_path: + return False + return os.path.exists(creds_path) + + def get_required(self, key: str) -> str: + """Get required environment variable or raise error.""" + value = os.getenv(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value + + +# Singleton config instance +config = ConfigManager() diff --git a/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py new file mode 100644 index 0000000..b662e81 --- /dev/null +++ b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py @@ -0,0 +1,569 @@ +""" +Page Analyzer - Extract SEO metadata from web pages +=================================================== +Purpose: Comprehensive page-level SEO data extraction +Python: 3.10+ +Usage: + from page_analyzer import PageAnalyzer, PageMetadata + analyzer = PageAnalyzer() + metadata = analyzer.analyze_url("https://example.com/page") +""" + +import json +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any +from urllib.parse import urljoin, urlparse + +import requests +from bs4 import BeautifulSoup + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class LinkData: + """Represents a link found on a page.""" + url: str + anchor_text: str + is_internal: bool + is_nofollow: bool = False + link_type: str = "body" # body, nav, footer, etc. + + +@dataclass +class HeadingData: + """Represents a heading found on a page.""" + level: int # 1-6 + text: str + + +@dataclass +class SchemaData: + """Represents schema.org structured data.""" + schema_type: str + properties: dict + format: str = "json-ld" # json-ld, microdata, rdfa + + +@dataclass +class OpenGraphData: + """Represents Open Graph metadata.""" + og_title: str | None = None + og_description: str | None = None + og_image: str | None = None + og_url: str | None = None + og_type: str | None = None + og_site_name: str | None = None + og_locale: str | None = None + twitter_card: str | None = None + twitter_title: str | None = None + twitter_description: str | None = None + twitter_image: str | None = None + + +@dataclass +class PageMetadata: + """Complete SEO metadata for a page.""" + + # Basic info + url: str + status_code: int = 0 + content_type: str = "" + response_time_ms: float = 0 + analyzed_at: datetime = field(default_factory=datetime.now) + + # Meta tags + title: str | None = None + title_length: int = 0 + meta_description: str | None = None + meta_description_length: int = 0 + canonical_url: str | None = None + robots_meta: str | None = None + + # Language + html_lang: str | None = None + hreflang_tags: list[dict] = field(default_factory=list) # [{"lang": "en", "url": "..."}] + + # Headings + headings: list[HeadingData] = field(default_factory=list) + h1_count: int = 0 + h1_text: str | None = None + + # Open Graph & Social + open_graph: OpenGraphData = field(default_factory=OpenGraphData) + + # Schema/Structured Data + schema_data: list[SchemaData] = field(default_factory=list) + schema_types_found: list[str] = field(default_factory=list) + + # Links + internal_links: list[LinkData] = field(default_factory=list) + external_links: list[LinkData] = field(default_factory=list) + internal_link_count: int = 0 + external_link_count: int = 0 + + # Images + images_total: int = 0 + images_without_alt: int = 0 + images_with_alt: int = 0 + + # Content metrics + word_count: int = 0 + + # Issues found + issues: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization.""" + return { + "url": self.url, + "status_code": self.status_code, + "content_type": self.content_type, + "response_time_ms": self.response_time_ms, + "analyzed_at": self.analyzed_at.isoformat(), + "title": self.title, + "title_length": self.title_length, + "meta_description": self.meta_description, + "meta_description_length": self.meta_description_length, + "canonical_url": self.canonical_url, + "robots_meta": self.robots_meta, + "html_lang": self.html_lang, + "hreflang_tags": self.hreflang_tags, + "h1_count": self.h1_count, + "h1_text": self.h1_text, + "headings_count": len(self.headings), + "schema_types_found": self.schema_types_found, + "internal_link_count": self.internal_link_count, + "external_link_count": self.external_link_count, + "images_total": self.images_total, + "images_without_alt": self.images_without_alt, + "word_count": self.word_count, + "issues": self.issues, + "warnings": self.warnings, + "open_graph": { + "og_title": self.open_graph.og_title, + "og_description": self.open_graph.og_description, + "og_image": self.open_graph.og_image, + "og_url": self.open_graph.og_url, + "og_type": self.open_graph.og_type, + }, + } + + def get_summary(self) -> str: + """Get a brief summary of the page analysis.""" + lines = [ + f"URL: {self.url}", + f"Status: {self.status_code}", + f"Title: {self.title[:50] + '...' if self.title and len(self.title) > 50 else self.title}", + f"Description: {'✓' if self.meta_description else '✗ Missing'}", + f"Canonical: {'✓' if self.canonical_url else '✗ Missing'}", + f"H1: {self.h1_count} found", + f"Schema: {', '.join(self.schema_types_found) if self.schema_types_found else 'None'}", + f"Links: {self.internal_link_count} internal, {self.external_link_count} external", + f"Images: {self.images_total} total, {self.images_without_alt} without alt", + ] + if self.issues: + lines.append(f"Issues: {len(self.issues)}") + return "\n".join(lines) + + +class PageAnalyzer: + """Analyze web pages for SEO metadata.""" + + DEFAULT_USER_AGENT = "Mozilla/5.0 (compatible; OurDigitalSEOBot/1.0; +https://ourdigital.org)" + + def __init__( + self, + user_agent: str | None = None, + timeout: int = 30, + ): + """ + Initialize page analyzer. + + Args: + user_agent: Custom user agent string + timeout: Request timeout in seconds + """ + self.user_agent = user_agent or self.DEFAULT_USER_AGENT + self.timeout = timeout + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": self.user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9,ko;q=0.8", + }) + + def analyze_url(self, url: str) -> PageMetadata: + """ + Analyze a URL and extract SEO metadata. + + Args: + url: URL to analyze + + Returns: + PageMetadata object with all extracted data + """ + metadata = PageMetadata(url=url) + + try: + # Fetch page + start_time = datetime.now() + response = self.session.get(url, timeout=self.timeout, allow_redirects=True) + metadata.response_time_ms = (datetime.now() - start_time).total_seconds() * 1000 + metadata.status_code = response.status_code + metadata.content_type = response.headers.get("Content-Type", "") + + if response.status_code != 200: + metadata.issues.append(f"HTTP {response.status_code} status") + if response.status_code >= 400: + return metadata + + # Parse HTML + soup = BeautifulSoup(response.text, "html.parser") + base_url = url + + # Extract all metadata + self._extract_basic_meta(soup, metadata) + self._extract_canonical(soup, metadata, base_url) + self._extract_robots_meta(soup, metadata) + self._extract_hreflang(soup, metadata) + self._extract_headings(soup, metadata) + self._extract_open_graph(soup, metadata) + self._extract_schema(soup, metadata) + self._extract_links(soup, metadata, base_url) + self._extract_images(soup, metadata) + self._extract_content_metrics(soup, metadata) + + # Run SEO checks + self._run_seo_checks(metadata) + + except requests.RequestException as e: + metadata.issues.append(f"Request failed: {str(e)}") + logger.error(f"Failed to analyze {url}: {e}") + except Exception as e: + metadata.issues.append(f"Analysis error: {str(e)}") + logger.error(f"Error analyzing {url}: {e}") + + return metadata + + def _extract_basic_meta(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract title and meta description.""" + # Title + title_tag = soup.find("title") + if title_tag and title_tag.string: + metadata.title = title_tag.string.strip() + metadata.title_length = len(metadata.title) + + # Meta description + desc_tag = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)}) + if desc_tag and desc_tag.get("content"): + metadata.meta_description = desc_tag["content"].strip() + metadata.meta_description_length = len(metadata.meta_description) + + # HTML lang + html_tag = soup.find("html") + if html_tag and html_tag.get("lang"): + metadata.html_lang = html_tag["lang"] + + def _extract_canonical(self, soup: BeautifulSoup, metadata: PageMetadata, base_url: str) -> None: + """Extract canonical URL.""" + canonical = soup.find("link", rel="canonical") + if canonical and canonical.get("href"): + metadata.canonical_url = urljoin(base_url, canonical["href"]) + + def _extract_robots_meta(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract robots meta tag.""" + robots = soup.find("meta", attrs={"name": re.compile(r"^robots$", re.I)}) + if robots and robots.get("content"): + metadata.robots_meta = robots["content"] + + # Also check for googlebot-specific + googlebot = soup.find("meta", attrs={"name": re.compile(r"^googlebot$", re.I)}) + if googlebot and googlebot.get("content"): + if metadata.robots_meta: + metadata.robots_meta += f" | googlebot: {googlebot['content']}" + else: + metadata.robots_meta = f"googlebot: {googlebot['content']}" + + def _extract_hreflang(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract hreflang tags.""" + hreflang_tags = soup.find_all("link", rel="alternate", hreflang=True) + for tag in hreflang_tags: + if tag.get("href") and tag.get("hreflang"): + metadata.hreflang_tags.append({ + "lang": tag["hreflang"], + "url": tag["href"] + }) + + def _extract_headings(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract all headings.""" + for level in range(1, 7): + for heading in soup.find_all(f"h{level}"): + text = heading.get_text(strip=True) + if text: + metadata.headings.append(HeadingData(level=level, text=text)) + + # Count H1s specifically + h1_tags = soup.find_all("h1") + metadata.h1_count = len(h1_tags) + if h1_tags: + metadata.h1_text = h1_tags[0].get_text(strip=True) + + def _extract_open_graph(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract Open Graph and Twitter Card data.""" + og = metadata.open_graph + + # Open Graph tags + og_mappings = { + "og:title": "og_title", + "og:description": "og_description", + "og:image": "og_image", + "og:url": "og_url", + "og:type": "og_type", + "og:site_name": "og_site_name", + "og:locale": "og_locale", + } + + for og_prop, attr_name in og_mappings.items(): + tag = soup.find("meta", property=og_prop) + if tag and tag.get("content"): + setattr(og, attr_name, tag["content"]) + + # Twitter Card tags + twitter_mappings = { + "twitter:card": "twitter_card", + "twitter:title": "twitter_title", + "twitter:description": "twitter_description", + "twitter:image": "twitter_image", + } + + for tw_name, attr_name in twitter_mappings.items(): + tag = soup.find("meta", attrs={"name": tw_name}) + if tag and tag.get("content"): + setattr(og, attr_name, tag["content"]) + + def _extract_schema(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract schema.org structured data.""" + # JSON-LD + for script in soup.find_all("script", type="application/ld+json"): + try: + data = json.loads(script.string) + if isinstance(data, list): + for item in data: + self._process_schema_item(item, metadata, "json-ld") + else: + self._process_schema_item(data, metadata, "json-ld") + except (json.JSONDecodeError, TypeError): + continue + + # Microdata (basic detection) + for item in soup.find_all(itemscope=True): + itemtype = item.get("itemtype", "") + if itemtype: + schema_type = itemtype.split("/")[-1] + if schema_type not in metadata.schema_types_found: + metadata.schema_types_found.append(schema_type) + metadata.schema_data.append(SchemaData( + schema_type=schema_type, + properties={}, + format="microdata" + )) + + def _process_schema_item(self, data: dict, metadata: PageMetadata, format_type: str) -> None: + """Process a single schema.org item.""" + if not isinstance(data, dict): + return + + schema_type = data.get("@type", "Unknown") + if isinstance(schema_type, list): + schema_type = schema_type[0] if schema_type else "Unknown" + + if schema_type not in metadata.schema_types_found: + metadata.schema_types_found.append(schema_type) + + metadata.schema_data.append(SchemaData( + schema_type=schema_type, + properties=data, + format=format_type + )) + + # Process nested @graph items + if "@graph" in data: + for item in data["@graph"]: + self._process_schema_item(item, metadata, format_type) + + def _extract_links(self, soup: BeautifulSoup, metadata: PageMetadata, base_url: str) -> None: + """Extract internal and external links.""" + parsed_base = urlparse(base_url) + base_domain = parsed_base.netloc.lower() + + for a_tag in soup.find_all("a", href=True): + href = a_tag["href"] + + # Skip non-http links + if href.startswith(("#", "javascript:", "mailto:", "tel:")): + continue + + # Resolve relative URLs + full_url = urljoin(base_url, href) + parsed_url = urlparse(full_url) + + # Get anchor text + anchor_text = a_tag.get_text(strip=True)[:100] # Limit length + + # Check if nofollow + rel = a_tag.get("rel", []) + if isinstance(rel, str): + rel = rel.split() + is_nofollow = "nofollow" in rel + + # Determine if internal or external + link_domain = parsed_url.netloc.lower() + is_internal = ( + link_domain == base_domain or + link_domain.endswith(f".{base_domain}") or + base_domain.endswith(f".{link_domain}") + ) + + link_data = LinkData( + url=full_url, + anchor_text=anchor_text, + is_internal=is_internal, + is_nofollow=is_nofollow, + ) + + if is_internal: + metadata.internal_links.append(link_data) + else: + metadata.external_links.append(link_data) + + metadata.internal_link_count = len(metadata.internal_links) + metadata.external_link_count = len(metadata.external_links) + + def _extract_images(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract image information.""" + images = soup.find_all("img") + metadata.images_total = len(images) + + for img in images: + alt = img.get("alt", "").strip() + if alt: + metadata.images_with_alt += 1 + else: + metadata.images_without_alt += 1 + + def _extract_content_metrics(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract content metrics like word count.""" + # Remove script and style elements + for element in soup(["script", "style", "noscript"]): + element.decompose() + + # Get text content + text = soup.get_text(separator=" ", strip=True) + words = text.split() + metadata.word_count = len(words) + + def _run_seo_checks(self, metadata: PageMetadata) -> None: + """Run SEO checks and add issues/warnings.""" + # Title checks + if not metadata.title: + metadata.issues.append("Missing title tag") + elif metadata.title_length < 30: + metadata.warnings.append(f"Title too short ({metadata.title_length} chars, recommend 50-60)") + elif metadata.title_length > 60: + metadata.warnings.append(f"Title too long ({metadata.title_length} chars, recommend 50-60)") + + # Meta description checks + if not metadata.meta_description: + metadata.issues.append("Missing meta description") + elif metadata.meta_description_length < 120: + metadata.warnings.append(f"Meta description too short ({metadata.meta_description_length} chars)") + elif metadata.meta_description_length > 160: + metadata.warnings.append(f"Meta description too long ({metadata.meta_description_length} chars)") + + # Canonical check + if not metadata.canonical_url: + metadata.warnings.append("Missing canonical tag") + elif metadata.canonical_url != metadata.url: + metadata.warnings.append(f"Canonical points to different URL: {metadata.canonical_url}") + + # H1 checks + if metadata.h1_count == 0: + metadata.issues.append("Missing H1 tag") + elif metadata.h1_count > 1: + metadata.warnings.append(f"Multiple H1 tags ({metadata.h1_count})") + + # Image alt check + if metadata.images_without_alt > 0: + metadata.warnings.append(f"{metadata.images_without_alt} images missing alt text") + + # Schema check + if not metadata.schema_types_found: + metadata.warnings.append("No structured data found") + + # Open Graph check + if not metadata.open_graph.og_title: + metadata.warnings.append("Missing Open Graph tags") + + # Robots meta check + if metadata.robots_meta: + robots_lower = metadata.robots_meta.lower() + if "noindex" in robots_lower: + metadata.issues.append("Page is set to noindex") + if "nofollow" in robots_lower: + metadata.warnings.append("Page is set to nofollow") + + +def main(): + """CLI entry point for testing.""" + import argparse + + parser = argparse.ArgumentParser(description="Page SEO Analyzer") + parser.add_argument("url", help="URL to analyze") + parser.add_argument("--json", "-j", action="store_true", help="Output as JSON") + + args = parser.parse_args() + + analyzer = PageAnalyzer() + metadata = analyzer.analyze_url(args.url) + + if args.json: + print(json.dumps(metadata.to_dict(), indent=2, ensure_ascii=False)) + else: + print("=" * 60) + print("PAGE ANALYSIS REPORT") + print("=" * 60) + print(metadata.get_summary()) + print() + + if metadata.issues: + print("ISSUES:") + for issue in metadata.issues: + print(f" ✗ {issue}") + + if metadata.warnings: + print("\nWARNINGS:") + for warning in metadata.warnings: + print(f" ⚠ {warning}") + + if metadata.hreflang_tags: + print(f"\nHREFLANG TAGS ({len(metadata.hreflang_tags)}):") + for tag in metadata.hreflang_tags[:5]: + print(f" {tag['lang']}: {tag['url']}") + + if metadata.schema_types_found: + print(f"\nSCHEMA TYPES:") + for schema_type in metadata.schema_types_found: + print(f" - {schema_type}") + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/requirements.txt b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/requirements.txt new file mode 100644 index 0000000..d2eee88 --- /dev/null +++ b/ourdigital-custom-skills/11-seo-on-page-audit/code/scripts/requirements.txt @@ -0,0 +1,6 @@ +# 11-seo-on-page-audit dependencies +lxml>=5.1.0 +beautifulsoup4>=4.12.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +rich>=13.7.0 diff --git a/ourdigital-custom-skills/11-seo-on-page-audit/desktop/SKILL.md b/ourdigital-custom-skills/11-seo-on-page-audit/desktop/SKILL.md new file mode 100644 index 0000000..f797522 --- /dev/null +++ b/ourdigital-custom-skills/11-seo-on-page-audit/desktop/SKILL.md @@ -0,0 +1,94 @@ +--- +name: seo-on-page-audit +version: 1.0.0 +description: On-page SEO analyzer for meta tags, headings, links, images, and Open Graph. Triggers: on-page SEO, meta tags, title tag, heading structure, alt text. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__*, mcp__notion__* +--- + +# SEO On-Page Audit + +## Purpose + +Analyze single-page SEO elements: meta tags, heading hierarchy, internal/external links, images, and social sharing tags. + +## Core Capabilities + +1. **Meta Tags** - Title, description, canonical, robots +2. **Headings** - H1-H6 structure and hierarchy +3. **Links** - Internal, external, broken detection +4. **Images** - Alt text, sizing, lazy loading +5. **Social** - Open Graph, Twitter Cards + +## MCP Tool Usage + +``` +mcp__firecrawl__scrape: Extract page HTML and metadata +mcp__perplexity__search: Research SEO best practices +mcp__notion__create-page: Save audit findings +``` + +## Workflow + +1. Scrape target URL with Firecrawl +2. Extract and analyze meta tags +3. Map heading hierarchy +4. Count and categorize links +5. Check image optimization +6. Validate Open Graph tags +7. Generate recommendations + +## Checklist + +### Meta Tags +- [ ] Title present (50-60 characters) +- [ ] Meta description present (150-160 characters) +- [ ] Canonical URL set +- [ ] Robots meta allows indexing + +### Headings +- [ ] Single H1 tag +- [ ] Logical hierarchy (no skips) +- [ ] Keywords in H1 + +### Links +- [ ] No broken internal links +- [ ] External links use rel attributes +- [ ] Reasonable internal link count + +### Images +- [ ] All images have alt text +- [ ] Images are appropriately sized +- [ ] Lazy loading implemented + +### Open Graph +- [ ] og:title present +- [ ] og:description present +- [ ] og:image present (1200x630) + +## Output Format + +```markdown +## On-Page Audit: [URL] + +### Meta Tags: X/5 +| Element | Status | Value | +|---------|--------|-------| + +### Headings: X/5 +- H1: [text] +- Hierarchy: Valid/Invalid + +### Links +- Internal: X +- External: X +- Broken: X + +### Recommendations +1. [Priority fixes] +``` + +## Limitations + +- Single page analysis only +- Cannot detect JavaScript-rendered content issues +- External link status requires additional crawl diff --git a/ourdigital-custom-skills/12-seo-local-audit/code/CLAUDE.md b/ourdigital-custom-skills/12-seo-local-audit/code/CLAUDE.md new file mode 100644 index 0000000..64fd55f --- /dev/null +++ b/ourdigital-custom-skills/12-seo-local-audit/code/CLAUDE.md @@ -0,0 +1,107 @@ +# CLAUDE.md + +## Overview + +Local SEO auditor for businesses with physical locations: NAP consistency, Google Business Profile optimization, local citations, and LocalBusiness schema validation. + +## Quick Start + +This skill primarily uses MCP tools (Firecrawl, Perplexity) for data collection. Scripts are helpers for validation. + +```bash +# NAP consistency check (manual data input) +python scripts/nap_checker.py --business "Business Name" --address "123 Main St" --phone "555-1234" + +# LocalBusiness schema validation +python scripts/local_schema_validator.py --url https://example.com +``` + +## Audit Components + +### 1. NAP Consistency +**Name, Address, Phone** consistency across: +- Website (header, footer, contact page) +- Google Business Profile +- Local directories (Yelp, Yellow Pages, etc.) +- Social media profiles + +### 2. Google Business Profile (GBP) +Optimization checklist: +- [ ] Business name matches website +- [ ] Address is complete and accurate +- [ ] Phone number is local +- [ ] Business hours are current +- [ ] Categories are appropriate +- [ ] Photos uploaded (exterior, interior, products) +- [ ] Posts are recent (within 7 days) +- [ ] Reviews are responded to + +### 3. Local Citations +Priority directories to check: +- Google Business Profile +- Apple Maps +- Bing Places +- Yelp +- Facebook Business +- Industry-specific directories + +### 4. LocalBusiness Schema +Required properties: +- @type (LocalBusiness or subtype) +- name +- address (PostalAddress) +- telephone +- openingHours + +## Workflow + +``` +1. Collect NAP from client +2. Scrape website for NAP mentions +3. Search citations using Perplexity +4. Check GBP data (manual or API) +5. Validate LocalBusiness schema +6. Generate consistency report +``` + +## Output Format + +```markdown +## Local SEO Audit: [Business Name] + +### NAP Consistency Score: X/10 + +| Source | Name | Address | Phone | Status | +|--------|------|---------|-------|--------| +| Website | ✓ | ✓ | ✓ | Match | +| GBP | ✓ | ✗ | ✓ | Mismatch | + +### GBP Optimization: X/10 +- [ ] Issue 1 +- [x] Completed item + +### Citation Audit +- Found: X citations +- Consistent: X +- Needs update: X + +### Recommendations +1. Fix address mismatch on GBP +2. Add LocalBusiness schema +``` + +## Common Issues + +| Issue | Impact | Fix | +|-------|--------|-----| +| NAP inconsistency | High | Update all directories | +| Missing GBP categories | Medium | Add relevant categories | +| No LocalBusiness schema | Medium | Add JSON-LD markup | +| Outdated business hours | Medium | Update GBP hours | +| No review responses | Low | Respond to all reviews | + +## Notes + +- GBP API requires enterprise approval (use manual audit) +- Citation discovery limited to public data +- Use schema generator skill (14) for creating LocalBusiness markup diff --git a/ourdigital-custom-skills/12-seo-local-audit/desktop/SKILL.md b/ourdigital-custom-skills/12-seo-local-audit/desktop/SKILL.md new file mode 100644 index 0000000..ab9da57 --- /dev/null +++ b/ourdigital-custom-skills/12-seo-local-audit/desktop/SKILL.md @@ -0,0 +1,116 @@ +--- +name: seo-local-audit +version: 1.0.0 +description: Local SEO auditor for NAP consistency, Google Business Profile, citations, and LocalBusiness schema. Triggers: local SEO, Google Business Profile, GBP, NAP, citations, local rankings. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__*, mcp__notion__* +--- + +# SEO Local Audit + +## Purpose + +Audit local business SEO: NAP (Name, Address, Phone) consistency, Google Business Profile optimization, local citations, and LocalBusiness schema markup. + +## Core Capabilities + +1. **NAP Consistency** - Cross-platform verification +2. **GBP Optimization** - Profile completeness check +3. **Citation Audit** - Directory presence +4. **Schema Validation** - LocalBusiness markup + +## MCP Tool Usage + +``` +mcp__firecrawl__scrape: Extract NAP from website +mcp__perplexity__search: Find citations and directories +mcp__notion__create-page: Save audit findings +``` + +## Workflow + +### 1. Gather Business Info +Collect from client: +- Business name (exact) +- Full address +- Phone number (local preferred) +- Website URL +- GBP listing URL + +### 2. Website NAP Check +Scrape website for NAP mentions: +- Header/footer +- Contact page +- About page +- Schema markup + +### 3. Citation Discovery +Search for business mentions: +- "[Business Name] [City]" +- Phone number search +- Address search + +### 4. GBP Review +Manual checklist: +- Profile completeness +- Category accuracy +- Photo presence +- Review responses +- Post recency + +### 5. Schema Check +Validate LocalBusiness markup presence and accuracy. + +## GBP Optimization Checklist + +- [ ] Business name matches website +- [ ] Complete address with suite/unit +- [ ] Local phone number (not toll-free) +- [ ] Accurate business hours +- [ ] Primary + secondary categories set +- [ ] Business description complete +- [ ] 10+ photos uploaded +- [ ] Recent post (within 7 days) +- [ ] Reviews responded to + +## Citation Priority + +| Platform | Priority | +|----------|----------| +| Google Business Profile | Critical | +| Apple Maps | High | +| Bing Places | High | +| Yelp | High | +| Facebook | Medium | +| Industry directories | Medium | + +## Output Format + +```markdown +## Local SEO Audit: [Business] + +### NAP Consistency: X/10 +| Source | Name | Address | Phone | +|--------|------|---------|-------| +| Website | ✓/✗ | ✓/✗ | ✓/✗ | +| GBP | ✓/✗ | ✓/✗ | ✓/✗ | + +### GBP Score: X/10 +[Checklist results] + +### Citations Found: X +- Consistent: X +- Inconsistent: X + +### LocalBusiness Schema +- Present: Yes/No +- Valid: Yes/No + +### Priority Actions +1. [Fix recommendations] +``` + +## Limitations + +- GBP data requires manual access +- Citation discovery limited to searchable sources +- Cannot update external directories diff --git a/ourdigital-custom-skills/13-seo-schema-validator/code/CLAUDE.md b/ourdigital-custom-skills/13-seo-schema-validator/code/CLAUDE.md new file mode 100644 index 0000000..8677579 --- /dev/null +++ b/ourdigital-custom-skills/13-seo-schema-validator/code/CLAUDE.md @@ -0,0 +1,113 @@ +# CLAUDE.md + +## Overview + +Structured data validator: extract, parse, and validate JSON-LD, Microdata, and RDFa markup against schema.org vocabulary. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt +python scripts/schema_validator.py --url https://example.com +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `schema_validator.py` | Extract and validate structured data | +| `base_client.py` | Shared utilities | + +## Usage + +```bash +# Validate page schema +python scripts/schema_validator.py --url https://example.com + +# JSON output +python scripts/schema_validator.py --url https://example.com --json + +# Validate local file +python scripts/schema_validator.py --file schema.json + +# Check Rich Results eligibility +python scripts/schema_validator.py --url https://example.com --rich-results +``` + +## Supported Formats + +| Format | Detection | +|--------|-----------| +| JSON-LD | ` +``` + +## Template Customization + +Templates in `templates/` can be modified. Required fields are marked: + +```json +{ + "@context": "https://schema.org", + "@type": "Article", + "headline": "{{REQUIRED}}", + "author": { + "@type": "Person", + "name": "{{REQUIRED}}" + }, + "datePublished": "{{REQUIRED}}", + "image": "{{RECOMMENDED}}" +} +``` + +## Validation + +Generated schemas are validated before output: +- Syntax correctness +- Required properties present +- Schema.org vocabulary compliance + +Use skill 13 (schema-validator) for additional validation. + +## Dependencies + +``` +jsonschema>=4.21.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +``` diff --git a/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/base_client.py b/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/base_client.py new file mode 100644 index 0000000..ac5715b --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/base_client.py @@ -0,0 +1,207 @@ +""" +Base Client - Shared async client utilities +=========================================== +Purpose: Rate-limited async operations for API clients +Python: 3.10+ +""" + +import asyncio +import logging +import os +from asyncio import Semaphore +from datetime import datetime +from typing import Any, Callable, TypeVar + +from dotenv import load_dotenv +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +# Load environment variables +load_dotenv() + +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +T = TypeVar("T") + + +class RateLimiter: + """Rate limiter using token bucket algorithm.""" + + def __init__(self, rate: float, per: float = 1.0): + """ + Initialize rate limiter. + + Args: + rate: Number of requests allowed + per: Time period in seconds (default: 1 second) + """ + self.rate = rate + self.per = per + self.tokens = rate + self.last_update = datetime.now() + self._lock = asyncio.Lock() + + async def acquire(self) -> None: + """Acquire a token, waiting if necessary.""" + async with self._lock: + now = datetime.now() + elapsed = (now - self.last_update).total_seconds() + self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per)) + self.last_update = now + + if self.tokens < 1: + wait_time = (1 - self.tokens) * (self.per / self.rate) + await asyncio.sleep(wait_time) + self.tokens = 0 + else: + self.tokens -= 1 + + +class BaseAsyncClient: + """Base class for async API clients with rate limiting.""" + + def __init__( + self, + max_concurrent: int = 5, + requests_per_second: float = 3.0, + logger: logging.Logger | None = None, + ): + """ + Initialize base client. + + Args: + max_concurrent: Maximum concurrent requests + requests_per_second: Rate limit + logger: Logger instance + """ + self.semaphore = Semaphore(max_concurrent) + self.rate_limiter = RateLimiter(requests_per_second) + self.logger = logger or logging.getLogger(self.__class__.__name__) + self.stats = { + "requests": 0, + "success": 0, + "errors": 0, + "retries": 0, + } + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + async def _rate_limited_request( + self, + coro: Callable[[], Any], + ) -> Any: + """Execute a request with rate limiting and retry.""" + async with self.semaphore: + await self.rate_limiter.acquire() + self.stats["requests"] += 1 + try: + result = await coro() + self.stats["success"] += 1 + return result + except Exception as e: + self.stats["errors"] += 1 + self.logger.error(f"Request failed: {e}") + raise + + async def batch_requests( + self, + requests: list[Callable[[], Any]], + desc: str = "Processing", + ) -> list[Any]: + """Execute multiple requests concurrently.""" + try: + from tqdm.asyncio import tqdm + has_tqdm = True + except ImportError: + has_tqdm = False + + async def execute(req: Callable) -> Any: + try: + return await self._rate_limited_request(req) + except Exception as e: + return {"error": str(e)} + + tasks = [execute(req) for req in requests] + + if has_tqdm: + results = [] + for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc): + result = await coro + results.append(result) + return results + else: + return await asyncio.gather(*tasks, return_exceptions=True) + + def print_stats(self) -> None: + """Print request statistics.""" + self.logger.info("=" * 40) + self.logger.info("Request Statistics:") + self.logger.info(f" Total Requests: {self.stats['requests']}") + self.logger.info(f" Successful: {self.stats['success']}") + self.logger.info(f" Errors: {self.stats['errors']}") + self.logger.info("=" * 40) + + +class ConfigManager: + """Manage API configuration and credentials.""" + + def __init__(self): + load_dotenv() + + @property + def google_credentials_path(self) -> str | None: + """Get Google service account credentials path.""" + # Prefer SEO-specific credentials, fallback to general credentials + seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json") + if os.path.exists(seo_creds): + return seo_creds + return os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + @property + def pagespeed_api_key(self) -> str | None: + """Get PageSpeed Insights API key.""" + return os.getenv("PAGESPEED_API_KEY") + + @property + def custom_search_api_key(self) -> str | None: + """Get Custom Search API key.""" + return os.getenv("CUSTOM_SEARCH_API_KEY") + + @property + def custom_search_engine_id(self) -> str | None: + """Get Custom Search Engine ID.""" + return os.getenv("CUSTOM_SEARCH_ENGINE_ID") + + @property + def notion_token(self) -> str | None: + """Get Notion API token.""" + return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY") + + def validate_google_credentials(self) -> bool: + """Validate Google credentials are configured.""" + creds_path = self.google_credentials_path + if not creds_path: + return False + return os.path.exists(creds_path) + + def get_required(self, key: str) -> str: + """Get required environment variable or raise error.""" + value = os.getenv(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value + + +# Singleton config instance +config = ConfigManager() diff --git a/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/requirements.txt b/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/requirements.txt new file mode 100644 index 0000000..48c7180 --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/requirements.txt @@ -0,0 +1,6 @@ +# 14-seo-schema-generator dependencies +jsonschema>=4.21.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/schema_generator.py b/ourdigital-custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/schema_generator.py rename to ourdigital-custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/article.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/article.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/article.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/article.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/breadcrumb.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/breadcrumb.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/breadcrumb.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/breadcrumb.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/faq.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/faq.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/faq.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/faq.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/local_business.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/local_business.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/local_business.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/local_business.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/organization.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/organization.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/organization.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/organization.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/product.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/product.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/product.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/product.json diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/website.json b/ourdigital-custom-skills/14-seo-schema-generator/code/templates/website.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/schema_templates/website.json rename to ourdigital-custom-skills/14-seo-schema-generator/code/templates/website.json diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/SKILL.md b/ourdigital-custom-skills/14-seo-schema-generator/desktop/SKILL.md new file mode 100644 index 0000000..cacbadd --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/SKILL.md @@ -0,0 +1,146 @@ +--- +name: seo-schema-generator +version: 1.0.0 +description: Schema markup generator for JSON-LD structured data. Triggers: generate schema, create JSON-LD, add structured data, schema markup. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__* +--- + +# SEO Schema Generator + +## Purpose + +Generate JSON-LD structured data markup for various content types using templates. + +## Core Capabilities + +1. **Organization** - Company/brand information +2. **LocalBusiness** - Physical location businesses +3. **Article** - Blog posts and news articles +4. **Product** - E-commerce products +5. **FAQPage** - FAQ sections +6. **BreadcrumbList** - Navigation breadcrumbs +7. **WebSite** - Site-level with search action + +## Workflow + +1. Identify content type +2. Gather required information +3. Generate JSON-LD from template +4. Validate output +5. Provide implementation instructions + +## Schema Templates + +### Organization +```json +{ + "@context": "https://schema.org", + "@type": "Organization", + "name": "[Company Name]", + "url": "[Website URL]", + "logo": "[Logo URL]", + "sameAs": [ + "[Social Media URLs]" + ] +} +``` + +### LocalBusiness +```json +{ + "@context": "https://schema.org", + "@type": "LocalBusiness", + "name": "[Business Name]", + "address": { + "@type": "PostalAddress", + "streetAddress": "[Street]", + "addressLocality": "[City]", + "addressRegion": "[State]", + "postalCode": "[ZIP]", + "addressCountry": "[Country]" + }, + "telephone": "[Phone]", + "openingHours": ["Mo-Fr 09:00-17:00"] +} +``` + +### Article +```json +{ + "@context": "https://schema.org", + "@type": "Article", + "headline": "[Title]", + "author": { + "@type": "Person", + "name": "[Author Name]" + }, + "datePublished": "[YYYY-MM-DD]", + "dateModified": "[YYYY-MM-DD]", + "image": "[Image URL]", + "publisher": { + "@type": "Organization", + "name": "[Publisher]", + "logo": "[Logo URL]" + } +} +``` + +### FAQPage +```json +{ + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [ + { + "@type": "Question", + "name": "[Question]", + "acceptedAnswer": { + "@type": "Answer", + "text": "[Answer]" + } + } + ] +} +``` + +### Product +```json +{ + "@context": "https://schema.org", + "@type": "Product", + "name": "[Product Name]", + "image": "[Image URL]", + "description": "[Description]", + "offers": { + "@type": "Offer", + "price": "[Price]", + "priceCurrency": "[Currency]", + "availability": "https://schema.org/InStock" + } +} +``` + +## Implementation + +Place generated JSON-LD in `` section: + +```html + + + +``` + +## Validation + +After generating: +1. Use schema validator skill (13) to verify +2. Test with Google Rich Results Test +3. Monitor in Search Console + +## Limitations + +- Templates cover common types only +- Complex nested schemas may need manual adjustment +- Some Rich Results require additional properties diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/article.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/article.json new file mode 100644 index 0000000..8d0daab --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/article.json @@ -0,0 +1,32 @@ +{ + "@context": "https://schema.org", + "@type": "{{article_type}}", + "headline": "{{headline}}", + "description": "{{description}}", + "image": [ + "{{image_url_1}}", + "{{image_url_2}}" + ], + "datePublished": "{{date_published}}", + "dateModified": "{{date_modified}}", + "author": { + "@type": "Person", + "name": "{{author_name}}", + "url": "{{author_url}}" + }, + "publisher": { + "@type": "Organization", + "name": "{{publisher_name}}", + "logo": { + "@type": "ImageObject", + "url": "{{publisher_logo_url}}" + } + }, + "mainEntityOfPage": { + "@type": "WebPage", + "@id": "{{page_url}}" + }, + "articleSection": "{{section}}", + "wordCount": "{{word_count}}", + "keywords": "{{keywords}}" +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/breadcrumb.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/breadcrumb.json new file mode 100644 index 0000000..1e8f9e8 --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/breadcrumb.json @@ -0,0 +1,24 @@ +{ + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "{{level_1_name}}", + "item": "{{level_1_url}}" + }, + { + "@type": "ListItem", + "position": 2, + "name": "{{level_2_name}}", + "item": "{{level_2_url}}" + }, + { + "@type": "ListItem", + "position": 3, + "name": "{{level_3_name}}", + "item": "{{level_3_url}}" + } + ] +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/faq.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/faq.json new file mode 100644 index 0000000..f90b98c --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/faq.json @@ -0,0 +1,30 @@ +{ + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [ + { + "@type": "Question", + "name": "{{question_1}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_1}}" + } + }, + { + "@type": "Question", + "name": "{{question_2}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_2}}" + } + }, + { + "@type": "Question", + "name": "{{question_3}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_3}}" + } + } + ] +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/local_business.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/local_business.json new file mode 100644 index 0000000..8e6dba4 --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/local_business.json @@ -0,0 +1,47 @@ +{ + "@context": "https://schema.org", + "@type": "{{business_type}}", + "name": "{{name}}", + "description": "{{description}}", + "url": "{{url}}", + "telephone": "{{phone}}", + "email": "{{email}}", + "image": "{{image_url}}", + "priceRange": "{{price_range}}", + "address": { + "@type": "PostalAddress", + "streetAddress": "{{street_address}}", + "addressLocality": "{{city}}", + "addressRegion": "{{region}}", + "postalCode": "{{postal_code}}", + "addressCountry": "{{country}}" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": "{{latitude}}", + "longitude": "{{longitude}}" + }, + "openingHoursSpecification": [ + { + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], + "opens": "{{weekday_opens}}", + "closes": "{{weekday_closes}}" + }, + { + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Saturday", "Sunday"], + "opens": "{{weekend_opens}}", + "closes": "{{weekend_closes}}" + } + ], + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": "{{rating}}", + "reviewCount": "{{review_count}}" + }, + "sameAs": [ + "{{facebook_url}}", + "{{instagram_url}}" + ] +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/organization.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/organization.json new file mode 100644 index 0000000..76c4734 --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/organization.json @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org", + "@type": "Organization", + "name": "{{name}}", + "url": "{{url}}", + "logo": "{{logo_url}}", + "description": "{{description}}", + "foundingDate": "{{founding_date}}", + "founders": [ + { + "@type": "Person", + "name": "{{founder_name}}" + } + ], + "address": { + "@type": "PostalAddress", + "streetAddress": "{{street_address}}", + "addressLocality": "{{city}}", + "addressRegion": "{{region}}", + "postalCode": "{{postal_code}}", + "addressCountry": "{{country}}" + }, + "contactPoint": [ + { + "@type": "ContactPoint", + "telephone": "{{phone}}", + "contactType": "customer service", + "availableLanguage": ["Korean", "English"] + } + ], + "sameAs": [ + "{{facebook_url}}", + "{{twitter_url}}", + "{{linkedin_url}}", + "{{instagram_url}}" + ] +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/product.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/product.json new file mode 100644 index 0000000..d24af66 --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/product.json @@ -0,0 +1,76 @@ +{ + "@context": "https://schema.org", + "@type": "Product", + "name": "{{name}}", + "description": "{{description}}", + "image": [ + "{{image_url_1}}", + "{{image_url_2}}", + "{{image_url_3}}" + ], + "sku": "{{sku}}", + "mpn": "{{mpn}}", + "gtin13": "{{gtin13}}", + "brand": { + "@type": "Brand", + "name": "{{brand_name}}" + }, + "offers": { + "@type": "Offer", + "url": "{{product_url}}", + "price": "{{price}}", + "priceCurrency": "{{currency}}", + "priceValidUntil": "{{price_valid_until}}", + "availability": "https://schema.org/{{availability}}", + "itemCondition": "https://schema.org/{{condition}}", + "seller": { + "@type": "Organization", + "name": "{{seller_name}}" + }, + "shippingDetails": { + "@type": "OfferShippingDetails", + "shippingRate": { + "@type": "MonetaryAmount", + "value": "{{shipping_cost}}", + "currency": "{{currency}}" + }, + "deliveryTime": { + "@type": "ShippingDeliveryTime", + "handlingTime": { + "@type": "QuantitativeValue", + "minValue": "{{handling_min_days}}", + "maxValue": "{{handling_max_days}}", + "unitCode": "DAY" + }, + "transitTime": { + "@type": "QuantitativeValue", + "minValue": "{{transit_min_days}}", + "maxValue": "{{transit_max_days}}", + "unitCode": "DAY" + } + } + } + }, + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": "{{rating}}", + "reviewCount": "{{review_count}}", + "bestRating": "5", + "worstRating": "1" + }, + "review": [ + { + "@type": "Review", + "reviewRating": { + "@type": "Rating", + "ratingValue": "{{review_rating}}", + "bestRating": "5" + }, + "author": { + "@type": "Person", + "name": "{{reviewer_name}}" + }, + "reviewBody": "{{review_text}}" + } + ] +} diff --git a/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/website.json b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/website.json new file mode 100644 index 0000000..65ec61a --- /dev/null +++ b/ourdigital-custom-skills/14-seo-schema-generator/desktop/templates/website.json @@ -0,0 +1,25 @@ +{ + "@context": "https://schema.org", + "@type": "WebSite", + "name": "{{site_name}}", + "alternateName": "{{alternate_name}}", + "url": "{{url}}", + "description": "{{description}}", + "inLanguage": "{{language}}", + "potentialAction": { + "@type": "SearchAction", + "target": { + "@type": "EntryPoint", + "urlTemplate": "{{search_url_template}}" + }, + "query-input": "required name=search_term_string" + }, + "publisher": { + "@type": "Organization", + "name": "{{publisher_name}}", + "logo": { + "@type": "ImageObject", + "url": "{{logo_url}}" + } + } +} diff --git a/ourdigital-custom-skills/15-seo-core-web-vitals/code/CLAUDE.md b/ourdigital-custom-skills/15-seo-core-web-vitals/code/CLAUDE.md new file mode 100644 index 0000000..da78a91 --- /dev/null +++ b/ourdigital-custom-skills/15-seo-core-web-vitals/code/CLAUDE.md @@ -0,0 +1,117 @@ +# CLAUDE.md + +## Overview + +Core Web Vitals analyzer using Google PageSpeed Insights API: LCP, FID, CLS, INP, TTFB, FCP measurement and recommendations. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt + +# Requires API key +export PAGESPEED_API_KEY=your_api_key + +python scripts/pagespeed_client.py --url https://example.com +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `pagespeed_client.py` | PageSpeed Insights API client | +| `base_client.py` | Shared utilities | + +## Usage + +```bash +# Mobile analysis (default) +python scripts/pagespeed_client.py --url https://example.com + +# Desktop analysis +python scripts/pagespeed_client.py --url https://example.com --strategy desktop + +# Both strategies +python scripts/pagespeed_client.py --url https://example.com --strategy both + +# JSON output +python scripts/pagespeed_client.py --url https://example.com --json + +# Batch analysis +python scripts/pagespeed_client.py --urls urls.txt --output results.json +``` + +## Core Web Vitals Metrics + +| Metric | Good | Needs Improvement | Poor | +|--------|------|-------------------|------| +| LCP (Largest Contentful Paint) | ≤2.5s | 2.5s-4s | >4s | +| FID (First Input Delay) | ≤100ms | 100ms-300ms | >300ms | +| CLS (Cumulative Layout Shift) | ≤0.1 | 0.1-0.25 | >0.25 | +| INP (Interaction to Next Paint) | ≤200ms | 200ms-500ms | >500ms | + +## Additional Metrics + +| Metric | Description | +|--------|-------------| +| TTFB | Time to First Byte | +| FCP | First Contentful Paint | +| SI | Speed Index | +| TBT | Total Blocking Time | + +## Output + +```json +{ + "url": "https://example.com", + "strategy": "mobile", + "score": 85, + "core_web_vitals": { + "lcp": {"value": 2.1, "rating": "good"}, + "fid": {"value": 50, "rating": "good"}, + "cls": {"value": 0.05, "rating": "good"}, + "inp": {"value": 180, "rating": "good"} + }, + "opportunities": [ + { + "id": "render-blocking-resources", + "title": "Eliminate render-blocking resources", + "savings_ms": 1200 + } + ], + "diagnostics": [] +} +``` + +## Configuration + +Environment variables: +```bash +PAGESPEED_API_KEY=AIza... # Required for higher quotas +GOOGLE_API_KEY=AIza... # Alternative key name +``` + +## Rate Limits + +| Tier | Limit | +|------|-------| +| No API key | 25 queries/day | +| With API key | 25,000 queries/day | + +## Common Recommendations + +| Issue | Fix | +|-------|-----| +| Large LCP | Optimize images, preload critical resources | +| High CLS | Set image dimensions, avoid injected content | +| Poor INP | Reduce JavaScript, optimize event handlers | +| Slow TTFB | Improve server response, use CDN | + +## Dependencies + +``` +google-api-python-client>=2.100.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +``` diff --git a/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/base_client.py b/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/base_client.py new file mode 100644 index 0000000..ac5715b --- /dev/null +++ b/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/base_client.py @@ -0,0 +1,207 @@ +""" +Base Client - Shared async client utilities +=========================================== +Purpose: Rate-limited async operations for API clients +Python: 3.10+ +""" + +import asyncio +import logging +import os +from asyncio import Semaphore +from datetime import datetime +from typing import Any, Callable, TypeVar + +from dotenv import load_dotenv +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +# Load environment variables +load_dotenv() + +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +T = TypeVar("T") + + +class RateLimiter: + """Rate limiter using token bucket algorithm.""" + + def __init__(self, rate: float, per: float = 1.0): + """ + Initialize rate limiter. + + Args: + rate: Number of requests allowed + per: Time period in seconds (default: 1 second) + """ + self.rate = rate + self.per = per + self.tokens = rate + self.last_update = datetime.now() + self._lock = asyncio.Lock() + + async def acquire(self) -> None: + """Acquire a token, waiting if necessary.""" + async with self._lock: + now = datetime.now() + elapsed = (now - self.last_update).total_seconds() + self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per)) + self.last_update = now + + if self.tokens < 1: + wait_time = (1 - self.tokens) * (self.per / self.rate) + await asyncio.sleep(wait_time) + self.tokens = 0 + else: + self.tokens -= 1 + + +class BaseAsyncClient: + """Base class for async API clients with rate limiting.""" + + def __init__( + self, + max_concurrent: int = 5, + requests_per_second: float = 3.0, + logger: logging.Logger | None = None, + ): + """ + Initialize base client. + + Args: + max_concurrent: Maximum concurrent requests + requests_per_second: Rate limit + logger: Logger instance + """ + self.semaphore = Semaphore(max_concurrent) + self.rate_limiter = RateLimiter(requests_per_second) + self.logger = logger or logging.getLogger(self.__class__.__name__) + self.stats = { + "requests": 0, + "success": 0, + "errors": 0, + "retries": 0, + } + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + async def _rate_limited_request( + self, + coro: Callable[[], Any], + ) -> Any: + """Execute a request with rate limiting and retry.""" + async with self.semaphore: + await self.rate_limiter.acquire() + self.stats["requests"] += 1 + try: + result = await coro() + self.stats["success"] += 1 + return result + except Exception as e: + self.stats["errors"] += 1 + self.logger.error(f"Request failed: {e}") + raise + + async def batch_requests( + self, + requests: list[Callable[[], Any]], + desc: str = "Processing", + ) -> list[Any]: + """Execute multiple requests concurrently.""" + try: + from tqdm.asyncio import tqdm + has_tqdm = True + except ImportError: + has_tqdm = False + + async def execute(req: Callable) -> Any: + try: + return await self._rate_limited_request(req) + except Exception as e: + return {"error": str(e)} + + tasks = [execute(req) for req in requests] + + if has_tqdm: + results = [] + for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc): + result = await coro + results.append(result) + return results + else: + return await asyncio.gather(*tasks, return_exceptions=True) + + def print_stats(self) -> None: + """Print request statistics.""" + self.logger.info("=" * 40) + self.logger.info("Request Statistics:") + self.logger.info(f" Total Requests: {self.stats['requests']}") + self.logger.info(f" Successful: {self.stats['success']}") + self.logger.info(f" Errors: {self.stats['errors']}") + self.logger.info("=" * 40) + + +class ConfigManager: + """Manage API configuration and credentials.""" + + def __init__(self): + load_dotenv() + + @property + def google_credentials_path(self) -> str | None: + """Get Google service account credentials path.""" + # Prefer SEO-specific credentials, fallback to general credentials + seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json") + if os.path.exists(seo_creds): + return seo_creds + return os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + @property + def pagespeed_api_key(self) -> str | None: + """Get PageSpeed Insights API key.""" + return os.getenv("PAGESPEED_API_KEY") + + @property + def custom_search_api_key(self) -> str | None: + """Get Custom Search API key.""" + return os.getenv("CUSTOM_SEARCH_API_KEY") + + @property + def custom_search_engine_id(self) -> str | None: + """Get Custom Search Engine ID.""" + return os.getenv("CUSTOM_SEARCH_ENGINE_ID") + + @property + def notion_token(self) -> str | None: + """Get Notion API token.""" + return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY") + + def validate_google_credentials(self) -> bool: + """Validate Google credentials are configured.""" + creds_path = self.google_credentials_path + if not creds_path: + return False + return os.path.exists(creds_path) + + def get_required(self, key: str) -> str: + """Get required environment variable or raise error.""" + value = os.getenv(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value + + +# Singleton config instance +config = ConfigManager() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/pagespeed_client.py b/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/pagespeed_client.py rename to ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py diff --git a/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/requirements.txt b/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/requirements.txt new file mode 100644 index 0000000..c6a38ee --- /dev/null +++ b/ourdigital-custom-skills/15-seo-core-web-vitals/code/scripts/requirements.txt @@ -0,0 +1,6 @@ +# 15-seo-core-web-vitals dependencies +google-api-python-client>=2.100.0 +requests>=2.31.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/15-seo-core-web-vitals/desktop/SKILL.md b/ourdigital-custom-skills/15-seo-core-web-vitals/desktop/SKILL.md new file mode 100644 index 0000000..098906c --- /dev/null +++ b/ourdigital-custom-skills/15-seo-core-web-vitals/desktop/SKILL.md @@ -0,0 +1,108 @@ +--- +name: seo-core-web-vitals +version: 1.0.0 +description: Core Web Vitals analyzer for LCP, FID, CLS, INP performance metrics. Triggers: Core Web Vitals, page speed, LCP, CLS, FID, INP, performance. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__* +--- + +# SEO Core Web Vitals + +## Purpose + +Analyze Core Web Vitals performance metrics and provide optimization recommendations. + +## Core Capabilities + +1. **LCP** - Largest Contentful Paint measurement +2. **FID/INP** - Interactivity metrics +3. **CLS** - Cumulative Layout Shift +4. **Recommendations** - Optimization guidance + +## Metrics Thresholds + +| Metric | Good | Needs Work | Poor | +|--------|------|------------|------| +| LCP | ≤2.5s | 2.5-4s | >4s | +| FID | ≤100ms | 100-300ms | >300ms | +| CLS | ≤0.1 | 0.1-0.25 | >0.25 | +| INP | ≤200ms | 200-500ms | >500ms | + +## Data Sources + +### Option 1: PageSpeed Insights (Recommended) +Use external tool and input results: +- Visit: https://pagespeed.web.dev/ +- Enter URL, run test +- Provide scores to skill + +### Option 2: Research Best Practices +``` +mcp__perplexity__search: "Core Web Vitals optimization [specific issue]" +``` + +## Workflow + +1. Request PageSpeed Insights data from user +2. Analyze provided metrics +3. Identify failing metrics +4. Research optimization strategies +5. Provide prioritized recommendations + +## Common LCP Issues + +| Cause | Fix | +|-------|-----| +| Slow server response | Improve TTFB, use CDN | +| Render-blocking resources | Defer non-critical CSS/JS | +| Slow resource load | Preload LCP image | +| Client-side rendering | Use SSR/SSG | + +## Common CLS Issues + +| Cause | Fix | +|-------|-----| +| Images without dimensions | Add width/height attributes | +| Ads/embeds without space | Reserve space with CSS | +| Web fonts causing FOIT/FOUT | Use font-display: swap | +| Dynamic content injection | Reserve space, use transforms | + +## Common INP Issues + +| Cause | Fix | +|-------|-----| +| Long JavaScript tasks | Break up tasks, use web workers | +| Large DOM size | Reduce DOM nodes | +| Heavy event handlers | Debounce, optimize listeners | +| Third-party scripts | Defer, lazy load | + +## Output Format + +```markdown +## Core Web Vitals: [URL] + +### Scores +| Metric | Mobile | Desktop | Status | +|--------|--------|---------|--------| +| LCP | Xs | Xs | Good/Poor | +| FID | Xms | Xms | Good/Poor | +| CLS | X.XX | X.XX | Good/Poor | +| INP | Xms | Xms | Good/Poor | + +### Overall Score +- Mobile: X/100 +- Desktop: X/100 + +### Priority Fixes +1. [Highest impact recommendation] +2. [Second priority] + +### Detailed Recommendations +[Per-metric optimization steps] +``` + +## Limitations + +- Requires external PageSpeed Insights data +- Lab data may differ from field data +- Some fixes require developer implementation +- Third-party scripts may be difficult to optimize diff --git a/ourdigital-custom-skills/16-seo-search-console/code/CLAUDE.md b/ourdigital-custom-skills/16-seo-search-console/code/CLAUDE.md new file mode 100644 index 0000000..c84809d --- /dev/null +++ b/ourdigital-custom-skills/16-seo-search-console/code/CLAUDE.md @@ -0,0 +1,122 @@ +# CLAUDE.md + +## Overview + +Google Search Console data retriever: search analytics (rankings, CTR, impressions), sitemap status, and index coverage. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt + +# Requires service account credentials +# ~/.credential/ourdigital-seo-agent.json + +python scripts/gsc_client.py --site sc-domain:example.com --action summary +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `gsc_client.py` | Search Console API client | +| `base_client.py` | Shared utilities | + +## Configuration + +Service account setup: +```bash +# Credentials file location +~/.credential/ourdigital-seo-agent.json + +# Add service account email to GSC property as user +ourdigital-seo-agent@ourdigital-insights.iam.gserviceaccount.com +``` + +## Usage + +```bash +# Performance summary (last 28 days) +python scripts/gsc_client.py --site sc-domain:example.com --action summary + +# Query-level data +python scripts/gsc_client.py --site sc-domain:example.com --action queries --limit 100 + +# Page-level data +python scripts/gsc_client.py --site sc-domain:example.com --action pages + +# Custom date range +python scripts/gsc_client.py --site sc-domain:example.com --action queries \ + --start 2024-01-01 --end 2024-01-31 + +# Sitemap status +python scripts/gsc_client.py --site sc-domain:example.com --action sitemaps + +# JSON output +python scripts/gsc_client.py --site sc-domain:example.com --action summary --json +``` + +## Actions + +| Action | Description | +|--------|-------------| +| `summary` | Overview metrics (clicks, impressions, CTR, position) | +| `queries` | Top search queries | +| `pages` | Top pages by clicks | +| `sitemaps` | Sitemap submission status | +| `coverage` | Index coverage issues | + +## Output: Summary + +```json +{ + "site": "sc-domain:example.com", + "date_range": "2024-01-01 to 2024-01-28", + "totals": { + "clicks": 15000, + "impressions": 500000, + "ctr": 3.0, + "position": 12.5 + } +} +``` + +## Output: Queries + +```json +{ + "queries": [ + { + "query": "keyword", + "clicks": 500, + "impressions": 10000, + "ctr": 5.0, + "position": 3.2 + } + ] +} +``` + +## Rate Limits + +| Limit | Value | +|-------|-------| +| Queries per minute | 1,200 | +| Rows per request | 25,000 | + +## Site Property Formats + +| Format | Example | +|--------|---------| +| Domain property | `sc-domain:example.com` | +| URL prefix | `https://www.example.com/` | + +## Dependencies + +``` +google-api-python-client>=2.100.0 +google-auth>=2.23.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +pandas>=2.1.0 +``` diff --git a/ourdigital-custom-skills/16-seo-search-console/code/scripts/base_client.py b/ourdigital-custom-skills/16-seo-search-console/code/scripts/base_client.py new file mode 100644 index 0000000..ac5715b --- /dev/null +++ b/ourdigital-custom-skills/16-seo-search-console/code/scripts/base_client.py @@ -0,0 +1,207 @@ +""" +Base Client - Shared async client utilities +=========================================== +Purpose: Rate-limited async operations for API clients +Python: 3.10+ +""" + +import asyncio +import logging +import os +from asyncio import Semaphore +from datetime import datetime +from typing import Any, Callable, TypeVar + +from dotenv import load_dotenv +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +# Load environment variables +load_dotenv() + +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +T = TypeVar("T") + + +class RateLimiter: + """Rate limiter using token bucket algorithm.""" + + def __init__(self, rate: float, per: float = 1.0): + """ + Initialize rate limiter. + + Args: + rate: Number of requests allowed + per: Time period in seconds (default: 1 second) + """ + self.rate = rate + self.per = per + self.tokens = rate + self.last_update = datetime.now() + self._lock = asyncio.Lock() + + async def acquire(self) -> None: + """Acquire a token, waiting if necessary.""" + async with self._lock: + now = datetime.now() + elapsed = (now - self.last_update).total_seconds() + self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per)) + self.last_update = now + + if self.tokens < 1: + wait_time = (1 - self.tokens) * (self.per / self.rate) + await asyncio.sleep(wait_time) + self.tokens = 0 + else: + self.tokens -= 1 + + +class BaseAsyncClient: + """Base class for async API clients with rate limiting.""" + + def __init__( + self, + max_concurrent: int = 5, + requests_per_second: float = 3.0, + logger: logging.Logger | None = None, + ): + """ + Initialize base client. + + Args: + max_concurrent: Maximum concurrent requests + requests_per_second: Rate limit + logger: Logger instance + """ + self.semaphore = Semaphore(max_concurrent) + self.rate_limiter = RateLimiter(requests_per_second) + self.logger = logger or logging.getLogger(self.__class__.__name__) + self.stats = { + "requests": 0, + "success": 0, + "errors": 0, + "retries": 0, + } + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + async def _rate_limited_request( + self, + coro: Callable[[], Any], + ) -> Any: + """Execute a request with rate limiting and retry.""" + async with self.semaphore: + await self.rate_limiter.acquire() + self.stats["requests"] += 1 + try: + result = await coro() + self.stats["success"] += 1 + return result + except Exception as e: + self.stats["errors"] += 1 + self.logger.error(f"Request failed: {e}") + raise + + async def batch_requests( + self, + requests: list[Callable[[], Any]], + desc: str = "Processing", + ) -> list[Any]: + """Execute multiple requests concurrently.""" + try: + from tqdm.asyncio import tqdm + has_tqdm = True + except ImportError: + has_tqdm = False + + async def execute(req: Callable) -> Any: + try: + return await self._rate_limited_request(req) + except Exception as e: + return {"error": str(e)} + + tasks = [execute(req) for req in requests] + + if has_tqdm: + results = [] + for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc): + result = await coro + results.append(result) + return results + else: + return await asyncio.gather(*tasks, return_exceptions=True) + + def print_stats(self) -> None: + """Print request statistics.""" + self.logger.info("=" * 40) + self.logger.info("Request Statistics:") + self.logger.info(f" Total Requests: {self.stats['requests']}") + self.logger.info(f" Successful: {self.stats['success']}") + self.logger.info(f" Errors: {self.stats['errors']}") + self.logger.info("=" * 40) + + +class ConfigManager: + """Manage API configuration and credentials.""" + + def __init__(self): + load_dotenv() + + @property + def google_credentials_path(self) -> str | None: + """Get Google service account credentials path.""" + # Prefer SEO-specific credentials, fallback to general credentials + seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json") + if os.path.exists(seo_creds): + return seo_creds + return os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + @property + def pagespeed_api_key(self) -> str | None: + """Get PageSpeed Insights API key.""" + return os.getenv("PAGESPEED_API_KEY") + + @property + def custom_search_api_key(self) -> str | None: + """Get Custom Search API key.""" + return os.getenv("CUSTOM_SEARCH_API_KEY") + + @property + def custom_search_engine_id(self) -> str | None: + """Get Custom Search Engine ID.""" + return os.getenv("CUSTOM_SEARCH_ENGINE_ID") + + @property + def notion_token(self) -> str | None: + """Get Notion API token.""" + return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY") + + def validate_google_credentials(self) -> bool: + """Validate Google credentials are configured.""" + creds_path = self.google_credentials_path + if not creds_path: + return False + return os.path.exists(creds_path) + + def get_required(self, key: str) -> str: + """Get required environment variable or raise error.""" + value = os.getenv(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value + + +# Singleton config instance +config = ConfigManager() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/gsc_client.py b/ourdigital-custom-skills/16-seo-search-console/code/scripts/gsc_client.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/gsc_client.py rename to ourdigital-custom-skills/16-seo-search-console/code/scripts/gsc_client.py diff --git a/ourdigital-custom-skills/16-seo-search-console/code/scripts/requirements.txt b/ourdigital-custom-skills/16-seo-search-console/code/scripts/requirements.txt new file mode 100644 index 0000000..6561902 --- /dev/null +++ b/ourdigital-custom-skills/16-seo-search-console/code/scripts/requirements.txt @@ -0,0 +1,7 @@ +# 16-seo-search-console dependencies +google-api-python-client>=2.100.0 +google-auth>=2.23.0 +pandas>=2.1.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/16-seo-search-console/desktop/SKILL.md b/ourdigital-custom-skills/16-seo-search-console/desktop/SKILL.md new file mode 100644 index 0000000..0f817b3 --- /dev/null +++ b/ourdigital-custom-skills/16-seo-search-console/desktop/SKILL.md @@ -0,0 +1,117 @@ +--- +name: seo-search-console +version: 1.0.0 +description: Google Search Console data analyzer for rankings, CTR, impressions, and index coverage. Triggers: Search Console, GSC, rankings, search performance, impressions, CTR. +allowed-tools: mcp__perplexity__*, mcp__notion__* +--- + +# SEO Search Console + +## Purpose + +Analyze Google Search Console data: search performance (queries, pages, CTR, position), sitemap status, and index coverage. + +## Core Capabilities + +1. **Performance Analysis** - Clicks, impressions, CTR, position +2. **Query Analysis** - Top search queries +3. **Page Performance** - Best/worst performing pages +4. **Index Coverage** - Crawl and index issues +5. **Sitemap Status** - Submission and processing + +## Data Collection + +### Option 1: User Provides Data +Request GSC export from user: +1. Go to Search Console > Performance +2. Export data (CSV or Google Sheets) +3. Share with assistant + +### Option 2: User Describes Data +User verbally provides: +- Top queries and positions +- CTR trends +- Coverage issues + +## Analysis Framework + +### Performance Metrics + +| Metric | What It Measures | Good Benchmark | +|--------|------------------|----------------| +| Clicks | User visits from search | Trending up | +| Impressions | Search appearances | High for target keywords | +| CTR | Click-through rate | 2-5% average | +| Position | Average ranking | <10 for key terms | + +### Query Analysis + +Identify: +- **Winners** - High position, high CTR +- **Opportunities** - High impressions, low CTR +- **Quick wins** - Position 8-20, low effort to improve + +### Page Analysis + +Categorize: +- **Top performers** - High clicks, good CTR +- **Underperformers** - High impressions, low CTR +- **Declining** - Down vs previous period + +## Workflow + +1. Collect GSC data from user +2. Analyze performance trends +3. Identify top queries and pages +4. Find optimization opportunities +5. Check for coverage issues +6. Provide actionable recommendations + +## Output Format + +```markdown +## Search Console Analysis: [Site] + +### Overview (Last 28 Days) +| Metric | Value | vs Previous | +|--------|-------|-------------| +| Clicks | X | +X% | +| Impressions | X | +X% | +| CTR | X% | +X% | +| Position | X | +X | + +### Top Queries +| Query | Clicks | Position | Opportunity | +|-------|--------|----------|-------------| + +### Top Pages +| Page | Clicks | CTR | Status | +|------|--------|-----|--------| + +### Opportunities +1. [Query with high impressions, low CTR] +2. [Page ranking 8-20 that can improve] + +### Issues +- [Coverage problems] +- [Sitemap issues] + +### Recommendations +1. [Priority action] +``` + +## Common Issues + +| Issue | Impact | Fix | +|-------|--------|-----| +| Low CTR on high-impression query | Lost traffic | Improve title/description | +| Declining positions | Traffic loss | Update content, build links | +| Not indexed pages | No visibility | Fix crawl issues | +| Sitemap errors | Discovery problems | Fix sitemap XML | + +## Limitations + +- Requires user to provide GSC data +- API access needs service account setup +- Data has 2-3 day delay +- Limited to verified properties diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/README.md b/ourdigital-custom-skills/17-seo-gateway-architect/README.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/README.md rename to ourdigital-custom-skills/17-seo-gateway-architect/README.md diff --git a/ourdigital-custom-skills/17-seo-gateway-architect/code/CLAUDE.md b/ourdigital-custom-skills/17-seo-gateway-architect/code/CLAUDE.md new file mode 100644 index 0000000..609f136 --- /dev/null +++ b/ourdigital-custom-skills/17-seo-gateway-architect/code/CLAUDE.md @@ -0,0 +1,65 @@ +# CLAUDE.md + +## Overview + +SEO gateway page strategist for Korean medical/service websites. Creates keyword strategies, content architecture, and technical SEO plans. + +## Quick Start + +```bash +pip install -r scripts/requirements.txt + +# Keyword analysis +python scripts/keyword_analyzer.py --topic "눈 성형" --market "강남" +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `keyword_analyzer.py` | Analyze keywords, search volume, competitor gaps | + +## Keyword Analyzer + +```bash +# Basic analysis +python scripts/keyword_analyzer.py --topic "눈 성형" + +# With location targeting +python scripts/keyword_analyzer.py --topic "눈 성형" --market "강남" --output strategy.json + +# Competitor analysis +python scripts/keyword_analyzer.py --topic "눈 성형" --competitors url1,url2 +``` + +## Output + +Generates strategic document with: +- Primary keyword + monthly search volume +- LSI keywords (7-10) +- User intent distribution +- Competitor gap analysis +- Content architecture (H1-H3 structure) +- Technical SEO checklist + +## Templates + +See `templates/` for: +- `keyword-research-template.md` +- `content-architecture-template.md` +- `seo-checklist-template.md` + +## Workflow + +1. Run keyword analyzer for target topic +2. Review search volume and intent data +3. Use output to plan content architecture +4. Hand off to `18-seo-gateway-builder` for content generation + +## Configuration + +```bash +# Optional: API keys for enhanced data +GOOGLE_API_KEY=xxx +NAVER_API_KEY=xxx +``` diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/scripts/keyword_analyzer.py b/ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py similarity index 91% rename from ourdigital-custom-skills/04-seo-gateway-strategist/scripts/keyword_analyzer.py rename to ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py index 8ff5416..8d73857 100644 --- a/ourdigital-custom-skills/04-seo-gateway-strategist/scripts/keyword_analyzer.py +++ b/ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py @@ -281,20 +281,38 @@ Generated: {datetime.now().strftime('%Y-%m-%d %H:%M')} def main(): """Main execution function""" - import sys - - if len(sys.argv) < 2: - print("Usage: python keyword_analyzer.py '키워드'") - print("Example: python keyword_analyzer.py '눈 성형'") - sys.exit(1) - - keyword = ' '.join(sys.argv[1:]) - + import argparse + + parser = argparse.ArgumentParser( + description='Analyze keywords for SEO gateway page strategy', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + python keyword_analyzer.py --topic "눈 성형" + python keyword_analyzer.py --topic "이마 성형" --market "강남" + python keyword_analyzer.py --topic "동안 성형" --output strategy.json + ''' + ) + parser.add_argument('--topic', '-t', required=True, + help='Primary keyword to analyze (e.g., "눈 성형")') + parser.add_argument('--market', '-m', default=None, + help='Target market/location (e.g., "강남")') + parser.add_argument('--output', '-o', default=None, + help='Output JSON file path') + parser.add_argument('--competitors', '-c', default=None, + help='Comma-separated competitor URLs for analysis') + + args = parser.parse_args() + + keyword = args.topic + if args.market: + keyword = f"{args.market} {args.topic}" + print(f"Analyzing keyword: {keyword}") print("-" * 50) - + analyzer = KeywordAnalyzer(keyword) - + # Run analysis analyzer.analyze_primary_keyword() analyzer.generate_lsi_keywords() @@ -302,13 +320,13 @@ def main(): analyzer.generate_question_keywords() analyzer.calculate_intent_distribution() analyzer.generate_recommendations() - + # Generate and print report report = analyzer.generate_report() print(report) - + # Export to JSON - filename = analyzer.export_analysis() + filename = analyzer.export_analysis(args.output) print(f"\nAnalysis exported to: {filename}") diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/scripts/requirements.txt b/ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/requirements.txt similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/scripts/requirements.txt rename to ourdigital-custom-skills/17-seo-gateway-architect/code/scripts/requirements.txt diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/templates/content-architecture-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/code/templates/content-architecture-template.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/templates/content-architecture-template.md rename to ourdigital-custom-skills/17-seo-gateway-architect/code/templates/content-architecture-template.md diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/templates/keyword-research-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/code/templates/keyword-research-template.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/templates/keyword-research-template.md rename to ourdigital-custom-skills/17-seo-gateway-architect/code/templates/keyword-research-template.md diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/templates/seo-checklist-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/code/templates/seo-checklist-template.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/templates/seo-checklist-template.md rename to ourdigital-custom-skills/17-seo-gateway-architect/code/templates/seo-checklist-template.md diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/SKILL.md b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/SKILL.md rename to ourdigital-custom-skills/17-seo-gateway-architect/desktop/SKILL.md diff --git a/ourdigital-custom-skills/04-seo-gateway-strategist/examples/eye-surgery-strategy-example.md b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/examples/eye-surgery-strategy-example.md similarity index 100% rename from ourdigital-custom-skills/04-seo-gateway-strategist/examples/eye-surgery-strategy-example.md rename to ourdigital-custom-skills/17-seo-gateway-architect/desktop/examples/eye-surgery-strategy-example.md diff --git a/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/content-architecture-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/content-architecture-template.md new file mode 100644 index 0000000..8fc6487 --- /dev/null +++ b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/content-architecture-template.md @@ -0,0 +1,160 @@ +# Content Architecture Template + +## Page Hierarchy Structure + +``` +[Page URL: /service-name] +│ +├── H1: [Primary Keyword-Optimized Headline] +│ Example: "강남 눈 성형 전문의가 만드는 자연스러운 눈매" +│ Word Count Target: 15-25 characters +│ Keyword Placement: Primary keyword at beginning +│ +├── Hero Section [Above Fold] +│ ├── Value Proposition (30-50 words) +│ │ └── Keywords: Primary + 1 LSI +│ ├── Trust Signals (3-5 items) +│ │ ├── Certification badges +│ │ ├── Years of experience +│ │ └── Success cases number +│ └── Primary CTA +│ └── Text: "무료 상담 신청하기" +│ +├── H2: [Service Name] 이란? [Problem/Solution Framework] +│ Word Count: 200-300 words +│ Keywords: Primary (1x), LSI (2-3x) +│ ├── H3: 이런 고민이 있으신가요? [Pain Points] +│ │ ├── Pain point 1 (include LSI keyword) +│ │ ├── Pain point 2 (include LSI keyword) +│ │ └── Pain point 3 (include LSI keyword) +│ └── H3: [Clinic Name]의 솔루션 [Benefits] +│ ├── Benefit 1 (address pain point 1) +│ ├── Benefit 2 (address pain point 2) +│ └── Benefit 3 (address pain point 3) +│ +├── H2: [Service Name] 종류 및 방법 [Service Categories] +│ Word Count: 400-500 words total +│ Keywords: Category-specific LSI keywords +│ ├── H3: [Sub-service 1] - [LSI Keyword Variation] +│ │ ├── Description (80-100 words) +│ │ ├── Best for (target audience) +│ │ ├── Duration & Recovery +│ │ └── CTA: "자세히 보기" +│ ├── H3: [Sub-service 2] - [LSI Keyword Variation] +│ │ └── [Same structure as above] +│ └── H3: [Sub-service 3] - [LSI Keyword Variation] +│ └── [Same structure as above] +│ +├── H2: [Clinic Name] [Service Name]만의 차별점 [Trust & Authority] +│ Word Count: 300-400 words +│ Keywords: Brand + Primary keyword combinations +│ ├── H3: 전문 의료진 [Doctor Credentials] +│ │ ├── Doctor profile summary +│ │ ├── Specializations +│ │ └── Certifications +│ ├── H3: 검증된 시술 결과 [Success Metrics] +│ │ ├── Number statistics +│ │ ├── Success rate +│ │ └── Patient satisfaction +│ └── H3: 첨단 장비 및 시설 [Facilities] +│ ├── Equipment descriptions +│ └── Safety protocols +│ +├── H2: [Service Name] 자주 묻는 질문 [FAQ Section] +│ Word Count: 500-700 words +│ Keywords: Long-tail question keywords +│ ├── Q1: [Long-tail keyword as question]? +│ │ └── A: [40-60 word answer, keyword in first sentence] +│ ├── Q2: [Price-related question]? +│ │ └── A: [Include "비용" LSI keyword] +│ ├── Q3: [Recovery-related question]? +│ │ └── A: [Include "회복기간" LSI keyword] +│ ├── Q4: [Side-effect question]? +│ │ └── A: [Include "부작용" LSI keyword] +│ ├── Q5: [Process question]? +│ │ └── A: [Include process-related LSI] +│ ├── Q6: [Candidacy question]? +│ │ └── A: [Include target audience keywords] +│ └── Q7: [Results duration question]? +│ └── A: [Include maintenance keywords] +│ +├── H2: [Service Name] 시술 과정 [Process Guide] +│ Word Count: 300-400 words +│ Keywords: "과정", "단계", procedural LSI +│ ├── H3: 상담 및 검사 [Consultation] +│ ├── H3: 시술 당일 [Procedure Day] +│ ├── H3: 회복 과정 [Recovery] +│ └── H3: 사후 관리 [Aftercare] +│ +├── H2: 실제 고객 후기 [Social Proof] +│ Word Count: 200-300 words +│ Keywords: "후기", "리뷰", satisfaction keywords +│ ├── Review snippet 1 +│ ├── Review snippet 2 +│ ├── Review snippet 3 +│ └── Before/After gallery teaser +│ +└── H2: 상담 예약 안내 [Conversion Section] + Word Count: 150-200 words + Keywords: CTA-related, location keywords + ├── H3: 상담 예약 방법 + ├── H3: 오시는 길 + └── H3: 문의 정보 +``` + +## Keyword Density Map + +| Section | Primary Keyword | LSI Keywords | Total Keywords | +|---------|----------------|--------------|----------------| +| Hero | 1 | 1-2 | 2-3 | +| Problem/Solution | 1 | 2-3 | 3-4 | +| Service Categories | 1-2 | 4-6 | 5-8 | +| Trust & Authority | 1 | 2-3 | 3-4 | +| FAQ | 2-3 | 5-7 | 7-10 | +| Process | 1 | 2-3 | 3-4 | +| Social Proof | 0-1 | 1-2 | 1-3 | +| Conversion | 1 | 1-2 | 2-3 | +| **Total** | **8-11** | **18-29** | **26-40** | + +## Internal Linking Strategy + +| From Section | To Page | Anchor Text | Purpose | +|-------------|---------|-------------|---------| +| Service Categories | Sub-service page | [Sub-service name] | Deep dive | +| FAQ | Price page | "비용 안내 페이지" | Conversion | +| Trust section | Doctor profile | "[Doctor name] 원장" | Authority | +| Process section | Consultation form | "상담 예약하기" | Conversion | +| Social proof | Gallery page | "더 많은 전후 사진" | Engagement | + +## Content Length Guidelines + +- **Total Page Length**: 2,000-2,500 words +- **Above Fold Content**: 100-150 words +- **Each H2 Section**: 200-500 words +- **Each H3 Subsection**: 80-150 words +- **Meta Description**: 150-160 characters +- **Image Alt Text**: 10-15 words each + +## Schema Markup Requirements + +```json +{ + "@context": "https://schema.org", + "@type": "MedicalProcedure", + "name": "[Service Name]", + "description": "[Meta description]", + "procedureType": "Cosmetic", + "provider": { + "@type": "MedicalOrganization", + "name": "[Clinic Name]" + } +} +``` + +## Mobile Content Adaptation + +- Reduce hero text by 30% +- Show 3 FAQs initially (expand for more) +- Simplify navigation to single-column +- Increase CTA button size +- Compress trust signals to carousel diff --git a/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/keyword-research-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/keyword-research-template.md new file mode 100644 index 0000000..a262bbd --- /dev/null +++ b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/keyword-research-template.md @@ -0,0 +1,95 @@ +# Keyword Research Template + +## Primary Keyword Analysis + +| Metric | Value | Notes | +|--------|-------|-------| +| **Primary Keyword** | [KEYWORD] | Main target keyword | +| **Monthly Search Volume** | [VOLUME] | Average monthly searches | +| **Keyword Difficulty** | [0-100] | Competition score | +| **Current Ranking** | #[POSITION] | Current SERP position | +| **Search Trend** | ↑ ↓ → | Trending direction | + +## LSI Keywords Matrix + +| LSI Keyword | Search Volume | Intent Type | Priority | +|------------|--------------|-------------|----------| +| [keyword 1] | [volume] | Informational | High | +| [keyword 2] | [volume] | Transactional | Medium | +| [keyword 3] | [volume] | Comparative | High | +| [keyword 4] | [volume] | Informational | Medium | +| [keyword 5] | [volume] | Transactional | Low | +| [keyword 6] | [volume] | Comparative | High | +| [keyword 7] | [volume] | Informational | Medium | +| [keyword 8] | [volume] | Navigational | Low | +| [keyword 9] | [volume] | Transactional | High | +| [keyword 10] | [volume] | Informational | Medium | + +## User Intent Distribution + +``` +Informational (Research Phase): ___% + - Common queries: "what is", "how to", "benefits of" + - Content needed: Educational guides, FAQs, process explanations + +Comparative (Evaluation Phase): ___% + - Common queries: "best", "vs", "reviews", "비교" + - Content needed: Comparison tables, reviews, case studies + +Transactional (Ready to Convert): ___% + - Common queries: "price", "book", "consultation", "예약" + - Content needed: CTAs, pricing, booking forms +``` + +## Long-tail Keyword Opportunities + +### Question-based Keywords +- [질문 키워드 1] +- [질문 키워드 2] +- [질문 키워드 3] + +### Location-based Keywords +- [지역] + [primary keyword] +- [지역] + [primary keyword] + 잘하는곳 +- [지역] + [primary keyword] + 추천 + +### Modifier-based Keywords +- [primary keyword] + 비용 +- [primary keyword] + 부작용 +- [primary keyword] + 회복기간 +- [primary keyword] + 전후 + +## Competitor Keyword Analysis + +| Competitor | Target Keywords | Ranking Keywords | Gap Opportunities | +|------------|----------------|------------------|-------------------| +| Competitor 1 | [keywords] | [keywords] | [missing keywords] | +| Competitor 2 | [keywords] | [keywords] | [missing keywords] | +| Competitor 3 | [keywords] | [keywords] | [missing keywords] | + +## Seasonal Trends + +| Month | Search Volume | Events/Factors | +|-------|--------------|----------------| +| January | [volume] | New year resolutions | +| February | [volume] | [factor] | +| March | [volume] | [factor] | +| ... | ... | ... | + +## Platform-Specific Keywords + +### Naver-Optimized +- [네이버 specific keyword 1] +- [네이버 specific keyword 2] + +### Google-Optimized +- [Google specific keyword 1] +- [Google specific keyword 2] + +## Action Items + +- [ ] Target primary keyword in H1 and title tag +- [ ] Include 3-5 LSI keywords naturally in content +- [ ] Create content matching user intent distribution +- [ ] Optimize for question-based featured snippets +- [ ] Add location modifiers for local SEO diff --git a/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/seo-checklist-template.md b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/seo-checklist-template.md new file mode 100644 index 0000000..93ab54c --- /dev/null +++ b/ourdigital-custom-skills/17-seo-gateway-architect/desktop/templates/seo-checklist-template.md @@ -0,0 +1,239 @@ +# SEO Technical Checklist Template + +## Meta Tags Optimization + +### Title Tag +- [ ] Length: 50-60 characters +- [ ] Primary keyword at beginning +- [ ] Brand name at end +- [ ] Unique for each page +- [ ] Formula: `[Primary Keyword] - [Value Proposition] | [Brand]` + +**Template**: `{primary_keyword} 전문 - {unique_value} | {clinic_name}` +**Example**: `눈 성형 전문 - 자연스러운 라인 | 제이미클리닉` + +### Meta Description +- [ ] Length: 150-160 characters +- [ ] Include primary keyword +- [ ] Include 1-2 LSI keywords +- [ ] Clear CTA +- [ ] Unique for each page + +**Template**: `{location} {primary_keyword} 전문의가 {benefit}. {credential}. 무료상담 ☎ {phone}` +**Example**: `강남 눈 성형 전문의가 자연스러운 눈매를 디자인합니다. 15년 경력, 10,000건 시술. 무료상담 ☎ 02-1234-5678` + +### Open Graph Tags +```html + + + + + + +``` + +## Header Tags Structure + +- [ ] Only one H1 per page +- [ ] H1 contains primary keyword +- [ ] H2 tags for main sections (5-7) +- [ ] H3 tags for subsections +- [ ] Logical hierarchy maintained +- [ ] Keywords distributed naturally + +## Content Optimization + +### Keyword Density +- [ ] Primary keyword: 2-3% (20-30 times per 1000 words) +- [ ] LSI keywords: 1-2% each +- [ ] Natural placement (no stuffing) +- [ ] Synonyms and variations used + +### Content Structure +- [ ] First 100 words include primary keyword +- [ ] Short paragraphs (3-4 sentences) +- [ ] Bullet points and lists +- [ ] Bold important keywords (sparingly) +- [ ] Internal links: 5-10 +- [ ] External links: 2-3 (authoritative) + +## Schema Markup + +### Medical Procedure Schema +```json +{ + "@context": "https://schema.org", + "@type": "MedicalProcedure", + "name": "{procedure_name}", + "procedureType": "Cosmetic", + "bodyLocation": "{body_part}", + "outcome": "{expected_outcome}", + "preparation": "{preparation_required}", + "followup": "{followup_care}", + "provider": { + "@type": "MedicalOrganization", + "name": "{clinic_name}", + "address": { + "@type": "PostalAddress", + "streetAddress": "{street}", + "addressLocality": "{city}", + "addressCountry": "KR" + } + } +} +``` + +### FAQ Schema +```json +{ + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [{ + "@type": "Question", + "name": "{question}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{answer}" + } + }] +} +``` + +## Image Optimization + +- [ ] Descriptive file names: `eye-surgery-before-after-case1.jpg` +- [ ] Alt text with keywords: `눈 성형 전후 사진 - 30대 여성 사례` +- [ ] Compressed file size (< 200KB) +- [ ] WebP format with fallback +- [ ] Lazy loading implemented +- [ ] Image sitemap created + +## Performance Optimization + +### Page Speed +- [ ] Load time < 3 seconds +- [ ] First Contentful Paint < 1.8s +- [ ] Time to Interactive < 3.8s +- [ ] Total page size < 3MB +- [ ] Requests minimized (< 50) + +### Core Web Vitals +- [ ] LCP (Largest Contentful Paint) < 2.5s +- [ ] FID (First Input Delay) < 100ms +- [ ] CLS (Cumulative Layout Shift) < 0.1 + +## Mobile Optimization + +- [ ] Mobile-responsive design +- [ ] Viewport meta tag set +- [ ] Touch-friendly buttons (44x44px minimum) +- [ ] Readable font size (16px minimum) +- [ ] No horizontal scrolling +- [ ] Mobile page speed < 3s + +## URL Structure + +- [ ] SEO-friendly URL: `/eye-surgery` or `/눈-성형` +- [ ] No special characters +- [ ] Lowercase only +- [ ] Hyphens for word separation +- [ ] Under 60 characters +- [ ] Include primary keyword + +## Internal Linking + +| From Page | To Page | Anchor Text | Purpose | +|-----------|---------|-------------|---------| +| Gateway | Service Detail | {service_name} | Deep content | +| Gateway | Doctor Profile | {doctor_name} 원장 | Authority | +| Gateway | Pricing | 비용 안내 | Conversion | +| Gateway | Gallery | 시술 전후 사진 | Engagement | +| Gateway | Contact | 상담 예약 | Conversion | + +## Naver-Specific Optimization + +### Naver Webmaster Tools +- [ ] Site verification complete +- [ ] XML sitemap submitted +- [ ] Robots.txt configured +- [ ] Syndication feed active +- [ ] Site optimization report reviewed + +### Naver SEO Elements +- [ ] Title under 30 Korean characters +- [ ] C-Rank tags implemented +- [ ] Image-to-text ratio optimized (40:60) +- [ ] Outbound links minimized +- [ ] Brand search optimization + +## Tracking & Analytics + +- [ ] Google Analytics 4 installed +- [ ] Google Search Console verified +- [ ] Naver Analytics installed +- [ ] Conversion tracking configured +- [ ] Event tracking for CTAs +- [ ] Heatmap tool installed + +## Security & Technical + +- [ ] SSL certificate active (HTTPS) +- [ ] WWW/non-WWW redirect configured +- [ ] 404 error page customized +- [ ] XML sitemap generated +- [ ] Robots.txt optimized +- [ ] Canonical URLs set +- [ ] Hreflang tags (if multi-language) + +## Quality Checks + +### Content Quality +- [ ] No spelling/grammar errors +- [ ] Medical information accurate +- [ ] Legal compliance verified +- [ ] Contact information correct +- [ ] CTAs working properly + +### Cross-browser Testing +- [ ] Chrome (Desktop/Mobile) +- [ ] Safari (Desktop/Mobile) +- [ ] Firefox +- [ ] Samsung Internet +- [ ] Naver Whale + +## Monthly Monitoring Tasks + +- [ ] Keyword ranking check +- [ ] Organic traffic analysis +- [ ] Bounce rate monitoring +- [ ] Conversion rate tracking +- [ ] Competitor analysis +- [ ] Content freshness update +- [ ] Broken link check +- [ ] Page speed test + +## Priority Levels + +1. **Critical (Day 1)** + - Title and meta tags + - H1 optimization + - Mobile responsiveness + - Page speed < 4s + +2. **High (Week 1)** + - Schema markup + - Internal linking + - Image optimization + - Content optimization + +3. **Medium (Week 2-3)** + - Naver optimization + - FAQ implementation + - Social proof elements + - Analytics setup + +4. **Low (Month 2)** + - A/B testing + - Advanced schema + - Link building + - Content expansion diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/README.md b/ourdigital-custom-skills/18-seo-gateway-builder/README.md similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/README.md rename to ourdigital-custom-skills/18-seo-gateway-builder/README.md diff --git a/ourdigital-custom-skills/18-seo-gateway-builder/code/CLAUDE.md b/ourdigital-custom-skills/18-seo-gateway-builder/code/CLAUDE.md new file mode 100644 index 0000000..44803f8 --- /dev/null +++ b/ourdigital-custom-skills/18-seo-gateway-builder/code/CLAUDE.md @@ -0,0 +1,82 @@ +# CLAUDE.md + +## Overview + +Gateway page content generator for local services. Creates SEO-optimized pages from location/service configurations. + +## Quick Start + +```bash +# Generate pages from config +python scripts/generate_pages.py --config config/services.json --locations config/locations.json +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `generate_pages.py` | Generate gateway pages from templates | + +## Page Generator + +```bash +# Generate all combinations +python scripts/generate_pages.py \ + --config config/services.json \ + --locations config/locations.json \ + --output ./pages + +# Single service/location +python scripts/generate_pages.py \ + --service "laser_hair_removal" \ + --location "gangnam" \ + --template templates/gateway-page-medical.md +``` + +## Configuration Files + +### services.json +```json +{ + "services": [ + { + "id": "laser_hair_removal", + "korean": "레이저 제모", + "keywords": ["laser hair removal", "permanent hair removal"] + } + ] +} +``` + +### locations.json +```json +{ + "locations": [ + { + "id": "gangnam", + "korean": "강남", + "full_address": "서울특별시 강남구" + } + ] +} +``` + +## Templates + +- `templates/gateway-page-medical.md` - Medical service template +- Supports variables: `{{service}}`, `{{location}}`, `{{brand}}` + +## Output + +Generates markdown files with: +- SEO-optimized title and meta +- Structured content sections +- Schema markup recommendations +- Internal linking suggestions + +## Workflow + +1. Get strategy from `17-seo-gateway-architect` +2. Configure services and locations +3. Run generator for content drafts +4. Review and customize output diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/config/locations.json b/ourdigital-custom-skills/18-seo-gateway-builder/code/config/locations.json similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/config/locations.json rename to ourdigital-custom-skills/18-seo-gateway-builder/code/config/locations.json diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/config/services.json b/ourdigital-custom-skills/18-seo-gateway-builder/code/config/services.json similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/config/services.json rename to ourdigital-custom-skills/18-seo-gateway-builder/code/config/services.json diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/scripts/generate_pages.py b/ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py similarity index 97% rename from ourdigital-custom-skills/05-gateway-page-content-builder/scripts/generate_pages.py rename to ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py index 2dda2be..8adb499 100644 --- a/ourdigital-custom-skills/05-gateway-page-content-builder/scripts/generate_pages.py +++ b/ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py @@ -52,10 +52,15 @@ class Brand: class GatewayPageGenerator: """Main class for generating gateway page content""" - - def __init__(self, brand: Brand, template_path: str = "templates/"): + + def __init__(self, brand: Brand, template_path: str = None): self.brand = brand - self.template_path = Path(template_path) + # Use script directory as base for template path + if template_path is None: + script_dir = Path(__file__).parent.parent + self.template_path = script_dir / "templates" + else: + self.template_path = Path(template_path) self.generated_pages = [] def load_template(self, template_name: str) -> str: diff --git a/ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/requirements.txt b/ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/requirements.txt new file mode 100644 index 0000000..f3de352 --- /dev/null +++ b/ourdigital-custom-skills/18-seo-gateway-builder/code/scripts/requirements.txt @@ -0,0 +1,5 @@ +# 18-seo-gateway-builder dependencies +jinja2>=3.1.0 +pyyaml>=6.0.0 +markdown>=3.5.0 +python-dotenv>=1.0.0 diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/templates/gateway-page-medical.md b/ourdigital-custom-skills/18-seo-gateway-builder/code/templates/gateway-page-medical.md similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/templates/gateway-page-medical.md rename to ourdigital-custom-skills/18-seo-gateway-builder/code/templates/gateway-page-medical.md diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/SKILL.md b/ourdigital-custom-skills/18-seo-gateway-builder/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/SKILL.md rename to ourdigital-custom-skills/18-seo-gateway-builder/desktop/SKILL.md diff --git a/ourdigital-custom-skills/05-gateway-page-content-builder/examples/gangnam-laser-hair-removal.md b/ourdigital-custom-skills/18-seo-gateway-builder/desktop/examples/gangnam-laser-hair-removal.md similarity index 100% rename from ourdigital-custom-skills/05-gateway-page-content-builder/examples/gangnam-laser-hair-removal.md rename to ourdigital-custom-skills/18-seo-gateway-builder/desktop/examples/gangnam-laser-hair-removal.md diff --git a/ourdigital-custom-skills/18-seo-gateway-builder/desktop/templates/gateway-page-medical.md b/ourdigital-custom-skills/18-seo-gateway-builder/desktop/templates/gateway-page-medical.md new file mode 100644 index 0000000..999ed62 --- /dev/null +++ b/ourdigital-custom-skills/18-seo-gateway-builder/desktop/templates/gateway-page-medical.md @@ -0,0 +1,231 @@ +# [Medical Service] in [Location] | [Clinic Name] + + + + +## Professional [Medical Service] Available in [Location] + +Welcome to [Clinic Name], your trusted provider for [medical service] in [location]. Our medical team brings over [X years] of combined experience, utilizing the latest medical technology to ensure optimal results for our patients in the [location] area. + +### Why Choose [Clinic Name] for [Medical Service] in [Location]? + +Located conveniently at [specific address near landmark], our [location] clinic specializes in providing personalized [medical service] treatments tailored to each patient's unique needs. We understand the specific health concerns of [location] residents and have designed our services accordingly. + +**Our [Location] Advantages:** +- 🏥 Modern facility equipped with latest [equipment type] +- 👨‍⚕️ Board-certified specialists with [certification details] +- 📍 Easy access from [nearby subway/bus stations] +- 🕐 Extended hours to accommodate busy [location] professionals +- 💳 Accept major insurance plans popular in [location] + +## Understanding [Medical Service] + +### What Is [Medical Service]? + +[Detailed medical explanation of the service, including scientific background, FDA approvals if applicable, and medical benefits. This section should be educational while remaining accessible.] + +### Who Can Benefit from [Medical Service]? + +Our [medical service] treatment in [location] is ideal for patients experiencing: +- [Condition 1 with brief explanation] +- [Condition 2 with brief explanation] +- [Condition 3 with brief explanation] +- [Condition 4 with brief explanation] + +## Our [Medical Service] Process in [Location] + +### 1. Initial Consultation +Your journey begins with a comprehensive consultation at our [location] clinic. Our specialists will: +- Review your medical history +- Conduct necessary diagnostic tests +- Discuss your treatment goals +- Create a personalized treatment plan + +### 2. Treatment Planning +Based on your consultation, we develop a customized approach that considers: +- Your specific medical condition +- Lifestyle factors common to [location] residents +- Insurance coverage options +- Optimal scheduling for your convenience + +### 3. Treatment Sessions +Each [medical service] session at our [location] facility typically involves: +- Pre-treatment preparation +- The procedure itself (approximately [duration]) +- Post-treatment monitoring +- Detailed aftercare instructions + +### 4. Follow-up Care +We provide comprehensive follow-up support including: +- Scheduled check-ups +- 24/7 emergency hotline +- Ongoing treatment adjustments +- Long-term health monitoring + +## Expected Results and Recovery + +### What to Expect After [Medical Service] + +Patients at our [location] clinic typically experience: +- **Immediate effects**: [Description] +- **Short-term (1-2 weeks)**: [Description] +- **Long-term (1-3 months)**: [Description] +- **Final results**: [Timeline and description] + +### Recovery Timeline +- Day 1-3: [Recovery details] +- Week 1: [Recovery details] +- Week 2-4: [Recovery details] +- Month 2-3: [Recovery details] + +## Safety and Credentials + +### Our Medical Standards +[Clinic Name] in [location] maintains the highest medical standards: +- ✓ [Relevant medical certification] +- ✓ [Hospital affiliation if applicable] +- ✓ [Safety protocol certification] +- ✓ [Professional membership] + +### Our Medical Team +**Dr. [Name], MD** +- [Medical school] +- [Residency/Fellowship] +- [Years of experience] specializing in [medical service] +- [Special recognition or research] + +## Pricing and Insurance + +### Insurance Coverage +We accept most major insurance plans used by [location] residents: +- [Insurance provider 1] +- [Insurance provider 2] +- [Insurance provider 3] +- [Insurance provider 4] + +### Payment Options +For your convenience, we offer: +- Insurance direct billing +- Flexible payment plans +- Credit card payments +- HSA/FSA acceptance + +### Transparent Pricing +Contact us for a detailed quote. Factors affecting cost include: +- Severity of condition +- Number of sessions required +- Insurance coverage level +- Additional treatments needed + +## Patient Testimonials from [Location] + +> "After struggling with [condition] for years, I finally found relief at [Clinic Name]. The team was professional, and the results exceeded my expectations." +> — [Patient initials], [Location] resident + +> "The convenience of having such high-quality [medical service] right here in [location] made all the difference. I no longer have to travel to [other area] for treatment." +> — [Patient initials], [Nearby neighborhood] + +> "Dr. [Name] took the time to explain everything thoroughly. I felt confident throughout the entire process." +> — [Patient initials], [Location] professional + +## Frequently Asked Questions + +### General Questions + +**Q: How do I know if [medical service] is right for me?** +A: The best way to determine if you're a candidate is through a consultation at our [location] clinic. We'll evaluate your medical history, current condition, and treatment goals. + +**Q: How long does [medical service] take?** +A: Treatment sessions typically last [duration], though your first visit including consultation may take [longer duration]. + +**Q: Is [medical service] painful?** +A: [Comfort level explanation with pain management options available] + +### Location-Specific Questions + +**Q: Where exactly is your [location] clinic located?** +A: We're located at [full address], just [distance] from [landmark/station]. [Parking/public transport information]. + +**Q: Do you have parking available?** +A: Yes, we offer [parking details specific to location]. + +**Q: What are your hours for the [location] clinic?** +A: +- Monday-Friday: [hours] +- Saturday: [hours] +- Sunday: [hours/closed] + +### Insurance and Payment + +**Q: Does insurance cover [medical service]?** +A: Coverage varies by plan. Our insurance specialists can verify your benefits before your appointment. + +**Q: Do you offer payment plans?** +A: Yes, we offer flexible payment options including [specific plans available]. + +## Schedule Your [Medical Service] Consultation in [Location] + +Ready to take the first step? Contact our [location] clinic today: + +### Contact Information +📍 **Address**: [Full address] +📞 **Phone**: [Local phone number] +📧 **Email**: [location]@[clinicname].com +🌐 **Online Booking**: [URL] + +### Office Hours +- **Monday-Friday**: [Hours] +- **Saturday**: [Hours] +- **Sunday**: [Hours/Closed] +- **Emergency**: [24/7 hotline if available] + +### Getting Here +**By Subway**: [Detailed directions from nearest station] +**By Bus**: [Bus routes and stops] +**By Car**: [Driving directions and parking info] + +--- + + +```json +{ + "@context": "https://schema.org", + "@type": "MedicalClinic", + "name": "[Clinic Name] - [Location]", + "image": "[clinic-image-url]", + "@id": "[page-url]", + "url": "[website-url]", + "telephone": "[phone-number]", + "address": { + "@type": "PostalAddress", + "streetAddress": "[street]", + "addressLocality": "[city]", + "addressRegion": "[state/province]", + "postalCode": "[zip]", + "addressCountry": "KR" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": [latitude], + "longitude": [longitude] + }, + "openingHoursSpecification": { + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], + "opens": "09:00", + "closes": "18:00" + }, + "medicalSpecialty": "[Medical Specialty]", + "availableService": { + "@type": "MedicalProcedure", + "name": "[Medical Service]", + "description": "[Service Description]" + } +} +``` + +*Last updated: [Date] | [Clinic Name] - Professional [Medical Service] in [Location]* diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/README.md b/ourdigital-custom-skills/20-gtm-audit/README.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/README.md rename to ourdigital-custom-skills/20-gtm-audit/README.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/CLAUDE.md b/ourdigital-custom-skills/20-gtm-audit/code/CLAUDE.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/CLAUDE.md rename to ourdigital-custom-skills/20-gtm-audit/code/CLAUDE.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/checkout_flow.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/checkout_flow.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/checkout_flow.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/checkout_flow.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/common_issues.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/common_issues.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/common_issues.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/common_issues.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/datalayer_validation.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/datalayer_validation.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/datalayer_validation.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/datalayer_validation.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/ecommerce_schema.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/ecommerce_schema.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/ecommerce_schema.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/ecommerce_schema.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/form_tracking.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/form_tracking.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/form_tracking.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/form_tracking.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/ga4_events.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/ga4_events.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/ga4_events.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/ga4_events.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/report_template.md b/ourdigital-custom-skills/20-gtm-audit/code/docs/report_template.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/docs/report_template.md rename to ourdigital-custom-skills/20-gtm-audit/code/docs/report_template.md diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/gtm_audit.py b/ourdigital-custom-skills/20-gtm-audit/code/scripts/gtm_audit.py similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/gtm_audit.py rename to ourdigital-custom-skills/20-gtm-audit/code/scripts/gtm_audit.py diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/requirements.txt b/ourdigital-custom-skills/20-gtm-audit/code/scripts/requirements.txt similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/requirements.txt rename to ourdigital-custom-skills/20-gtm-audit/code/scripts/requirements.txt diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/setup.sh b/ourdigital-custom-skills/20-gtm-audit/code/scripts/setup.sh similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/setup.sh rename to ourdigital-custom-skills/20-gtm-audit/code/scripts/setup.sh diff --git a/ourdigital-custom-skills/13-ourdigital-gtm-audit/gtm-audit.skill b/ourdigital-custom-skills/20-gtm-audit/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/13-ourdigital-gtm-audit/gtm-audit.skill rename to ourdigital-custom-skills/20-gtm-audit/desktop/SKILL.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/README.md b/ourdigital-custom-skills/21-gtm-manager/README.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/README.md rename to ourdigital-custom-skills/21-gtm-manager/README.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/CLAUDE.md b/ourdigital-custom-skills/21-gtm-manager/code/CLAUDE.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/CLAUDE.md rename to ourdigital-custom-skills/21-gtm-manager/code/CLAUDE.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/checkout_flow.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/checkout_flow.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/checkout_flow.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/checkout_flow.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/common_issues.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/common_issues.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/common_issues.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/common_issues.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/datalayer_validation.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/datalayer_validation.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/datalayer_validation.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/datalayer_validation.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/ecommerce_schema.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/ecommerce_schema.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/ecommerce_schema.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/ecommerce_schema.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/form_tracking.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/form_tracking.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/form_tracking.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/form_tracking.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/ga4_events.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/ga4_events.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/ga4_events.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/ga4_events.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/report_template.md b/ourdigital-custom-skills/21-gtm-manager/code/docs/report_template.md similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/docs/report_template.md rename to ourdigital-custom-skills/21-gtm-manager/code/docs/report_template.md diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/gtm_manager.py b/ourdigital-custom-skills/21-gtm-manager/code/scripts/gtm_manager.py similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/gtm_manager.py rename to ourdigital-custom-skills/21-gtm-manager/code/scripts/gtm_manager.py diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/requirements.txt b/ourdigital-custom-skills/21-gtm-manager/code/scripts/requirements.txt similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/requirements.txt rename to ourdigital-custom-skills/21-gtm-manager/code/scripts/requirements.txt diff --git a/ourdigital-custom-skills/14-ourdigital-gtm-manager/setup.sh b/ourdigital-custom-skills/21-gtm-manager/code/scripts/setup.sh similarity index 100% rename from ourdigital-custom-skills/14-ourdigital-gtm-manager/setup.sh rename to ourdigital-custom-skills/21-gtm-manager/code/scripts/setup.sh diff --git a/ourdigital-custom-skills/21-gtm-manager/gtm_audit_report.json b/ourdigital-custom-skills/21-gtm-manager/gtm_audit_report.json new file mode 100644 index 0000000..76d6e5e --- /dev/null +++ b/ourdigital-custom-skills/21-gtm-manager/gtm_audit_report.json @@ -0,0 +1,685 @@ +{ + "audit_metadata": { + "url": "https://josunhotel.com/resve/dining/step0.do", + "timestamp": "2025-12-20T23:27:54.071984", + "expected_container": null + }, + "container_status": { + "installed": true, + "containers": [ + "GTM-NP6NJMP" + ], + "position": "head", + "noscript_present": true, + "datalayer_initialized": true, + "datalayer_init_before_gtm": false, + "issues": [] + }, + "datalayer_analysis": { + "events": [ + { + "index": 0, + "event": "gtm.js", + "has_ecommerce": false, + "params": [ + "gtm.start", + "event", + "gtm.uniqueEventId" + ] + }, + { + "index": 1, + "event": "gtm.dom", + "has_ecommerce": false, + "params": [ + "event", + "gtm.uniqueEventId" + ] + }, + { + "index": 2, + "event": "gtm.load", + "has_ecommerce": false, + "params": [ + "event", + "gtm.uniqueEventId" + ] + }, + { + "index": 3, + "event": "gtm.scrollDepth", + "has_ecommerce": false, + "params": [ + "event", + "gtm.scrollThreshold", + "gtm.scrollUnits", + "gtm.scrollDirection", + "gtm.triggers", + "gtm.uniqueEventId" + ] + } + ], + "validation_issues": [], + "sequence_issues": [] + }, + "form_analysis": { + "forms_found": [ + { + "index": 0, + "id": "form", + "name": "form", + "action": "https://josunhotel.com/resve/dining/step0.do", + "method": "get", + "className": null, + "fieldCount": 2, + "fields": [ + { + "type": "hidden", + "name": "searchSysCode", + "id": "selectedSysCode", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + }, + { + "type": "hidden", + "name": "diningCode", + "id": "selectedDiningCode", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + } + ], + "hasSubmitButton": false + }, + { + "index": 1, + "id": "loginForm", + "name": "loginForm", + "action": "https://josunhotel.com/resve/dining/step0.do", + "method": "get", + "className": null, + "fieldCount": 1, + "fields": [ + { + "type": "hidden", + "name": "nextURL", + "id": "nextURL", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + } + ], + "hasSubmitButton": false + }, + { + "index": 2, + "id": "diningMenuPopForm", + "name": "diningMenuPopForm", + "action": "https://josunhotel.com/resve/dining/step0.do", + "method": "get", + "className": null, + "fieldCount": 4, + "fields": [ + { + "type": "hidden", + "name": "lclasCode", + "id": "lclasCode", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + }, + { + "type": "hidden", + "name": "searchSysCode", + "id": "menuPopSysCode", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + }, + { + "type": "hidden", + "name": "diningCode", + "id": "menuPopDiningCode", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + }, + { + "type": "hidden", + "name": "diningNm", + "id": "popDiningNm", + "required": false, + "placeholder": null, + "validation": null, + "maxLength": null + } + ], + "hasSubmitButton": false + } + ], + "tracking_issues": [ + { + "type": "form_no_submit", + "form_index": 0, + "message": "Form has no submit button - may use JS submission", + "recommendation": "Verify form submission triggers dataLayer push" + }, + { + "type": "form_no_submit", + "form_index": 1, + "message": "Form has no submit button - may use JS submission", + "recommendation": "Verify form submission triggers dataLayer push" + }, + { + "type": "form_no_submit", + "form_index": 2, + "message": "Form has no submit button - may use JS submission", + "recommendation": "Verify form submission triggers dataLayer push" + } + ], + "events_status": { + "found": [], + "missing": [ + "form_start", + "form_submit", + "generate_lead" + ], + "recommendation": "Consider implementing: form_start, form_submit, generate_lead" + }, + "interactions": [ + { + "action": "focus", + "field": "searchSysCode", + "timestamp": "2025-12-20T23:28:02.063963" + }, + { + "action": "blur", + "field": "searchSysCode", + "timestamp": "2025-12-20T23:28:02.277645" + }, + { + "action": "focus", + "field": "diningCode", + "timestamp": "2025-12-20T23:28:02.284360" + }, + { + "action": "blur", + "field": "diningCode", + "timestamp": "2025-12-20T23:28:02.496949" + } + ] + }, + "checkout_analysis": { + "elements_found": { + "cart": [], + "checkout": [], + "addToCart": [], + "quantity": [], + "removeItem": [], + "promoCode": [] + }, + "events_status": {}, + "flow_issues": [] + }, + "network_requests": [ + { + "destination": "Meta Pixel", + "url": "https://connect.facebook.net/en_US/fbevents.js", + "method": "GET", + "params": {}, + "timestamp": "2025-12-20T23:27:55.520388" + }, + { + "destination": "Meta Pixel", + "url": "https://connect.facebook.net/signals/config/538814301070436?v=2.9.248&r=stable&domain=josunhotel.com&hme=17590b9a2e1b26755cdc9ecb401f9f46bca979d3ccce95d786db0936167af731&ex_m=94%2C156%2C134%2C20%2C67%", + "method": "GET", + "params": { + "v": "2.9.248", + "r": "stable", + "domain": "josunhotel.com", + "hme": "17590b9a2e1b26755cdc9ecb401f9f46bca979d3ccce95d786db0936167af731", + "ex_m": "94,156,134,20,67,68,127,63,43,128,72,62,10,141,80,15,93,28,122,115,70,73,121,138,102,143,7,3,4,6,5,2,81,91,144,224,167,57,226,227,50,183,27,69,232,231,170,30,56,9,59,87,88,89,95,118,29,26,120,117,116,135,71,137,136,45,55,111,14,140,40,213,215,177,23,24,25,17,18,39,35,37,36,76,82,86,100,126,129,41,101,21,19,107,64,33,131,130,132,123,22,32,54,99,139,65,16,133,104,31,193,163,284,211,154,196,189,164,97,119,75,109,49,44,103,42,108,114,53,60,113,48,51,47,90,142,0,112,13,110,11,1,52,83,58,61,106,79,78,145,146,84,85,8,92,46,124,77,74,66,105,96,38,125,34,98,12,147" + }, + "timestamp": "2025-12-20T23:27:55.567831" + }, + { + "destination": "Meta Pixel", + "url": "https://www.facebook.com/tr/?id=538814301070436&ev=PageView&dl=https%3A%2F%2Fjosunhotel.com%2Fresve%2Fdining%2Fstep0.do&rl=&if=false&ts=1766240875594&sw=1920&sh=1080&v=2.9.248&r=stable&ec=0&o=4126&fbp", + "method": "GET", + "params": { + "id": "538814301070436", + "ev": "PageView", + "dl": "https://josunhotel.com/resve/dining/step0.do", + "if": "false", + "ts": "1766240875594", + "sw": "1920", + "sh": "1080", + "v": "2.9.248", + "r": "stable", + "ec": "0", + "o": "4126", + "fbp": "fb.1.1766240875591.938184183721303390", + "cs_est": "true", + "ler": "empty", + "plt": "403.89999985694885", + "it": "1766240875565", + "coo": "false", + "expv2[0]": "pl0", + "expv2[1]": "el2", + "expv2[2]": "bc1", + "expv2[3]": "mr2", + "expv2[4]": "im1", + "rqm": "GET" + }, + "timestamp": "2025-12-20T23:27:55.597631" + }, + { + "destination": "GA4", + "url": "https://analytics.google.com/g/collect?v=2&tid=G-H05VHCL3NE>m=45je5ca1v899119784z8836519946za20gzb836519946zd836519946&_p=1766240875171&_gaz=1&gcd=13l3l3l3l1l1&npa=0&dma=0&cid=702909924.1766240876&u", + "method": "POST", + "params": { + "v": "2", + "tid": "G-H05VHCL3NE", + "gtm": "45je5ca1v899119784z8836519946za20gzb836519946zd836519946", + "_p": "1766240875171", + "_gaz": "1", + "gcd": "13l3l3l3l1l1", + "npa": "0", + "dma": "0", + "cid": "702909924.1766240876", + "ul": "en-us", + "sr": "1920x1080", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "are": "1", + "frm": "0", + "pscdl": "noapi", + "_s": "1", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115938466~115938469~116184927~116184929~116251938~116251940", + "dp": "/resve/dining/step0.do", + "sid": "1766240875", + "sct": "1", + "seg": "0", + "dl": "https://josunhotel.com/resve/dining/step0.do", + "en": "page_view", + "_fv": "1", + "_nsi": "1", + "_ss": "1", + "ep.login_status": "비로그인", + "ep.user_lang": "ko", + "ep.contents_group_hotel": "조선호텔앤리조트", + "ep.contents_group_menu": "다이닝 이용", + "ep.contents_group_update": "다이닝 예약 프로세스", + "up.user_lang": "ko", + "up.login_status": "비로그인", + "tfd": "816" + }, + "timestamp": "2025-12-20T23:27:55.700854" + }, + { + "destination": "Google Ads", + "url": "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/10876516957/?random=1766240875706&cv=11&fst=1766240875706&bg=ffffff&guid=ON&async=1&en=gtag.config>m=45be5ca1v887509085z8836519946za2", + "method": "GET", + "params": { + "random": "1766240875706", + "cv": "11", + "fst": "1766240875706", + "bg": "ffffff", + "guid": "ON", + "async": "1", + "en": "gtag.config", + "gtm": "45be5ca1v887509085z8836519946za20gzb836519946zd836519946xec", + "gcd": "13l3l3l3l1l1", + "dma": "0", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "u_w": "1920", + "u_h": "1080", + "url": "https://josunhotel.com/resve/dining/step0.do", + "frm": "0", + "hn": "www.googleadservices.com", + "npa": "0", + "pscdl": "noapi", + "auid": "7778758.1766240876", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "data": "event=gtag.config", + "rfmt": "3", + "fmt": "4" + }, + "timestamp": "2025-12-20T23:27:55.724178" + }, + { + "destination": "Google Ads", + "url": "https://googleads.g.doubleclick.net/pagead/viewthroughconversion/10876516957/?random=1766240875722&cv=11&fst=1766240875722&bg=ffffff&guid=ON&async=1>m=45be5ca1v887509085z8836519946za20gzb836519946zd", + "method": "GET", + "params": { + "random": "1766240875722", + "cv": "11", + "fst": "1766240875722", + "bg": "ffffff", + "guid": "ON", + "async": "1", + "gtm": "45be5ca1v887509085z8836519946za20gzb836519946zd836519946xea", + "gcd": "13l3l3l3l1l1", + "dma": "0", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "u_w": "1920", + "u_h": "1080", + "url": "https://josunhotel.com/resve/dining/step0.do", + "frm": "0", + "hn": "www.googleadservices.com", + "npa": "0", + "pscdl": "noapi", + "auid": "7778758.1766240876", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "_tu": "CA", + "rfmt": "3", + "fmt": "4" + }, + "timestamp": "2025-12-20T23:27:55.724380" + }, + { + "destination": "GA4", + "url": "https://analytics.google.com/g/collect?v=2&tid=G-TRL4XSZ1TD>m=45je5ca1v887683366z8836519946za20gzb836519946zd836519946&_p=1766240875171&em=tv.1~em.3UiOxkIGtQ4owYdw7yAOBB9qepQm5hB4t5octJYCI7g&_gaz=1&", + "method": "POST", + "params": { + "v": "2", + "tid": "G-TRL4XSZ1TD", + "gtm": "45je5ca1v887683366z8836519946za20gzb836519946zd836519946", + "_p": "1766240875171", + "em": "tv.1~em.3UiOxkIGtQ4owYdw7yAOBB9qepQm5hB4t5octJYCI7g", + "_gaz": "1", + "gcd": "13l3l3l3l1l1", + "npa": "0", + "dma": "0", + "cid": "702909924.1766240876", + "ecid": "599478978", + "ul": "en-us", + "sr": "1920x1080", + "ir": "1", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "are": "1", + "frm": "0", + "pscdl": "noapi", + "ec_mode": "a", + "_eu": "EA", + "_s": "1", + "tag_exp": "102015665~103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "dp": "/resve/dining/step0.do", + "sid": "1766240875", + "sct": "1", + "seg": "0", + "dl": "https://josunhotel.com/resve/dining/step0.do", + "en": "page_view", + "_fv": "1", + "_ss": "1", + "ep.login_status": "비로그인", + "ep.user_lang": "ko", + "ep.contents_group_hotel": "조선호텔앤리조트", + "ep.contents_group_menu": "다이닝 이용", + "ep.contents_group_update": "다이닝 예약 프로세스", + "up.user_lang": "ko", + "up.login_status": "비로그인", + "tfd": "868" + }, + "timestamp": "2025-12-20T23:27:55.742300" + }, + { + "destination": "Google Ads", + "url": "https://www.google.com/pagead/1p-user-list/10876516957/?random=1766240875722&cv=11&fst=1766239200000&bg=ffffff&guid=ON&async=1>m=45be5ca1v887509085z8836519946za20gzb836519946zd836519946xea&gcd=13l3l", + "method": "GET", + "params": { + "random": [ + "1766240875722", + "1725248437" + ], + "cv": "11", + "fst": "1766239200000", + "bg": "ffffff", + "guid": "ON", + "async": "1", + "gtm": "45be5ca1v887509085z8836519946za20gzb836519946zd836519946xea", + "gcd": "13l3l3l3l1l1", + "dma": "0", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "u_w": "1920", + "u_h": "1080", + "url": "https://josunhotel.com/resve/dining/step0.do", + "frm": "0", + "hn": "www.googleadservices.com", + "npa": "0", + "pscdl": "noapi", + "auid": "7778758.1766240876", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "_tu": "CA", + "rfmt": "3", + "fmt": "3", + "is_vtc": "1", + "cid": "CAQSfwDCSxrROzP7e5XZEQS8k7I9UCNrhQnG5V-tD4ZKQa_7FtJSznqwNhHyDNj7BO_ZAQ0ZmiPlxWqmbyGxMvHSi61fZhkyzwsqEi7B9tTXs6sacAba88_-qgv_8Rgn4jQGuJL6VEJJrTfyPEzvb8DkpSBnvoX3GsuNT06ie6uSVWc", + "rmt_tld": "0", + "ipr": "y" + }, + "timestamp": "2025-12-20T23:27:55.988316" + }, + { + "destination": "Google Ads", + "url": "https://www.google.com/pagead/1p-user-list/10876516957/?random=1766240875706&cv=11&fst=1766239200000&bg=ffffff&guid=ON&async=1&en=gtag.config>m=45be5ca1v887509085z8836519946za20gzb836519946zd8365199", + "method": "GET", + "params": { + "random": [ + "1766240875706", + "47137349" + ], + "cv": "11", + "fst": "1766239200000", + "bg": "ffffff", + "guid": "ON", + "async": "1", + "en": "gtag.config", + "gtm": "45be5ca1v887509085z8836519946za20gzb836519946zd836519946xec", + "gcd": "13l3l3l3l1l1", + "dma": "0", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "u_w": "1920", + "u_h": "1080", + "url": "https://josunhotel.com/resve/dining/step0.do", + "frm": "0", + "hn": "www.googleadservices.com", + "npa": "0", + "pscdl": "noapi", + "auid": "7778758.1766240876", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "data": "event=gtag.config", + "rfmt": "3", + "fmt": "3", + "is_vtc": "1", + "cid": "CAQSfwDCSxrRlXV2juBCOX2pHU5qySObf1Ig-AD_NFppjlwlkvd1cQpXbh1i_ciiVUQiaEEtKY-q8RxtPsLk-6nSWigi7Rpe09oM62SpgggyFvUyqtM2ShCyr_w5AaTOI3665_Bn3q954YNmzAevWsWo3YNoUYN6S0r2iZE7jyFuHWQ", + "rmt_tld": "0", + "ipr": "y" + }, + "timestamp": "2025-12-20T23:27:55.990657" + }, + { + "destination": "GA4", + "url": "https://analytics.google.com/g/collect?v=2&tid=G-H05VHCL3NE>m=45je5ca1v899119784za20gzb836519946zd836519946&_p=1766240875171&gcd=13l3l3l3l1l1&npa=0&dma=0&cid=702909924.1766240876&ul=en-us&sr=1920x10", + "method": "POST", + "params": { + "v": "2", + "tid": "G-H05VHCL3NE", + "gtm": "45je5ca1v899119784za20gzb836519946zd836519946", + "_p": "1766240875171", + "gcd": "13l3l3l3l1l1", + "npa": "0", + "dma": "0", + "cid": "702909924.1766240876", + "ul": "en-us", + "sr": "1920x1080", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "are": "1", + "frm": "0", + "pscdl": "noapi", + "_eu": "AEAAAAQ", + "_s": "2", + "tag_exp": "103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115938466~115938469~116184927~116184929~116251938~116251940", + "dp": "/resve/dining/step0.do", + "sid": "1766240875", + "sct": "1", + "seg": "0", + "dl": "https://josunhotel.com/resve/dining/step0.do", + "_tu": "CA", + "en": "scroll", + "ep.login_status": "비로그인", + "ep.user_lang": "ko", + "ep.contents_group_hotel": "조선호텔앤리조트", + "ep.contents_group_menu": "다이닝 이용", + "ep.contents_group_update": "다이닝 예약 프로세스", + "epn.percent_scrolled": "90", + "_et": "3343", + "tfd": "9162" + }, + "timestamp": "2025-12-20T23:28:04.037363" + }, + { + "destination": "GA4", + "url": "https://analytics.google.com/g/collect?v=2&tid=G-TRL4XSZ1TD>m=45je5ca1v887683366za20gzb836519946zd836519946&_p=1766240875171&gcd=13l3l3l3l1l1&npa=0&dma=0&cid=702909924.1766240876&ecid=599478978&ul=e", + "method": "POST", + "params": { + "v": "2", + "tid": "G-TRL4XSZ1TD", + "gtm": "45je5ca1v887683366za20gzb836519946zd836519946", + "_p": "1766240875171", + "gcd": "13l3l3l3l1l1", + "npa": "0", + "dma": "0", + "cid": "702909924.1766240876", + "ecid": "599478978", + "ul": "en-us", + "sr": "1920x1080", + "ir": "1", + "uaa": "x64", + "uab": "64", + "uafvl": "Chromium;136.0.7103.25|HeadlessChrome;136.0.7103.25|Not.A%2FBrand;99.0.0.0", + "uamb": "0", + "uap": "macOS", + "uapv": "10_15_7", + "uaw": "0", + "are": "1", + "frm": "0", + "pscdl": "noapi", + "_eu": "EEAAAAQ", + "_s": "2", + "tag_exp": "102015665~103116026~103200004~104527906~104528501~104684208~104684211~105391253~115583767~115616986~115938466~115938469~116184927~116184929~116251938~116251940", + "dp": "/resve/dining/step0.do", + "sid": "1766240875", + "sct": "1", + "seg": "0", + "dl": "https://josunhotel.com/resve/dining/step0.do", + "_tu": "CA", + "en": "scroll", + "ep.login_status": "비로그인", + "ep.user_lang": "ko", + "ep.contents_group_hotel": "조선호텔앤리조트", + "ep.contents_group_menu": "다이닝 이용", + "ep.contents_group_update": "다이닝 예약 프로세스", + "epn.percent_scrolled": "90", + "_et": "3298", + "tfd": "9166" + }, + "timestamp": "2025-12-20T23:28:04.040013" + } + ], + "tags_fired": [ + "GA4", + "Google Ads", + "Meta Pixel" + ], + "issues": [ + { + "severity": "warning", + "type": "datalayer_order", + "message": "dataLayer should be initialized before GTM script" + } + ], + "recommendations": [ + { + "priority": "medium", + "action": "Initialize dataLayer before GTM", + "details": "Add 'window.dataLayer = window.dataLayer || [];' before GTM" + }, + { + "priority": "medium", + "action": "Implement form tracking events", + "details": "Missing events: form_start, form_submit, generate_lead" + } + ], + "checklist": { + "container_health": { + "gtm_installed": true, + "correct_container": true, + "no_duplicates": true, + "correct_position": true, + "datalayer_init_order": false + }, + "datalayer_quality": { + "initialized": true, + "events_present": true, + "no_validation_errors": true, + "correct_sequence": true + }, + "form_tracking": { + "forms_identifiable": true, + "form_events_present": false + }, + "tag_firing": { + "ga4_active": true, + "requests_captured": true + } + } +} \ No newline at end of file diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/README.md b/ourdigital-custom-skills/30-ourdigital-designer/README.md similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/README.md rename to ourdigital-custom-skills/30-ourdigital-designer/README.md diff --git a/ourdigital-custom-skills/30-ourdigital-designer/code/CLAUDE.md b/ourdigital-custom-skills/30-ourdigital-designer/code/CLAUDE.md new file mode 100644 index 0000000..a9a59e4 --- /dev/null +++ b/ourdigital-custom-skills/30-ourdigital-designer/code/CLAUDE.md @@ -0,0 +1,74 @@ +# CLAUDE.md + +## Overview + +Visual storytelling toolkit for OurDigital blog featured images. Creates sophisticated image prompts that capture philosophical depth through minimalist visual metaphors. + +## Quick Start + +```bash +# Generate image prompt +python scripts/generate_prompt.py --topic "AI identity" --mood "contemplative" + +# Calibrate mood parameters +python scripts/mood_calibrator.py --input "essay excerpt" --style "minimalist" +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `generate_prompt.py` | Generate AI image prompts from essay concepts | +| `mood_calibrator.py` | Calibrate emotional parameters for consistency | + +## Prompt Generator + +```bash +# Basic generation +python scripts/generate_prompt.py --topic "AI identity crisis" + +# With mood and metaphor +python scripts/generate_prompt.py \ + --topic "AI identity crisis" \ + --mood "contemplative anxiety" \ + --metaphor "mirror fragments" + +# From essay text +python scripts/generate_prompt.py --input essay.txt --auto-extract +``` + +## Mood Calibrator + +```bash +# Analyze text for emotional tone +python scripts/mood_calibrator.py --input "essay text" + +# Adjust existing prompt +python scripts/mood_calibrator.py \ + --prompt "existing prompt" \ + --adjust-warmth 0.3 \ + --adjust-complexity -0.2 +``` + +## Output Format + +Generated prompts follow OurDigital style: +- 1200x630px dimensions (OG standard) +- Minimalist vector + subtle textures +- 60-30-10 color rule +- 20%+ negative space + +## References + +See `references/` for: +- `visual_metaphors.md` - Concept to visual mapping +- `color_palettes.md` - Emotion to color mapping +- `advanced_techniques.md` - Style guidelines + +## Visual Approaches by Essay Type + +| Type | Strategy | Colors | +|------|----------|--------| +| Technology | Organic-digital hybrids | Cool blues → warm accents | +| Social | Network patterns | Desaturated → hope spots | +| Philosophy | Zen space, symbols | Monochrome + single accent | diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/advanced_techniques.md b/ourdigital-custom-skills/30-ourdigital-designer/code/references/advanced_techniques.md similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/advanced_techniques.md rename to ourdigital-custom-skills/30-ourdigital-designer/code/references/advanced_techniques.md diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/color_palettes.md b/ourdigital-custom-skills/30-ourdigital-designer/code/references/color_palettes.md similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/color_palettes.md rename to ourdigital-custom-skills/30-ourdigital-designer/code/references/color_palettes.md diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/visual_metaphors.md b/ourdigital-custom-skills/30-ourdigital-designer/code/references/visual_metaphors.md similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/references/visual_metaphors.md rename to ourdigital-custom-skills/30-ourdigital-designer/code/references/visual_metaphors.md diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/scripts/generate_prompt.py b/ourdigital-custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/scripts/generate_prompt.py rename to ourdigital-custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/scripts/mood_calibrator.py b/ourdigital-custom-skills/30-ourdigital-designer/code/scripts/mood_calibrator.py similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/scripts/mood_calibrator.py rename to ourdigital-custom-skills/30-ourdigital-designer/code/scripts/mood_calibrator.py diff --git a/ourdigital-custom-skills/30-ourdigital-designer/code/scripts/requirements.txt b/ourdigital-custom-skills/30-ourdigital-designer/code/scripts/requirements.txt new file mode 100644 index 0000000..6119913 --- /dev/null +++ b/ourdigital-custom-skills/30-ourdigital-designer/code/scripts/requirements.txt @@ -0,0 +1,5 @@ +# 30-ourdigital-designer dependencies +openai>=1.0.0 +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/10-ourdigital-visual-storytelling/SKILL.md b/ourdigital-custom-skills/30-ourdigital-designer/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/10-ourdigital-visual-storytelling/SKILL.md rename to ourdigital-custom-skills/30-ourdigital-designer/desktop/SKILL.md diff --git a/ourdigital-custom-skills/31-ourdigital-research/code/CLAUDE.md b/ourdigital-custom-skills/31-ourdigital-research/code/CLAUDE.md new file mode 100644 index 0000000..fc46b78 --- /dev/null +++ b/ourdigital-custom-skills/31-ourdigital-research/code/CLAUDE.md @@ -0,0 +1,64 @@ +# CLAUDE.md + +## Overview + +Research-to-publication workflow toolkit. Exports research to Ulysses for OurDigital blog publishing. + +## Quick Start + +```bash +# Export research to Ulysses +python scripts/export_to_ulysses.py --input research.md --group "Blog Drafts" +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `export_to_ulysses.py` | Export markdown to Ulysses app for editing/publishing | + +## Ulysses Export + +```bash +# Basic export +python scripts/export_to_ulysses.py --input research.md + +# With target group +python scripts/export_to_ulysses.py \ + --input research.md \ + --group "Blog Drafts" \ + --tags "research,draft" + +# From Notion export +python scripts/export_to_ulysses.py \ + --notion-export notion_export.zip \ + --group "From Notion" +``` + +## Workflow + +1. Complete research in Claude/Notion +2. Export to markdown format +3. Run export script to Ulysses +4. Edit and polish in Ulysses +5. Publish to Ghost/OurDigital + +## Output Targets + +- **blog.ourdigital.org** - Main blog +- **journal.ourdigital.org** - Long-form essays +- **ourstory.day** - Personal narratives + +## Ulysses Integration + +Requires: +- Ulysses app installed +- x-callback-url scheme enabled +- Target group created in Ulysses + +## References + +See `references/` for: +- `blog-style-guide.md` - OurDigital writing style +- `publishing-checklist.md` - Pre-publish checklist +- `ghost-api-config.md` - Ghost CMS integration diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/assets/blog-template.md b/ourdigital-custom-skills/31-ourdigital-research/code/assets/blog-template.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/assets/blog-template.md rename to ourdigital-custom-skills/31-ourdigital-research/code/assets/blog-template.md diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/references/blog-style-guide.md b/ourdigital-custom-skills/31-ourdigital-research/code/references/blog-style-guide.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/references/blog-style-guide.md rename to ourdigital-custom-skills/31-ourdigital-research/code/references/blog-style-guide.md diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/references/ghost-api-config.md b/ourdigital-custom-skills/31-ourdigital-research/code/references/ghost-api-config.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/references/ghost-api-config.md rename to ourdigital-custom-skills/31-ourdigital-research/code/references/ghost-api-config.md diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/references/publishing-checklist.md b/ourdigital-custom-skills/31-ourdigital-research/code/references/publishing-checklist.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/references/publishing-checklist.md rename to ourdigital-custom-skills/31-ourdigital-research/code/references/publishing-checklist.md diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/references/research-frameworks.md b/ourdigital-custom-skills/31-ourdigital-research/code/references/research-frameworks.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/references/research-frameworks.md rename to ourdigital-custom-skills/31-ourdigital-research/code/references/research-frameworks.md diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/scripts/export_to_ulysses.py b/ourdigital-custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/scripts/export_to_ulysses.py rename to ourdigital-custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py diff --git a/ourdigital-custom-skills/31-ourdigital-research/code/scripts/requirements.txt b/ourdigital-custom-skills/31-ourdigital-research/code/scripts/requirements.txt new file mode 100644 index 0000000..e5d660f --- /dev/null +++ b/ourdigital-custom-skills/31-ourdigital-research/code/scripts/requirements.txt @@ -0,0 +1,3 @@ +# 31-ourdigital-research dependencies +markdown>=3.5.0 +python-dotenv>=1.0.0 diff --git a/ourdigital-custom-skills/11-ourdigital-research-publisher/SKILL.md b/ourdigital-custom-skills/31-ourdigital-research/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/11-ourdigital-research-publisher/SKILL.md rename to ourdigital-custom-skills/31-ourdigital-research/desktop/SKILL.md diff --git a/ourdigital-custom-skills/32-ourdigital-presentation/code/CLAUDE.md b/ourdigital-custom-skills/32-ourdigital-presentation/code/CLAUDE.md new file mode 100644 index 0000000..4ed15cd --- /dev/null +++ b/ourdigital-custom-skills/32-ourdigital-presentation/code/CLAUDE.md @@ -0,0 +1,82 @@ +# CLAUDE.md + +## Overview + +Notion-to-presentation workflow. Transforms research content into branded PowerPoint/Figma presentations. + +## Quick Start + +```bash +# Full automated workflow +python scripts/run_workflow.py --notion-url [URL] --output presentation.pptx + +# Step-by-step +python scripts/extract_notion.py [URL] > research.json +python scripts/synthesize_content.py research.json > synthesis.json +python scripts/apply_brand.py synthesis.json --output presentation.pptx +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `run_workflow.py` | Orchestrate full pipeline | +| `extract_notion.py` | Extract content from Notion pages | +| `synthesize_content.py` | Analyze and structure content | +| `apply_brand.py` | Apply corporate styling | + +## Workflow Pipeline + +``` +extract_notion.py → synthesize_content.py → apply_brand.py + ↓ ↓ ↓ + research.json synthesis.json presentation.pptx +``` + +## Extract Notion + +```bash +# Single page +python scripts/extract_notion.py notion://page/abc123 > research.json + +# Database query +python scripts/extract_notion.py notion://database/def456 --filter "Status=Done" +``` + +## Synthesize Content + +```bash +# Generate slide structure +python scripts/synthesize_content.py research.json > synthesis.json + +# With presentation type +python scripts/synthesize_content.py research.json \ + --type executive \ + --max-slides 10 +``` + +## Apply Brand + +```bash +# PowerPoint output +python scripts/apply_brand.py synthesis.json \ + --config assets/brand_config.json \ + --output presentation.pptx + +# Preview HTML +python scripts/apply_brand.py synthesis.json --preview +``` + +## Brand Configuration + +See `assets/brand_config.json` for: +- Logo placement +- Color scheme +- Font settings +- Slide templates + +## Output Formats + +- PowerPoint (.pptx) +- Figma (via API) +- HTML preview diff --git a/ourdigital-custom-skills/03-research-to-presentation/assets/brand_config.json b/ourdigital-custom-skills/32-ourdigital-presentation/code/assets/brand_config.json similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/assets/brand_config.json rename to ourdigital-custom-skills/32-ourdigital-presentation/code/assets/brand_config.json diff --git a/ourdigital-custom-skills/03-research-to-presentation/references/agenda_templates.md b/ourdigital-custom-skills/32-ourdigital-presentation/code/references/agenda_templates.md similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/references/agenda_templates.md rename to ourdigital-custom-skills/32-ourdigital-presentation/code/references/agenda_templates.md diff --git a/ourdigital-custom-skills/03-research-to-presentation/references/research_patterns.md b/ourdigital-custom-skills/32-ourdigital-presentation/code/references/research_patterns.md similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/references/research_patterns.md rename to ourdigital-custom-skills/32-ourdigital-presentation/code/references/research_patterns.md diff --git a/ourdigital-custom-skills/03-research-to-presentation/references/slide_layouts.md b/ourdigital-custom-skills/32-ourdigital-presentation/code/references/slide_layouts.md similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/references/slide_layouts.md rename to ourdigital-custom-skills/32-ourdigital-presentation/code/references/slide_layouts.md diff --git a/ourdigital-custom-skills/03-research-to-presentation/scripts/apply_brand.py b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/apply_brand.py similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/scripts/apply_brand.py rename to ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/apply_brand.py diff --git a/ourdigital-custom-skills/03-research-to-presentation/scripts/extract_notion.py b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/extract_notion.py similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/scripts/extract_notion.py rename to ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/extract_notion.py diff --git a/ourdigital-custom-skills/03-research-to-presentation/scripts/generate_pptx.js b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/generate_pptx.js similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/scripts/generate_pptx.js rename to ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/generate_pptx.js diff --git a/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/requirements.txt b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/requirements.txt new file mode 100644 index 0000000..fdb16fa --- /dev/null +++ b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/requirements.txt @@ -0,0 +1,6 @@ +# 32-ourdigital-presentation dependencies +notion-client>=2.0.0 +python-pptx>=0.6.21 +jinja2>=3.1.0 +pyyaml>=6.0.0 +python-dotenv>=1.0.0 diff --git a/ourdigital-custom-skills/03-research-to-presentation/scripts/run_workflow.py b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/run_workflow.py similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/scripts/run_workflow.py rename to ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/run_workflow.py diff --git a/ourdigital-custom-skills/03-research-to-presentation/scripts/synthesize_content.py b/ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/synthesize_content.py similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/scripts/synthesize_content.py rename to ourdigital-custom-skills/32-ourdigital-presentation/code/scripts/synthesize_content.py diff --git a/ourdigital-custom-skills/03-research-to-presentation/SKILL.md b/ourdigital-custom-skills/32-ourdigital-presentation/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/03-research-to-presentation/SKILL.md rename to ourdigital-custom-skills/32-ourdigital-presentation/desktop/SKILL.md diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/README.md b/ourdigital-custom-skills/40-jamie-brand-editor/README.md similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/README.md rename to ourdigital-custom-skills/40-jamie-brand-editor/README.md diff --git a/ourdigital-custom-skills/40-jamie-brand-editor/code/CLAUDE.md b/ourdigital-custom-skills/40-jamie-brand-editor/code/CLAUDE.md new file mode 100644 index 0000000..5ff38f8 --- /dev/null +++ b/ourdigital-custom-skills/40-jamie-brand-editor/code/CLAUDE.md @@ -0,0 +1,69 @@ +# CLAUDE.md + +## Overview + +Jamie Clinic content generation toolkit. Creates branded content following Jamie's voice and Korean medical advertising regulations. + +## Quick Start + +```bash +# Check content compliance +python scripts/compliance_checker.py --input draft.md +``` + +## Scripts + +| Script | Purpose | +|--------|---------| +| `compliance_checker.py` | Validate content against medical ad regulations | + +## Compliance Checker + +```bash +# Check draft content +python scripts/compliance_checker.py --input draft.md + +# With detailed report +python scripts/compliance_checker.py --input draft.md --verbose --output report.json + +# Batch check +python scripts/compliance_checker.py --dir ./drafts --output compliance_report.json +``` + +## Compliance Rules + +Checks for Korean medical advertising regulations: +- No exaggerated claims +- No before/after comparison violations +- No guarantee language +- No competitor comparisons +- Proper disclosure requirements + +## Brand Voice + +Content must follow Jamie's voice: +- 90% 격식체 (~습니다/~입니다) +- "환자분" for medical contexts +- "자연스러운" as key descriptor +- No exaggeration, realistic expectations + +## Content Types + +- Blog posts (블로그 포스팅) +- Procedure pages (시술 페이지) +- Ad copy (광고 카피) +- Social media (SNS 콘텐츠) + +## Workflow + +1. Generate content (use SKILL.md guidelines) +2. Run compliance checker +3. Fix flagged issues +4. Submit to `41-jamie-brand-audit` for final review + +## References + +See `desktop/` for: +- `brand_guidelines/` - Voice and tone guide +- `regulations/` - Medical ad law summary +- `procedures_dataset/` - Procedure information diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/docs/PLAN.md b/ourdigital-custom-skills/40-jamie-brand-editor/code/docs/PLAN.md similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/docs/PLAN.md rename to ourdigital-custom-skills/40-jamie-brand-editor/code/docs/PLAN.md diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/scripts/compliance_checker.py b/ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/scripts/compliance_checker.py rename to ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py diff --git a/ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/requirements.txt b/ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/requirements.txt new file mode 100644 index 0000000..74b525c --- /dev/null +++ b/ourdigital-custom-skills/40-jamie-brand-editor/code/scripts/requirements.txt @@ -0,0 +1,4 @@ +# 40-jamie-brand-editor dependencies +python-dotenv>=1.0.0 +rich>=13.7.0 +typer>=0.9.0 diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/SKILL.md b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/SKILL.md rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/SKILL.md diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/brand_guidelines/brand_voice_guide_korean.md b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/brand_guidelines/brand_voice_guide_korean.md similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/brand_guidelines/brand_voice_guide_korean.md rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/brand_guidelines/brand_voice_guide_korean.md diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/SMAS-lifting.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/SMAS-lifting.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/SMAS-lifting.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/SMAS-lifting.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/dual-canthoplasty.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/dual-canthoplasty.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/dual-canthoplasty.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/dual-canthoplasty.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/endoscopic-brow-lift.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/endoscopic-brow-lift.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/endoscopic-brow-lift.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/endoscopic-brow-lift.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/endoscopic-forhead-lift.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/endoscopic-forhead-lift.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/endoscopic-forhead-lift.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/endoscopic-forhead-lift.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/epicanthoplasty-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/epicanthoplasty-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/epicanthoplasty-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/epicanthoplasty-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/hybrid-double-eyelid-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/hybrid-double-eyelid-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/hybrid-double-eyelid-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/hybrid-double-eyelid-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/hybrid-double‑eyelid-surgery.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/hybrid-double‑eyelid-surgery.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/hybrid-double‑eyelid-surgery.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/hybrid-double‑eyelid-surgery.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/lower-eyelid-fat-repositioning-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/lower-eyelid-fat-repositioning-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/lower-eyelid-fat-repositioning-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/lower-eyelid-fat-repositioning-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/ptosis-correction-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/ptosis-correction-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/ptosis-correction-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/ptosis-correction-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/ptosis‑correction-eyelid-surgery.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/ptosis‑correction-eyelid-surgery.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/ptosis‑correction-eyelid-surgery.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/ptosis‑correction-eyelid-surgery.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/quick-burial-double-eyelid-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/quick-burial-double-eyelid-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/quick-burial-double-eyelid-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/quick-burial-double-eyelid-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/quick-buried-double‑eyelid-technique.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/quick-buried-double‑eyelid-technique.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/quick-buried-double‑eyelid-technique.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/quick-buried-double‑eyelid-technique.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/revision-eye-surgery.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/revision-eye-surgery.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/revision-eye-surgery.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/revision-eye-surgery.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/sub-brow-excision-faq.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/sub-brow-excision-faq.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/sub-brow-excision-faq.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/sub-brow-excision-faq.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/sub‑brow-skin-excision.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/sub‑brow-skin-excision.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/sub‑brow-skin-excision.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/sub‑brow-skin-excision.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/under‑eye-fat-repositioning.json b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/under‑eye-fat-repositioning.json similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/procedures_schema_dataset/under‑eye-fat-repositioning.json rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/procedures_dataset/under‑eye-fat-repositioning.json diff --git a/ourdigital-custom-skills/20-jamie-brand-editor/regulations/medical_advertising_law_summary_korean.md b/ourdigital-custom-skills/40-jamie-brand-editor/desktop/regulations/medical_advertising_law_summary_korean.md similarity index 100% rename from ourdigital-custom-skills/20-jamie-brand-editor/regulations/medical_advertising_law_summary_korean.md rename to ourdigital-custom-skills/40-jamie-brand-editor/desktop/regulations/medical_advertising_law_summary_korean.md diff --git a/ourdigital-custom-skills/41-jamie-brand-audit/code/CLAUDE.md b/ourdigital-custom-skills/41-jamie-brand-audit/code/CLAUDE.md new file mode 100644 index 0000000..85e606d --- /dev/null +++ b/ourdigital-custom-skills/41-jamie-brand-audit/code/CLAUDE.md @@ -0,0 +1,71 @@ +# CLAUDE.md + +## Overview + +Jamie Clinic brand compliance auditor. Reviews existing content for brand alignment, tone/voice, and regulatory compliance. + +> **Note**: This is a guidance-only skill. No scripts needed - uses desktop/references for review criteria. + +## Purpose + +Review and evaluate EXISTING content (not generate new content). + +For content generation, use `40-jamie-brand-editor`. + +## Audit Process + +1. **Receive content** to review +2. **Check brand compliance** against guidelines in `desktop/guides/` +3. **Verify tone/voice** alignment +4. **Flag regulatory issues** (medical advertising laws) +5. **Provide feedback** with specific corrections + +## Reference Materials + +Located in `desktop/`: + +### Brand Guidelines +- `guides/jamie_brand_guide_v2.8_extended.md` - Full brand guide +- `guides/jamie_tone_manner_guide_v1.0.md` - Tone and voice +- `guides/jamie_brand_blog_style_guide.md` - Blog-specific style + +### Visual Standards +- `design/jamie_logo_guidelines.md` - Logo usage +- `design/jamie_brand_color_typography_extension_v2.3.md` - Colors/fonts + +### Procedure Knowledge +- `fact-sheets/procedures/` - Accurate procedure information + +### Templates +- `templates/markdown/review-report-template.md` - Review output format +- `templates/markdown/blog-post-template.md` - Correct blog structure + +## Review Checklist + +1. **Voice & Tone** + - 90% 격식체 ratio + - Correct honorifics (환자분/고객님) + - Jamie personality traits + +2. **Brand Alignment** + - Slogan consistency + - Core values reflected + - No competitor mentions + +3. **Regulatory Compliance** + - No exaggerated claims + - No guarantee language + - Proper disclosures + +4. **Technical Accuracy** + - Procedure facts correct + - Medical terms accurate + - Recovery times realistic + +## Output + +Provide structured feedback: +- Overall compliance score +- Specific issues with line references +- Suggested corrections +- Approval/revision recommendation diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/SKILL.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/SKILL.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/SKILL.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/SKILL.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/ Jamie-Clinic-plaque.png b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/ Jamie-Clinic-plaque.png similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/ Jamie-Clinic-plaque.png rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/ Jamie-Clinic-plaque.png diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/Jamie-Clinic-Logo-Square-500x500-dark.jpg b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/Jamie-Clinic-Logo-Square-500x500-dark.jpg similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/Jamie-Clinic-Logo-Square-500x500-dark.jpg rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/Jamie-Clinic-Logo-Square-500x500-dark.jpg diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/Jamie-Clinic-Logo-Square-500x500.png b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/Jamie-Clinic-Logo-Square-500x500.png similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/Jamie-Clinic-Logo-Square-500x500.png rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/Jamie-Clinic-Logo-Square-500x500.png diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_brand_color_typography_extension_v2.3.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_brand_color_typography_extension_v2.3.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_brand_color_typography_extension_v2.3.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_brand_color_typography_extension_v2.3.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_clinic_square_dark.jpg b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_clinic_square_dark.jpg similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_clinic_square_dark.jpg rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_clinic_square_dark.jpg diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_clinic_square_white.jpg b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_clinic_square_white.jpg similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_clinic_square_white.jpg rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_clinic_square_white.jpg diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_f_j.webp b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_f_j.webp similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_f_j.webp rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_f_j.webp diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_guidelines.html b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_guidelines.html similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_guidelines.html rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_guidelines.html diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_guidelines.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_guidelines.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/design/jamie_logo_guidelines.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/design/jamie_logo_guidelines.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/cheek_lifting.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/cheek_lifting.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/cheek_lifting.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/cheek_lifting.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/dual_epicanthoplasty.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/dual_epicanthoplasty.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/dual_epicanthoplasty.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/dual_epicanthoplasty.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_brow_lift.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_brow_lift.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_brow_lift.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_brow_lift.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_brow_lift_or.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_brow_lift_or.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_brow_lift_or.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_brow_lift_or.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_eyebrow_lift.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_eyebrow_lift.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/endoscopic_eyebrow_lift.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/endoscopic_eyebrow_lift.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/eye_revision.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/eye_revision.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/eye_revision.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/eye_revision.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/eye_surgery.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/eye_surgery.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/eye_surgery.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/eye_surgery.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/fat_grafting.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/fat_grafting.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/fat_grafting.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/fat_grafting.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/forehead_surgery.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/forehead_surgery.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/forehead_surgery.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/forehead_surgery.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/greeting.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/greeting.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/greeting.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/greeting.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/hifu_lifting.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/hifu_lifting.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/hifu_lifting.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/hifu_lifting.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/hybrid_double_eyelid.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/hybrid_double_eyelid.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/hybrid_double_eyelid.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/hybrid_double_eyelid.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/lower_eyelid_fat_repositioning.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/lower_eyelid_fat_repositioning.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/lower_eyelid_fat_repositioning.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/lower_eyelid_fat_repositioning.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/ptosis_correction.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/ptosis_correction.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/ptosis_correction.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/ptosis_correction.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/quick_suture.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/quick_suture.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/quick_suture.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/quick_suture.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/smas_lifting.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/smas_lifting.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/smas_lifting.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/smas_lifting.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/subbrow_excision.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/subbrow_excision.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/subbrow_excision.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/subbrow_excision.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/youthful_procedures.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/youthful_procedures.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/youthful_procedures.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/youthful_procedures.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/youthful_surgery.txt b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/youthful_surgery.txt similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/fact-sheets/procedures/youthful_surgery.txt rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/fact-sheets/procedures/youthful_surgery.txt diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_brand_blog_style_guide.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_brand_blog_style_guide.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_brand_blog_style_guide.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_brand_blog_style_guide.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_brand_guide_v2.8_extended.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_brand_guide_v2.8_extended.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_brand_guide_v2.8_extended.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_brand_guide_v2.8_extended.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_tone_manner_analysis_framework.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_tone_manner_analysis_framework.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_tone_manner_analysis_framework.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_tone_manner_analysis_framework.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_tone_manner_guide_v1.0.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_tone_manner_guide_v1.0.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/guides/jamie_tone_manner_guide_v1.0.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/guides/jamie_tone_manner_guide_v1.0.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/presentation-template.html b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/presentation-template.html similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/presentation-template.html rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/presentation-template.html diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/report-template.html b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/report-template.html similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/report-template.html rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/report-template.html diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/review-result-template.html b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/review-result-template.html similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/html/review-result-template.html rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/html/review-result-template.html diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/markdown/blog-post-template.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/markdown/blog-post-template.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/markdown/blog-post-template.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/markdown/blog-post-template.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/markdown/review-report-template.md b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/markdown/review-report-template.md similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/markdown/review-report-template.md rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/markdown/review-report-template.md diff --git a/ourdigital-custom-skills/21-jamie-brand-guardian/templates/styles/jamie-brand.css b/ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/styles/jamie-brand.css similarity index 100% rename from ourdigital-custom-skills/21-jamie-brand-guardian/templates/styles/jamie-brand.css rename to ourdigital-custom-skills/41-jamie-brand-audit/desktop/templates/styles/jamie-brand.css diff --git a/ourdigital-custom-skills/AUDIT_REPORT.md b/ourdigital-custom-skills/AUDIT_REPORT.md new file mode 100644 index 0000000..997aae4 --- /dev/null +++ b/ourdigital-custom-skills/AUDIT_REPORT.md @@ -0,0 +1,119 @@ +# Skills Audit Report + +Generated: 2024-12-21 + +## Summary + +| Status | Count | +|--------|-------| +| Complete (code + desktop) | 8 | +| Partial (missing CLAUDE.md or scripts) | 9 | +| Empty (placeholder only) | 1 | + +--- + +## Detailed Audit by Skill + +### 01-09: General Automation + +| # | Skill | code/CLAUDE.md | code/scripts | code/requirements.txt | desktop/SKILL.md | Status | +|---|-------|----------------|--------------|----------------------|------------------|--------| +| 01 | notion-organizer | **MISSING** | async_organizer.py, schema_migrator.py | YES | YES | Partial | +| 02 | notion-data-migration | **MISSING** | **EMPTY** | **MISSING** | **MISSING** | Empty | + +### 10-19: SEO Tools + +| # | Skill | code/CLAUDE.md | code/scripts | code/requirements.txt | desktop/SKILL.md | Status | +|---|-------|----------------|--------------|----------------------|------------------|--------| +| 10 | seo-technical-audit | YES | robots_checker, sitemap_validator, sitemap_crawler, page_analyzer, base_client | YES | YES | **Complete** | +| 11 | seo-on-page-audit | YES | page_analyzer, base_client | YES | YES | **Complete** | +| 12 | seo-local-audit | YES | **EMPTY** (new skill) | **MISSING** | YES | Partial | +| 13 | seo-schema-validator | YES | schema_validator, base_client | YES | YES | **Complete** | +| 14 | seo-schema-generator | YES | schema_generator, base_client + templates/ | YES | YES | **Complete** | +| 15 | seo-core-web-vitals | YES | pagespeed_client, base_client | YES | YES | **Complete** | +| 16 | seo-search-console | YES | gsc_client, base_client | YES | YES | **Complete** | +| 17 | seo-gateway-architect | **MISSING** | keyword_analyzer.py | YES | YES | Partial | +| 18 | seo-gateway-builder | **MISSING** | generate_pages.py | **MISSING** | YES | Partial | + +### 20-29: GTM/GA Tools + +| # | Skill | code/CLAUDE.md | code/scripts | code/requirements.txt | desktop/SKILL.md | Status | +|---|-------|----------------|--------------|----------------------|------------------|--------| +| 20 | gtm-audit | YES | gtm_audit.py | YES | YES | **Complete** | +| 21 | gtm-manager | YES | gtm_manager.py + docs/ | YES | YES | **Complete** | + +### 30-39: OurDigital Channel + +| # | Skill | code/CLAUDE.md | code/scripts | code/requirements.txt | desktop/SKILL.md | Status | +|---|-------|----------------|--------------|----------------------|------------------|--------| +| 30 | ourdigital-designer | **MISSING** | generate_prompt.py, mood_calibrator.py | **MISSING** | YES | Partial | +| 31 | ourdigital-research | **MISSING** | export_to_ulysses.py | **MISSING** | YES | Partial | +| 32 | ourdigital-presentation | **MISSING** | apply_brand.py, extract_notion.py, run_workflow.py, synthesize_content.py | **MISSING** | YES | Partial | + +### 40-49: Jamie Clinic + +| # | Skill | code/CLAUDE.md | code/scripts | code/requirements.txt | desktop/SKILL.md | Status | +|---|-------|----------------|--------------|----------------------|------------------|--------| +| 40 | jamie-brand-editor | **MISSING** | compliance_checker.py | **MISSING** | YES | Partial | +| 41 | jamie-brand-audit | **MISSING** | **EMPTY** | **MISSING** | YES | Partial | + +--- + +## Issues to Fix + +### Priority 1: Missing CLAUDE.md (Claude Code directive) + +| Skill | Has Scripts | Action | +|-------|-------------|--------| +| 01-notion-organizer | YES | Create CLAUDE.md | +| 17-seo-gateway-architect | YES | Create CLAUDE.md | +| 18-seo-gateway-builder | YES | Create CLAUDE.md | +| 30-ourdigital-designer | YES | Create CLAUDE.md | +| 31-ourdigital-research | YES | Create CLAUDE.md | +| 32-ourdigital-presentation | YES | Create CLAUDE.md | +| 40-jamie-brand-editor | YES | Create CLAUDE.md | +| 41-jamie-brand-audit | NO | Create CLAUDE.md (guidance only) | + +### Priority 2: Missing requirements.txt + +| Skill | Scripts Present | Action | +|-------|-----------------|--------| +| 12-seo-local-audit | NO | Skip (no scripts) | +| 18-seo-gateway-builder | YES | Create requirements.txt | +| 30-ourdigital-designer | YES | Create requirements.txt | +| 31-ourdigital-research | YES | Create requirements.txt | +| 32-ourdigital-presentation | YES | Create requirements.txt | +| 40-jamie-brand-editor | YES | Create requirements.txt | + +### Priority 3: Empty/Placeholder Skills + +| Skill | Action | +|-------|--------| +| 02-notion-data-migration | Decide: implement or remove | +| 12-seo-local-audit | Implement scripts or make guidance-only | +| 41-jamie-brand-audit | Already guidance-only (has references, no scripts needed) | + +--- + +## Complete Skills (Ready to Use) + +These skills have all required components: + +1. **10-seo-technical-audit** - Robots.txt, sitemap validation +2. **11-seo-on-page-audit** - Page meta tags, headings +3. **13-seo-schema-validator** - Structured data validation +4. **14-seo-schema-generator** - Schema markup generation +5. **15-seo-core-web-vitals** - PageSpeed Insights +6. **16-seo-search-console** - GSC data retrieval +7. **20-gtm-audit** - GTM container audit +8. **21-gtm-manager** - GTM management + injection + +--- + +## Recommendations + +1. **Create missing CLAUDE.md files** for skills with existing scripts (8 files needed) +2. **Create missing requirements.txt** for skills with scripts (5 files needed) +3. **12-seo-local-audit**: Keep as guidance-only skill (no scripts needed - uses MCP tools) +4. **41-jamie-brand-audit**: Keep as guidance-only (uses desktop/references for review criteria) +5. **02-notion-data-migration**: Either implement or remove from directory diff --git a/ourdigital-custom-skills/REFACTORING_PLAN.md b/ourdigital-custom-skills/REFACTORING_PLAN.md new file mode 100644 index 0000000..e65ebf7 --- /dev/null +++ b/ourdigital-custom-skills/REFACTORING_PLAN.md @@ -0,0 +1,386 @@ +# Skills Refactoring Plan + +## Guiding Principles + +1. **One thing done well** - Each skill focuses on a single, well-defined capability +2. **Directives under 1,500 words** - Concise, actionable (SKILL.md / CLAUDE.md) +3. **Dual-platform support** - Separate subdirectories for Claude Desktop and Claude Code +4. **Self-contained** - Each platform version is fully independent (no shared resources) +5. **Code-first development** - Build Claude Code version first, then refactor to Desktop +6. **Progressive numbering** - Logical grouping by domain + +--- + +## Dual-Platform Skill Structure + +Each skill has two independent versions: + +``` +XX-skill-name/ +│ +├── code/ # Claude Code version +│ ├── CLAUDE.md # Main directive +│ ├── scripts/ # Executable Python/Bash +│ │ ├── main_script.py +│ │ └── requirements.txt +│ ├── references/ # Documentation +│ └── templates/ # Output templates +│ +├── desktop/ # Claude Desktop version +│ ├── SKILL.md # Main directive (YAML frontmatter) +│ ├── references/ # Guidance docs (no scripts) +│ ├── templates/ # Output templates +│ └── examples/ # Usage examples +│ +└── README.md # Overview (optional) +``` + +### Platform Differences + +| Aspect | Claude Code (`code/`) | Claude Desktop (`desktop/`) | +|--------|----------------------|----------------------------| +| Directive | `CLAUDE.md` | `SKILL.md` (YAML frontmatter) | +| Execution | Direct Bash/Python | MCP tools only | +| Scripts | Full automation | Reference/guidance only | +| Install | Clone + pip install | Add to Project context | +| Focus | Action-oriented | Conversational guidance | + +### Development Workflow + +1. **Build Claude Code version first** - Full automation with scripts +2. **Refactor to Desktop** - Extract guidance, remove script dependencies +3. **Desktop focuses on MCP** - Use Firecrawl, Perplexity, Notion MCP tools + +--- + +## Directory Structure Overview + +| Range | Domain | Purpose | +|-------|--------|---------| +| 01-09 | General Automation | Notion, data pipelines, reporting | +| 10-19 | SEO Tools | Decomposed from seo-audit-agent | +| 20-29 | GTM/GA Tools | Tag management, analytics | +| 30-39 | OurDigital Channel | Branding, content, design | +| 40-49 | Jamie Clinic | Brand-specific tools | + +--- + +## Phase 1: Self-Contained Skills (No Shared Directory) + +### Design Decision: Duplicate Utilities Per Skill + +Each skill includes its own copy of required utilities: +- `base_client.py` - Copied to each skill needing API rate limiting +- `notion_client.py` - Copied to skills that export to Notion + +**Trade-offs**: +- (+) Each skill is fully independent and portable +- (+) No import path complexity +- (+) Skills can diverge if needed +- (-) Code duplication across skills +- (-) Updates need to be applied to multiple copies + +**Pattern**: Keep utility files minimal (~100-200 LOC) to reduce duplication cost. + +--- + +## Phase 2: SEO Tools Decomposition (10-19) + +Source: `seo-audit-agent/scripts/` (6,049 LOC → ~600 LOC per skill) + +### 10-seo-technical-audit +**Focus**: Crawlability and indexing fundamentals +- `robots_checker.py` - Robots.txt parsing and validation +- `sitemap_validator.py` - XML sitemap structure validation +- `sitemap_crawler.py` - Async URL accessibility checking + +**Triggers**: "check crawlability", "robots.txt", "sitemap validation" + +### 11-seo-on-page-audit +**Focus**: Single-page optimization analysis +- `page_analyzer.py` - Meta tags, headings, links, OG tags + +**Triggers**: "on-page SEO", "meta tags", "heading structure" + +### 12-seo-local-audit +**Focus**: Local business SEO assessment (standalone, NOT merged with schema) +- NAP (Name, Address, Phone) consistency analysis +- Google Business Profile optimization checklist +- Local citations audit +- LocalBusiness schema integration (imports from 13-seo-schema-validator) +- Review management guidelines + +**Triggers**: "local SEO", "Google Business Profile", "NAP", "citations", "local rankings" + +### 13-seo-schema-validator +**Focus**: Structured data extraction and validation +- `schema_validator.py` - JSON-LD, Microdata, RDFa parsing +- Rich Results compatibility checking + +**Triggers**: "validate schema", "structured data check" + +### 14-seo-schema-generator +**Focus**: Schema markup creation +- `schema_generator.py` - Template-based generation +- Types: Organization, Article, FAQ, Product, LocalBusiness, Breadcrumb, WebSite + +**Triggers**: "generate schema", "create JSON-LD", "add structured data" + +### 15-seo-core-web-vitals *(rename from schema-optimizer)* +**Focus**: Performance metrics only +- `pagespeed_client.py` - LCP, FID, CLS, INP, TTFB, FCP + +**Triggers**: "Core Web Vitals", "page speed", "LCP/CLS/FID" + +### 16-seo-search-console *(rename from search-intent)* +**Focus**: GSC data retrieval and analysis +- `gsc_client.py` - Rankings, CTR, impressions, sitemap status + +**Triggers**: "Search Console", "GSC data", "search performance" + +### 17-seo-gateway-architect *(keep as-is)* +**Focus**: Gateway page strategy planning + +### 18-seo-gateway-builder *(keep as-is)* +**Focus**: Gateway page content generation + +### 19-seo-audit-orchestrator *(optional)* +**Focus**: Coordinate multiple SEO tools for full audit +- Lightweight orchestrator that calls other skills +- Report aggregation and Notion export + +**Decision**: May not be needed if Claude can chain skills naturally + +--- + +## Phase 3: Fix Misplaced Directories + +### Current Issues + +| Directory | Issue | Action | +|-----------|-------|--------| +| 01-notion-organizer/02-notion-organizer/ | Nested structure | Flatten | +| 04-research-to-presentation | Wrong range | Move to 32 | +| 21-gmt-manager | Typo (gmt→gtm) | Rename | + +### Corrections + +```bash +# Fix nested structure +mv 01-notion-organizer/02-notion-organizer/* 01-notion-organizer/ +rmdir 01-notion-organizer/02-notion-organizer + +# Move to correct range +mv 04-research-to-presentation 32-ourdigital-presentation + +# Fix typo +mv 21-gmt-manager 21-gtm-manager +``` + +--- + +## Phase 4: Final Directory Structure + +``` +ourdigital-custom-skills/ +│ +├── 01-notion-organizer/ +│ ├── code/ # Claude Code version +│ └── desktop/ # Claude Desktop version +│ +├── 02-notion-data-migration/ +│ ├── code/ +│ └── desktop/ +│ +├── 10-seo-technical-audit/ +│ ├── code/ +│ └── desktop/ +│ +├── 11-seo-on-page-audit/ +│ ├── code/ +│ └── desktop/ +│ +├── 12-seo-local-audit/ +│ ├── code/ +│ └── desktop/ +│ +├── 13-seo-schema-validator/ +│ ├── code/ +│ └── desktop/ +│ +├── 14-seo-schema-generator/ +│ ├── code/ +│ └── desktop/ +│ +├── 15-seo-core-web-vitals/ +│ ├── code/ +│ └── desktop/ +│ +├── 16-seo-search-console/ +│ ├── code/ +│ └── desktop/ +│ +├── 17-seo-gateway-architect/ +│ ├── code/ +│ └── desktop/ +│ +├── 18-seo-gateway-builder/ +│ ├── code/ +│ └── desktop/ +│ +├── 20-gtm-audit/ +│ ├── code/ +│ └── desktop/ +│ +├── 21-gtm-manager/ +│ ├── code/ +│ └── desktop/ +│ +├── 30-ourdigital-designer/ +│ ├── code/ +│ └── desktop/ +│ +├── 31-ourdigital-research/ +│ ├── code/ +│ └── desktop/ +│ +├── 32-ourdigital-presentation/ +│ ├── code/ +│ └── desktop/ +│ +├── 40-jamie-brand-editor/ +│ ├── code/ +│ └── desktop/ +│ +├── 41-jamie-brand-audit/ +│ ├── code/ +│ └── desktop/ +│ +└── _archive/ # Archived after decomposition + └── seo-audit-agent/ +``` + +**Key Points**: +- Each skill has `code/` and `desktop/` subdirectories +- No sharing between platforms - fully independent +- Build `code/` first, then refactor to `desktop/` + +--- + +## Phase 5: Directive Templates (Under 1,500 Words Each) + +### Claude Desktop: `desktop/SKILL.md` + +```yaml +--- +name: skill-name +version: 1.0.0 +description: One-sentence description. Triggers: keyword1, keyword2. +allowed-tools: mcp__firecrawl__*, mcp__perplexity__*, mcp__notion__* +--- + +# Skill Name + +## Purpose +[2-3 sentences max] + +## Core Capability +[Single focused capability - what this skill does] + +## MCP Tool Usage +[Which MCP tools to use and how] + +## Workflow +[Step-by-step guidance for the task] + +## Output Format +[Expected deliverable format] + +## Limitations +[1-3 bullet points] +``` + +### Claude Code: `code/CLAUDE.md` + +```markdown +# CLAUDE.md + +## Overview +[1-2 sentences - what this skill does] + +## Quick Start +\`\`\`bash +python scripts/main.py --url https://example.com +\`\`\` + +## Scripts +| Script | Purpose | Usage | +|--------|---------|-------| +| main.py | Primary function | `python scripts/main.py [args]` | + +## Configuration +[Environment variables, credentials, API keys] + +## Output +[What the script produces] +``` + +--- + +## Implementation Order + +### Batch 1: Directory Cleanup +- [ ] Fix nested 01-notion-organizer structure +- [ ] Rename 21-gmt-manager → 21-gtm-manager +- [ ] Move 04-research-to-presentation → 32-ourdigital-presentation + +### Batch 2: SEO Decomposition +- [ ] 10-seo-technical-audit (robots + sitemap) +- [ ] 11-seo-on-page-audit (page analyzer) +- [ ] 13-seo-schema-validator +- [ ] 14-seo-schema-generator +- [ ] 15-seo-core-web-vitals (pagespeed) +- [ ] 16-seo-search-console (gsc) + +### Batch 3: Finalization +- [ ] Archive seo-audit-agent → _archive/ +- [ ] Update root CLAUDE.md with new structure +- [ ] Verify each skill works independently + +--- + +## Script Distribution Matrix + +| Script | Target Skill | LOC | +|--------|--------------|-----| +| base_client.py | 00-shared | 400 | +| robots_checker.py | 10-seo-technical-audit | 350 | +| sitemap_validator.py | 10-seo-technical-audit | 450 | +| sitemap_crawler.py | 10-seo-technical-audit | 400 | +| page_analyzer.py | 11-seo-on-page-audit | 650 | +| schema_validator.py | 13-seo-schema-validator | 600 | +| schema_generator.py | 14-seo-schema-generator | 600 | +| pagespeed_client.py | 15-seo-core-web-vitals | 500 | +| gsc_client.py | 16-seo-search-console | 400 | +| notion_reporter.py | 00-shared (base) | 900 | +| full_audit.py | ARCHIVE (orchestrator) | 800 | + +--- + +## Decisions Made + +| Question | Decision | +|----------|----------| +| Shared 00-shared/ directory? | **No** - Each skill self-contained with copied utilities | +| 12-seo-local-audit scope? | **Keep separate** - Focus on NAP/GBP/citations (broader than just schema) | +| 03-notion-reporter? | **Not needed** - Notion export is per-skill | +| 19-seo-audit-orchestrator? | **Skip** - Let Claude chain skills naturally | + +--- + +## Next Steps + +Ready to proceed? I recommend: +1. Start with **Batch 1** (infrastructure + fixes) +2. Then **Batch 2** (SEO decomposition - largest effort) +3. Validate each skill works independently before archiving seo-audit-agent diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/CLAUDE.md b/ourdigital-custom-skills/_archive/seo-audit-agent/CLAUDE.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/CLAUDE.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/CLAUDE.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/SKILL.md b/ourdigital-custom-skills/_archive/seo-audit-agent/SKILL.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/SKILL.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/SKILL.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/USER_GUIDE.md b/ourdigital-custom-skills/_archive/seo-audit-agent/USER_GUIDE.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/USER_GUIDE.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/USER_GUIDE.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/QUICK_REFERENCE.md b/ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/QUICK_REFERENCE.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/QUICK_REFERENCE.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/QUICK_REFERENCE.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/SEO_AUDIT_KNOWLEDGE.md b/ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/SEO_AUDIT_KNOWLEDGE.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/SEO_AUDIT_KNOWLEDGE.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/SEO_AUDIT_KNOWLEDGE.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/SETUP_GUIDE.md b/ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/SETUP_GUIDE.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/desktop-skill-refer/SETUP_GUIDE.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/desktop-skill-refer/SETUP_GUIDE.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/examples.md b/ourdigital-custom-skills/_archive/seo-audit-agent/examples.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/examples.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/examples.md diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/reference.md b/ourdigital-custom-skills/_archive/seo-audit-agent/reference.md similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/reference.md rename to ourdigital-custom-skills/_archive/seo-audit-agent/reference.md diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/base_client.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/base_client.py new file mode 100644 index 0000000..ac5715b --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/base_client.py @@ -0,0 +1,207 @@ +""" +Base Client - Shared async client utilities +=========================================== +Purpose: Rate-limited async operations for API clients +Python: 3.10+ +""" + +import asyncio +import logging +import os +from asyncio import Semaphore +from datetime import datetime +from typing import Any, Callable, TypeVar + +from dotenv import load_dotenv +from tenacity import ( + retry, + stop_after_attempt, + wait_exponential, + retry_if_exception_type, +) + +# Load environment variables +load_dotenv() + +# Logging setup +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) + +T = TypeVar("T") + + +class RateLimiter: + """Rate limiter using token bucket algorithm.""" + + def __init__(self, rate: float, per: float = 1.0): + """ + Initialize rate limiter. + + Args: + rate: Number of requests allowed + per: Time period in seconds (default: 1 second) + """ + self.rate = rate + self.per = per + self.tokens = rate + self.last_update = datetime.now() + self._lock = asyncio.Lock() + + async def acquire(self) -> None: + """Acquire a token, waiting if necessary.""" + async with self._lock: + now = datetime.now() + elapsed = (now - self.last_update).total_seconds() + self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per)) + self.last_update = now + + if self.tokens < 1: + wait_time = (1 - self.tokens) * (self.per / self.rate) + await asyncio.sleep(wait_time) + self.tokens = 0 + else: + self.tokens -= 1 + + +class BaseAsyncClient: + """Base class for async API clients with rate limiting.""" + + def __init__( + self, + max_concurrent: int = 5, + requests_per_second: float = 3.0, + logger: logging.Logger | None = None, + ): + """ + Initialize base client. + + Args: + max_concurrent: Maximum concurrent requests + requests_per_second: Rate limit + logger: Logger instance + """ + self.semaphore = Semaphore(max_concurrent) + self.rate_limiter = RateLimiter(requests_per_second) + self.logger = logger or logging.getLogger(self.__class__.__name__) + self.stats = { + "requests": 0, + "success": 0, + "errors": 0, + "retries": 0, + } + + @retry( + stop=stop_after_attempt(3), + wait=wait_exponential(multiplier=1, min=2, max=10), + retry=retry_if_exception_type(Exception), + ) + async def _rate_limited_request( + self, + coro: Callable[[], Any], + ) -> Any: + """Execute a request with rate limiting and retry.""" + async with self.semaphore: + await self.rate_limiter.acquire() + self.stats["requests"] += 1 + try: + result = await coro() + self.stats["success"] += 1 + return result + except Exception as e: + self.stats["errors"] += 1 + self.logger.error(f"Request failed: {e}") + raise + + async def batch_requests( + self, + requests: list[Callable[[], Any]], + desc: str = "Processing", + ) -> list[Any]: + """Execute multiple requests concurrently.""" + try: + from tqdm.asyncio import tqdm + has_tqdm = True + except ImportError: + has_tqdm = False + + async def execute(req: Callable) -> Any: + try: + return await self._rate_limited_request(req) + except Exception as e: + return {"error": str(e)} + + tasks = [execute(req) for req in requests] + + if has_tqdm: + results = [] + for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc): + result = await coro + results.append(result) + return results + else: + return await asyncio.gather(*tasks, return_exceptions=True) + + def print_stats(self) -> None: + """Print request statistics.""" + self.logger.info("=" * 40) + self.logger.info("Request Statistics:") + self.logger.info(f" Total Requests: {self.stats['requests']}") + self.logger.info(f" Successful: {self.stats['success']}") + self.logger.info(f" Errors: {self.stats['errors']}") + self.logger.info("=" * 40) + + +class ConfigManager: + """Manage API configuration and credentials.""" + + def __init__(self): + load_dotenv() + + @property + def google_credentials_path(self) -> str | None: + """Get Google service account credentials path.""" + # Prefer SEO-specific credentials, fallback to general credentials + seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json") + if os.path.exists(seo_creds): + return seo_creds + return os.getenv("GOOGLE_APPLICATION_CREDENTIALS") + + @property + def pagespeed_api_key(self) -> str | None: + """Get PageSpeed Insights API key.""" + return os.getenv("PAGESPEED_API_KEY") + + @property + def custom_search_api_key(self) -> str | None: + """Get Custom Search API key.""" + return os.getenv("CUSTOM_SEARCH_API_KEY") + + @property + def custom_search_engine_id(self) -> str | None: + """Get Custom Search Engine ID.""" + return os.getenv("CUSTOM_SEARCH_ENGINE_ID") + + @property + def notion_token(self) -> str | None: + """Get Notion API token.""" + return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY") + + def validate_google_credentials(self) -> bool: + """Validate Google credentials are configured.""" + creds_path = self.google_credentials_path + if not creds_path: + return False + return os.path.exists(creds_path) + + def get_required(self, key: str) -> str: + """Get required environment variable or raise error.""" + value = os.getenv(key) + if not value: + raise ValueError(f"Missing required environment variable: {key}") + return value + + +# Singleton config instance +config = ConfigManager() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/full_audit.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/full_audit.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/full_audit.py rename to ourdigital-custom-skills/_archive/seo-audit-agent/scripts/full_audit.py diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/gsc_client.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/gsc_client.py new file mode 100644 index 0000000..c203b2b --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/gsc_client.py @@ -0,0 +1,409 @@ +""" +Google Search Console Client +============================ +Purpose: Interact with Google Search Console API for SEO data +Python: 3.10+ +Usage: + from gsc_client import SearchConsoleClient + client = SearchConsoleClient() + data = client.get_search_analytics("sc-domain:example.com") +""" + +import logging +from dataclasses import dataclass, field +from datetime import datetime, timedelta +from typing import Any + +from google.oauth2 import service_account +from googleapiclient.discovery import build + +from base_client import config + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class SearchAnalyticsResult: + """Search analytics query result.""" + + rows: list[dict] = field(default_factory=list) + total_clicks: int = 0 + total_impressions: int = 0 + average_ctr: float = 0.0 + average_position: float = 0.0 + + +@dataclass +class SitemapInfo: + """Sitemap information from Search Console.""" + + path: str + last_submitted: str | None = None + last_downloaded: str | None = None + is_pending: bool = False + is_sitemaps_index: bool = False + warnings: int = 0 + errors: int = 0 + + +class SearchConsoleClient: + """Client for Google Search Console API.""" + + SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"] + + def __init__(self, credentials_path: str | None = None): + """ + Initialize Search Console client. + + Args: + credentials_path: Path to service account JSON key + """ + self.credentials_path = credentials_path or config.google_credentials_path + self._service = None + + @property + def service(self): + """Get or create Search Console service.""" + if self._service is None: + if not self.credentials_path: + raise ValueError( + "Google credentials not configured. " + "Set GOOGLE_APPLICATION_CREDENTIALS environment variable." + ) + + credentials = service_account.Credentials.from_service_account_file( + self.credentials_path, + scopes=self.SCOPES, + ) + self._service = build("searchconsole", "v1", credentials=credentials) + + return self._service + + def list_sites(self) -> list[dict]: + """List all sites accessible to the service account.""" + response = self.service.sites().list().execute() + return response.get("siteEntry", []) + + def get_search_analytics( + self, + site_url: str, + start_date: str | None = None, + end_date: str | None = None, + dimensions: list[str] | None = None, + row_limit: int = 25000, + filters: list[dict] | None = None, + ) -> SearchAnalyticsResult: + """ + Get search analytics data. + + Args: + site_url: Site URL (e.g., "sc-domain:example.com" or "https://example.com/") + start_date: Start date (YYYY-MM-DD), defaults to 30 days ago + end_date: End date (YYYY-MM-DD), defaults to yesterday + dimensions: List of dimensions (query, page, country, device, date) + row_limit: Maximum rows to return + filters: Dimension filters + + Returns: + SearchAnalyticsResult with rows and summary stats + """ + # Default date range: last 30 days + if not end_date: + end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") + if not start_date: + start_date = (datetime.now() - timedelta(days=30)).strftime("%Y-%m-%d") + + # Default dimensions + if dimensions is None: + dimensions = ["query", "page"] + + request_body = { + "startDate": start_date, + "endDate": end_date, + "dimensions": dimensions, + "rowLimit": row_limit, + } + + if filters: + request_body["dimensionFilterGroups"] = [{"filters": filters}] + + try: + response = self.service.searchanalytics().query( + siteUrl=site_url, + body=request_body, + ).execute() + except Exception as e: + logger.error(f"Failed to query search analytics: {e}") + raise + + rows = response.get("rows", []) + + # Calculate totals + total_clicks = sum(row.get("clicks", 0) for row in rows) + total_impressions = sum(row.get("impressions", 0) for row in rows) + total_ctr = sum(row.get("ctr", 0) for row in rows) + total_position = sum(row.get("position", 0) for row in rows) + + avg_ctr = total_ctr / len(rows) if rows else 0 + avg_position = total_position / len(rows) if rows else 0 + + return SearchAnalyticsResult( + rows=rows, + total_clicks=total_clicks, + total_impressions=total_impressions, + average_ctr=avg_ctr, + average_position=avg_position, + ) + + def get_top_queries( + self, + site_url: str, + limit: int = 100, + start_date: str | None = None, + end_date: str | None = None, + ) -> list[dict]: + """Get top search queries by clicks.""" + result = self.get_search_analytics( + site_url=site_url, + dimensions=["query"], + row_limit=limit, + start_date=start_date, + end_date=end_date, + ) + + # Sort by clicks + sorted_rows = sorted( + result.rows, + key=lambda x: x.get("clicks", 0), + reverse=True, + ) + + return [ + { + "query": row["keys"][0], + "clicks": row.get("clicks", 0), + "impressions": row.get("impressions", 0), + "ctr": row.get("ctr", 0), + "position": row.get("position", 0), + } + for row in sorted_rows[:limit] + ] + + def get_top_pages( + self, + site_url: str, + limit: int = 100, + start_date: str | None = None, + end_date: str | None = None, + ) -> list[dict]: + """Get top pages by clicks.""" + result = self.get_search_analytics( + site_url=site_url, + dimensions=["page"], + row_limit=limit, + start_date=start_date, + end_date=end_date, + ) + + sorted_rows = sorted( + result.rows, + key=lambda x: x.get("clicks", 0), + reverse=True, + ) + + return [ + { + "page": row["keys"][0], + "clicks": row.get("clicks", 0), + "impressions": row.get("impressions", 0), + "ctr": row.get("ctr", 0), + "position": row.get("position", 0), + } + for row in sorted_rows[:limit] + ] + + def get_sitemaps(self, site_url: str) -> list[SitemapInfo]: + """Get list of sitemaps for a site.""" + try: + response = self.service.sitemaps().list(siteUrl=site_url).execute() + except Exception as e: + logger.error(f"Failed to get sitemaps: {e}") + raise + + sitemaps = [] + for sm in response.get("sitemap", []): + sitemaps.append(SitemapInfo( + path=sm.get("path", ""), + last_submitted=sm.get("lastSubmitted"), + last_downloaded=sm.get("lastDownloaded"), + is_pending=sm.get("isPending", False), + is_sitemaps_index=sm.get("isSitemapsIndex", False), + warnings=sm.get("warnings", 0), + errors=sm.get("errors", 0), + )) + + return sitemaps + + def submit_sitemap(self, site_url: str, sitemap_url: str) -> bool: + """Submit a sitemap for indexing.""" + try: + self.service.sitemaps().submit( + siteUrl=site_url, + feedpath=sitemap_url, + ).execute() + logger.info(f"Submitted sitemap: {sitemap_url}") + return True + except Exception as e: + logger.error(f"Failed to submit sitemap: {e}") + return False + + def inspect_url(self, site_url: str, inspection_url: str) -> dict: + """ + Inspect a URL's indexing status. + + Note: This uses the URL Inspection API which may have different quotas. + """ + try: + response = self.service.urlInspection().index().inspect( + body={ + "inspectionUrl": inspection_url, + "siteUrl": site_url, + } + ).execute() + + result = response.get("inspectionResult", {}) + + return { + "url": inspection_url, + "indexing_state": result.get("indexStatusResult", {}).get( + "coverageState", "Unknown" + ), + "last_crawl_time": result.get("indexStatusResult", {}).get( + "lastCrawlTime" + ), + "crawled_as": result.get("indexStatusResult", {}).get("crawledAs"), + "robots_txt_state": result.get("indexStatusResult", {}).get( + "robotsTxtState" + ), + "mobile_usability": result.get("mobileUsabilityResult", {}).get( + "verdict", "Unknown" + ), + "rich_results": result.get("richResultsResult", {}).get( + "verdict", "Unknown" + ), + } + except Exception as e: + logger.error(f"Failed to inspect URL: {e}") + raise + + def get_performance_summary( + self, + site_url: str, + days: int = 30, + ) -> dict: + """Get a summary of search performance.""" + end_date = (datetime.now() - timedelta(days=1)).strftime("%Y-%m-%d") + start_date = (datetime.now() - timedelta(days=days)).strftime("%Y-%m-%d") + + # Get overall stats + overall = self.get_search_analytics( + site_url=site_url, + dimensions=[], + start_date=start_date, + end_date=end_date, + ) + + # Get top queries + top_queries = self.get_top_queries( + site_url=site_url, + limit=10, + start_date=start_date, + end_date=end_date, + ) + + # Get top pages + top_pages = self.get_top_pages( + site_url=site_url, + limit=10, + start_date=start_date, + end_date=end_date, + ) + + # Get by device + by_device = self.get_search_analytics( + site_url=site_url, + dimensions=["device"], + start_date=start_date, + end_date=end_date, + ) + + device_breakdown = {} + for row in by_device.rows: + device = row["keys"][0] + device_breakdown[device] = { + "clicks": row.get("clicks", 0), + "impressions": row.get("impressions", 0), + "ctr": row.get("ctr", 0), + "position": row.get("position", 0), + } + + return { + "period": f"{start_date} to {end_date}", + "total_clicks": overall.total_clicks, + "total_impressions": overall.total_impressions, + "average_ctr": overall.average_ctr, + "average_position": overall.average_position, + "top_queries": top_queries, + "top_pages": top_pages, + "by_device": device_breakdown, + } + + +def main(): + """Test the Search Console client.""" + import argparse + + parser = argparse.ArgumentParser(description="Google Search Console Client") + parser.add_argument("--site", "-s", required=True, help="Site URL") + parser.add_argument("--action", "-a", default="summary", + choices=["summary", "queries", "pages", "sitemaps", "inspect"], + help="Action to perform") + parser.add_argument("--url", help="URL to inspect") + parser.add_argument("--days", type=int, default=30, help="Days of data") + + args = parser.parse_args() + + client = SearchConsoleClient() + + if args.action == "summary": + summary = client.get_performance_summary(args.site, args.days) + import json + print(json.dumps(summary, indent=2, default=str)) + + elif args.action == "queries": + queries = client.get_top_queries(args.site) + for q in queries[:20]: + print(f"{q['query']}: {q['clicks']} clicks, pos {q['position']:.1f}") + + elif args.action == "pages": + pages = client.get_top_pages(args.site) + for p in pages[:20]: + print(f"{p['page']}: {p['clicks']} clicks, pos {p['position']:.1f}") + + elif args.action == "sitemaps": + sitemaps = client.get_sitemaps(args.site) + for sm in sitemaps: + print(f"{sm.path}: errors={sm.errors}, warnings={sm.warnings}") + + elif args.action == "inspect" and args.url: + result = client.inspect_url(args.site, args.url) + import json + print(json.dumps(result, indent=2)) + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/notion_reporter.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/notion_reporter.py similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/notion_reporter.py rename to ourdigital-custom-skills/_archive/seo-audit-agent/scripts/notion_reporter.py diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/page_analyzer.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/page_analyzer.py new file mode 100644 index 0000000..b662e81 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/page_analyzer.py @@ -0,0 +1,569 @@ +""" +Page Analyzer - Extract SEO metadata from web pages +=================================================== +Purpose: Comprehensive page-level SEO data extraction +Python: 3.10+ +Usage: + from page_analyzer import PageAnalyzer, PageMetadata + analyzer = PageAnalyzer() + metadata = analyzer.analyze_url("https://example.com/page") +""" + +import json +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any +from urllib.parse import urljoin, urlparse + +import requests +from bs4 import BeautifulSoup + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class LinkData: + """Represents a link found on a page.""" + url: str + anchor_text: str + is_internal: bool + is_nofollow: bool = False + link_type: str = "body" # body, nav, footer, etc. + + +@dataclass +class HeadingData: + """Represents a heading found on a page.""" + level: int # 1-6 + text: str + + +@dataclass +class SchemaData: + """Represents schema.org structured data.""" + schema_type: str + properties: dict + format: str = "json-ld" # json-ld, microdata, rdfa + + +@dataclass +class OpenGraphData: + """Represents Open Graph metadata.""" + og_title: str | None = None + og_description: str | None = None + og_image: str | None = None + og_url: str | None = None + og_type: str | None = None + og_site_name: str | None = None + og_locale: str | None = None + twitter_card: str | None = None + twitter_title: str | None = None + twitter_description: str | None = None + twitter_image: str | None = None + + +@dataclass +class PageMetadata: + """Complete SEO metadata for a page.""" + + # Basic info + url: str + status_code: int = 0 + content_type: str = "" + response_time_ms: float = 0 + analyzed_at: datetime = field(default_factory=datetime.now) + + # Meta tags + title: str | None = None + title_length: int = 0 + meta_description: str | None = None + meta_description_length: int = 0 + canonical_url: str | None = None + robots_meta: str | None = None + + # Language + html_lang: str | None = None + hreflang_tags: list[dict] = field(default_factory=list) # [{"lang": "en", "url": "..."}] + + # Headings + headings: list[HeadingData] = field(default_factory=list) + h1_count: int = 0 + h1_text: str | None = None + + # Open Graph & Social + open_graph: OpenGraphData = field(default_factory=OpenGraphData) + + # Schema/Structured Data + schema_data: list[SchemaData] = field(default_factory=list) + schema_types_found: list[str] = field(default_factory=list) + + # Links + internal_links: list[LinkData] = field(default_factory=list) + external_links: list[LinkData] = field(default_factory=list) + internal_link_count: int = 0 + external_link_count: int = 0 + + # Images + images_total: int = 0 + images_without_alt: int = 0 + images_with_alt: int = 0 + + # Content metrics + word_count: int = 0 + + # Issues found + issues: list[str] = field(default_factory=list) + warnings: list[str] = field(default_factory=list) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization.""" + return { + "url": self.url, + "status_code": self.status_code, + "content_type": self.content_type, + "response_time_ms": self.response_time_ms, + "analyzed_at": self.analyzed_at.isoformat(), + "title": self.title, + "title_length": self.title_length, + "meta_description": self.meta_description, + "meta_description_length": self.meta_description_length, + "canonical_url": self.canonical_url, + "robots_meta": self.robots_meta, + "html_lang": self.html_lang, + "hreflang_tags": self.hreflang_tags, + "h1_count": self.h1_count, + "h1_text": self.h1_text, + "headings_count": len(self.headings), + "schema_types_found": self.schema_types_found, + "internal_link_count": self.internal_link_count, + "external_link_count": self.external_link_count, + "images_total": self.images_total, + "images_without_alt": self.images_without_alt, + "word_count": self.word_count, + "issues": self.issues, + "warnings": self.warnings, + "open_graph": { + "og_title": self.open_graph.og_title, + "og_description": self.open_graph.og_description, + "og_image": self.open_graph.og_image, + "og_url": self.open_graph.og_url, + "og_type": self.open_graph.og_type, + }, + } + + def get_summary(self) -> str: + """Get a brief summary of the page analysis.""" + lines = [ + f"URL: {self.url}", + f"Status: {self.status_code}", + f"Title: {self.title[:50] + '...' if self.title and len(self.title) > 50 else self.title}", + f"Description: {'✓' if self.meta_description else '✗ Missing'}", + f"Canonical: {'✓' if self.canonical_url else '✗ Missing'}", + f"H1: {self.h1_count} found", + f"Schema: {', '.join(self.schema_types_found) if self.schema_types_found else 'None'}", + f"Links: {self.internal_link_count} internal, {self.external_link_count} external", + f"Images: {self.images_total} total, {self.images_without_alt} without alt", + ] + if self.issues: + lines.append(f"Issues: {len(self.issues)}") + return "\n".join(lines) + + +class PageAnalyzer: + """Analyze web pages for SEO metadata.""" + + DEFAULT_USER_AGENT = "Mozilla/5.0 (compatible; OurDigitalSEOBot/1.0; +https://ourdigital.org)" + + def __init__( + self, + user_agent: str | None = None, + timeout: int = 30, + ): + """ + Initialize page analyzer. + + Args: + user_agent: Custom user agent string + timeout: Request timeout in seconds + """ + self.user_agent = user_agent or self.DEFAULT_USER_AGENT + self.timeout = timeout + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": self.user_agent, + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9,ko;q=0.8", + }) + + def analyze_url(self, url: str) -> PageMetadata: + """ + Analyze a URL and extract SEO metadata. + + Args: + url: URL to analyze + + Returns: + PageMetadata object with all extracted data + """ + metadata = PageMetadata(url=url) + + try: + # Fetch page + start_time = datetime.now() + response = self.session.get(url, timeout=self.timeout, allow_redirects=True) + metadata.response_time_ms = (datetime.now() - start_time).total_seconds() * 1000 + metadata.status_code = response.status_code + metadata.content_type = response.headers.get("Content-Type", "") + + if response.status_code != 200: + metadata.issues.append(f"HTTP {response.status_code} status") + if response.status_code >= 400: + return metadata + + # Parse HTML + soup = BeautifulSoup(response.text, "html.parser") + base_url = url + + # Extract all metadata + self._extract_basic_meta(soup, metadata) + self._extract_canonical(soup, metadata, base_url) + self._extract_robots_meta(soup, metadata) + self._extract_hreflang(soup, metadata) + self._extract_headings(soup, metadata) + self._extract_open_graph(soup, metadata) + self._extract_schema(soup, metadata) + self._extract_links(soup, metadata, base_url) + self._extract_images(soup, metadata) + self._extract_content_metrics(soup, metadata) + + # Run SEO checks + self._run_seo_checks(metadata) + + except requests.RequestException as e: + metadata.issues.append(f"Request failed: {str(e)}") + logger.error(f"Failed to analyze {url}: {e}") + except Exception as e: + metadata.issues.append(f"Analysis error: {str(e)}") + logger.error(f"Error analyzing {url}: {e}") + + return metadata + + def _extract_basic_meta(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract title and meta description.""" + # Title + title_tag = soup.find("title") + if title_tag and title_tag.string: + metadata.title = title_tag.string.strip() + metadata.title_length = len(metadata.title) + + # Meta description + desc_tag = soup.find("meta", attrs={"name": re.compile(r"^description$", re.I)}) + if desc_tag and desc_tag.get("content"): + metadata.meta_description = desc_tag["content"].strip() + metadata.meta_description_length = len(metadata.meta_description) + + # HTML lang + html_tag = soup.find("html") + if html_tag and html_tag.get("lang"): + metadata.html_lang = html_tag["lang"] + + def _extract_canonical(self, soup: BeautifulSoup, metadata: PageMetadata, base_url: str) -> None: + """Extract canonical URL.""" + canonical = soup.find("link", rel="canonical") + if canonical and canonical.get("href"): + metadata.canonical_url = urljoin(base_url, canonical["href"]) + + def _extract_robots_meta(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract robots meta tag.""" + robots = soup.find("meta", attrs={"name": re.compile(r"^robots$", re.I)}) + if robots and robots.get("content"): + metadata.robots_meta = robots["content"] + + # Also check for googlebot-specific + googlebot = soup.find("meta", attrs={"name": re.compile(r"^googlebot$", re.I)}) + if googlebot and googlebot.get("content"): + if metadata.robots_meta: + metadata.robots_meta += f" | googlebot: {googlebot['content']}" + else: + metadata.robots_meta = f"googlebot: {googlebot['content']}" + + def _extract_hreflang(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract hreflang tags.""" + hreflang_tags = soup.find_all("link", rel="alternate", hreflang=True) + for tag in hreflang_tags: + if tag.get("href") and tag.get("hreflang"): + metadata.hreflang_tags.append({ + "lang": tag["hreflang"], + "url": tag["href"] + }) + + def _extract_headings(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract all headings.""" + for level in range(1, 7): + for heading in soup.find_all(f"h{level}"): + text = heading.get_text(strip=True) + if text: + metadata.headings.append(HeadingData(level=level, text=text)) + + # Count H1s specifically + h1_tags = soup.find_all("h1") + metadata.h1_count = len(h1_tags) + if h1_tags: + metadata.h1_text = h1_tags[0].get_text(strip=True) + + def _extract_open_graph(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract Open Graph and Twitter Card data.""" + og = metadata.open_graph + + # Open Graph tags + og_mappings = { + "og:title": "og_title", + "og:description": "og_description", + "og:image": "og_image", + "og:url": "og_url", + "og:type": "og_type", + "og:site_name": "og_site_name", + "og:locale": "og_locale", + } + + for og_prop, attr_name in og_mappings.items(): + tag = soup.find("meta", property=og_prop) + if tag and tag.get("content"): + setattr(og, attr_name, tag["content"]) + + # Twitter Card tags + twitter_mappings = { + "twitter:card": "twitter_card", + "twitter:title": "twitter_title", + "twitter:description": "twitter_description", + "twitter:image": "twitter_image", + } + + for tw_name, attr_name in twitter_mappings.items(): + tag = soup.find("meta", attrs={"name": tw_name}) + if tag and tag.get("content"): + setattr(og, attr_name, tag["content"]) + + def _extract_schema(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract schema.org structured data.""" + # JSON-LD + for script in soup.find_all("script", type="application/ld+json"): + try: + data = json.loads(script.string) + if isinstance(data, list): + for item in data: + self._process_schema_item(item, metadata, "json-ld") + else: + self._process_schema_item(data, metadata, "json-ld") + except (json.JSONDecodeError, TypeError): + continue + + # Microdata (basic detection) + for item in soup.find_all(itemscope=True): + itemtype = item.get("itemtype", "") + if itemtype: + schema_type = itemtype.split("/")[-1] + if schema_type not in metadata.schema_types_found: + metadata.schema_types_found.append(schema_type) + metadata.schema_data.append(SchemaData( + schema_type=schema_type, + properties={}, + format="microdata" + )) + + def _process_schema_item(self, data: dict, metadata: PageMetadata, format_type: str) -> None: + """Process a single schema.org item.""" + if not isinstance(data, dict): + return + + schema_type = data.get("@type", "Unknown") + if isinstance(schema_type, list): + schema_type = schema_type[0] if schema_type else "Unknown" + + if schema_type not in metadata.schema_types_found: + metadata.schema_types_found.append(schema_type) + + metadata.schema_data.append(SchemaData( + schema_type=schema_type, + properties=data, + format=format_type + )) + + # Process nested @graph items + if "@graph" in data: + for item in data["@graph"]: + self._process_schema_item(item, metadata, format_type) + + def _extract_links(self, soup: BeautifulSoup, metadata: PageMetadata, base_url: str) -> None: + """Extract internal and external links.""" + parsed_base = urlparse(base_url) + base_domain = parsed_base.netloc.lower() + + for a_tag in soup.find_all("a", href=True): + href = a_tag["href"] + + # Skip non-http links + if href.startswith(("#", "javascript:", "mailto:", "tel:")): + continue + + # Resolve relative URLs + full_url = urljoin(base_url, href) + parsed_url = urlparse(full_url) + + # Get anchor text + anchor_text = a_tag.get_text(strip=True)[:100] # Limit length + + # Check if nofollow + rel = a_tag.get("rel", []) + if isinstance(rel, str): + rel = rel.split() + is_nofollow = "nofollow" in rel + + # Determine if internal or external + link_domain = parsed_url.netloc.lower() + is_internal = ( + link_domain == base_domain or + link_domain.endswith(f".{base_domain}") or + base_domain.endswith(f".{link_domain}") + ) + + link_data = LinkData( + url=full_url, + anchor_text=anchor_text, + is_internal=is_internal, + is_nofollow=is_nofollow, + ) + + if is_internal: + metadata.internal_links.append(link_data) + else: + metadata.external_links.append(link_data) + + metadata.internal_link_count = len(metadata.internal_links) + metadata.external_link_count = len(metadata.external_links) + + def _extract_images(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract image information.""" + images = soup.find_all("img") + metadata.images_total = len(images) + + for img in images: + alt = img.get("alt", "").strip() + if alt: + metadata.images_with_alt += 1 + else: + metadata.images_without_alt += 1 + + def _extract_content_metrics(self, soup: BeautifulSoup, metadata: PageMetadata) -> None: + """Extract content metrics like word count.""" + # Remove script and style elements + for element in soup(["script", "style", "noscript"]): + element.decompose() + + # Get text content + text = soup.get_text(separator=" ", strip=True) + words = text.split() + metadata.word_count = len(words) + + def _run_seo_checks(self, metadata: PageMetadata) -> None: + """Run SEO checks and add issues/warnings.""" + # Title checks + if not metadata.title: + metadata.issues.append("Missing title tag") + elif metadata.title_length < 30: + metadata.warnings.append(f"Title too short ({metadata.title_length} chars, recommend 50-60)") + elif metadata.title_length > 60: + metadata.warnings.append(f"Title too long ({metadata.title_length} chars, recommend 50-60)") + + # Meta description checks + if not metadata.meta_description: + metadata.issues.append("Missing meta description") + elif metadata.meta_description_length < 120: + metadata.warnings.append(f"Meta description too short ({metadata.meta_description_length} chars)") + elif metadata.meta_description_length > 160: + metadata.warnings.append(f"Meta description too long ({metadata.meta_description_length} chars)") + + # Canonical check + if not metadata.canonical_url: + metadata.warnings.append("Missing canonical tag") + elif metadata.canonical_url != metadata.url: + metadata.warnings.append(f"Canonical points to different URL: {metadata.canonical_url}") + + # H1 checks + if metadata.h1_count == 0: + metadata.issues.append("Missing H1 tag") + elif metadata.h1_count > 1: + metadata.warnings.append(f"Multiple H1 tags ({metadata.h1_count})") + + # Image alt check + if metadata.images_without_alt > 0: + metadata.warnings.append(f"{metadata.images_without_alt} images missing alt text") + + # Schema check + if not metadata.schema_types_found: + metadata.warnings.append("No structured data found") + + # Open Graph check + if not metadata.open_graph.og_title: + metadata.warnings.append("Missing Open Graph tags") + + # Robots meta check + if metadata.robots_meta: + robots_lower = metadata.robots_meta.lower() + if "noindex" in robots_lower: + metadata.issues.append("Page is set to noindex") + if "nofollow" in robots_lower: + metadata.warnings.append("Page is set to nofollow") + + +def main(): + """CLI entry point for testing.""" + import argparse + + parser = argparse.ArgumentParser(description="Page SEO Analyzer") + parser.add_argument("url", help="URL to analyze") + parser.add_argument("--json", "-j", action="store_true", help="Output as JSON") + + args = parser.parse_args() + + analyzer = PageAnalyzer() + metadata = analyzer.analyze_url(args.url) + + if args.json: + print(json.dumps(metadata.to_dict(), indent=2, ensure_ascii=False)) + else: + print("=" * 60) + print("PAGE ANALYSIS REPORT") + print("=" * 60) + print(metadata.get_summary()) + print() + + if metadata.issues: + print("ISSUES:") + for issue in metadata.issues: + print(f" ✗ {issue}") + + if metadata.warnings: + print("\nWARNINGS:") + for warning in metadata.warnings: + print(f" ⚠ {warning}") + + if metadata.hreflang_tags: + print(f"\nHREFLANG TAGS ({len(metadata.hreflang_tags)}):") + for tag in metadata.hreflang_tags[:5]: + print(f" {tag['lang']}: {tag['url']}") + + if metadata.schema_types_found: + print(f"\nSCHEMA TYPES:") + for schema_type in metadata.schema_types_found: + print(f" - {schema_type}") + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/pagespeed_client.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/pagespeed_client.py new file mode 100644 index 0000000..c2c7493 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/pagespeed_client.py @@ -0,0 +1,452 @@ +""" +PageSpeed Insights Client +========================= +Purpose: Get Core Web Vitals and performance data from PageSpeed Insights API +Python: 3.10+ +Usage: + from pagespeed_client import PageSpeedClient + client = PageSpeedClient() + result = client.analyze("https://example.com") +""" + +import argparse +import json +import logging +from dataclasses import dataclass, field +from typing import Any + +import requests + +from base_client import config + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class CoreWebVitals: + """Core Web Vitals metrics.""" + + lcp: float | None = None # Largest Contentful Paint (ms) + fid: float | None = None # First Input Delay (ms) + cls: float | None = None # Cumulative Layout Shift + inp: float | None = None # Interaction to Next Paint (ms) + ttfb: float | None = None # Time to First Byte (ms) + fcp: float | None = None # First Contentful Paint (ms) + + # Assessment (GOOD, NEEDS_IMPROVEMENT, POOR) + lcp_rating: str | None = None + fid_rating: str | None = None + cls_rating: str | None = None + inp_rating: str | None = None + + def to_dict(self) -> dict: + return { + "lcp": {"value": self.lcp, "rating": self.lcp_rating}, + "fid": {"value": self.fid, "rating": self.fid_rating}, + "cls": {"value": self.cls, "rating": self.cls_rating}, + "inp": {"value": self.inp, "rating": self.inp_rating}, + "ttfb": {"value": self.ttfb}, + "fcp": {"value": self.fcp}, + } + + +@dataclass +class PageSpeedResult: + """PageSpeed analysis result.""" + + url: str + strategy: str # mobile or desktop + performance_score: float | None = None + seo_score: float | None = None + accessibility_score: float | None = None + best_practices_score: float | None = None + core_web_vitals: CoreWebVitals = field(default_factory=CoreWebVitals) + opportunities: list[dict] = field(default_factory=list) + diagnostics: list[dict] = field(default_factory=list) + passed_audits: list[str] = field(default_factory=list) + raw_data: dict = field(default_factory=dict) + + def to_dict(self) -> dict: + return { + "url": self.url, + "strategy": self.strategy, + "scores": { + "performance": self.performance_score, + "seo": self.seo_score, + "accessibility": self.accessibility_score, + "best_practices": self.best_practices_score, + }, + "core_web_vitals": self.core_web_vitals.to_dict(), + "opportunities_count": len(self.opportunities), + "opportunities": self.opportunities[:10], + "diagnostics_count": len(self.diagnostics), + "passed_audits_count": len(self.passed_audits), + } + + +class PageSpeedClient: + """Client for PageSpeed Insights API.""" + + BASE_URL = "https://www.googleapis.com/pagespeedonline/v5/runPagespeed" + + # Core Web Vitals thresholds + THRESHOLDS = { + "lcp": {"good": 2500, "poor": 4000}, + "fid": {"good": 100, "poor": 300}, + "cls": {"good": 0.1, "poor": 0.25}, + "inp": {"good": 200, "poor": 500}, + "ttfb": {"good": 800, "poor": 1800}, + "fcp": {"good": 1800, "poor": 3000}, + } + + def __init__(self, api_key: str | None = None): + """ + Initialize PageSpeed client. + + Args: + api_key: PageSpeed API key (optional but recommended for higher quotas) + """ + self.api_key = api_key or config.pagespeed_api_key + self.session = requests.Session() + + def _rate_metric(self, metric: str, value: float | None) -> str | None: + """Rate a metric against thresholds.""" + if value is None: + return None + + thresholds = self.THRESHOLDS.get(metric) + if not thresholds: + return None + + if value <= thresholds["good"]: + return "GOOD" + elif value <= thresholds["poor"]: + return "NEEDS_IMPROVEMENT" + else: + return "POOR" + + def analyze( + self, + url: str, + strategy: str = "mobile", + categories: list[str] | None = None, + ) -> PageSpeedResult: + """ + Analyze a URL with PageSpeed Insights. + + Args: + url: URL to analyze + strategy: "mobile" or "desktop" + categories: Categories to analyze (performance, seo, accessibility, best-practices) + + Returns: + PageSpeedResult with scores and metrics + """ + if categories is None: + categories = ["performance", "seo", "accessibility", "best-practices"] + + params = { + "url": url, + "strategy": strategy, + "category": categories, + } + + if self.api_key: + params["key"] = self.api_key + + try: + response = self.session.get(self.BASE_URL, params=params, timeout=60) + response.raise_for_status() + data = response.json() + except requests.RequestException as e: + logger.error(f"PageSpeed API request failed: {e}") + raise + + result = PageSpeedResult(url=url, strategy=strategy, raw_data=data) + + # Extract scores + lighthouse = data.get("lighthouseResult", {}) + categories_data = lighthouse.get("categories", {}) + + if "performance" in categories_data: + score = categories_data["performance"].get("score") + result.performance_score = score * 100 if score else None + + if "seo" in categories_data: + score = categories_data["seo"].get("score") + result.seo_score = score * 100 if score else None + + if "accessibility" in categories_data: + score = categories_data["accessibility"].get("score") + result.accessibility_score = score * 100 if score else None + + if "best-practices" in categories_data: + score = categories_data["best-practices"].get("score") + result.best_practices_score = score * 100 if score else None + + # Extract Core Web Vitals + audits = lighthouse.get("audits", {}) + + # Lab data + cwv = result.core_web_vitals + + if "largest-contentful-paint" in audits: + cwv.lcp = audits["largest-contentful-paint"].get("numericValue") + cwv.lcp_rating = self._rate_metric("lcp", cwv.lcp) + + if "total-blocking-time" in audits: + # TBT is proxy for FID in lab data + cwv.fid = audits["total-blocking-time"].get("numericValue") + cwv.fid_rating = self._rate_metric("fid", cwv.fid) + + if "cumulative-layout-shift" in audits: + cwv.cls = audits["cumulative-layout-shift"].get("numericValue") + cwv.cls_rating = self._rate_metric("cls", cwv.cls) + + if "experimental-interaction-to-next-paint" in audits: + cwv.inp = audits["experimental-interaction-to-next-paint"].get("numericValue") + cwv.inp_rating = self._rate_metric("inp", cwv.inp) + + if "server-response-time" in audits: + cwv.ttfb = audits["server-response-time"].get("numericValue") + + if "first-contentful-paint" in audits: + cwv.fcp = audits["first-contentful-paint"].get("numericValue") + + # Field data (real user data) if available + loading_exp = data.get("loadingExperience", {}) + metrics = loading_exp.get("metrics", {}) + + if "LARGEST_CONTENTFUL_PAINT_MS" in metrics: + cwv.lcp = metrics["LARGEST_CONTENTFUL_PAINT_MS"].get("percentile") + cwv.lcp_rating = metrics["LARGEST_CONTENTFUL_PAINT_MS"].get("category") + + if "FIRST_INPUT_DELAY_MS" in metrics: + cwv.fid = metrics["FIRST_INPUT_DELAY_MS"].get("percentile") + cwv.fid_rating = metrics["FIRST_INPUT_DELAY_MS"].get("category") + + if "CUMULATIVE_LAYOUT_SHIFT_SCORE" in metrics: + cwv.cls = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"].get("percentile") / 100 + cwv.cls_rating = metrics["CUMULATIVE_LAYOUT_SHIFT_SCORE"].get("category") + + if "INTERACTION_TO_NEXT_PAINT" in metrics: + cwv.inp = metrics["INTERACTION_TO_NEXT_PAINT"].get("percentile") + cwv.inp_rating = metrics["INTERACTION_TO_NEXT_PAINT"].get("category") + + # Extract opportunities + for audit_id, audit in audits.items(): + if audit.get("details", {}).get("type") == "opportunity": + savings = audit.get("details", {}).get("overallSavingsMs", 0) + if savings > 0: + result.opportunities.append({ + "id": audit_id, + "title": audit.get("title", ""), + "description": audit.get("description", ""), + "savings_ms": savings, + "score": audit.get("score", 0), + }) + + # Sort opportunities by savings + result.opportunities.sort(key=lambda x: x["savings_ms"], reverse=True) + + # Extract diagnostics + for audit_id, audit in audits.items(): + score = audit.get("score") + if score is not None and score < 1 and audit.get("details"): + if audit.get("details", {}).get("type") not in ["opportunity", None]: + result.diagnostics.append({ + "id": audit_id, + "title": audit.get("title", ""), + "description": audit.get("description", ""), + "score": score, + }) + + # Extract passed audits + for audit_id, audit in audits.items(): + if audit.get("score") == 1: + result.passed_audits.append(audit.get("title", audit_id)) + + return result + + def analyze_both_strategies(self, url: str) -> dict: + """Analyze URL for both mobile and desktop.""" + mobile = self.analyze(url, strategy="mobile") + desktop = self.analyze(url, strategy="desktop") + + return { + "url": url, + "mobile": mobile.to_dict(), + "desktop": desktop.to_dict(), + "comparison": { + "performance_difference": ( + (desktop.performance_score or 0) - (mobile.performance_score or 0) + ), + "mobile_first_issues": self._identify_mobile_issues(mobile, desktop), + }, + } + + def _identify_mobile_issues( + self, + mobile: PageSpeedResult, + desktop: PageSpeedResult, + ) -> list[str]: + """Identify issues that affect mobile more than desktop.""" + issues = [] + + if mobile.performance_score and desktop.performance_score: + if desktop.performance_score - mobile.performance_score > 20: + issues.append("Significant performance gap between mobile and desktop") + + m_cwv = mobile.core_web_vitals + d_cwv = desktop.core_web_vitals + + if m_cwv.lcp and d_cwv.lcp and m_cwv.lcp > d_cwv.lcp * 1.5: + issues.append("LCP significantly slower on mobile") + + if m_cwv.cls and d_cwv.cls and m_cwv.cls > d_cwv.cls * 2: + issues.append("Layout shift issues more severe on mobile") + + return issues + + def get_cwv_summary(self, url: str) -> dict: + """Get a summary focused on Core Web Vitals.""" + result = self.analyze(url, strategy="mobile") + + cwv = result.core_web_vitals + + return { + "url": url, + "overall_cwv_status": self._overall_cwv_status(cwv), + "metrics": { + "lcp": { + "value": f"{cwv.lcp / 1000:.2f}s" if cwv.lcp else None, + "rating": cwv.lcp_rating, + "threshold": "≤ 2.5s good, > 4.0s poor", + }, + "fid": { + "value": f"{cwv.fid:.0f}ms" if cwv.fid else None, + "rating": cwv.fid_rating, + "threshold": "≤ 100ms good, > 300ms poor", + }, + "cls": { + "value": f"{cwv.cls:.3f}" if cwv.cls else None, + "rating": cwv.cls_rating, + "threshold": "≤ 0.1 good, > 0.25 poor", + }, + "inp": { + "value": f"{cwv.inp:.0f}ms" if cwv.inp else None, + "rating": cwv.inp_rating, + "threshold": "≤ 200ms good, > 500ms poor", + }, + }, + "top_opportunities": result.opportunities[:5], + } + + def _overall_cwv_status(self, cwv: CoreWebVitals) -> str: + """Determine overall Core Web Vitals status.""" + ratings = [cwv.lcp_rating, cwv.fid_rating, cwv.cls_rating] + ratings = [r for r in ratings if r] + + if not ratings: + return "UNKNOWN" + + if any(r == "POOR" for r in ratings): + return "POOR" + if any(r == "NEEDS_IMPROVEMENT" for r in ratings): + return "NEEDS_IMPROVEMENT" + return "GOOD" + + def generate_report(self, result: PageSpeedResult) -> str: + """Generate human-readable performance report.""" + lines = [ + "=" * 60, + "PageSpeed Insights Report", + "=" * 60, + f"URL: {result.url}", + f"Strategy: {result.strategy}", + "", + "Scores:", + f" Performance: {result.performance_score:.0f}/100" if result.performance_score else " Performance: N/A", + f" SEO: {result.seo_score:.0f}/100" if result.seo_score else " SEO: N/A", + f" Accessibility: {result.accessibility_score:.0f}/100" if result.accessibility_score else " Accessibility: N/A", + f" Best Practices: {result.best_practices_score:.0f}/100" if result.best_practices_score else " Best Practices: N/A", + "", + "Core Web Vitals:", + ] + + cwv = result.core_web_vitals + + def format_metric(name: str, value: Any, rating: str | None, unit: str) -> str: + if value is None: + return f" {name}: N/A" + rating_str = f" ({rating})" if rating else "" + return f" {name}: {value}{unit}{rating_str}" + + lines.append(format_metric("LCP", f"{cwv.lcp / 1000:.2f}" if cwv.lcp else None, cwv.lcp_rating, "s")) + lines.append(format_metric("FID/TBT", f"{cwv.fid:.0f}" if cwv.fid else None, cwv.fid_rating, "ms")) + lines.append(format_metric("CLS", f"{cwv.cls:.3f}" if cwv.cls else None, cwv.cls_rating, "")) + lines.append(format_metric("INP", f"{cwv.inp:.0f}" if cwv.inp else None, cwv.inp_rating, "ms")) + lines.append(format_metric("TTFB", f"{cwv.ttfb:.0f}" if cwv.ttfb else None, None, "ms")) + lines.append(format_metric("FCP", f"{cwv.fcp / 1000:.2f}" if cwv.fcp else None, None, "s")) + + if result.opportunities: + lines.extend([ + "", + f"Top Opportunities ({len(result.opportunities)} total):", + ]) + for opp in result.opportunities[:5]: + savings = opp["savings_ms"] + lines.append(f" - {opp['title']}: -{savings / 1000:.1f}s potential savings") + + lines.extend(["", "=" * 60]) + + return "\n".join(lines) + + +def main(): + """CLI entry point.""" + parser = argparse.ArgumentParser(description="PageSpeed Insights Client") + parser.add_argument("--url", "-u", required=True, help="URL to analyze") + parser.add_argument("--strategy", "-s", default="mobile", + choices=["mobile", "desktop", "both"], + help="Analysis strategy") + parser.add_argument("--output", "-o", help="Output file for JSON") + parser.add_argument("--json", action="store_true", help="Output as JSON") + parser.add_argument("--cwv-only", action="store_true", + help="Show only Core Web Vitals summary") + + args = parser.parse_args() + + client = PageSpeedClient() + + if args.cwv_only: + summary = client.get_cwv_summary(args.url) + print(json.dumps(summary, indent=2)) + elif args.strategy == "both": + result = client.analyze_both_strategies(args.url) + output = json.dumps(result, indent=2) + if args.output: + with open(args.output, "w") as f: + f.write(output) + else: + print(output) + else: + result = client.analyze(args.url, strategy=args.strategy) + + if args.json or args.output: + output = json.dumps(result.to_dict(), indent=2) + if args.output: + with open(args.output, "w") as f: + f.write(output) + else: + print(output) + else: + print(client.generate_report(result)) + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/requirements.txt b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/requirements.txt similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/scripts/requirements.txt rename to ourdigital-custom-skills/_archive/seo-audit-agent/scripts/requirements.txt diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/robots_checker.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/robots_checker.py new file mode 100644 index 0000000..5e71e11 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/robots_checker.py @@ -0,0 +1,540 @@ +""" +Robots.txt Checker - Analyze robots.txt configuration +===================================================== +Purpose: Parse and analyze robots.txt for SEO compliance +Python: 3.10+ +Usage: + python robots_checker.py --url https://example.com/robots.txt + python robots_checker.py --url https://example.com --test-url /admin/ +""" + +import argparse +import json +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any +from urllib.parse import urljoin, urlparse +from urllib.robotparser import RobotFileParser + +import requests + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class RobotsIssue: + """Represents a robots.txt issue.""" + + severity: str # "error", "warning", "info" + message: str + line_number: int | None = None + directive: str | None = None + suggestion: str | None = None + + +@dataclass +class UserAgentRules: + """Rules for a specific user-agent.""" + + user_agent: str + disallow: list[str] = field(default_factory=list) + allow: list[str] = field(default_factory=list) + crawl_delay: float | None = None + + +@dataclass +class RobotsResult: + """Complete robots.txt analysis result.""" + + url: str + accessible: bool = True + content: str = "" + rules: list[UserAgentRules] = field(default_factory=list) + sitemaps: list[str] = field(default_factory=list) + issues: list[RobotsIssue] = field(default_factory=list) + stats: dict = field(default_factory=dict) + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON output.""" + return { + "url": self.url, + "accessible": self.accessible, + "sitemaps": self.sitemaps, + "rules": [ + { + "user_agent": r.user_agent, + "disallow": r.disallow, + "allow": r.allow, + "crawl_delay": r.crawl_delay, + } + for r in self.rules + ], + "issues": [ + { + "severity": i.severity, + "message": i.message, + "line_number": i.line_number, + "directive": i.directive, + "suggestion": i.suggestion, + } + for i in self.issues + ], + "stats": self.stats, + "timestamp": self.timestamp, + } + + +class RobotsChecker: + """Analyze robots.txt configuration.""" + + # Common user agents + USER_AGENTS = { + "*": "All bots", + "Googlebot": "Google crawler", + "Googlebot-Image": "Google Image crawler", + "Googlebot-News": "Google News crawler", + "Googlebot-Video": "Google Video crawler", + "Bingbot": "Bing crawler", + "Slurp": "Yahoo crawler", + "DuckDuckBot": "DuckDuckGo crawler", + "Baiduspider": "Baidu crawler", + "Yandex": "Yandex crawler", + "facebot": "Facebook crawler", + "Twitterbot": "Twitter crawler", + "LinkedInBot": "LinkedIn crawler", + } + + # Paths that should generally not be blocked + IMPORTANT_PATHS = [ + "/", + "/*.css", + "/*.js", + "/*.jpg", + "/*.jpeg", + "/*.png", + "/*.gif", + "/*.svg", + "/*.webp", + ] + + # Paths commonly blocked + COMMON_BLOCKED = [ + "/admin", + "/wp-admin", + "/login", + "/private", + "/api", + "/cgi-bin", + "/tmp", + "/search", + ] + + def __init__(self): + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (compatible; SEOAuditBot/1.0)" + }) + + def fetch_robots(self, url: str) -> str | None: + """Fetch robots.txt content.""" + # Ensure we're fetching robots.txt + parsed = urlparse(url) + if not parsed.path.endswith("robots.txt"): + robots_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt" + else: + robots_url = url + + try: + response = self.session.get(robots_url, timeout=10) + if response.status_code == 200: + return response.text + elif response.status_code == 404: + return None + else: + raise RuntimeError(f"HTTP {response.status_code}") + except requests.RequestException as e: + raise RuntimeError(f"Failed to fetch robots.txt: {e}") + + def parse_robots(self, content: str) -> tuple[list[UserAgentRules], list[str]]: + """Parse robots.txt content.""" + rules = [] + sitemaps = [] + current_ua = None + current_rules = None + + for line_num, line in enumerate(content.split("\n"), 1): + line = line.strip() + + # Skip empty lines and comments + if not line or line.startswith("#"): + continue + + # Parse directive + if ":" not in line: + continue + + directive, value = line.split(":", 1) + directive = directive.strip().lower() + value = value.strip() + + if directive == "user-agent": + # Save previous user-agent rules + if current_rules: + rules.append(current_rules) + + current_ua = value + current_rules = UserAgentRules(user_agent=value) + + elif directive == "disallow" and current_rules: + if value: # Empty disallow means allow all + current_rules.disallow.append(value) + + elif directive == "allow" and current_rules: + if value: + current_rules.allow.append(value) + + elif directive == "crawl-delay" and current_rules: + try: + current_rules.crawl_delay = float(value) + except ValueError: + pass + + elif directive == "sitemap": + if value: + sitemaps.append(value) + + # Don't forget last user-agent + if current_rules: + rules.append(current_rules) + + return rules, sitemaps + + def analyze(self, url: str) -> RobotsResult: + """Analyze robots.txt.""" + result = RobotsResult(url=url) + + # Fetch robots.txt + try: + content = self.fetch_robots(url) + if content is None: + result.accessible = False + result.issues.append(RobotsIssue( + severity="info", + message="No robots.txt found (returns 404)", + suggestion="Consider creating a robots.txt file", + )) + return result + except RuntimeError as e: + result.accessible = False + result.issues.append(RobotsIssue( + severity="error", + message=str(e), + )) + return result + + result.content = content + result.rules, result.sitemaps = self.parse_robots(content) + + # Analyze content + self._analyze_syntax(result) + self._analyze_rules(result) + self._analyze_sitemaps(result) + + # Calculate stats + result.stats = { + "user_agents_count": len(result.rules), + "user_agents": [r.user_agent for r in result.rules], + "total_disallow_rules": sum(len(r.disallow) for r in result.rules), + "total_allow_rules": sum(len(r.allow) for r in result.rules), + "sitemaps_count": len(result.sitemaps), + "has_crawl_delay": any(r.crawl_delay for r in result.rules), + "content_length": len(content), + } + + return result + + def _analyze_syntax(self, result: RobotsResult) -> None: + """Check for syntax issues.""" + lines = result.content.split("\n") + + for line_num, line in enumerate(lines, 1): + line = line.strip() + + # Skip empty lines and comments + if not line or line.startswith("#"): + continue + + # Check for valid directive + if ":" not in line: + result.issues.append(RobotsIssue( + severity="warning", + message=f"Invalid line (missing colon): {line[:50]}", + line_number=line_num, + )) + continue + + directive, value = line.split(":", 1) + directive = directive.strip().lower() + + valid_directives = { + "user-agent", "disallow", "allow", + "crawl-delay", "sitemap", "host", + } + + if directive not in valid_directives: + result.issues.append(RobotsIssue( + severity="info", + message=f"Unknown directive: {directive}", + line_number=line_num, + directive=directive, + )) + + def _analyze_rules(self, result: RobotsResult) -> None: + """Analyze blocking rules.""" + # Check if there are any rules + if not result.rules: + result.issues.append(RobotsIssue( + severity="info", + message="No user-agent rules defined", + suggestion="Add User-agent: * rules to control crawling", + )) + return + + # Check for wildcard rule + has_wildcard = any(r.user_agent == "*" for r in result.rules) + if not has_wildcard: + result.issues.append(RobotsIssue( + severity="info", + message="No wildcard (*) user-agent defined", + suggestion="Consider adding User-agent: * as fallback", + )) + + # Check for blocking important resources + for rules in result.rules: + for disallow in rules.disallow: + # Check if blocking root + if disallow == "/": + result.issues.append(RobotsIssue( + severity="error", + message=f"Blocking entire site for {rules.user_agent}", + directive=f"Disallow: {disallow}", + suggestion="This will prevent indexing. Is this intentional?", + )) + + # Check if blocking CSS/JS + if any(ext in disallow.lower() for ext in [".css", ".js"]): + result.issues.append(RobotsIssue( + severity="warning", + message=f"Blocking CSS/JS files for {rules.user_agent}", + directive=f"Disallow: {disallow}", + suggestion="May affect rendering and SEO", + )) + + # Check for blocking images + if any(ext in disallow.lower() for ext in [".jpg", ".png", ".gif", ".webp"]): + result.issues.append(RobotsIssue( + severity="info", + message=f"Blocking image files for {rules.user_agent}", + directive=f"Disallow: {disallow}", + )) + + # Check crawl delay + if rules.crawl_delay: + if rules.crawl_delay > 10: + result.issues.append(RobotsIssue( + severity="warning", + message=f"High crawl-delay ({rules.crawl_delay}s) for {rules.user_agent}", + directive=f"Crawl-delay: {rules.crawl_delay}", + suggestion="May significantly slow indexing", + )) + elif rules.crawl_delay > 0: + result.issues.append(RobotsIssue( + severity="info", + message=f"Crawl-delay set to {rules.crawl_delay}s for {rules.user_agent}", + )) + + def _analyze_sitemaps(self, result: RobotsResult) -> None: + """Analyze sitemap declarations.""" + if not result.sitemaps: + result.issues.append(RobotsIssue( + severity="warning", + message="No sitemap declared in robots.txt", + suggestion="Add Sitemap: directive to help crawlers find your sitemap", + )) + else: + for sitemap in result.sitemaps: + if not sitemap.startswith("http"): + result.issues.append(RobotsIssue( + severity="warning", + message=f"Sitemap URL should be absolute: {sitemap}", + directive=f"Sitemap: {sitemap}", + )) + + def test_url(self, robots_url: str, test_path: str, + user_agent: str = "Googlebot") -> dict: + """Test if a specific URL is allowed.""" + # Use Python's built-in parser + rp = RobotFileParser() + + # Ensure robots.txt URL + parsed = urlparse(robots_url) + if not parsed.path.endswith("robots.txt"): + robots_txt_url = f"{parsed.scheme}://{parsed.netloc}/robots.txt" + else: + robots_txt_url = robots_url + + rp.set_url(robots_txt_url) + try: + rp.read() + except Exception as e: + return { + "path": test_path, + "user_agent": user_agent, + "allowed": None, + "error": str(e), + } + + # Build full URL for testing + base_url = f"{parsed.scheme}://{parsed.netloc}" + full_url = urljoin(base_url, test_path) + + allowed = rp.can_fetch(user_agent, full_url) + + return { + "path": test_path, + "user_agent": user_agent, + "allowed": allowed, + "full_url": full_url, + } + + def generate_report(self, result: RobotsResult) -> str: + """Generate human-readable analysis report.""" + lines = [ + "=" * 60, + "Robots.txt Analysis Report", + "=" * 60, + f"URL: {result.url}", + f"Accessible: {'Yes' if result.accessible else 'No'}", + f"Timestamp: {result.timestamp}", + "", + ] + + if result.accessible: + lines.append("Statistics:") + for key, value in result.stats.items(): + if key == "user_agents": + lines.append(f" {key}: {', '.join(value) if value else 'None'}") + else: + lines.append(f" {key}: {value}") + lines.append("") + + if result.sitemaps: + lines.append(f"Sitemaps ({len(result.sitemaps)}):") + for sitemap in result.sitemaps: + lines.append(f" - {sitemap}") + lines.append("") + + if result.rules: + lines.append("Rules Summary:") + for rules in result.rules: + lines.append(f"\n User-agent: {rules.user_agent}") + if rules.disallow: + lines.append(f" Disallow: {len(rules.disallow)} rules") + for d in rules.disallow[:5]: + lines.append(f" - {d}") + if len(rules.disallow) > 5: + lines.append(f" ... and {len(rules.disallow) - 5} more") + if rules.allow: + lines.append(f" Allow: {len(rules.allow)} rules") + for a in rules.allow[:3]: + lines.append(f" - {a}") + if rules.crawl_delay: + lines.append(f" Crawl-delay: {rules.crawl_delay}s") + lines.append("") + + if result.issues: + lines.append("Issues Found:") + errors = [i for i in result.issues if i.severity == "error"] + warnings = [i for i in result.issues if i.severity == "warning"] + infos = [i for i in result.issues if i.severity == "info"] + + if errors: + lines.append(f"\n ERRORS ({len(errors)}):") + for issue in errors: + lines.append(f" - {issue.message}") + if issue.directive: + lines.append(f" Directive: {issue.directive}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if warnings: + lines.append(f"\n WARNINGS ({len(warnings)}):") + for issue in warnings: + lines.append(f" - {issue.message}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if infos: + lines.append(f"\n INFO ({len(infos)}):") + for issue in infos: + lines.append(f" - {issue.message}") + + lines.append("") + lines.append("=" * 60) + + return "\n".join(lines) + + +def main(): + """Main entry point for CLI usage.""" + parser = argparse.ArgumentParser( + description="Analyze robots.txt configuration", + ) + parser.add_argument("--url", "-u", required=True, + help="URL to robots.txt or domain") + parser.add_argument("--test-url", "-t", + help="Test if specific URL path is allowed") + parser.add_argument("--user-agent", "-a", default="Googlebot", + help="User agent for testing (default: Googlebot)") + parser.add_argument("--output", "-o", help="Output file for JSON report") + parser.add_argument("--json", action="store_true", help="Output as JSON") + + args = parser.parse_args() + + checker = RobotsChecker() + + if args.test_url: + # Test specific URL + test_result = checker.test_url(args.url, args.test_url, args.user_agent) + if args.json: + print(json.dumps(test_result, indent=2)) + else: + status = "ALLOWED" if test_result["allowed"] else "BLOCKED" + print(f"URL: {test_result['path']}") + print(f"User-Agent: {test_result['user_agent']}") + print(f"Status: {status}") + else: + # Full analysis + result = checker.analyze(args.url) + + if args.json or args.output: + output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2) + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + f.write(output) + logger.info(f"Report written to {args.output}") + else: + print(output) + else: + print(checker.generate_report(result)) + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_generator.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_generator.py new file mode 100644 index 0000000..8afa504 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_generator.py @@ -0,0 +1,490 @@ +""" +Schema Generator - Generate JSON-LD structured data markup +========================================================== +Purpose: Generate schema.org structured data in JSON-LD format +Python: 3.10+ +Usage: + python schema_generator.py --type organization --name "Company Name" --url "https://example.com" +""" + +import argparse +import json +import logging +import os +import re +from datetime import datetime +from pathlib import Path +from typing import Any + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + +# Template directory relative to this script +TEMPLATE_DIR = Path(__file__).parent.parent / "templates" / "schema_templates" + + +class SchemaGenerator: + """Generate JSON-LD schema markup from templates.""" + + SCHEMA_TYPES = { + "organization": "organization.json", + "local_business": "local_business.json", + "product": "product.json", + "article": "article.json", + "faq": "faq.json", + "breadcrumb": "breadcrumb.json", + "website": "website.json", + } + + # Business type mappings for LocalBusiness + BUSINESS_TYPES = { + "restaurant": "Restaurant", + "cafe": "CafeOrCoffeeShop", + "bar": "BarOrPub", + "hotel": "Hotel", + "store": "Store", + "medical": "MedicalBusiness", + "dental": "Dentist", + "legal": "LegalService", + "real_estate": "RealEstateAgent", + "auto": "AutoRepair", + "beauty": "BeautySalon", + "gym": "HealthClub", + "spa": "DaySpa", + } + + # Article type mappings + ARTICLE_TYPES = { + "article": "Article", + "blog": "BlogPosting", + "news": "NewsArticle", + "tech": "TechArticle", + "scholarly": "ScholarlyArticle", + } + + def __init__(self, template_dir: Path = TEMPLATE_DIR): + self.template_dir = template_dir + + def load_template(self, schema_type: str) -> dict: + """Load a schema template file.""" + if schema_type not in self.SCHEMA_TYPES: + raise ValueError(f"Unknown schema type: {schema_type}. " + f"Available: {list(self.SCHEMA_TYPES.keys())}") + + template_file = self.template_dir / self.SCHEMA_TYPES[schema_type] + if not template_file.exists(): + raise FileNotFoundError(f"Template not found: {template_file}") + + with open(template_file, "r", encoding="utf-8") as f: + return json.load(f) + + def fill_template(self, template: dict, data: dict[str, Any]) -> dict: + """Fill template placeholders with actual data.""" + template_str = json.dumps(template, ensure_ascii=False) + + # Replace placeholders {{key}} with values + for key, value in data.items(): + placeholder = f"{{{{{key}}}}}" + if value is not None: + template_str = template_str.replace(placeholder, str(value)) + + # Remove unfilled placeholders and their parent objects if empty + result = json.loads(template_str) + return self._clean_empty_values(result) + + def _clean_empty_values(self, obj: Any) -> Any: + """Remove empty values and unfilled placeholders.""" + if isinstance(obj, dict): + cleaned = {} + for key, value in obj.items(): + cleaned_value = self._clean_empty_values(value) + # Skip if value is empty, None, or unfilled placeholder + if cleaned_value is None: + continue + if isinstance(cleaned_value, str) and cleaned_value.startswith("{{"): + continue + if isinstance(cleaned_value, (list, dict)) and not cleaned_value: + continue + cleaned[key] = cleaned_value + return cleaned if cleaned else None + elif isinstance(obj, list): + cleaned = [] + for item in obj: + cleaned_item = self._clean_empty_values(item) + if cleaned_item is not None: + if isinstance(cleaned_item, str) and cleaned_item.startswith("{{"): + continue + cleaned.append(cleaned_item) + return cleaned if cleaned else None + elif isinstance(obj, str): + if obj.startswith("{{") and obj.endswith("}}"): + return None + return obj + return obj + + def generate_organization( + self, + name: str, + url: str, + logo_url: str | None = None, + description: str | None = None, + founding_date: str | None = None, + phone: str | None = None, + address: dict | None = None, + social_links: list[str] | None = None, + ) -> dict: + """Generate Organization schema.""" + template = self.load_template("organization") + + data = { + "name": name, + "url": url, + "logo_url": logo_url, + "description": description, + "founding_date": founding_date, + "phone": phone, + } + + if address: + data.update({ + "street_address": address.get("street"), + "city": address.get("city"), + "region": address.get("region"), + "postal_code": address.get("postal_code"), + "country": address.get("country", "KR"), + }) + + if social_links: + # Handle social links specially + pass + + return self.fill_template(template, data) + + def generate_local_business( + self, + name: str, + business_type: str, + address: dict, + phone: str | None = None, + url: str | None = None, + description: str | None = None, + hours: dict | None = None, + geo: dict | None = None, + price_range: str | None = None, + rating: float | None = None, + review_count: int | None = None, + ) -> dict: + """Generate LocalBusiness schema.""" + template = self.load_template("local_business") + + schema_business_type = self.BUSINESS_TYPES.get( + business_type.lower(), "LocalBusiness" + ) + + data = { + "business_type": schema_business_type, + "name": name, + "url": url, + "description": description, + "phone": phone, + "price_range": price_range, + "street_address": address.get("street"), + "city": address.get("city"), + "region": address.get("region"), + "postal_code": address.get("postal_code"), + "country": address.get("country", "KR"), + } + + if geo: + data["latitude"] = geo.get("lat") + data["longitude"] = geo.get("lng") + + if hours: + data.update({ + "weekday_opens": hours.get("weekday_opens", "09:00"), + "weekday_closes": hours.get("weekday_closes", "18:00"), + "weekend_opens": hours.get("weekend_opens"), + "weekend_closes": hours.get("weekend_closes"), + }) + + if rating is not None: + data["rating"] = str(rating) + data["review_count"] = str(review_count or 0) + + return self.fill_template(template, data) + + def generate_product( + self, + name: str, + description: str, + price: float, + currency: str = "KRW", + brand: str | None = None, + sku: str | None = None, + images: list[str] | None = None, + availability: str = "InStock", + condition: str = "NewCondition", + rating: float | None = None, + review_count: int | None = None, + url: str | None = None, + seller: str | None = None, + ) -> dict: + """Generate Product schema.""" + template = self.load_template("product") + + data = { + "name": name, + "description": description, + "price": str(int(price)), + "currency": currency, + "brand_name": brand, + "sku": sku, + "product_url": url, + "availability": availability, + "condition": condition, + "seller_name": seller, + } + + if images: + for i, img in enumerate(images[:3], 1): + data[f"image_url_{i}"] = img + + if rating is not None: + data["rating"] = str(rating) + data["review_count"] = str(review_count or 0) + + return self.fill_template(template, data) + + def generate_article( + self, + headline: str, + description: str, + author_name: str, + date_published: str, + publisher_name: str, + article_type: str = "article", + date_modified: str | None = None, + images: list[str] | None = None, + page_url: str | None = None, + publisher_logo: str | None = None, + author_url: str | None = None, + section: str | None = None, + word_count: int | None = None, + keywords: str | None = None, + ) -> dict: + """Generate Article schema.""" + template = self.load_template("article") + + schema_article_type = self.ARTICLE_TYPES.get( + article_type.lower(), "Article" + ) + + data = { + "article_type": schema_article_type, + "headline": headline, + "description": description, + "author_name": author_name, + "author_url": author_url, + "date_published": date_published, + "date_modified": date_modified or date_published, + "publisher_name": publisher_name, + "publisher_logo_url": publisher_logo, + "page_url": page_url, + "section": section, + "word_count": str(word_count) if word_count else None, + "keywords": keywords, + } + + if images: + for i, img in enumerate(images[:2], 1): + data[f"image_url_{i}"] = img + + return self.fill_template(template, data) + + def generate_faq(self, questions: list[dict[str, str]]) -> dict: + """Generate FAQPage schema.""" + schema = { + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [], + } + + for qa in questions: + schema["mainEntity"].append({ + "@type": "Question", + "name": qa["question"], + "acceptedAnswer": { + "@type": "Answer", + "text": qa["answer"], + }, + }) + + return schema + + def generate_breadcrumb(self, items: list[dict[str, str]]) -> dict: + """Generate BreadcrumbList schema.""" + schema = { + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement": [], + } + + for i, item in enumerate(items, 1): + schema["itemListElement"].append({ + "@type": "ListItem", + "position": i, + "name": item["name"], + "item": item["url"], + }) + + return schema + + def generate_website( + self, + name: str, + url: str, + search_url_template: str | None = None, + description: str | None = None, + language: str = "ko-KR", + publisher_name: str | None = None, + logo_url: str | None = None, + alternate_name: str | None = None, + ) -> dict: + """Generate WebSite schema.""" + template = self.load_template("website") + + data = { + "site_name": name, + "url": url, + "description": description, + "language": language, + "search_url_template": search_url_template, + "publisher_name": publisher_name or name, + "logo_url": logo_url, + "alternate_name": alternate_name, + } + + return self.fill_template(template, data) + + def to_json_ld(self, schema: dict, pretty: bool = True) -> str: + """Convert schema dict to JSON-LD string.""" + indent = 2 if pretty else None + return json.dumps(schema, ensure_ascii=False, indent=indent) + + def to_html_script(self, schema: dict) -> str: + """Wrap schema in HTML script tag.""" + json_ld = self.to_json_ld(schema) + return f'' + + +def main(): + """Main entry point for CLI usage.""" + parser = argparse.ArgumentParser( + description="Generate JSON-LD schema markup", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + # Generate Organization schema + python schema_generator.py --type organization --name "My Company" --url "https://example.com" + + # Generate Product schema + python schema_generator.py --type product --name "Widget" --price 29900 --currency KRW + + # Generate Article schema + python schema_generator.py --type article --headline "Article Title" --author "John Doe" + """, + ) + + parser.add_argument( + "--type", "-t", + required=True, + choices=SchemaGenerator.SCHEMA_TYPES.keys(), + help="Schema type to generate", + ) + parser.add_argument("--name", help="Name/title") + parser.add_argument("--url", help="URL") + parser.add_argument("--description", help="Description") + parser.add_argument("--price", type=float, help="Price (for product)") + parser.add_argument("--currency", default="KRW", help="Currency code") + parser.add_argument("--headline", help="Headline (for article)") + parser.add_argument("--author", help="Author name") + parser.add_argument("--output", "-o", help="Output file path") + parser.add_argument("--html", action="store_true", help="Output as HTML script tag") + + args = parser.parse_args() + + generator = SchemaGenerator() + + try: + if args.type == "organization": + schema = generator.generate_organization( + name=args.name or "Organization Name", + url=args.url or "https://example.com", + description=args.description, + ) + elif args.type == "product": + schema = generator.generate_product( + name=args.name or "Product Name", + description=args.description or "Product description", + price=args.price or 0, + currency=args.currency, + ) + elif args.type == "article": + schema = generator.generate_article( + headline=args.headline or args.name or "Article Title", + description=args.description or "Article description", + author_name=args.author or "Author", + date_published=datetime.now().strftime("%Y-%m-%d"), + publisher_name="Publisher", + ) + elif args.type == "website": + schema = generator.generate_website( + name=args.name or "Website Name", + url=args.url or "https://example.com", + description=args.description, + ) + elif args.type == "faq": + # Example FAQ + schema = generator.generate_faq([ + {"question": "Question 1?", "answer": "Answer 1"}, + {"question": "Question 2?", "answer": "Answer 2"}, + ]) + elif args.type == "breadcrumb": + # Example breadcrumb + schema = generator.generate_breadcrumb([ + {"name": "Home", "url": "https://example.com/"}, + {"name": "Category", "url": "https://example.com/category/"}, + ]) + elif args.type == "local_business": + schema = generator.generate_local_business( + name=args.name or "Business Name", + business_type="store", + address={"street": "123 Main St", "city": "Seoul", "country": "KR"}, + url=args.url, + description=args.description, + ) + else: + raise ValueError(f"Unsupported type: {args.type}") + + if args.html: + output = generator.to_html_script(schema) + else: + output = generator.to_json_ld(schema) + + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + f.write(output) + logger.info(f"Schema written to {args.output}") + else: + print(output) + + except Exception as e: + logger.error(f"Error generating schema: {e}") + raise + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_validator.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_validator.py new file mode 100644 index 0000000..6c71b2f --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/schema_validator.py @@ -0,0 +1,498 @@ +""" +Schema Validator - Validate JSON-LD structured data markup +========================================================== +Purpose: Extract and validate schema.org structured data from URLs or files +Python: 3.10+ +Usage: + python schema_validator.py --url https://example.com + python schema_validator.py --file schema.json +""" + +import argparse +import json +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any +from urllib.parse import urlparse + +import requests +from bs4 import BeautifulSoup + +try: + import extruct + HAS_EXTRUCT = True +except ImportError: + HAS_EXTRUCT = False + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class ValidationIssue: + """Represents a validation issue found in schema.""" + + severity: str # "error", "warning", "info" + message: str + schema_type: str | None = None + property_name: str | None = None + suggestion: str | None = None + + +@dataclass +class ValidationResult: + """Complete validation result for a schema.""" + + url: str | None = None + schemas_found: list[dict] = field(default_factory=list) + issues: list[ValidationIssue] = field(default_factory=list) + valid: bool = True + rich_results_eligible: dict = field(default_factory=dict) + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON output.""" + return { + "url": self.url, + "schemas_found": len(self.schemas_found), + "schema_types": [s.get("@type", "Unknown") for s in self.schemas_found], + "valid": self.valid, + "issues": [ + { + "severity": i.severity, + "message": i.message, + "schema_type": i.schema_type, + "property": i.property_name, + "suggestion": i.suggestion, + } + for i in self.issues + ], + "rich_results_eligible": self.rich_results_eligible, + "timestamp": self.timestamp, + } + + +class SchemaValidator: + """Validate schema.org structured data.""" + + # Required properties for common schema types + REQUIRED_PROPERTIES = { + "Organization": ["name", "url"], + "LocalBusiness": ["name", "address"], + "Product": ["name"], + "Offer": ["price", "priceCurrency"], + "Article": ["headline", "author", "datePublished", "publisher"], + "BlogPosting": ["headline", "author", "datePublished", "publisher"], + "NewsArticle": ["headline", "author", "datePublished", "publisher"], + "FAQPage": ["mainEntity"], + "Question": ["name", "acceptedAnswer"], + "Answer": ["text"], + "BreadcrumbList": ["itemListElement"], + "ListItem": ["position", "name"], + "WebSite": ["name", "url"], + "WebPage": ["name"], + "Person": ["name"], + "Event": ["name", "startDate", "location"], + "Review": ["reviewRating", "author"], + "AggregateRating": ["ratingValue"], + "ImageObject": ["url"], + } + + # Recommended (but not required) properties + RECOMMENDED_PROPERTIES = { + "Organization": ["logo", "description", "contactPoint", "sameAs"], + "LocalBusiness": ["telephone", "openingHoursSpecification", "geo", "image"], + "Product": ["description", "image", "brand", "offers", "aggregateRating"], + "Article": ["image", "dateModified", "description"], + "FAQPage": [], + "WebSite": ["potentialAction"], + "BreadcrumbList": [], + } + + # Google Rich Results eligible types + RICH_RESULTS_TYPES = { + "Article", "BlogPosting", "NewsArticle", + "Product", "Review", + "FAQPage", "HowTo", + "LocalBusiness", "Restaurant", + "Event", + "Recipe", + "JobPosting", + "Course", + "BreadcrumbList", + "Organization", + "WebSite", + "VideoObject", + } + + def __init__(self): + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (compatible; SEOAuditBot/1.0)" + }) + + def extract_from_url(self, url: str) -> list[dict]: + """Extract all structured data from a URL.""" + try: + response = self.session.get(url, timeout=30) + response.raise_for_status() + return self.extract_from_html(response.text, url) + except requests.RequestException as e: + logger.error(f"Failed to fetch URL: {e}") + return [] + + def extract_from_html(self, html: str, base_url: str | None = None) -> list[dict]: + """Extract structured data from HTML content.""" + schemas = [] + + # Method 1: Use extruct if available (handles JSON-LD, Microdata, RDFa) + if HAS_EXTRUCT: + try: + data = extruct.extract(html, base_url=base_url, uniform=True) + schemas.extend(data.get("json-ld", [])) + schemas.extend(data.get("microdata", [])) + schemas.extend(data.get("rdfa", [])) + except Exception as e: + logger.warning(f"extruct extraction failed: {e}") + + # Method 2: Manual JSON-LD extraction (fallback/additional) + soup = BeautifulSoup(html, "html.parser") + for script in soup.find_all("script", type="application/ld+json"): + try: + content = script.string + if content: + data = json.loads(content) + if isinstance(data, list): + schemas.extend(data) + else: + schemas.append(data) + except json.JSONDecodeError as e: + logger.warning(f"Invalid JSON-LD: {e}") + + # Deduplicate schemas + seen = set() + unique_schemas = [] + for schema in schemas: + schema_str = json.dumps(schema, sort_keys=True) + if schema_str not in seen: + seen.add(schema_str) + unique_schemas.append(schema) + + return unique_schemas + + def validate(self, url: str | None = None, html: str | None = None, + schema: dict | None = None) -> ValidationResult: + """Validate schema from URL, HTML, or direct schema dict.""" + result = ValidationResult(url=url) + + # Extract schemas + if schema: + schemas = [schema] + elif html: + schemas = self.extract_from_html(html, url) + elif url: + schemas = self.extract_from_url(url) + else: + raise ValueError("Must provide url, html, or schema") + + result.schemas_found = schemas + + if not schemas: + result.issues.append(ValidationIssue( + severity="warning", + message="No structured data found", + suggestion="Add JSON-LD schema markup to improve SEO", + )) + result.valid = False + return result + + # Validate each schema + for schema in schemas: + self._validate_schema(schema, result) + + # Check for errors (warnings don't affect validity) + result.valid = not any(i.severity == "error" for i in result.issues) + + return result + + def _validate_schema(self, schema: dict, result: ValidationResult, + parent_type: str | None = None) -> None: + """Validate a single schema object.""" + schema_type = schema.get("@type") + + if not schema_type: + result.issues.append(ValidationIssue( + severity="error", + message="Missing @type property", + schema_type=parent_type, + )) + return + + # Handle array of types + if isinstance(schema_type, list): + schema_type = schema_type[0] + + # Check required properties + required = self.REQUIRED_PROPERTIES.get(schema_type, []) + for prop in required: + if prop not in schema: + result.issues.append(ValidationIssue( + severity="error", + message=f"Missing required property: {prop}", + schema_type=schema_type, + property_name=prop, + suggestion=f"Add '{prop}' property to {schema_type} schema", + )) + + # Check recommended properties + recommended = self.RECOMMENDED_PROPERTIES.get(schema_type, []) + for prop in recommended: + if prop not in schema: + result.issues.append(ValidationIssue( + severity="info", + message=f"Missing recommended property: {prop}", + schema_type=schema_type, + property_name=prop, + suggestion=f"Consider adding '{prop}' for better rich results", + )) + + # Check Rich Results eligibility + if schema_type in self.RICH_RESULTS_TYPES: + result.rich_results_eligible[schema_type] = self._check_rich_results( + schema, schema_type + ) + + # Validate nested schemas + for key, value in schema.items(): + if key.startswith("@"): + continue + if isinstance(value, dict) and "@type" in value: + self._validate_schema(value, result, schema_type) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict) and "@type" in item: + self._validate_schema(item, result, schema_type) + + # Type-specific validations + self._validate_type_specific(schema, schema_type, result) + + def _validate_type_specific(self, schema: dict, schema_type: str, + result: ValidationResult) -> None: + """Type-specific validation rules.""" + if schema_type in ("Article", "BlogPosting", "NewsArticle"): + # Check image + if "image" not in schema: + result.issues.append(ValidationIssue( + severity="warning", + message="Article without image may not show in rich results", + schema_type=schema_type, + property_name="image", + suggestion="Add at least one image to the article", + )) + + # Check headline length + headline = schema.get("headline", "") + if len(headline) > 110: + result.issues.append(ValidationIssue( + severity="warning", + message=f"Headline too long ({len(headline)} chars, max 110)", + schema_type=schema_type, + property_name="headline", + )) + + elif schema_type == "Product": + offer = schema.get("offers", {}) + if isinstance(offer, dict): + # Check price + price = offer.get("price") + if price is not None: + try: + float(price) + except (ValueError, TypeError): + result.issues.append(ValidationIssue( + severity="error", + message=f"Invalid price value: {price}", + schema_type="Offer", + property_name="price", + )) + + # Check availability + availability = offer.get("availability", "") + valid_availabilities = [ + "InStock", "OutOfStock", "PreOrder", "Discontinued", + "https://schema.org/InStock", "https://schema.org/OutOfStock", + ] + if availability and not any( + a in availability for a in valid_availabilities + ): + result.issues.append(ValidationIssue( + severity="warning", + message=f"Unknown availability value: {availability}", + schema_type="Offer", + property_name="availability", + )) + + elif schema_type == "LocalBusiness": + # Check for geo coordinates + if "geo" not in schema: + result.issues.append(ValidationIssue( + severity="info", + message="Missing geo coordinates", + schema_type=schema_type, + property_name="geo", + suggestion="Add latitude/longitude for better local search", + )) + + elif schema_type == "FAQPage": + main_entity = schema.get("mainEntity", []) + if not main_entity: + result.issues.append(ValidationIssue( + severity="error", + message="FAQPage must have at least one question", + schema_type=schema_type, + property_name="mainEntity", + )) + elif len(main_entity) < 2: + result.issues.append(ValidationIssue( + severity="info", + message="FAQPage has only one question", + schema_type=schema_type, + suggestion="Add more questions for better rich results", + )) + + def _check_rich_results(self, schema: dict, schema_type: str) -> dict: + """Check if schema is eligible for Google Rich Results.""" + result = { + "eligible": True, + "missing_for_rich_results": [], + } + + if schema_type in ("Article", "BlogPosting", "NewsArticle"): + required_for_rich = ["headline", "image", "datePublished", "author"] + for prop in required_for_rich: + if prop not in schema: + result["eligible"] = False + result["missing_for_rich_results"].append(prop) + + elif schema_type == "Product": + if "name" not in schema: + result["eligible"] = False + result["missing_for_rich_results"].append("name") + offer = schema.get("offers") + if not offer: + result["eligible"] = False + result["missing_for_rich_results"].append("offers") + + elif schema_type == "FAQPage": + if not schema.get("mainEntity"): + result["eligible"] = False + result["missing_for_rich_results"].append("mainEntity") + + return result + + def generate_report(self, result: ValidationResult) -> str: + """Generate human-readable validation report.""" + lines = [ + "=" * 60, + "Schema Validation Report", + "=" * 60, + f"URL: {result.url or 'N/A'}", + f"Timestamp: {result.timestamp}", + f"Valid: {'Yes' if result.valid else 'No'}", + f"Schemas Found: {len(result.schemas_found)}", + "", + ] + + if result.schemas_found: + lines.append("Schema Types:") + for schema in result.schemas_found: + schema_type = schema.get("@type", "Unknown") + lines.append(f" - {schema_type}") + lines.append("") + + if result.rich_results_eligible: + lines.append("Rich Results Eligibility:") + for schema_type, status in result.rich_results_eligible.items(): + eligible = "Yes" if status["eligible"] else "No" + lines.append(f" - {schema_type}: {eligible}") + if status["missing_for_rich_results"]: + missing = ", ".join(status["missing_for_rich_results"]) + lines.append(f" Missing: {missing}") + lines.append("") + + if result.issues: + lines.append("Issues Found:") + errors = [i for i in result.issues if i.severity == "error"] + warnings = [i for i in result.issues if i.severity == "warning"] + infos = [i for i in result.issues if i.severity == "info"] + + if errors: + lines.append(f"\n ERRORS ({len(errors)}):") + for issue in errors: + lines.append(f" - [{issue.schema_type}] {issue.message}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if warnings: + lines.append(f"\n WARNINGS ({len(warnings)}):") + for issue in warnings: + lines.append(f" - [{issue.schema_type}] {issue.message}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if infos: + lines.append(f"\n INFO ({len(infos)}):") + for issue in infos: + lines.append(f" - [{issue.schema_type}] {issue.message}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + lines.append("") + lines.append("=" * 60) + + return "\n".join(lines) + + +def main(): + """Main entry point for CLI usage.""" + parser = argparse.ArgumentParser( + description="Validate schema.org structured data", + ) + parser.add_argument("--url", "-u", help="URL to validate") + parser.add_argument("--file", "-f", help="JSON-LD file to validate") + parser.add_argument("--output", "-o", help="Output file for JSON report") + parser.add_argument("--json", action="store_true", help="Output as JSON") + + args = parser.parse_args() + + if not args.url and not args.file: + parser.error("Must provide --url or --file") + + validator = SchemaValidator() + + if args.file: + with open(args.file, "r", encoding="utf-8") as f: + schema = json.load(f) + result = validator.validate(schema=schema) + else: + result = validator.validate(url=args.url) + + if args.json or args.output: + output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2) + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + f.write(output) + logger.info(f"Report written to {args.output}") + else: + print(output) + else: + print(validator.generate_report(result)) + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_crawler.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_crawler.py new file mode 100644 index 0000000..4904639 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_crawler.py @@ -0,0 +1,969 @@ +""" +Sitemap Crawler - Sequential page analysis from sitemap +======================================================= +Purpose: Crawl sitemap URLs one by one, analyze each page, save to Notion +Python: 3.10+ +Usage: + from sitemap_crawler import SitemapCrawler + crawler = SitemapCrawler() + crawler.crawl_sitemap("https://example.com/sitemap.xml", delay=2.0) +""" + +import json +import logging +import time +import xml.etree.ElementTree as ET +from dataclasses import dataclass, field +from datetime import datetime +from pathlib import Path +from typing import Callable, Generator +from urllib.parse import urlparse + +import requests +from notion_client import Client + +from base_client import config +from page_analyzer import PageAnalyzer, PageMetadata + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + +# Default database for page analysis data +DEFAULT_PAGES_DATABASE_ID = "2c8581e5-8a1e-8035-880b-e38cefc2f3ef" + +# Default limits to prevent excessive resource usage +DEFAULT_MAX_PAGES = 500 +DEFAULT_DELAY_SECONDS = 2.0 + +# Progress tracking directory +PROGRESS_DIR = Path.home() / ".claude" / "seo-audit-progress" +PROGRESS_DIR.mkdir(parents=True, exist_ok=True) + + +@dataclass +class CrawlProgress: + """Track crawl progress.""" + total_urls: int = 0 + processed_urls: int = 0 + successful_urls: int = 0 + failed_urls: int = 0 + skipped_urls: int = 0 + start_time: datetime = field(default_factory=datetime.now) + current_url: str = "" + audit_id: str = "" + site: str = "" + status: str = "running" # running, completed, failed + error_message: str = "" + summary_page_id: str = "" + + def get_progress_percent(self) -> float: + if self.total_urls == 0: + return 0.0 + return (self.processed_urls / self.total_urls) * 100 + + def get_elapsed_time(self) -> str: + elapsed = datetime.now() - self.start_time + minutes = int(elapsed.total_seconds() // 60) + seconds = int(elapsed.total_seconds() % 60) + return f"{minutes}m {seconds}s" + + def get_eta(self) -> str: + if self.processed_urls == 0: + return "calculating..." + elapsed = (datetime.now() - self.start_time).total_seconds() + avg_time_per_url = elapsed / self.processed_urls + remaining_urls = self.total_urls - self.processed_urls + eta_seconds = remaining_urls * avg_time_per_url + minutes = int(eta_seconds // 60) + seconds = int(eta_seconds % 60) + return f"{minutes}m {seconds}s" + + def to_dict(self) -> dict: + """Convert to dictionary for JSON serialization.""" + return { + "audit_id": self.audit_id, + "site": self.site, + "status": self.status, + "total_urls": self.total_urls, + "processed_urls": self.processed_urls, + "successful_urls": self.successful_urls, + "failed_urls": self.failed_urls, + "progress_percent": round(self.get_progress_percent(), 1), + "elapsed_time": self.get_elapsed_time(), + "eta": self.get_eta(), + "current_url": self.current_url, + "start_time": self.start_time.isoformat(), + "error_message": self.error_message, + "summary_page_id": self.summary_page_id, + "updated_at": datetime.now().isoformat(), + } + + def save_to_file(self, filepath: Path | None = None) -> Path: + """Save progress to JSON file.""" + if filepath is None: + filepath = PROGRESS_DIR / f"{self.audit_id}.json" + with open(filepath, "w") as f: + json.dump(self.to_dict(), f, indent=2) + return filepath + + @classmethod + def load_from_file(cls, filepath: Path) -> "CrawlProgress": + """Load progress from JSON file.""" + with open(filepath, "r") as f: + data = json.load(f) + progress = cls() + progress.audit_id = data.get("audit_id", "") + progress.site = data.get("site", "") + progress.status = data.get("status", "unknown") + progress.total_urls = data.get("total_urls", 0) + progress.processed_urls = data.get("processed_urls", 0) + progress.successful_urls = data.get("successful_urls", 0) + progress.failed_urls = data.get("failed_urls", 0) + progress.current_url = data.get("current_url", "") + progress.error_message = data.get("error_message", "") + progress.summary_page_id = data.get("summary_page_id", "") + if data.get("start_time"): + progress.start_time = datetime.fromisoformat(data["start_time"]) + return progress + + +def get_active_crawls() -> list[CrawlProgress]: + """Get all active (running) crawl jobs.""" + active = [] + for filepath in PROGRESS_DIR.glob("*.json"): + try: + progress = CrawlProgress.load_from_file(filepath) + if progress.status == "running": + active.append(progress) + except Exception: + continue + return active + + +def get_all_crawls() -> list[CrawlProgress]: + """Get all crawl jobs (active and completed).""" + crawls = [] + for filepath in sorted(PROGRESS_DIR.glob("*.json"), reverse=True): + try: + progress = CrawlProgress.load_from_file(filepath) + crawls.append(progress) + except Exception: + continue + return crawls + + +def get_crawl_status(audit_id: str) -> CrawlProgress | None: + """Get status of a specific crawl by audit ID.""" + filepath = PROGRESS_DIR / f"{audit_id}.json" + if filepath.exists(): + return CrawlProgress.load_from_file(filepath) + return None + + +@dataclass +class CrawlResult: + """Result of a complete sitemap crawl.""" + site: str + sitemap_url: str + audit_id: str + total_pages: int + successful_pages: int + failed_pages: int + start_time: datetime + end_time: datetime + pages_analyzed: list[PageMetadata] = field(default_factory=list) + notion_page_ids: list[str] = field(default_factory=list) + summary_page_id: str | None = None + + def get_duration(self) -> str: + duration = self.end_time - self.start_time + minutes = int(duration.total_seconds() // 60) + seconds = int(duration.total_seconds() % 60) + return f"{minutes}m {seconds}s" + + +class SitemapCrawler: + """Crawl sitemap URLs and analyze each page.""" + + def __init__( + self, + notion_token: str | None = None, + database_id: str | None = None, + ): + """ + Initialize sitemap crawler. + + Args: + notion_token: Notion API token + database_id: Notion database ID for storing results + """ + self.notion_token = notion_token or config.notion_token + self.database_id = database_id or DEFAULT_PAGES_DATABASE_ID + self.analyzer = PageAnalyzer() + + if self.notion_token: + self.notion = Client(auth=self.notion_token) + else: + self.notion = None + logger.warning("Notion token not configured, results will not be saved") + + def fetch_sitemap_urls(self, sitemap_url: str) -> list[str]: + """ + Fetch and parse URLs from a sitemap. + + Args: + sitemap_url: URL of the sitemap + + Returns: + List of URLs found in the sitemap + """ + try: + response = requests.get(sitemap_url, timeout=30) + response.raise_for_status() + + # Parse XML + root = ET.fromstring(response.content) + + # Handle namespace + namespaces = { + "sm": "http://www.sitemaps.org/schemas/sitemap/0.9" + } + + urls = [] + + # Check if this is a sitemap index + sitemap_tags = root.findall(".//sm:sitemap/sm:loc", namespaces) + if sitemap_tags: + # This is a sitemap index, recursively fetch child sitemaps + logger.info(f"Found sitemap index with {len(sitemap_tags)} child sitemaps") + for loc in sitemap_tags: + if loc.text: + child_urls = self.fetch_sitemap_urls(loc.text) + urls.extend(child_urls) + else: + # Regular sitemap, extract URLs + url_tags = root.findall(".//sm:url/sm:loc", namespaces) + if not url_tags: + # Try without namespace + url_tags = root.findall(".//url/loc") + + for loc in url_tags: + if loc.text: + urls.append(loc.text) + + # Remove duplicates while preserving order + seen = set() + unique_urls = [] + for url in urls: + if url not in seen: + seen.add(url) + unique_urls.append(url) + + logger.info(f"Found {len(unique_urls)} unique URLs in sitemap") + return unique_urls + + except Exception as e: + logger.error(f"Failed to fetch sitemap: {e}") + raise + + def crawl_sitemap( + self, + sitemap_url: str, + delay: float = DEFAULT_DELAY_SECONDS, + max_pages: int = DEFAULT_MAX_PAGES, + progress_callback: Callable[[CrawlProgress], None] | None = None, + save_to_notion: bool = True, + url_filter: Callable[[str], bool] | None = None, + ) -> CrawlResult: + """ + Crawl all URLs in a sitemap sequentially. + + Args: + sitemap_url: URL of the sitemap + delay: Seconds to wait between requests (default: 2.0s) + max_pages: Maximum number of pages to process (default: 500) + progress_callback: Function called with progress updates + save_to_notion: Whether to save results to Notion + url_filter: Optional function to filter URLs (return True to include) + + Returns: + CrawlResult with all analyzed pages + """ + # Parse site info + parsed_sitemap = urlparse(sitemap_url) + site = f"{parsed_sitemap.scheme}://{parsed_sitemap.netloc}" + site_domain = parsed_sitemap.netloc + + # Generate audit ID + audit_id = f"{site_domain}-pages-{datetime.now().strftime('%Y%m%d-%H%M%S')}" + + logger.info(f"Starting sitemap crawl: {sitemap_url}") + logger.info(f"Audit ID: {audit_id}") + logger.info(f"Delay between requests: {delay}s") + + # Initialize progress tracking + progress = CrawlProgress( + audit_id=audit_id, + site=site, + status="running", + ) + + # Fetch URLs + urls = self.fetch_sitemap_urls(sitemap_url) + + # Apply URL filter if provided + if url_filter: + urls = [url for url in urls if url_filter(url)] + logger.info(f"After filtering: {len(urls)} URLs") + + # Apply max pages limit (default: 500 to prevent excessive resource usage) + if len(urls) > max_pages: + logger.warning(f"Sitemap has {len(urls)} URLs, limiting to {max_pages} pages") + logger.warning(f"Use max_pages parameter to adjust this limit") + urls = urls[:max_pages] + logger.info(f"Processing {len(urls)} pages (max: {max_pages})") + + # Update progress with total URLs + progress.total_urls = len(urls) + progress.save_to_file() + + # Initialize result + result = CrawlResult( + site=site, + sitemap_url=sitemap_url, + audit_id=audit_id, + total_pages=len(urls), + successful_pages=0, + failed_pages=0, + start_time=datetime.now(), + end_time=datetime.now(), + ) + + # Process each URL + try: + for i, url in enumerate(urls): + progress.current_url = url + progress.processed_urls = i + progress.save_to_file() # Save progress to file + + if progress_callback: + progress_callback(progress) + + logger.info(f"[{i+1}/{len(urls)}] Analyzing: {url}") + + try: + # Analyze page + metadata = self.analyzer.analyze_url(url) + result.pages_analyzed.append(metadata) + + if metadata.status_code == 200: + progress.successful_urls += 1 + result.successful_pages += 1 + + # Save to Notion + if save_to_notion and self.notion: + page_id = self._save_page_to_notion(metadata, audit_id, site) + if page_id: + result.notion_page_ids.append(page_id) + else: + progress.failed_urls += 1 + result.failed_pages += 1 + + except Exception as e: + logger.error(f"Failed to analyze {url}: {e}") + progress.failed_urls += 1 + result.failed_pages += 1 + + # Wait before next request + if i < len(urls) - 1: # Don't wait after last URL + time.sleep(delay) + + # Final progress update + progress.processed_urls = len(urls) + progress.status = "completed" + if progress_callback: + progress_callback(progress) + + except Exception as e: + progress.status = "failed" + progress.error_message = str(e) + progress.save_to_file() + raise + + # Update result + result.end_time = datetime.now() + + # Create summary page + if save_to_notion and self.notion: + summary_id = self._create_crawl_summary_page(result) + result.summary_page_id = summary_id + progress.summary_page_id = summary_id + + # Save final progress + progress.save_to_file() + + logger.info(f"Crawl complete: {result.successful_pages}/{result.total_pages} pages analyzed") + logger.info(f"Duration: {result.get_duration()}") + + return result + + def _save_page_to_notion( + self, + metadata: PageMetadata, + audit_id: str, + site: str, + ) -> str | None: + """Save page metadata to Notion database.""" + try: + # Build properties + properties = { + "Issue": {"title": [{"text": {"content": f"📄 {metadata.url}"}}]}, + "Category": {"select": {"name": "On-page SEO"}}, + "Priority": {"select": {"name": self._determine_priority(metadata)}}, + "Site": {"url": site}, + "URL": {"url": metadata.url}, + "Audit ID": {"rich_text": [{"text": {"content": audit_id}}]}, + "Found Date": {"date": {"start": datetime.now().strftime("%Y-%m-%d")}}, + } + + # Build page content + children = self._build_page_content(metadata) + + response = self.notion.pages.create( + parent={"database_id": self.database_id}, + properties=properties, + children=children, + ) + + return response["id"] + + except Exception as e: + logger.error(f"Failed to save to Notion: {e}") + return None + + def _determine_priority(self, metadata: PageMetadata) -> str: + """Determine priority based on issues found.""" + if len(metadata.issues) >= 3: + return "High" + elif len(metadata.issues) >= 1: + return "Medium" + elif len(metadata.warnings) >= 3: + return "Medium" + else: + return "Low" + + def _build_page_content(self, metadata: PageMetadata) -> list[dict]: + """Build Notion page content blocks from metadata.""" + children = [] + + # Status summary callout + status_emoji = "✅" if not metadata.issues else "⚠️" if len(metadata.issues) < 3 else "❌" + children.append({ + "object": "block", + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": f"Status: {metadata.status_code} | "}}, + {"type": "text", "text": {"content": f"Response: {metadata.response_time_ms:.0f}ms | "}}, + {"type": "text", "text": {"content": f"Issues: {len(metadata.issues)} | "}}, + {"type": "text", "text": {"content": f"Warnings: {len(metadata.warnings)}"}}, + ], + "icon": {"type": "emoji", "emoji": status_emoji}, + "color": "gray_background" if not metadata.issues else "yellow_background" if len(metadata.issues) < 3 else "red_background", + } + }) + + # Meta Tags Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Meta Tags"}}]} + }) + + # Meta tags table + meta_rows = [ + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Tag"}, "annotations": {"bold": True}}], + [{"type": "text", "text": {"content": "Value"}, "annotations": {"bold": True}}], + [{"type": "text", "text": {"content": "Status"}, "annotations": {"bold": True}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Title"}}], + [{"type": "text", "text": {"content": (metadata.title or "—")[:50]}}], + [{"type": "text", "text": {"content": f"✓ {metadata.title_length} chars" if metadata.title else "✗ Missing"}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Description"}}], + [{"type": "text", "text": {"content": (metadata.meta_description or "—")[:50]}}], + [{"type": "text", "text": {"content": f"✓ {metadata.meta_description_length} chars" if metadata.meta_description else "✗ Missing"}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Canonical"}}], + [{"type": "text", "text": {"content": (metadata.canonical_url or "—")[:50]}}], + [{"type": "text", "text": {"content": "✓" if metadata.canonical_url else "✗ Missing"}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Robots"}}], + [{"type": "text", "text": {"content": metadata.robots_meta or "—"}}], + [{"type": "text", "text": {"content": "✓" if metadata.robots_meta else "—"}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Lang"}}], + [{"type": "text", "text": {"content": metadata.html_lang or "—"}}], + [{"type": "text", "text": {"content": "✓" if metadata.html_lang else "—"}}], + ]}}, + ] + + children.append({ + "object": "block", + "type": "table", + "table": { + "table_width": 3, + "has_column_header": True, + "has_row_header": False, + "children": meta_rows + } + }) + + # Headings Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Headings"}}]} + }) + + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": f"H1: {metadata.h1_count} | "}}, + {"type": "text", "text": {"content": f"Total headings: {len(metadata.headings)}"}}, + ]} + }) + + if metadata.h1_text: + children.append({ + "object": "block", + "type": "quote", + "quote": {"rich_text": [{"type": "text", "text": {"content": metadata.h1_text[:200]}}]} + }) + + # Schema Data Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Structured Data"}}]} + }) + + if metadata.schema_types_found: + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": "Schema types found: "}}, + {"type": "text", "text": {"content": ", ".join(metadata.schema_types_found)}, "annotations": {"code": True}}, + ]} + }) + else: + children.append({ + "object": "block", + "type": "callout", + "callout": { + "rich_text": [{"type": "text", "text": {"content": "No structured data found on this page"}}], + "icon": {"type": "emoji", "emoji": "⚠️"}, + "color": "yellow_background", + } + }) + + # Open Graph Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Open Graph"}}]} + }) + + og = metadata.open_graph + og_status = "✓ Configured" if og.og_title else "✗ Missing" + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": f"Status: {og_status}\n"}}, + {"type": "text", "text": {"content": f"og:title: {og.og_title or '—'}\n"}}, + {"type": "text", "text": {"content": f"og:type: {og.og_type or '—'}"}}, + ]} + }) + + # Links Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Links"}}]} + }) + + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": f"Internal links: {metadata.internal_link_count}\n"}}, + {"type": "text", "text": {"content": f"External links: {metadata.external_link_count}"}}, + ]} + }) + + # Images Section + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Images"}}]} + }) + + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": f"Total: {metadata.images_total} | "}}, + {"type": "text", "text": {"content": f"With alt: {metadata.images_with_alt} | "}}, + {"type": "text", "text": {"content": f"Without alt: {metadata.images_without_alt}"}}, + ]} + }) + + # Hreflang Section (if present) + if metadata.hreflang_tags: + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Hreflang Tags"}}]} + }) + + for tag in metadata.hreflang_tags[:10]: + children.append({ + "object": "block", + "type": "bulleted_list_item", + "bulleted_list_item": {"rich_text": [ + {"type": "text", "text": {"content": f"{tag['lang']}: "}}, + {"type": "text", "text": {"content": tag['url'], "link": {"url": tag['url']}}}, + ]} + }) + + # Issues & Warnings Section + if metadata.issues or metadata.warnings: + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Issues & Warnings"}}]} + }) + + for issue in metadata.issues: + children.append({ + "object": "block", + "type": "to_do", + "to_do": { + "rich_text": [ + {"type": "text", "text": {"content": "❌ "}, "annotations": {"bold": True}}, + {"type": "text", "text": {"content": issue}}, + ], + "checked": False, + } + }) + + for warning in metadata.warnings: + children.append({ + "object": "block", + "type": "to_do", + "to_do": { + "rich_text": [ + {"type": "text", "text": {"content": "⚠️ "}, "annotations": {"bold": True}}, + {"type": "text", "text": {"content": warning}}, + ], + "checked": False, + } + }) + + return children + + def _create_crawl_summary_page(self, result: CrawlResult) -> str | None: + """Create a summary page for the crawl.""" + try: + site_domain = urlparse(result.site).netloc + + # Calculate statistics + total_issues = sum(len(p.issues) for p in result.pages_analyzed) + total_warnings = sum(len(p.warnings) for p in result.pages_analyzed) + pages_with_issues = sum(1 for p in result.pages_analyzed if p.issues) + pages_without_schema = sum(1 for p in result.pages_analyzed if not p.schema_types_found) + pages_without_description = sum(1 for p in result.pages_analyzed if not p.meta_description) + + children = [] + + # Header callout + children.append({ + "object": "block", + "type": "callout", + "callout": { + "rich_text": [ + {"type": "text", "text": {"content": f"Sitemap Crawl Complete\n\n"}}, + {"type": "text", "text": {"content": f"Audit ID: {result.audit_id}\n"}}, + {"type": "text", "text": {"content": f"Duration: {result.get_duration()}\n"}}, + {"type": "text", "text": {"content": f"Pages: {result.successful_pages}/{result.total_pages}"}}, + ], + "icon": {"type": "emoji", "emoji": "📊"}, + "color": "blue_background", + } + }) + + # Statistics table + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Statistics"}}]} + }) + + stats_rows = [ + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Metric"}, "annotations": {"bold": True}}], + [{"type": "text", "text": {"content": "Count"}, "annotations": {"bold": True}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Total Pages"}}], + [{"type": "text", "text": {"content": str(result.total_pages)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Successfully Analyzed"}}], + [{"type": "text", "text": {"content": str(result.successful_pages)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Pages with Issues"}}], + [{"type": "text", "text": {"content": str(pages_with_issues)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Total Issues"}}], + [{"type": "text", "text": {"content": str(total_issues)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Total Warnings"}}], + [{"type": "text", "text": {"content": str(total_warnings)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Pages without Schema"}}], + [{"type": "text", "text": {"content": str(pages_without_schema)}}], + ]}}, + {"type": "table_row", "table_row": {"cells": [ + [{"type": "text", "text": {"content": "Pages without Description"}}], + [{"type": "text", "text": {"content": str(pages_without_description)}}], + ]}}, + ] + + children.append({ + "object": "block", + "type": "table", + "table": { + "table_width": 2, + "has_column_header": True, + "has_row_header": False, + "children": stats_rows + } + }) + + # Pages list + children.append({ + "object": "block", + "type": "heading_2", + "heading_2": {"rich_text": [{"type": "text", "text": {"content": "Analyzed Pages"}}]} + }) + + children.append({ + "object": "block", + "type": "paragraph", + "paragraph": {"rich_text": [ + {"type": "text", "text": {"content": f"Filter by Audit ID in the database to see all {result.successful_pages} page entries."}} + ]} + }) + + # Create the summary page + response = self.notion.pages.create( + parent={"database_id": self.database_id}, + properties={ + "Issue": {"title": [{"text": {"content": f"📊 Sitemap Crawl: {site_domain}"}}]}, + "Category": {"select": {"name": "Technical SEO"}}, + "Priority": {"select": {"name": "High"}}, + "Site": {"url": result.site}, + "Audit ID": {"rich_text": [{"text": {"content": result.audit_id}}]}, + "Found Date": {"date": {"start": datetime.now().strftime("%Y-%m-%d")}}, + }, + children=children, + ) + + logger.info(f"Created crawl summary page: {response['id']}") + return response["id"] + + except Exception as e: + logger.error(f"Failed to create summary page: {e}") + return None + + +def print_progress_status(progress: CrawlProgress) -> None: + """Print formatted progress status.""" + status_emoji = { + "running": "🔄", + "completed": "✅", + "failed": "❌", + }.get(progress.status, "❓") + + print(f""" +{'=' * 60} +{status_emoji} SEO Page Analysis - {progress.status.upper()} +{'=' * 60} +Audit ID: {progress.audit_id} +Site: {progress.site} +Status: {progress.status} + +Progress: {progress.processed_urls}/{progress.total_urls} pages ({progress.get_progress_percent():.1f}%) +Successful: {progress.successful_urls} +Failed: {progress.failed_urls} +Elapsed: {progress.get_elapsed_time()} +ETA: {progress.get_eta() if progress.status == 'running' else 'N/A'} + +Current URL: {progress.current_url[:60] + '...' if len(progress.current_url) > 60 else progress.current_url} +""") + + if progress.summary_page_id: + print(f"Summary: https://www.notion.so/{progress.summary_page_id.replace('-', '')}") + + if progress.error_message: + print(f"Error: {progress.error_message}") + + print("=" * 60) + + +def main(): + """CLI entry point.""" + import argparse + + parser = argparse.ArgumentParser(description="Sitemap Crawler with Background Support") + subparsers = parser.add_subparsers(dest="command", help="Commands") + + # Crawl command + crawl_parser = subparsers.add_parser("crawl", help="Start crawling a sitemap") + crawl_parser.add_argument("sitemap_url", help="URL of the sitemap to crawl") + crawl_parser.add_argument("--delay", "-d", type=float, default=DEFAULT_DELAY_SECONDS, + help=f"Delay between requests in seconds (default: {DEFAULT_DELAY_SECONDS})") + crawl_parser.add_argument("--max-pages", "-m", type=int, default=DEFAULT_MAX_PAGES, + help=f"Maximum pages to process (default: {DEFAULT_MAX_PAGES})") + crawl_parser.add_argument("--no-notion", action="store_true", + help="Don't save to Notion") + crawl_parser.add_argument("--no-limit", action="store_true", + help="Remove page limit (use with caution)") + + # Status command + status_parser = subparsers.add_parser("status", help="Check crawl progress") + status_parser.add_argument("audit_id", nargs="?", help="Specific audit ID to check (optional)") + status_parser.add_argument("--all", "-a", action="store_true", help="Show all crawls (not just active)") + + # List command + list_parser = subparsers.add_parser("list", help="List all crawl jobs") + + args = parser.parse_args() + + # Default to crawl if no command specified but URL provided + if args.command is None: + # Check if first positional arg looks like a URL + import sys + if len(sys.argv) > 1 and (sys.argv[1].startswith("http") or sys.argv[1].endswith(".xml")): + args.command = "crawl" + args.sitemap_url = sys.argv[1] + args.delay = DEFAULT_DELAY_SECONDS + args.max_pages = DEFAULT_MAX_PAGES + args.no_notion = False + args.no_limit = False + else: + parser.print_help() + return + + if args.command == "status": + if args.audit_id: + # Show specific crawl status + progress = get_crawl_status(args.audit_id) + if progress: + print_progress_status(progress) + else: + print(f"No crawl found with audit ID: {args.audit_id}") + else: + # Show active crawls + if args.all: + crawls = get_all_crawls() + label = "All" + else: + crawls = get_active_crawls() + label = "Active" + + if crawls: + print(f"\n{label} Crawl Jobs ({len(crawls)}):") + print("-" * 60) + for p in crawls: + status_emoji = {"running": "🔄", "completed": "✅", "failed": "❌"}.get(p.status, "❓") + print(f"{status_emoji} {p.audit_id}") + print(f" Site: {p.site}") + print(f" Progress: {p.processed_urls}/{p.total_urls} ({p.get_progress_percent():.1f}%)") + print() + else: + print(f"No {label.lower()} crawl jobs found.") + return + + if args.command == "list": + crawls = get_all_crawls() + if crawls: + print(f"\nAll Crawl Jobs ({len(crawls)}):") + print("-" * 80) + print(f"{'Status':<10} {'Audit ID':<45} {'Progress':<15}") + print("-" * 80) + for p in crawls[:20]: # Show last 20 + status_emoji = {"running": "🔄", "completed": "✅", "failed": "❌"}.get(p.status, "❓") + progress_str = f"{p.processed_urls}/{p.total_urls}" + print(f"{status_emoji} {p.status:<7} {p.audit_id:<45} {progress_str:<15}") + if len(crawls) > 20: + print(f"... and {len(crawls) - 20} more") + else: + print("No crawl jobs found.") + return + + if args.command == "crawl": + # Handle --no-limit option + max_pages = args.max_pages + if args.no_limit: + max_pages = 999999 # Effectively unlimited + print("⚠️ WARNING: Page limit disabled. This may take a very long time!") + + def progress_callback(progress: CrawlProgress): + pct = progress.get_progress_percent() + print(f"\r[{pct:5.1f}%] {progress.processed_urls}/{progress.total_urls} pages | " + f"Success: {progress.successful_urls} | Failed: {progress.failed_urls} | " + f"ETA: {progress.get_eta()}", end="", flush=True) + + crawler = SitemapCrawler() + result = crawler.crawl_sitemap( + args.sitemap_url, + delay=args.delay, + max_pages=max_pages, + progress_callback=progress_callback, + save_to_notion=not args.no_notion, + ) + + print() # New line after progress + print() + print("=" * 60) + print("CRAWL COMPLETE") + print("=" * 60) + print(f"Audit ID: {result.audit_id}") + print(f"Total Pages: {result.total_pages}") + print(f"Successful: {result.successful_pages}") + print(f"Failed: {result.failed_pages}") + print(f"Duration: {result.get_duration()}") + if result.summary_page_id: + print(f"Summary Page: https://www.notion.so/{result.summary_page_id.replace('-', '')}") + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_validator.py b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_validator.py new file mode 100644 index 0000000..c645fd3 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/scripts/sitemap_validator.py @@ -0,0 +1,467 @@ +""" +Sitemap Validator - Validate XML sitemaps +========================================== +Purpose: Parse and validate XML sitemaps for SEO compliance +Python: 3.10+ +Usage: + python sitemap_validator.py --url https://example.com/sitemap.xml +""" + +import argparse +import asyncio +import gzip +import json +import logging +import re +from dataclasses import dataclass, field +from datetime import datetime +from io import BytesIO +from typing import Any +from urllib.parse import urljoin, urlparse + +import aiohttp +import requests +from lxml import etree + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger(__name__) + + +@dataclass +class SitemapIssue: + """Represents a sitemap validation issue.""" + + severity: str # "error", "warning", "info" + message: str + url: str | None = None + suggestion: str | None = None + + +@dataclass +class SitemapEntry: + """Represents a single URL entry in sitemap.""" + + loc: str + lastmod: str | None = None + changefreq: str | None = None + priority: float | None = None + status_code: int | None = None + + +@dataclass +class SitemapResult: + """Complete sitemap validation result.""" + + url: str + sitemap_type: str # "urlset" or "sitemapindex" + entries: list[SitemapEntry] = field(default_factory=list) + child_sitemaps: list[str] = field(default_factory=list) + issues: list[SitemapIssue] = field(default_factory=list) + valid: bool = True + stats: dict = field(default_factory=dict) + timestamp: str = field(default_factory=lambda: datetime.now().isoformat()) + + def to_dict(self) -> dict: + """Convert to dictionary for JSON output.""" + return { + "url": self.url, + "sitemap_type": self.sitemap_type, + "valid": self.valid, + "stats": self.stats, + "issues": [ + { + "severity": i.severity, + "message": i.message, + "url": i.url, + "suggestion": i.suggestion, + } + for i in self.issues + ], + "entries_count": len(self.entries), + "child_sitemaps": self.child_sitemaps, + "timestamp": self.timestamp, + } + + +class SitemapValidator: + """Validate XML sitemaps.""" + + SITEMAP_NS = "http://www.sitemaps.org/schemas/sitemap/0.9" + MAX_URLS = 50000 + MAX_SIZE_BYTES = 50 * 1024 * 1024 # 50MB + + VALID_CHANGEFREQ = { + "always", "hourly", "daily", "weekly", + "monthly", "yearly", "never" + } + + def __init__(self, check_urls: bool = False, max_concurrent: int = 10): + self.check_urls = check_urls + self.max_concurrent = max_concurrent + self.session = requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (compatible; SEOAuditBot/1.0)" + }) + + def fetch_sitemap(self, url: str) -> tuple[bytes, bool]: + """Fetch sitemap content, handling gzip compression.""" + try: + response = self.session.get(url, timeout=30) + response.raise_for_status() + + content = response.content + is_gzipped = False + + # Check if gzipped + if url.endswith(".gz") or response.headers.get( + "Content-Encoding" + ) == "gzip": + try: + content = gzip.decompress(content) + is_gzipped = True + except gzip.BadGzipFile: + pass + + return content, is_gzipped + except requests.RequestException as e: + raise RuntimeError(f"Failed to fetch sitemap: {e}") + + def parse_sitemap(self, content: bytes) -> tuple[str, list[dict]]: + """Parse sitemap XML content.""" + try: + root = etree.fromstring(content) + except etree.XMLSyntaxError as e: + raise ValueError(f"Invalid XML: {e}") + + # Remove namespace for easier parsing + nsmap = {"sm": self.SITEMAP_NS} + + # Check if it's a sitemap index or urlset + if root.tag == f"{{{self.SITEMAP_NS}}}sitemapindex": + sitemap_type = "sitemapindex" + entries = [] + for sitemap in root.findall("sm:sitemap", nsmap): + entry = {} + loc = sitemap.find("sm:loc", nsmap) + if loc is not None and loc.text: + entry["loc"] = loc.text.strip() + lastmod = sitemap.find("sm:lastmod", nsmap) + if lastmod is not None and lastmod.text: + entry["lastmod"] = lastmod.text.strip() + if entry.get("loc"): + entries.append(entry) + elif root.tag == f"{{{self.SITEMAP_NS}}}urlset": + sitemap_type = "urlset" + entries = [] + for url in root.findall("sm:url", nsmap): + entry = {} + loc = url.find("sm:loc", nsmap) + if loc is not None and loc.text: + entry["loc"] = loc.text.strip() + lastmod = url.find("sm:lastmod", nsmap) + if lastmod is not None and lastmod.text: + entry["lastmod"] = lastmod.text.strip() + changefreq = url.find("sm:changefreq", nsmap) + if changefreq is not None and changefreq.text: + entry["changefreq"] = changefreq.text.strip().lower() + priority = url.find("sm:priority", nsmap) + if priority is not None and priority.text: + try: + entry["priority"] = float(priority.text.strip()) + except ValueError: + entry["priority"] = None + if entry.get("loc"): + entries.append(entry) + else: + raise ValueError(f"Unknown sitemap type: {root.tag}") + + return sitemap_type, entries + + def validate(self, url: str) -> SitemapResult: + """Validate a sitemap URL.""" + result = SitemapResult(url=url, sitemap_type="unknown") + + # Fetch sitemap + try: + content, is_gzipped = self.fetch_sitemap(url) + except RuntimeError as e: + result.issues.append(SitemapIssue( + severity="error", + message=str(e), + url=url, + )) + result.valid = False + return result + + # Check size + if len(content) > self.MAX_SIZE_BYTES: + result.issues.append(SitemapIssue( + severity="error", + message=f"Sitemap exceeds 50MB limit ({len(content) / 1024 / 1024:.2f}MB)", + url=url, + suggestion="Split sitemap into smaller files using sitemap index", + )) + + # Parse XML + try: + sitemap_type, entries = self.parse_sitemap(content) + except ValueError as e: + result.issues.append(SitemapIssue( + severity="error", + message=str(e), + url=url, + )) + result.valid = False + return result + + result.sitemap_type = sitemap_type + + # Process entries + if sitemap_type == "sitemapindex": + result.child_sitemaps = [e["loc"] for e in entries] + result.stats = { + "child_sitemaps_count": len(entries), + } + else: + # Validate URL entries + url_count = len(entries) + result.stats["url_count"] = url_count + + if url_count > self.MAX_URLS: + result.issues.append(SitemapIssue( + severity="error", + message=f"Sitemap exceeds 50,000 URL limit ({url_count} URLs)", + url=url, + suggestion="Split into multiple sitemaps with sitemap index", + )) + + if url_count == 0: + result.issues.append(SitemapIssue( + severity="warning", + message="Sitemap is empty (no URLs)", + url=url, + )) + + # Validate individual entries + seen_urls = set() + invalid_lastmod = 0 + invalid_changefreq = 0 + invalid_priority = 0 + + for entry in entries: + loc = entry.get("loc", "") + + # Check for duplicates + if loc in seen_urls: + result.issues.append(SitemapIssue( + severity="warning", + message="Duplicate URL in sitemap", + url=loc, + )) + seen_urls.add(loc) + + # Validate lastmod format + lastmod = entry.get("lastmod") + if lastmod: + if not self._validate_date(lastmod): + invalid_lastmod += 1 + + # Validate changefreq + changefreq = entry.get("changefreq") + if changefreq and changefreq not in self.VALID_CHANGEFREQ: + invalid_changefreq += 1 + + # Validate priority + priority = entry.get("priority") + if priority is not None: + if not (0.0 <= priority <= 1.0): + invalid_priority += 1 + + # Create entry object + result.entries.append(SitemapEntry( + loc=loc, + lastmod=lastmod, + changefreq=changefreq, + priority=priority, + )) + + # Add summary issues + if invalid_lastmod > 0: + result.issues.append(SitemapIssue( + severity="warning", + message=f"{invalid_lastmod} URLs with invalid lastmod format", + suggestion="Use ISO 8601 format (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS+TZ)", + )) + + if invalid_changefreq > 0: + result.issues.append(SitemapIssue( + severity="info", + message=f"{invalid_changefreq} URLs with invalid changefreq", + suggestion="Use: always, hourly, daily, weekly, monthly, yearly, never", + )) + + if invalid_priority > 0: + result.issues.append(SitemapIssue( + severity="warning", + message=f"{invalid_priority} URLs with invalid priority (must be 0.0-1.0)", + )) + + result.stats.update({ + "invalid_lastmod": invalid_lastmod, + "invalid_changefreq": invalid_changefreq, + "invalid_priority": invalid_priority, + "has_lastmod": sum(1 for e in result.entries if e.lastmod), + "has_changefreq": sum(1 for e in result.entries if e.changefreq), + "has_priority": sum(1 for e in result.entries if e.priority is not None), + }) + + # Check URLs if requested + if self.check_urls and result.entries: + asyncio.run(self._check_url_status(result)) + + # Determine validity + result.valid = not any(i.severity == "error" for i in result.issues) + + return result + + def _validate_date(self, date_str: str) -> bool: + """Validate ISO 8601 date format.""" + patterns = [ + r"^\d{4}-\d{2}-\d{2}$", + r"^\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}", + ] + return any(re.match(p, date_str) for p in patterns) + + async def _check_url_status(self, result: SitemapResult) -> None: + """Check HTTP status of URLs in sitemap.""" + semaphore = asyncio.Semaphore(self.max_concurrent) + + async def check_url(entry: SitemapEntry) -> None: + async with semaphore: + try: + async with aiohttp.ClientSession() as session: + async with session.head( + entry.loc, + timeout=aiohttp.ClientTimeout(total=10), + allow_redirects=True, + ) as response: + entry.status_code = response.status + except Exception: + entry.status_code = 0 + + await asyncio.gather(*[check_url(e) for e in result.entries[:100]]) + + # Count status codes + status_counts = {} + for entry in result.entries: + if entry.status_code: + status_counts[entry.status_code] = ( + status_counts.get(entry.status_code, 0) + 1 + ) + + result.stats["url_status_codes"] = status_counts + + # Add issues for non-200 URLs + error_count = sum( + 1 for e in result.entries + if e.status_code and e.status_code >= 400 + ) + if error_count > 0: + result.issues.append(SitemapIssue( + severity="warning", + message=f"{error_count} URLs returning error status codes (4xx/5xx)", + suggestion="Remove or fix broken URLs in sitemap", + )) + + def generate_report(self, result: SitemapResult) -> str: + """Generate human-readable validation report.""" + lines = [ + "=" * 60, + "Sitemap Validation Report", + "=" * 60, + f"URL: {result.url}", + f"Type: {result.sitemap_type}", + f"Valid: {'Yes' if result.valid else 'No'}", + f"Timestamp: {result.timestamp}", + "", + ] + + lines.append("Statistics:") + for key, value in result.stats.items(): + lines.append(f" {key}: {value}") + lines.append("") + + if result.child_sitemaps: + lines.append(f"Child Sitemaps ({len(result.child_sitemaps)}):") + for sitemap in result.child_sitemaps[:10]: + lines.append(f" - {sitemap}") + if len(result.child_sitemaps) > 10: + lines.append(f" ... and {len(result.child_sitemaps) - 10} more") + lines.append("") + + if result.issues: + lines.append("Issues Found:") + errors = [i for i in result.issues if i.severity == "error"] + warnings = [i for i in result.issues if i.severity == "warning"] + infos = [i for i in result.issues if i.severity == "info"] + + if errors: + lines.append(f"\n ERRORS ({len(errors)}):") + for issue in errors: + lines.append(f" - {issue.message}") + if issue.url: + lines.append(f" URL: {issue.url}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if warnings: + lines.append(f"\n WARNINGS ({len(warnings)}):") + for issue in warnings: + lines.append(f" - {issue.message}") + if issue.suggestion: + lines.append(f" Suggestion: {issue.suggestion}") + + if infos: + lines.append(f"\n INFO ({len(infos)}):") + for issue in infos: + lines.append(f" - {issue.message}") + + lines.append("") + lines.append("=" * 60) + + return "\n".join(lines) + + +def main(): + """Main entry point for CLI usage.""" + parser = argparse.ArgumentParser( + description="Validate XML sitemaps", + ) + parser.add_argument("--url", "-u", required=True, help="Sitemap URL to validate") + parser.add_argument("--check-urls", action="store_true", + help="Check HTTP status of URLs (slower)") + parser.add_argument("--output", "-o", help="Output file for JSON report") + parser.add_argument("--json", action="store_true", help="Output as JSON") + + args = parser.parse_args() + + validator = SitemapValidator(check_urls=args.check_urls) + result = validator.validate(args.url) + + if args.json or args.output: + output = json.dumps(result.to_dict(), ensure_ascii=False, indent=2) + if args.output: + with open(args.output, "w", encoding="utf-8") as f: + f.write(output) + logger.info(f"Report written to {args.output}") + else: + print(output) + else: + print(validator.generate_report(result)) + + +if __name__ == "__main__": + main() diff --git a/ourdigital-custom-skills/12-ourdigital-seo-audit/templates/notion_database_schema.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/notion_database_schema.json similarity index 100% rename from ourdigital-custom-skills/12-ourdigital-seo-audit/templates/notion_database_schema.json rename to ourdigital-custom-skills/_archive/seo-audit-agent/templates/notion_database_schema.json diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/article.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/article.json new file mode 100644 index 0000000..8d0daab --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/article.json @@ -0,0 +1,32 @@ +{ + "@context": "https://schema.org", + "@type": "{{article_type}}", + "headline": "{{headline}}", + "description": "{{description}}", + "image": [ + "{{image_url_1}}", + "{{image_url_2}}" + ], + "datePublished": "{{date_published}}", + "dateModified": "{{date_modified}}", + "author": { + "@type": "Person", + "name": "{{author_name}}", + "url": "{{author_url}}" + }, + "publisher": { + "@type": "Organization", + "name": "{{publisher_name}}", + "logo": { + "@type": "ImageObject", + "url": "{{publisher_logo_url}}" + } + }, + "mainEntityOfPage": { + "@type": "WebPage", + "@id": "{{page_url}}" + }, + "articleSection": "{{section}}", + "wordCount": "{{word_count}}", + "keywords": "{{keywords}}" +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/breadcrumb.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/breadcrumb.json new file mode 100644 index 0000000..1e8f9e8 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/breadcrumb.json @@ -0,0 +1,24 @@ +{ + "@context": "https://schema.org", + "@type": "BreadcrumbList", + "itemListElement": [ + { + "@type": "ListItem", + "position": 1, + "name": "{{level_1_name}}", + "item": "{{level_1_url}}" + }, + { + "@type": "ListItem", + "position": 2, + "name": "{{level_2_name}}", + "item": "{{level_2_url}}" + }, + { + "@type": "ListItem", + "position": 3, + "name": "{{level_3_name}}", + "item": "{{level_3_url}}" + } + ] +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/faq.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/faq.json new file mode 100644 index 0000000..f90b98c --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/faq.json @@ -0,0 +1,30 @@ +{ + "@context": "https://schema.org", + "@type": "FAQPage", + "mainEntity": [ + { + "@type": "Question", + "name": "{{question_1}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_1}}" + } + }, + { + "@type": "Question", + "name": "{{question_2}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_2}}" + } + }, + { + "@type": "Question", + "name": "{{question_3}}", + "acceptedAnswer": { + "@type": "Answer", + "text": "{{answer_3}}" + } + } + ] +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/local_business.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/local_business.json new file mode 100644 index 0000000..8e6dba4 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/local_business.json @@ -0,0 +1,47 @@ +{ + "@context": "https://schema.org", + "@type": "{{business_type}}", + "name": "{{name}}", + "description": "{{description}}", + "url": "{{url}}", + "telephone": "{{phone}}", + "email": "{{email}}", + "image": "{{image_url}}", + "priceRange": "{{price_range}}", + "address": { + "@type": "PostalAddress", + "streetAddress": "{{street_address}}", + "addressLocality": "{{city}}", + "addressRegion": "{{region}}", + "postalCode": "{{postal_code}}", + "addressCountry": "{{country}}" + }, + "geo": { + "@type": "GeoCoordinates", + "latitude": "{{latitude}}", + "longitude": "{{longitude}}" + }, + "openingHoursSpecification": [ + { + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday"], + "opens": "{{weekday_opens}}", + "closes": "{{weekday_closes}}" + }, + { + "@type": "OpeningHoursSpecification", + "dayOfWeek": ["Saturday", "Sunday"], + "opens": "{{weekend_opens}}", + "closes": "{{weekend_closes}}" + } + ], + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": "{{rating}}", + "reviewCount": "{{review_count}}" + }, + "sameAs": [ + "{{facebook_url}}", + "{{instagram_url}}" + ] +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/organization.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/organization.json new file mode 100644 index 0000000..76c4734 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/organization.json @@ -0,0 +1,37 @@ +{ + "@context": "https://schema.org", + "@type": "Organization", + "name": "{{name}}", + "url": "{{url}}", + "logo": "{{logo_url}}", + "description": "{{description}}", + "foundingDate": "{{founding_date}}", + "founders": [ + { + "@type": "Person", + "name": "{{founder_name}}" + } + ], + "address": { + "@type": "PostalAddress", + "streetAddress": "{{street_address}}", + "addressLocality": "{{city}}", + "addressRegion": "{{region}}", + "postalCode": "{{postal_code}}", + "addressCountry": "{{country}}" + }, + "contactPoint": [ + { + "@type": "ContactPoint", + "telephone": "{{phone}}", + "contactType": "customer service", + "availableLanguage": ["Korean", "English"] + } + ], + "sameAs": [ + "{{facebook_url}}", + "{{twitter_url}}", + "{{linkedin_url}}", + "{{instagram_url}}" + ] +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/product.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/product.json new file mode 100644 index 0000000..d24af66 --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/product.json @@ -0,0 +1,76 @@ +{ + "@context": "https://schema.org", + "@type": "Product", + "name": "{{name}}", + "description": "{{description}}", + "image": [ + "{{image_url_1}}", + "{{image_url_2}}", + "{{image_url_3}}" + ], + "sku": "{{sku}}", + "mpn": "{{mpn}}", + "gtin13": "{{gtin13}}", + "brand": { + "@type": "Brand", + "name": "{{brand_name}}" + }, + "offers": { + "@type": "Offer", + "url": "{{product_url}}", + "price": "{{price}}", + "priceCurrency": "{{currency}}", + "priceValidUntil": "{{price_valid_until}}", + "availability": "https://schema.org/{{availability}}", + "itemCondition": "https://schema.org/{{condition}}", + "seller": { + "@type": "Organization", + "name": "{{seller_name}}" + }, + "shippingDetails": { + "@type": "OfferShippingDetails", + "shippingRate": { + "@type": "MonetaryAmount", + "value": "{{shipping_cost}}", + "currency": "{{currency}}" + }, + "deliveryTime": { + "@type": "ShippingDeliveryTime", + "handlingTime": { + "@type": "QuantitativeValue", + "minValue": "{{handling_min_days}}", + "maxValue": "{{handling_max_days}}", + "unitCode": "DAY" + }, + "transitTime": { + "@type": "QuantitativeValue", + "minValue": "{{transit_min_days}}", + "maxValue": "{{transit_max_days}}", + "unitCode": "DAY" + } + } + } + }, + "aggregateRating": { + "@type": "AggregateRating", + "ratingValue": "{{rating}}", + "reviewCount": "{{review_count}}", + "bestRating": "5", + "worstRating": "1" + }, + "review": [ + { + "@type": "Review", + "reviewRating": { + "@type": "Rating", + "ratingValue": "{{review_rating}}", + "bestRating": "5" + }, + "author": { + "@type": "Person", + "name": "{{reviewer_name}}" + }, + "reviewBody": "{{review_text}}" + } + ] +} diff --git a/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/website.json b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/website.json new file mode 100644 index 0000000..65ec61a --- /dev/null +++ b/ourdigital-custom-skills/_archive/seo-audit-agent/templates/schema_templates/website.json @@ -0,0 +1,25 @@ +{ + "@context": "https://schema.org", + "@type": "WebSite", + "name": "{{site_name}}", + "alternateName": "{{alternate_name}}", + "url": "{{url}}", + "description": "{{description}}", + "inLanguage": "{{language}}", + "potentialAction": { + "@type": "SearchAction", + "target": { + "@type": "EntryPoint", + "urlTemplate": "{{search_url_template}}" + }, + "query-input": "required name=search_term_string" + }, + "publisher": { + "@type": "Organization", + "name": "{{publisher_name}}", + "logo": { + "@type": "ImageObject", + "url": "{{logo_url}}" + } + } +}