Compare commits
10 Commits
159f7ec3f7
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 338176abbe | |||
| 72a6be6a74 | |||
| 9ba0748bf2 | |||
|
|
2aa9d098cb | ||
|
|
397fa2aa5d | ||
| 59e5c519f5 | |||
|
|
a28bfbf847 | ||
| d2d0a2d460 | |||
| dbfaa883cd | |||
| a3ff965b87 |
@@ -20,11 +20,11 @@ Lightweight Google Tag Manager audit tool.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Audit GTM container
|
# Audit GTM container
|
||||||
python custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \
|
||||||
--url https://example.com
|
--url https://example.com
|
||||||
|
|
||||||
# With detailed dataLayer check
|
# With detailed dataLayer check
|
||||||
python custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-gtm-audit/code/scripts/gtm_audit.py \
|
||||||
--url https://example.com --check-datalayer --output report.json
|
--url https://example.com --check-datalayer --output report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -20,15 +20,15 @@ Full GTM management with dataLayer injection and tag generation.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Full GTM management
|
# Full GTM management
|
||||||
python custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
||||||
--url https://example.com --full-audit
|
--url https://example.com --full-audit
|
||||||
|
|
||||||
# Generate dataLayer tag
|
# Generate dataLayer tag
|
||||||
python custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
||||||
--generate-tag purchase --output purchase_tag.html
|
--generate-tag purchase --output purchase_tag.html
|
||||||
|
|
||||||
# Export to Notion
|
# Export to Notion
|
||||||
python custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-gtm-manager/code/scripts/gtm_manager.py \
|
||||||
--url https://example.com --notion-export --database DATABASE_ID
|
--url https://example.com --notion-export --database DATABASE_ID
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -21,15 +21,15 @@ Jamie Clinic content **generation** toolkit.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check content compliance
|
# Check content compliance
|
||||||
python custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
||||||
--input draft.md
|
--input draft.md
|
||||||
|
|
||||||
# With detailed report
|
# With detailed report
|
||||||
python custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
||||||
--input draft.md --verbose --output report.json
|
--input draft.md --verbose --output report.json
|
||||||
|
|
||||||
# Batch check
|
# Batch check
|
||||||
python custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/40-jamie-brand-editor/code/scripts/compliance_checker.py \
|
||||||
--dir ./drafts --output compliance_report.json
|
--dir ./drafts --output compliance_report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
71
.claude/commands/multi-agent-guide.md
Normal file
71
.claude/commands/multi-agent-guide.md
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
---
|
||||||
|
description: Set up multi-agent collaboration framework (Claude, Gemini, Codex) with guardrails
|
||||||
|
argument-hint: [--quick] [--full]
|
||||||
|
---
|
||||||
|
|
||||||
|
# Multi-Agent Guide
|
||||||
|
|
||||||
|
Set up multi-agent collaboration framework for projects where multiple AI agents work together.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "set up multi-agent", "agent guardrails", "multi-agent collaboration"
|
||||||
|
|
||||||
|
## Quick Setup (Recommended)
|
||||||
|
|
||||||
|
Rapid deployment with minimal questions:
|
||||||
|
|
||||||
|
1. Assess project structure
|
||||||
|
2. Ask which agents participate (Claude/Gemini/Codex/Human)
|
||||||
|
3. Create framework files
|
||||||
|
4. Customize ownership matrix
|
||||||
|
|
||||||
|
## Files Created
|
||||||
|
|
||||||
|
```
|
||||||
|
your-project/
|
||||||
|
├── .agent-state/
|
||||||
|
│ ├── tasks.yaml # Task registry
|
||||||
|
│ └── locks.yaml # Lock registry
|
||||||
|
├── tools/
|
||||||
|
│ └── check-ownership.py # Ownership verification
|
||||||
|
├── MULTI_AGENT_FRAMEWORK.md # Consolidated rules
|
||||||
|
├── GEMINI.md # Sub-agent directive (if selected)
|
||||||
|
└── CODEX.md # Sub-agent directive (if selected)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Agent Hierarchy
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────┐
|
||||||
|
│ Claude Code │
|
||||||
|
│ (Lead Agent) │
|
||||||
|
└────────┬────────┘
|
||||||
|
│
|
||||||
|
┌──────────────┼──────────────┐
|
||||||
|
v v v
|
||||||
|
┌──────────┐ ┌──────────┐ ┌──────────┐
|
||||||
|
│ Gemini │ │ Codex │ │ Human │
|
||||||
|
│(Research)│ │ (Speed) │ │ (Review) │
|
||||||
|
└──────────┘ └──────────┘ └──────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commit Message Format
|
||||||
|
|
||||||
|
```
|
||||||
|
[Agent] type(scope): description
|
||||||
|
|
||||||
|
Examples:
|
||||||
|
[Claude] feat(core): implement new feature
|
||||||
|
[Gemini] docs(api): update API documentation
|
||||||
|
[Codex] test(models): add unit tests
|
||||||
|
```
|
||||||
|
|
||||||
|
## Post-Setup
|
||||||
|
|
||||||
|
1. Set agent identity: `export AGENT_AUTHOR=claude`
|
||||||
|
2. Review ownership matrix in `MULTI_AGENT_FRAMEWORK.md`
|
||||||
|
3. Install pre-commit hooks: `pre-commit install` (optional)
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/91-multi-agent-guide/README.md`
|
||||||
|
Related commands: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/91-multi-agent-guide/commands/`
|
||||||
62
.claude/commands/notebooklm-agent.md
Normal file
62
.claude/commands/notebooklm-agent.md
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
---
|
||||||
|
description: Q&A agent using NotebookLM's Gemini-powered analysis with source citations
|
||||||
|
---
|
||||||
|
|
||||||
|
# NotebookLM Agent
|
||||||
|
|
||||||
|
Q&A agent that answers questions using NotebookLM's Gemini-powered analysis with source citations.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install notebooklm-py
|
||||||
|
playwright install chromium
|
||||||
|
notebooklm login # One-time auth
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# List notebooks
|
||||||
|
notebooklm list
|
||||||
|
|
||||||
|
# Set context
|
||||||
|
notebooklm use <notebook_id>
|
||||||
|
|
||||||
|
# Ask questions
|
||||||
|
notebooklm ask "What are the key findings?"
|
||||||
|
notebooklm ask "Elaborate on point 2" # continues conversation
|
||||||
|
notebooklm ask "New topic" --new # new conversation
|
||||||
|
|
||||||
|
# With citations (JSON output)
|
||||||
|
notebooklm ask "Summarize" --json
|
||||||
|
|
||||||
|
# Query specific sources
|
||||||
|
notebooklm ask "Compare" -s source1 -s source2
|
||||||
|
```
|
||||||
|
|
||||||
|
## Autonomy
|
||||||
|
|
||||||
|
**Auto-run:** `list`, `status`, `source list`, `ask`
|
||||||
|
**Ask first:** `delete`, `source add`
|
||||||
|
|
||||||
|
## JSON Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"answer": "Response with [1] [2] citations",
|
||||||
|
"references": [
|
||||||
|
{"source_id": "...", "citation_number": 1, "cited_text": "..."}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Error Recovery
|
||||||
|
|
||||||
|
| Error | Fix |
|
||||||
|
|-------|-----|
|
||||||
|
| No context | `notebooklm use <id>` |
|
||||||
|
| Auth error | `notebooklm login` |
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/50-notebooklm-agent/code/CLAUDE.md`
|
||||||
57
.claude/commands/notebooklm-automation.md
Normal file
57
.claude/commands/notebooklm-automation.md
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
description: Programmatic control over NotebookLM notebooks, sources, and artifacts
|
||||||
|
---
|
||||||
|
|
||||||
|
# NotebookLM Automation
|
||||||
|
|
||||||
|
Complete programmatic control over NotebookLM notebooks, sources, and artifacts.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install notebooklm-py
|
||||||
|
playwright install chromium
|
||||||
|
notebooklm login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Commands
|
||||||
|
|
||||||
|
### Notebooks
|
||||||
|
```bash
|
||||||
|
notebooklm list [--json]
|
||||||
|
notebooklm create "Title" [--json]
|
||||||
|
notebooklm rename <id> "New Name"
|
||||||
|
notebooklm delete <id>
|
||||||
|
notebooklm use <id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Sources
|
||||||
|
```bash
|
||||||
|
notebooklm source add "https://..." [--json]
|
||||||
|
notebooklm source add ./file.pdf
|
||||||
|
notebooklm source list [--json]
|
||||||
|
notebooklm source delete <id>
|
||||||
|
notebooklm source wait <id>
|
||||||
|
```
|
||||||
|
|
||||||
|
### Artifacts
|
||||||
|
```bash
|
||||||
|
notebooklm artifact list [--json]
|
||||||
|
notebooklm artifact wait <id>
|
||||||
|
notebooklm artifact delete <id>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
| Variable | Purpose |
|
||||||
|
|----------|---------|
|
||||||
|
| `NOTEBOOKLM_HOME` | Custom config dir |
|
||||||
|
| `NOTEBOOKLM_AUTH_JSON` | Inline auth (CI/CD) |
|
||||||
|
|
||||||
|
## Autonomy
|
||||||
|
|
||||||
|
**Auto-run:** `list`, `status`, `create`, `use`, `source add`
|
||||||
|
**Ask first:** `delete`, `rename`
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/51-notebooklm-automation/code/CLAUDE.md`
|
||||||
66
.claude/commands/notebooklm-research.md
Normal file
66
.claude/commands/notebooklm-research.md
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
---
|
||||||
|
description: NotebookLM research workflows - web research, Drive search, auto-import, source extraction
|
||||||
|
---
|
||||||
|
|
||||||
|
# NotebookLM Research
|
||||||
|
|
||||||
|
Research workflows: web research, Drive search, auto-import, source extraction.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install notebooklm-py
|
||||||
|
playwright install chromium
|
||||||
|
notebooklm login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Research Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Web research
|
||||||
|
notebooklm source add-research "topic"
|
||||||
|
notebooklm source add-research "topic" --mode deep --import-all
|
||||||
|
notebooklm source add-research "topic" --mode deep --no-wait
|
||||||
|
|
||||||
|
# Drive research
|
||||||
|
notebooklm source add-research "topic" --from drive
|
||||||
|
|
||||||
|
# Status and wait
|
||||||
|
notebooklm research status
|
||||||
|
notebooklm research wait --import-all
|
||||||
|
```
|
||||||
|
|
||||||
|
## Source Extraction
|
||||||
|
|
||||||
|
```bash
|
||||||
|
notebooklm source fulltext <id>
|
||||||
|
notebooklm source guide <id>
|
||||||
|
```
|
||||||
|
|
||||||
|
## Research Modes
|
||||||
|
|
||||||
|
| Mode | Sources | Time |
|
||||||
|
|------|---------|------|
|
||||||
|
| `fast` | 5-10 | seconds |
|
||||||
|
| `deep` | 20+ | 2-5 min |
|
||||||
|
|
||||||
|
## Subagent Pattern
|
||||||
|
|
||||||
|
```python
|
||||||
|
# Non-blocking deep research
|
||||||
|
notebooklm source add-research "topic" --mode deep --no-wait
|
||||||
|
|
||||||
|
# Spawn subagent to wait
|
||||||
|
Task(
|
||||||
|
prompt="Wait for research and import: notebooklm research wait -n {id} --import-all",
|
||||||
|
subagent_type="general-purpose"
|
||||||
|
)
|
||||||
|
```
|
||||||
|
|
||||||
|
## Autonomy
|
||||||
|
|
||||||
|
**Auto-run:** `research status`, `source fulltext`, `source guide`
|
||||||
|
**Ask first:** `source add-research`, `research wait --import-all`
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/53-notebooklm-research/code/CLAUDE.md`
|
||||||
73
.claude/commands/notebooklm-studio.md
Normal file
73
.claude/commands/notebooklm-studio.md
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
---
|
||||||
|
description: Generate NotebookLM Studio content - audio, video, quizzes, flashcards, slides, mind maps
|
||||||
|
---
|
||||||
|
|
||||||
|
# NotebookLM Studio
|
||||||
|
|
||||||
|
Generate NotebookLM Studio content: audio, video, quizzes, flashcards, slides, infographics, mind maps.
|
||||||
|
|
||||||
|
## Prerequisites
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install notebooklm-py
|
||||||
|
playwright install chromium
|
||||||
|
notebooklm login
|
||||||
|
```
|
||||||
|
|
||||||
|
## Generate Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Audio
|
||||||
|
notebooklm generate audio "instructions"
|
||||||
|
notebooklm generate audio --format debate --length longer
|
||||||
|
|
||||||
|
# Video
|
||||||
|
notebooklm generate video --style whiteboard
|
||||||
|
|
||||||
|
# Quiz & Flashcards
|
||||||
|
notebooklm generate quiz --difficulty hard
|
||||||
|
notebooklm generate flashcards --quantity more
|
||||||
|
|
||||||
|
# Visual
|
||||||
|
notebooklm generate slide-deck --format detailed
|
||||||
|
notebooklm generate infographic --orientation portrait
|
||||||
|
notebooklm generate mind-map
|
||||||
|
|
||||||
|
# Data
|
||||||
|
notebooklm generate data-table "description"
|
||||||
|
notebooklm generate report --format study_guide
|
||||||
|
```
|
||||||
|
|
||||||
|
## Download Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
notebooklm artifact list # Check status
|
||||||
|
notebooklm download audio ./podcast.mp3
|
||||||
|
notebooklm download video ./video.mp4
|
||||||
|
notebooklm download quiz --format markdown ./quiz.md
|
||||||
|
notebooklm download flashcards --format json ./cards.json
|
||||||
|
notebooklm download slide-deck ./slides.pdf
|
||||||
|
notebooklm download mind-map ./mindmap.json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Styles & Formats
|
||||||
|
|
||||||
|
**Video:** `classic`, `whiteboard`, `kawaii`, `anime`, `pixel`, `watercolor`, `neon`, `paper`, `sketch`
|
||||||
|
**Audio:** `deep-dive`, `brief`, `critique`, `debate`
|
||||||
|
|
||||||
|
## Timing
|
||||||
|
|
||||||
|
| Type | Time |
|
||||||
|
|------|------|
|
||||||
|
| Mind map | Instant |
|
||||||
|
| Quiz | 5-15 min |
|
||||||
|
| Audio | 10-20 min |
|
||||||
|
| Video | 15-45 min |
|
||||||
|
|
||||||
|
## Autonomy
|
||||||
|
|
||||||
|
**Auto-run:** `artifact list`
|
||||||
|
**Ask first:** `generate *`, `download *`
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/52-notebooklm-studio/code/CLAUDE.md`
|
||||||
@@ -20,15 +20,15 @@ Notion workspace management agent for organizing, restructuring, and maintaining
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Analyze database schema
|
# Analyze database schema
|
||||||
python custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \
|
||||||
--source-db DATABASE_ID --analyze
|
--source-db DATABASE_ID --analyze
|
||||||
|
|
||||||
# Migrate with mapping
|
# Migrate with mapping
|
||||||
python custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/01-notion-organizer/code/scripts/schema_migrator.py \
|
||||||
--source-db SOURCE_ID --target-db TARGET_ID --mapping mapping.json
|
--source-db SOURCE_ID --target-db TARGET_ID --mapping mapping.json
|
||||||
|
|
||||||
# Async bulk operations
|
# Async bulk operations
|
||||||
python custom-skills/01-notion-organizer/code/scripts/async_organizer.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/01-notion-organizer/code/scripts/async_organizer.py \
|
||||||
--database DATABASE_ID --operation archive --filter "Status=Done"
|
--database DATABASE_ID --operation archive --filter "Status=Done"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
63
.claude/commands/notion-writer.md
Normal file
63
.claude/commands/notion-writer.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
|
||||||
|
description: Push markdown content to Notion pages or databases
|
||||||
|
---
|
||||||
|
|
||||||
|
# Notion Writer
|
||||||
|
|
||||||
|
Push markdown content to Notion pages or databases via the Notion API.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "write to Notion", "export to Notion", "노션에 쓰기"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
| Feature | Input | Output |
|
||||||
|
|---------|-------|--------|
|
||||||
|
| Page Content Append | Markdown + Page URL | Appended blocks |
|
||||||
|
| Page Content Replace | Markdown + Page URL | Replaced content |
|
||||||
|
| Database Row Create | Markdown + DB URL + Title | New database row |
|
||||||
|
| Connection Test | API token | Connection status |
|
||||||
|
|
||||||
|
## Environment
|
||||||
|
- `NOTION_TOKEN` / `NOTION_API_KEY` - Notion integration token (required)
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
cd ~/Projects/our-claude-skills/custom-skills/32-notion-writer/code/scripts
|
||||||
|
|
||||||
|
# Test connection
|
||||||
|
python notion_writer.py --test
|
||||||
|
|
||||||
|
# Page info
|
||||||
|
python notion_writer.py --page PAGE_URL --info
|
||||||
|
|
||||||
|
# Write to page (append)
|
||||||
|
python notion_writer.py --page PAGE_URL --file content.md
|
||||||
|
|
||||||
|
# Replace page content
|
||||||
|
python notion_writer.py --page PAGE_URL --file content.md --replace
|
||||||
|
|
||||||
|
# Create database row
|
||||||
|
python notion_writer.py --database DB_URL --title "New Entry" --file content.md
|
||||||
|
|
||||||
|
# From stdin
|
||||||
|
cat report.md | python notion_writer.py --page PAGE_URL --stdin
|
||||||
|
```
|
||||||
|
|
||||||
|
## Markdown Support
|
||||||
|
|
||||||
|
Headings, bulleted/numbered lists, to-do items, quotes, code blocks (with language), dividers, paragraphs.
|
||||||
|
|
||||||
|
## API Limits
|
||||||
|
|
||||||
|
| Limit | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Blocks per request | 100 |
|
||||||
|
| Text per block | 2,000 chars |
|
||||||
|
| Requests/sec | ~3 |
|
||||||
|
|
||||||
|
The script automatically batches large content.
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/32-notion-writer/code/CLAUDE.md`
|
||||||
69
.claude/commands/ourdigital-brand-guide.md
Normal file
69
.claude/commands/ourdigital-brand-guide.md
Normal file
@@ -0,0 +1,69 @@
|
|||||||
|
---
|
||||||
|
description: OurDigital brand standards, writing style, and visual identity reference
|
||||||
|
---
|
||||||
|
|
||||||
|
# OurDigital Brand Guide
|
||||||
|
|
||||||
|
Reference skill for OurDigital brand standards, writing style, and visual identity.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "ourdigital brand guide", "our brand guide"
|
||||||
|
- "ourdigital style check", "our style check"
|
||||||
|
|
||||||
|
## Brand Foundation
|
||||||
|
|
||||||
|
| Element | Content |
|
||||||
|
|---------|---------|
|
||||||
|
| **Brand Name** | OurDigital Clinic |
|
||||||
|
| **Tagline** | 우리 디지털 클리닉 \| Your Digital Health Partner |
|
||||||
|
| **Mission** | 디지털 마케팅 클리닉 for SMBs, 자영업자, 프리랜서, 비영리단체 |
|
||||||
|
| **Promise** | 진단-처방-측정 가능한 성장 |
|
||||||
|
|
||||||
|
### Core Values
|
||||||
|
|
||||||
|
| 가치 | English | 클리닉 메타포 |
|
||||||
|
|------|---------|--------------|
|
||||||
|
| 데이터 중심 | Data-driven | 정밀 검사 |
|
||||||
|
| 실행 지향 | In-Action | 실행 가능한 처방 |
|
||||||
|
| 마케팅 과학 | Marketing Science | 근거 중심 의학 |
|
||||||
|
|
||||||
|
## Channel Tone Matrix
|
||||||
|
|
||||||
|
| Channel | Domain | Personality | Tone |
|
||||||
|
|---------|--------|-------------|------|
|
||||||
|
| Main Hub | ourdigital.org | Professional & Confident | Data-driven, Solution-oriented |
|
||||||
|
| Blog | blog.ourdigital.org | Analytical & Personal | Educational, Thought-provoking |
|
||||||
|
| Journal | journal.ourdigital.org | Conversational & Poetic | Reflective, Cultural Observer |
|
||||||
|
| OurStory | ourstory.day | Intimate & Reflective | Authentic, Personal Journey |
|
||||||
|
|
||||||
|
## Writing Style
|
||||||
|
|
||||||
|
### Korean: 철학-기술 융합체, 역설 활용, 수사적 질문, 우울한 낙관주의
|
||||||
|
### English: Philosophical-Technical Hybridization, Paradox as Device, Rhetorical Questions, Melancholic Optimism
|
||||||
|
|
||||||
|
**Do's:** Use paradox, ask rhetorical questions, connect tech to human implications, blend Korean/English naturally
|
||||||
|
**Don'ts:** Avoid purely declarative tone, don't separate tech from cultural impact, avoid simplistic optimism
|
||||||
|
|
||||||
|
## Visual Identity
|
||||||
|
|
||||||
|
| Token | Color | HEX | Usage |
|
||||||
|
|-------|-------|-----|-------|
|
||||||
|
| --d-black | D.Black | #221814 | Footer, dark backgrounds |
|
||||||
|
| --d-olive | D.Olive | #cedc00 | Primary accent, CTA buttons |
|
||||||
|
| --d-green | D.Green | #287379 | Links hover, secondary accent |
|
||||||
|
| --d-blue | D.Blue | #0075c0 | Links |
|
||||||
|
| --d-beige | D.Beige | #f2f2de | Light text on dark |
|
||||||
|
| --d-gray | D.Gray | #ebebeb | Alt backgrounds |
|
||||||
|
|
||||||
|
**Typography:** Korean: Noto Sans KR | English: Noto Sans, Inter | Grid: 12-column responsive
|
||||||
|
|
||||||
|
## Brand Compliance Check
|
||||||
|
|
||||||
|
1. **Tone Match**: Does it match the channel's personality?
|
||||||
|
2. **Value Alignment**: Reflects Data-driven, In-Action, Marketing Science?
|
||||||
|
3. **Philosophy Check**: Precision + Empathy + Evidence present?
|
||||||
|
4. **Language Style**: Appropriate blend of Korean/English terms?
|
||||||
|
5. **Visual Consistency**: Uses approved color palette?
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/01-ourdigital-brand-guide/desktop/SKILL.md`
|
||||||
@@ -20,15 +20,15 @@ Visual storytelling toolkit for blog featured images.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Generate image prompt
|
# Generate image prompt
|
||||||
python custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \
|
||||||
--topic "AI identity" --mood "contemplative"
|
--topic "AI identity" --mood "contemplative"
|
||||||
|
|
||||||
# From essay text
|
# From essay text
|
||||||
python custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/30-ourdigital-designer/code/scripts/generate_prompt.py \
|
||||||
--input essay.txt --auto-extract
|
--input essay.txt --auto-extract
|
||||||
|
|
||||||
# Calibrate mood
|
# Calibrate mood
|
||||||
python custom-skills/30-ourdigital-designer/code/scripts/mood_calibrator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/30-ourdigital-designer/code/scripts/mood_calibrator.py \
|
||||||
--input "essay excerpt" --style "minimalist"
|
--input "essay excerpt" --style "minimalist"
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -20,13 +20,13 @@ Notion-to-presentation workflow for branded slides.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Full automated workflow
|
# Full automated workflow
|
||||||
python custom-skills/32-ourdigital-presentation/code/scripts/run_workflow.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-ourdigital-presentation/code/scripts/run_workflow.py \
|
||||||
--notion-url [NOTION_URL] --output presentation.pptx
|
--notion-url [NOTION_URL] --output presentation.pptx
|
||||||
|
|
||||||
# Step-by-step
|
# Step-by-step
|
||||||
python custom-skills/32-ourdigital-presentation/code/scripts/extract_notion.py [URL] > research.json
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-ourdigital-presentation/code/scripts/extract_notion.py [URL] > research.json
|
||||||
python custom-skills/32-ourdigital-presentation/code/scripts/synthesize_content.py research.json > synthesis.json
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-ourdigital-presentation/code/scripts/synthesize_content.py research.json > synthesis.json
|
||||||
python custom-skills/32-ourdigital-presentation/code/scripts/apply_brand.py synthesis.json --output presentation.pptx
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-ourdigital-presentation/code/scripts/apply_brand.py synthesis.json --output presentation.pptx
|
||||||
```
|
```
|
||||||
|
|
||||||
## Pipeline
|
## Pipeline
|
||||||
|
|||||||
@@ -20,17 +20,17 @@ Research-to-publication workflow for OurDigital blogs.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Export to Ulysses
|
# Export to Ulysses
|
||||||
python custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
||||||
--input research.md --group "Blog Drafts"
|
--input research.md --group "Blog Drafts"
|
||||||
|
|
||||||
# With tags
|
# With tags
|
||||||
python custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
||||||
--input research.md \
|
--input research.md \
|
||||||
--group "Blog Drafts" \
|
--group "Blog Drafts" \
|
||||||
--tags "AI,research,draft"
|
--tags "AI,research,draft"
|
||||||
|
|
||||||
# From Notion export
|
# From Notion export
|
||||||
python custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-ourdigital-research/code/scripts/export_to_ulysses.py \
|
||||||
--notion-export notion_export.zip \
|
--notion-export notion_export.zip \
|
||||||
--group "From Notion"
|
--group "From Notion"
|
||||||
```
|
```
|
||||||
|
|||||||
233
.claude/commands/reference-curator.md
Normal file
233
.claude/commands/reference-curator.md
Normal file
@@ -0,0 +1,233 @@
|
|||||||
|
---
|
||||||
|
description: Full reference curation pipeline - discovery, crawl, store, distill, review, export with configurable depth
|
||||||
|
argument-hint: <topic|urls|manifest> [--depth light|standard|deep|full] [--output ~/Documents/reference-library/] [--max-sources 100] [--auto-approve] [--export-format project_files]
|
||||||
|
allowed-tools: WebSearch, WebFetch, Read, Write, Bash, Grep, Glob, Task
|
||||||
|
---
|
||||||
|
|
||||||
|
# Reference Curator Pipeline
|
||||||
|
|
||||||
|
Full-stack orchestration of the 6-skill reference curation workflow.
|
||||||
|
|
||||||
|
## Input Modes
|
||||||
|
|
||||||
|
| Mode | Input Example | Pipeline Start |
|
||||||
|
|------|---------------|----------------|
|
||||||
|
| **Topic** | `"Claude system prompts"` | reference-discovery |
|
||||||
|
| **URLs** | `https://docs.anthropic.com/...` | web-crawler (skip discovery) |
|
||||||
|
| **Manifest** | `./manifest.json` | web-crawler (resume) |
|
||||||
|
|
||||||
|
## Arguments
|
||||||
|
|
||||||
|
- `<input>`: Required. Topic string, URL(s), or manifest file path
|
||||||
|
- `--depth`: Crawl depth level (default: `standard`). See Depth Levels below
|
||||||
|
- `--output`: Output directory path (default: `~/Documents/reference-library/`)
|
||||||
|
- `--max-sources`: Max sources to discover (default: 100)
|
||||||
|
- `--max-pages`: Max pages per source to crawl (default varies by depth)
|
||||||
|
- `--auto-approve`: Auto-approve scores above threshold
|
||||||
|
- `--threshold`: Approval threshold (default: 0.85)
|
||||||
|
- `--max-iterations`: Max QA loop iterations per document (default: 3)
|
||||||
|
- `--export-format`: `project_files`, `fine_tuning`, `jsonl` (default: project_files)
|
||||||
|
- `--include-subdomains`: Include subdomains in site mapping (default: false)
|
||||||
|
- `--follow-external`: Follow external links found in content (default: false)
|
||||||
|
|
||||||
|
## Output Directory
|
||||||
|
|
||||||
|
**IMPORTANT: Never store output in any claude-skills, `.claude/`, or skill-related directory.**
|
||||||
|
|
||||||
|
The `--output` argument sets the base path for all pipeline output. If omitted, the default is `~/Documents/reference-library/`.
|
||||||
|
|
||||||
|
### Directory Structure
|
||||||
|
|
||||||
|
```
|
||||||
|
{output}/
|
||||||
|
├── {topic-slug}/ # One folder per pipeline run
|
||||||
|
│ ├── README.md # Index with table of contents
|
||||||
|
│ ├── 00-page-name.md # Individual page files
|
||||||
|
│ ├── 01-page-name.md
|
||||||
|
│ ├── ...
|
||||||
|
│ ├── {topic-slug}-complete.md # Combined bundle (all pages)
|
||||||
|
│ └── manifest.json # Crawl metadata
|
||||||
|
├── pipeline_state/ # Resume state (auto-managed)
|
||||||
|
│ └── run_XXX/state.json
|
||||||
|
└── exports/ # Fine-tuning / JSONL exports
|
||||||
|
```
|
||||||
|
|
||||||
|
### Resolution Rules
|
||||||
|
|
||||||
|
1. If `--output` is provided, use that path exactly
|
||||||
|
2. If not provided, use `~/Documents/reference-library/`
|
||||||
|
3. **Before writing any files**, check if the output directory exists
|
||||||
|
4. If the directory does NOT exist, **ask the user for permission** before creating it:
|
||||||
|
- Show the full resolved path that will be created
|
||||||
|
- Wait for explicit user approval
|
||||||
|
- Only then run `mkdir -p <path>`
|
||||||
|
5. The topic slug is derived from the input (URL domain+path or topic string)
|
||||||
|
|
||||||
|
### Examples
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Uses default: ~/Documents/reference-library/glossary-for-wordpress/
|
||||||
|
/reference-curator https://docs.codeat.co/glossary/
|
||||||
|
|
||||||
|
# Custom path: /tmp/research/mcp-docs/
|
||||||
|
/reference-curator "MCP servers" --output /tmp/research
|
||||||
|
|
||||||
|
# Explicit home subfolder: ~/Projects/client-docs/api-reference/
|
||||||
|
/reference-curator https://api.example.com/docs --output ~/Projects/client-docs
|
||||||
|
```
|
||||||
|
|
||||||
|
## Depth Levels
|
||||||
|
|
||||||
|
### `--depth light`
|
||||||
|
Fast scan for quick reference. Main content only, minimal crawling.
|
||||||
|
|
||||||
|
| Parameter | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| `onlyMainContent` | `true` |
|
||||||
|
| `formats` | `["markdown"]` |
|
||||||
|
| `maxDiscoveryDepth` | 1 |
|
||||||
|
| `max-pages` default | 20 |
|
||||||
|
| Map limit | 50 |
|
||||||
|
| `deduplicateSimilarURLs` | `true` |
|
||||||
|
| Follow sub-links | No |
|
||||||
|
| JS rendering wait | None |
|
||||||
|
|
||||||
|
**Best for:** Quick lookups, single-page references, API docs you already know.
|
||||||
|
|
||||||
|
### `--depth standard` (default)
|
||||||
|
Balanced crawl. Main content with links for cross-referencing.
|
||||||
|
|
||||||
|
| Parameter | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| `onlyMainContent` | `true` |
|
||||||
|
| `formats` | `["markdown", "links"]` |
|
||||||
|
| `maxDiscoveryDepth` | 2 |
|
||||||
|
| `max-pages` default | 50 |
|
||||||
|
| Map limit | 100 |
|
||||||
|
| `deduplicateSimilarURLs` | `true` |
|
||||||
|
| Follow sub-links | Same-domain only |
|
||||||
|
| JS rendering wait | None |
|
||||||
|
|
||||||
|
**Best for:** Documentation sites, plugin guides, knowledge bases.
|
||||||
|
|
||||||
|
### `--depth deep`
|
||||||
|
Thorough crawl. Full page content including sidebars, nav, and related pages.
|
||||||
|
|
||||||
|
| Parameter | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| `onlyMainContent` | `false` |
|
||||||
|
| `formats` | `["markdown", "links", "html"]` |
|
||||||
|
| `maxDiscoveryDepth` | 3 |
|
||||||
|
| `max-pages` default | 150 |
|
||||||
|
| Map limit | 300 |
|
||||||
|
| `deduplicateSimilarURLs` | `true` |
|
||||||
|
| Follow sub-links | Same-domain + linked resources |
|
||||||
|
| JS rendering wait | `waitFor: 3000` |
|
||||||
|
| `includeSubdomains` | `true` |
|
||||||
|
|
||||||
|
**Best for:** Complete product documentation, research material, sites with sidebars/code samples hidden behind tabs.
|
||||||
|
|
||||||
|
### `--depth full`
|
||||||
|
Exhaustive crawl. Everything captured including raw HTML, screenshots, and external references.
|
||||||
|
|
||||||
|
| Parameter | Value |
|
||||||
|
|-----------|-------|
|
||||||
|
| `onlyMainContent` | `false` |
|
||||||
|
| `formats` | `["markdown", "html", "rawHtml", "links"]` |
|
||||||
|
| `maxDiscoveryDepth` | 5 |
|
||||||
|
| `max-pages` default | 500 |
|
||||||
|
| Map limit | 1000 |
|
||||||
|
| `deduplicateSimilarURLs` | `false` |
|
||||||
|
| Follow sub-links | All (same-domain + external references) |
|
||||||
|
| JS rendering wait | `waitFor: 5000` |
|
||||||
|
| `includeSubdomains` | `true` |
|
||||||
|
| Screenshots | Capture for JS-heavy pages |
|
||||||
|
|
||||||
|
**Best for:** Archival, migration references, preserving sites, training data collection.
|
||||||
|
|
||||||
|
## Depth Comparison
|
||||||
|
|
||||||
|
```
|
||||||
|
light ████░░░░░░░░░░░░ Speed: fastest Pages: ~20 Content: main only
|
||||||
|
standard ████████░░░░░░░░ Speed: fast Pages: ~50 Content: main + links
|
||||||
|
deep ████████████░░░░ Speed: moderate Pages: ~150 Content: full page + HTML
|
||||||
|
full ████████████████ Speed: slow Pages: ~500 Content: everything + raw
|
||||||
|
```
|
||||||
|
|
||||||
|
## Pipeline Stages
|
||||||
|
|
||||||
|
```
|
||||||
|
1. reference-discovery (topic mode only)
|
||||||
|
2. web-crawler ← depth controls this stage
|
||||||
|
3. content-repository
|
||||||
|
4. content-distiller <--------+
|
||||||
|
5. quality-reviewer |
|
||||||
|
+-- APPROVE -> export |
|
||||||
|
+-- REFACTOR -----------------+
|
||||||
|
+-- DEEP_RESEARCH -> crawler -+
|
||||||
|
+-- REJECT -> archive
|
||||||
|
6. markdown-exporter
|
||||||
|
```
|
||||||
|
|
||||||
|
## Crawl Execution by Depth
|
||||||
|
|
||||||
|
When executing the crawl (Stage 2), apply the depth settings to the Firecrawl tools:
|
||||||
|
|
||||||
|
### Site Mapping (`firecrawl_map`)
|
||||||
|
```
|
||||||
|
firecrawl_map:
|
||||||
|
url: <target>
|
||||||
|
limit: {depth.map_limit}
|
||||||
|
includeSubdomains: {depth.includeSubdomains}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Page Scraping (`firecrawl_scrape`)
|
||||||
|
```
|
||||||
|
firecrawl_scrape:
|
||||||
|
url: <page>
|
||||||
|
formats: {depth.formats}
|
||||||
|
onlyMainContent: {depth.onlyMainContent}
|
||||||
|
waitFor: {depth.waitFor} # deep/full only
|
||||||
|
excludeTags: ["nav", "footer"] # light/standard only
|
||||||
|
```
|
||||||
|
|
||||||
|
### Batch Crawling (`firecrawl_crawl`) - for deep/full only
|
||||||
|
```
|
||||||
|
firecrawl_crawl:
|
||||||
|
url: <target>
|
||||||
|
maxDiscoveryDepth: {depth.maxDiscoveryDepth}
|
||||||
|
limit: {depth.max_pages}
|
||||||
|
deduplicateSimilarURLs: {depth.dedup}
|
||||||
|
scrapeOptions:
|
||||||
|
formats: {depth.formats}
|
||||||
|
onlyMainContent: {depth.onlyMainContent}
|
||||||
|
waitFor: {depth.waitFor}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Example Usage
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Quick scan of a single doc page
|
||||||
|
/reference-curator https://docs.example.com/api --depth light
|
||||||
|
|
||||||
|
# Standard documentation crawl (default)
|
||||||
|
/reference-curator "Glossary for WordPress" --max-sources 5
|
||||||
|
|
||||||
|
# Deep crawl capturing full page content and HTML
|
||||||
|
/reference-curator https://docs.codeat.co/glossary/ --depth deep
|
||||||
|
|
||||||
|
# Full archival crawl with all formats
|
||||||
|
/reference-curator https://docs.anthropic.com --depth full --max-pages 300
|
||||||
|
|
||||||
|
# Deep crawl with auto-approval and fine-tuning export
|
||||||
|
/reference-curator "MCP servers" --depth deep --auto-approve --export-format fine_tuning
|
||||||
|
```
|
||||||
|
|
||||||
|
## Related Sub-commands
|
||||||
|
|
||||||
|
Individual stages available at: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/90-reference-curator/commands/`
|
||||||
|
- `/reference-discovery`, `/web-crawler`, `/content-repository`
|
||||||
|
- `/content-distiller`, `/quality-reviewer`, `/markdown-exporter`
|
||||||
|
|
||||||
|
## Source
|
||||||
|
Full details: `/Users/ourdigital/Projects/our-claude-skills/custom-skills/90-reference-curator/README.md`
|
||||||
63
.claude/commands/seo-ai-visibility.md
Normal file
63
.claude/commands/seo-ai-visibility.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
|
||||||
|
description: AI search visibility - citations, brand radar, share of voice tracking
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO AI Visibility
|
||||||
|
|
||||||
|
Track brand visibility in AI-generated search answers with citation analysis and share of voice monitoring.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "AI visibility", "AI search", "AI citations", "AI share of voice"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **AI Impressions Tracking** - How often brand appears in AI answers
|
||||||
|
2. **AI Mentions Monitoring** - Brand mention frequency across AI engines
|
||||||
|
3. **Share of Voice** - AI search SOV vs competitors with trend analysis
|
||||||
|
4. **Citation Analysis** - Which domains and pages AI engines cite
|
||||||
|
5. **AI Response Analysis** - How the brand appears in AI-generated answers
|
||||||
|
6. **Competitor Comparison** - Side-by-side AI visibility benchmarking
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# AI visibility overview
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_visibility_tracker.py \
|
||||||
|
--target example.com --json
|
||||||
|
|
||||||
|
# With competitor comparison
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_visibility_tracker.py \
|
||||||
|
--target example.com --competitor comp1.com --competitor comp2.com --json
|
||||||
|
|
||||||
|
# Historical trend (impressions/mentions)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_visibility_tracker.py \
|
||||||
|
--target example.com --history --json
|
||||||
|
|
||||||
|
# Share of voice analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_visibility_tracker.py \
|
||||||
|
--target example.com --sov --json
|
||||||
|
|
||||||
|
# AI citation analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_citation_analyzer.py \
|
||||||
|
--target example.com --json
|
||||||
|
|
||||||
|
# Cited domains analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_citation_analyzer.py \
|
||||||
|
--target example.com --cited-domains --json
|
||||||
|
|
||||||
|
# Cited pages analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_citation_analyzer.py \
|
||||||
|
--target example.com --cited-pages --json
|
||||||
|
|
||||||
|
# AI response content analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/27-seo-ai-visibility/code/scripts/ai_citation_analyzer.py \
|
||||||
|
--target example.com --responses --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- AI impressions and mentions with trend indicators
|
||||||
|
- Share of voice percentage vs competitors
|
||||||
|
- Cited domains and pages ranked by frequency
|
||||||
|
- AI response samples showing brand context
|
||||||
|
- Recommendations for improving AI visibility
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: AI Search Visibility)
|
||||||
53
.claude/commands/seo-competitor-intel.md
Normal file
53
.claude/commands/seo-competitor-intel.md
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
---
|
||||||
|
description: SEO competitor intelligence and benchmarking
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Competitor Intelligence
|
||||||
|
|
||||||
|
Competitor profiling, benchmarking, and threat scoring for comprehensive SEO competitive analysis.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "competitor analysis", "competitive intel", "경쟁사 분석"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Competitor Profiling** - Auto-discover competitors, build profile cards (DR, traffic, keywords, backlinks, content volume)
|
||||||
|
2. **Head-to-Head Matrix** - Comparison across all SEO dimensions
|
||||||
|
3. **Keyword Overlap** - Shared, unique, and gap keyword analysis
|
||||||
|
4. **Threat Scoring** - 0-100 score based on growth trajectory, keyword overlap, DR gap
|
||||||
|
5. **Korean Market** - Naver Blog/Cafe presence detection for competitors
|
||||||
|
6. **Competitive Monitoring** - Traffic trends, keyword movement, content velocity over time
|
||||||
|
7. **Market Share** - Organic traffic-based market share estimation
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Auto-discover and profile competitors
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-seo-competitor-intel/code/scripts/competitor_profiler.py \
|
||||||
|
--target https://example.com --json
|
||||||
|
|
||||||
|
# Specify competitors manually
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-seo-competitor-intel/code/scripts/competitor_profiler.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com --competitor https://comp2.com --json
|
||||||
|
|
||||||
|
# Include Korean market analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-seo-competitor-intel/code/scripts/competitor_profiler.py \
|
||||||
|
--target https://example.com --korean-market --json
|
||||||
|
|
||||||
|
# 30-day competitive monitoring
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-seo-competitor-intel/code/scripts/competitive_monitor.py \
|
||||||
|
--target https://example.com --period 30 --json
|
||||||
|
|
||||||
|
# Traffic trend comparison (90 days)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/31-seo-competitor-intel/code/scripts/competitive_monitor.py \
|
||||||
|
--target https://example.com --scope traffic --period 90 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Competitor profile cards with DR, traffic, keywords, referring domains
|
||||||
|
- Head-to-head comparison matrix
|
||||||
|
- Keyword overlap analysis (shared/unique/gap)
|
||||||
|
- Threat scores (0-100) per competitor
|
||||||
|
- Traffic trend and market share charts
|
||||||
|
- Alerts for significant competitive movements
|
||||||
|
- Saved to Notion SEO Audit Log (Category: Competitor Intelligence, Audit ID: COMP-YYYYMMDD-NNN)
|
||||||
56
.claude/commands/seo-content-strategy.md
Normal file
56
.claude/commands/seo-content-strategy.md
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
---
|
||||||
|
description: Content audit, decay detection, gap analysis, and brief generation
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Content Strategy
|
||||||
|
|
||||||
|
Content inventory, performance scoring, decay detection, topic gap analysis, cluster mapping, and SEO content brief generation.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "content strategy", "content audit", "콘텐츠 전략"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Content Audit** - Inventory via sitemap crawl with performance scoring
|
||||||
|
2. **Content Decay Detection** - Identify pages losing traffic over time
|
||||||
|
3. **Content Type Classification** - Blog, product, service, landing, resource
|
||||||
|
4. **Topic Gap Analysis** - Find missing topics vs competitors with cluster mapping
|
||||||
|
5. **Editorial Calendar** - Priority-scored publishing calendar from gap analysis
|
||||||
|
6. **Content Brief Generation** - SEO briefs with H2/H3 outlines, keyword targets, and word count recommendations
|
||||||
|
7. **Korean Content Analysis** - Naver Blog format and review/후기 content patterns
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full content audit
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_auditor.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# Detect decaying content
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_auditor.py \
|
||||||
|
--url https://example.com --decay --json
|
||||||
|
|
||||||
|
# Filter by content type
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_auditor.py \
|
||||||
|
--url https://example.com --type blog --json
|
||||||
|
|
||||||
|
# Content gap analysis with topic clusters
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_gap_analyzer.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com --clusters --json
|
||||||
|
|
||||||
|
# Generate content brief for keyword
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_brief_generator.py \
|
||||||
|
--keyword "치과 임플란트 비용" --url https://example.com --json
|
||||||
|
|
||||||
|
# Brief with competitor analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/23-seo-content-strategy/code/scripts/content_brief_generator.py \
|
||||||
|
--keyword "dental implant cost" --url https://example.com --competitors 5 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Content inventory with page count by type and average performance score
|
||||||
|
- Decaying content list with traffic trend data
|
||||||
|
- Topic gaps and cluster map with pillar/cluster pages
|
||||||
|
- Editorial calendar with priority scores
|
||||||
|
- Content briefs with outline, keywords, word count targets, and internal link suggestions
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: Content Strategy, ID: CONTENT-YYYYMMDD-NNN)
|
||||||
54
.claude/commands/seo-crawl-budget.md
Normal file
54
.claude/commands/seo-crawl-budget.md
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
---
|
||||||
|
description: Crawl budget optimization and log analysis
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Crawl Budget
|
||||||
|
|
||||||
|
Server access log analysis, bot profiling, and crawl budget waste identification.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "crawl budget", "log analysis", "크롤 예산"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Log Parsing** - Parse Nginx, Apache, CloudFront access logs (streaming for >1GB files)
|
||||||
|
2. **Bot Identification** - Googlebot, Yeti/Naver, Bingbot, Daumoa/Kakao, and others by User-Agent
|
||||||
|
3. **Per-Bot Profiling** - Crawl frequency, depth distribution, status codes, crawl patterns
|
||||||
|
4. **Waste Detection** - Parameter URLs, low-value pages, redirect chains, soft 404s, duplicate URLs
|
||||||
|
5. **Orphan Pages** - Pages in sitemap but never crawled, crawled but not in sitemap
|
||||||
|
6. **Optimization Plan** - robots.txt suggestions, URL parameter handling, noindex recommendations
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Parse Nginx access log
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/log_parser.py \
|
||||||
|
--log-file /var/log/nginx/access.log --json
|
||||||
|
|
||||||
|
# Parse Apache log, filter by Googlebot
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/log_parser.py \
|
||||||
|
--log-file /var/log/apache2/access.log --format apache --bot googlebot --json
|
||||||
|
|
||||||
|
# Parse gzipped log in streaming mode
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/log_parser.py \
|
||||||
|
--log-file access.log.gz --streaming --json
|
||||||
|
|
||||||
|
# Full crawl budget analysis with sitemap comparison
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/crawl_budget_analyzer.py \
|
||||||
|
--log-file access.log --sitemap https://example.com/sitemap.xml --json
|
||||||
|
|
||||||
|
# Waste identification only
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/crawl_budget_analyzer.py \
|
||||||
|
--log-file access.log --scope waste --json
|
||||||
|
|
||||||
|
# Orphan page detection
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/32-seo-crawl-budget/code/scripts/crawl_budget_analyzer.py \
|
||||||
|
--log-file access.log --sitemap https://example.com/sitemap.xml --scope orphans --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Bot request counts, status code distribution, top crawled URLs per bot
|
||||||
|
- Crawl waste breakdown (parameter URLs, redirects, soft 404s, duplicates)
|
||||||
|
- Orphan page lists (in sitemap not crawled, crawled not in sitemap)
|
||||||
|
- Efficiency score (0-100) with optimization recommendations
|
||||||
|
- Saved to Notion SEO Audit Log (Category: Crawl Budget, Audit ID: CRAWL-YYYYMMDD-NNN)
|
||||||
56
.claude/commands/seo-ecommerce.md
Normal file
56
.claude/commands/seo-ecommerce.md
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
---
|
||||||
|
description: E-commerce SEO audit and product schema validation
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO E-Commerce
|
||||||
|
|
||||||
|
Product page SEO audit, product schema validation, category taxonomy analysis, and Korean marketplace presence checking.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "e-commerce SEO", "product SEO", "이커머스 SEO"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Product Page Audit** - Titles, meta descriptions, image alt text, H1 structure
|
||||||
|
2. **Category Taxonomy Analysis** - Depth, breadcrumb implementation, faceted navigation
|
||||||
|
3. **Duplicate Content Detection** - Parameter URLs, product variants, pagination issues
|
||||||
|
4. **Pagination SEO** - Validate rel=prev/next, canonical tags, infinite scroll handling
|
||||||
|
5. **Product Schema Validation** - Product, Offer, AggregateRating, Review, BreadcrumbList
|
||||||
|
6. **Rich Result Eligibility** - Required and optional property completeness checks
|
||||||
|
7. **Korean Marketplace Presence** - Naver Smart Store, Coupang, Gmarket, 11번가 detection
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full e-commerce SEO audit
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# Product page audit only
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py \
|
||||||
|
--url https://example.com --scope products --json
|
||||||
|
|
||||||
|
# Category taxonomy analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py \
|
||||||
|
--url https://example.com --scope categories --json
|
||||||
|
|
||||||
|
# Korean marketplace presence check
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py \
|
||||||
|
--url https://example.com --korean-marketplaces --json
|
||||||
|
|
||||||
|
# Validate product schema on single page
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/product_schema_checker.py \
|
||||||
|
--url https://example.com/product/123 --json
|
||||||
|
|
||||||
|
# Batch validate from sitemap (sample 50 pages)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/24-seo-ecommerce/code/scripts/product_schema_checker.py \
|
||||||
|
--sitemap https://example.com/product-sitemap.xml --sample 50 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Product page issue list by severity (critical, high, medium, low)
|
||||||
|
- Category structure analysis (depth, breadcrumbs, faceted nav issues)
|
||||||
|
- Schema validation results (pages with/without schema, common errors)
|
||||||
|
- Rich result eligibility assessment
|
||||||
|
- Korean marketplace presence status (Naver Smart Store, Coupang, Gmarket)
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: E-Commerce SEO, ID: ECOM-YYYYMMDD-NNN)
|
||||||
@@ -20,11 +20,11 @@ Keyword strategy and content architecture for gateway pages.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Analyze keyword
|
# Analyze keyword
|
||||||
python custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/29-seo-gateway-architect/code/scripts/keyword_analyzer.py \
|
||||||
--topic "눈 성형"
|
--topic "눈 성형"
|
||||||
|
|
||||||
# With location targeting
|
# With location targeting
|
||||||
python custom-skills/17-seo-gateway-architect/code/scripts/keyword_analyzer.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/29-seo-gateway-architect/code/scripts/keyword_analyzer.py \
|
||||||
--topic "눈 성형" --market "강남" --output strategy.json
|
--topic "눈 성형" --market "강남" --output strategy.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -20,10 +20,10 @@ Generate SEO-optimized gateway pages from templates.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Generate with sample data
|
# Generate with sample data
|
||||||
python custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/30-seo-gateway-builder/code/scripts/generate_pages.py
|
||||||
|
|
||||||
# Custom configuration
|
# Custom configuration
|
||||||
python custom-skills/18-seo-gateway-builder/code/scripts/generate_pages.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/30-seo-gateway-builder/code/scripts/generate_pages.py \
|
||||||
--config config/services.json \
|
--config config/services.json \
|
||||||
--locations config/locations.json \
|
--locations config/locations.json \
|
||||||
--output ./pages
|
--output ./pages
|
||||||
|
|||||||
@@ -20,15 +20,15 @@ Google Search Console data retrieval and analysis.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Get search performance
|
# Get search performance
|
||||||
python custom-skills/16-seo-search-console/code/scripts/gsc_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/15-seo-search-console/code/scripts/gsc_client.py \
|
||||||
--site https://example.com --days 28
|
--site https://example.com --days 28
|
||||||
|
|
||||||
# Query analysis
|
# Query analysis
|
||||||
python custom-skills/16-seo-search-console/code/scripts/gsc_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/15-seo-search-console/code/scripts/gsc_client.py \
|
||||||
--site https://example.com --report queries --limit 100
|
--site https://example.com --report queries --limit 100
|
||||||
|
|
||||||
# Page performance
|
# Page performance
|
||||||
python custom-skills/16-seo-search-console/code/scripts/gsc_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/15-seo-search-console/code/scripts/gsc_client.py \
|
||||||
--site https://example.com --report pages --output pages_report.json
|
--site https://example.com --report pages --output pages_report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
59
.claude/commands/seo-international.md
Normal file
59
.claude/commands/seo-international.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
---
|
||||||
|
description: International SEO - hreflang validation, content parity, multi-language audit
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO International Audit
|
||||||
|
|
||||||
|
Multi-language and multi-region SEO audit with hreflang validation and content parity analysis.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "international SEO", "hreflang", "multi-language SEO", "다국어 SEO"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Hreflang Validation** - Bidirectional links, self-referencing, x-default, ISO code checks
|
||||||
|
2. **URL Structure Analysis** - ccTLD vs subdomain vs subdirectory with recommendations
|
||||||
|
3. **Content Parity** - Page count, key page availability, freshness comparison across languages
|
||||||
|
4. **Language Detection** - HTML lang attribute, Content-Language header, actual content analysis
|
||||||
|
5. **Redirect Logic Audit** - IP-based and Accept-Language redirect behavior
|
||||||
|
6. **Korean Expansion** - Priority markets (ko->ja, ko->zh, ko->en), CJK URL encoding, Naver/Baidu considerations
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Hreflang validation
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/hreflang_validator.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# With sitemap-based discovery
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/hreflang_validator.py \
|
||||||
|
--url https://example.com --sitemap https://example.com/sitemap.xml --json
|
||||||
|
|
||||||
|
# Check specific pages from file
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/hreflang_validator.py \
|
||||||
|
--urls-file pages.txt --json
|
||||||
|
|
||||||
|
# Full international audit
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/international_auditor.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# URL structure analysis only
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/international_auditor.py \
|
||||||
|
--url https://example.com --scope structure --json
|
||||||
|
|
||||||
|
# Content parity check only
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/international_auditor.py \
|
||||||
|
--url https://example.com --scope parity --json
|
||||||
|
|
||||||
|
# Korean expansion focus
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/26-seo-international/code/scripts/international_auditor.py \
|
||||||
|
--url https://example.com --korean-expansion --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Hreflang error report (missing bidirectional, self-reference, x-default)
|
||||||
|
- URL structure recommendation
|
||||||
|
- Content parity matrix across languages (page count, freshness)
|
||||||
|
- Redirect logic assessment (forced vs suggested)
|
||||||
|
- International SEO score
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: International SEO)
|
||||||
50
.claude/commands/seo-keyword-strategy.md
Normal file
50
.claude/commands/seo-keyword-strategy.md
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
---
|
||||||
|
description: Keyword strategy and research for SEO campaigns
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Keyword Strategy
|
||||||
|
|
||||||
|
Keyword expansion, intent classification, clustering, and competitor gap analysis. Supports Korean market with Naver autocomplete.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "keyword research", "keyword strategy", "키워드 리서치"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Keyword Expansion** - Seed keyword expansion with matching, related, and suggested terms
|
||||||
|
2. **Intent Classification** - Classify keywords as informational, navigational, commercial, or transactional
|
||||||
|
3. **Topic Clustering** - Group keywords into topic clusters with volume aggregation
|
||||||
|
4. **Korean Suffix Expansion** - Expand with Korean suffixes (추천, 가격, 후기, 잘하는곳, 부작용, 전후)
|
||||||
|
5. **Volume Comparison** - Compare search volume across Korea vs global markets
|
||||||
|
6. **Keyword Gap Analysis** - Find keywords competitors rank for but target does not
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic keyword research
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_researcher.py \
|
||||||
|
--keyword "치과 임플란트" --country kr --json
|
||||||
|
|
||||||
|
# Korean market with suffix expansion
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_researcher.py \
|
||||||
|
--keyword "치과 임플란트" --country kr --korean-suffixes --json
|
||||||
|
|
||||||
|
# Volume comparison Korea vs global
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_researcher.py \
|
||||||
|
--keyword "dental implant" --country kr --compare-global --json
|
||||||
|
|
||||||
|
# Keyword gap vs competitor
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_gap_analyzer.py \
|
||||||
|
--target https://example.com --competitor https://competitor.com --json
|
||||||
|
|
||||||
|
# Multiple competitors with minimum volume filter
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/19-seo-keyword-strategy/code/scripts/keyword_gap_analyzer.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com \
|
||||||
|
--competitor https://comp2.com --min-volume 100 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Keyword list with volume, KD, CPC, intent, and cluster assignment
|
||||||
|
- Topic clusters with aggregated volume
|
||||||
|
- Gap keywords with opportunity scores
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: Keyword Research, ID: KW-YYYYMMDD-NNN)
|
||||||
57
.claude/commands/seo-knowledge-graph.md
Normal file
57
.claude/commands/seo-knowledge-graph.md
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
description: Knowledge Graph & Entity SEO - Knowledge Panel, PAA, FAQ rich results
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Knowledge Graph
|
||||||
|
|
||||||
|
Entity SEO analysis for Knowledge Panel presence, People Also Ask monitoring, and FAQ rich results tracking.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "knowledge graph", "entity SEO", "Knowledge Panel", "PAA monitoring"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Knowledge Panel Detection** - Check entity presence in Google Knowledge Graph
|
||||||
|
2. **Entity Attribute Analysis** - Name, type, description, logo, social profiles, completeness score
|
||||||
|
3. **Wikipedia/Wikidata Check** - Article and QID presence verification
|
||||||
|
4. **Naver Presence** - Encyclopedia and knowledge iN (지식iN) coverage
|
||||||
|
5. **PAA Monitoring** - People Also Ask tracking for brand queries
|
||||||
|
6. **FAQ Rich Results** - FAQPage schema presence and SERP appearance tracking
|
||||||
|
7. **Entity Markup Audit** - Organization/Person/LocalBusiness schema and sameAs validation
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Knowledge Graph analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/knowledge_graph_analyzer.py \
|
||||||
|
--entity "Samsung Electronics" --json
|
||||||
|
|
||||||
|
# Korean entity check
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/knowledge_graph_analyzer.py \
|
||||||
|
--entity "삼성전자" --language ko --json
|
||||||
|
|
||||||
|
# Include Wikipedia/Wikidata check
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/knowledge_graph_analyzer.py \
|
||||||
|
--entity "Samsung" --wiki --json
|
||||||
|
|
||||||
|
# Full entity SEO audit
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/entity_auditor.py \
|
||||||
|
--url https://example.com --entity "Brand Name" --json
|
||||||
|
|
||||||
|
# PAA monitoring
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/entity_auditor.py \
|
||||||
|
--url https://example.com --entity "Brand Name" --paa --json
|
||||||
|
|
||||||
|
# FAQ rich result tracking
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/28-seo-knowledge-graph/code/scripts/entity_auditor.py \
|
||||||
|
--url https://example.com --entity "Brand Name" --faq --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Knowledge Panel detection with attribute completeness score
|
||||||
|
- Wikipedia/Wikidata presence status
|
||||||
|
- Naver encyclopedia and knowledge iN coverage
|
||||||
|
- PAA questions list for brand keywords
|
||||||
|
- FAQ rich result tracking
|
||||||
|
- Entity schema audit (Organization, sameAs links)
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: Knowledge Graph & Entity SEO)
|
||||||
63
.claude/commands/seo-kpi-framework.md
Normal file
63
.claude/commands/seo-kpi-framework.md
Normal file
@@ -0,0 +1,63 @@
|
|||||||
|
---
|
||||||
|
description: SEO KPI framework - unified metrics, health scores, ROI estimation
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO KPI Framework
|
||||||
|
|
||||||
|
Unified KPI aggregation across all SEO dimensions with health scores, baselines, and ROI estimation.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "SEO KPI", "SEO performance", "health score", "SEO ROI"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **KPI Aggregation** - Unified metrics across 7 dimensions (traffic, rankings, engagement, technical, content, links, local)
|
||||||
|
2. **Health Score** - Weighted 0-100 score with trend indicators
|
||||||
|
3. **Baseline & Targets** - Establish baselines and set 30/60/90-day targets
|
||||||
|
4. **Performance Reporting** - Period-over-period comparison (MoM, QoQ, YoY)
|
||||||
|
5. **Executive Summary** - Top wins, concerns, and recommendations
|
||||||
|
6. **ROI Estimation** - Organic traffic cost valuation
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Aggregate KPIs
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/kpi_aggregator.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# Set baseline
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/kpi_aggregator.py \
|
||||||
|
--url https://example.com --set-baseline --json
|
||||||
|
|
||||||
|
# Compare against baseline
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/kpi_aggregator.py \
|
||||||
|
--url https://example.com --baseline baseline.json --json
|
||||||
|
|
||||||
|
# With ROI estimation
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/kpi_aggregator.py \
|
||||||
|
--url https://example.com --roi --json
|
||||||
|
|
||||||
|
# Monthly performance report
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/performance_reporter.py \
|
||||||
|
--url https://example.com --period monthly --json
|
||||||
|
|
||||||
|
# Quarterly report
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/performance_reporter.py \
|
||||||
|
--url https://example.com --period quarterly --json
|
||||||
|
|
||||||
|
# Custom date range
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/performance_reporter.py \
|
||||||
|
--url https://example.com --from 2025-01-01 --to 2025-03-31 --json
|
||||||
|
|
||||||
|
# Executive summary only
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/25-seo-kpi-framework/code/scripts/performance_reporter.py \
|
||||||
|
--url https://example.com --period monthly --executive --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Unified KPI dashboard with health score (0-100)
|
||||||
|
- 7-dimension breakdown (traffic, rankings, engagement, technical, content, links, local)
|
||||||
|
- Trend indicators (up/down/stable) per dimension
|
||||||
|
- 30/60/90-day targets with progress tracking
|
||||||
|
- Executive summary with top wins and concerns
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: SEO KPI & Performance)
|
||||||
58
.claude/commands/seo-link-building.md
Normal file
58
.claude/commands/seo-link-building.md
Normal file
@@ -0,0 +1,58 @@
|
|||||||
|
---
|
||||||
|
description: Backlink audit, toxic link detection, and link gap analysis
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Link Building
|
||||||
|
|
||||||
|
Backlink profile analysis, toxic link detection, competitor link gap identification, and Korean platform link mapping.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "backlink audit", "link building", "링크 분석"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Backlink Profile Audit** - DR, referring domains, dofollow ratio
|
||||||
|
2. **Anchor Text Distribution** - Branded, exact-match, partial-match, generic, naked URL breakdown
|
||||||
|
3. **Toxic Link Detection** - PBN patterns, spammy domains, link farm identification
|
||||||
|
4. **Link Velocity Tracking** - New and lost referring domains over time
|
||||||
|
5. **Broken Backlink Recovery** - Find broken backlinks for reclamation
|
||||||
|
6. **Korean Platform Mapping** - Naver Blog, Naver Cafe, Tistory, Brunch link analysis
|
||||||
|
7. **Link Gap Analysis** - Find domains linking to competitors but not target
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full backlink audit
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py \
|
||||||
|
--url https://example.com --json
|
||||||
|
|
||||||
|
# Check link velocity
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py \
|
||||||
|
--url https://example.com --velocity --json
|
||||||
|
|
||||||
|
# Find broken backlinks for recovery
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py \
|
||||||
|
--url https://example.com --broken --json
|
||||||
|
|
||||||
|
# Korean platform link analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py \
|
||||||
|
--url https://example.com --korean-platforms --json
|
||||||
|
|
||||||
|
# Link gap vs competitor
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/link_gap_finder.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com --json
|
||||||
|
|
||||||
|
# Multiple competitors with minimum DR filter
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/22-seo-link-building/code/scripts/link_gap_finder.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com \
|
||||||
|
--competitor https://comp2.com --min-dr 30 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Domain Rating, backlink stats, dofollow ratio
|
||||||
|
- Anchor text distribution percentages
|
||||||
|
- Toxic link list with detection reason
|
||||||
|
- Link velocity (new/lost last 30 days)
|
||||||
|
- Korean platform backlink counts
|
||||||
|
- Gap domains scored by DR, traffic, and relevance
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: Link Building, ID: LINK-YYYYMMDD-NNN)
|
||||||
57
.claude/commands/seo-migration-planner.md
Normal file
57
.claude/commands/seo-migration-planner.md
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
---
|
||||||
|
description: Site migration planning and post-migration monitoring
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Migration Planner
|
||||||
|
|
||||||
|
Pre-migration risk assessment, redirect mapping, and post-migration traffic/indexation monitoring.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "site migration", "domain move", "사이트 이전"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **URL Inventory** - Full URL capture via Firecrawl crawl with status codes
|
||||||
|
2. **Traffic Baseline** - Per-page traffic and keyword baseline via our-seo-agent
|
||||||
|
3. **Redirect Mapping** - Old URL to new URL mapping with per-URL risk scoring
|
||||||
|
4. **Risk Assessment** - Per-URL risk based on traffic, backlinks, keyword rankings
|
||||||
|
5. **Pre-Migration Checklist** - Automated checklist generation
|
||||||
|
6. **Post-Migration Monitoring** - Traffic comparison, redirect health, indexation tracking
|
||||||
|
7. **Migration Types** - Domain move, platform change, URL restructure, HTTPS, subdomain consolidation
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Domain move planning
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_planner.py \
|
||||||
|
--domain https://example.com --type domain-move --new-domain https://new-example.com --json
|
||||||
|
|
||||||
|
# Platform migration (e.g., WordPress to headless)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_planner.py \
|
||||||
|
--domain https://example.com --type platform --json
|
||||||
|
|
||||||
|
# URL restructuring
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_planner.py \
|
||||||
|
--domain https://example.com --type url-restructure --json
|
||||||
|
|
||||||
|
# HTTPS migration
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_planner.py \
|
||||||
|
--domain http://example.com --type https --json
|
||||||
|
|
||||||
|
# Post-launch traffic comparison
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_monitor.py \
|
||||||
|
--domain https://new-example.com --migration-date 2025-01-15 --baseline baseline.json --json
|
||||||
|
|
||||||
|
# Quick redirect health check
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/33-seo-migration-planner/code/scripts/migration_monitor.py \
|
||||||
|
--domain https://new-example.com --migration-date 2025-01-15 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- URL inventory with traffic/keyword baselines
|
||||||
|
- Redirect map (source -> target, status code, priority)
|
||||||
|
- Risk assessment (high/medium/low risk URL counts, overall risk level)
|
||||||
|
- Pre-migration checklist
|
||||||
|
- Post-migration: traffic delta, broken redirects, ranking changes, recovery timeline
|
||||||
|
- Alerts for traffic drops >20%
|
||||||
|
- Saved to Notion SEO Audit Log (Category: SEO Migration, Audit ID: MIGR-YYYYMMDD-NNN)
|
||||||
@@ -20,11 +20,11 @@ On-page SEO analysis for meta tags, headings, content, and links.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Full page analysis
|
# Full page analysis
|
||||||
python custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/13-seo-on-page-audit/code/scripts/page_analyzer.py \
|
||||||
--url https://example.com/page
|
--url https://example.com/page
|
||||||
|
|
||||||
# Multiple pages
|
# Multiple pages
|
||||||
python custom-skills/11-seo-on-page-audit/code/scripts/page_analyzer.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/13-seo-on-page-audit/code/scripts/page_analyzer.py \
|
||||||
--urls urls.txt --output report.json
|
--urls urls.txt --output report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
55
.claude/commands/seo-position-tracking.md
Normal file
55
.claude/commands/seo-position-tracking.md
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
---
|
||||||
|
description: Keyword rank monitoring with visibility scores and alerts
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Position Tracking
|
||||||
|
|
||||||
|
Monitor keyword rankings, detect position changes with threshold alerts, and calculate visibility scores.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "rank tracking", "position monitoring", "순위 추적"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Position Tracking** - Retrieve current ranking positions for tracked keywords
|
||||||
|
2. **Change Detection** - Detect position changes with configurable threshold alerts
|
||||||
|
3. **Visibility Scoring** - Calculate visibility scores weighted by search volume
|
||||||
|
4. **Brand/Non-Brand Segments** - Segment keywords into brand vs non-brand
|
||||||
|
5. **Competitor Comparison** - Compare rank positions against competitors
|
||||||
|
6. **Ranking Reports** - Period-over-period trend analysis with top movers
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get current positions
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/position_tracker.py \
|
||||||
|
--target https://example.com --json
|
||||||
|
|
||||||
|
# With change threshold alerts (flag moves of +-5 or more)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/position_tracker.py \
|
||||||
|
--target https://example.com --threshold 5 --json
|
||||||
|
|
||||||
|
# Filter by brand segment
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/position_tracker.py \
|
||||||
|
--target https://example.com --segment brand --json
|
||||||
|
|
||||||
|
# Compare with competitor
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/position_tracker.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com --json
|
||||||
|
|
||||||
|
# 30-day ranking report
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/ranking_reporter.py \
|
||||||
|
--target https://example.com --period 30 --json
|
||||||
|
|
||||||
|
# Quarterly report with competitor comparison
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/21-seo-position-tracking/code/scripts/ranking_reporter.py \
|
||||||
|
--target https://example.com --competitor https://comp1.com --period 90 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Position distribution (top 3/10/20/50/100)
|
||||||
|
- Change summary (improved, declined, stable, new, lost)
|
||||||
|
- Threshold alerts for significant position changes
|
||||||
|
- Visibility score and trend over time
|
||||||
|
- Brand vs non-brand segment breakdown
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: Position Tracking, ID: RANK-YYYYMMDD-NNN)
|
||||||
59
.claude/commands/seo-reporting-dashboard.md
Normal file
59
.claude/commands/seo-reporting-dashboard.md
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
---
|
||||||
|
description: SEO reporting dashboard and executive reports
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Reporting Dashboard
|
||||||
|
|
||||||
|
Aggregate all SEO skill outputs into executive reports and interactive HTML dashboards.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "SEO report", "SEO dashboard", "보고서"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Report Aggregation** - Collect and normalize outputs from skills 11-33 into unified structure
|
||||||
|
2. **Cross-Skill Health Score** - Weighted scores across technical, on-page, performance, content, links, keywords
|
||||||
|
3. **HTML Dashboard** - Self-contained Chart.js dashboard with gauge, line, bar, pie, and radar charts
|
||||||
|
4. **Executive Report** - Korean-language summaries tailored to audience (C-level, marketing, technical)
|
||||||
|
5. **Priority Issues** - Top issues ranked across all audit dimensions
|
||||||
|
6. **Trend Analysis** - Period-over-period comparison narrative with audit timeline
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Aggregate all skill outputs for a domain
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/report_aggregator.py \
|
||||||
|
--domain https://example.com --json
|
||||||
|
|
||||||
|
# Aggregate with date range filter
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/report_aggregator.py \
|
||||||
|
--domain https://example.com --from 2025-01-01 --to 2025-03-31 --json
|
||||||
|
|
||||||
|
# Generate HTML dashboard
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/dashboard_generator.py \
|
||||||
|
--report aggregated_report.json --output dashboard.html
|
||||||
|
|
||||||
|
# C-level executive summary (Korean)
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/executive_report.py \
|
||||||
|
--report aggregated_report.json --audience c-level --output report.md
|
||||||
|
|
||||||
|
# Marketing team report
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/executive_report.py \
|
||||||
|
--report aggregated_report.json --audience marketing --output report.md
|
||||||
|
|
||||||
|
# Technical team report
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/34-seo-reporting-dashboard/code/scripts/executive_report.py \
|
||||||
|
--report aggregated_report.json --audience technical --output report.md
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- Aggregated JSON report with overall health score, category scores, top issues/wins
|
||||||
|
- Self-contained HTML dashboard (responsive, no external dependencies except Chart.js CDN)
|
||||||
|
- Korean executive summary in Markdown (tailored by audience level)
|
||||||
|
- Saved to Notion SEO Audit Log (Category: SEO Dashboard, Audit ID: DASH-YYYYMMDD-NNN)
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
1. Run audits with individual skills (11-33)
|
||||||
|
2. Aggregate with `report_aggregator.py`
|
||||||
|
3. Generate dashboard and/or executive report
|
||||||
|
4. Share HTML dashboard or Markdown report with stakeholders
|
||||||
@@ -19,11 +19,11 @@ Generate JSON-LD structured data markup from templates.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Generate from template
|
# Generate from template
|
||||||
python custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/17-seo-schema-generator/code/scripts/schema_generator.py \
|
||||||
--type LocalBusiness --output schema.json
|
--type LocalBusiness --output schema.json
|
||||||
|
|
||||||
# With custom data
|
# With custom data
|
||||||
python custom-skills/14-seo-schema-generator/code/scripts/schema_generator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/17-seo-schema-generator/code/scripts/schema_generator.py \
|
||||||
--type Article \
|
--type Article \
|
||||||
--data '{"headline": "My Article", "author": "John Doe"}' \
|
--data '{"headline": "My Article", "author": "John Doe"}' \
|
||||||
--output article-schema.json
|
--output article-schema.json
|
||||||
|
|||||||
@@ -20,15 +20,15 @@ JSON-LD structured data validation and analysis.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Validate page schema
|
# Validate page schema
|
||||||
python custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/16-seo-schema-validator/code/scripts/schema_validator.py \
|
||||||
--url https://example.com
|
--url https://example.com
|
||||||
|
|
||||||
# Validate local file
|
# Validate local file
|
||||||
python custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/16-seo-schema-validator/code/scripts/schema_validator.py \
|
||||||
--file schema.json
|
--file schema.json
|
||||||
|
|
||||||
# Batch validation
|
# Batch validation
|
||||||
python custom-skills/13-seo-schema-validator/code/scripts/schema_validator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/16-seo-schema-validator/code/scripts/schema_validator.py \
|
||||||
--urls urls.txt --output validation_report.json
|
--urls urls.txt --output validation_report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
46
.claude/commands/seo-serp-analysis.md
Normal file
46
.claude/commands/seo-serp-analysis.md
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
---
|
||||||
|
description: Google and Naver SERP feature detection and competitor mapping
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO SERP Analysis
|
||||||
|
|
||||||
|
Detect SERP features, map competitor positions, and score feature opportunities for Google and Naver.
|
||||||
|
|
||||||
|
## Triggers
|
||||||
|
- "SERP analysis", "SERP features", "검색 결과 분석"
|
||||||
|
|
||||||
|
## Capabilities
|
||||||
|
|
||||||
|
1. **Google SERP Feature Detection** - Featured snippet, PAA, knowledge panel, local pack, video carousel, ads, image pack, site links
|
||||||
|
2. **Competitor Position Mapping** - Map competitor domains and positions per keyword
|
||||||
|
3. **Content Type Distribution** - Analyze content types in results (blog, product, service, news, video)
|
||||||
|
4. **Opportunity Scoring** - Score SERP feature opportunities for target site
|
||||||
|
5. **Intent Validation** - Validate search intent from SERP composition
|
||||||
|
6. **Naver SERP Analysis** - Section detection (블로그, 카페, 지식iN, 스마트스토어, 브랜드존, 숏폼, 인플루언서)
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Google SERP analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py \
|
||||||
|
--keyword "치과 임플란트" --country kr --json
|
||||||
|
|
||||||
|
# Multiple keywords from file
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py \
|
||||||
|
--keywords-file keywords.txt --country kr --json
|
||||||
|
|
||||||
|
# Naver SERP analysis
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-seo-serp-analysis/code/scripts/naver_serp_analyzer.py \
|
||||||
|
--keyword "치과 임플란트" --json
|
||||||
|
|
||||||
|
# Naver multiple keywords
|
||||||
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/20-seo-serp-analysis/code/scripts/naver_serp_analyzer.py \
|
||||||
|
--keywords-file keywords.txt --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Output
|
||||||
|
- SERP feature presence map with ad counts
|
||||||
|
- Competitor positions with domain, URL, title, and content type
|
||||||
|
- Opportunity score and intent signals
|
||||||
|
- Naver section priority mapping and content type distribution
|
||||||
|
- Reports saved to Notion SEO Audit Log (Category: SERP Analysis, ID: SERP-YYYYMMDD-NNN)
|
||||||
@@ -19,15 +19,15 @@ Technical SEO audit for robots.txt and sitemap validation.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Check robots.txt
|
# Check robots.txt
|
||||||
python custom-skills/10-seo-technical-audit/code/scripts/robots_checker.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/12-seo-technical-audit/code/scripts/robots_checker.py \
|
||||||
--url https://example.com
|
--url https://example.com
|
||||||
|
|
||||||
# Validate sitemap
|
# Validate sitemap
|
||||||
python custom-skills/10-seo-technical-audit/code/scripts/sitemap_validator.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/12-seo-technical-audit/code/scripts/sitemap_validator.py \
|
||||||
--url https://example.com/sitemap.xml
|
--url https://example.com/sitemap.xml
|
||||||
|
|
||||||
# Crawl sitemap URLs
|
# Crawl sitemap URLs
|
||||||
python custom-skills/10-seo-technical-audit/code/scripts/sitemap_crawler.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/12-seo-technical-audit/code/scripts/sitemap_crawler.py \
|
||||||
--sitemap https://example.com/sitemap.xml --output report.json
|
--sitemap https://example.com/sitemap.xml --output report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -20,15 +20,15 @@ Google PageSpeed Insights and Core Web Vitals analysis.
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Analyze single URL
|
# Analyze single URL
|
||||||
python custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/14-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
||||||
--url https://example.com
|
--url https://example.com
|
||||||
|
|
||||||
# Mobile and desktop
|
# Mobile and desktop
|
||||||
python custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/14-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
||||||
--url https://example.com --strategy both
|
--url https://example.com --strategy both
|
||||||
|
|
||||||
# Batch analysis
|
# Batch analysis
|
||||||
python custom-skills/15-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
python /Users/ourdigital/Projects/our-claude-skills/custom-skills/14-seo-core-web-vitals/code/scripts/pagespeed_client.py \
|
||||||
--urls urls.txt --output vitals_report.json
|
--urls urls.txt --output vitals_report.json
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
16
.gitignore
vendored
16
.gitignore
vendored
@@ -81,8 +81,20 @@ credentials.json
|
|||||||
secrets.json
|
secrets.json
|
||||||
token.json
|
token.json
|
||||||
|
|
||||||
# Claude Code local config
|
# Claude Code local config (keep commands tracked for slash commands)
|
||||||
.claude/
|
.claude/*
|
||||||
|
!.claude/commands/
|
||||||
|
|
||||||
|
# Claude Code agent workspace
|
||||||
|
.claude-agents/
|
||||||
|
|
||||||
|
# Node lock files
|
||||||
|
package-lock.json
|
||||||
|
|
||||||
|
# Egg info / build artifacts
|
||||||
|
*.egg-info/
|
||||||
|
dist/
|
||||||
|
build/
|
||||||
|
|
||||||
# Temporary files
|
# Temporary files
|
||||||
output/
|
output/
|
||||||
|
|||||||
14
AGENTS.md
14
AGENTS.md
@@ -115,13 +115,17 @@ Task 2: general-purpose - "Implement the planned skill" # Needs Task 1 result
|
|||||||
|
|
||||||
## Domain-Specific Routing
|
## Domain-Specific Routing
|
||||||
|
|
||||||
### SEO Skills (11-30)
|
### SEO Skills (11-34)
|
||||||
|
|
||||||
- Use **seo-advisor** agent for SEO strategy questions
|
|
||||||
- Use **Explore** to understand existing SEO script patterns
|
- Use **Explore** to understand existing SEO script patterns
|
||||||
- Python scripts in these skills follow `base_client.py` patterns
|
- Python scripts in these skills follow `base_client.py` patterns (RateLimiter, ConfigManager, BaseAsyncClient)
|
||||||
- `11-seo-comprehensive-audit` orchestrates skills 12-18 for unified audits
|
- `11-seo-comprehensive-audit` orchestrates skills 12-18 for unified audits
|
||||||
- Skills 19-28 are reserved for future SEO capabilities
|
- Skills 19-28 provide advanced SEO capabilities (keyword strategy, SERP analysis, position tracking, link building, content strategy, e-commerce, KPI framework, international SEO, AI visibility, knowledge graph)
|
||||||
|
- Skills 31-32 cover competitor intelligence and crawl budget optimization
|
||||||
|
- Skill 33 provides site migration planning (pre-migration baseline, redirect mapping, risk assessment, post-migration monitoring)
|
||||||
|
- Skill 34 aggregates outputs from all SEO skills into executive reports, HTML dashboards, and Korean-language summaries
|
||||||
|
- All SEO skills integrate with Ahrefs MCP tools and output to the Notion SEO Audit Log database
|
||||||
|
- Slash commands available: `/seo-keyword-strategy`, `/seo-serp-analysis`, `/seo-position-tracking`, `/seo-link-building`, `/seo-content-strategy`, `/seo-ecommerce`, `/seo-kpi-framework`, `/seo-international`, `/seo-ai-visibility`, `/seo-knowledge-graph`, `/seo-competitor-intel`, `/seo-crawl-budget`, `/seo-migration-planner`, `/seo-reporting-dashboard`
|
||||||
|
|
||||||
### GTM Skills (60-69)
|
### GTM Skills (60-69)
|
||||||
|
|
||||||
@@ -200,7 +204,7 @@ For long-running tasks, use `run_in_background: true`:
|
|||||||
|
|
||||||
```
|
```
|
||||||
# Good candidates for background execution:
|
# Good candidates for background execution:
|
||||||
- Full skill audit across all 23 skills
|
- Full skill audit across all 61 skills
|
||||||
- Running Python tests on multiple skills
|
- Running Python tests on multiple skills
|
||||||
- Generating comprehensive documentation
|
- Generating comprehensive documentation
|
||||||
|
|
||||||
|
|||||||
88
CLAUDE.md
88
CLAUDE.md
@@ -7,7 +7,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co
|
|||||||
**GitHub**: https://github.com/ourdigital/our-claude-skills
|
**GitHub**: https://github.com/ourdigital/our-claude-skills
|
||||||
|
|
||||||
This is a Claude Skills collection repository containing:
|
This is a Claude Skills collection repository containing:
|
||||||
- **custom-skills/**: 38 custom skills for OurDigital workflows, SEO, GTM, Jamie Brand, NotebookLM, Notion, Reference Curation, and Multi-Agent Collaboration
|
- **custom-skills/**: 61 custom skills for OurDigital workflows, SEO, GTM, Jamie Brand, NotebookLM, Notion, D.intelligence Agent Corps, Reference Curation, and Multi-Agent Collaboration
|
||||||
- **example-skills/**: Reference examples from Anthropic's official skills repository
|
- **example-skills/**: Reference examples from Anthropic's official skills repository
|
||||||
- **official-skills/**: Notion integration skills (3rd party)
|
- **official-skills/**: Notion integration skills (3rd party)
|
||||||
- **reference/**: Skill format requirements documentation
|
- **reference/**: Skill format requirements documentation
|
||||||
@@ -35,7 +35,7 @@ This is a Claude Skills collection repository containing:
|
|||||||
| 09 | ourdigital-backoffice | Business document creation | "create proposal", "견적서" |
|
| 09 | ourdigital-backoffice | Business document creation | "create proposal", "견적서" |
|
||||||
| 10 | ourdigital-skill-creator | Meta skill for creating skills | "create skill", "init skill" |
|
| 10 | ourdigital-skill-creator | Meta skill for creating skills | "create skill", "init skill" |
|
||||||
|
|
||||||
### SEO Tools (11-30)
|
### SEO Tools (11-34)
|
||||||
|
|
||||||
| # | Skill | Purpose | Trigger |
|
| # | Skill | Purpose | Trigger |
|
||||||
|---|-------|---------|---------|
|
|---|-------|---------|---------|
|
||||||
@@ -47,22 +47,22 @@ This is a Claude Skills collection repository containing:
|
|||||||
| 16 | seo-schema-validator | Structured data validation | "validate schema", "JSON-LD" |
|
| 16 | seo-schema-validator | Structured data validation | "validate schema", "JSON-LD" |
|
||||||
| 17 | seo-schema-generator | Schema markup creation | "generate schema", "create JSON-LD" |
|
| 17 | seo-schema-generator | Schema markup creation | "generate schema", "create JSON-LD" |
|
||||||
| 18 | seo-local-audit | NAP, GBP, citations | "local SEO", "Google Business Profile" |
|
| 18 | seo-local-audit | NAP, GBP, citations | "local SEO", "Google Business Profile" |
|
||||||
|
| 19 | seo-keyword-strategy | Keyword expansion, intent, clustering, gaps | "keyword research", "keyword strategy" |
|
||||||
|
| 20 | seo-serp-analysis | Google/Naver SERP features, competitor positions | "SERP analysis", "SERP features" |
|
||||||
|
| 21 | seo-position-tracking | Rank monitoring, visibility scores, alerts | "rank tracking", "position monitoring" |
|
||||||
|
| 22 | seo-link-building | Backlink audit, toxic links, link gaps | "backlink audit", "link building" |
|
||||||
|
| 23 | seo-content-strategy | Content audit, decay, briefs, clusters | "content strategy", "content audit" |
|
||||||
|
| 24 | seo-ecommerce | Product page audit, product schema | "e-commerce SEO", "product SEO" |
|
||||||
|
| 25 | seo-kpi-framework | Unified KPIs, health scores, ROI | "SEO KPI", "SEO performance" |
|
||||||
|
| 26 | seo-international | Hreflang, content parity, multi-language | "international SEO", "hreflang" |
|
||||||
|
| 27 | seo-ai-visibility | AI search citations, brand radar, SOV | "AI visibility", "AI search" |
|
||||||
|
| 28 | seo-knowledge-graph | Entity SEO, Knowledge Panel, PAA | "knowledge graph", "entity SEO" |
|
||||||
| 29 | seo-gateway-architect | Gateway page strategy | "SEO strategy", "gateway pages" |
|
| 29 | seo-gateway-architect | Gateway page strategy | "SEO strategy", "gateway pages" |
|
||||||
| 30 | seo-gateway-builder | Gateway page content | "build gateway page" |
|
| 30 | seo-gateway-builder | Gateway page content | "build gateway page" |
|
||||||
|
| 31 | seo-competitor-intel | Competitor profiling, benchmarking, threats | "competitor analysis", "competitive intel" |
|
||||||
**Future SEO Skills (19-28 reserved):**
|
| 32 | seo-crawl-budget | Log analysis, bot profiling, crawl waste | "crawl budget", "log analysis" |
|
||||||
|
| 33 | seo-migration-planner | Site migration planning, redirect mapping | "site migration", "domain move", "사이트 이전" |
|
||||||
| # | Planned Skill | Status |
|
| 34 | seo-reporting-dashboard | Executive reports, HTML dashboards, aggregation | "SEO report", "SEO dashboard", "보고서" |
|
||||||
|---|--------------|--------|
|
|
||||||
| 19 | Keyword Strategy & Research | Planned |
|
|
||||||
| 20 | SERP Analysis | Planned |
|
|
||||||
| 21 | Position Tracking | Planned |
|
|
||||||
| 22 | Link Building Diagnosis | Planned |
|
|
||||||
| 23 | Content Strategy | Planned |
|
|
||||||
| 24 | E-Commerce SEO | Planned |
|
|
||||||
| 25 | SEO KPI & Performance Framework | Planned |
|
|
||||||
| 26 | International SEO | Planned |
|
|
||||||
| 27-28 | *(reserved)* | — |
|
|
||||||
|
|
||||||
### GTM/GA Tools (60-69)
|
### GTM/GA Tools (60-69)
|
||||||
|
|
||||||
@@ -101,6 +101,26 @@ This is a Claude Skills collection repository containing:
|
|||||||
|
|
||||||
**Prerequisites:** `pip install notebooklm-py && playwright install chromium && notebooklm login`
|
**Prerequisites:** `pip install notebooklm-py && playwright install chromium && notebooklm login`
|
||||||
|
|
||||||
|
### D.intelligence Agent Corps (70-88)
|
||||||
|
|
||||||
|
| # | Skill | Purpose | Autonomy | Trigger |
|
||||||
|
|---|-------|---------|----------|---------|
|
||||||
|
| 70 | dintel-brand-guardian | Brand compliance review (100pt checklist) | Auto | "brand check", "브랜드 검수" |
|
||||||
|
| 71 | dintel-brand-editor | Brand-compliant copywriting & style evaluation | Auto + Ask | "write copy", "카피 작성" |
|
||||||
|
| 72 | dintel-doc-secretary | Document formatting, meeting notes, reports | Draft & Wait | "format document", "회의록" |
|
||||||
|
| 73 | dintel-quotation-mgr | Quotation generation (4 sub-agents) | Draft & Wait | "견적서", "quotation" |
|
||||||
|
| 74 | dintel-service-architect | Service scope design & module recommendation | Inquiry-driven | "서비스 설계", "service design" |
|
||||||
|
| 75 | dintel-marketing-mgr | Content pipeline (Magazine D., newsletter, LinkedIn) | Draft & Wait | "콘텐츠 발행", "newsletter" |
|
||||||
|
| 76 | dintel-backoffice-mgr | Invoicing, contracts, NDA, HR operations | Draft & Wait | "계약서", "인보이스" |
|
||||||
|
| 77 | dintel-account-mgr | Client relationship management & monitoring | Mixed | "client status", "미팅 준비" |
|
||||||
|
| 88 | dintel-skill-update | Cross-skill consistency management (meta-agent) | Triggered | "skill sync", "스킬 업데이트" |
|
||||||
|
|
||||||
|
**Shared infrastructure:** `dintel-shared/` (Python package + reference docs)
|
||||||
|
|
||||||
|
**Install:** `cd custom-skills/dintel-shared && ./install.sh --all`
|
||||||
|
|
||||||
|
**User Guide:** `custom-skills/dintel-shared/USER-GUIDE.md`
|
||||||
|
|
||||||
### Reference Curator & Multi-Agent (90-99)
|
### Reference Curator & Multi-Agent (90-99)
|
||||||
|
|
||||||
| # | Skill | Purpose | Trigger |
|
| # | Skill | Purpose | Trigger |
|
||||||
@@ -112,17 +132,17 @@ This is a Claude Skills collection repository containing:
|
|||||||
|
|
||||||
| Command | Purpose |
|
| Command | Purpose |
|
||||||
|---------|---------|
|
|---------|---------|
|
||||||
| `/reference-curator-pipeline` | Full pipeline orchestration with QA loop handling |
|
| `/reference-curator` | Full pipeline orchestration with QA loop handling |
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
# Full pipeline from topic
|
# Full pipeline from topic
|
||||||
/reference-curator-pipeline "Claude Code best practices" --max-sources 5
|
/reference-curator "Claude Code best practices" --max-sources 5
|
||||||
|
|
||||||
# From URLs (skip discovery)
|
# From URLs (skip discovery)
|
||||||
/reference-curator-pipeline https://docs.anthropic.com/en/docs/prompt-caching
|
/reference-curator https://docs.anthropic.com/en/docs/prompt-caching
|
||||||
|
|
||||||
# With auto-approve and fine-tuning output
|
# With auto-approve and fine-tuning output
|
||||||
/reference-curator-pipeline "MCP servers" --auto-approve --export-format fine_tuning
|
/reference-curator "MCP servers" --auto-approve --export-format fine_tuning
|
||||||
```
|
```
|
||||||
|
|
||||||
**Individual Sub-skills:**
|
**Individual Sub-skills:**
|
||||||
@@ -130,7 +150,7 @@ This is a Claude Skills collection repository containing:
|
|||||||
| Skill | Command | Purpose |
|
| Skill | Command | Purpose |
|
||||||
|-------|---------|---------|
|
|-------|---------|---------|
|
||||||
| reference-discovery | `/reference-discovery` | Search & validate authoritative sources |
|
| reference-discovery | `/reference-discovery` | Search & validate authoritative sources |
|
||||||
| web-crawler-orchestrator | `/web-crawler` | Multi-backend crawling (Firecrawl/Node/aiohttp/Scrapy) |
|
| web-crawler | `/web-crawler` | Multi-backend crawling (Firecrawl/Node/aiohttp/Scrapy) |
|
||||||
| content-repository | `/content-repository` | MySQL storage with version tracking |
|
| content-repository | `/content-repository` | MySQL storage with version tracking |
|
||||||
| content-distiller | `/content-distiller` | Summarization & key concept extraction |
|
| content-distiller | `/content-distiller` | Summarization & key concept extraction |
|
||||||
| quality-reviewer | `/quality-reviewer` | QA loop with approve/refactor/research routing |
|
| quality-reviewer | `/quality-reviewer` | QA loop with approve/refactor/research routing |
|
||||||
@@ -209,9 +229,22 @@ our-claude-skills/
|
|||||||
│ ├── 16-seo-schema-validator/
|
│ ├── 16-seo-schema-validator/
|
||||||
│ ├── 17-seo-schema-generator/
|
│ ├── 17-seo-schema-generator/
|
||||||
│ ├── 18-seo-local-audit/
|
│ ├── 18-seo-local-audit/
|
||||||
│ ├── 19-28 (reserved for future SEO skills)
|
│ ├── 19-seo-keyword-strategy/
|
||||||
|
│ ├── 20-seo-serp-analysis/
|
||||||
|
│ ├── 21-seo-position-tracking/
|
||||||
|
│ ├── 22-seo-link-building/
|
||||||
|
│ ├── 23-seo-content-strategy/
|
||||||
|
│ ├── 24-seo-ecommerce/
|
||||||
|
│ ├── 25-seo-kpi-framework/
|
||||||
|
│ ├── 26-seo-international/
|
||||||
|
│ ├── 27-seo-ai-visibility/
|
||||||
|
│ ├── 28-seo-knowledge-graph/
|
||||||
│ ├── 29-seo-gateway-architect/
|
│ ├── 29-seo-gateway-architect/
|
||||||
│ ├── 30-seo-gateway-builder/
|
│ ├── 30-seo-gateway-builder/
|
||||||
|
│ ├── 31-seo-competitor-intel/
|
||||||
|
│ ├── 32-seo-crawl-budget/
|
||||||
|
│ ├── 33-seo-migration-planner/
|
||||||
|
│ ├── 34-seo-reporting-dashboard/
|
||||||
│ │
|
│ │
|
||||||
│ ├── 60-gtm-audit/
|
│ ├── 60-gtm-audit/
|
||||||
│ ├── 61-gtm-manager/
|
│ ├── 61-gtm-manager/
|
||||||
@@ -232,6 +265,17 @@ our-claude-skills/
|
|||||||
│ ├── 52-notebooklm-studio/
|
│ ├── 52-notebooklm-studio/
|
||||||
│ ├── 53-notebooklm-research/
|
│ ├── 53-notebooklm-research/
|
||||||
│ │
|
│ │
|
||||||
|
│ ├── 70-dintel-brand-guardian/
|
||||||
|
│ ├── 71-dintel-brand-editor/
|
||||||
|
│ ├── 72-dintel-doc-secretary/
|
||||||
|
│ ├── 73-dintel-quotation-mgr/
|
||||||
|
│ ├── 74-dintel-service-architect/
|
||||||
|
│ ├── 75-dintel-marketing-mgr/
|
||||||
|
│ ├── 76-dintel-backoffice-mgr/
|
||||||
|
│ ├── 77-dintel-account-mgr/
|
||||||
|
│ ├── 88-dintel-skill-update/
|
||||||
|
│ ├── dintel-shared/ # D.intelligence shared infra (Python pkg, refs, installer)
|
||||||
|
│ │
|
||||||
│ ├── 90-reference-curator/ # Modular reference documentation suite
|
│ ├── 90-reference-curator/ # Modular reference documentation suite
|
||||||
│ │ ├── 01-reference-discovery/
|
│ │ ├── 01-reference-discovery/
|
||||||
│ │ ├── 02-web-crawler-orchestrator/
|
│ │ ├── 02-web-crawler-orchestrator/
|
||||||
|
|||||||
42
README.md
42
README.md
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
> **Internal R&D Repository** - This repository is restricted for internal use only.
|
> **Internal R&D Repository** - This repository is restricted for internal use only.
|
||||||
|
|
||||||
A collection of **38 custom Claude Skills** for OurDigital workflows, Jamie Plastic Surgery Clinic brand management, SEO/GTM tools, NotebookLM automation, Notion integrations, reference documentation curation, and multi-agent collaboration.
|
A collection of **61 custom Claude Skills** for OurDigital workflows, D.intelligence Agent Corps (9-agent business operations suite), Jamie Plastic Surgery Clinic brand management, SEO/GTM tools, NotebookLM automation, Notion integrations, reference documentation curation, and multi-agent collaboration.
|
||||||
|
|
||||||
## Quick Install
|
## Quick Install
|
||||||
|
|
||||||
@@ -35,7 +35,7 @@ cd our-claude-skills/custom-skills/_ourdigital-shared
|
|||||||
| 09 | `ourdigital-backoffice` | Business document creation |
|
| 09 | `ourdigital-backoffice` | Business document creation |
|
||||||
| 10 | `ourdigital-skill-creator` | Meta skill for creating/managing skills |
|
| 10 | `ourdigital-skill-creator` | Meta skill for creating/managing skills |
|
||||||
|
|
||||||
### SEO Tools (11-30)
|
### SEO Tools (11-34)
|
||||||
|
|
||||||
| # | Skill | Purpose |
|
| # | Skill | Purpose |
|
||||||
|---|-------|---------|
|
|---|-------|---------|
|
||||||
@@ -46,11 +46,43 @@ cd our-claude-skills/custom-skills/_ourdigital-shared
|
|||||||
| 15 | `seo-search-console` | GSC data analysis |
|
| 15 | `seo-search-console` | GSC data analysis |
|
||||||
| 16 | `seo-schema-validator` | Structured data validation |
|
| 16 | `seo-schema-validator` | Structured data validation |
|
||||||
| 17 | `seo-schema-generator` | Schema markup creation |
|
| 17 | `seo-schema-generator` | Schema markup creation |
|
||||||
| 18 | `seo-local-audit` | NAP consistency, GBP, citations |
|
| 18 | `seo-local-audit` | NAP consistency, GBP, Naver Smart Place, citations |
|
||||||
|
| 19 | `seo-keyword-strategy` | Keyword expansion, intent classification, clustering, gaps |
|
||||||
|
| 20 | `seo-serp-analysis` | Google/Naver SERP features, competitor positions |
|
||||||
|
| 21 | `seo-position-tracking` | Rank monitoring, visibility scores, alerts |
|
||||||
|
| 22 | `seo-link-building` | Backlink audit, toxic links, link gaps |
|
||||||
|
| 23 | `seo-content-strategy` | Content audit, decay detection, briefs, clusters |
|
||||||
|
| 24 | `seo-ecommerce` | Product page audit, product schema, Naver Smart Store |
|
||||||
|
| 25 | `seo-kpi-framework` | Unified KPIs, health scores, ROI estimation |
|
||||||
|
| 26 | `seo-international` | Hreflang validation, content parity, multi-language |
|
||||||
|
| 27 | `seo-ai-visibility` | AI search citations, brand radar, share of voice |
|
||||||
|
| 28 | `seo-knowledge-graph` | Entity SEO, Knowledge Panel, PAA, FAQ |
|
||||||
| 29 | `seo-gateway-architect` | Gateway page strategy (Naver/Google) |
|
| 29 | `seo-gateway-architect` | Gateway page strategy (Naver/Google) |
|
||||||
| 30 | `seo-gateway-builder` | Gateway page content generation |
|
| 30 | `seo-gateway-builder` | Gateway page content generation |
|
||||||
|
| 31 | `seo-competitor-intel` | Competitor profiling, benchmarking, threat scoring |
|
||||||
|
| 32 | `seo-crawl-budget` | Log analysis, bot profiling, crawl waste detection |
|
||||||
|
| 33 | `seo-migration-planner` | Site migration planning, redirect mapping, monitoring |
|
||||||
|
| 34 | `seo-reporting-dashboard` | Executive reports, HTML dashboards, data aggregation |
|
||||||
|
|
||||||
**Future SEO Skills (19-28 reserved):** Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, SEO KPI Framework, International SEO.
|
### D.intelligence Agent Corps (70-88)
|
||||||
|
|
||||||
|
| # | Skill | Purpose | Autonomy |
|
||||||
|
|---|-------|---------|----------|
|
||||||
|
| 70 | `dintel-brand-guardian` | Brand compliance review (100pt checklist) | Auto |
|
||||||
|
| 71 | `dintel-brand-editor` | Brand-compliant copywriting & style evaluation | Auto + Ask |
|
||||||
|
| 72 | `dintel-doc-secretary` | Document formatting, meeting notes, reports | Draft & Wait |
|
||||||
|
| 73 | `dintel-quotation-mgr` | Quotation generation (4 sub-agents) | Draft & Wait |
|
||||||
|
| 74 | `dintel-service-architect` | Service scope design & module recommendation | Inquiry-driven |
|
||||||
|
| 75 | `dintel-marketing-mgr` | Content pipeline (Magazine D., newsletter, LinkedIn) | Draft & Wait |
|
||||||
|
| 76 | `dintel-backoffice-mgr` | Invoicing, contracts, NDA, HR operations | Draft & Wait |
|
||||||
|
| 77 | `dintel-account-mgr` | Client relationship management & Notion monitoring | Mixed |
|
||||||
|
| 88 | `dintel-skill-update` | Cross-skill consistency management (meta-agent) | Triggered |
|
||||||
|
|
||||||
|
**Shared infrastructure:** `dintel-shared/` (Python package + reference docs)
|
||||||
|
|
||||||
|
**Install:** `cd custom-skills/dintel-shared && ./install.sh --all`
|
||||||
|
|
||||||
|
**User Guide:** `custom-skills/dintel-shared/USER-GUIDE.md`
|
||||||
|
|
||||||
### GTM/GA Tools (60-69)
|
### GTM/GA Tools (60-69)
|
||||||
|
|
||||||
@@ -138,7 +170,7 @@ our-claude-skills/
|
|||||||
│ │
|
│ │
|
||||||
│ ├── 00-our-settings-audit/
|
│ ├── 00-our-settings-audit/
|
||||||
│ ├── 01-10 (OurDigital core)
|
│ ├── 01-10 (OurDigital core)
|
||||||
│ ├── 11-30 (SEO tools, 19-28 reserved)
|
│ ├── 11-34 (SEO tools)
|
||||||
│ ├── 60-62 (GTM/GA tools)
|
│ ├── 60-62 (GTM/GA tools)
|
||||||
│ ├── 31-32 (Notion tools)
|
│ ├── 31-32 (Notion tools)
|
||||||
│ ├── 40-45 (Jamie clinic)
|
│ ├── 40-45 (Jamie clinic)
|
||||||
|
|||||||
@@ -34,9 +34,38 @@ python scripts/seo_audit_orchestrator.py --url https://example.com --json
|
|||||||
| 2 | On-Page SEO | `13-seo-on-page-audit/code/scripts/page_analyzer.py` |
|
| 2 | On-Page SEO | `13-seo-on-page-audit/code/scripts/page_analyzer.py` |
|
||||||
| 3 | Core Web Vitals | `14-seo-core-web-vitals/code/scripts/pagespeed_client.py` |
|
| 3 | Core Web Vitals | `14-seo-core-web-vitals/code/scripts/pagespeed_client.py` |
|
||||||
| 4 | Schema Validation | `16-seo-schema-validator/code/scripts/schema_validator.py` |
|
| 4 | Schema Validation | `16-seo-schema-validator/code/scripts/schema_validator.py` |
|
||||||
| 5 | Local SEO | `18-seo-local-audit/` (prompt-driven) |
|
| 5 | Local SEO | `18-seo-local-audit/` (prompt-driven — see Stage 5 notes below) |
|
||||||
| 6 | Search Console | `15-seo-search-console/code/scripts/gsc_client.py` |
|
| 6 | Search Console | `15-seo-search-console/code/scripts/gsc_client.py` |
|
||||||
|
|
||||||
|
## Stage 5: Local SEO — Key Requirements
|
||||||
|
|
||||||
|
Stage 5 is prompt-driven and requires **Business Identity extraction as a mandatory first step**:
|
||||||
|
1. Extract Korean name, English name, address, phone from website JSON-LD schema markup (`Organization`/`Hospital`/`LocalBusiness`)
|
||||||
|
2. Check website footer, contact page, and schema `sameAs` for GBP, Naver Place, and Kakao Map URLs
|
||||||
|
3. Use layered search fallback if listing URLs are not found on the website
|
||||||
|
4. Follow `18-seo-local-audit/code/CLAUDE.md` for the full workflow
|
||||||
|
5. **Korean market priorities**: GBP and Naver Smart Place are both Critical; Kakao Map is High; US-centric directories (Yelp, Yellow Pages) are Low
|
||||||
|
6. **Important**: GBP and Naver Map are JS-rendered. Report unfound listings as "not discoverable via web search" — not "does not exist"
|
||||||
|
|
||||||
|
## Extended SEO Skills Pipeline
|
||||||
|
|
||||||
|
Beyond the 6 core audit stages, additional specialized skills are available for deeper analysis:
|
||||||
|
|
||||||
|
| Skill | Audit ID | Purpose | Command |
|
||||||
|
|-------|----------|---------|---------|
|
||||||
|
| 19 - Keyword Strategy | KW | Seed expansion, intent classification, keyword gaps | `/seo-keyword-strategy` |
|
||||||
|
| 20 - SERP Analysis | SERP | Google/Naver SERP features, competitor positions | `/seo-serp-analysis` |
|
||||||
|
| 21 - Position Tracking | RANK | Rank monitoring, visibility scores, alerts | `/seo-position-tracking` |
|
||||||
|
| 22 - Link Building | LINK | Backlink audit, toxic links, link gaps | `/seo-link-building` |
|
||||||
|
| 23 - Content Strategy | CONTENT | Content audit, decay detection, briefs | `/seo-content-strategy` |
|
||||||
|
| 24 - E-Commerce SEO | ECOM | Product page audit, product schema | `/seo-ecommerce` |
|
||||||
|
| 25 - SEO KPI Framework | KPI | Unified KPIs, health scores, ROI | `/seo-kpi-framework` |
|
||||||
|
| 26 - International SEO | INTL | Hreflang validation, content parity | `/seo-international` |
|
||||||
|
| 27 - AI Visibility | AI | AI search citations, brand radar, SOV | `/seo-ai-visibility` |
|
||||||
|
| 28 - Knowledge Graph | KG | Entity SEO, Knowledge Panel, PAA | `/seo-knowledge-graph` |
|
||||||
|
| 31 - Competitor Intel | COMP | Competitor profiling, benchmarking | `/seo-competitor-intel` |
|
||||||
|
| 32 - Crawl Budget | CRAWL | Log analysis, bot profiling, waste | `/seo-crawl-budget` |
|
||||||
|
|
||||||
## Health Score Weights
|
## Health Score Weights
|
||||||
|
|
||||||
| Category | Weight |
|
| Category | Weight |
|
||||||
|
|||||||
@@ -62,10 +62,37 @@ python "$SKILLS/14-seo-core-web-vitals/code/scripts/pagespeed_client.py" --url $
|
|||||||
# Stage 4: Schema Validation
|
# Stage 4: Schema Validation
|
||||||
python "$SKILLS/16-seo-schema-validator/code/scripts/schema_validator.py" --url $URL --json
|
python "$SKILLS/16-seo-schema-validator/code/scripts/schema_validator.py" --url $URL --json
|
||||||
|
|
||||||
# Stage 5: Local SEO (prompt-driven, use WebFetch + WebSearch)
|
# Stage 5: Local SEO (see detailed instructions below)
|
||||||
# Stage 6: Search Console (requires GSC API credentials)
|
# Stage 6: Search Console (requires GSC API credentials)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
### Stage 5: Local SEO — Detailed Instructions
|
||||||
|
|
||||||
|
Stage 5 is prompt-driven (no script). Follow this sequence:
|
||||||
|
|
||||||
|
1. **Extract Business Identity from website (MANDATORY FIRST)**
|
||||||
|
- WebFetch the homepage and parse JSON-LD `<script type="application/ld+json">` tags
|
||||||
|
- Extract from `Organization`, `Hospital`, or `LocalBusiness` schema: Korean name, English name, address, telephone
|
||||||
|
- Check `sameAs` array for GBP, Naver Place, Kakao Map URLs
|
||||||
|
|
||||||
|
2. **Check website for listing links**
|
||||||
|
- Scrape footer, contact page, about page for links matching:
|
||||||
|
- GBP: `maps.app.goo.gl/*`, `google.com/maps/place/*`, `g.page/*`
|
||||||
|
- Naver Place: `naver.me/*`, `map.naver.com/*/place/*`, `m.place.naver.com/*`
|
||||||
|
- Kakao Map: `place.map.kakao.com/*`, `kko.to/*`
|
||||||
|
- Check embedded iframes for Google Maps Place IDs or Naver Map embeds
|
||||||
|
|
||||||
|
3. **Layered search fallback (if links not found on website)**
|
||||||
|
- GBP: Search `"[Korean Name]" "[district]" Google Maps`, then `"[phone]" site:google.com/maps`
|
||||||
|
- Naver: Search `"[Korean Name]" site:map.naver.com`, then `"[Korean Name]" 네이버 지도 [district]`
|
||||||
|
- Kakao: Search `"[Korean Name]" site:place.map.kakao.com`
|
||||||
|
|
||||||
|
4. **Follow `18-seo-local-audit/code/CLAUDE.md` workflow** for the full audit (Steps 2-7)
|
||||||
|
|
||||||
|
5. **Important language**: Distinguish **"not discoverable via web search"** from **"does not exist."** GBP and Naver Map are JS-rendered; WebFetch cannot extract their listing data. Absence in search results does not confirm absence of the listing.
|
||||||
|
|
||||||
|
6. **Korean market priorities**: GBP and Naver Smart Place are both Critical. Kakao Map is High. US-centric directories (Yelp, Yellow Pages) are Low priority for Korean businesses.
|
||||||
|
|
||||||
## Health Score (Weighted 0-100)
|
## Health Score (Weighted 0-100)
|
||||||
|
|
||||||
| Category | Weight |
|
| Category | Weight |
|
||||||
|
|||||||
@@ -2,109 +2,253 @@
|
|||||||
|
|
||||||
## Overview
|
## Overview
|
||||||
|
|
||||||
Local SEO auditor for businesses with physical locations: NAP consistency, Google Business Profile optimization, local citations, and LocalBusiness schema validation.
|
Local SEO auditor for Korean-market businesses with physical locations. Covers business identity extraction, GBP optimization, Naver Smart Place, Kakao Map, NAP consistency, local citations, and LocalBusiness schema validation.
|
||||||
|
|
||||||
## Quick Start
|
|
||||||
|
|
||||||
This skill primarily uses MCP tools (Firecrawl, Perplexity) for data collection. Scripts are helpers for validation.
|
|
||||||
|
|
||||||
```bash
|
|
||||||
# NAP consistency check (manual data input)
|
|
||||||
python scripts/nap_checker.py --business "Business Name" --address "123 Main St" --phone "555-1234"
|
|
||||||
|
|
||||||
# LocalBusiness schema validation
|
|
||||||
python scripts/local_schema_validator.py --url https://example.com
|
|
||||||
```
|
|
||||||
|
|
||||||
## Audit Components
|
|
||||||
|
|
||||||
### 1. NAP Consistency
|
|
||||||
**Name, Address, Phone** consistency across:
|
|
||||||
- Website (header, footer, contact page)
|
|
||||||
- Google Business Profile
|
|
||||||
- Local directories (Yelp, Yellow Pages, etc.)
|
|
||||||
- Social media profiles
|
|
||||||
|
|
||||||
### 2. Google Business Profile (GBP)
|
|
||||||
Optimization checklist:
|
|
||||||
- [ ] Business name matches website
|
|
||||||
- [ ] Address is complete and accurate
|
|
||||||
- [ ] Phone number is local
|
|
||||||
- [ ] Business hours are current
|
|
||||||
- [ ] Categories are appropriate
|
|
||||||
- [ ] Photos uploaded (exterior, interior, products)
|
|
||||||
- [ ] Posts are recent (within 7 days)
|
|
||||||
- [ ] Reviews are responded to
|
|
||||||
|
|
||||||
### 3. Local Citations
|
|
||||||
Priority directories to check:
|
|
||||||
- Google Business Profile
|
|
||||||
- Apple Maps
|
|
||||||
- Bing Places
|
|
||||||
- Yelp
|
|
||||||
- Facebook Business
|
|
||||||
- Industry-specific directories
|
|
||||||
|
|
||||||
### 4. LocalBusiness Schema
|
|
||||||
Required properties:
|
|
||||||
- @type (LocalBusiness or subtype)
|
|
||||||
- name
|
|
||||||
- address (PostalAddress)
|
|
||||||
- telephone
|
|
||||||
- openingHours
|
|
||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
|
### Step 0: Business Identity (MANDATORY FIRST STEP)
|
||||||
|
|
||||||
|
Before any audit work, establish the official business identity.
|
||||||
|
|
||||||
|
**Sources (in priority order):**
|
||||||
|
1. Website schema markup (JSON-LD `Organization`, `Hospital`, `LocalBusiness`) — the `name` field is authoritative
|
||||||
|
2. Contact page / About page
|
||||||
|
3. Footer (address, phone, social links)
|
||||||
|
4. User-provided information (known GBP URL, Naver Place URL, etc.)
|
||||||
|
|
||||||
|
**Data to collect:**
|
||||||
|
|
||||||
|
| Field | Example |
|
||||||
|
|-------|---------|
|
||||||
|
| Official name (Korean) | 제이미성형외과의원 |
|
||||||
|
| Official name (English) | Jamie Plastic Surgery Clinic |
|
||||||
|
| Brand/display name | Jamie Clinic |
|
||||||
|
| Website URL | https://www.jamie.clinic |
|
||||||
|
| Address (Korean) | 서울특별시 강남구 ... |
|
||||||
|
| Phone | 02-XXX-XXXX |
|
||||||
|
| Known GBP URL | (if available) |
|
||||||
|
| Known Naver Place URL | (if available) |
|
||||||
|
| Known Kakao Map URL | (if available) |
|
||||||
|
|
||||||
|
**How to extract:**
|
||||||
```
|
```
|
||||||
1. Collect NAP from client
|
WebFetch homepage → parse JSON-LD script tags → extract name, address, telephone, sameAs
|
||||||
2. Scrape website for NAP mentions
|
WebFetch /contact or /about → extract NAP from page content
|
||||||
3. Search citations using Perplexity
|
Check footer for social links, map embeds, place listing URLs
|
||||||
4. Check GBP data (manual or API)
|
|
||||||
5. Validate LocalBusiness schema
|
|
||||||
6. Generate consistency report
|
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Look specifically for these URL patterns in `sameAs`, footer links, or embedded iframes:
|
||||||
|
- GBP: `maps.app.goo.gl/*`, `google.com/maps/place/*`, `g.page/*`
|
||||||
|
- Naver Place: `naver.me/*`, `map.naver.com/*/place/*`, `m.place.naver.com/*`
|
||||||
|
- Kakao Map: `place.map.kakao.com/*`, `kko.to/*`
|
||||||
|
|
||||||
|
### Step 1: Website NAP Extraction
|
||||||
|
|
||||||
|
Scrape header, footer, contact page, about page for NAP mentions. Cross-reference with schema markup. Establish the **canonical NAP** baseline (the single source of truth for this audit).
|
||||||
|
|
||||||
|
### Step 2: GBP Verification & Audit
|
||||||
|
|
||||||
|
**Layered discovery (try in order, stop when found):**
|
||||||
|
1. Use provided GBP URL (from Step 0 or user input)
|
||||||
|
2. Check website for GBP link (footer, contact page, schema `sameAs`, embedded Google Maps iframe with Place ID)
|
||||||
|
3. WebSearch: `"[Korean Name]" "[City/District]" Google Maps`
|
||||||
|
4. WebSearch: `"[English Name]" Google Maps [City]`
|
||||||
|
5. WebSearch: `"[exact phone number]" site:google.com/maps`
|
||||||
|
|
||||||
|
**Important**: Google Maps is JS-rendered — WebFetch cannot extract business data from the listing page itself. Use WebSearch to find the listing URL, then verify details via search result snippets.
|
||||||
|
|
||||||
|
**If found — audit checklist (score /10):**
|
||||||
|
- [ ] Business name matches canonical NAP
|
||||||
|
- [ ] Address is complete and accurate
|
||||||
|
- [ ] Phone number matches
|
||||||
|
- [ ] Business hours are current
|
||||||
|
- [ ] Primary + secondary categories are appropriate
|
||||||
|
- [ ] Business description is complete
|
||||||
|
- [ ] 10+ photos uploaded (exterior, interior, products/services)
|
||||||
|
- [ ] Posts are recent (within 7 days)
|
||||||
|
- [ ] Reviews are responded to
|
||||||
|
- [ ] Q&A section is active
|
||||||
|
|
||||||
|
**If NOT found after all attempts:** Report as **"not discoverable via web search"** — this is distinct from "does not exist." The listing may exist but be unfindable through non-JS search methods.
|
||||||
|
|
||||||
|
### Step 3: Naver Smart Place Verification & Audit
|
||||||
|
|
||||||
|
**Layered discovery (try in order, stop when found):**
|
||||||
|
1. Use provided Naver Place URL (from Step 0 or user input)
|
||||||
|
2. Check website for Naver Place link (footer, contact page, schema `sameAs`, `naver.me/*` or `map.naver.com/*/place/*` patterns)
|
||||||
|
3. WebSearch: `"[Korean Name]" site:map.naver.com`
|
||||||
|
4. WebSearch: `"[Korean Name]" 네이버 지도 [district]`
|
||||||
|
5. WebSearch: `"[Korean Name]" 네이버 스마트플레이스`
|
||||||
|
6. WebSearch: `"[exact phone number]" site:map.naver.com`
|
||||||
|
|
||||||
|
**Important**: Naver Map is JS-rendered — WebFetch cannot extract data from the listing page. Use WebSearch for discovery, verify via search result snippets.
|
||||||
|
|
||||||
|
**If found — audit checklist (score /10):**
|
||||||
|
- [ ] Business name matches canonical NAP
|
||||||
|
- [ ] Address is complete and accurate
|
||||||
|
- [ ] Phone number matches
|
||||||
|
- [ ] Business hours are current
|
||||||
|
- [ ] Place is "claimed" (owner-managed / 업주 등록)
|
||||||
|
- [ ] Keywords/tags are set
|
||||||
|
- [ ] Booking/reservation link present
|
||||||
|
- [ ] Recent blog reviews linked
|
||||||
|
- [ ] Photos uploaded and current
|
||||||
|
- [ ] Menu/service/price information present
|
||||||
|
|
||||||
|
**If NOT found after all attempts:** Report as **"not discoverable via web search"** (not "does not exist" or "not registered").
|
||||||
|
|
||||||
|
### Step 4: Kakao Map Verification
|
||||||
|
|
||||||
|
**Discovery:**
|
||||||
|
1. Use provided Kakao Map URL (from Step 0)
|
||||||
|
2. Check website for Kakao Map link (`place.map.kakao.com/*`, `kko.to/*`)
|
||||||
|
3. WebSearch: `"[Korean Name]" site:place.map.kakao.com`
|
||||||
|
4. WebSearch: `"[Korean Name]" 카카오맵 [district]`
|
||||||
|
|
||||||
|
**If found:** Verify NAP consistency against canonical NAP.
|
||||||
|
|
||||||
|
### Step 5: Citation Discovery
|
||||||
|
|
||||||
|
**Korean market platform priorities:**
|
||||||
|
|
||||||
|
| Platform | Priority | Market |
|
||||||
|
|----------|----------|--------|
|
||||||
|
| Google Business Profile | Critical | Global |
|
||||||
|
| Naver Smart Place (네이버 스마트플레이스) | Critical | Korea |
|
||||||
|
| Kakao Map (카카오맵) | High | Korea |
|
||||||
|
| Industry-specific directories | High | Varies |
|
||||||
|
| Apple Maps | Medium | Global |
|
||||||
|
| Bing Places | Low | Global |
|
||||||
|
|
||||||
|
**Korean medical/cosmetic industry directories:**
|
||||||
|
- 강남언니 (Gangnam Unni)
|
||||||
|
- 바비톡 (Babitalk)
|
||||||
|
- 성예사 (Sungyesa)
|
||||||
|
- 굿닥 (Goodoc)
|
||||||
|
- 똑닥 (Ddocdoc)
|
||||||
|
- 모두닥 (Modoodoc)
|
||||||
|
- 하이닥 (HiDoc)
|
||||||
|
|
||||||
|
**Discovery methods:**
|
||||||
|
- Phone number search across platforms
|
||||||
|
- Korean business name + district search
|
||||||
|
- English business name search
|
||||||
|
- Address search
|
||||||
|
|
||||||
|
### Step 6: NAP Consistency Report
|
||||||
|
|
||||||
|
Cross-reference all discovered sources against the canonical NAP from Step 1.
|
||||||
|
|
||||||
|
**Common inconsistency points to check:**
|
||||||
|
- Building/landmark names (e.g., "EHL빌딩" vs "엔와이빌딩") — the authoritative source is the **business registration certificate** (사업자등록증), not the website alone
|
||||||
|
- Phone format variations (02-XXX-XXXX vs +82-2-XXX-XXXX vs 02XXXXXXX)
|
||||||
|
- Address format (road-name vs lot-number / 도로명 vs 지번)
|
||||||
|
- Korean vs English name spelling variations
|
||||||
|
- Suite/floor number omissions
|
||||||
|
|
||||||
|
### Step 7: LocalBusiness Schema Validation
|
||||||
|
|
||||||
|
Validate JSON-LD completeness:
|
||||||
|
- @type (LocalBusiness, Hospital, or appropriate subtype)
|
||||||
|
- name (Korean and/or English)
|
||||||
|
- address (PostalAddress with Korean format)
|
||||||
|
- telephone
|
||||||
|
- openingHours / openingHoursSpecification
|
||||||
|
- geo (GeoCoordinates — latitude, longitude)
|
||||||
|
- sameAs (should include GBP, Naver Place, Kakao Map, social profiles)
|
||||||
|
- url
|
||||||
|
- image
|
||||||
|
|
||||||
|
Use schema generator skill (17) for creating/fixing markup.
|
||||||
|
|
||||||
|
## Scoring
|
||||||
|
|
||||||
|
| Component | Weight | Max Score |
|
||||||
|
|-----------|--------|-----------|
|
||||||
|
| Business Identity completeness | 5% | /10 |
|
||||||
|
| NAP Consistency | 20% | /10 |
|
||||||
|
| GBP Optimization | 20% | /10 |
|
||||||
|
| Naver Smart Place | 20% | /10 |
|
||||||
|
| Kakao Map presence | 10% | /10 |
|
||||||
|
| Citations (directories) | 10% | /10 |
|
||||||
|
| LocalBusiness Schema | 15% | /10 |
|
||||||
|
|
||||||
|
**Overall Local SEO Score** = weighted average of the /10 component scores (using the weights above), multiplied by 10 to give a score out of 100.
|
||||||
|
|
||||||
## Output Format
|
## Output Format
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
## Local SEO Audit: [Business Name]
|
## Local SEO Audit: [Business Name]
|
||||||
|
**Date**: YYYY-MM-DD
|
||||||
|
**Website**: [URL]
|
||||||
|
|
||||||
### NAP Consistency Score: X/10
|
### Business Identity
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Korean Name | ... |
|
||||||
|
| English Name | ... |
|
||||||
|
| Brand Name | ... |
|
||||||
|
| Address | ... |
|
||||||
|
| Phone | ... |
|
||||||
|
|
||||||
|
### NAP Consistency: X/10
|
||||||
| Source | Name | Address | Phone | Status |
|
| Source | Name | Address | Phone | Status |
|
||||||
|--------|------|---------|-------|--------|
|
|--------|------|---------|-------|--------|
|
||||||
| Website | ✓ | ✓ | ✓ | Match |
|
| Website | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
| GBP | ✓ | ✗ | ✓ | Mismatch |
|
| GBP | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
| Naver Place | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
| Kakao Map | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
|
||||||
### GBP Optimization: X/10
|
### GBP Optimization: X/10
|
||||||
- [ ] Issue 1
|
- [x] Completed items
|
||||||
- [x] Completed item
|
- [ ] Missing items
|
||||||
|
**GBP URL**: [URL or "not discoverable"]
|
||||||
|
|
||||||
### Citation Audit
|
### Naver Smart Place: X/10
|
||||||
- Found: X citations
|
- [x] Completed items
|
||||||
- Consistent: X
|
- [ ] Missing items
|
||||||
- Needs update: X
|
**Naver Place URL**: [URL or "not discoverable"]
|
||||||
|
|
||||||
### Recommendations
|
### Kakao Map: X/10
|
||||||
1. Fix address mismatch on GBP
|
**Status**: Found/Not discoverable
|
||||||
2. Add LocalBusiness schema
|
**Kakao Map URL**: [URL or "not discoverable"]
|
||||||
|
|
||||||
|
### Citations: X/10
|
||||||
|
| Platform | Found | NAP Match |
|
||||||
|
|----------|-------|-----------|
|
||||||
|
| 강남언니 | Yes/No | OK/Issue |
|
||||||
|
| ... | | |
|
||||||
|
|
||||||
|
### LocalBusiness Schema: X/10
|
||||||
|
- Present: Yes/No
|
||||||
|
- Valid: Yes/No
|
||||||
|
- Missing fields: [list]
|
||||||
|
|
||||||
|
### Overall Score: XX/100 (Grade)
|
||||||
|
### Priority Actions
|
||||||
|
1. [Highest impact recommendation]
|
||||||
|
2. ...
|
||||||
```
|
```
|
||||||
|
|
||||||
## Common Issues
|
## Common Issues
|
||||||
|
|
||||||
| Issue | Impact | Fix |
|
| Issue | Impact | Fix |
|
||||||
|-------|--------|-----|
|
|-------|--------|-----|
|
||||||
| NAP inconsistency | High | Update all directories |
|
| NAP inconsistency | High | Update all directories to match canonical NAP |
|
||||||
| Missing GBP categories | Medium | Add relevant categories |
|
| Missing Naver Smart Place | Critical | Register and claim via smartplace.naver.com |
|
||||||
| No LocalBusiness schema | Medium | Add JSON-LD markup |
|
| Unclaimed Naver Place | High | Claim ownership via 네이버 스마트플레이스 |
|
||||||
| Outdated business hours | Medium | Update GBP hours |
|
| Missing GBP listing | Critical | Create via business.google.com |
|
||||||
| No review responses | Low | Respond to all reviews |
|
| Building name mismatch | Medium | Align to business registration certificate |
|
||||||
|
| No LocalBusiness schema | Medium | Add JSON-LD markup with sameAs links |
|
||||||
|
| Missing GeoCoordinates | Medium | Add lat/lng to schema |
|
||||||
|
| No sameAs in schema | Medium | Add GBP, Naver, Kakao, social URLs |
|
||||||
|
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
- GBP API requires enterprise approval (use manual audit)
|
- GBP and Naver Map are JS-rendered — WebFetch cannot extract listing data directly. Always use WebSearch for discovery.
|
||||||
- Citation discovery limited to public data
|
- "Not discoverable via web search" is not the same as "does not exist." Always use this precise wording in reports.
|
||||||
- Use schema generator skill (14) for creating LocalBusiness markup
|
- For Korean businesses, Naver Smart Place is as important as GBP (often more so for domestic traffic).
|
||||||
|
- Citation discovery is limited to publicly searchable data.
|
||||||
|
|
||||||
## Notion Output (Required)
|
## Notion Output (Required)
|
||||||
|
|
||||||
@@ -123,20 +267,13 @@ Required properties:
|
|||||||
|----------|------|-------------|
|
|----------|------|-------------|
|
||||||
| Issue | Title | Report title (Korean + date) |
|
| Issue | Title | Report title (Korean + date) |
|
||||||
| Site | URL | Audited website URL |
|
| Site | URL | Audited website URL |
|
||||||
| Category | Select | Technical SEO, On-page SEO, Performance, Schema/Structured Data, Sitemap, Robots.txt, Content, Local SEO |
|
| Category | Select | Local SEO |
|
||||||
| Priority | Select | Critical, High, Medium, Low |
|
| Priority | Select | Critical, High, Medium, Low |
|
||||||
| Found Date | Date | Audit date (YYYY-MM-DD) |
|
| Found Date | Date | Audit date (YYYY-MM-DD) |
|
||||||
| Audit ID | Rich Text | Format: [TYPE]-YYYYMMDD-NNN |
|
| Audit ID | Rich Text | Format: LOCAL-YYYYMMDD-NNN |
|
||||||
|
|
||||||
### Language Guidelines
|
### Language Guidelines
|
||||||
|
|
||||||
- Report content in Korean (한국어)
|
- Report content in Korean (한국어)
|
||||||
- Keep technical English terms as-is (e.g., SEO Audit, Core Web Vitals, Schema Markup)
|
- Keep technical English terms as-is (e.g., SEO Audit, GBP, NAP, Schema Markup)
|
||||||
- URLs and code remain unchanged
|
- URLs and code remain unchanged
|
||||||
|
|
||||||
### Example MCP Call
|
|
||||||
|
|
||||||
```bash
|
|
||||||
mcp-cli call notion/API-post-page '{"parent": {"database_id": "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"}, "properties": {...}}'
|
|
||||||
```
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,125 +1,239 @@
|
|||||||
---
|
---
|
||||||
name: seo-local-audit
|
name: seo-local-audit
|
||||||
description: |
|
description: |
|
||||||
Local business SEO auditor for NAP consistency, Google Business Profile, and citations.
|
Local business SEO auditor for Korean-market businesses. Covers business identity extraction,
|
||||||
Triggers: local SEO, NAP audit, Google Business Profile, GBP optimization, local citations.
|
NAP consistency, Google Business Profile, Naver Smart Place, Kakao Map, local citations,
|
||||||
|
and LocalBusiness schema validation.
|
||||||
|
Triggers: local SEO, NAP audit, Google Business Profile, GBP optimization, local citations,
|
||||||
|
네이버 스마트플레이스, 카카오맵, 로컬 SEO.
|
||||||
---
|
---
|
||||||
|
|
||||||
# SEO Local Audit
|
# SEO Local Audit
|
||||||
|
|
||||||
## Purpose
|
## Purpose
|
||||||
|
|
||||||
Audit local business SEO: NAP (Name, Address, Phone) consistency, Google Business Profile optimization, local citations, and LocalBusiness schema markup.
|
Audit local business SEO for Korean-market businesses: business identity extraction, NAP consistency, GBP optimization, Naver Smart Place, Kakao Map, local citations, and LocalBusiness schema markup.
|
||||||
|
|
||||||
## Core Capabilities
|
## Core Capabilities
|
||||||
|
|
||||||
1. **NAP Consistency** - Cross-platform verification
|
1. **Business Identity** - Extract official names, address, phone from website schema/content
|
||||||
2. **GBP Optimization** - Profile completeness check
|
2. **NAP Consistency** - Cross-platform verification against canonical NAP
|
||||||
3. **Citation Audit** - Directory presence
|
3. **GBP Optimization** - Layered discovery + profile completeness audit
|
||||||
4. **Schema Validation** - LocalBusiness markup
|
4. **Naver Smart Place** - Layered discovery + listing completeness audit
|
||||||
|
5. **Kakao Map** - Presence verification + NAP check
|
||||||
|
6. **Citation Audit** - Korean-first directory presence
|
||||||
|
7. **Schema Validation** - LocalBusiness JSON-LD markup
|
||||||
|
|
||||||
## MCP Tool Usage
|
## MCP Tool Usage
|
||||||
|
|
||||||
```
|
```
|
||||||
mcp__firecrawl__scrape: Extract NAP from website
|
mcp__firecrawl__scrape: Extract NAP and schema from website
|
||||||
mcp__perplexity__search: Find citations and directories
|
mcp__perplexity__search: Find citations, GBP, Naver Place listings
|
||||||
mcp__notion__create-page: Save audit findings
|
mcp__notion__create-page: Save audit findings
|
||||||
```
|
```
|
||||||
|
|
||||||
## Workflow
|
## Workflow
|
||||||
|
|
||||||
### 1. Gather Business Info
|
### Step 0: Business Identity (MANDATORY FIRST STEP)
|
||||||
Collect from client:
|
|
||||||
- Business name (exact)
|
|
||||||
- Full address
|
|
||||||
- Phone number (local preferred)
|
|
||||||
- Website URL
|
|
||||||
- GBP listing URL
|
|
||||||
|
|
||||||
### 2. Website NAP Check
|
Before any audit, establish the official business identity.
|
||||||
Scrape website for NAP mentions:
|
|
||||||
- Header/footer
|
|
||||||
- Contact page
|
|
||||||
- About page
|
|
||||||
- Schema markup
|
|
||||||
|
|
||||||
### 3. Citation Discovery
|
**Sources (in priority order):**
|
||||||
Search for business mentions:
|
1. Website schema markup (JSON-LD `Organization`, `Hospital`, `LocalBusiness`) — `name` field is authoritative
|
||||||
- "[Business Name] [City]"
|
2. Contact page / About page
|
||||||
- Phone number search
|
3. Footer (address, phone, social links)
|
||||||
- Address search
|
4. User-provided information
|
||||||
|
|
||||||
### 4. GBP Review
|
**Data to collect:**
|
||||||
Manual checklist:
|
|
||||||
- Profile completeness
|
|
||||||
- Category accuracy
|
|
||||||
- Photo presence
|
|
||||||
- Review responses
|
|
||||||
- Post recency
|
|
||||||
|
|
||||||
### 5. Schema Check
|
| Field | Example |
|
||||||
Validate LocalBusiness markup presence and accuracy.
|
|-------|---------|
|
||||||
|
| Official name (Korean) | 제이미성형외과의원 |
|
||||||
|
| Official name (English) | Jamie Plastic Surgery Clinic |
|
||||||
|
| Brand/display name | Jamie Clinic |
|
||||||
|
| Website URL | https://www.jamie.clinic |
|
||||||
|
| Address (Korean) | 서울특별시 강남구 ... |
|
||||||
|
| Phone | 02-XXX-XXXX |
|
||||||
|
| Known GBP URL | (if available) |
|
||||||
|
| Known Naver Place URL | (if available) |
|
||||||
|
| Known Kakao Map URL | (if available) |
|
||||||
|
|
||||||
## GBP Optimization Checklist
|
Look for these URL patterns in `sameAs`, footer links, or embedded iframes:
|
||||||
|
- GBP: `maps.app.goo.gl/*`, `google.com/maps/place/*`, `g.page/*`
|
||||||
|
- Naver Place: `naver.me/*`, `map.naver.com/*/place/*`, `m.place.naver.com/*`
|
||||||
|
- Kakao Map: `place.map.kakao.com/*`, `kko.to/*`
|
||||||
|
|
||||||
- [ ] Business name matches website
|
### Step 1: Website NAP Extraction
|
||||||
- [ ] Complete address with suite/unit
|
|
||||||
- [ ] Local phone number (not toll-free)
|
Scrape header, footer, contact page, about page. Cross-reference with schema markup. Establish the **canonical NAP** baseline.
|
||||||
- [ ] Accurate business hours
|
|
||||||
- [ ] Primary + secondary categories set
|
### Step 2: GBP Verification & Audit
|
||||||
|
|
||||||
|
**Layered discovery (try in order, stop when found):**
|
||||||
|
1. Use provided GBP URL (from Step 0 or user input)
|
||||||
|
2. Check website for GBP link (footer, contact, schema `sameAs`, embedded Google Maps iframe)
|
||||||
|
3. Search: `"[Korean Name]" "[City/District]" Google Maps`
|
||||||
|
4. Search: `"[English Name]" Google Maps [City]`
|
||||||
|
5. Search: `"[exact phone number]" site:google.com/maps`
|
||||||
|
|
||||||
|
**Important**: Google Maps is JS-rendered — scraping tools cannot extract business data. Use search for discovery, verify via search result snippets.
|
||||||
|
|
||||||
|
**If found — audit checklist (score /10):**
|
||||||
|
- [ ] Business name matches canonical NAP
|
||||||
|
- [ ] Address is complete and accurate
|
||||||
|
- [ ] Phone number matches
|
||||||
|
- [ ] Business hours are current
|
||||||
|
- [ ] Primary + secondary categories appropriate
|
||||||
- [ ] Business description complete
|
- [ ] Business description complete
|
||||||
- [ ] 10+ photos uploaded
|
- [ ] 10+ photos uploaded
|
||||||
- [ ] Recent post (within 7 days)
|
- [ ] Posts are recent (within 7 days)
|
||||||
- [ ] Reviews responded to
|
- [ ] Reviews are responded to
|
||||||
|
- [ ] Q&A section is active
|
||||||
|
|
||||||
## Citation Priority
|
**If NOT found:** Report as **"not discoverable via web search"** (distinct from "does not exist").
|
||||||
|
|
||||||
| Platform | Priority |
|
### Step 3: Naver Smart Place Verification & Audit
|
||||||
|----------|----------|
|
|
||||||
| Google Business Profile | Critical |
|
**Layered discovery (try in order, stop when found):**
|
||||||
| Apple Maps | High |
|
1. Use provided Naver Place URL (from Step 0 or user input)
|
||||||
| Bing Places | High |
|
2. Check website for Naver Place link (footer, contact, schema `sameAs`)
|
||||||
| Yelp | High |
|
3. Search: `"[Korean Name]" site:map.naver.com`
|
||||||
| Facebook | Medium |
|
4. Search: `"[Korean Name]" 네이버 지도 [district]`
|
||||||
| Industry directories | Medium |
|
5. Search: `"[Korean Name]" 네이버 스마트플레이스`
|
||||||
|
6. Search: `"[exact phone number]" site:map.naver.com`
|
||||||
|
|
||||||
|
**Important**: Naver Map is JS-rendered — scraping tools cannot extract data. Use search for discovery, verify via snippets.
|
||||||
|
|
||||||
|
**If found — audit checklist (score /10):**
|
||||||
|
- [ ] Business name matches canonical NAP
|
||||||
|
- [ ] Address is complete and accurate
|
||||||
|
- [ ] Phone number matches
|
||||||
|
- [ ] Business hours are current
|
||||||
|
- [ ] Place is "claimed" (owner-managed / 업주 등록)
|
||||||
|
- [ ] Keywords/tags are set
|
||||||
|
- [ ] Booking/reservation link present
|
||||||
|
- [ ] Recent blog reviews linked
|
||||||
|
- [ ] Photos uploaded and current
|
||||||
|
- [ ] Menu/service/price information present
|
||||||
|
|
||||||
|
**If NOT found:** Report as **"not discoverable via web search"** (not "does not exist" or "not registered").
|
||||||
|
|
||||||
|
### Step 4: Kakao Map Verification
|
||||||
|
|
||||||
|
**Discovery:**
|
||||||
|
1. Use provided Kakao Map URL (from Step 0)
|
||||||
|
2. Check website for Kakao Map link (`place.map.kakao.com/*`, `kko.to/*`)
|
||||||
|
3. Search: `"[Korean Name]" site:place.map.kakao.com`
|
||||||
|
4. Search: `"[Korean Name]" 카카오맵 [district]`
|
||||||
|
|
||||||
|
**If found:** Verify NAP consistency against canonical NAP.
|
||||||
|
|
||||||
|
### Step 5: Citation Discovery
|
||||||
|
|
||||||
|
**Korean market platform priorities:**
|
||||||
|
|
||||||
|
| Platform | Priority | Market |
|
||||||
|
|----------|----------|--------|
|
||||||
|
| Google Business Profile | Critical | Global |
|
||||||
|
| Naver Smart Place (네이버 스마트플레이스) | Critical | Korea |
|
||||||
|
| Kakao Map (카카오맵) | High | Korea |
|
||||||
|
| Industry-specific directories | High | Varies |
|
||||||
|
| Apple Maps | Medium | Global |
|
||||||
|
| Bing Places | Low | Global |
|
||||||
|
|
||||||
|
**Korean medical/cosmetic industry directories:**
|
||||||
|
- 강남언니 (Gangnam Unni)
|
||||||
|
- 바비톡 (Babitalk)
|
||||||
|
- 성예사 (Sungyesa)
|
||||||
|
- 굿닥 (Goodoc)
|
||||||
|
- 똑닥 (Ddocdoc)
|
||||||
|
- 모두닥 (Modoodoc)
|
||||||
|
- 하이닥 (HiDoc)
|
||||||
|
|
||||||
|
### Step 6: NAP Consistency Report
|
||||||
|
|
||||||
|
Cross-reference all sources against canonical NAP.
|
||||||
|
|
||||||
|
**Common inconsistency points:**
|
||||||
|
- Building/landmark names — authoritative source is the **business registration certificate** (사업자등록증)
|
||||||
|
- Phone format variations (02-XXX-XXXX vs +82-2-XXX-XXXX)
|
||||||
|
- Address format (road-name vs lot-number / 도로명 vs 지번)
|
||||||
|
- Korean vs English name spelling variations
|
||||||
|
- Suite/floor number omissions
|
||||||
|
|
||||||
|
### Step 7: LocalBusiness Schema Validation
|
||||||
|
|
||||||
|
Validate JSON-LD completeness: @type, name, address, telephone, openingHours, geo (GeoCoordinates), sameAs (GBP, Naver, Kakao, social), url, image.
|
||||||
|
|
||||||
|
## Scoring
|
||||||
|
|
||||||
|
| Component | Weight | Max Score |
|
||||||
|
|-----------|--------|-----------|
|
||||||
|
| Business Identity completeness | 5% | /10 |
|
||||||
|
| NAP Consistency | 20% | /10 |
|
||||||
|
| GBP Optimization | 20% | /10 |
|
||||||
|
| Naver Smart Place | 20% | /10 |
|
||||||
|
| Kakao Map presence | 10% | /10 |
|
||||||
|
| Citations (directories) | 10% | /10 |
|
||||||
|
| LocalBusiness Schema | 15% | /10 |
|
||||||
|
|
||||||
|
**Overall Local SEO Score** = weighted average of the /10 component scores (using the weights above), multiplied by 10 to give a score out of 100.
|
||||||
|
|
||||||
## Output Format
|
## Output Format
|
||||||
|
|
||||||
```markdown
|
```markdown
|
||||||
## Local SEO Audit: [Business]
|
## Local SEO Audit: [Business]
|
||||||
|
|
||||||
|
### Business Identity
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Korean Name | ... |
|
||||||
|
| English Name | ... |
|
||||||
|
| Address | ... |
|
||||||
|
| Phone | ... |
|
||||||
|
|
||||||
### NAP Consistency: X/10
|
### NAP Consistency: X/10
|
||||||
| Source | Name | Address | Phone |
|
| Source | Name | Address | Phone | Status |
|
||||||
|--------|------|---------|-------|
|
|--------|------|---------|-------|--------|
|
||||||
| Website | ✓/✗ | ✓/✗ | ✓/✗ |
|
| Website | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
| GBP | ✓/✗ | ✓/✗ | ✓/✗ |
|
| GBP | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
| Naver Place | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
| Kakao Map | OK/Issue | OK/Issue | OK/Issue | Match/Mismatch |
|
||||||
|
|
||||||
### GBP Score: X/10
|
### GBP Score: X/10
|
||||||
[Checklist results]
|
[Checklist results]
|
||||||
|
|
||||||
### Citations Found: X
|
### Naver Smart Place: X/10
|
||||||
- Consistent: X
|
[Checklist results]
|
||||||
- Inconsistent: X
|
|
||||||
|
|
||||||
### LocalBusiness Schema
|
### Kakao Map: X/10
|
||||||
|
[Status + NAP check]
|
||||||
|
|
||||||
|
### Citations: X/10
|
||||||
|
| Platform | Found | NAP Match |
|
||||||
|
|----------|-------|-----------|
|
||||||
|
| ... | | |
|
||||||
|
|
||||||
|
### LocalBusiness Schema: X/10
|
||||||
- Present: Yes/No
|
- Present: Yes/No
|
||||||
- Valid: Yes/No
|
- Valid: Yes/No
|
||||||
|
- Missing fields: [list]
|
||||||
|
|
||||||
|
### Overall Score: XX/100 (Grade)
|
||||||
### Priority Actions
|
### Priority Actions
|
||||||
1. [Fix recommendations]
|
1. [Recommendations]
|
||||||
```
|
```
|
||||||
|
|
||||||
## Limitations
|
## Notes
|
||||||
|
|
||||||
- GBP data requires manual access
|
- GBP and Naver Map are JS-rendered — scraping tools cannot extract listing data. Always use search for discovery.
|
||||||
- Citation discovery limited to searchable sources
|
- "Not discoverable via web search" is not the same as "does not exist." Always use this precise wording in reports.
|
||||||
- Cannot update external directories
|
- For Korean businesses, Naver Smart Place is as important as GBP (often more so for domestic traffic).
|
||||||
|
|
||||||
## Notion Output (Required)
|
## Notion Output (Required)
|
||||||
|
|
||||||
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
- **Properties**: Issue (title), Site (url), Category, Priority, Found Date, Audit ID
|
- **Properties**: Issue (title), Site (url), Category (Local SEO), Priority, Found Date, Audit ID
|
||||||
- **Language**: Korean with English technical terms
|
- **Language**: Korean with English technical terms
|
||||||
- **Audit ID Format**: [TYPE]-YYYYMMDD-NNN
|
- **Audit ID Format**: LOCAL-YYYYMMDD-NNN
|
||||||
|
|
||||||
|
|||||||
135
custom-skills/19-seo-keyword-strategy/code/CLAUDE.md
Normal file
135
custom-skills/19-seo-keyword-strategy/code/CLAUDE.md
Normal file
@@ -0,0 +1,135 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Keyword strategy and research tool for SEO campaigns. Expands seed keywords via our-seo-agent CLI or pre-fetched data, classifies search intent, clusters topics, performs competitor keyword gap analysis, and supports Korean market keyword discovery including Naver autocomplete.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Install dependencies
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# Keyword research from seed keyword
|
||||||
|
python scripts/keyword_researcher.py --keyword "치과 임플란트" --country kr --json
|
||||||
|
|
||||||
|
# Keyword gap analysis vs competitor
|
||||||
|
python scripts/keyword_gap_analyzer.py --target https://example.com --competitor https://competitor.com --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `keyword_researcher.py` | Expand seed keywords, classify intent, cluster topics | Keyword list with volume, KD, intent, clusters |
|
||||||
|
| `keyword_gap_analyzer.py` | Find competitor keyword gaps | Gap keywords with opportunity scores |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## Keyword Researcher
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Basic expansion
|
||||||
|
python scripts/keyword_researcher.py --keyword "dental implant" --json
|
||||||
|
|
||||||
|
# Korean market with suffix expansion
|
||||||
|
python scripts/keyword_researcher.py --keyword "치과 임플란트" --country kr --korean-suffixes --json
|
||||||
|
|
||||||
|
# With volume-by-country comparison
|
||||||
|
python scripts/keyword_researcher.py --keyword "dental implant" --country kr --compare-global --json
|
||||||
|
|
||||||
|
# Output to file
|
||||||
|
python scripts/keyword_researcher.py --keyword "치과 임플란트" --country kr --output report.json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Seed keyword expansion (matching terms, related terms, search suggestions)
|
||||||
|
- Korean suffix expansion (추천, 가격, 후기, 잘하는곳, 부작용, 전후)
|
||||||
|
- Search intent classification (informational/navigational/commercial/transactional)
|
||||||
|
- Keyword clustering into topic groups
|
||||||
|
- Volume-by-country comparison (Korea vs global)
|
||||||
|
- Keyword difficulty scoring
|
||||||
|
|
||||||
|
## Keyword Gap Analyzer
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Find gaps vs one competitor
|
||||||
|
python scripts/keyword_gap_analyzer.py --target https://example.com --competitor https://competitor.com --json
|
||||||
|
|
||||||
|
# Multiple competitors
|
||||||
|
python scripts/keyword_gap_analyzer.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --json
|
||||||
|
|
||||||
|
# Filter by minimum volume
|
||||||
|
python scripts/keyword_gap_analyzer.py --target https://example.com --competitor https://competitor.com --min-volume 100 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Identify keywords competitors rank for but target doesn't
|
||||||
|
- Opportunity scoring based on volume, KD, and competitor positions
|
||||||
|
- Segment gaps by intent type
|
||||||
|
- Prioritize low-KD high-volume opportunities
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (future); use `--input` for pre-fetched JSON |
|
||||||
|
| WebSearch / WebFetch | Supplementary live data |
|
||||||
|
| Notion MCP | Save audit report to database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
All scripts support `--json` flag for structured output:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"seed_keyword": "치과 임플란트",
|
||||||
|
"country": "kr",
|
||||||
|
"total_keywords": 150,
|
||||||
|
"clusters": [
|
||||||
|
{
|
||||||
|
"topic": "임플란트 가격",
|
||||||
|
"keywords": [...],
|
||||||
|
"total_volume": 12000
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"keywords": [
|
||||||
|
{
|
||||||
|
"keyword": "치과 임플란트 가격",
|
||||||
|
"volume": 5400,
|
||||||
|
"kd": 32,
|
||||||
|
"cpc": 2.5,
|
||||||
|
"intent": "commercial",
|
||||||
|
"cluster": "임플란트 가격"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Audited website URL |
|
||||||
|
| Category | Select | Keyword Research |
|
||||||
|
| Priority | Select | Based on opportunity score |
|
||||||
|
| Found Date | Date | Research date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: KW-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is (e.g., Keyword Difficulty, Search Volume, CPC)
|
||||||
|
- URLs and code remain unchanged
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from asyncio import Semaphore
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
retry_if_exception_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Logging setup
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
|
||||||
|
"""Rate limiter using token bucket algorithm."""
|
||||||
|
|
||||||
|
def __init__(self, rate: float, per: float = 1.0):
|
||||||
|
"""
|
||||||
|
Initialize rate limiter.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
rate: Number of requests allowed
|
||||||
|
per: Time period in seconds (default: 1 second)
|
||||||
|
"""
|
||||||
|
self.rate = rate
|
||||||
|
self.per = per
|
||||||
|
self.tokens = rate
|
||||||
|
self.last_update = datetime.now()
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
|
||||||
|
async def acquire(self) -> None:
|
||||||
|
"""Acquire a token, waiting if necessary."""
|
||||||
|
async with self._lock:
|
||||||
|
now = datetime.now()
|
||||||
|
elapsed = (now - self.last_update).total_seconds()
|
||||||
|
self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
|
||||||
|
self.last_update = now
|
||||||
|
|
||||||
|
if self.tokens < 1:
|
||||||
|
wait_time = (1 - self.tokens) * (self.per / self.rate)
|
||||||
|
await asyncio.sleep(wait_time)
|
||||||
|
self.tokens = 0
|
||||||
|
else:
|
||||||
|
self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Combines a concurrency cap (semaphore), a token-bucket rate limiter, and
    per-client request statistics.  Subclasses wrap their API calls with
    ``_rate_limited_request`` or fan out with ``batch_requests``.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    def _record_retry(retry_state) -> None:  # noqa: N805 - tenacity hook, not a bound method
        """tenacity ``before_sleep`` hook: count a retry on the owning client.

        tenacity passes the wrapped call's arguments via ``retry_state.args``;
        for a method call, args[0] is the client instance.  Bug fix: the
        "retries" counter was declared in ``stats`` but never incremented.
        """
        retry_state.args[0].stats["retries"] += 1

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
        before_sleep=_record_retry,
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute a request with rate limiting and retry.

        Args:
            coro: Zero-argument callable returning an awaitable.

        Returns:
            Whatever the awaited request returns.

        Raises:
            Exception: the last error after exhausting 3 attempts.
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                # Logged per attempt; tenacity may retry after this.
                self.logger.error(f"Request failed: {e}")
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Requests that still fail after retries are mapped to
        ``{"error": str(e)}`` rather than raising.
        NOTE(review): with tqdm installed, results come back in completion
        order; without it, input order is preserved — confirm callers don't
        rely on ordering.
        """
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                return {"error": str(e)}

        tasks = [execute(req) for req in requests]

        if has_tqdm:
            results = []
            for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
                result = await coro
                results.append(result)
            return results
        else:
            # execute() already converts exceptions to error dicts;
            # return_exceptions=True is a final safety net.
            return await asyncio.gather(*tasks, return_exceptions=True)

    def print_stats(self) -> None:
        """Log request statistics accumulated since construction."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f"  Total Requests: {self.stats['requests']}")
        self.logger.info(f"  Successful: {self.stats['success']}")
        self.logger.info(f"  Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Module-level singleton: importing scripts share one ConfigManager (and thus
# a single load_dotenv() call performed in its __init__).
config = ConfigManager()
|
||||||
@@ -0,0 +1,584 @@
|
|||||||
|
"""
|
||||||
|
Keyword Gap Analyzer - Competitor keyword gap analysis with opportunity scoring
|
||||||
|
===============================================================================
|
||||||
|
Purpose: Identify keywords competitors rank for but target site doesn't,
|
||||||
|
score opportunities, and prioritize by volume/difficulty ratio.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
logger = logging.getLogger("keyword_gap_analyzer")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Intent classification patterns (shared with keyword_researcher)
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Keyword → search-intent classification patterns (Korean + English).
# classify_intent() scans these in dict-insertion order and returns the first
# intent whose regex matches, so more action-oriented intents are listed
# first and "informational" doubles as the fallback.
INTENT_PATTERNS: dict[str, list[str]] = {
    "transactional": [
        # Purchase / order / deal vocabulary.
        r"구매|구입|주문|buy|order|purchase|shop|deal|discount|coupon|할인|쿠폰",
        # Booking / signup actions.
        r"예약|booking|reserve|sign\s?up|register|등록|신청",
    ],
    "commercial": [
        # Price / cost research.
        r"가격|비용|얼마|price|cost|pricing|fee|요금",
        # Comparison and review queries.
        r"추천|best|top\s?\d|review|비교|compare|vs|versus|후기|리뷰|평점|평가",
        # "Good place for ..." style local-business queries.
        r"잘하는곳|잘하는|맛집|업체|병원|추천\s?병원",
    ],
    "navigational": [
        # Bare URLs / domain fragments at the start of the query.
        r"^(www\.|http|\.com|\.co\.kr|\.net)",
        # Brand-destination vocabulary (official site, login, homepage).
        r"공식|official|login|로그인|홈페이지|사이트|website",
        # Customer-service destinations.
        r"고객센터|contact|support|customer\s?service",
    ],
    "informational": [
        # How-to / question words.
        r"방법|how\s?to|what\s?is|why|when|where|who|which",
        # Definitional / guide vocabulary.
        r"뜻|의미|정의|definition|meaning|guide|tutorial",
        # Symptom / cause / effect research.
        r"효과|부작용|증상|원인|차이|종류|type|cause|symptom|effect",
        # Before/after and outcome queries.
        r"전후|before\s?and\s?after|결과|result",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dataclasses
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class OrganicKeyword:
    """A keyword that a domain ranks for organically."""

    keyword: str       # the search query itself
    position: int = 0  # organic ranking position (0 = unknown/unranked)
    volume: int = 0    # monthly search volume
    kd: float = 0.0    # keyword difficulty (treated as 0-100 by score_opportunities)
    cpc: float = 0.0   # cost-per-click estimate
    url: str = ""      # URL that holds the ranking
    traffic: int = 0   # estimated organic traffic from this keyword
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GapKeyword:
    """A keyword gap between target and competitor(s)."""

    keyword: str
    volume: int = 0      # highest volume reported across competitors (see find_gaps)
    kd: float = 0.0      # lowest non-zero difficulty seen across competitors
    cpc: float = 0.0
    intent: str = "informational"   # assigned via classify_intent()
    opportunity_score: float = 0.0  # filled in by score_opportunities()
    # Per-competitor-domain ranking position and ranking URL.
    competitor_positions: dict[str, int] = field(default_factory=dict)
    competitor_urls: dict[str, str] = field(default_factory=dict)
    avg_competitor_position: float = 0.0  # mean of competitor_positions values

    def to_dict(self) -> dict:
        """Serialize to a plain dict (for JSON output)."""
        return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GapAnalysisResult:
    """Complete gap analysis result."""

    target: str  # target domain under analysis
    competitors: list[str] = field(default_factory=list)
    country: str = "kr"
    total_gaps: int = 0
    total_opportunity_volume: int = 0
    gaps_by_intent: dict[str, int] = field(default_factory=dict)
    top_opportunities: list[GapKeyword] = field(default_factory=list)
    all_gaps: list[GapKeyword] = field(default_factory=list)
    target_keyword_count: int = 0
    competitor_keyword_counts: dict[str, int] = field(default_factory=dict)
    timestamp: str = ""

    def to_dict(self) -> dict:
        """Serialize to a plain dict suitable for JSON output.

        dataclasses.asdict recurses into the GapKeyword lists (matching
        GapKeyword.to_dict) and preserves field declaration order, yielding
        the same structure as a hand-written field-by-field mapping.
        """
        return asdict(self)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MCP Helper
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def call_mcp_tool(tool_name: str, params: dict, timeout: int = 120) -> dict:
    """
    Call an Ahrefs MCP tool and return parsed JSON response.

    In production this delegates to the MCP bridge. For standalone usage
    it invokes the Claude CLI with the appropriate tool call.

    Args:
        tool_name: MCP tool name (without the mcp__claude_ai_Ahrefs__ prefix).
        params: Tool parameters; JSON-encoded into the prompt.
        timeout: Seconds to wait for the CLI before giving up (default 120).

    Returns:
        Parsed JSON dict on success.  Every failure path returns a dict that
        still carries empty "keywords"/"items" lists so callers can iterate
        without checking for errors first.
    """
    logger.info(f"Calling MCP tool: {tool_name} with params: {json.dumps(params, ensure_ascii=False)}")

    try:
        # argv list with shell=False (subprocess.run default): params are
        # JSON-encoded into the prompt and never interpreted by a shell.
        cmd = [
            "claude",
            "--print",
            "--output-format", "json",
            "-p",
            (
                f"Call the tool mcp__claude_ai_Ahrefs__{tool_name} with these parameters: "
                f"{json.dumps(params, ensure_ascii=False)}. Return ONLY the raw JSON result."
            ),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)

        if result.returncode != 0:
            logger.warning(f"MCP tool {tool_name} returned non-zero exit code: {result.returncode}")
            logger.debug(f"stderr: {result.stderr}")
            return {"error": result.stderr, "keywords": [], "items": []}

        try:
            # NOTE(review): `--output-format json` may wrap the tool result in
            # CLI metadata rather than returning the raw tool JSON — callers
            # should tolerate either shape; confirm against the CLI version.
            return json.loads(result.stdout)
        except json.JSONDecodeError:
            # Model returned non-JSON text; surface it for debugging.
            return {"raw": result.stdout, "keywords": [], "items": []}

    except subprocess.TimeoutExpired:
        logger.error(f"MCP tool {tool_name} timed out")
        return {"error": "timeout", "keywords": [], "items": []}
    except FileNotFoundError:
        # Standalone/dev machines without the Claude CLI degrade gracefully.
        logger.warning("Claude CLI not found - returning empty result for standalone testing")
        return {"keywords": [], "items": []}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Utility functions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def extract_domain(url: str) -> str:
    """Return the bare host for *url*: lowercased, no scheme, no leading www."""
    # urlparse only yields a netloc when a scheme is present, so supply one.
    normalized = url if url.startswith(("http://", "https://")) else f"https://{url}"
    parsed = urlparse(normalized)
    # Fall back to .path for inputs urlparse still treats as scheme-less.
    host = (parsed.netloc or parsed.path).lower().strip("/")
    return host.removeprefix("www.")
|
||||||
|
|
||||||
|
|
||||||
|
def classify_intent(keyword: str) -> str:
    """Classify *keyword* into one of the INTENT_PATTERNS intents.

    Intents are checked in dict order; the first whose pattern matches wins,
    and "informational" is the fallback when nothing matches.
    """
    normalized = keyword.lower().strip()
    for intent, patterns in INTENT_PATTERNS.items():
        if any(re.search(p, normalized, re.IGNORECASE) for p in patterns):
            return intent
    return "informational"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# KeywordGapAnalyzer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordGapAnalyzer:
    """Analyze keyword gaps between a target site and its competitors.

    Pipeline (see analyze()): fetch organic keywords for target and each
    competitor via the Ahrefs MCP bridge, diff the sets, then score and rank
    the resulting gap keywords by opportunity.
    """

    def __init__(self, country: str = "kr", min_volume: int = 0):
        # country: Ahrefs country code used for all keyword lookups.
        # min_volume: gap keywords below this monthly volume are dropped.
        self.country = country
        self.min_volume = min_volume

    def get_organic_keywords(self, domain: str, limit: int = 1000) -> list[OrganicKeyword]:
        """
        Fetch organic keywords for a domain via Ahrefs site-explorer-organic-keywords.
        Returns a list of OrganicKeyword entries.

        Entries with an empty keyword string are discarded; numeric fields
        default to 0 when the response omits them.
        """
        clean_domain = extract_domain(domain)
        logger.info(f"Fetching organic keywords for: {clean_domain} (limit={limit})")

        result = call_mcp_tool("site-explorer-organic-keywords", {
            "target": clean_domain,
            "country": self.country,
            "limit": limit,
            "mode": "domain",
        })

        keywords: list[OrganicKeyword] = []
        # Response field names vary; accept several alias keys per field and
        # coerce falsy values ("", None) to 0 via the `or 0` guard.
        for item in result.get("keywords", result.get("items", [])):
            if not isinstance(item, dict):
                continue
            kw = OrganicKeyword(
                keyword=item.get("keyword", item.get("term", "")),
                position=int(item.get("position", item.get("rank", 0)) or 0),
                volume=int(item.get("volume", item.get("search_volume", 0)) or 0),
                kd=float(item.get("keyword_difficulty", item.get("kd", 0)) or 0),
                cpc=float(item.get("cpc", item.get("cost_per_click", 0)) or 0),
                url=item.get("url", item.get("best_position_url", "")),
                traffic=int(item.get("traffic", item.get("estimated_traffic", 0)) or 0),
            )
            if kw.keyword:
                keywords.append(kw)

        logger.info(f"Found {len(keywords)} organic keywords for {clean_domain}")
        return keywords

    def find_gaps(
        self,
        target_keywords: list[OrganicKeyword],
        competitor_keyword_sets: dict[str, list[OrganicKeyword]],
    ) -> list[GapKeyword]:
        """
        Identify keywords that competitors rank for but the target doesn't.

        A gap keyword is one that appears in at least one competitor's keyword
        set but not in the target's keyword set.  Keywords are deduplicated
        case-insensitively; each gap records every competitor's position/URL.
        """
        # Build target keyword set for fast O(1) membership lookup
        # (case-insensitive, whitespace-trimmed).
        target_kw_set: set[str] = {kw.keyword.lower().strip() for kw in target_keywords}

        # Collect all competitor keywords with their positions, keyed by the
        # normalized keyword so the same gap merges across competitors.
        gap_map: dict[str, GapKeyword] = {}

        for comp_domain, comp_keywords in competitor_keyword_sets.items():
            for ckw in comp_keywords:
                kw_lower = ckw.keyword.lower().strip()

                # Skip if target already ranks for this keyword
                if kw_lower in target_kw_set:
                    continue

                # Skip below minimum volume
                if ckw.volume < self.min_volume:
                    continue

                if kw_lower not in gap_map:
                    gap_map[kw_lower] = GapKeyword(
                        keyword=ckw.keyword,
                        volume=ckw.volume,
                        kd=ckw.kd,
                        cpc=ckw.cpc,
                        intent=classify_intent(ckw.keyword),
                        competitor_positions={},
                        competitor_urls={},
                    )

                gap_map[kw_lower].competitor_positions[comp_domain] = ckw.position
                gap_map[kw_lower].competitor_urls[comp_domain] = ckw.url

                # Merge metrics across competitors: keep the HIGHEST reported
                # volume and the LOWEST non-zero KD (most optimistic
                # difficulty estimate; 0 means "unknown", not "easy").
                if ckw.volume > gap_map[kw_lower].volume:
                    gap_map[kw_lower].volume = ckw.volume
                if ckw.kd > 0 and (gap_map[kw_lower].kd == 0 or ckw.kd < gap_map[kw_lower].kd):
                    gap_map[kw_lower].kd = ckw.kd

        gaps = list(gap_map.values())

        # Calculate average competitor position for each gap (0.0 when no
        # positions were recorded).
        for gap in gaps:
            positions = list(gap.competitor_positions.values())
            gap.avg_competitor_position = round(
                sum(positions) / len(positions), 1
            ) if positions else 0.0

        logger.info(f"Found {len(gaps)} keyword gaps")
        return gaps

    def score_opportunities(self, gaps: list[GapKeyword]) -> list[GapKeyword]:
        """
        Score each gap keyword by opportunity potential.

        Formula:
            opportunity_score = (volume_score * 0.4) + (kd_score * 0.3) +
                                (position_score * 0.2) + (intent_score * 0.1)

        Where:
        - volume_score: normalized 0-100 based on max volume in set
        - kd_score: inverted (lower KD = higher score), normalized 0-100
        - position_score: based on avg competitor position (lower = easier to compete)
        - intent_score: commercial/transactional get higher scores

        Mutates each GapKeyword's opportunity_score and sorts the list
        in-place (descending) before returning it.
        """
        if not gaps:
            return gaps

        # Find max volume for normalization (floor of 1 avoids divide-by-zero
        # when every gap reports 0 volume).
        max_volume = max(g.volume for g in gaps) if gaps else 1
        max_volume = max(max_volume, 1)

        # Money-intent keywords are weighted highest; 40 is also the default
        # for unrecognized intents below.
        intent_scores = {
            "transactional": 100,
            "commercial": 80,
            "informational": 40,
            "navigational": 20,
        }

        for gap in gaps:
            # Volume score (0-100)
            volume_score = (gap.volume / max_volume) * 100

            # KD score (inverted: low KD = high score)
            kd_score = max(0, 100 - gap.kd)

            # Position score (competitors ranking 1-10 means realistic opportunity)
            if gap.avg_competitor_position <= 10:
                position_score = 90
            elif gap.avg_competitor_position <= 20:
                position_score = 70
            elif gap.avg_competitor_position <= 50:
                position_score = 50
            else:
                position_score = 30

            # Intent score
            intent_score = intent_scores.get(gap.intent, 40)

            # Combined score
            gap.opportunity_score = round(
                (volume_score * 0.4) +
                (kd_score * 0.3) +
                (position_score * 0.2) +
                (intent_score * 0.1),
                1,
            )

        # Sort by opportunity score descending
        gaps.sort(key=lambda g: g.opportunity_score, reverse=True)

        logger.info(f"Scored {len(gaps)} gap keywords by opportunity")
        return gaps

    def analyze(self, target_url: str, competitor_urls: list[str]) -> GapAnalysisResult:
        """
        Orchestrate full keyword gap analysis:
        1. Fetch organic keywords for target
        2. Fetch organic keywords for each competitor
        3. Identify gaps
        4. Score opportunities
        5. Compile results

        Returns:
            GapAnalysisResult with all gaps plus the top 50 opportunities.
        """
        target_domain = extract_domain(target_url)
        competitor_domains = [extract_domain(url) for url in competitor_urls]

        logger.info(
            f"Starting gap analysis: {target_domain} vs {', '.join(competitor_domains)}"
        )

        # Step 1: Fetch target keywords
        target_keywords = self.get_organic_keywords(target_domain)

        # Step 2: Fetch competitor keywords (sequential, one MCP call each)
        competitor_keyword_sets: dict[str, list[OrganicKeyword]] = {}
        competitor_keyword_counts: dict[str, int] = {}

        for comp_domain in competitor_domains:
            comp_keywords = self.get_organic_keywords(comp_domain)
            competitor_keyword_sets[comp_domain] = comp_keywords
            competitor_keyword_counts[comp_domain] = len(comp_keywords)

        # Step 3: Find gaps
        gaps = self.find_gaps(target_keywords, competitor_keyword_sets)

        # Step 4: Score opportunities (also sorts descending by score)
        scored_gaps = self.score_opportunities(gaps)

        # Step 5: Calculate intent distribution
        gaps_by_intent: dict[str, int] = {}
        for gap in scored_gaps:
            gaps_by_intent[gap.intent] = gaps_by_intent.get(gap.intent, 0) + 1

        # Step 6: Compile result
        result = GapAnalysisResult(
            target=target_domain,
            competitors=competitor_domains,
            country=self.country,
            total_gaps=len(scored_gaps),
            total_opportunity_volume=sum(g.volume for g in scored_gaps),
            gaps_by_intent=gaps_by_intent,
            top_opportunities=scored_gaps[:50],
            all_gaps=scored_gaps,
            target_keyword_count=len(target_keywords),
            competitor_keyword_counts=competitor_keyword_counts,
            timestamp=datetime.now().isoformat(),
        )

        logger.info(
            f"Gap analysis complete: {result.total_gaps} gaps found, "
            f"total opportunity volume {result.total_opportunity_volume:,}"
        )
        return result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Plain-text report formatter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def format_text_report(result: GapAnalysisResult) -> str:
    """Format gap analysis result as a human-readable text report.

    Sections: header, overview, gaps-by-intent distribution, top
    opportunities (by score), and quick wins (low KD, non-trivial volume).

    Args:
        result: Completed gap analysis to render.

    Returns:
        Multi-line report string (no trailing newline).
    """

    def _clip(text: str, limit: int) -> str:
        # Fixed-width columns: slicing is already a no-op for short strings,
        # so the original `text[:n] if len(text) > n else text` reduces to this.
        return text[:limit]

    lines: list[str] = []
    lines.append("=" * 75)
    # Plain literal — the original used an f-string with no placeholders (F541).
    lines.append("Keyword Gap Analysis Report")
    lines.append(f"Target: {result.target}")
    lines.append(f"Competitors: {', '.join(result.competitors)}")
    lines.append(f"Country: {result.country.upper()} | Date: {result.timestamp[:10]}")
    lines.append("=" * 75)
    lines.append("")

    # Overview
    lines.append("## Overview")
    lines.append(f" Target keywords: {result.target_keyword_count:,}")
    for comp, count in result.competitor_keyword_counts.items():
        lines.append(f" {comp} keywords: {count:,}")
    lines.append(f" Keyword gaps found: {result.total_gaps:,}")
    lines.append(f" Total opportunity volume: {result.total_opportunity_volume:,}")
    lines.append("")

    # Intent distribution
    if result.gaps_by_intent:
        lines.append("## Gaps by Intent")
        for intent, count in sorted(result.gaps_by_intent.items(), key=lambda x: x[1], reverse=True):
            # Guard against division by zero on an empty result.
            pct = (count / result.total_gaps) * 100 if result.total_gaps else 0
            lines.append(f" {intent:<15}: {count:>5} ({pct:.1f}%)")
        lines.append("")

    # Top opportunities
    if result.top_opportunities:
        lines.append("## Top Opportunities (by score)")
        header = f" {'Keyword':<35} {'Vol':>8} {'KD':>6} {'Score':>7} {'Intent':<15} {'Competitors'}"
        lines.append(header)
        lines.append(" " + "-" * 90)

        for gap in result.top_opportunities[:30]:
            kw_display = _clip(gap.keyword, 33)
            comp_positions = ", ".join(
                f"{d}:#{p}" for d, p in gap.competitor_positions.items()
            )
            comp_display = _clip(comp_positions, 30)

            lines.append(
                f" {kw_display:<35} {gap.volume:>8,} {gap.kd:>6.1f} "
                f"{gap.opportunity_score:>7.1f} {gap.intent:<15} {comp_display}"
            )
        lines.append("")

    # Quick wins (low KD, high volume)
    quick_wins = [g for g in result.all_gaps if g.kd <= 30 and g.volume >= 100]
    quick_wins.sort(key=lambda g: g.volume, reverse=True)
    if quick_wins:
        lines.append("## Quick Wins (KD <= 30, Volume >= 100)")
        lines.append(f" {'Keyword':<35} {'Vol':>8} {'KD':>6} {'Intent':<15}")
        lines.append(" " + "-" * 64)
        for gap in quick_wins[:20]:
            lines.append(
                f" {_clip(gap.keyword, 33):<35} {gap.volume:>8,} {gap.kd:>6.1f} {gap.intent:<15}"
            )
        lines.append("")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments, run the gap analysis, emit output.

    Returns:
        Process exit code (0 on success).
    """
    arg_parser = argparse.ArgumentParser(
        description="Keyword Gap Analyzer - Find competitor keyword opportunities",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python keyword_gap_analyzer.py --target https://example.com --competitor https://comp.com --json
python keyword_gap_analyzer.py --target example.com --competitor comp1.com --competitor comp2.com --min-volume 100 --json
python keyword_gap_analyzer.py --target example.com --competitor comp.com --country us --output gaps.json
""",
    )
    arg_parser.add_argument(
        "--target",
        required=True,
        help="Target website URL or domain",
    )
    # --competitor may be given multiple times; collected into args.competitors.
    arg_parser.add_argument(
        "--competitor",
        action="append",
        required=True,
        dest="competitors",
        help="Competitor URL or domain (can be repeated)",
    )
    arg_parser.add_argument("--country", default="kr", help="Target country code (default: kr)")
    arg_parser.add_argument("--min-volume", type=int, default=0, help="Minimum search volume filter (default: 0)")
    arg_parser.add_argument("--json", action="store_true", dest="output_json", help="Output results as JSON")
    arg_parser.add_argument("--output", type=str, default=None, help="Write output to file (path)")
    arg_parser.add_argument("--verbose", action="store_true", help="Enable verbose/debug logging")

    opts = arg_parser.parse_args()

    if opts.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Run the analysis with the CLI-selected filters.
    engine = KeywordGapAnalyzer(
        country=opts.country,
        min_volume=opts.min_volume,
    )
    analysis = engine.analyze(opts.target, opts.competitors)

    # Render as JSON or plain text, then write to file or stdout.
    rendered = (
        json.dumps(analysis.to_dict(), ensure_ascii=False, indent=2)
        if opts.output_json
        else format_text_report(analysis)
    )

    if opts.output:
        with open(opts.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output written to: {opts.output}")
    else:
        print(rendered)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: propagate main()'s return code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -0,0 +1,656 @@
|
|||||||
|
"""
|
||||||
|
Keyword Researcher - Seed keyword expansion, intent classification, and topic clustering
|
||||||
|
========================================================================================
|
||||||
|
Purpose: Expand seed keywords via Ahrefs APIs, classify search intent,
|
||||||
|
cluster topics, and support Korean market keyword discovery.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Module-wide logging: timestamped INFO-level messages by default;
# main() raises the root logger to DEBUG when --verbose is passed.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger("keyword_researcher")
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants - Korean suffix expansion
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Common Korean modifier suffixes appended to a seed keyword by
# KeywordResearcher.expand_korean_suffixes to surface commercial and
# long-tail variations (e.g. "추천" = recommendation, "가격" = price,
# "후기" = review, "부작용" = side effects).
KOREAN_SUFFIXES: list[str] = [
    "추천",
    "가격",
    "후기",
    "잘하는곳",
    "부작용",
    "전후",
    "비용",
    "추천 병원",
    "후기 블로그",
    "방법",
    "종류",
    "비교",
    "효과",
    "주의사항",
    "장단점",
]

# ---------------------------------------------------------------------------
# Intent classification patterns
# ---------------------------------------------------------------------------
# Regexes (Korean + English) mapped to a search-intent label. Insertion order
# matters: classify_intent() returns on the first match, so this dict encodes
# the priority transactional > commercial > navigational > informational.
INTENT_PATTERNS: dict[str, list[str]] = {
    "transactional": [
        r"구매|구입|주문|buy|order|purchase|shop|deal|discount|coupon|할인|쿠폰",
        r"예약|booking|reserve|sign\s?up|register|등록|신청",
    ],
    "commercial": [
        r"가격|비용|얼마|price|cost|pricing|fee|요금",
        r"추천|best|top\s?\d|review|비교|compare|vs|versus|후기|리뷰|평점|평가",
        r"잘하는곳|잘하는|맛집|업체|병원|추천\s?병원",
    ],
    "navigational": [
        r"^(www\.|http|\.com|\.co\.kr|\.net)",
        r"공식|official|login|로그인|홈페이지|사이트|website",
        r"고객센터|contact|support|customer\s?service",
    ],
    "informational": [
        r"방법|how\s?to|what\s?is|why|when|where|who|which",
        r"뜻|의미|정의|definition|meaning|guide|tutorial",
        r"효과|부작용|증상|원인|차이|종류|type|cause|symptom|effect",
        r"전후|before\s?and\s?after|결과|result",
    ],
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dataclasses
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class KeywordEntry:
    """Single keyword with its metrics and classification."""

    keyword: str
    volume: int = 0
    kd: float = 0.0
    cpc: float = 0.0
    intent: str = "informational"
    cluster: str = ""
    source: str = ""
    country_volumes: dict[str, int] = field(default_factory=dict)

    def to_dict(self) -> dict:
        """Serialize to a plain dict, omitting country_volumes when empty."""
        payload = asdict(self)
        if not payload["country_volumes"]:
            payload.pop("country_volumes")
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class KeywordCluster:
    """Group of semantically related keywords."""

    topic: str
    keywords: list[str] = field(default_factory=list)
    total_volume: int = 0
    avg_kd: float = 0.0
    primary_intent: str = "informational"

    def to_dict(self) -> dict:
        """Serialize the cluster to a plain dict (deep copy via asdict)."""
        snapshot = asdict(self)
        return snapshot
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResearchResult:
    """Full research result container."""

    seed_keyword: str
    country: str
    total_keywords: int = 0
    total_volume: int = 0
    clusters: list[KeywordCluster] = field(default_factory=list)
    keywords: list[KeywordEntry] = field(default_factory=list)
    timestamp: str = ""

    def to_dict(self) -> dict:
        """Serialize the result, expanding nested clusters and keywords."""
        payload = {
            "seed_keyword": self.seed_keyword,
            "country": self.country,
            "total_keywords": self.total_keywords,
            "total_volume": self.total_volume,
        }
        payload["clusters"] = [cluster.to_dict() for cluster in self.clusters]
        payload["keywords"] = [entry.to_dict() for entry in self.keywords]
        payload["timestamp"] = self.timestamp
        return payload
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# MCP Helper - calls Ahrefs MCP tools via subprocess
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def call_mcp_tool(tool_name: str, params: dict) -> dict:
    """
    Call an Ahrefs MCP tool and return parsed JSON response.

    In production this delegates to the MCP bridge. For standalone usage
    it invokes the Claude CLI with the appropriate tool call.

    On any failure (non-zero exit, timeout, missing CLI, unparseable
    stdout) a dict with empty "keywords"/"items" lists is returned so
    callers can iterate without checking for errors.
    """
    logger.info(f"Calling MCP tool: {tool_name} with params: {json.dumps(params, ensure_ascii=False)}")

    prompt = (
        f"Call the tool mcp__claude_ai_Ahrefs__{tool_name} with these parameters: "
        f"{json.dumps(params, ensure_ascii=False)}. Return ONLY the raw JSON result."
    )
    command = [
        "claude",
        "--print",
        "--output-format", "json",
        "-p",
        prompt,
    ]

    try:
        proc = subprocess.run(command, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        logger.error(f"MCP tool {tool_name} timed out")
        return {"error": "timeout", "keywords": [], "items": []}
    except FileNotFoundError:
        logger.warning("Claude CLI not found - returning empty result for standalone testing")
        return {"keywords": [], "items": []}

    if proc.returncode != 0:
        logger.warning(f"MCP tool {tool_name} returned non-zero exit code: {proc.returncode}")
        logger.debug(f"stderr: {proc.stderr}")
        return {"error": proc.stderr, "keywords": [], "items": []}

    try:
        return json.loads(proc.stdout)
    except json.JSONDecodeError:
        # stdout was not pure JSON; hand it back raw for the caller to inspect.
        return {"raw": proc.stdout, "keywords": [], "items": []}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# KeywordResearcher
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class KeywordResearcher:
    """Expand seed keywords, classify intent, and cluster topics."""

    def __init__(self, country: str = "kr", korean_suffixes: bool = False, compare_global: bool = False):
        """
        Args:
            country: Ahrefs country code used for all keyword lookups.
            korean_suffixes: When True, also generate Korean suffix variations.
            compare_global: When True, fetch volume-by-country for top keywords.
        """
        self.country = country
        self.korean_suffixes = korean_suffixes
        self.compare_global = compare_global
        # Keywords already collected in this session; de-duplicates across
        # the matching/related/suggestions endpoints and suffix expansion.
        self._seen: set[str] = set()

    # ---- Keyword expansion via Ahrefs MCP ----

    def _ingest(self, response: dict, source: str, sink: list[KeywordEntry]) -> None:
        """Parse keyword items out of an MCP response and append unseen ones to *sink*."""
        for item in response.get("keywords", response.get("items", [])):
            kw = self._parse_keyword_item(item, source=source)
            if kw and kw.keyword not in self._seen:
                self._seen.add(kw.keyword)
                sink.append(kw)

    def expand_keywords(self, seed: str) -> list[KeywordEntry]:
        """
        Expand a seed keyword using Ahrefs matching-terms, related-terms,
        and search-suggestions endpoints.

        Returns:
            De-duplicated KeywordEntry list; the seed itself is inserted
            first (when it was not already returned by an endpoint).
        """
        all_keywords: list[KeywordEntry] = []

        # 1. Matching terms
        logger.info(f"Fetching matching terms for: {seed}")
        self._ingest(
            call_mcp_tool("keywords-explorer-matching-terms", {
                "keyword": seed,
                "country": self.country,
                "limit": 100,
            }),
            "matching-terms",
            all_keywords,
        )

        # 2. Related terms
        logger.info(f"Fetching related terms for: {seed}")
        self._ingest(
            call_mcp_tool("keywords-explorer-related-terms", {
                "keyword": seed,
                "country": self.country,
                "limit": 100,
            }),
            "related-terms",
            all_keywords,
        )

        # 3. Search suggestions
        logger.info(f"Fetching search suggestions for: {seed}")
        self._ingest(
            call_mcp_tool("keywords-explorer-search-suggestions", {
                "keyword": seed,
                "country": self.country,
                "limit": 50,
            }),
            "search-suggestions",
            all_keywords,
        )

        # 4. Add the seed itself if not already present
        if seed not in self._seen:
            self._seen.add(seed)
            overview = call_mcp_tool("keywords-explorer-overview", {
                "keyword": seed,
                "country": self.country,
            })
            seed_entry = self._parse_keyword_item(overview, source="seed")
            if seed_entry:
                # The overview may echo a normalized form; keep the user's seed.
                seed_entry.keyword = seed
                all_keywords.insert(0, seed_entry)

        logger.info(f"Expanded to {len(all_keywords)} keywords from Ahrefs APIs")
        return all_keywords

    def expand_korean_suffixes(self, seed: str) -> list[KeywordEntry]:
        """
        Generate keyword variations by appending common Korean suffixes.
        Each variation is checked against Ahrefs for volume data.

        Every unseen variation is included in the result — variations with
        no Ahrefs data are kept as zero-volume entries for completeness.
        """
        suffix_keywords: list[KeywordEntry] = []

        for suffix in KOREAN_SUFFIXES:
            variation = f"{seed} {suffix}"
            if variation in self._seen:
                continue

            logger.info(f"Checking Korean suffix variation: {variation}")
            overview = call_mcp_tool("keywords-explorer-overview", {
                "keyword": variation,
                "country": self.country,
            })
            kw = self._parse_keyword_item(overview, source="korean-suffix")
            if kw:
                kw.keyword = variation
            else:
                # No data at all: include as zero-volume for completeness.
                kw = KeywordEntry(
                    keyword=variation,
                    volume=0,
                    kd=0.0,
                    cpc=0.0,
                    intent=self.classify_intent(variation),
                    source="korean-suffix",
                )
            # BUGFIX: previously a parsed entry with volume == 0 was silently
            # dropped (and never marked seen), while unparsed variations were
            # kept as zero-volume entries. Keep all variations consistently.
            self._seen.add(variation)
            suffix_keywords.append(kw)

        logger.info(f"Korean suffix expansion yielded {len(suffix_keywords)} variations")
        return suffix_keywords

    def get_volume_by_country(self, keyword: str) -> dict[str, int]:
        """
        Get search volume breakdown by country for a keyword.
        Useful for comparing Korean vs global demand.

        Returns:
            Mapping of lowercase country code -> volume; items missing a
            country code or reporting zero volume are skipped.
        """
        logger.info(f"Fetching volume-by-country for: {keyword}")
        result = call_mcp_tool("keywords-explorer-volume-by-country", {
            "keyword": keyword,
        })

        volumes: dict[str, int] = {}
        for item in result.get("countries", result.get("items", [])):
            if isinstance(item, dict):
                country_code = item.get("country", item.get("code", ""))
                volume = item.get("volume", item.get("search_volume", 0))
                if country_code and volume:
                    volumes[country_code.lower()] = int(volume)

        return volumes

    # ---- Intent classification ----

    def classify_intent(self, keyword: str) -> str:
        """
        Classify search intent based on keyword patterns.
        Priority: transactional > commercial > navigational > informational
        (encoded by the insertion order of INTENT_PATTERNS).
        """
        keyword_lower = keyword.lower().strip()

        for intent, patterns in INTENT_PATTERNS.items():
            for pattern in patterns:
                if re.search(pattern, keyword_lower, re.IGNORECASE):
                    return intent

        # Default bucket when no pattern matches.
        return "informational"

    # ---- Keyword clustering ----

    def cluster_keywords(self, keywords: list[KeywordEntry]) -> list[KeywordCluster]:
        """
        Group keywords into topic clusters using shared n-gram tokens.
        Uses a simple token overlap approach: keywords sharing significant
        tokens (2+ character words) are grouped together.

        Side effect: sets each entry's `cluster` attribute to its topic.
        """
        if not keywords:
            return []

        # Extract meaningful tokens (>= 2 chars) from each keyword
        def tokenize(text: str) -> set[str]:
            tokens = set()
            for word in re.split(r"\s+", text.strip().lower()):
                if len(word) >= 2:
                    tokens.add(word)
            return tokens

        # Build token-to-keyword mapping
        token_map: dict[str, list[int]] = {}
        kw_tokens: list[set[str]] = []

        for i, kw in enumerate(keywords):
            tokens = tokenize(kw.keyword)
            kw_tokens.append(tokens)
            for token in tokens:
                if token not in token_map:
                    token_map[token] = []
                token_map[token].append(i)

        # Most frequent tokens become cluster anchors (greedy assignment).
        token_freq = sorted(token_map.items(), key=lambda x: len(x[1]), reverse=True)

        assigned: set[int] = set()
        clusters: list[KeywordCluster] = []

        for token, indices in token_freq:
            # Skip single-occurrence tokens
            if len(indices) < 2:
                continue

            # Gather unassigned keywords that share this token
            cluster_indices = [i for i in indices if i not in assigned]
            if len(cluster_indices) < 2:
                continue

            # Create the cluster
            cluster_kws = [keywords[i].keyword for i in cluster_indices]
            cluster_volumes = [keywords[i].volume for i in cluster_indices]
            cluster_kds = [keywords[i].kd for i in cluster_indices]
            cluster_intents = [keywords[i].intent for i in cluster_indices]

            # Determine primary intent by frequency
            intent_counts: dict[str, int] = {}
            for intent in cluster_intents:
                intent_counts[intent] = intent_counts.get(intent, 0) + 1
            primary_intent = max(intent_counts, key=intent_counts.get)

            cluster = KeywordCluster(
                topic=token,
                keywords=cluster_kws,
                total_volume=sum(cluster_volumes),
                avg_kd=round(sum(cluster_kds) / len(cluster_kds), 1) if cluster_kds else 0.0,
                primary_intent=primary_intent,
            )
            clusters.append(cluster)

            for i in cluster_indices:
                assigned.add(i)
                keywords[i].cluster = token

        # Assign unclustered keywords to an "other" cluster
        unclustered = [i for i in range(len(keywords)) if i not in assigned]
        if unclustered:
            other_kws = [keywords[i].keyword for i in unclustered]
            other_volumes = [keywords[i].volume for i in unclustered]
            other_kds = [keywords[i].kd for i in unclustered]

            other_cluster = KeywordCluster(
                topic="(unclustered)",
                keywords=other_kws,
                total_volume=sum(other_volumes),
                avg_kd=round(sum(other_kds) / len(other_kds), 1) if other_kds else 0.0,
                primary_intent="informational",
            )
            clusters.append(other_cluster)

            for i in unclustered:
                keywords[i].cluster = "(unclustered)"

        # Sort clusters by total volume descending
        clusters.sort(key=lambda c: c.total_volume, reverse=True)

        logger.info(f"Clustered {len(keywords)} keywords into {len(clusters)} clusters")
        return clusters

    # ---- Full analysis orchestration ----

    def analyze(self, seed_keyword: str) -> ResearchResult:
        """
        Orchestrate a full keyword research analysis:
        1. Expand seed via Ahrefs
        2. Optionally expand Korean suffixes
        3. Classify intent for all keywords
        4. Optionally fetch volume-by-country
        5. Cluster keywords into topics
        6. Compile results
        """
        logger.info(f"Starting keyword research for: {seed_keyword} (country={self.country})")

        # Step 1: Expand keywords
        keywords = self.expand_keywords(seed_keyword)

        # Step 2: Korean suffix expansion
        if self.korean_suffixes:
            suffix_keywords = self.expand_korean_suffixes(seed_keyword)
            keywords.extend(suffix_keywords)

        # Step 3: (Re)classify intent — _parse_keyword_item already classifies,
        # so only the default "informational" entries get a second pass.
        for kw in keywords:
            if not kw.intent or kw.intent == "informational":
                kw.intent = self.classify_intent(kw.keyword)

        # Step 4: Volume-by-country comparison for the top-10 by volume
        if self.compare_global and keywords:
            top_keywords = sorted(keywords, key=lambda k: k.volume, reverse=True)[:10]
            for kw in top_keywords:
                volumes = self.get_volume_by_country(kw.keyword)
                kw.country_volumes = volumes

        # Step 5: Cluster keywords
        clusters = self.cluster_keywords(keywords)

        # Step 6: Compile result
        result = ResearchResult(
            seed_keyword=seed_keyword,
            country=self.country,
            total_keywords=len(keywords),
            total_volume=sum(kw.volume for kw in keywords),
            clusters=clusters,
            keywords=sorted(keywords, key=lambda k: k.volume, reverse=True),
            timestamp=datetime.now().isoformat(),
        )

        logger.info(
            f"Research complete: {result.total_keywords} keywords, "
            f"{len(result.clusters)} clusters, "
            f"total volume {result.total_volume}"
        )
        return result

    # ---- Internal helpers ----

    def _parse_keyword_item(self, item: dict, source: str = "") -> Optional[KeywordEntry]:
        """Parse an Ahrefs API response item into a KeywordEntry.

        Returns None for empty/error items or items with no keyword text.
        Tolerates the several field spellings the endpoints use.
        """
        if not item or "error" in item:
            return None

        keyword = item.get("keyword", item.get("term", item.get("query", "")))
        if not keyword:
            return None

        # `or 0` guards against explicit null values in the response.
        volume = int(item.get("volume", item.get("search_volume", 0)) or 0)
        kd = float(item.get("keyword_difficulty", item.get("kd", 0)) or 0)
        cpc = float(item.get("cpc", item.get("cost_per_click", 0)) or 0)

        return KeywordEntry(
            keyword=keyword,
            volume=volume,
            kd=round(kd, 1),
            cpc=round(cpc, 2),
            intent=self.classify_intent(keyword),
            source=source,
        )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Plain-text report formatter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def format_text_report(result: ResearchResult) -> str:
    """Render a research result as a human-readable plain-text report."""
    report: list[str] = []
    add = report.append

    add("=" * 70)
    add(f"Keyword Strategy Report: {result.seed_keyword}")
    add(f"Country: {result.country.upper()} | Date: {result.timestamp[:10]}")
    add("=" * 70)
    add("")

    add("## Overview")
    add(f" Total keywords discovered: {result.total_keywords}")
    add(f" Topic clusters: {len(result.clusters)}")
    add(f" Total search volume: {result.total_volume:,}")
    add("")

    # Clusters summary table (top 15 by the order given)
    if result.clusters:
        add("## Top Clusters")
        add(f" {'Cluster':<25} {'Keywords':>8} {'Volume':>10} {'Avg KD':>8} {'Intent':<15}")
        add(" " + "-" * 66)
        for cluster in result.clusters[:15]:
            add(
                f" {cluster.topic:<25} {len(cluster.keywords):>8} "
                f"{cluster.total_volume:>10,} {cluster.avg_kd:>8.1f} "
                f"{cluster.primary_intent:<15}"
            )
        add("")

    # Top keywords table (first 30; names clipped to fit columns)
    if result.keywords:
        add("## Top Keywords (by volume)")
        add(f" {'Keyword':<40} {'Vol':>8} {'KD':>6} {'CPC':>7} {'Intent':<15} {'Cluster':<15}")
        add(" " + "-" * 91)
        for kw in result.keywords[:30]:
            add(
                f" {kw.keyword[:38]:<40} {kw.volume:>8,} {kw.kd:>6.1f} "
                f"{kw.cpc:>7.2f} {kw.intent:<15} {kw.cluster[:13]:<15}"
            )
        add("")

    # Intent distribution across every discovered keyword
    intent_dist: dict[str, int] = {}
    for kw in result.keywords:
        intent_dist[kw.intent] = intent_dist.get(kw.intent, 0) + 1
    if intent_dist:
        add("## Intent Distribution")
        for intent, count in sorted(intent_dist.items(), key=lambda item: item[1], reverse=True):
            pct = (count / len(result.keywords)) * 100 if result.keywords else 0
            add(f" {intent:<15}: {count:>5} ({pct:.1f}%)")
        add("")

    return "\n".join(report)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """CLI entry point: parse arguments, run keyword research, emit output.

    Returns:
        Process exit code (0 on success).
    """
    arg_parser = argparse.ArgumentParser(
        description="Keyword Researcher - Expand, classify, and cluster keywords",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python keyword_researcher.py --keyword "치과 임플란트" --country kr --json
python keyword_researcher.py --keyword "dental implant" --compare-global --json
python keyword_researcher.py --keyword "치과 임플란트" --korean-suffixes --output report.json
""",
    )
    arg_parser.add_argument(
        "--keyword",
        required=True,
        help="Seed keyword to expand and research",
    )
    arg_parser.add_argument("--country", default="kr", help="Target country code (default: kr)")
    arg_parser.add_argument(
        "--korean-suffixes",
        action="store_true",
        help="Enable Korean suffix expansion (추천, 가격, 후기, etc.)",
    )
    arg_parser.add_argument(
        "--compare-global",
        action="store_true",
        help="Fetch volume-by-country comparison for top keywords",
    )
    arg_parser.add_argument("--json", action="store_true", dest="output_json", help="Output results as JSON")
    arg_parser.add_argument("--output", type=str, default=None, help="Write output to file (path)")
    arg_parser.add_argument("--verbose", action="store_true", help="Enable verbose/debug logging")

    opts = arg_parser.parse_args()

    if opts.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    # Run the research with CLI-selected options.
    engine = KeywordResearcher(
        country=opts.country,
        korean_suffixes=opts.korean_suffixes,
        compare_global=opts.compare_global,
    )
    research = engine.analyze(opts.keyword)

    # Render as JSON or plain text, then write to file or stdout.
    rendered = (
        json.dumps(research.to_dict(), ensure_ascii=False, indent=2)
        if opts.output_json
        else format_text_report(research)
    )

    if opts.output:
        with open(opts.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output written to: {opts.output}")
    else:
        print(rendered)

    return 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: propagate main()'s return code to the shell.
if __name__ == "__main__":
    sys.exit(main())
|
||||||
@@ -0,0 +1,20 @@
|
|||||||
|
# 19-seo-keyword-strategy dependencies
|
||||||
|
# Install: pip install -r requirements.txt
|
||||||
|
|
||||||
|
# HTTP & Async
|
||||||
|
requests>=2.31.0
|
||||||
|
aiohttp>=3.9.0
|
||||||
|
|
||||||
|
# Data Processing
|
||||||
|
pandas>=2.1.0
|
||||||
|
|
||||||
|
# NLP / Text Similarity
|
||||||
|
scikit-learn>=1.3.0
|
||||||
|
|
||||||
|
# Async & Retry
|
||||||
|
tenacity>=8.2.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
|
||||||
|
# Environment & CLI
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
rich>=13.7.0
|
||||||
112
custom-skills/19-seo-keyword-strategy/desktop/SKILL.md
Normal file
112
custom-skills/19-seo-keyword-strategy/desktop/SKILL.md
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
---
|
||||||
|
name: seo-keyword-strategy
|
||||||
|
description: |
|
||||||
|
Keyword strategy and research for SEO campaigns.
|
||||||
|
Triggers: keyword research, keyword analysis, keyword gap, search volume,
|
||||||
|
keyword clustering, intent classification, 키워드 전략, 키워드 분석,
|
||||||
|
키워드 리서치, 검색량 분석, 키워드 클러스터링.
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Keyword Strategy & Research
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Expand seed keywords, classify search intent, cluster topics, and identify competitor keyword gaps for comprehensive keyword strategy development.
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
1. **Keyword Expansion** - Matching terms, related terms, search suggestions
|
||||||
|
2. **Korean Market** - Suffix expansion, Naver autocomplete, Korean intent patterns
|
||||||
|
3. **Intent Classification** - Informational, navigational, commercial, transactional
|
||||||
|
4. **Topic Clustering** - Group keywords into semantic clusters
|
||||||
|
5. **Gap Analysis** - Find competitor keywords missing from target site
|
||||||
|
|
||||||
|
## MCP Tool Usage
|
||||||
|
|
||||||
|
### SEO Data (DataForSEO)
|
||||||
|
|
||||||
|
**Primary — our-seo-agent CLI:**
|
||||||
|
```bash
|
||||||
|
our keywords volume "<keyword>" --location 2410 --language ko
|
||||||
|
our keywords ideas "<keyword>" --location 2410 --limit 50
|
||||||
|
our keywords for-site <competitor.com> --location 2410 --limit 100
|
||||||
|
our keywords intent "<kw1>" "<kw2>" "<kw3>"
|
||||||
|
our keywords difficulty "<kw1>" "<kw2>"
|
||||||
|
```
|
||||||
|
|
||||||
|
**Interactive fallback — DataForSEO MCP:**
|
||||||
|
```
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_keyword_overview
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_keyword_ideas
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_keyword_suggestions
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_search_intent
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_bulk_keyword_difficulty
|
||||||
|
mcp__dfs-mcp__kw_data_google_ads_search_volume
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_keywords_for_site
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Parameters
|
||||||
|
- **location_code**: 2410 (Korea), 2840 (US), 2392 (Japan)
|
||||||
|
- **language_code**: ko, en, ja
|
||||||
|
|
||||||
|
### Web Search for Naver Discovery
|
||||||
|
```
|
||||||
|
WebSearch: Naver autocomplete and trend discovery
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### 1. Seed Keyword Expansion
|
||||||
|
1. Input seed keyword (Korean or English)
|
||||||
|
2. Fetch search volume via `our keywords volume "<seed>" --location 2410 --language ko`
|
||||||
|
3. Expand with `our keywords ideas "<seed>" --location 2410 --limit 50`
|
||||||
|
4. Get autocomplete suggestions via MCP: `mcp__dfs-mcp__dataforseo_labs_google_keyword_suggestions`
|
||||||
|
5. Apply Korean suffix expansion if Korean market
|
||||||
|
6. Deduplicate and merge results
|
||||||
|
|
||||||
|
### 2. Intent Classification & Clustering
|
||||||
|
1. Classify each keyword by search intent
|
||||||
|
2. Group keywords into topic clusters
|
||||||
|
3. Identify pillar topics and supporting terms
|
||||||
|
4. Calculate cluster-level metrics (total volume, avg KD)
|
||||||
|
|
||||||
|
### 3. Gap Analysis
|
||||||
|
1. Pull organic keywords for target: `our keywords for-site <target.com> --location 2410 --limit 100`
|
||||||
|
2. Pull organic keywords for competitors: `our keywords for-site <competitor.com> --location 2410 --limit 100`
|
||||||
|
3. Identify keywords present in competitors but missing from target
|
||||||
|
4. Score opportunities by volume/difficulty ratio
|
||||||
|
5. Prioritize by intent alignment with business goals
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Keyword Strategy Report: [seed keyword]
|
||||||
|
|
||||||
|
### Overview
|
||||||
|
- Total keywords discovered: [count]
|
||||||
|
- Topic clusters: [count]
|
||||||
|
- Total search volume: [sum]
|
||||||
|
|
||||||
|
### Top Clusters
|
||||||
|
| Cluster | Keywords | Total Volume | Avg KD |
|
||||||
|
|---------|----------|-------------|--------|
|
||||||
|
| ... | ... | ... | ... |
|
||||||
|
|
||||||
|
### Top Opportunities
|
||||||
|
| Keyword | Volume | KD | Intent | Cluster |
|
||||||
|
|---------|--------|-----|--------|---------|
|
||||||
|
| ... | ... | ... | ... | ... |
|
||||||
|
|
||||||
|
### Keyword Gaps (vs competitors)
|
||||||
|
| Keyword | Volume | Competitor Position | Opportunity Score |
|
||||||
|
|---------|--------|-------------------|-------------------|
|
||||||
|
| ... | ... | ... | ... |
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Properties**: Issue (title), Site (url), Category, Priority, Found Date, Audit ID
|
||||||
|
- **Language**: Korean with English technical terms
|
||||||
|
- **Audit ID Format**: KW-YYYYMMDD-NNN
|
||||||
9
custom-skills/19-seo-keyword-strategy/desktop/skill.yaml
Normal file
9
custom-skills/19-seo-keyword-strategy/desktop/skill.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
name: seo-keyword-strategy
|
||||||
|
description: |
|
||||||
|
Keyword strategy and research for SEO campaigns. Triggers: keyword research, keyword analysis, keyword gap, search volume, keyword clustering, intent classification.
|
||||||
|
|
||||||
|
allowed-tools:
|
||||||
|
- mcp__ahrefs__*
|
||||||
|
- mcp__notion__*
|
||||||
|
- WebSearch
|
||||||
|
- WebFetch
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Ahrefs
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Notion
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# WebSearch
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
132
custom-skills/20-seo-serp-analysis/code/CLAUDE.md
Normal file
132
custom-skills/20-seo-serp-analysis/code/CLAUDE.md
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
SERP analysis tool for understanding search result landscapes. Detects Google SERP features (featured snippets, PAA, knowledge panels, local pack, video, ads), analyzes Naver SERP composition (blog, cafe, knowledge iN, Smart Store, brand zone, shortform, influencer), maps competitor positions, and scores SERP feature opportunities.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# Google SERP analysis
|
||||||
|
python scripts/serp_analyzer.py --keyword "치과 임플란트" --country kr --json
|
||||||
|
|
||||||
|
# Naver SERP analysis
|
||||||
|
python scripts/naver_serp_analyzer.py --keyword "치과 임플란트" --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `serp_analyzer.py` | Google SERP feature detection and competitor mapping | SERP features, competitor positions, opportunity scores |
|
||||||
|
| `naver_serp_analyzer.py` | Naver SERP composition analysis | Section distribution, content type mapping |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## SERP Analyzer (Google)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Single keyword analysis
|
||||||
|
python scripts/serp_analyzer.py --keyword "dental implant cost" --json
|
||||||
|
|
||||||
|
# Korean market
|
||||||
|
python scripts/serp_analyzer.py --keyword "치과 임플란트 가격" --country kr --json
|
||||||
|
|
||||||
|
# Multiple keywords from file
|
||||||
|
python scripts/serp_analyzer.py --keywords-file keywords.txt --country kr --json
|
||||||
|
|
||||||
|
# Output to file
|
||||||
|
python scripts/serp_analyzer.py --keyword "dental implant" --output serp_report.json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- SERP feature detection (featured snippet, PAA, knowledge panel, local pack, video carousel, ads, image pack, site links)
|
||||||
|
- Competitor position mapping per keyword
|
||||||
|
- Content type distribution analysis (blog, product, service, news, video)
|
||||||
|
- SERP feature opportunity scoring
|
||||||
|
- Search intent validation from SERP composition
|
||||||
|
- SERP volatility assessment
|
||||||
|
|
||||||
|
## Naver SERP Analyzer
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Analyze Naver search results
|
||||||
|
python scripts/naver_serp_analyzer.py --keyword "치과 임플란트" --json
|
||||||
|
|
||||||
|
# Analyze multiple keywords
|
||||||
|
python scripts/naver_serp_analyzer.py --keywords-file keywords.txt --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Naver section detection (블로그, 카페, 지식iN, 스마트스토어, 브랜드존, 도서, 숏폼, 인플루언서)
|
||||||
|
- Section priority mapping (which sections appear above fold)
|
||||||
|
- Content type distribution per section
|
||||||
|
- Brand zone presence detection
|
||||||
|
- Shortform/influencer content analysis
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (planned integration); until then, supply pre-fetched JSON via `--input` |
|
||||||
|
| WebSearch / WebFetch | Live SERP data and Naver section analysis |
|
||||||
|
| Notion MCP | Save analysis report to SEO Audit Log database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"keyword": "치과 임플란트",
|
||||||
|
"country": "kr",
|
||||||
|
"serp_features": {
|
||||||
|
"featured_snippet": true,
|
||||||
|
"people_also_ask": true,
|
||||||
|
"local_pack": true,
|
||||||
|
"knowledge_panel": false,
|
||||||
|
"video_carousel": false,
|
||||||
|
"ads_top": 3,
|
||||||
|
"ads_bottom": 2
|
||||||
|
},
|
||||||
|
"competitors": [
|
||||||
|
{
|
||||||
|
"position": 1,
|
||||||
|
"url": "https://example.com/page",
|
||||||
|
"domain": "example.com",
|
||||||
|
"title": "...",
|
||||||
|
"content_type": "service_page"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"opportunity_score": 72,
|
||||||
|
"intent_signals": "commercial",
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Audited website URL |
|
||||||
|
| Category | Select | SERP Analysis |
|
||||||
|
| Priority | Select | Based on opportunity score |
|
||||||
|
| Found Date | Date | Analysis date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: SERP-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is (e.g., SERP, Featured Snippet, PAA)
|
||||||
|
- URLs and code remain unchanged
|
||||||
207
custom-skills/20-seo-serp-analysis/code/scripts/base_client.py
Normal file
207
custom-skills/20-seo-serp-analysis/code/scripts/base_client.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from asyncio import Semaphore
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
retry_if_exception_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Logging setup
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
    """Token-bucket rate limiter for asyncio coroutines.

    Allows up to ``rate`` acquisitions per ``per`` seconds. Callers await
    :meth:`acquire` before each request; when the bucket is empty the caller
    sleeps while holding the internal lock, so waiters are served in FIFO
    order.
    """

    def __init__(self, rate: float, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed per period
            per: Time period in seconds (default: 1 second)
        """
        self.rate = rate
        self.per = per
        # Start with a full bucket so an initial burst is not throttled.
        self.tokens = rate
        self.last_update = datetime.now()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Acquire a token, sleeping until one is available."""
        async with self._lock:
            now = datetime.now()
            elapsed = (now - self.last_update).total_seconds()
            # Refill proportionally to elapsed time, capped at bucket size.
            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
            self.last_update = now

            if self.tokens < 1:
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                # BUGFIX: advance last_update past the sleep. The original
                # left it at the pre-sleep timestamp, so the next caller was
                # credited for the slept interval a second time and the
                # effective rate exceeded the configured limit.
                self.last_update = datetime.now()
                self.tokens = 0
            else:
                self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Combines a concurrency cap (semaphore), a token-bucket rate limit and
    tenacity-based retries. Subclasses wrap raw request coroutines with
    :meth:`_rate_limited_request`, or fan out many at once via
    :meth:`batch_requests`.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # Simple counters reported by print_stats().
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute a request factory with rate limiting and retry (3 attempts).

        Args:
            coro: Zero-argument callable returning an awaitable.

        Raises:
            Exception: Re-raised after logging when the request fails; the
                tenacity decorator then retries up to the attempt limit.
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple request factories concurrently.

        Args:
            requests: Zero-argument callables, each returning an awaitable.
            desc: Progress-bar label (used only when tqdm is installed).

        Returns:
            One result per input, in the SAME ORDER as ``requests``. Failed
            requests are represented as ``{"error": "<message>"}``.
        """
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                # Normalize failures to a dict so callers get a uniform shape.
                return {"error": str(e)}

        tasks = [asyncio.ensure_future(execute(req)) for req in requests]

        if has_tqdm:
            # Drive the progress bar in completion order, but collect results
            # from the task list afterwards so output order matches input
            # order. (Previously the tqdm path returned completion order
            # while the non-tqdm path returned input order.)
            for fut in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
                await fut
        else:
            await asyncio.gather(*tasks)

        # execute() never raises, so every task holds a clean result.
        return [task.result() for task in tasks]

    def print_stats(self) -> None:
        """Log a summary of request counters accumulated so far."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f"  Total Requests: {self.stats['requests']}")
        self.logger.info(f"  Successful: {self.stats['success']}")
        self.logger.info(f"  Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton config instance
|
||||||
|
config = ConfigManager()
|
||||||
@@ -0,0 +1,693 @@
|
|||||||
|
"""
|
||||||
|
Naver SERP Analyzer - Naver search result composition analysis
|
||||||
|
==============================================================
|
||||||
|
Purpose: Analyze Naver SERP section distribution, content type mapping,
|
||||||
|
brand zone detection, and section priority analysis.
|
||||||
|
Python: 3.10+
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python naver_serp_analyzer.py --keyword "치과 임플란트" --json
|
||||||
|
python naver_serp_analyzer.py --keywords-file keywords.txt --json
|
||||||
|
python naver_serp_analyzer.py --keyword "치과 임플란트" --output naver_report.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Constants - Naver SERP Section Identifiers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# CSS class / id patterns used to detect Naver SERP sections
|
||||||
|
NAVER_SECTION_SELECTORS: dict[str, list[str]] = {
|
||||||
|
"blog": [
|
||||||
|
"sp_blog",
|
||||||
|
"blog_widget",
|
||||||
|
"sc_new.sp_blog",
|
||||||
|
"api_subject_blog",
|
||||||
|
"type_blog",
|
||||||
|
"blog_exact",
|
||||||
|
],
|
||||||
|
"cafe": [
|
||||||
|
"sp_cafe",
|
||||||
|
"cafe_widget",
|
||||||
|
"sc_new.sp_cafe",
|
||||||
|
"api_subject_cafe",
|
||||||
|
"type_cafe",
|
||||||
|
],
|
||||||
|
"knowledge_in": [
|
||||||
|
"sp_kin",
|
||||||
|
"kin_widget",
|
||||||
|
"sc_new.sp_kin",
|
||||||
|
"api_subject_kin",
|
||||||
|
"type_kin",
|
||||||
|
"nx_kin",
|
||||||
|
],
|
||||||
|
"smart_store": [
|
||||||
|
"sp_nshop",
|
||||||
|
"shopping_widget",
|
||||||
|
"sc_new.sp_nshop",
|
||||||
|
"api_subject_shopping",
|
||||||
|
"type_shopping",
|
||||||
|
"smartstore",
|
||||||
|
],
|
||||||
|
"brand_zone": [
|
||||||
|
"sp_brand",
|
||||||
|
"brand_area",
|
||||||
|
"brand_zone",
|
||||||
|
"type_brand",
|
||||||
|
"sc_new.sp_brand",
|
||||||
|
],
|
||||||
|
"news": [
|
||||||
|
"sp_nnews",
|
||||||
|
"news_widget",
|
||||||
|
"sc_new.sp_nnews",
|
||||||
|
"api_subject_news",
|
||||||
|
"type_news",
|
||||||
|
"group_news",
|
||||||
|
],
|
||||||
|
"encyclopedia": [
|
||||||
|
"sp_encyclopedia",
|
||||||
|
"sc_new.sp_encyclopedia",
|
||||||
|
"api_subject_encyclopedia",
|
||||||
|
"type_encyclopedia",
|
||||||
|
"nx_encyclopedia",
|
||||||
|
],
|
||||||
|
"image": [
|
||||||
|
"sp_image",
|
||||||
|
"image_widget",
|
||||||
|
"sc_new.sp_image",
|
||||||
|
"api_subject_image",
|
||||||
|
"type_image",
|
||||||
|
],
|
||||||
|
"video": [
|
||||||
|
"sp_video",
|
||||||
|
"video_widget",
|
||||||
|
"sc_new.sp_video",
|
||||||
|
"api_subject_video",
|
||||||
|
"type_video",
|
||||||
|
],
|
||||||
|
"place": [
|
||||||
|
"sp_local",
|
||||||
|
"local_widget",
|
||||||
|
"sc_new.sp_local",
|
||||||
|
"type_place",
|
||||||
|
"place_section",
|
||||||
|
"loc_map",
|
||||||
|
],
|
||||||
|
"ad": [
|
||||||
|
"sp_nad",
|
||||||
|
"sp_tad",
|
||||||
|
"ad_section",
|
||||||
|
"type_powerlink",
|
||||||
|
"type_ad",
|
||||||
|
"nx_ad",
|
||||||
|
],
|
||||||
|
"books": [
|
||||||
|
"sp_book",
|
||||||
|
"sc_new.sp_book",
|
||||||
|
"type_book",
|
||||||
|
"api_subject_book",
|
||||||
|
"nx_book",
|
||||||
|
],
|
||||||
|
"shortform": [
|
||||||
|
"sp_shortform",
|
||||||
|
"sc_new.sp_shortform",
|
||||||
|
"type_shortform",
|
||||||
|
"sp_shorts",
|
||||||
|
"type_shorts",
|
||||||
|
],
|
||||||
|
"influencer": [
|
||||||
|
"sp_influencer",
|
||||||
|
"sc_new.sp_influencer",
|
||||||
|
"type_influencer",
|
||||||
|
"api_subject_influencer",
|
||||||
|
],
|
||||||
|
}
|
||||||
|
|
||||||
|
# Section display names in Korean
|
||||||
|
SECTION_DISPLAY_NAMES: dict[str, str] = {
|
||||||
|
"blog": "블로그",
|
||||||
|
"cafe": "카페",
|
||||||
|
"knowledge_in": "지식iN",
|
||||||
|
"smart_store": "스마트스토어",
|
||||||
|
"brand_zone": "브랜드존",
|
||||||
|
"news": "뉴스",
|
||||||
|
"encyclopedia": "백과사전",
|
||||||
|
"image": "이미지",
|
||||||
|
"video": "동영상",
|
||||||
|
"place": "플레이스",
|
||||||
|
"ad": "광고",
|
||||||
|
"books": "도서",
|
||||||
|
"shortform": "숏폼",
|
||||||
|
"influencer": "인플루언서",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Default headers for Naver requests
|
||||||
|
NAVER_HEADERS = {
|
||||||
|
"User-Agent": (
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
|
||||||
|
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||||
|
"Chrome/120.0.0.0 Safari/537.36"
|
||||||
|
),
|
||||||
|
"Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
|
||||||
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data Classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class NaverSection:
    """One section detected on a Naver search results page."""

    section_type: str  # machine key: blog, cafe, knowledge_in, smart_store, ...
    display_name: str = ""  # human-readable (Korean) label
    position: int = 0  # 1-based order of appearance
    item_count: int = 0  # number of result items inside the section
    is_above_fold: bool = False  # True for roughly the first 3 sections
    has_more_link: bool = False  # section exposes a "more results" link
    raw_html_snippet: str = ""  # truncated HTML kept for debugging

    def __post_init__(self):
        # Derive the Korean display label from the section key unless the
        # caller supplied one; unknown keys fall back to the key itself.
        self.display_name = self.display_name or SECTION_DISPLAY_NAMES.get(
            self.section_type, self.section_type
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class NaverSerpResult:
    """Complete Naver SERP analysis result for a keyword."""

    keyword: str
    sections: list[NaverSection] = field(default_factory=list)  # detected sections
    section_order: list[str] = field(default_factory=list)  # section types in page order
    brand_zone_present: bool = False
    brand_zone_brand: str = ""
    total_sections: int = 0
    above_fold_sections: list[str] = field(default_factory=list)
    ad_count: int = 0
    dominant_section: str = ""
    has_place_section: bool = False
    timestamp: str = ""  # ISO-8601 analysis time

    def __post_init__(self):
        # Stamp the analysis time once, unless the caller provided one.
        self.timestamp = self.timestamp or datetime.now().isoformat()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Naver SERP Analyzer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class NaverSerpAnalyzer:
|
||||||
|
"""Analyzes Naver search result page composition."""
|
||||||
|
|
||||||
|
NAVER_SEARCH_URL = "https://search.naver.com/search.naver"
|
||||||
|
|
||||||
|
def __init__(self, timeout: int = 30):
    """Create an analyzer with a persistent HTTP session.

    Args:
        timeout: Per-request timeout in seconds.
    """
    self.timeout = timeout
    self.logger = logging.getLogger(type(self).__name__)
    session = requests.Session()
    session.headers.update(NAVER_HEADERS)
    self.session = session
|
||||||
|
|
||||||
|
# ----- Data Fetching -----
|
||||||
|
|
||||||
|
def fetch_serp(self, keyword: str) -> str:
    """
    Fetch Naver search results HTML for a given keyword.

    Returns the raw HTML string of the search results page, or an empty
    string when the request fails.
    """
    self.logger.info(f"Fetching Naver SERP for '{keyword}'")

    query_params = {
        "where": "nexearch",
        "sm": "top_hty",
        "fbm": "0",
        "ie": "utf8",
        "query": keyword,
    }

    try:
        response = self.session.get(
            self.NAVER_SEARCH_URL,
            params=query_params,
            timeout=self.timeout,
        )
        response.raise_for_status()
    except requests.RequestException as exc:
        # Best-effort: callers treat "" as "no SERP available".
        self.logger.error(f"Failed to fetch Naver SERP: {exc}")
        return ""

    self.logger.info(
        f"Fetched {len(response.text):,} bytes "
        f"(status={response.status_code})"
    )
    return response.text
|
||||||
|
|
||||||
|
# ----- Section Detection -----
|
||||||
|
|
||||||
|
def detect_sections(self, html: str) -> list[NaverSection]:
    """
    Identify Naver SERP sections from HTML structure.

    Scans the HTML for known CSS class names and IDs that correspond
    to Naver's SERP section types. Returns one NaverSection per detected
    type, in page order; returns [] for empty input.
    """
    if not html:
        return []

    soup = BeautifulSoup(html, "lxml")
    sections: list[NaverSection] = []
    # Running 1-based rank of each newly detected section type; also used
    # to flag the first three sections as "above the fold".
    position = 0

    # Strategy 1: Look for section containers with known class names.
    # Naver uses <div class="sc_new sp_XXX"> and <section> elements.
    all_sections = soup.find_all(
        ["div", "section"],
        class_=re.compile(
            r"(sc_new|api_subject|sp_|type_|_widget|group_|nx_)"
        ),
    )

    # Each section type is recorded only once, at its first occurrence.
    seen_types: set[str] = set()

    for element in all_sections:
        # Match against the combined class list and id, case-insensitively.
        classes = " ".join(element.get("class", []))
        element_id = element.get("id", "")
        search_text = f"{classes} {element_id}".lower()

        for section_type, selectors in NAVER_SECTION_SELECTORS.items():
            if section_type in seen_types:
                continue

            matched = False
            for selector in selectors:
                if selector.lower() in search_text:
                    matched = True
                    break

            if matched:
                position += 1
                seen_types.add(section_type)

                # Count items within the section
                item_count = self._count_section_items(element, section_type)

                # Check for "more" link (CSS class or Korean link text)
                has_more = bool(
                    element.find("a", class_=re.compile(r"(more|_more|btn_more)"))
                    or element.find("a", string=re.compile(r"(더보기|전체보기)"))
                )

                # Get short HTML snippet for debugging
                snippet = str(element)[:200] if element else ""

                section = NaverSection(
                    section_type=section_type,
                    position=position,
                    item_count=item_count,
                    is_above_fold=(position <= 3),
                    has_more_link=has_more,
                    raw_html_snippet=snippet,
                )
                sections.append(section)

    # Strategy 2: Fallback - scan entire HTML text for section markers
    # (used when Naver's markup changed enough that DOM matching fails).
    if not sections:
        self.logger.warning(
            "No sections found via DOM parsing; "
            "falling back to text pattern matching"
        )
        sections = self._fallback_text_detection(html)

    return sections
|
||||||
|
|
||||||
|
def _count_section_items(self, element: Any, section_type: str) -> int:
|
||||||
|
"""Count the number of result items within a section element."""
|
||||||
|
# Common item container patterns
|
||||||
|
item_selectors = [
|
||||||
|
"li",
|
||||||
|
".api_txt_lines",
|
||||||
|
".total_tit",
|
||||||
|
".detail_box",
|
||||||
|
".item",
|
||||||
|
".lst_total > li",
|
||||||
|
]
|
||||||
|
|
||||||
|
for selector in item_selectors:
|
||||||
|
items = element.select(selector)
|
||||||
|
if items and len(items) > 0:
|
||||||
|
return len(items)
|
||||||
|
|
||||||
|
# Fallback: count links that look like results
|
||||||
|
links = element.find_all("a", href=True)
|
||||||
|
result_links = [
|
||||||
|
a
|
||||||
|
for a in links
|
||||||
|
if a.get("href", "").startswith("http")
|
||||||
|
and "naver.com/search" not in a.get("href", "")
|
||||||
|
]
|
||||||
|
return len(result_links) if result_links else 0
|
||||||
|
|
||||||
|
def _fallback_text_detection(self, html: str) -> list[NaverSection]:
|
||||||
|
"""Detect sections by scanning raw HTML text for known markers."""
|
||||||
|
sections: list[NaverSection] = []
|
||||||
|
position = 0
|
||||||
|
html_lower = html.lower()
|
||||||
|
|
||||||
|
for section_type, selectors in NAVER_SECTION_SELECTORS.items():
|
||||||
|
for selector in selectors:
|
||||||
|
if selector.lower() in html_lower:
|
||||||
|
position += 1
|
||||||
|
sections.append(
|
||||||
|
NaverSection(
|
||||||
|
section_type=section_type,
|
||||||
|
position=position,
|
||||||
|
item_count=0,
|
||||||
|
is_above_fold=(position <= 3),
|
||||||
|
)
|
||||||
|
)
|
||||||
|
break
|
||||||
|
|
||||||
|
return sections
|
||||||
|
|
||||||
|
# ----- Section Priority Analysis -----
|
||||||
|
|
||||||
|
def analyze_section_priority(
|
||||||
|
self, sections: list[NaverSection]
|
||||||
|
) -> list[str]:
|
||||||
|
"""
|
||||||
|
Determine above-fold section order.
|
||||||
|
|
||||||
|
Returns ordered list of section types that appear in the first
|
||||||
|
visible area of the SERP (approximately top 3 sections).
|
||||||
|
"""
|
||||||
|
sorted_sections = sorted(sections, key=lambda s: s.position)
|
||||||
|
above_fold = [s.section_type for s in sorted_sections if s.is_above_fold]
|
||||||
|
return above_fold
|
||||||
|
|
||||||
|
# ----- Brand Zone Detection -----
|
||||||
|
|
||||||
|
def check_brand_zone(self, html: str) -> tuple[bool, str]:
|
||||||
|
"""
|
||||||
|
Detect brand zone presence and extract brand name if available.
|
||||||
|
|
||||||
|
Returns (is_present, brand_name).
|
||||||
|
"""
|
||||||
|
if not html:
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
soup = BeautifulSoup(html, "lxml")
|
||||||
|
|
||||||
|
# Look for brand zone container
|
||||||
|
brand_selectors = [
|
||||||
|
"sp_brand",
|
||||||
|
"brand_area",
|
||||||
|
"brand_zone",
|
||||||
|
"type_brand",
|
||||||
|
]
|
||||||
|
|
||||||
|
for selector in brand_selectors:
|
||||||
|
brand_el = soup.find(
|
||||||
|
["div", "section"],
|
||||||
|
class_=re.compile(selector, re.IGNORECASE),
|
||||||
|
)
|
||||||
|
if brand_el:
|
||||||
|
# Try to extract brand name from the section
|
||||||
|
brand_name = ""
|
||||||
|
title_el = brand_el.find(
|
||||||
|
["h2", "h3", "strong", "a"],
|
||||||
|
class_=re.compile(r"(tit|title|name|brand)", re.IGNORECASE),
|
||||||
|
)
|
||||||
|
if title_el:
|
||||||
|
brand_name = title_el.get_text(strip=True)
|
||||||
|
|
||||||
|
return True, brand_name
|
||||||
|
|
||||||
|
# Text-based fallback
|
||||||
|
if "brand_zone" in html.lower() or "sp_brand" in html.lower():
|
||||||
|
return True, ""
|
||||||
|
|
||||||
|
return False, ""
|
||||||
|
|
||||||
|
# ----- Dominant Section -----
|
||||||
|
|
||||||
|
def _find_dominant_section(self, sections: list[NaverSection]) -> str:
|
||||||
|
"""Find the section with the most items (excluding ads)."""
|
||||||
|
non_ad = [s for s in sections if s.section_type != "ad"]
|
||||||
|
if not non_ad:
|
||||||
|
return ""
|
||||||
|
return max(non_ad, key=lambda s: s.item_count).section_type
|
||||||
|
|
||||||
|
# ----- Main Analysis Orchestrator -----
|
||||||
|
|
||||||
|
def analyze(self, keyword: str) -> NaverSerpResult:
|
||||||
|
"""
|
||||||
|
Orchestrate full Naver SERP analysis for a single keyword.
|
||||||
|
|
||||||
|
Steps:
|
||||||
|
1. Fetch Naver search results page
|
||||||
|
2. Detect SERP sections
|
||||||
|
3. Analyze section priority
|
||||||
|
4. Check brand zone presence
|
||||||
|
5. Compile results
|
||||||
|
"""
|
||||||
|
html = self.fetch_serp(keyword)
|
||||||
|
|
||||||
|
if not html:
|
||||||
|
self.logger.error(f"No HTML content for keyword '{keyword}'")
|
||||||
|
return NaverSerpResult(keyword=keyword)
|
||||||
|
|
||||||
|
sections = self.detect_sections(html)
|
||||||
|
above_fold = self.analyze_section_priority(sections)
|
||||||
|
brand_present, brand_name = self.check_brand_zone(html)
|
||||||
|
|
||||||
|
# Build section order
|
||||||
|
section_order = [s.section_type for s in sorted(sections, key=lambda x: x.position)]
|
||||||
|
|
||||||
|
# Count ads
|
||||||
|
ad_sections = [s for s in sections if s.section_type == "ad"]
|
||||||
|
ad_count = sum(s.item_count for s in ad_sections) if ad_sections else 0
|
||||||
|
|
||||||
|
# Check special sections
|
||||||
|
has_place = any(s.section_type == "place" for s in sections)
|
||||||
|
dominant = self._find_dominant_section(sections)
|
||||||
|
|
||||||
|
result = NaverSerpResult(
|
||||||
|
keyword=keyword,
|
||||||
|
sections=sections,
|
||||||
|
section_order=section_order,
|
||||||
|
brand_zone_present=brand_present,
|
||||||
|
brand_zone_brand=brand_name,
|
||||||
|
total_sections=len(sections),
|
||||||
|
above_fold_sections=above_fold,
|
||||||
|
ad_count=ad_count,
|
||||||
|
dominant_section=dominant,
|
||||||
|
has_place_section=has_place,
|
||||||
|
)
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_dict(result: NaverSerpResult) -> dict[str, Any]:
    """Convert a NaverSerpResult to a JSON-serializable dictionary."""
    payload = asdict(result)
    # Drop bulky raw HTML snippets so the JSON output stays readable.
    for entry in payload.get("sections", []):
        entry.pop("raw_html_snippet", None)
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
def print_rich_report(result: NaverSerpResult) -> None:
    """Print a human-readable SERP report to the console using rich.

    Renders, in order: a summary table, a per-section detail table,
    the above-fold section order, and the full section order.
    Produces console output only; returns nothing.
    """
    console.rule(f"[bold blue]Naver SERP Analysis: {result.keyword}")
    console.print(f"[dim]Timestamp: {result.timestamp}[/dim]")
    console.print()

    # Summary table: headline metrics for the analyzed SERP.
    summary_table = Table(title="Summary", show_lines=True)
    summary_table.add_column("Metric", style="cyan")
    summary_table.add_column("Value", style="green")
    summary_table.add_row("Total Sections", str(result.total_sections))
    summary_table.add_row("Ad Count", str(result.ad_count))
    summary_table.add_row("Brand Zone", "Yes" if result.brand_zone_present else "No")
    # Brand name row only appears when a name was actually extracted.
    if result.brand_zone_brand:
        summary_table.add_row("Brand Name", result.brand_zone_brand)
    summary_table.add_row("Place Section", "Yes" if result.has_place_section else "No")
    summary_table.add_row("Dominant Section", result.dominant_section or "N/A")
    console.print(summary_table)
    console.print()

    # Section Details: one row per detected section, sorted by position.
    if result.sections:
        section_table = Table(title="Detected Sections", show_lines=True)
        section_table.add_column("#", style="bold")
        section_table.add_column("Section", style="cyan")
        section_table.add_column("Display Name", style="magenta")
        section_table.add_column("Items", style="green")
        section_table.add_column("Above Fold", style="yellow")
        section_table.add_column("More Link", style="dim")

        for s in sorted(result.sections, key=lambda x: x.position):
            section_table.add_row(
                str(s.position),
                s.section_type,
                s.display_name,
                str(s.item_count),
                "Yes" if s.is_above_fold else "No",
                "Yes" if s.has_more_link else "No",
            )
        console.print(section_table)
        console.print()

    # Above-Fold Sections: numbered list of sections in the visible area.
    if result.above_fold_sections:
        console.print("[bold]Above-Fold Section Order:[/bold]")
        for i, sec in enumerate(result.above_fold_sections, 1):
            # Fall back to the raw type when no display name is registered.
            display = SECTION_DISPLAY_NAMES.get(sec, sec)
            console.print(f"  {i}. {display} ({sec})")
        console.print()

    # Full Section Order: arrow-joined chain of every detected section.
    if result.section_order:
        console.print("[bold]Full Section Order:[/bold]")
        order_str = " -> ".join(
            SECTION_DISPLAY_NAMES.get(s, s) for s in result.section_order
        )
        console.print(f"  {order_str}")

    console.rule()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Construct the CLI argument parser for the Naver SERP analyzer."""
    parser = argparse.ArgumentParser(
        description="Naver SERP composition analysis",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    python naver_serp_analyzer.py --keyword "치과 임플란트" --json
    python naver_serp_analyzer.py --keywords-file keywords.txt --json
    python naver_serp_analyzer.py --keyword "치과 임플란트" --output report.json
""",
    )

    # Exactly one keyword source must be supplied.
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument(
        "--keyword",
        type=str,
        help="Single keyword to analyze",
    )
    source.add_argument(
        "--keywords-file",
        type=str,
        help="Path to file with one keyword per line",
    )

    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output results as JSON",
    )
    parser.add_argument(
        "--output",
        type=str,
        help="Write JSON results to file",
    )
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def load_keywords(filepath: str) -> list[str]:
    """Load keywords from a text file, one per line.

    Blank lines and lines beginning with '#' are ignored. Exits the
    process with status 1 when the file does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        logger.error(f"Keywords file not found: {filepath}")
        sys.exit(1)

    with open(path, "r", encoding="utf-8") as fh:
        keywords = [
            stripped
            for stripped in (line.strip() for line in fh)
            if stripped and not stripped.startswith("#")
        ]

    logger.info(f"Loaded {len(keywords)} keywords from {filepath}")
    return keywords
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: collect keywords, analyze each, emit reports."""
    args = build_parser().parse_args()
    analyzer = NaverSerpAnalyzer()

    # The two keyword flags are mutually exclusive, so exactly one applies.
    if args.keyword:
        keywords = [args.keyword]
    else:
        keywords = load_keywords(args.keywords_file)

    if not keywords:
        logger.error("No keywords to analyze")
        sys.exit(1)

    machine_readable = bool(args.json_output or args.output)
    results: list[dict[str, Any]] = []

    for kw in keywords:
        console.print(f"\n[bold]Analyzing Naver SERP:[/bold] {kw}")
        analysis = analyzer.analyze(kw)
        if machine_readable:
            results.append(result_to_dict(analysis))
        else:
            print_rich_report(analysis)

    # A single result is emitted bare; multiple results as a JSON array.
    payload = results[0] if len(results) == 1 else results

    if args.json_output:
        print(json.dumps(payload, ensure_ascii=False, indent=2))

    if args.output:
        destination = Path(args.output)
        with open(destination, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=2)
        logger.info(f"Results written to {destination}")
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point guard: only run the CLI when executed directly.
if __name__ == "__main__":
    main()
|
||||||
@@ -0,0 +1,9 @@
|
|||||||
|
# 20-seo-serp-analysis dependencies
|
||||||
|
requests>=2.31.0
|
||||||
|
aiohttp>=3.9.0
|
||||||
|
beautifulsoup4>=4.12.0
|
||||||
|
lxml>=5.1.0
|
||||||
|
tenacity>=8.2.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
rich>=13.7.0
|
||||||
891
custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py
Normal file
891
custom-skills/20-seo-serp-analysis/code/scripts/serp_analyzer.py
Normal file
@@ -0,0 +1,891 @@
|
|||||||
|
"""
|
||||||
|
SERP Analyzer - Google SERP feature detection and competitor mapping
|
||||||
|
====================================================================
|
||||||
|
Purpose: Analyze Google SERP features, map competitor positions,
|
||||||
|
classify content types, and score SERP opportunities.
|
||||||
|
Python: 3.10+
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python serp_analyzer.py --keyword "치과 임플란트" --country kr --json
|
||||||
|
python serp_analyzer.py --keywords-file keywords.txt --country kr --json
|
||||||
|
python serp_analyzer.py --keyword "dental implant" --output serp_report.json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import subprocess
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data Classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SerpFeatures:
    """Tracks presence and count of Google SERP features."""

    featured_snippet: bool = False
    people_also_ask: bool = False
    local_pack: bool = False
    knowledge_panel: bool = False
    video_carousel: bool = False
    image_pack: bool = False
    site_links: bool = False
    ads_top: int = 0
    ads_bottom: int = 0
    shopping: bool = False

    @property
    def feature_count(self) -> int:
        """Count of boolean features that are present."""
        flags = (
            self.featured_snippet,
            self.people_also_ask,
            self.local_pack,
            self.knowledge_panel,
            self.video_carousel,
            self.image_pack,
            self.site_links,
            self.shopping,
        )
        return sum(1 for flag in flags if flag)

    @property
    def has_ads(self) -> bool:
        """True when any top or bottom ads were detected."""
        return self.ads_top > 0 or self.ads_bottom > 0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CompetitorPosition:
    """A single competitor entry in the SERP."""

    position: int  # 1-based rank within the SERP
    url: str  # full result URL
    domain: str  # URL netloc with leading "www." stripped
    title: str = ""  # result title text
    content_type: str = "unknown"  # classification label from classify_content_type
    is_featured: bool = False  # result occupies the featured snippet slot
    has_sitelinks: bool = False  # result shows sitelinks
    estimated_traffic_share: float = 0.0  # CTR-based share estimate (0..1)
|
||||||
|
|
||||||
|
@dataclass
class SerpResult:
    """Complete SERP analysis result for a keyword."""

    keyword: str  # the analyzed search term
    country: str = "us"  # country code used for the lookup
    search_volume: int = 0  # monthly search volume (0 when unknown)
    keyword_difficulty: float = 0.0  # difficulty metric (0 when unknown)
    cpc: float = 0.0  # cost-per-click (0 when unknown)
    serp_features: SerpFeatures = field(default_factory=SerpFeatures)
    competitors: list[CompetitorPosition] = field(default_factory=list)
    opportunity_score: int = 0  # 0-100, higher means better opportunity
    intent_signals: str = "informational"  # inferred dominant search intent
    content_type_distribution: dict[str, int] = field(default_factory=dict)
    volatility: str = "stable"  # SERP volatility label
    timestamp: str = ""  # ISO-8601 analysis time; auto-filled below

    def __post_init__(self):
        # Stamp the analysis time unless the caller supplied one.
        if not self.timestamp:
            self.timestamp = datetime.now().isoformat()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Content Type Classifiers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# URL path patterns that hint at content type
|
||||||
|
URL_CONTENT_PATTERNS: dict[str, list[str]] = {
    "blog": [
        r"/blog/",
        r"/post/",
        r"/article/",
        r"/news/",
        r"/magazine/",
        r"/journal/",
        r"/column/",
        r"/story/",
        # Date-style archive paths (e.g. 2024/01/) typical of blog permalinks.
        r"\d{4}/\d{2}/",
    ],
    "product": [
        r"/product/",
        r"/item/",
        r"/shop/",
        r"/store/",
        r"/buy/",
        r"/p/",
        r"/goods/",
        r"/catalog/",
    ],
    "service": [
        r"/service",
        r"/solution",
        r"/treatment",
        r"/procedure",
        r"/pricing",
        r"/consultation",
    ],
    "news": [
        r"/news/",
        r"/press/",
        r"/media/",
        r"/release/",
        # Subdomain markers (news.example.com, press.example.com).
        r"news\.",
        r"press\.",
    ],
    "video": [
        r"youtube\.com/watch",
        r"youtu\.be/",
        r"vimeo\.com/",
        r"/video/",
        r"/watch/",
    ],
    "forum": [
        r"/forum/",
        r"/community/",
        r"/discuss",
        r"/thread/",
        r"/question/",
        r"/answers/",
    ],
    "wiki": [
        r"wikipedia\.org",
        r"/wiki/",
        r"namu\.wiki",
    ],
}

# Title keywords that hint at content type (Korean and English variants).
TITLE_CONTENT_PATTERNS: dict[str, list[str]] = {
    "blog": ["블로그", "후기", "리뷰", "review", "guide", "가이드", "팁", "tips"],
    "product": ["구매", "가격", "buy", "price", "shop", "할인", "sale", "최저가"],
    "service": ["상담", "치료", "진료", "병원", "클리닉", "clinic", "treatment"],
    "news": ["뉴스", "속보", "보도", "news", "기사", "report"],
    "video": ["영상", "동영상", "video", "youtube"],
    "comparison": ["비교", "vs", "versus", "compare", "차이", "best"],
}

# CTR distribution by position (approximate click-through rates).
# Used to estimate each competitor's traffic share from its rank.
CTR_BY_POSITION: dict[int, float] = {
    1: 0.316,
    2: 0.158,
    3: 0.110,
    4: 0.080,
    5: 0.062,
    6: 0.049,
    7: 0.040,
    8: 0.034,
    9: 0.029,
    10: 0.025,
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# SERP Analyzer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class SerpAnalyzer:
|
||||||
|
"""Analyzes Google SERP features, competitor positions, and opportunities."""
|
||||||
|
|
||||||
|
    def __init__(self):
        # Class-named logger so log lines are attributable to this analyzer.
        self.logger = logging.getLogger(self.__class__.__name__)
|
||||||
|
|
||||||
|
# ----- Data Fetching -----
|
||||||
|
|
||||||
|
def get_serp_data(self, keyword: str, country: str = "us") -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Fetch SERP data via Ahrefs serp-overview MCP tool.
|
||||||
|
|
||||||
|
Uses subprocess to invoke the Ahrefs MCP tool. Falls back to a
|
||||||
|
structured placeholder when the MCP tool is unavailable (e.g., in
|
||||||
|
standalone / CI environments).
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Fetching SERP data for '{keyword}' (country={country})")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Attempt MCP tool call via subprocess
|
||||||
|
cmd = [
|
||||||
|
"claude",
|
||||||
|
"mcp",
|
||||||
|
"call",
|
||||||
|
"ahrefs",
|
||||||
|
"serp-overview",
|
||||||
|
json.dumps({"keyword": keyword, "country": country}),
|
||||||
|
]
|
||||||
|
result = subprocess.run(
|
||||||
|
cmd,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=60,
|
||||||
|
)
|
||||||
|
if result.returncode == 0 and result.stdout.strip():
|
||||||
|
data = json.loads(result.stdout)
|
||||||
|
self.logger.info("Successfully fetched SERP data via MCP")
|
||||||
|
return data
|
||||||
|
except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError) as exc:
|
||||||
|
self.logger.warning(f"MCP call unavailable ({exc}), using keyword metrics fallback")
|
||||||
|
|
||||||
|
# Fallback: try Ahrefs keywords-explorer-overview
|
||||||
|
try:
|
||||||
|
cmd_kw = [
|
||||||
|
"claude",
|
||||||
|
"mcp",
|
||||||
|
"call",
|
||||||
|
"ahrefs",
|
||||||
|
"keywords-explorer-overview",
|
||||||
|
json.dumps({"keyword": keyword, "country": country}),
|
||||||
|
]
|
||||||
|
result_kw = subprocess.run(
|
||||||
|
cmd_kw,
|
||||||
|
capture_output=True,
|
||||||
|
text=True,
|
||||||
|
timeout=60,
|
||||||
|
)
|
||||||
|
if result_kw.returncode == 0 and result_kw.stdout.strip():
|
||||||
|
data = json.loads(result_kw.stdout)
|
||||||
|
self.logger.info("Fetched keyword overview via MCP")
|
||||||
|
return data
|
||||||
|
except (subprocess.TimeoutExpired, FileNotFoundError, json.JSONDecodeError) as exc:
|
||||||
|
self.logger.warning(f"Keywords-explorer MCP also unavailable ({exc})")
|
||||||
|
|
||||||
|
# Return empty structure when no MCP tools available
|
||||||
|
self.logger.warning(
|
||||||
|
"No MCP data source available. Run inside Claude Desktop "
|
||||||
|
"or provide data via --input flag."
|
||||||
|
)
|
||||||
|
return {
|
||||||
|
"keyword": keyword,
|
||||||
|
"country": country,
|
||||||
|
"serp": [],
|
||||||
|
"serp_features": {},
|
||||||
|
"metrics": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
# ----- Feature Detection -----
|
||||||
|
|
||||||
|
    def detect_features(self, serp_data: dict[str, Any]) -> SerpFeatures:
        """
        Identify SERP features from Ahrefs response data.

        Handles both the structured 'serp_features' dict returned by
        keywords-explorer-overview and the raw SERP items list from
        serp-overview. The two methods are additive: flags set by either
        source remain set, and ad counters initialized from the structured
        dict are incremented further by raw ad items.
        """
        features = SerpFeatures()

        # -- Method 1: structured serp_features dict from Ahrefs --
        # Each feature may appear under alternative key names, hence the
        # `or`-chained lookups below.
        sf = serp_data.get("serp_features", {})
        if isinstance(sf, dict):
            features.featured_snippet = sf.get("featured_snippet", False)
            features.people_also_ask = sf.get("people_also_ask", False)
            features.local_pack = sf.get("local_pack", False)
            features.knowledge_panel = sf.get("knowledge_panel", False) or sf.get(
                "knowledge_graph", False
            )
            features.video_carousel = sf.get("video", False) or sf.get(
                "video_carousel", False
            )
            features.image_pack = sf.get("image_pack", False) or sf.get(
                "images", False
            )
            features.site_links = sf.get("sitelinks", False) or sf.get(
                "site_links", False
            )
            features.shopping = sf.get("shopping_results", False) or sf.get(
                "shopping", False
            )
            # `or 0` guards against explicit None values in the payload.
            features.ads_top = int(sf.get("ads_top", 0) or 0)
            features.ads_bottom = int(sf.get("ads_bottom", 0) or 0)

        # -- Method 2: infer features from the raw SERP items list --
        serp_items = serp_data.get("serp", [])
        if isinstance(serp_items, list):
            for item in serp_items:
                # Substring matching on the item's type label.
                item_type = str(item.get("type", "")).lower()
                if "featured_snippet" in item_type or item.get("is_featured"):
                    features.featured_snippet = True
                if "people_also_ask" in item_type or "paa" in item_type:
                    features.people_also_ask = True
                if "local" in item_type or "map" in item_type:
                    features.local_pack = True
                if "knowledge" in item_type:
                    features.knowledge_panel = True
                if "video" in item_type:
                    features.video_carousel = True
                if "image" in item_type:
                    features.image_pack = True
                if item.get("sitelinks"):
                    features.site_links = True
                if "shopping" in item_type:
                    features.shopping = True
                if "ad" in item_type:
                    # Ads in the first four positions count as top ads.
                    pos = item.get("position", 0)
                    if pos <= 4:
                        features.ads_top += 1
                    else:
                        features.ads_bottom += 1

        return features
|
||||||
|
|
||||||
|
# ----- Competitor Mapping -----
|
||||||
|
|
||||||
|
def map_competitors(self, serp_data: dict[str, Any]) -> list[CompetitorPosition]:
|
||||||
|
"""Extract competitor positions and domains from SERP data."""
|
||||||
|
competitors: list[CompetitorPosition] = []
|
||||||
|
serp_items = serp_data.get("serp", [])
|
||||||
|
|
||||||
|
if not isinstance(serp_items, list):
|
||||||
|
return competitors
|
||||||
|
|
||||||
|
for item in serp_items:
|
||||||
|
url = item.get("url", "")
|
||||||
|
if not url:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Skip ads for organic mapping
|
||||||
|
item_type = str(item.get("type", "")).lower()
|
||||||
|
if "ad" in item_type:
|
||||||
|
continue
|
||||||
|
|
||||||
|
parsed = urlparse(url)
|
||||||
|
domain = parsed.netloc.replace("www.", "")
|
||||||
|
position = int(item.get("position", len(competitors) + 1))
|
||||||
|
title = item.get("title", "")
|
||||||
|
|
||||||
|
content_type = self.classify_content_type(item)
|
||||||
|
traffic_share = CTR_BY_POSITION.get(position, 0.01)
|
||||||
|
|
||||||
|
comp = CompetitorPosition(
|
||||||
|
position=position,
|
||||||
|
url=url,
|
||||||
|
domain=domain,
|
||||||
|
title=title,
|
||||||
|
content_type=content_type,
|
||||||
|
is_featured=bool(item.get("is_featured")),
|
||||||
|
has_sitelinks=bool(item.get("sitelinks")),
|
||||||
|
estimated_traffic_share=round(traffic_share, 4),
|
||||||
|
)
|
||||||
|
competitors.append(comp)
|
||||||
|
|
||||||
|
# Sort by position
|
||||||
|
competitors.sort(key=lambda c: c.position)
|
||||||
|
return competitors
|
||||||
|
|
||||||
|
# ----- Content Type Classification -----
|
||||||
|
|
||||||
|
def classify_content_type(self, result: dict[str, Any]) -> str:
|
||||||
|
"""
|
||||||
|
Classify a SERP result as blog/product/service/news/video/forum/wiki
|
||||||
|
based on URL patterns and title keywords.
|
||||||
|
"""
|
||||||
|
url = result.get("url", "").lower()
|
||||||
|
title = result.get("title", "").lower()
|
||||||
|
|
||||||
|
scores: dict[str, int] = {}
|
||||||
|
|
||||||
|
# Score from URL patterns
|
||||||
|
for ctype, patterns in URL_CONTENT_PATTERNS.items():
|
||||||
|
for pattern in patterns:
|
||||||
|
if re.search(pattern, url):
|
||||||
|
scores[ctype] = scores.get(ctype, 0) + 2
|
||||||
|
break
|
||||||
|
|
||||||
|
# Score from title patterns
|
||||||
|
for ctype, keywords in TITLE_CONTENT_PATTERNS.items():
|
||||||
|
for kw in keywords:
|
||||||
|
if kw.lower() in title:
|
||||||
|
scores[ctype] = scores.get(ctype, 0) + 1
|
||||||
|
|
||||||
|
if not scores:
|
||||||
|
# Heuristic: if domain is a known authority site
|
||||||
|
parsed = urlparse(url)
|
||||||
|
domain = parsed.netloc.lower()
|
||||||
|
if any(d in domain for d in ["wikipedia", "namu.wiki", "나무위키"]):
|
||||||
|
return "wiki"
|
||||||
|
if any(d in domain for d in ["youtube", "vimeo"]):
|
||||||
|
return "video"
|
||||||
|
if any(d in domain for d in ["naver.com", "tistory.com", "brunch.co.kr"]):
|
||||||
|
return "blog"
|
||||||
|
return "service_page"
|
||||||
|
|
||||||
|
# Return highest scoring type
|
||||||
|
return max(scores, key=scores.get) # type: ignore[arg-type]
|
||||||
|
|
||||||
|
# ----- Opportunity Scoring -----
|
||||||
|
|
||||||
|
def calculate_opportunity_score(
|
||||||
|
self,
|
||||||
|
features: SerpFeatures,
|
||||||
|
positions: list[CompetitorPosition],
|
||||||
|
) -> int:
|
||||||
|
"""
|
||||||
|
Score SERP opportunity from 0-100.
|
||||||
|
|
||||||
|
Higher scores indicate better opportunity to rank or gain features.
|
||||||
|
|
||||||
|
Factors (additive):
|
||||||
|
- Featured snippet available but could be captured +15
|
||||||
|
- PAA present (related question opportunity) +10
|
||||||
|
- No knowledge panel (less SERP real-estate taken) +10
|
||||||
|
- Low ad count (more organic visibility) +10
|
||||||
|
- Few sitelinks in top results +5
|
||||||
|
- Content diversity (various domains in top 10) +10
|
||||||
|
- No video carousel (opportunity to add video) +5
|
||||||
|
- Top results are blogs (easier to outrank) +10
|
||||||
|
- Image pack absent (image SEO opportunity) +5
|
||||||
|
- Shopping absent for commercial keywords +5
|
||||||
|
- Top positions lacking schema/rich results +5
|
||||||
|
|
||||||
|
Penalty factors (subtractive):
|
||||||
|
- Knowledge panel dominates -15
|
||||||
|
- Heavy ad presence (4+ top ads) -10
|
||||||
|
- Single domain dominates top 5 -10
|
||||||
|
"""
|
||||||
|
score = 50 # Base score
|
||||||
|
|
||||||
|
# -- Positive signals --
|
||||||
|
if features.featured_snippet:
|
||||||
|
score += 15
|
||||||
|
if features.people_also_ask:
|
||||||
|
score += 10
|
||||||
|
if not features.knowledge_panel:
|
||||||
|
score += 10
|
||||||
|
if features.ads_top <= 1:
|
||||||
|
score += 10
|
||||||
|
elif features.ads_top <= 2:
|
||||||
|
score += 5
|
||||||
|
if not features.video_carousel:
|
||||||
|
score += 5
|
||||||
|
if not features.image_pack:
|
||||||
|
score += 5
|
||||||
|
if not features.shopping:
|
||||||
|
score += 5
|
||||||
|
|
||||||
|
# Domain diversity in top 10
|
||||||
|
if positions:
|
||||||
|
top10_domains = {p.domain for p in positions[:10]}
|
||||||
|
if len(top10_domains) >= 8:
|
||||||
|
score += 10
|
||||||
|
elif len(top10_domains) >= 5:
|
||||||
|
score += 5
|
||||||
|
|
||||||
|
# Blog-heavy top results (easier to compete)
|
||||||
|
blog_count = sum(
|
||||||
|
1 for p in positions[:5] if p.content_type == "blog"
|
||||||
|
)
|
||||||
|
if blog_count >= 3:
|
||||||
|
score += 10
|
||||||
|
elif blog_count >= 2:
|
||||||
|
score += 5
|
||||||
|
|
||||||
|
# Sitelinks reduce available space
|
||||||
|
sitelink_count = sum(1 for p in positions[:5] if p.has_sitelinks)
|
||||||
|
if sitelink_count <= 1:
|
||||||
|
score += 5
|
||||||
|
|
||||||
|
# Single domain dominance penalty
|
||||||
|
domain_counts: dict[str, int] = {}
|
||||||
|
for p in positions[:5]:
|
||||||
|
domain_counts[p.domain] = domain_counts.get(p.domain, 0) + 1
|
||||||
|
if any(c >= 3 for c in domain_counts.values()):
|
||||||
|
score -= 10
|
||||||
|
|
||||||
|
# -- Negative signals --
|
||||||
|
if features.knowledge_panel:
|
||||||
|
score -= 15
|
||||||
|
if features.ads_top >= 4:
|
||||||
|
score -= 10
|
||||||
|
elif features.ads_top >= 3:
|
||||||
|
score -= 5
|
||||||
|
|
||||||
|
# Clamp to 0-100
|
||||||
|
return max(0, min(100, score))
|
||||||
|
|
||||||
|
# ----- Intent Validation -----
|
||||||
|
|
||||||
|
def validate_intent(
    self,
    features: SerpFeatures,
    positions: list[CompetitorPosition],
) -> str:
    """
    Infer search intent from SERP composition.

    Scores five intent buckets from SERP feature presence and from the
    content types of the top organic results, then returns the bucket with
    the highest score.

    Returns one of: informational, navigational, commercial, transactional, local
    """
    signals = {
        "informational": 0,
        "navigational": 0,
        "commercial": 0,
        "transactional": 0,
        "local": 0,
    }

    # Feature-derived votes: (condition, bucket, weight)
    feature_votes = (
        (features.featured_snippet, "informational", 3),
        (features.people_also_ask, "informational", 2),
        (features.knowledge_panel, "informational", 2),
        (features.knowledge_panel, "navigational", 2),
        (features.local_pack, "local", 5),
        (features.shopping, "transactional", 4),
        (features.has_ads, "commercial", 2),
        (features.has_ads, "transactional", 1),
        (features.ads_top >= 3, "transactional", 2),
        (features.image_pack, "informational", 1),
        (features.video_carousel, "informational", 1),
    )
    for present, bucket, weight in feature_votes:
        if present:
            signals[bucket] += weight

    # Content-type votes from the top 10 organic results.
    ct_votes = {
        "blog": ("informational", 1),
        "product": ("transactional", 2),
        "service": ("commercial", 1),
        "news": ("informational", 1),
        "video": ("informational", 1),
        "wiki": ("informational", 2),
        "forum": ("informational", 1),
        "comparison": ("commercial", 2),
    }
    for pos in positions[:10]:
        vote = ct_votes.get(pos.content_type)
        if vote is not None:
            bucket, weight = vote
            signals[bucket] += weight

    # Navigational: a single domain owning the entire top 3 is a strong signal.
    if positions and len({p.domain for p in positions[:3]}) == 1:
        signals["navigational"] += 5

    # Highest-scoring bucket wins (insertion order breaks ties, as before).
    return max(signals.items(), key=lambda kv: kv[1])[0]
|
||||||
|
|
||||||
|
# ----- Content Type Distribution -----
|
||||||
|
|
||||||
|
def _content_type_distribution(
|
||||||
|
self, positions: list[CompetitorPosition]
|
||||||
|
) -> dict[str, int]:
|
||||||
|
"""Count content types across top organic results."""
|
||||||
|
dist: dict[str, int] = {}
|
||||||
|
for p in positions[:10]:
|
||||||
|
dist[p.content_type] = dist.get(p.content_type, 0) + 1
|
||||||
|
return dict(sorted(dist.items(), key=lambda x: x[1], reverse=True))
|
||||||
|
|
||||||
|
# ----- Volatility Assessment -----
|
||||||
|
|
||||||
|
def _assess_volatility(self, serp_data: dict[str, Any]) -> str:
|
||||||
|
"""
|
||||||
|
Assess SERP volatility based on available signals.
|
||||||
|
|
||||||
|
Returns: stable, moderate, volatile
|
||||||
|
"""
|
||||||
|
# Check if Ahrefs provides a volatility/movement score
|
||||||
|
metrics = serp_data.get("metrics", {})
|
||||||
|
if isinstance(metrics, dict):
|
||||||
|
volatility_score = metrics.get("serp_volatility", None)
|
||||||
|
if volatility_score is not None:
|
||||||
|
if volatility_score < 3:
|
||||||
|
return "stable"
|
||||||
|
elif volatility_score < 7:
|
||||||
|
return "moderate"
|
||||||
|
else:
|
||||||
|
return "volatile"
|
||||||
|
|
||||||
|
# Heuristic: if many results have recent dates, SERP is more volatile
|
||||||
|
serp_items = serp_data.get("serp", [])
|
||||||
|
if isinstance(serp_items, list) and serp_items:
|
||||||
|
recent_count = 0
|
||||||
|
for item in serp_items[:10]:
|
||||||
|
last_seen = item.get("last_seen", "")
|
||||||
|
if last_seen:
|
||||||
|
try:
|
||||||
|
dt = datetime.fromisoformat(last_seen.replace("Z", "+00:00"))
|
||||||
|
if (datetime.now(dt.tzinfo) - dt).days < 30:
|
||||||
|
recent_count += 1
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass
|
||||||
|
if recent_count >= 5:
|
||||||
|
return "volatile"
|
||||||
|
elif recent_count >= 3:
|
||||||
|
return "moderate"
|
||||||
|
|
||||||
|
return "stable"
|
||||||
|
|
||||||
|
# ----- Main Analysis Orchestrator -----
|
||||||
|
|
||||||
|
def analyze(self, keyword: str, country: str = "us") -> SerpResult:
    """
    Orchestrate full SERP analysis for a single keyword.

    Fetches SERP data via the Ahrefs MCP, then derives features, competitor
    positions, opportunity score, intent, content-type distribution, and
    volatility, bundling everything into a SerpResult.
    """
    raw = self.get_serp_data(keyword, country)

    features = self.detect_features(raw)
    competitors = self.map_competitors(raw)

    # Keyword-level metrics may be absent or null; coerce defensively.
    metrics = raw.get("metrics", {})
    volume = int(metrics.get("search_volume", 0) or 0)
    difficulty = float(metrics.get("keyword_difficulty", 0) or 0)
    cost_per_click = float(metrics.get("cpc", 0) or 0)

    return SerpResult(
        keyword=keyword,
        country=country,
        search_volume=volume,
        keyword_difficulty=difficulty,
        cpc=cost_per_click,
        serp_features=features,
        competitors=competitors,
        opportunity_score=self.calculate_opportunity_score(features, competitors),
        intent_signals=self.validate_intent(features, competitors),
        content_type_distribution=self._content_type_distribution(competitors),
        volatility=self._assess_volatility(raw),
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output Helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_dict(result: SerpResult) -> dict[str, Any]:
    """Convert SerpResult to a JSON-serializable dictionary."""
    return asdict(result)
|
||||||
|
|
||||||
|
|
||||||
|
def print_rich_report(result: SerpResult) -> None:
    """Print a human-readable report using rich."""
    console.rule(f"[bold blue]SERP Analysis: {result.keyword}")
    console.print(f"[dim]Country: {result.country} | Timestamp: {result.timestamp}[/dim]")
    console.print()

    # Keyword metrics (shown only when the source supplied any).
    if result.search_volume or result.keyword_difficulty:
        metrics_table = Table(title="Keyword Metrics", show_lines=True)
        metrics_table.add_column("Metric", style="cyan")
        metrics_table.add_column("Value", style="green")
        for label, rendered in (
            ("Search Volume", f"{result.search_volume:,}"),
            ("Keyword Difficulty", f"{result.keyword_difficulty:.1f}"),
            ("CPC", f"${result.cpc:.2f}"),
        ):
            metrics_table.add_row(label, rendered)
        console.print(metrics_table)
        console.print()

    # SERP feature presence table.
    feat = result.serp_features
    feat_table = Table(title="SERP Features", show_lines=True)
    feat_table.add_column("Feature", style="cyan")
    feat_table.add_column("Present", style="green")
    for label, rendered in (
        ("Featured Snippet", _bool_icon(feat.featured_snippet)),
        ("People Also Ask", _bool_icon(feat.people_also_ask)),
        ("Local Pack", _bool_icon(feat.local_pack)),
        ("Knowledge Panel", _bool_icon(feat.knowledge_panel)),
        ("Video Carousel", _bool_icon(feat.video_carousel)),
        ("Image Pack", _bool_icon(feat.image_pack)),
        ("Site Links", _bool_icon(feat.site_links)),
        ("Shopping", _bool_icon(feat.shopping)),
        ("Ads (top)", str(feat.ads_top)),
        ("Ads (bottom)", str(feat.ads_bottom)),
    ):
        feat_table.add_row(label, rendered)
    console.print(feat_table)
    console.print()

    # Top-10 competitor table.
    if result.competitors:
        comp_table = Table(title="Top Competitors", show_lines=True)
        comp_table.add_column("#", style="bold")
        comp_table.add_column("Domain", style="cyan")
        comp_table.add_column("Type", style="magenta")
        comp_table.add_column("CTR Share", style="green")
        comp_table.add_column("Featured", style="yellow")
        for comp in result.competitors[:10]:
            comp_table.add_row(
                str(comp.position),
                comp.domain,
                comp.content_type,
                f"{comp.estimated_traffic_share:.1%}",
                _bool_icon(comp.is_featured),
            )
        console.print(comp_table)
        console.print()

    # Content-type distribution of the top organic results.
    if result.content_type_distribution:
        dist_table = Table(title="Content Type Distribution (Top 10)", show_lines=True)
        dist_table.add_column("Content Type", style="cyan")
        dist_table.add_column("Count", style="green")
        for content_type, count in result.content_type_distribution.items():
            dist_table.add_row(content_type, str(count))
        console.print(dist_table)
        console.print()

    # Summary footer with color-coded opportunity score.
    if result.opportunity_score >= 60:
        opp_color = "green"
    elif result.opportunity_score >= 40:
        opp_color = "yellow"
    else:
        opp_color = "red"
    console.print(f"Opportunity Score: [{opp_color}]{result.opportunity_score}/100[/{opp_color}]")
    console.print(f"Search Intent: [bold]{result.intent_signals}[/bold]")
    console.print(f"SERP Volatility: [bold]{result.volatility}[/bold]")
    console.rule()
|
||||||
|
|
||||||
|
|
||||||
|
def _bool_icon(val: bool) -> str:
|
||||||
|
"""Return Yes/No string for boolean values."""
|
||||||
|
return "Yes" if val else "No"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser (exactly one of --keyword / --keywords-file required)."""
    parser = argparse.ArgumentParser(
        description="Google SERP feature detection and competitor mapping",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python serp_analyzer.py --keyword "치과 임플란트" --country kr --json
python serp_analyzer.py --keywords-file keywords.txt --country kr --output report.json
""",
    )
    source = parser.add_mutually_exclusive_group(required=True)
    source.add_argument("--keyword", type=str, help="Single keyword to analyze")
    source.add_argument(
        "--keywords-file",
        type=str,
        help="Path to file with one keyword per line",
    )
    parser.add_argument(
        "--country",
        type=str,
        default="us",
        help="Country code for SERP (default: us)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output results as JSON",
    )
    parser.add_argument("--output", type=str, help="Write JSON results to file")
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def load_keywords(filepath: str) -> list[str]:
    """Load keywords from a text file, one per line.

    Blank lines and '#'-prefixed comment lines are skipped. Exits with
    status 1 when the file does not exist.
    """
    path = Path(filepath)
    if not path.exists():
        logger.error(f"Keywords file not found: {filepath}")
        sys.exit(1)
    with open(path, "r", encoding="utf-8") as fh:
        keywords = [
            stripped
            for stripped in (raw.strip() for raw in fh)
            if stripped and not stripped.startswith("#")
        ]
    logger.info(f"Loaded {len(keywords)} keywords from {filepath}")
    return keywords
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: analyze one keyword or a whole file of keywords."""
    args = build_parser().parse_args()

    analyzer = SerpAnalyzer()

    # Resolve the keyword list from whichever mutually exclusive source was given.
    keywords = [args.keyword] if args.keyword else load_keywords(args.keywords_file)
    if not keywords:
        logger.error("No keywords to analyze")
        sys.exit(1)

    collected: list[dict[str, Any]] = []
    machine_readable = bool(args.json_output or args.output)

    for kw in keywords:
        console.print(f"\n[bold]Analyzing:[/bold] {kw}")
        analysis = analyzer.analyze(kw, args.country)
        if machine_readable:
            collected.append(result_to_dict(analysis))
        else:
            print_rich_report(analysis)

    # Emit JSON to stdout and/or a file; a single result is unwrapped.
    payload = collected[0] if len(collected) == 1 else collected
    if args.json_output:
        print(json.dumps(payload, ensure_ascii=False, indent=2))
    if args.output:
        destination = Path(args.output)
        with open(destination, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=2)
        logger.info(f"Results written to {destination}")


if __name__ == "__main__":
    main()
|
||||||
136
custom-skills/20-seo-serp-analysis/desktop/SKILL.md
Normal file
136
custom-skills/20-seo-serp-analysis/desktop/SKILL.md
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
---
|
||||||
|
name: seo-serp-analysis
|
||||||
|
description: |
|
||||||
|
SERP analysis for Google and Naver search results.
|
||||||
|
Triggers: SERP analysis, search results, featured snippet, SERP features, Naver SERP, 검색결과 분석, SERP 분석.
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO SERP Analysis
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Analyze search engine result page composition for Google and Naver. Detect SERP features (featured snippets, PAA, knowledge panels, local pack, video, ads), map competitor positions, score SERP feature opportunities, and analyze Naver section distribution.
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
1. **Google SERP Feature Detection** - Identify featured snippets, PAA, knowledge panels, local pack, video carousel, ads, image pack, site links, shopping
|
||||||
|
2. **Competitor Position Mapping** - Extract domains, positions, content types for top organic results
|
||||||
|
3. **Opportunity Scoring** - Score SERP opportunity (0-100) based on feature landscape and competition
|
||||||
|
4. **Search Intent Validation** - Infer intent (informational, navigational, commercial, transactional, local) from SERP composition
|
||||||
|
5. **Naver SERP Composition** - Detect sections (blog, cafe, knowledge iN, Smart Store, brand zone, books, shortform, influencer), map section priority, analyze brand zone presence
|
||||||
|
|
||||||
|
## MCP Tool Usage
|
||||||
|
|
||||||
|
### SEO Data (DataForSEO)
|
||||||
|
|
||||||
|
**Primary — our-seo-agent CLI:**
|
||||||
|
```bash
|
||||||
|
our serp live "<keyword>" --location 2410 --language ko
|
||||||
|
our serp competitors <domain> --location 2410
|
||||||
|
our serp ranked-keywords <domain> --location 2410 --limit 50
|
||||||
|
our serp domain-overview <domain> --location 2410
|
||||||
|
```
|
||||||
|
|
||||||
|
**Interactive fallback — DataForSEO MCP:**
|
||||||
|
```
|
||||||
|
mcp__dfs-mcp__serp_organic_live_advanced
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_serp_competitors
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_ranked_keywords
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_domain_rank_overview
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Parameters
|
||||||
|
- **location_code**: 2410 (Korea), 2840 (US), 2392 (Japan)
|
||||||
|
- **language_code**: ko, en, ja
|
||||||
|
|
||||||
|
### Notion for Report Storage
|
||||||
|
```
|
||||||
|
mcp__notion__notion-create-pages: Save analysis report to SEO Audit Log database
|
||||||
|
mcp__notion__notion-update-page: Update existing report entries
|
||||||
|
```
|
||||||
|
|
||||||
|
### Web Tools for Naver Analysis
|
||||||
|
```
|
||||||
|
WebSearch: Discover Naver search trends
|
||||||
|
WebFetch: Fetch Naver SERP HTML for section analysis
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### 1. Google SERP Analysis
|
||||||
|
1. Fetch SERP via `our serp live "<keyword>" --location 2410 --language ko --format json`
|
||||||
|
2. Parse SERP features from response (featured_snippet, people_also_ask, local_pack, etc.)
|
||||||
|
3. Map competitor positions from organic_results (domain, URL, title, position)
|
||||||
|
4. Classify content type for each result (blog, product, service, news, video)
|
||||||
|
5. Calculate opportunity score (0-100) based on feature landscape
|
||||||
|
6. Validate search intent from SERP composition
|
||||||
|
7. Get competitor domain overview via `our serp domain-overview <competitor> --location 2410`
|
||||||
|
|
||||||
|
### 2. Naver SERP Analysis
|
||||||
|
1. Fetch Naver search page for the target keyword
|
||||||
|
2. Detect SERP sections (blog, cafe, knowledge iN, Smart Store, brand zone, news, encyclopedia, books, shortform, influencer)
|
||||||
|
3. Map section priority (above-fold order)
|
||||||
|
4. Check brand zone presence and extract brand name
|
||||||
|
5. Count items per section
|
||||||
|
6. Identify dominant content section
|
||||||
|
|
||||||
|
### 3. Report Generation
|
||||||
|
1. Compile results into structured JSON
|
||||||
|
2. Generate Korean-language report
|
||||||
|
3. Save to Notion SEO Audit Log database
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"keyword": "치과 임플란트",
|
||||||
|
"country": "kr",
|
||||||
|
"serp_features": {
|
||||||
|
"featured_snippet": true,
|
||||||
|
"people_also_ask": true,
|
||||||
|
"local_pack": true,
|
||||||
|
"knowledge_panel": false,
|
||||||
|
"video_carousel": false,
|
||||||
|
"ads_top": 3,
|
||||||
|
"ads_bottom": 2
|
||||||
|
},
|
||||||
|
"competitors": [
|
||||||
|
{
|
||||||
|
"position": 1,
|
||||||
|
"url": "https://example.com/page",
|
||||||
|
"domain": "example.com",
|
||||||
|
"title": "...",
|
||||||
|
"content_type": "service_page"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"opportunity_score": 72,
|
||||||
|
"intent_signals": "commercial",
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common SERP Features
|
||||||
|
|
||||||
|
| Feature | Impact | Opportunity |
|
||||||
|
|---------|--------|-------------|
|
||||||
|
| Featured Snippet | High visibility above organic | Optimize content format for snippet capture |
|
||||||
|
| People Also Ask | Related question visibility | Create FAQ content targeting PAA |
|
||||||
|
| Local Pack | Dominates local intent SERPs | Optimize Google Business Profile |
|
||||||
|
| Knowledge Panel | Reduces organic CTR | Focus on brand queries and schema |
|
||||||
|
| Video Carousel | Visual SERP real estate | Create video content for keyword |
|
||||||
|
| Shopping | Transactional intent signal | Product feed optimization |
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- SERP data may have a delay depending on data source (not real-time)
|
||||||
|
- Naver SERP HTML structure changes periodically
|
||||||
|
- Brand zone detection depends on HTML class patterns
|
||||||
|
- Cannot detect personalized SERP results
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Properties**: Issue (title), Site (url), Category, Priority, Found Date, Audit ID
|
||||||
|
- **Language**: Korean with English technical terms
|
||||||
|
- **Audit ID Format**: SERP-YYYYMMDD-NNN
|
||||||
14
custom-skills/20-seo-serp-analysis/desktop/skill.yaml
Normal file
14
custom-skills/20-seo-serp-analysis/desktop/skill.yaml
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
# Skill metadata (extracted from SKILL.md frontmatter)
|
||||||
|
|
||||||
|
name: seo-serp-analysis
|
||||||
|
description: |
|
||||||
|
SERP analysis for Google and Naver. Triggers: SERP analysis, search results, featured snippet, SERP features, Naver SERP.
|
||||||
|
|
||||||
|
# Optional fields
|
||||||
|
allowed-tools:
|
||||||
|
- mcp__ahrefs__*
|
||||||
|
- mcp__notion__*
|
||||||
|
- WebSearch
|
||||||
|
- WebFetch
|
||||||
|
|
||||||
|
# triggers: [] # TODO: Extract from description
|
||||||
15
custom-skills/20-seo-serp-analysis/desktop/tools/ahrefs.md
Normal file
15
custom-skills/20-seo-serp-analysis/desktop/tools/ahrefs.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Ahrefs
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
15
custom-skills/20-seo-serp-analysis/desktop/tools/notion.md
Normal file
15
custom-skills/20-seo-serp-analysis/desktop/tools/notion.md
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
# Notion
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# WebSearch
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
144
custom-skills/21-seo-position-tracking/code/CLAUDE.md
Normal file
144
custom-skills/21-seo-position-tracking/code/CLAUDE.md
Normal file
@@ -0,0 +1,144 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Position tracking tool for monitoring keyword rankings. Monitors ranking positions, detects position changes with threshold alerts, calculates visibility scores weighted by search volume, compares against competitors, and segments by brand/non-brand keywords.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# Track positions for a project
|
||||||
|
python scripts/position_tracker.py --target https://example.com --json
|
||||||
|
|
||||||
|
# Generate ranking report
|
||||||
|
python scripts/ranking_reporter.py --target https://example.com --period 30 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `position_tracker.py` | Monitor keyword ranking positions and detect changes | Position data, change alerts, visibility scores |
|
||||||
|
| `ranking_reporter.py` | Generate ranking performance reports with trends | Trend analysis, segment reports, competitor comparison |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## Position Tracker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Get current positions
|
||||||
|
python scripts/position_tracker.py --target https://example.com --json
|
||||||
|
|
||||||
|
# With change threshold alerts (flag positions that moved ±5 or more)
|
||||||
|
python scripts/position_tracker.py --target https://example.com --threshold 5 --json
|
||||||
|
|
||||||
|
# Filter by keyword segment
|
||||||
|
python scripts/position_tracker.py --target https://example.com --segment brand --json
|
||||||
|
|
||||||
|
# Compare with competitors
|
||||||
|
python scripts/position_tracker.py --target https://example.com --competitor https://comp1.com --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Current ranking position retrieval via our-seo-agent CLI or pre-fetched data
|
||||||
|
- Position change detection with configurable threshold alerts
|
||||||
|
- Visibility score calculation (weighted by search volume)
|
||||||
|
- Brand vs non-brand keyword segmentation
|
||||||
|
- Competitor rank comparison
|
||||||
|
- Keyword segment grouping (by intent, cluster, landing page)
|
||||||
|
|
||||||
|
## Ranking Reporter
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 30-day ranking report
|
||||||
|
python scripts/ranking_reporter.py --target https://example.com --period 30 --json
|
||||||
|
|
||||||
|
# Quarterly comparison
|
||||||
|
python scripts/ranking_reporter.py --target https://example.com --period 90 --json
|
||||||
|
|
||||||
|
# Export with competitor comparison
|
||||||
|
python scripts/ranking_reporter.py --target https://example.com --competitor https://comp1.com --period 30 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Period-over-period ranking trends (improved/declined/stable)
|
||||||
|
- Top movers (biggest position gains/losses)
|
||||||
|
- Visibility score trend over time
|
||||||
|
- Segment-level performance breakdown
|
||||||
|
- Competitor overlap and position comparison
|
||||||
|
- Average position by keyword group
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (future); use `--input` for pre-fetched JSON |
|
||||||
|
| WebSearch / WebFetch | Supplementary live data |
|
||||||
|
| Notion MCP | Save audit report to database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"target": "https://example.com",
|
||||||
|
"total_keywords": 250,
|
||||||
|
"visibility_score": 68.5,
|
||||||
|
"positions": {
|
||||||
|
"top3": 15,
|
||||||
|
"top10": 48,
|
||||||
|
"top20": 92,
|
||||||
|
"top50": 180,
|
||||||
|
"top100": 230
|
||||||
|
},
|
||||||
|
"changes": {
|
||||||
|
"improved": 45,
|
||||||
|
"declined": 30,
|
||||||
|
"stable": 155,
|
||||||
|
"new": 12,
|
||||||
|
"lost": 8
|
||||||
|
},
|
||||||
|
"alerts": [
|
||||||
|
{
|
||||||
|
"keyword": "치과 임플란트 가격",
|
||||||
|
"old_position": 5,
|
||||||
|
"new_position": 15,
|
||||||
|
"change": -10,
|
||||||
|
"volume": 5400
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"segments": {
|
||||||
|
"brand": {"keywords": 30, "avg_position": 2.1},
|
||||||
|
"non_brand": {"keywords": 220, "avg_position": 24.5}
|
||||||
|
},
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Tracked website URL |
|
||||||
|
| Category | Select | Position Tracking |
|
||||||
|
| Priority | Select | Based on visibility trend |
|
||||||
|
| Found Date | Date | Tracking date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: RANK-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is (e.g., Visibility Score, SERP, Rank Tracker)
|
||||||
|
- URLs and code remain unchanged
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from asyncio import Semaphore
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
retry_if_exception_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load environment variables from a local .env file (no-op if the file is absent)
load_dotenv()

# Logging setup
# NOTE: configures the root logger at import time; importing this module
# affects logging for the whole process.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Generic type variable for typed helper signatures.
T = TypeVar("T")
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
    """Rate limiter using the token bucket algorithm.

    Tokens refill continuously at ``rate / per`` tokens per second; each
    ``acquire()`` consumes one token, sleeping when the bucket is empty.
    """

    def __init__(self, rate: float, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds (default: 1 second)
        """
        self.rate = rate
        self.per = per
        self.tokens = rate  # start with a full bucket
        # Monotonic timestamp of the last refill. Initialized lazily on the
        # first acquire() because the event loop clock requires a running loop.
        self.last_update: float | None = None
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Acquire a token, waiting if necessary."""
        async with self._lock:
            # Fix: use the event loop's monotonic clock instead of
            # datetime.now(). Wall-clock time can jump (NTP sync, DST,
            # manual changes), which would corrupt the elapsed-time delta
            # and either stall or flood the limiter.
            now = asyncio.get_running_loop().time()
            if self.last_update is None:
                self.last_update = now
            elapsed = now - self.last_update
            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
            self.last_update = now

            if self.tokens < 1:
                # Sleep exactly long enough for one token to refill.
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                self.tokens = 0
            else:
                self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Combines a concurrency cap (semaphore), a token-bucket rate limiter, and
    tenacity-based retries, and tracks simple request statistics.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # Simple counters surfaced by print_stats().
        # NOTE(review): "retries" is never incremented anywhere in this class;
        # either wire it to tenacity (e.g. a before_sleep callback) or drop it.
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        # NOTE(review): retrying on bare Exception also retries programming
        # errors (TypeError, KeyError, ...); consider narrowing to transport
        # errors — confirm which exceptions the concrete clients raise.
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute a request with rate limiting and retry.

        The semaphore bounds concurrency; the rate limiter spaces requests.
        Each attempt (including tenacity retries) increments "requests".
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                # Re-raise so the tenacity decorator can decide whether to retry.
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Individual failures (after retries are exhausted) are converted to
        {"error": str(e)} dicts rather than raised. With tqdm installed,
        results arrive in completion order and a progress bar is shown;
        without it, asyncio.gather preserves input order.
        """
        # Optional dependency: fall back to plain gather when tqdm is absent.
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            # Shield the batch from individual failures: never raises.
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                return {"error": str(e)}

        tasks = [execute(req) for req in requests]

        if has_tqdm:
            results = []
            for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
                result = await coro
                results.append(result)
            return results
        else:
            # return_exceptions=True is defensive; execute() already catches.
            return await asyncio.gather(*tasks, return_exceptions=True)

    def print_stats(self) -> None:
        """Print request statistics."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f" Total Requests: {self.stats['requests']}")
        self.logger.info(f" Successful: {self.stats['success']}")
        self.logger.info(f" Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton config instance: import `config` to share one ConfigManager
# (and a single load_dotenv() call) across the package.
config = ConfigManager()
|
||||||
@@ -0,0 +1,786 @@
|
|||||||
|
"""
|
||||||
|
Position Tracker - Keyword Ranking Monitor via Ahrefs Rank Tracker
|
||||||
|
==================================================================
|
||||||
|
Purpose: Monitor keyword positions, detect changes, calculate visibility scores
|
||||||
|
Python: 3.10+
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python position_tracker.py --target https://example.com --json
|
||||||
|
python position_tracker.py --target https://example.com --threshold 5 --json
|
||||||
|
python position_tracker.py --target https://example.com --segment brand --json
|
||||||
|
python position_tracker.py --target https://example.com --competitor https://comp1.com --json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# CTR curve weights for visibility score (position 1-100)
# Based on industry-standard organic CTR curves
# ---------------------------------------------------------------------------
# Head positions (1-10): explicit CTR values.
CTR_WEIGHTS: dict[int, float] = {
    1: 0.300,
    2: 0.150,
    3: 0.100,
    4: 0.070,
    5: 0.050,
    6: 0.038,
    7: 0.030,
    8: 0.025,
    9: 0.020,
    10: 0.018,
}

# Positions 11-20: linearly diminishing CTR.
CTR_WEIGHTS.update(
    {pos: round(0.015 - (pos - 11) * 0.001, 4) for pos in range(11, 21)}
)

# Positions 21-50: minimal CTR with a 0.001 floor.
CTR_WEIGHTS.update(
    {pos: round(max(0.005 - (pos - 21) * 0.0001, 0.001), 4) for pos in range(21, 51)}
)

# Positions 51-100: near-zero CTR.
CTR_WEIGHTS.update(dict.fromkeys(range(51, 101), 0.0005))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@dataclass
class KeywordPosition:
    """Single keyword ranking position.

    ``change`` is derived in ``__post_init__`` as previous minus current,
    so a positive value means the keyword moved up the rankings.
    """

    keyword: str                             # tracked search query
    position: int                            # current position (0 is treated as "not ranking" by callers)
    previous_position: Optional[int] = None  # position from the prior check; None = newly tracked
    change: int = 0                          # derived: previous_position - position
    volume: int = 0                          # search volume for the keyword
    url: str = ""                            # ranking URL
    intent: str = "informational"            # search-intent label
    is_brand: bool = False                   # True when the keyword contains a brand term

    def __post_init__(self):
        # Positive change = improvement (smaller position number).
        if self.previous_position is not None:
            self.change = self.previous_position - self.position
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class VisibilityScore:
    """Weighted visibility score based on CTR curve.

    Bucket counts are cumulative (a position-2 keyword counts toward
    top3, top10, top20, top50 and top100).
    """

    score: float = 0.0       # 0-100 normalized weighted visibility
    top3: int = 0            # keywords ranking in positions 1-3
    top10: int = 0           # keywords ranking in positions 1-10
    top20: int = 0           # keywords ranking in positions 1-20
    top50: int = 0           # keywords ranking in positions 1-50
    top100: int = 0          # keywords ranking in positions 1-100
    total_keywords: int = 0  # keywords with a valid position (1-100)

    @property
    def distribution(self) -> dict:
        """Cumulative bucket counts, for reporting/serialization."""
        return {
            "top3": self.top3,
            "top10": self.top10,
            "top20": self.top20,
            "top50": self.top50,
            "top100": self.top100,
        }
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class PositionAlert:
    """Alert for significant position change.

    ``severity`` is always recomputed from ``|change|`` in
    ``__post_init__``; the declared default is only a placeholder.
    """

    keyword: str
    old_position: int
    new_position: int
    change: int
    volume: int = 0
    severity: str = "medium"

    def __post_init__(self):
        # Map the magnitude of the move onto a severity label, checking
        # thresholds from most to least severe.
        thresholds = ((20, "critical"), (10, "high"), (5, "medium"))
        magnitude = abs(self.change)
        self.severity = next(
            (label for floor, label in thresholds if magnitude >= floor),
            "low",
        )
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CompetitorComparison:
    """Competitor ranking comparison result.

    Populated from the Ahrefs rank-tracker-competitors-overview payload;
    all counts default to zero when the comparison data is unavailable.
    """

    competitor: str                # competitor site URL
    overlap_keywords: int = 0      # keywords both sites rank for
    competitor_better: int = 0     # overlap keywords where the competitor ranks higher
    target_better: int = 0         # overlap keywords where the target ranks higher
    avg_position_gap: float = 0.0  # average position gap as reported by the API
    top_gaps: list = field(default_factory=list)  # largest gaps (caller caps at 10)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SegmentData:
    """Keyword segment aggregation.

    One instance per segment (brand / non_brand / intent_*), produced by
    ``PositionTracker.segment_keywords``.
    """

    name: str                 # segment identifier, e.g. "brand" or "intent_commercial"
    keywords: int = 0         # number of keywords in the segment
    avg_position: float = 0.0 # mean position over keywords that currently rank (>0)
    visibility: float = 0.0   # 0-100 visibility score for the segment
    improved: int = 0         # keywords that moved up since the last check
    declined: int = 0         # keywords that moved down
    stable: int = 0           # unchanged keywords with prior data
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TrackingResult:
    """Complete position tracking result.

    Aggregates everything ``PositionTracker.analyze`` produces; serialize
    with :meth:`to_dict` for JSON output.
    """

    target: str                    # target site URL the report covers
    total_keywords: int = 0        # number of tracked keywords retrieved
    visibility_score: float = 0.0  # 0-100 weighted visibility (see calculate_visibility)
    visibility: Optional[VisibilityScore] = None  # full distribution, when computed
    positions: list[KeywordPosition] = field(default_factory=list)
    changes: dict = field(default_factory=lambda: {
        "improved": 0, "declined": 0, "stable": 0, "new": 0, "lost": 0,
    })
    alerts: list[PositionAlert] = field(default_factory=list)
    segments: dict[str, SegmentData] = field(default_factory=dict)
    competitors: list[CompetitorComparison] = field(default_factory=list)
    timestamp: str = ""            # ISO-8601; auto-filled in __post_init__ when empty

    def __post_init__(self):
        # Stamp creation time unless the caller supplied one explicitly.
        if not self.timestamp:
            self.timestamp = datetime.now().isoformat()

    def to_dict(self) -> dict:
        """Convert to JSON-serializable dictionary."""
        result = {
            "target": self.target,
            "total_keywords": self.total_keywords,
            "visibility_score": round(self.visibility_score, 2),
            # Bucket distribution; empty dict when visibility wasn't computed.
            "positions": self.visibility.distribution if self.visibility else {},
            "changes": self.changes,
            "alerts": [asdict(a) for a in self.alerts],
            "segments": {
                k: asdict(v) for k, v in self.segments.items()
            },
            "competitors": [asdict(c) for c in self.competitors],
            "keyword_details": [asdict(p) for p in self.positions],
            "timestamp": self.timestamp,
        }
        return result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Position Tracker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
class PositionTracker(BaseAsyncClient):
    """Track keyword ranking positions via Ahrefs Rank Tracker.

    All Ahrefs data is fetched by shelling out to an ``mcp-cli``
    executable that bridges the Ahrefs MCP tools; every ``_call_*``
    helper degrades to an empty result when that bridge is missing.
    # NOTE(review): assumes `mcp-cli` is on PATH in production — confirm.
    """

    def __init__(self):
        # Conservative concurrency/rate defaults for the Ahrefs bridge.
        super().__init__(
            max_concurrent=5,
            requests_per_second=2.0,
            logger=logger,
        )
        # Brand terms derived from the target domain; populated lazily by
        # get_current_positions().
        self.brand_terms: list[str] = []

    def _extract_domain_brand(self, target: str) -> list[str]:
        """Extract brand terms from the target domain name.

        Returns the deduplicated lowercase domain labels with common TLD
        parts removed, plus the hyphen-split pieces of each label.
        """
        parsed = urlparse(target)
        hostname = parsed.hostname or target
        # Remove TLD and www prefix
        parts = hostname.replace("www.", "").split(".")
        brand_parts = []
        for part in parts:
            if part not in ("com", "co", "kr", "net", "org", "io", "ai", "www"):
                brand_parts.append(part.lower())
                # Also split hyphenated forms (camelCase is NOT split here,
                # despite the original comment claiming otherwise).
                if "-" in part:
                    brand_parts.extend(part.lower().split("-"))
        return list(set(brand_parts))

    async def get_project_keywords(self, target: str) -> list[dict]:
        """
        Fetch tracked keywords from Ahrefs management-project-keywords.

        Uses Ahrefs MCP tool: management-project-keywords
        Returns list of keyword dicts with keyword, volume, intent info.
        Returns [] when no matching project exists.
        """
        logger.info(f"Fetching project keywords for: {target}")

        # Step 1: Get project list to find matching project
        projects = await self._call_ahrefs_projects(target)
        if not projects:
            logger.warning(f"No Ahrefs project found for {target}. Using rank-tracker-overview directly.")
            return []

        # First project is assumed to be the relevant one.
        project_id = projects[0].get("id", "")

        # Step 2: Fetch keywords for the project
        keywords_data = await self._call_ahrefs_project_keywords(project_id)
        return keywords_data

    async def _call_ahrefs_projects(self, target: str) -> list[dict]:
        """
        Call Ahrefs management-projects MCP tool.
        In production, this calls the MCP tool. For standalone, reads from config/cache.
        Returns [] on any failure (missing CLI, timeout, bad JSON).
        """
        # Simulated MCP call structure - in production this calls:
        # mcp__ahrefs__management-projects
        logger.info("Calling Ahrefs management-projects...")
        try:
            import subprocess
            result = subprocess.run(
                ["mcp-cli", "call", "ahrefs/management-projects", json.dumps({})],
                capture_output=True, text=True, timeout=30,
            )
            if result.returncode == 0:
                return json.loads(result.stdout).get("projects", [])
        except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError):
            pass
        # Return empty if MCP not available - caller handles gracefully
        return []

    async def _call_ahrefs_project_keywords(self, project_id: str) -> list[dict]:
        """
        Call Ahrefs management-project-keywords MCP tool.

        Returns [] on any failure (missing CLI, timeout, bad JSON).
        """
        logger.info(f"Calling Ahrefs management-project-keywords for project: {project_id}")
        try:
            import subprocess
            result = subprocess.run(
                ["mcp-cli", "call", "ahrefs/management-project-keywords",
                 json.dumps({"project_id": project_id})],
                capture_output=True, text=True, timeout=30,
            )
            if result.returncode == 0:
                return json.loads(result.stdout).get("keywords", [])
        except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError):
            pass
        return []

    async def get_current_positions(self, target: str) -> list[KeywordPosition]:
        """
        Fetch current keyword positions via Ahrefs rank-tracker-overview.

        Also refreshes ``self.brand_terms`` from the target domain so
        brand classification matches the current target.

        Returns list of KeywordPosition objects with current and previous positions.
        """
        logger.info(f"Fetching current positions for: {target}")
        self.brand_terms = self._extract_domain_brand(target)

        raw_data = await self._call_rank_tracker_overview(target)
        positions: list[KeywordPosition] = []

        for item in raw_data:
            keyword = item.get("keyword", "")
            current_pos = item.get("position", 0)
            prev_pos = item.get("previous_position")
            volume = item.get("volume", 0)
            url = item.get("url", "")
            intent = item.get("intent", "informational")

            # Determine if brand keyword
            is_brand = self._is_brand_keyword(keyword)

            kp = KeywordPosition(
                keyword=keyword,
                position=current_pos,
                previous_position=prev_pos,
                volume=volume,
                url=url,
                intent=intent,
                is_brand=is_brand,
            )
            positions.append(kp)

        logger.info(f"Retrieved {len(positions)} keyword positions")
        return positions

    async def _call_rank_tracker_overview(self, target: str) -> list[dict]:
        """
        Call Ahrefs rank-tracker-overview MCP tool.

        Accepts either a "keywords" or "results" payload key; returns []
        on any failure.
        """
        logger.info(f"Calling Ahrefs rank-tracker-overview for: {target}")
        try:
            import subprocess
            result = subprocess.run(
                ["mcp-cli", "call", "ahrefs/rank-tracker-overview",
                 json.dumps({"target": target})],
                capture_output=True, text=True, timeout=60,
            )
            if result.returncode == 0:
                data = json.loads(result.stdout)
                return data.get("keywords", data.get("results", []))
        except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError):
            pass
        return []

    def _is_brand_keyword(self, keyword: str) -> bool:
        """Check if a keyword is brand-related based on domain name."""
        keyword_lower = keyword.lower()
        # Substring match against each extracted brand term.
        for term in self.brand_terms:
            if term in keyword_lower:
                return True
        return False

    def detect_changes(
        self,
        positions: list[KeywordPosition],
        threshold: int = 3,
    ) -> tuple[dict, list[PositionAlert]]:
        """
        Detect significant position changes and generate alerts.

        Args:
            positions: List of current keyword positions with previous data
            threshold: Minimum position change to trigger an alert

        Returns:
            Tuple of (change_summary_dict, list_of_alerts)
        """
        changes = {
            "improved": 0,
            "declined": 0,
            "stable": 0,
            "new": 0,
            "lost": 0,
        }
        alerts: list[PositionAlert] = []

        for kp in positions:
            # No prior data: newly tracked keyword, nothing to compare.
            if kp.previous_position is None:
                changes["new"] += 1
                continue

            # Position 0 with prior ranking means the keyword dropped out
            # entirely; always alert regardless of threshold.
            if kp.position == 0 and kp.previous_position > 0:
                changes["lost"] += 1
                alert = PositionAlert(
                    keyword=kp.keyword,
                    old_position=kp.previous_position,
                    new_position=0,
                    change=-kp.previous_position,
                    volume=kp.volume,
                )
                alerts.append(alert)
                continue

            change = kp.change  # positive = improved, negative = declined
            if change > 0:
                changes["improved"] += 1
            elif change < 0:
                changes["declined"] += 1
            else:
                changes["stable"] += 1

            # Generate alert if change exceeds threshold
            if abs(change) >= threshold:
                alert = PositionAlert(
                    keyword=kp.keyword,
                    old_position=kp.previous_position,
                    new_position=kp.position,
                    change=change,
                    volume=kp.volume,
                )
                alerts.append(alert)

        # Sort alerts by severity (critical first) then by volume (high first)
        severity_order = {"critical": 0, "high": 1, "medium": 2, "low": 3}
        alerts.sort(key=lambda a: (severity_order.get(a.severity, 4), -a.volume))

        logger.info(
            f"Changes detected - improved: {changes['improved']}, "
            f"declined: {changes['declined']}, stable: {changes['stable']}, "
            f"new: {changes['new']}, lost: {changes['lost']}"
        )
        logger.info(f"Alerts generated: {len(alerts)} (threshold: {threshold})")

        return changes, alerts

    def calculate_visibility(self, positions: list[KeywordPosition]) -> VisibilityScore:
        """
        Calculate weighted visibility score based on CTR curve.

        Visibility = sum(keyword_volume * ctr_weight_for_position) / sum(keyword_volume)
        Score normalized to 0-100 scale (100 = every keyword at position 1).
        Keywords with position <= 0 or > 100 are ignored entirely.
        """
        vis = VisibilityScore()
        total_weighted = 0.0
        total_volume = 0

        for kp in positions:
            if kp.position <= 0 or kp.position > 100:
                continue

            vis.total_keywords += 1
            volume = max(kp.volume, 1)  # Avoid zero volume
            total_volume += volume

            # Position bucket counting (buckets are cumulative, not elif).
            if kp.position <= 3:
                vis.top3 += 1
            if kp.position <= 10:
                vis.top10 += 1
            if kp.position <= 20:
                vis.top20 += 1
            if kp.position <= 50:
                vis.top50 += 1
            if kp.position <= 100:
                vis.top100 += 1

            # Weighted visibility
            ctr = CTR_WEIGHTS.get(kp.position, 0.0005)
            total_weighted += volume * ctr

        if total_volume > 0:
            # Normalize: max possible is if all keywords were position 1
            max_possible = total_volume * CTR_WEIGHTS[1]
            vis.score = (total_weighted / max_possible) * 100.0
        else:
            vis.score = 0.0

        logger.info(
            f"Visibility score: {vis.score:.2f} | "
            f"Top3: {vis.top3}, Top10: {vis.top10}, Top20: {vis.top20}"
        )

        return vis

    def segment_keywords(
        self,
        positions: list[KeywordPosition],
        filter_segment: Optional[str] = None,
    ) -> dict[str, SegmentData]:
        """
        Segment keywords into brand/non-brand and by intent type.

        Each keyword lands in exactly one of brand/non_brand AND exactly
        one intent_* segment, so segments overlap by design.

        Args:
            positions: List of keyword positions
            filter_segment: Optional filter - 'brand', 'non_brand', or intent type

        Returns:
            Dictionary of segment name to SegmentData (empty segments are
            omitted)
        """
        segments: dict[str, list[KeywordPosition]] = {
            "brand": [],
            "non_brand": [],
        }
        intent_segments: dict[str, list[KeywordPosition]] = {}

        for kp in positions:
            # Brand segmentation
            if kp.is_brand:
                segments["brand"].append(kp)
            else:
                segments["non_brand"].append(kp)

            # Intent segmentation
            intent_key = kp.intent.lower() if kp.intent else "informational"
            if intent_key not in intent_segments:
                intent_segments[intent_key] = []
            intent_segments[intent_key].append(kp)

        # Merge intent segments into main segments
        for intent_key, kps in intent_segments.items():
            segments[f"intent_{intent_key}"] = kps

        # Calculate segment stats
        result: dict[str, SegmentData] = {}
        for seg_name, kps in segments.items():
            if filter_segment and seg_name != filter_segment:
                continue

            if not kps:
                continue

            # Average position only over keywords that currently rank.
            active_positions = [kp for kp in kps if kp.position > 0]
            avg_pos = (
                sum(kp.position for kp in active_positions) / len(active_positions)
                if active_positions else 0.0
            )

            vis = self.calculate_visibility(kps)

            improved = sum(1 for kp in kps if kp.change > 0)
            declined = sum(1 for kp in kps if kp.change < 0)
            # "Stable" requires prior data, otherwise new keywords would
            # be counted as stable (change defaults to 0).
            stable = sum(1 for kp in kps if kp.change == 0 and kp.previous_position is not None)

            result[seg_name] = SegmentData(
                name=seg_name,
                keywords=len(kps),
                avg_position=round(avg_pos, 1),
                visibility=round(vis.score, 2),
                improved=improved,
                declined=declined,
                stable=stable,
            )

        return result

    async def compare_competitors(
        self,
        target: str,
        competitors: list[str],
    ) -> list[CompetitorComparison]:
        """
        Compare ranking positions against competitors.

        Uses Ahrefs rank-tracker-competitors-overview MCP tool.
        Competitors with no data still yield a zeroed comparison entry.
        """
        comparisons: list[CompetitorComparison] = []

        for competitor in competitors:
            logger.info(f"Comparing with competitor: {competitor}")
            comp_data = await self._call_competitors_overview(target, competitor)

            comparison = CompetitorComparison(competitor=competitor)

            if comp_data:
                comparison.overlap_keywords = comp_data.get("overlap_keywords", 0)
                comparison.competitor_better = comp_data.get("competitor_better", 0)
                comparison.target_better = comp_data.get("target_better", 0)
                comparison.avg_position_gap = comp_data.get("avg_position_gap", 0.0)

                # Extract top gaps (keywords where competitor outranks us most)
                top_gaps = comp_data.get("top_gaps", [])
                comparison.top_gaps = top_gaps[:10]

            comparisons.append(comparison)

        return comparisons

    async def _call_competitors_overview(self, target: str, competitor: str) -> dict:
        """
        Call Ahrefs rank-tracker-competitors-overview MCP tool.

        Returns {} on any failure.
        """
        logger.info(f"Calling Ahrefs rank-tracker-competitors-overview...")
        try:
            import subprocess
            result = subprocess.run(
                ["mcp-cli", "call", "ahrefs/rank-tracker-competitors-overview",
                 json.dumps({"target": target, "competitor": competitor})],
                capture_output=True, text=True, timeout=60,
            )
            if result.returncode == 0:
                return json.loads(result.stdout)
        except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError):
            pass
        return {}

    async def analyze(
        self,
        target: str,
        threshold: int = 3,
        competitors: Optional[list[str]] = None,
        segment_filter: Optional[str] = None,
    ) -> TrackingResult:
        """
        Orchestrate full position tracking analysis.

        Args:
            target: Target website URL
            threshold: Position change threshold for alerts
            competitors: List of competitor URLs to compare
            segment_filter: Optional segment filter (brand, non_brand, intent_*)

        Returns:
            Complete TrackingResult with all analysis data; an empty
            result (zero keywords) when no position data is available.
        """
        logger.info(f"Starting position tracking analysis for: {target}")
        logger.info(f"Threshold: {threshold}, Competitors: {competitors or 'none'}")

        result = TrackingResult(target=target)

        # Step 1: Fetch current positions
        positions = await self.get_current_positions(target)

        if not positions:
            logger.warning("No position data retrieved. Check Ahrefs project configuration.")
            return result

        result.positions = positions
        result.total_keywords = len(positions)

        # Step 2: Detect changes and generate alerts
        changes, alerts = self.detect_changes(positions, threshold)
        result.changes = changes
        result.alerts = alerts

        # Step 3: Calculate visibility score
        visibility = self.calculate_visibility(positions)
        result.visibility = visibility
        result.visibility_score = visibility.score

        # Step 4: Segment keywords
        segments = self.segment_keywords(positions, segment_filter)
        result.segments = segments

        # Step 5: Compare with competitors (if provided)
        if competitors:
            comp_results = await self.compare_competitors(target, competitors)
            result.competitors = comp_results

        logger.info(f"Analysis complete. Total keywords: {result.total_keywords}")
        logger.info(f"Visibility score: {result.visibility_score:.2f}")

        return result
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output formatters
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def format_text_report(result: TrackingResult) -> str:
    """Render a TrackingResult as a plain-text report.

    Sections, in order: header, visibility overview, position
    distribution, change summary, alerts (top 20), segments, and
    competitor comparison. Empty sections are omitted.
    """
    out: list[str] = []
    add = out.append

    # Header
    add("=" * 60)
    add(f"Position Tracking Report: {result.target}")
    add(f"Timestamp: {result.timestamp}")
    add("=" * 60)

    # Visibility overview
    add(f"\nVisibility Score: {result.visibility_score:.2f}/100")
    add(f"Total Keywords Tracked: {result.total_keywords}")

    vis = result.visibility
    if vis:
        add(f"\nPosition Distribution:")
        add(f" Top 3: {vis.top3}")
        add(f" Top 10: {vis.top10}")
        add(f" Top 20: {vis.top20}")
        add(f" Top 50: {vis.top50}")
        add(f" Top 100: {vis.top100}")

    # Changes summary, driven by a (label, key) table
    summary = result.changes
    add(f"\nPosition Changes:")
    for label, key in (
        (" Improved", "improved"),
        (" Declined", "declined"),
        (" Stable", "stable"),
        (" New", "new"),
        (" Lost", "lost"),
    ):
        add(f"{label}: {summary.get(key, 0)}")

    # Alerts (capped at 20 entries)
    if result.alerts:
        add(f"\nAlerts ({len(result.alerts)}):")
        add("-" * 60)
        for alert in result.alerts[:20]:
            trend = "UP" if alert.change > 0 else "DOWN"
            add(
                f" [{alert.severity.upper()}] {alert.keyword}: "
                f"{alert.old_position} -> {alert.new_position} "
                f"({trend} {abs(alert.change)}) | Vol: {alert.volume}"
            )

    # Segments
    if result.segments:
        add(f"\nSegments:")
        add("-" * 60)
        for seg_name, seg in result.segments.items():
            add(
                f" {seg_name}: {seg.keywords} keywords, "
                f"avg pos {seg.avg_position}, "
                f"vis {seg.visibility}"
            )

    # Competitor comparison
    if result.competitors:
        add(f"\nCompetitor Comparison:")
        add("-" * 60)
        for comp in result.competitors:
            add(f" vs {comp.competitor}:")
            add(f" Overlap: {comp.overlap_keywords} keywords")
            add(f" We win: {comp.target_better}")
            add(f" They win: {comp.competitor_better}")
            add(f" Avg gap: {comp.avg_position_gap:.1f}")

    add("\n" + "=" * 60)
    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments for the position tracker CLI.

    Returns:
        argparse.Namespace with: target (str), threshold (int),
        segment (str | None), competitors (list[str]), json_output (bool),
        output (str | None).

    Note: --competitor uses default=None plus post-parse normalization.
    Passing default=[] to an action="append" option would let argparse
    append into the shared default list object across parse calls.
    """
    parser = argparse.ArgumentParser(
        description="Position Tracker - Monitor keyword rankings via Ahrefs Rank Tracker",
    )
    parser.add_argument(
        "--target",
        required=True,
        help="Target website URL (e.g., https://example.com)",
    )
    parser.add_argument(
        "--threshold",
        type=int,
        default=3,
        help="Position change threshold for alerts (default: 3)",
    )
    parser.add_argument(
        "--segment",
        choices=["brand", "non_brand", "intent_informational",
                 "intent_commercial", "intent_transactional", "intent_navigational"],
        default=None,
        help="Filter results by keyword segment",
    )
    parser.add_argument(
        "--competitor",
        action="append",
        dest="competitors",
        default=None,
        help="Competitor URL to compare (repeatable)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output in JSON format",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Save output to file path",
    )
    args = parser.parse_args()
    # Normalize: callers expect a list, never None.
    if args.competitors is None:
        args.competitors = []
    return args
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """CLI entry point: run the position analysis and emit the report."""
    args = parse_args()
    tracker = PositionTracker()

    result = await tracker.analyze(
        target=args.target,
        threshold=args.threshold,
        competitors=args.competitors,
        segment_filter=args.segment,
    )

    # Render once, then route to file or stdout.
    rendered = (
        json.dumps(result.to_dict(), ensure_ascii=False, indent=2)
        if args.json_output
        else format_text_report(result)
    )

    if args.output:
        with open(args.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output saved to: {args.output}")
    else:
        print(rendered)

    tracker.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -0,0 +1,728 @@
|
|||||||
|
"""
|
||||||
|
Ranking Reporter - Ranking Performance Reports with Trends
|
||||||
|
==========================================================
|
||||||
|
Purpose: Generate ranking reports with trend analysis, top movers, and competitor comparison
|
||||||
|
Python: 3.10+
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
python ranking_reporter.py --target https://example.com --period 30 --json
|
||||||
|
python ranking_reporter.py --target https://example.com --period 90 --json
|
||||||
|
python ranking_reporter.py --target https://example.com --competitor https://comp1.com --period 30 --json
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Optional
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# CTR weights for impact scoring (same as position_tracker).
# Approximate organic click-through rate by SERP position:
#   1-10  : explicit curve values,
#   11-20 : linear decay from 1.5% down,
#   21-50 : slow decay floored at 0.1%,
#   51-100: flat 0.05%.
CTR_WEIGHTS: dict[int, float] = {
    1: 0.300, 2: 0.150, 3: 0.100, 4: 0.070, 5: 0.050,
    6: 0.038, 7: 0.030, 8: 0.025, 9: 0.020, 10: 0.018,
}
CTR_WEIGHTS.update(
    {pos: round(0.015 - (pos - 11) * 0.001, 4) for pos in range(11, 21)}
)
CTR_WEIGHTS.update(
    {pos: round(max(0.005 - (pos - 21) * 0.0001, 0.001), 4) for pos in range(21, 51)}
)
CTR_WEIGHTS.update(dict.fromkeys(range(51, 101), 0.0005))
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
@dataclass
class PositionSnapshot:
    """One ranking measurement for a keyword on a given date."""
    date: str        # measurement date string (sorted lexicographically, so ISO expected)
    position: int    # SERP position; values <= 0 are treated as "not ranking" downstream
    volume: int = 0  # monthly search volume reported with this snapshot
    url: str = ""    # ranking URL, when provided by the data source
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RankingTrend:
    """Keyword ranking trend over a time window of position snapshots."""
    keyword: str
    positions_over_time: list[PositionSnapshot] = field(default_factory=list)
    trend_direction: str = "stable"  # one of: improved, declined, stable, new, lost
    avg_position: float = 0.0
    current_position: int = 0
    start_position: int = 0
    total_change: int = 0  # start - current; positive means the rank improved
    volume: int = 0
    intent: str = "informational"
    is_brand: bool = False

    def compute_trend(self):
        """Derive direction and summary stats from the position history.

        Snapshots with position <= 0 are ignored; if none remain the
        keyword is marked "lost". Direction comes from the sign of a
        least-squares slope over the ranked positions.
        """
        if not self.positions_over_time:
            self.trend_direction = "stable"
            return

        ranked = [snap.position for snap in self.positions_over_time if snap.position > 0]
        if not ranked:
            self.trend_direction = "lost"
            return

        count = len(ranked)
        self.avg_position = sum(ranked) / count
        self.current_position = ranked[-1]
        self.start_position = ranked[0]
        # Position numbers shrink as rank improves, so start - current > 0 is a gain.
        self.total_change = self.start_position - self.current_position

        if count >= 2:
            # Least-squares slope of position vs. sample index.
            x_mean = (count - 1) / 2.0
            y_mean = self.avg_position
            numerator = sum(
                (idx - x_mean) * (pos - y_mean) for idx, pos in enumerate(ranked)
            )
            denominator = sum((idx - x_mean) ** 2 for idx in range(count))

            if denominator > 0:
                slope = numerator / denominator
                # Negative slope means the position number is falling = improving.
                if slope < -0.5:
                    self.trend_direction = "improved"
                elif slope > 0.5:
                    self.trend_direction = "declined"
                else:
                    self.trend_direction = "stable"
        else:
            self.trend_direction = "stable"

        # Backfill volume from the most recent snapshot when not set.
        if self.volume == 0 and self.positions_over_time:
            self.volume = self.positions_over_time[-1].volume
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TopMover:
    """Keyword with a significant position change over the period."""
    keyword: str
    position_change: int       # positive = moved up the SERP (improved)
    current_position: int = 0
    previous_position: int = 0
    volume: int = 0
    impact_score: float = 0.0  # estimated traffic impact: volume * |CTR delta|
    direction: str = "improved"

    def calculate_impact(self):
        """Score the move as search volume times the change in expected CTR."""
        def expected_ctr(pos: int) -> float:
            # Unranked (pos <= 0) contributes zero CTR; deep ranks get the floor value.
            return CTR_WEIGHTS.get(pos, 0.0005) if pos > 0 else 0.0

        delta = abs(expected_ctr(self.current_position) - expected_ctr(self.previous_position))
        self.impact_score = round(self.volume * delta, 2)
        self.direction = "improved" if self.position_change > 0 else "declined"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SegmentReport:
    """Performance breakdown for one keyword segment (brand/intent bucket)."""
    segment_name: str                 # e.g. "brand", "non_brand", "intent_commercial"
    total_keywords: int = 0
    avg_position: float = 0.0         # mean current position over ranked keywords
    avg_position_change: float = 0.0  # mean of per-keyword total_change
    visibility_score: float = 0.0     # CTR-weighted visibility, 0-100
    improved_count: int = 0
    declined_count: int = 0
    stable_count: int = 0
    top_gainers: list[TopMover] = field(default_factory=list)
    top_losers: list[TopMover] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CompetitorReport:
    """Competitor comparison for a reporting period."""
    competitor: str
    our_visibility: float = 0.0    # target's CTR-weighted visibility
    their_visibility: float = 0.0  # competitor's CTR-weighted visibility
    overlap_keywords: int = 0      # keywords both sites rank for
    keywords_we_lead: int = 0
    keywords_they_lead: int = 0
    notable_gaps: list[dict] = field(default_factory=list)  # per-keyword position gaps
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class RankingReport:
    """Complete ranking performance report for one target and period."""
    target: str
    period_days: int = 30
    period_start: str = ""  # YYYY-MM-DD; derived from period_days when empty
    period_end: str = ""    # YYYY-MM-DD; defaults to today when empty
    total_keywords: int = 0
    current_visibility: float = 0.0
    previous_visibility: float = 0.0
    visibility_change: float = 0.0
    trend_summary: dict = field(default_factory=lambda: {
        "improved": 0, "declined": 0, "stable": 0, "new": 0, "lost": 0,
    })
    top_gainers: list[TopMover] = field(default_factory=list)
    top_losers: list[TopMover] = field(default_factory=list)
    segments: list[SegmentReport] = field(default_factory=list)
    competitors: list[CompetitorReport] = field(default_factory=list)
    keyword_trends: list[RankingTrend] = field(default_factory=list)
    timestamp: str = ""

    def __post_init__(self):
        # Backfill the timestamp and reporting window when not supplied.
        now = datetime.now()
        self.timestamp = self.timestamp or now.isoformat()
        self.period_end = self.period_end or now.strftime("%Y-%m-%d")
        if not self.period_start:
            self.period_start = (now - timedelta(days=self.period_days)).strftime("%Y-%m-%d")

    def to_dict(self) -> dict:
        """Convert the report to a JSON-serializable dictionary."""
        def trend_row(t: RankingTrend) -> dict:
            # Compact per-keyword view; full snapshot history is omitted on purpose.
            return {
                "keyword": t.keyword,
                "trend_direction": t.trend_direction,
                "avg_position": round(t.avg_position, 1),
                "current_position": t.current_position,
                "start_position": t.start_position,
                "total_change": t.total_change,
                "volume": t.volume,
            }

        return {
            "target": self.target,
            "period": {
                "days": self.period_days,
                "start": self.period_start,
                "end": self.period_end,
            },
            "total_keywords": self.total_keywords,
            "visibility": {
                "current": round(self.current_visibility, 2),
                "previous": round(self.previous_visibility, 2),
                "change": round(self.visibility_change, 2),
            },
            "trend_summary": self.trend_summary,
            "top_gainers": [asdict(mover) for mover in self.top_gainers],
            "top_losers": [asdict(mover) for mover in self.top_losers],
            "segments": [asdict(seg) for seg in self.segments],
            "competitors": [asdict(comp) for comp in self.competitors],
            "keyword_trends": [trend_row(t) for t in self.keyword_trends],
            "timestamp": self.timestamp,
        }
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Ranking Reporter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
class RankingReporter(BaseAsyncClient):
    """Generate ranking performance reports with trend analysis.

    Data is fetched best-effort via the `mcp-cli` subprocess; when the CLI
    is unavailable or a call fails, empty results are returned and the
    report degrades gracefully.
    """

    # Domain labels that are TLDs/common prefixes, never brand terms.
    _NON_BRAND_LABELS = ("com", "co", "kr", "net", "org", "io", "ai", "www")

    def __init__(self):
        super().__init__(
            max_concurrent=5,
            requests_per_second=2.0,
            logger=logger,
        )

    def _extract_domain_brand(self, target: str) -> list[str]:
        """Extract candidate brand terms from the target domain name.

        Hyphenated labels contribute both the whole label and its parts,
        so "my-brand.co.kr" yields {"my-brand", "my", "brand"}.
        """
        parsed = urlparse(target)
        hostname = parsed.hostname or target
        labels = hostname.replace("www.", "").split(".")
        brand_parts: list[str] = []
        for label in labels:
            if label in self._NON_BRAND_LABELS:
                continue
            lowered = label.lower()
            brand_parts.append(lowered)
            if "-" in label:
                brand_parts.extend(lowered.split("-"))
        return list(set(brand_parts))

    @staticmethod
    def _call_mcp_tool(tool: str, params: dict, timeout: int = 60):
        """Invoke an MCP tool via the `mcp-cli` subprocess (shared helper).

        Args:
            tool: Fully-qualified MCP tool name (e.g. "ahrefs/...").
            params: JSON-serializable parameter dict passed to the tool.
            timeout: Subprocess timeout in seconds.

        Returns:
            Parsed JSON payload, or None when mcp-cli is missing, times out,
            exits non-zero, or emits malformed JSON (best-effort by design).
        """
        import subprocess
        try:
            result = subprocess.run(
                ["mcp-cli", "call", tool, json.dumps(params)],
                capture_output=True, text=True, timeout=timeout,
            )
            if result.returncode == 0:
                return json.loads(result.stdout)
            logger.debug(f"mcp-cli {tool} exited with code {result.returncode}")
        except (FileNotFoundError, subprocess.TimeoutExpired, json.JSONDecodeError) as exc:
            logger.debug(f"mcp-cli {tool} call failed: {exc}")
        return None

    async def get_historical_positions(
        self,
        target: str,
        period_days: int = 30,
    ) -> list[RankingTrend]:
        """Fetch historical position data from Ahrefs rank-tracker-overview.

        Groups raw per-date rows by keyword into RankingTrend objects,
        sorts each keyword's snapshots chronologically, and computes trends.

        Returns:
            List of RankingTrend objects with position snapshots over time.
        """
        logger.info(f"Fetching historical positions for {target} ({period_days} days)")
        brand_terms = self._extract_domain_brand(target)

        end_date = datetime.now().strftime("%Y-%m-%d")
        start_date = (datetime.now() - timedelta(days=period_days)).strftime("%Y-%m-%d")

        raw_data = await self._call_rank_tracker_historical(target, start_date, end_date)

        trends: dict[str, RankingTrend] = {}
        for item in raw_data:
            keyword = item.get("keyword", "")
            trend = trends.get(keyword)
            if trend is None:
                trend = RankingTrend(
                    keyword=keyword,
                    volume=item.get("volume", 0),
                    intent=item.get("intent", "informational"),
                    is_brand=any(term in keyword.lower() for term in brand_terms),
                )
                trends[keyword] = trend

            trend.positions_over_time.append(PositionSnapshot(
                date=item.get("date", end_date),
                position=item.get("position", 0),
                volume=item.get("volume", 0),
                url=item.get("url", ""),
            ))

        # Rows may arrive out of order; sort by date before trend analysis.
        for trend in trends.values():
            trend.positions_over_time.sort(key=lambda s: s.date)
            trend.compute_trend()

        logger.info(f"Retrieved trends for {len(trends)} keywords")
        return list(trends.values())

    async def _call_rank_tracker_historical(
        self, target: str, start_date: str, end_date: str,
    ) -> list[dict]:
        """Call Ahrefs rank-tracker-overview with a date range."""
        logger.info(f"Calling Ahrefs rank-tracker-overview ({start_date} to {end_date})...")
        data = self._call_mcp_tool(
            "ahrefs/rank-tracker-overview",
            {"target": target, "date_from": start_date, "date_to": end_date},
        )
        # Guard against non-dict payloads (a bare list would have crashed on .get).
        if isinstance(data, dict):
            return data.get("keywords", data.get("results", []))
        return []

    def calculate_trends(self, trends: list[RankingTrend]) -> dict:
        """Tally keyword trend directions into an overall summary.

        Returns:
            Dict with improved/declined/stable/new/lost counts; unknown
            directions are counted as "stable".
        """
        summary = {"improved": 0, "declined": 0, "stable": 0, "new": 0, "lost": 0}
        for trend in trends:
            bucket = trend.trend_direction if trend.trend_direction in summary else "stable"
            summary[bucket] += 1

        logger.info(
            f"Trend summary: improved={summary['improved']}, "
            f"declined={summary['declined']}, stable={summary['stable']}"
        )
        return summary

    def find_top_movers(
        self,
        trends: list[RankingTrend],
        limit: int = 10,
    ) -> tuple[list[TopMover], list[TopMover]]:
        """Find keywords with the biggest position gains and losses.

        Only keywords with at least two snapshots and valid positions at
        both ends of the window are considered.

        Returns:
            (top_gainers, top_losers), each sorted by impact score descending
            and truncated to `limit`.
        """
        gainers: list[TopMover] = []
        losers: list[TopMover] = []

        for trend in trends:
            if len(trend.positions_over_time) < 2:
                continue
            first_pos = trend.start_position
            last_pos = trend.current_position
            if first_pos <= 0 or last_pos <= 0:
                continue

            change = first_pos - last_pos  # positive = improved
            if change == 0:
                continue

            mover = TopMover(
                keyword=trend.keyword,
                position_change=change,
                current_position=last_pos,
                previous_position=first_pos,
                volume=trend.volume,
            )
            mover.calculate_impact()
            (gainers if change > 0 else losers).append(mover)

        gainers.sort(key=lambda m: m.impact_score, reverse=True)
        losers.sort(key=lambda m: m.impact_score, reverse=True)

        logger.info(f"Top movers: {len(gainers)} gainers, {len(losers)} losers")
        return gainers[:limit], losers[:limit]

    def _calculate_visibility_score(
        self, trends: list[RankingTrend], use_start: bool = False,
    ) -> float:
        """Calculate a 0-100 CTR-weighted visibility score.

        Args:
            trends: Keyword trends to score.
            use_start: Score period-start positions instead of current ones.
        """
        total_weighted = 0.0
        total_volume = 0

        for trend in trends:
            pos = trend.start_position if use_start else trend.current_position
            if pos <= 0 or pos > 100:
                continue  # unranked or beyond the CTR model's range
            volume = max(trend.volume, 1)  # zero-volume keywords still count a little
            total_volume += volume
            total_weighted += volume * CTR_WEIGHTS.get(pos, 0.0005)

        if total_volume > 0:
            # Normalize against the best case: every keyword at position 1.
            max_possible = total_volume * CTR_WEIGHTS[1]
            return (total_weighted / max_possible) * 100.0
        return 0.0

    def generate_segment_report(self, trends: list[RankingTrend]) -> list[SegmentReport]:
        """Generate performance breakdowns by keyword segment.

        Each keyword contributes to two segments: brand/non_brand and its
        intent bucket (intent_<type>).
        """
        segment_map: dict[str, list[RankingTrend]] = {}

        for trend in trends:
            brand_key = "brand" if trend.is_brand else "non_brand"
            segment_map.setdefault(brand_key, []).append(trend)

            intent_key = (
                f"intent_{trend.intent.lower()}" if trend.intent else "intent_informational"
            )
            segment_map.setdefault(intent_key, []).append(trend)

        reports: list[SegmentReport] = []
        for seg_name, seg_trends in sorted(segment_map.items()):
            if not seg_trends:
                continue

            ranked = [t for t in seg_trends if t.current_position > 0]
            avg_pos = sum(t.current_position for t in ranked) / len(ranked) if ranked else 0.0
            avg_change = sum(t.total_change for t in seg_trends) / len(seg_trends)

            seg_gainers, seg_losers = self.find_top_movers(seg_trends, limit=5)

            reports.append(SegmentReport(
                segment_name=seg_name,
                total_keywords=len(seg_trends),
                avg_position=round(avg_pos, 1),
                avg_position_change=round(avg_change, 1),
                visibility_score=round(
                    self._calculate_visibility_score(seg_trends, use_start=False), 2,
                ),
                improved_count=sum(1 for t in seg_trends if t.trend_direction == "improved"),
                declined_count=sum(1 for t in seg_trends if t.trend_direction == "declined"),
                stable_count=sum(1 for t in seg_trends if t.trend_direction == "stable"),
                top_gainers=seg_gainers,
                top_losers=seg_losers,
            ))

        return reports

    async def compare_with_competitor(
        self,
        target: str,
        competitor: str,
        period_days: int = 30,
    ) -> CompetitorReport:
        """Period-over-period comparison with a single competitor.

        Uses Ahrefs rank-tracker-competitors-stats; returns an empty report
        when the data source is unavailable.
        """
        logger.info(f"Comparing {target} vs {competitor} over {period_days} days")

        comp_data = await self._call_competitors_stats(target, competitor)
        report = CompetitorReport(competitor=competitor)

        if comp_data:
            report.our_visibility = comp_data.get("target_visibility", 0.0)
            report.their_visibility = comp_data.get("competitor_visibility", 0.0)
            report.overlap_keywords = comp_data.get("overlap_keywords", 0)
            report.keywords_we_lead = comp_data.get("target_better", 0)
            report.keywords_they_lead = comp_data.get("competitor_better", 0)

            report.notable_gaps = [
                {
                    "keyword": gap.get("keyword", ""),
                    "our_position": gap.get("target_position", 0),
                    "their_position": gap.get("competitor_position", 0),
                    "volume": gap.get("volume", 0),
                }
                for gap in comp_data.get("keyword_gaps", [])[:15]
            ]

        return report

    async def _call_competitors_stats(self, target: str, competitor: str) -> dict:
        """Call the Ahrefs rank-tracker-competitors-stats MCP tool."""
        logger.info("Calling Ahrefs rank-tracker-competitors-stats...")
        data = self._call_mcp_tool(
            "ahrefs/rank-tracker-competitors-stats",
            {"target": target, "competitor": competitor},
        )
        return data if isinstance(data, dict) else {}

    async def generate_report(
        self,
        target: str,
        period_days: int = 30,
        competitors: Optional[list[str]] = None,
    ) -> RankingReport:
        """Orchestrate full ranking performance report generation.

        Args:
            target: Target website URL.
            period_days: Reporting period in days.
            competitors: Optional competitor URLs to compare against.

        Returns:
            Complete RankingReport with trends, movers, segments, and
            competitor comparisons; mostly empty when no data is available.
        """
        logger.info(f"Generating ranking report for: {target} ({period_days} days)")

        report = RankingReport(target=target, period_days=period_days)

        # Step 1: historical position data (everything else derives from it).
        trends = await self.get_historical_positions(target, period_days)
        if not trends:
            logger.warning("No historical data retrieved. Check Ahrefs project configuration.")
            return report

        report.keyword_trends = trends
        report.total_keywords = len(trends)

        # Step 2: trend summary.
        report.trend_summary = self.calculate_trends(trends)

        # Step 3: visibility now vs. period start.
        report.current_visibility = self._calculate_visibility_score(trends, use_start=False)
        report.previous_visibility = self._calculate_visibility_score(trends, use_start=True)
        report.visibility_change = report.current_visibility - report.previous_visibility

        # Step 4: top movers.
        report.top_gainers, report.top_losers = self.find_top_movers(trends, limit=10)

        # Step 5: segment breakdowns.
        report.segments = self.generate_segment_report(trends)

        # Step 6: competitor comparisons (sequential; calls are rate-limited).
        for competitor in competitors or []:
            report.competitors.append(
                await self.compare_with_competitor(target, competitor, period_days)
            )

        logger.info(f"Report complete. Keywords: {report.total_keywords}")
        logger.info(
            f"Visibility: {report.previous_visibility:.2f} -> "
            f"{report.current_visibility:.2f} ({report.visibility_change:+.2f})"
        )

        return report
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output formatters
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def format_text_report(report: RankingReport) -> str:
|
||||||
|
"""Format ranking report as human-readable text."""
|
||||||
|
lines = []
|
||||||
|
lines.append("=" * 60)
|
||||||
|
lines.append(f"Ranking Performance Report: {report.target}")
|
||||||
|
lines.append(f"Period: {report.period_start} ~ {report.period_end} ({report.period_days} days)")
|
||||||
|
lines.append(f"Generated: {report.timestamp}")
|
||||||
|
lines.append("=" * 60)
|
||||||
|
|
||||||
|
# Visibility trend
|
||||||
|
lines.append(f"\nVisibility Score:")
|
||||||
|
lines.append(f" Current: {report.current_visibility:.2f}")
|
||||||
|
lines.append(f" Previous: {report.previous_visibility:.2f}")
|
||||||
|
change_sign = "+" if report.visibility_change >= 0 else ""
|
||||||
|
lines.append(f" Change: {change_sign}{report.visibility_change:.2f}")
|
||||||
|
|
||||||
|
# Trend summary
|
||||||
|
ts = report.trend_summary
|
||||||
|
lines.append(f"\nKeyword Trends ({report.total_keywords} total):")
|
||||||
|
lines.append(f" Improved: {ts.get('improved', 0)}")
|
||||||
|
lines.append(f" Declined: {ts.get('declined', 0)}")
|
||||||
|
lines.append(f" Stable: {ts.get('stable', 0)}")
|
||||||
|
lines.append(f" New: {ts.get('new', 0)}")
|
||||||
|
lines.append(f" Lost: {ts.get('lost', 0)}")
|
||||||
|
|
||||||
|
# Top gainers
|
||||||
|
if report.top_gainers:
|
||||||
|
lines.append(f"\nTop Gainers:")
|
||||||
|
lines.append("-" * 60)
|
||||||
|
for m in report.top_gainers:
|
||||||
|
lines.append(
|
||||||
|
f" {m.keyword}: {m.previous_position} -> {m.current_position} "
|
||||||
|
f"(+{m.position_change}) | Vol: {m.volume} | Impact: {m.impact_score}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Top losers
|
||||||
|
if report.top_losers:
|
||||||
|
lines.append(f"\nTop Losers:")
|
||||||
|
lines.append("-" * 60)
|
||||||
|
for m in report.top_losers:
|
||||||
|
lines.append(
|
||||||
|
f" {m.keyword}: {m.previous_position} -> {m.current_position} "
|
||||||
|
f"({m.position_change}) | Vol: {m.volume} | Impact: {m.impact_score}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Segments
|
||||||
|
if report.segments:
|
||||||
|
lines.append(f"\nSegment Breakdown:")
|
||||||
|
lines.append("-" * 60)
|
||||||
|
for seg in report.segments:
|
||||||
|
lines.append(
|
||||||
|
f" {seg.segment_name}: {seg.total_keywords} kw, "
|
||||||
|
f"avg pos {seg.avg_position}, vis {seg.visibility_score}, "
|
||||||
|
f"improved {seg.improved_count} / declined {seg.declined_count}"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Competitors
|
||||||
|
if report.competitors:
|
||||||
|
lines.append(f"\nCompetitor Comparison:")
|
||||||
|
lines.append("-" * 60)
|
||||||
|
for comp in report.competitors:
|
||||||
|
lines.append(f" vs {comp.competitor}:")
|
||||||
|
lines.append(f" Our visibility: {comp.our_visibility:.2f}")
|
||||||
|
lines.append(f" Their visibility: {comp.their_visibility:.2f}")
|
||||||
|
lines.append(f" Overlap: {comp.overlap_keywords} keywords")
|
||||||
|
lines.append(f" We lead: {comp.keywords_we_lead}")
|
||||||
|
lines.append(f" They lead: {comp.keywords_they_lead}")
|
||||||
|
if comp.notable_gaps:
|
||||||
|
lines.append(f" Notable gaps:")
|
||||||
|
for gap in comp.notable_gaps[:5]:
|
||||||
|
lines.append(
|
||||||
|
f" {gap['keyword']}: us #{gap['our_position']} "
|
||||||
|
f"vs them #{gap['their_position']} (vol: {gap['volume']})"
|
||||||
|
)
|
||||||
|
|
||||||
|
lines.append("\n" + "=" * 60)
|
||||||
|
return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the ranking reporter.

    Returns:
        argparse.Namespace with: target (str), period (int),
        competitors (list[str]), json_output (bool), output (str | None).

    Note: --competitor uses default=None plus post-parse normalization.
    Passing default=[] to an action="append" option would let argparse
    append into the shared default list object across parse calls.
    """
    parser = argparse.ArgumentParser(
        description="Ranking Reporter - Generate ranking performance reports with trends",
    )
    parser.add_argument(
        "--target",
        required=True,
        help="Target website URL (e.g., https://example.com)",
    )
    parser.add_argument(
        "--period",
        type=int,
        default=30,
        help="Reporting period in days (default: 30)",
    )
    parser.add_argument(
        "--competitor",
        action="append",
        dest="competitors",
        default=None,
        help="Competitor URL to compare (repeatable)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        dest="json_output",
        help="Output in JSON format",
    )
    parser.add_argument(
        "--output",
        type=str,
        default=None,
        help="Save output to file path",
    )
    args = parser.parse_args()
    # Normalize: callers expect a list, never None.
    if args.competitors is None:
        args.competitors = []
    return args
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """CLI entry point: build the ranking report and emit it."""
    args = parse_args()
    reporter = RankingReporter()

    report = await reporter.generate_report(
        target=args.target,
        period_days=args.period,
        competitors=args.competitors,
    )

    # Render once, then route to file or stdout.
    rendered = (
        json.dumps(report.to_dict(), ensure_ascii=False, indent=2)
        if args.json_output
        else format_text_report(report)
    )

    if args.output:
        with open(args.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output saved to: {args.output}")
    else:
        print(rendered)

    reporter.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
asyncio.run(main())
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
# 21-seo-position-tracking dependencies
|
||||||
|
requests>=2.31.0
|
||||||
|
aiohttp>=3.9.0
|
||||||
|
pandas>=2.1.0
|
||||||
|
tenacity>=8.2.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
rich>=13.7.0
|
||||||
119
custom-skills/21-seo-position-tracking/desktop/SKILL.md
Normal file
119
custom-skills/21-seo-position-tracking/desktop/SKILL.md
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
---
|
||||||
|
name: seo-position-tracking
|
||||||
|
description: |
|
||||||
|
Keyword position tracking and ranking monitoring.
|
||||||
|
Triggers: rank tracking, position monitoring, keyword rankings, visibility score, ranking report, 키워드 순위, 순위 추적.
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Position Tracking
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Monitor keyword ranking positions, detect significant changes, calculate visibility scores, and compare against competitors using our-seo-agent CLI or pre-fetched ranking data. Provides actionable alerts for ranking drops and segment-level performance breakdown.
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
1. **Position Monitoring** - Retrieve current keyword ranking positions from our-seo-agent CLI or pre-fetched data
|
||||||
|
2. **Change Detection** - Detect significant position changes with configurable threshold alerts (severity: critical/high/medium/low)
|
||||||
|
3. **Visibility Scoring** - Calculate weighted visibility scores using CTR-curve model (position 1 = 30%, position 2 = 15%, etc.)
|
||||||
|
4. **Brand/Non-brand Segmentation** - Automatically classify keywords by brand relevance and search intent type
|
||||||
|
5. **Competitor Comparison** - Compare keyword overlap, position gaps, and visibility scores against competitors
|
||||||
|
|
||||||
|
## MCP Tool Usage
|
||||||
|
|
||||||
|
### SEO Data (DataForSEO)
|
||||||
|
|
||||||
|
**Primary — our-seo-agent CLI:**
|
||||||
|
```bash
|
||||||
|
our serp ranked-keywords <domain> --location 2410 --limit 100
|
||||||
|
our keywords volume "<kw1>" "<kw2>" --location 2410 --language ko
|
||||||
|
our serp domain-overview <domain> --location 2410
|
||||||
|
our serp competitors <domain> --location 2410
|
||||||
|
```
|
||||||
|
|
||||||
|
**Interactive fallback — DataForSEO MCP:**
|
||||||
|
```
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_ranked_keywords
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_domain_rank_overview
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_historical_rank_overview
|
||||||
|
mcp__dfs-mcp__dataforseo_labs_google_keyword_overview
|
||||||
|
```
|
||||||
|
|
||||||
|
### Common Parameters
|
||||||
|
- **location_code**: 2410 (Korea), 2840 (US), 2392 (Japan)
|
||||||
|
- **language_code**: ko, en, ja
|
||||||
|
|
||||||
|
### Notion for Report Storage
|
||||||
|
```
|
||||||
|
mcp__notion__notion-create-pages: Save tracking reports to SEO Audit Log
|
||||||
|
mcp__notion__notion-update-page: Update existing tracking entries
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### Phase 1: Data Collection
|
||||||
|
1. Fetch current ranked keywords: `our serp ranked-keywords <domain> --location 2410 --limit 100 --format json`
|
||||||
|
2. Get domain overview: `our serp domain-overview <domain> --location 2410 --format json`
|
||||||
|
3. Get search volumes for tracked keywords: `our keywords volume "<kw1>" "<kw2>" --location 2410`
|
||||||
|
4. Fetch competitor positions: `our serp ranked-keywords <competitor> --location 2410 --limit 100`
|
||||||
|
5. For historical comparison, use MCP: `mcp__dfs-mcp__dataforseo_labs_google_historical_rank_overview`
|
||||||
|
|
||||||
|
### Phase 2: Analysis
|
||||||
|
1. Detect position changes against previous period
|
||||||
|
2. Generate alerts for changes exceeding threshold
|
||||||
|
3. Calculate visibility score weighted by search volume and CTR curve
|
||||||
|
4. Segment keywords into brand/non-brand and by intent type
|
||||||
|
5. Compare positions against each competitor
|
||||||
|
|
||||||
|
### Phase 3: Reporting
|
||||||
|
1. Compile position distribution (top3/top10/top20/top50/top100)
|
||||||
|
2. Summarize changes (improved/declined/stable/new/lost)
|
||||||
|
3. List alerts sorted by severity and search volume
|
||||||
|
4. Generate segment-level breakdown
|
||||||
|
5. Save report to Notion SEO Audit Log database
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"target": "https://example.com",
|
||||||
|
"total_keywords": 250,
|
||||||
|
"visibility_score": 68.5,
|
||||||
|
"positions": {
|
||||||
|
"top3": 15,
|
||||||
|
"top10": 48,
|
||||||
|
"top20": 92,
|
||||||
|
"top50": 180,
|
||||||
|
"top100": 230
|
||||||
|
},
|
||||||
|
"changes": {
|
||||||
|
"improved": 45,
|
||||||
|
"declined": 30,
|
||||||
|
"stable": 155,
|
||||||
|
"new": 12,
|
||||||
|
"lost": 8
|
||||||
|
},
|
||||||
|
"alerts": [
|
||||||
|
{
|
||||||
|
"keyword": "example keyword",
|
||||||
|
"old_position": 5,
|
||||||
|
"new_position": 15,
|
||||||
|
"change": -10,
|
||||||
|
"volume": 5400,
|
||||||
|
"severity": "high"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"segments": {
|
||||||
|
"brand": {"keywords": 30, "avg_position": 2.1},
|
||||||
|
"non_brand": {"keywords": 220, "avg_position": 24.5}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
All tracking reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Properties**: Issue (title), Site (url), Category (Position Tracking), Priority, Found Date, Audit ID
|
||||||
|
- **Language**: Korean with English technical terms
|
||||||
|
- **Audit ID Format**: RANK-YYYYMMDD-NNN
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
name: seo-position-tracking
|
||||||
|
description: |
|
||||||
|
Keyword position tracking and ranking monitoring. Triggers: rank tracking, position monitoring, keyword rankings, visibility score, ranking report.
|
||||||
|
allowed-tools:
|
||||||
|
- mcp__ahrefs__*
|
||||||
|
- mcp__notion__*
|
||||||
|
- WebSearch
|
||||||
|
- WebFetch
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Ahrefs
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Notion
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# WebSearch
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
138
custom-skills/22-seo-link-building/code/CLAUDE.md
Normal file
138
custom-skills/22-seo-link-building/code/CLAUDE.md
Normal file
@@ -0,0 +1,138 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Link building diagnosis tool for backlink profile analysis, toxic link detection, competitor link gap identification, and link velocity tracking. Supports Korean platform link mapping (Naver Blog, Cafe, Tistory, Brunch, Korean news sites).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# Backlink profile audit
|
||||||
|
python scripts/backlink_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Link gap analysis vs competitors
|
||||||
|
python scripts/link_gap_finder.py --target https://example.com --competitor https://competitor.com --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `backlink_auditor.py` | Analyze backlink profile, detect toxic links | DR, referring domains, anchor distribution, toxic links |
|
||||||
|
| `link_gap_finder.py` | Find link gap opportunities vs competitors | Domains linking to competitors but not target |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## Backlink Auditor
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full backlink audit
|
||||||
|
python scripts/backlink_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Check link velocity (new/lost over time)
|
||||||
|
python scripts/backlink_auditor.py --url https://example.com --velocity --json
|
||||||
|
|
||||||
|
# Find broken backlinks for recovery
|
||||||
|
python scripts/backlink_auditor.py --url https://example.com --broken --json
|
||||||
|
|
||||||
|
# Korean platform link analysis
|
||||||
|
python scripts/backlink_auditor.py --url https://example.com --korean-platforms --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Domain Rating (DR) and backlink stats overview
|
||||||
|
- Referring domain analysis (count, DR distribution, country distribution)
|
||||||
|
- Anchor text distribution analysis (branded, exact-match, generic, naked URL)
|
||||||
|
- Toxic link detection (PBN patterns, spammy domains, link farms)
|
||||||
|
- Link velocity tracking (new/lost referring domains over time)
|
||||||
|
- Broken backlink recovery opportunities
|
||||||
|
- Korean platform mapping (Naver Blog, Naver Cafe, Tistory, Brunch, Korean news)
|
||||||
|
|
||||||
|
## Link Gap Finder
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Gap vs one competitor
|
||||||
|
python scripts/link_gap_finder.py --target https://example.com --competitor https://comp1.com --json
|
||||||
|
|
||||||
|
# Multiple competitors
|
||||||
|
python scripts/link_gap_finder.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --json
|
||||||
|
|
||||||
|
# Filter by minimum DR
|
||||||
|
python scripts/link_gap_finder.py --target https://example.com --competitor https://comp1.com --min-dr 30 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Find domains linking to competitors but not target
|
||||||
|
- Score link opportunities by DR, traffic, relevance
|
||||||
|
- Categorize link sources (editorial, directory, forum, blog, news)
|
||||||
|
- Prioritize by feasibility and impact
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (future); use `--input` for pre-fetched JSON |
|
||||||
|
| WebSearch / WebFetch | Supplementary live data |
|
||||||
|
| Notion MCP | Save audit report to database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"url": "https://example.com",
|
||||||
|
"domain_rating": 45,
|
||||||
|
"backlink_stats": {
|
||||||
|
"total_backlinks": 12500,
|
||||||
|
"referring_domains": 850,
|
||||||
|
"dofollow_ratio": 0.72
|
||||||
|
},
|
||||||
|
"anchor_distribution": {
|
||||||
|
"branded": 35,
|
||||||
|
"exact_match": 12,
|
||||||
|
"partial_match": 18,
|
||||||
|
"generic": 20,
|
||||||
|
"naked_url": 15
|
||||||
|
},
|
||||||
|
"toxic_links": [...],
|
||||||
|
"korean_platforms": {
|
||||||
|
"naver_blog": 45,
|
||||||
|
"naver_cafe": 12,
|
||||||
|
"tistory": 23,
|
||||||
|
"brunch": 5
|
||||||
|
},
|
||||||
|
"link_velocity": {
|
||||||
|
"new_last_30d": 120,
|
||||||
|
"lost_last_30d": 35
|
||||||
|
},
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Audited website URL |
|
||||||
|
| Category | Select | Link Building |
|
||||||
|
| Priority | Select | Based on toxic link count and gap size |
|
||||||
|
| Found Date | Date | Audit date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: LINK-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is (e.g., Domain Rating, Referring Domains, Backlinks)
|
||||||
|
- URLs and code remain unchanged
|
||||||
1079
custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py
Normal file
1079
custom-skills/22-seo-link-building/code/scripts/backlink_auditor.py
Normal file
File diff suppressed because it is too large
Load Diff
207
custom-skills/22-seo-link-building/code/scripts/base_client.py
Normal file
207
custom-skills/22-seo-link-building/code/scripts/base_client.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
import logging
import os
import time
from asyncio import Semaphore
from datetime import datetime
from typing import Any, Callable, TypeVar

from dotenv import load_dotenv
from tenacity import (
    retry,
    stop_after_attempt,
    wait_exponential,
    retry_if_exception_type,
)
|
||||||
|
|
||||||
|
# Load environment variables from a local .env file (no-op when absent).
load_dotenv()

# Logging setup.
# NOTE(review): calling basicConfig at import time configures the root logger
# as a side effect; acceptable for skill scripts, but confirm it does not
# clobber a host application's logging configuration.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)

# Generic type variable for typed helper signatures.
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
    """Rate limiter using token bucket algorithm.

    Tokens refill continuously at ``rate / per`` tokens per second, capped
    at a burst capacity of ``rate``.  ``acquire`` consumes one token and
    sleeps when the bucket is empty.
    """

    def __init__(self, rate: float, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed
            per: Time period in seconds (default: 1 second)
        """
        self.rate = rate
        self.per = per
        self.tokens = rate
        # FIX: use a monotonic clock for refill bookkeeping.  datetime.now()
        # is wall-clock time and can jump backwards/forwards (NTP sync, DST),
        # which corrupts the elapsed-time calculation below.
        self.last_update = time.monotonic()
        self._lock = asyncio.Lock()

    async def acquire(self) -> None:
        """Acquire a token, waiting if necessary."""
        async with self._lock:
            now = time.monotonic()
            elapsed = now - self.last_update
            # Refill proportionally to elapsed time, capped at burst size.
            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
            self.last_update = now

            if self.tokens < 1:
                # Sleep exactly long enough for one token to accumulate.
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                self.tokens = 0
            else:
                self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Combines a concurrency cap (semaphore), a token-bucket rate limiter,
    and tenacity-driven retries, while tracking simple request counters.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # In-memory counters read by print_stats().
        # NOTE(review): "retries" is never incremented anywhere in this file —
        # tenacity retries happen inside the decorator without touching it.
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    # Retry up to 3 attempts with exponential backoff (2s..10s between tries).
    # NOTE(review): retrying on bare Exception also retries non-transient
    # errors (e.g. ValueError); confirm this is intentional.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute a request with rate limiting and retry.

        Args:
            coro: Zero-argument callable returning an awaitable.

        Raises:
            Exception: re-raised after the final failed retry attempt.
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Per-request failures are captured as ``{"error": str(e)}`` entries
        instead of aborting the batch.

        NOTE(review): with tqdm installed, results come back in COMPLETION
        order, while the asyncio.gather fallback preserves submission order —
        callers relying on ordering should confirm which branch they hit.
        """
        # tqdm is optional: degrade gracefully to a plain gather without it.
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                return {"error": str(e)}

        tasks = [execute(req) for req in requests]

        if has_tqdm:
            results = []
            for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
                result = await coro
                results.append(result)
            return results
        else:
            return await asyncio.gather(*tasks, return_exceptions=True)

    def print_stats(self) -> None:
        """Print request statistics via the configured logger."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f"  Total Requests: {self.stats['requests']}")
        self.logger.info(f"  Successful: {self.stats['success']}")
        self.logger.info(f"  Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton config instance shared by importing modules (one load_dotenv pass).
config = ConfigManager()
|
||||||
@@ -0,0 +1,802 @@
|
|||||||
|
"""
|
||||||
|
Link Gap Finder - Competitor link gap analysis
|
||||||
|
===============================================
|
||||||
|
Purpose: Identify link building opportunities by finding domains that link
|
||||||
|
to competitors but not to the target site via Ahrefs MCP.
|
||||||
|
Python: 3.10+
|
||||||
|
Usage:
|
||||||
|
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --json
|
||||||
|
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --min-dr 30 --json
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import dataclass, field, asdict
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import pandas as pd
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Logging
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
logger = logging.getLogger("link_gap_finder")
console = Console()  # rich console for human-readable table output

# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# Base URL of the Ahrefs v3 REST API (used only when a live session/token is
# available; otherwise calls fall back to an MCP stub).
AHREFS_BASE = "https://api.ahrefs.com/v3"

# Source category detection patterns: substrings matched against a referring
# domain name to classify the kind of site it is.  Korean platforms (Naver,
# Daum, Tistory, ...) are covered alongside global news/blog/forum/directory
# sites.
SOURCE_CATEGORY_PATTERNS: dict[str, list[str]] = {
    "news": [
        "news", "press", "media", "journal", "herald", "times", "post",
        "gazette", "tribune", "daily", "chosun", "donga", "joongang",
        "hani", "khan", "yna", "yonhap", "reuters", "bloomberg",
        "techcrunch", "verge", "wired", "arstechnica", "bbc", "cnn",
    ],
    "blog": [
        "blog", "wordpress", "medium.com", "tistory.com", "brunch.co.kr",
        "blog.naver.com", "tumblr", "blogger", "substack", "ghost.io",
        "velog.io", "dev.to",
    ],
    "forum": [
        "forum", "community", "discuss", "reddit.com", "quora.com",
        "stackexchange", "stackoverflow", "cafe.naver.com", "dcinside",
        "fmkorea", "clien", "ppomppu", "theqoo", "ruliweb",
    ],
    "directory": [
        "directory", "listing", "yellowpages", "yelp", "bbb.org",
        "clutch.co", "g2.com", "capterra", "trustpilot", "glassdoor",
        "dmoz", "aboutus", "hotfrog", "manta", "superpages",
    ],
    # TLD suffixes, not keywords: educational/government domains.
    "edu_gov": [
        ".edu", ".gov", ".ac.kr", ".go.kr", ".or.kr",
    ],
    "social": [
        "facebook.com", "twitter.com", "x.com", "linkedin.com",
        "instagram.com", "youtube.com", "pinterest.com", "tiktok.com",
    ],
    "korean_platform": [
        "naver.com", "daum.net", "kakao.com", "tistory.com",
        "brunch.co.kr", "zum.com", "nate.com",
    ],
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Dataclasses
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LinkOpportunity:
    """A single link building opportunity from gap analysis."""
    domain: str  # referring domain that does not yet link to the target
    dr: float = 0.0  # Domain Rating of the referring domain (0 when unknown)
    traffic: int = 0  # estimated organic traffic of the referring domain
    linked_competitors: list[str] = field(default_factory=list)  # competitors this domain links to
    competitor_count: int = 0  # number of distinct competitors linked
    not_linked_target: bool = True  # True by construction for gap results
    category: str = "other"  # source category from SOURCE_CATEGORY_PATTERNS
    feasibility_score: float = 0.0  # 0-100: how easy the link is to acquire
    impact_score: float = 0.0  # estimated SEO value if acquired — TODO confirm scale
    overall_score: float = 0.0  # combined ranking score — TODO confirm formula
    backlinks_to_competitors: int = 0  # backlinks this domain points at competitors
    country: str = ""  # country code reported by the data source, if any
    top_anchor: str = ""  # most common anchor text, if known
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class GapSummary:
    """Summary statistics for the gap analysis."""
    total_opportunities: int = 0  # number of gap domains found
    avg_dr: float = 0.0  # mean Domain Rating across opportunities
    high_dr_count: int = 0  # opportunities above a DR cutoff — TODO confirm threshold
    category_breakdown: dict[str, int] = field(default_factory=dict)  # opportunities per source category
    top_countries: list[dict[str, Any]] = field(default_factory=list)  # most common referring-domain countries
    total_competitor_refdomains: dict[str, int] = field(default_factory=dict)  # refdomain counts per competitor
    target_refdomains_count: int = 0  # referring-domain count for the target site
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class LinkGapResult:
    """Complete link gap analysis result."""
    target_url: str  # URL supplied on the command line
    target_domain: str = ""  # host extracted from target_url
    competitor_urls: list[str] = field(default_factory=list)  # competitor URLs as given
    competitor_domains: list[str] = field(default_factory=list)  # hosts extracted from competitor URLs
    target_dr: float = 0.0  # Domain Rating of the target
    opportunities: list[LinkOpportunity] = field(default_factory=list)  # all scored gap domains
    summary: GapSummary | None = None  # aggregate stats, set after scoring
    top_opportunities: list[LinkOpportunity] = field(default_factory=list)  # highest-scoring subset
    issues: list[dict[str, str]] = field(default_factory=list)  # detected problems for reporting
    recommendations: list[str] = field(default_factory=list)  # human-readable next steps
    timestamp: str = ""  # run timestamp — presumably ISO-8601; confirm at call site
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# LinkGapFinder
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class LinkGapFinder(BaseAsyncClient):
|
||||||
|
"""Find link building opportunities by analyzing competitor backlink gaps."""
|
||||||
|
|
||||||
|
    def __init__(self, **kwargs):
        """Create a finder with conservative concurrency/rate defaults.

        Args:
            kwargs: extra options forwarded to BaseAsyncClient (do not pass
                max_concurrent/requests_per_second — they are fixed here).
        """
        super().__init__(max_concurrent=5, requests_per_second=2.0, **kwargs)
        # Optional aiohttp session; when set (together with a token),
        # _call_ahrefs uses the REST API instead of returning an MCP stub.
        self.session: aiohttp.ClientSession | None = None
|
||||||
|
|
||||||
|
# -- Ahrefs MCP helper ---------------------------------------------------
|
||||||
|
|
||||||
|
async def _call_ahrefs(
|
||||||
|
self, endpoint: str, params: dict[str, Any]
|
||||||
|
) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Call Ahrefs API endpoint.
|
||||||
|
|
||||||
|
In MCP context this calls mcp__ahrefs__<endpoint>.
|
||||||
|
For standalone use, falls back to REST API with token.
|
||||||
|
"""
|
||||||
|
api_token = config.get_required("AHREFS_API_TOKEN") if not self.session else None
|
||||||
|
|
||||||
|
if self.session and api_token:
|
||||||
|
url = f"{AHREFS_BASE}/{endpoint}"
|
||||||
|
headers = {"Authorization": f"Bearer {api_token}"}
|
||||||
|
async with self.session.get(url, headers=headers, params=params) as resp:
|
||||||
|
resp.raise_for_status()
|
||||||
|
return await resp.json()
|
||||||
|
|
||||||
|
logger.warning(
|
||||||
|
f"Ahrefs call to '{endpoint}' - use MCP tool "
|
||||||
|
f"mcp__ahrefs__{endpoint.replace('-', '_')} in Claude Desktop"
|
||||||
|
)
|
||||||
|
return {"endpoint": endpoint, "params": params, "data": [], "note": "mcp_stub"}
|
||||||
|
|
||||||
|
# -- Core methods --------------------------------------------------------
|
||||||
|
|
||||||
|
async def get_referring_domains(
|
||||||
|
self, url: str, limit: int = 1000
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""Fetch referring domains for a given URL/domain."""
|
||||||
|
target = urlparse(url).netloc or url
|
||||||
|
result = await self._call_ahrefs(
|
||||||
|
"site-explorer-referring-domains",
|
||||||
|
{"target": target, "mode": "domain", "limit": limit, "order_by": "domain_rating:desc"},
|
||||||
|
)
|
||||||
|
domains = result.get("data", result.get("refdomains", []))
|
||||||
|
if isinstance(domains, dict):
|
||||||
|
domains = domains.get("refdomains", [])
|
||||||
|
return domains if isinstance(domains, list) else []
|
||||||
|
|
||||||
|
async def get_domain_rating(self, url: str) -> float:
|
||||||
|
"""Fetch Domain Rating for a URL."""
|
||||||
|
target = urlparse(url).netloc or url
|
||||||
|
result = await self._call_ahrefs(
|
||||||
|
"site-explorer-domain-rating",
|
||||||
|
{"target": target},
|
||||||
|
)
|
||||||
|
data = result.get("data", result) if isinstance(result, dict) else {}
|
||||||
|
return data.get("domain_rating", 0.0)
|
||||||
|
|
||||||
|
async def get_domain_metrics(self, url: str) -> dict[str, Any]:
|
||||||
|
"""Fetch comprehensive domain metrics."""
|
||||||
|
target = urlparse(url).netloc or url
|
||||||
|
result = await self._call_ahrefs(
|
||||||
|
"site-explorer-backlinks-stats",
|
||||||
|
{"target": target, "mode": "domain"},
|
||||||
|
)
|
||||||
|
data = result.get("data", result) if isinstance(result, dict) else {}
|
||||||
|
return {
|
||||||
|
"total_backlinks": data.get("live", 0),
|
||||||
|
"referring_domains": data.get("live_refdomains", 0),
|
||||||
|
"dofollow": data.get("live_dofollow", 0),
|
||||||
|
}
|
||||||
|
|
||||||
|
def find_gaps(
|
||||||
|
self,
|
||||||
|
target_domains: set[str],
|
||||||
|
competitor_domain_maps: dict[str, set[str]],
|
||||||
|
) -> list[dict[str, Any]]:
|
||||||
|
"""
|
||||||
|
Find domains linking to competitors but not to the target.
|
||||||
|
|
||||||
|
Returns a list of gap domains with metadata about which
|
||||||
|
competitors they link to.
|
||||||
|
"""
|
||||||
|
# Collect all competitor referring domains
|
||||||
|
all_competitor_domains: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
for comp_name, comp_domains in competitor_domain_maps.items():
|
||||||
|
for domain in comp_domains:
|
||||||
|
domain_lower = domain.lower()
|
||||||
|
if domain_lower not in all_competitor_domains:
|
||||||
|
all_competitor_domains[domain_lower] = []
|
||||||
|
all_competitor_domains[domain_lower].append(comp_name)
|
||||||
|
|
||||||
|
# Find gaps: in competitor set but not in target set
|
||||||
|
target_set_lower = {d.lower() for d in target_domains}
|
||||||
|
gaps = []
|
||||||
|
|
||||||
|
for domain, linked_comps in all_competitor_domains.items():
|
||||||
|
if domain not in target_set_lower:
|
||||||
|
gaps.append({
|
||||||
|
"domain": domain,
|
||||||
|
"linked_competitors": linked_comps,
|
||||||
|
"competitor_count": len(set(linked_comps)),
|
||||||
|
})
|
||||||
|
|
||||||
|
# Sort by number of competitors linking (more = higher priority)
|
||||||
|
gaps.sort(key=lambda g: g["competitor_count"], reverse=True)
|
||||||
|
return gaps
|
||||||
|
|
||||||
|
def score_opportunities(
    self,
    gaps: list[dict[str, Any]],
    refdomains_data: dict[str, list[dict[str, Any]]],
    total_competitors: int,
) -> list[LinkOpportunity]:
    """Score gap opportunities by DR, traffic, relevance, and feasibility.

    Scoring factors:
    - DR weight: higher DR means a more impactful link.
    - Competitor overlap: more competitors linking suggests easier acquisition.
    - Category bonus: editorial/news links are valued higher.
    - Traffic bonus: higher-traffic domains are valued more.
    """
    # Index per-domain metadata (DR, traffic, backlinks, country) from the
    # competitor referring-domain rows; the first row seen for a domain wins.
    metadata_by_domain: dict[str, dict[str, Any]] = {}
    for rows in refdomains_data.values():
        for row in rows:
            key = row.get("domain", row.get("domain_from", "")).lower()
            if key and key not in metadata_by_domain:
                metadata_by_domain[key] = {
                    "dr": row.get("domain_rating", row.get("dr", 0)),
                    "traffic": row.get("organic_traffic", row.get("traffic", 0)),
                    "backlinks": row.get("backlinks", 0),
                    "country": row.get("country", ""),
                }

    scored: list[LinkOpportunity] = []
    for gap in gaps:
        gap_domain = gap["domain"]
        meta = metadata_by_domain.get(gap_domain, {})

        dr = meta.get("dr", 0)
        traffic = meta.get("traffic", 0)
        overlap = gap["competitor_count"]
        category = self._detect_category(gap_domain)

        # Feasibility (0-100): higher when more competitors already link
        # (social proof) and when the source type is easy to reach out to.
        feasibility = min(100, (
            (overlap / max(total_competitors, 1)) * 40  # Competitor overlap
            + (30 if category in ("blog", "forum", "directory") else 10)  # Category ease
            + (20 if dr < 60 else 5)  # Lower DR = easier to get link from
            + (10 if traffic > 0 else 0)  # Active site bonus
        ))

        # Impact (0-100): higher for high DR, high traffic, editorial/news.
        impact = min(100, (
            min(dr, 100) * 0.4  # DR weight (40%)
            + min(traffic / 1000, 30)  # Traffic weight (up to 30)
            + (20 if category in ("news", "edu_gov") else 5)  # Authority bonus
            + (overlap / max(total_competitors, 1)) * 10  # Validation
        ))

        scored.append(LinkOpportunity(
            domain=gap_domain,
            dr=dr,
            traffic=traffic,
            linked_competitors=gap["linked_competitors"],
            competitor_count=overlap,
            not_linked_target=True,
            category=category,
            feasibility_score=round(feasibility, 1),
            impact_score=round(impact, 1),
            # Overall = weighted blend favoring impact over feasibility.
            overall_score=round(feasibility * 0.4 + impact * 0.6, 1),
            backlinks_to_competitors=meta.get("backlinks", 0),
            country=meta.get("country", ""),
        ))

    # Best opportunities first.
    scored.sort(key=lambda opp: opp.overall_score, reverse=True)
    return scored
|
||||||
|
|
||||||
|
def categorize_sources(
    self, opportunities: list[LinkOpportunity]
) -> dict[str, list[LinkOpportunity]]:
    """Group opportunities by source category, preserving input order."""
    grouped: dict[str, list[LinkOpportunity]] = {}
    for opportunity in opportunities:
        grouped.setdefault(opportunity.category, []).append(opportunity)
    return grouped
|
||||||
|
|
||||||
|
# -- Orchestration -------------------------------------------------------
|
||||||
|
|
||||||
|
async def analyze(
    self,
    target_url: str,
    competitor_urls: list[str],
    min_dr: float = 0,
    country_filter: str = "",
    limit: int = 1000,
) -> LinkGapResult:
    """Orchestrate the full link gap analysis pipeline.

    Args:
        target_url: Site whose backlink gaps are being analyzed.
        competitor_urls: Competitor sites to compare against.
        min_dr: Drop opportunities below this Domain Rating (0 keeps all).
        country_filter: Keep only opportunities matching this country code;
            domains with no country metadata are kept as well.
        limit: Max referring domains to fetch per site.

    Returns:
        A populated LinkGapResult with scored opportunities, summary,
        issues, and recommendations.
    """
    target_domain = urlparse(target_url).netloc or target_url
    comp_domains = [urlparse(c).netloc or c for c in competitor_urls]

    logger.info(f"Starting link gap analysis: {target_domain} vs {comp_domains}")

    result = LinkGapResult(
        target_url=target_url,
        target_domain=target_domain,
        competitor_urls=competitor_urls,
        competitor_domains=comp_domains,
        timestamp=datetime.now().isoformat(),
    )

    # Phase 1: Fetch target DR and referring domains concurrently.
    logger.info("Phase 1: Fetching target data...")
    target_dr, target_refdomains = await asyncio.gather(
        self.get_domain_rating(target_url),
        self.get_referring_domains(target_url, limit=limit),
        return_exceptions=True,
    )

    # Exceptions from gather degrade to neutral defaults (0 / empty list).
    result.target_dr = target_dr if isinstance(target_dr, (int, float)) else 0
    target_rd_list = target_refdomains if isinstance(target_refdomains, list) else []
    target_domain_set = {
        rd.get("domain", rd.get("domain_from", "")).lower()
        for rd in target_rd_list
        if rd.get("domain", rd.get("domain_from", ""))
    }

    # Phase 2: Fetch competitor referring domains.
    # BUG FIX: the previous implementation created coroutines in a dict and
    # awaited them one at a time, so fetches actually ran sequentially
    # despite the "parallel" intent. asyncio.gather runs them concurrently;
    # per-competitor failures are logged and degrade to an empty list
    # rather than aborting the whole analysis.
    logger.info("Phase 2: Fetching competitor data...")
    comp_fetches = await asyncio.gather(
        *(self.get_referring_domains(c, limit=limit) for c in competitor_urls),
        return_exceptions=True,
    )
    comp_results = {}
    for comp_url, fetched in zip(competitor_urls, comp_fetches):
        if isinstance(fetched, Exception):
            logger.error(f"Failed to fetch refdomains for {comp_url}: {fetched}")
            comp_results[comp_url] = []
        else:
            comp_results[comp_url] = fetched if isinstance(fetched, list) else []

    # Build each competitor's set of lowercased referring domains.
    competitor_domain_maps: dict[str, set[str]] = {}
    for comp_url, rd_list in comp_results.items():
        comp_domain = urlparse(comp_url).netloc or comp_url
        competitor_domain_maps[comp_domain] = {
            rd.get("domain", rd.get("domain_from", "")).lower()
            for rd in rd_list
            if rd.get("domain", rd.get("domain_from", ""))
        }

    # Phase 3: Find gaps (competitor referring domains minus target's).
    logger.info("Phase 3: Finding link gaps...")
    raw_gaps = self.find_gaps(target_domain_set, competitor_domain_maps)
    logger.info(f"Found {len(raw_gaps)} gap domains")

    # Phase 4: Score opportunities.
    logger.info("Phase 4: Scoring opportunities...")
    opportunities = self.score_opportunities(
        raw_gaps, comp_results, len(competitor_urls)
    )

    # Apply optional filters after scoring.
    if min_dr > 0:
        opportunities = [o for o in opportunities if o.dr >= min_dr]

    if country_filter:
        country_lower = country_filter.lower()
        # Domains without country metadata pass the filter to avoid
        # discarding opportunities on missing data.
        opportunities = [
            o for o in opportunities
            if o.country.lower() == country_lower or not o.country
        ]

    result.opportunities = opportunities
    result.top_opportunities = opportunities[:50]

    # Phase 5: Build summary statistics.
    logger.info("Phase 5: Building summary...")
    result.summary = self._build_summary(
        opportunities, comp_results, len(target_rd_list)
    )

    # Phase 6: Generate issues and recommendations from the summary.
    self._generate_issues(result)
    self._generate_recommendations(result)

    logger.info(f"Link gap analysis complete: {len(opportunities)} opportunities found")
    return result
|
||||||
|
|
||||||
|
# -- Helpers -------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
def _detect_category(domain: str) -> str:
    """Classify a domain into a source category via substring patterns."""
    name = domain.lower()

    # First match in the pattern table wins.
    for category, needles in SOURCE_CATEGORY_PATTERNS.items():
        if any(needle in name for needle in needles):
            return category

    # Fallback heuristic: academic/government TLDs (incl. Korean variants).
    if name.endswith((".edu", ".ac.kr", ".gov", ".go.kr")):
        return "edu_gov"

    return "other"
|
||||||
|
|
||||||
|
def _build_summary(
    self,
    opportunities: list[LinkOpportunity],
    comp_results: dict[str, list],
    target_rd_count: int,
) -> GapSummary:
    """Aggregate opportunity statistics into a GapSummary."""
    summary = GapSummary()
    summary.total_opportunities = len(opportunities)
    summary.target_refdomains_count = target_rd_count

    if opportunities:
        # Average DR considers only opportunities with a known (>0) DR.
        known_dr = [opp.dr for opp in opportunities if opp.dr > 0]
        summary.avg_dr = round(sum(known_dr) / max(len(known_dr), 1), 1)
        summary.high_dr_count = sum(1 for opp in opportunities if opp.dr >= 50)

        # Tally categories and countries in a single pass.
        by_category: dict[str, int] = {}
        by_country: dict[str, int] = {}
        for opp in opportunities:
            by_category[opp.category] = by_category.get(opp.category, 0) + 1
            if opp.country:
                by_country[opp.country] = by_country.get(opp.country, 0) + 1

        summary.category_breakdown = dict(
            sorted(by_category.items(), key=lambda item: item[1], reverse=True)
        )
        summary.top_countries = sorted(
            [{"country": code, "count": n} for code, n in by_country.items()],
            key=lambda entry: entry["count"],
            reverse=True,
        )[:10]

    # Record how many referring domains each competitor contributed.
    for comp_url, rd_rows in comp_results.items():
        comp_domain = urlparse(comp_url).netloc or comp_url
        summary.total_competitor_refdomains[comp_domain] = len(rd_rows)

    return summary
|
||||||
|
|
||||||
|
def _generate_issues(self, result: LinkGapResult) -> None:
    """Populate ``result.issues`` with findings derived from the summary.

    Each issue is a dict with "type" (error/warning/info), "category",
    and a human-readable "message". No issues are emitted when the
    summary is missing.
    """
    issues = []

    if result.summary:
        # Large gap warning: more than 500 domains link to competitors only.
        if result.summary.total_opportunities > 500:
            issues.append({
                "type": "warning",
                "category": "link_gap",
                "message": (
                    f"Large link gap: {result.summary.total_opportunities} domains "
                    "link to competitors but not to you"
                ),
            })

        # High-DR gap: escalated to "error" because DR 50+ links are the
        # most valuable and more than 50 of them are missing.
        if result.summary.high_dr_count > 50:
            issues.append({
                "type": "error",
                "category": "authority_gap",
                "message": (
                    f"{result.summary.high_dr_count} high-authority domains (DR 50+) "
                    "link to competitors but not to you"
                ),
            })

        # Category-specific gaps: news/media gap suggests a digital-PR deficit.
        news_gap = result.summary.category_breakdown.get("news", 0)
        if news_gap > 20:
            issues.append({
                "type": "warning",
                "category": "pr_gap",
                "message": f"{news_gap} news/media domains link to competitors - consider digital PR",
            })

        # Even a handful of .edu/.gov gaps is worth surfacing (info level).
        edu_gap = result.summary.category_breakdown.get("edu_gov", 0)
        if edu_gap > 5:
            issues.append({
                "type": "info",
                "category": "edu_gov_gap",
                "message": f"{edu_gap} .edu/.gov domains link to competitors - high-authority opportunity",
            })

    result.issues = issues
|
||||||
|
|
||||||
|
def _generate_recommendations(self, result: LinkGapResult) -> None:
    """Populate ``result.recommendations`` with actionable next steps.

    Recommendations are derived from the top 100 opportunities, grouped
    by source category, plus two cross-cutting signals: multi-competitor
    overlap and "quick win" feasibility/impact thresholds.
    """
    recs = []

    # No gaps at all: suggest widening the competitor set and stop early.
    if not result.opportunities:
        recs.append("No significant link gaps found. Consider expanding competitor list.")
        result.recommendations = recs
        return

    # Top opportunities by category (capped at 100 for relevance).
    categorized = self.categorize_sources(result.top_opportunities[:100])

    if "news" in categorized:
        news_count = len(categorized["news"])
        top_news = [o.domain for o in categorized["news"][:3]]
        recs.append(
            f"Pursue {news_count} news/media link opportunities. "
            f"Top targets: {', '.join(top_news)}. "
            "Strategy: create newsworthy content, press releases, expert commentary."
        )

    if "blog" in categorized:
        blog_count = len(categorized["blog"])
        recs.append(
            f"Target {blog_count} blog/content site opportunities via guest posting, "
            "collaborative content, and expert interviews."
        )

    if "directory" in categorized:
        dir_count = len(categorized["directory"])
        recs.append(
            f"Submit to {dir_count} relevant directories and listing sites. "
            "Low effort, moderate impact for local SEO signals."
        )

    if "forum" in categorized:
        forum_count = len(categorized["forum"])
        recs.append(
            f"Engage in {forum_count} forum/community sites with helpful answers "
            "and resource sharing. Build presence before linking."
        )

    if "korean_platform" in categorized:
        kr_count = len(categorized["korean_platform"])
        recs.append(
            f"Build presence on {kr_count} Korean platforms (Naver, Tistory, Brunch). "
            "Critical for Korean SERP visibility."
        )

    if "edu_gov" in categorized:
        eg_count = len(categorized["edu_gov"])
        recs.append(
            f"Target {eg_count} .edu/.gov link opportunities through scholarship "
            "programs, research partnerships, or government resource contributions."
        )

    # Multi-competitor overlap: domains linking to 2+ competitors are the
    # strongest social proof of industry relevance.
    multi_comp = [o for o in result.top_opportunities if o.competitor_count >= 2]
    if multi_comp:
        recs.append(
            f"{len(multi_comp)} domains link to multiple competitors but not to you. "
            "These are high-priority targets as they validate industry relevance."
        )

    # Quick wins: high feasibility (>=60) with at least moderate impact (>=30).
    quick_wins = [
        o for o in result.opportunities[:100]
        if o.feasibility_score >= 60 and o.impact_score >= 30
    ]
    if quick_wins:
        recs.append(
            f"Prioritize {len(quick_wins)} quick-win opportunities with high "
            "feasibility and moderate impact for fastest link acquisition."
        )

    result.recommendations = recs
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Output Formatting
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def format_rich_output(result: LinkGapResult) -> None:
    """Render the gap analysis to the console using Rich tables.

    Sections, in order: header, summary table, category breakdown,
    top-25 opportunities, issues, recommendations. Sections with no
    data are silently skipped.
    """
    console.print(f"\n[bold cyan]Link Gap Analysis: {result.target_domain}[/bold cyan]")
    console.print(f"[dim]vs {', '.join(result.competitor_domains)}[/dim]")
    console.print(f"[dim]Timestamp: {result.timestamp}[/dim]\n")

    # Summary
    if result.summary:
        summary_table = Table(title="Summary", show_header=True, header_style="bold magenta")
        summary_table.add_column("Metric", style="cyan")
        summary_table.add_column("Value", style="green")
        summary_table.add_row("Target DR", str(result.target_dr))
        summary_table.add_row("Target Referring Domains", str(result.summary.target_refdomains_count))
        summary_table.add_row("Total Gap Opportunities", str(result.summary.total_opportunities))
        summary_table.add_row("Avg Opportunity DR", str(result.summary.avg_dr))
        summary_table.add_row("High-DR Opportunities (50+)", str(result.summary.high_dr_count))

        # One row per competitor with its referring-domain count.
        for comp, count in result.summary.total_competitor_refdomains.items():
            summary_table.add_row(f" {comp} Refdomains", str(count))

        console.print(summary_table)

    # Category breakdown
    if result.summary and result.summary.category_breakdown:
        cat_table = Table(title="\nCategory Breakdown", show_header=True, header_style="bold magenta")
        cat_table.add_column("Category", style="cyan")
        cat_table.add_column("Count", style="green")
        for cat, count in result.summary.category_breakdown.items():
            cat_table.add_row(cat, str(count))
        console.print(cat_table)

    # Top opportunities (capped at 25 rows for terminal readability)
    if result.top_opportunities:
        opp_table = Table(
            title=f"\nTop Opportunities (showing {min(25, len(result.top_opportunities))})",
            show_header=True,
            header_style="bold magenta",
        )
        opp_table.add_column("Domain", style="cyan", max_width=35)
        opp_table.add_column("DR", style="green", justify="right")
        opp_table.add_column("Category", style="yellow")
        opp_table.add_column("Comps", justify="right")
        opp_table.add_column("Score", style="bold green", justify="right")
        opp_table.add_column("Feasibility", justify="right")
        opp_table.add_column("Impact", justify="right")

        for opp in result.top_opportunities[:25]:
            opp_table.add_row(
                opp.domain[:35],
                str(int(opp.dr)),
                opp.category,
                str(opp.competitor_count),
                f"{opp.overall_score:.1f}",
                f"{opp.feasibility_score:.0f}",
                f"{opp.impact_score:.0f}",
            )
        console.print(opp_table)

    # Issues
    if result.issues:
        console.print("\n[bold red]Issues:[/bold red]")
        for issue in result.issues:
            # Map issue severity to a colored label; unknown types fall
            # back to a dim INFO.
            icon_map = {"error": "[red]ERROR[/red]", "warning": "[yellow]WARN[/yellow]", "info": "[blue]INFO[/blue]"}
            icon = icon_map.get(issue["type"], "[dim]INFO[/dim]")
            console.print(f" {icon} [{issue['category']}] {issue['message']}")

    # Recommendations
    if result.recommendations:
        console.print("\n[bold green]Recommendations:[/bold green]")
        for i, rec in enumerate(result.recommendations, 1):
            console.print(f" {i}. {rec}")

    console.print()
|
||||||
|
|
||||||
|
|
||||||
|
def result_to_dict(result: LinkGapResult) -> dict[str, Any]:
    """Convert gap result to a JSON-serializable dict.

    Dataclass fields (summary, opportunities) are expanded via ``asdict``;
    key insertion order matches the original serialization layout.
    """
    payload: dict[str, Any] = {
        "target_url": result.target_url,
        "target_domain": result.target_domain,
        "target_dr": result.target_dr,
        "competitor_urls": result.competitor_urls,
        "competitor_domains": result.competitor_domains,
    }
    payload["summary"] = asdict(result.summary) if result.summary else None
    payload["opportunities"] = [asdict(opp) for opp in result.opportunities]
    payload["top_opportunities"] = [asdict(opp) for opp in result.top_opportunities]
    payload["issues"] = result.issues
    payload["recommendations"] = result.recommendations
    payload["timestamp"] = result.timestamp
    return payload
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse command-line arguments.

    Returns the parsed namespace with: target (str), competitor
    (list[str], one entry per --competitor flag), min_dr (float),
    country (str), limit (int), json (bool), output (str | None).
    """
    parser = argparse.ArgumentParser(
        description="Link Gap Finder - Identify link building opportunities vs competitors",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --json
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --competitor https://comp2.com --min-dr 30 --json
python link_gap_finder.py --target https://example.com --competitor https://comp1.com --country kr --output gap_report.json
""",
    )
    parser.add_argument("--target", required=True, help="Target URL or domain")
    # action="append" collects repeated --competitor flags into a list.
    parser.add_argument(
        "--competitor", action="append", required=True,
        help="Competitor URL or domain (can be repeated)",
    )
    parser.add_argument(
        "--min-dr", type=float, default=0,
        help="Minimum DR filter for opportunities (default: 0)",
    )
    parser.add_argument(
        "--country", default="",
        help="Filter by country code (e.g., kr, us, jp)",
    )
    parser.add_argument(
        "--limit", type=int, default=1000,
        help="Max referring domains to fetch per site (default: 1000)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", "-o", help="Save output to file")
    return parser.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """Main entry point: run the analysis and emit the selected output.

    Output modes: --json prints a JSON report to stdout; --output writes
    the same JSON to a file; otherwise a Rich console report is shown.
    Exits with status 1 on interruption or analysis failure.
    """
    args = parse_args()

    finder = LinkGapFinder()

    try:
        result = await finder.analyze(
            target_url=args.target,
            competitor_urls=args.competitor,
            min_dr=args.min_dr,
            country_filter=args.country,
            limit=args.limit,
        )

        # JSON output path: serialize once, then write and/or print.
        if args.json or args.output:
            output_data = result_to_dict(result)
            json_str = json.dumps(output_data, indent=2, ensure_ascii=False)

            if args.output:
                with open(args.output, "w", encoding="utf-8") as f:
                    f.write(json_str)
                logger.info(f"Report saved to {args.output}")

            if args.json:
                print(json_str)
        else:
            # Human-readable console report.
            format_rich_output(result)

        finder.print_stats()

    except KeyboardInterrupt:
        logger.warning("Analysis interrupted by user")
        sys.exit(1)
    except Exception as e:
        logger.error(f"Analysis failed: {e}")
        # In JSON mode, emit a machine-readable error object before exiting.
        if args.json:
            print(json.dumps({"error": str(e)}, indent=2))
        sys.exit(1)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: run the async CLI under a fresh event loop.
    asyncio.run(main())
|
||||||
@@ -0,0 +1,8 @@
|
|||||||
|
# 22-seo-link-building dependencies
|
||||||
|
requests>=2.31.0
|
||||||
|
aiohttp>=3.9.0
|
||||||
|
pandas>=2.1.0
|
||||||
|
tenacity>=8.2.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
rich>=13.7.0
|
||||||
109
custom-skills/22-seo-link-building/desktop/SKILL.md
Normal file
109
custom-skills/22-seo-link-building/desktop/SKILL.md
Normal file
@@ -0,0 +1,109 @@
|
|||||||
|
---
|
||||||
|
name: seo-link-building
|
||||||
|
description: |
|
||||||
|
Link building diagnosis and backlink analysis tool.
|
||||||
|
Triggers: backlink audit, link building, referring domains, toxic links, link gap, broken backlinks, 백링크 분석, 링크빌딩.
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Link Building Diagnosis
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Analyze backlink profiles, detect toxic links, find competitor link gaps, track link velocity, and map Korean platform links. Provides actionable link building recommendations.
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
1. **Backlink Profile Audit** - DR, referring domains, dofollow ratio, anchor distribution
|
||||||
|
2. **Toxic Link Detection** - PBN patterns, spam domains, link farm identification
|
||||||
|
3. **Competitor Link Gap Analysis** - Domains linking to competitors but not target
|
||||||
|
4. **Link Velocity Tracking** - New/lost referring domains over time
|
||||||
|
5. **Broken Backlink Recovery** - Find and reclaim broken high-DR backlinks
|
||||||
|
6. **Korean Platform Mapping** - Naver Blog, Cafe, Tistory, Brunch, Korean news
|
||||||
|
|
||||||
|
## MCP Tool Usage
|
||||||
|
|
||||||
|
### SEO Data
|
||||||
|
```
|
||||||
|
our-seo-agent CLI: Primary backlink data source (future); use --input for pre-fetched JSON
|
||||||
|
WebSearch / WebFetch: Supplementary backlink data
|
||||||
|
```
|
||||||
|
|
||||||
|
### Notion for Report Storage
|
||||||
|
```
|
||||||
|
mcp__notion__notion-create-pages: Save audit report to SEO Audit Log
|
||||||
|
mcp__notion__notion-update-page: Update existing audit entries
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### 1. Backlink Profile Audit
|
||||||
|
1. Fetch Domain Rating via `site-explorer-domain-rating`
|
||||||
|
2. Get backlink stats via `site-explorer-backlinks-stats`
|
||||||
|
3. Retrieve referring domains via `site-explorer-referring-domains`
|
||||||
|
4. Analyze anchor distribution via `site-explorer-anchors`
|
||||||
|
5. Detect toxic links (PBN patterns, spam keywords, suspicious TLDs)
|
||||||
|
6. Map Korean platform links from referring domains
|
||||||
|
7. Report with issues and recommendations
|
||||||
|
|
||||||
|
### 2. Link Gap Analysis
|
||||||
|
1. Fetch target referring domains
|
||||||
|
2. Fetch competitor referring domains (parallel)
|
||||||
|
3. Compute set difference (competitor - target)
|
||||||
|
4. Score opportunities by DR, traffic, category
|
||||||
|
5. Categorize sources (news, blog, forum, directory, Korean platform)
|
||||||
|
6. Rank by feasibility and impact
|
||||||
|
7. Report top opportunities with recommendations
|
||||||
|
|
||||||
|
### 3. Link Velocity Check
|
||||||
|
1. Fetch refdomains-history for last 90 days
|
||||||
|
2. Calculate new/lost referring domains per period
|
||||||
|
3. Determine velocity trend (growing/stable/declining)
|
||||||
|
4. Flag declining velocity as issue
|
||||||
|
|
||||||
|
### 4. Broken Backlink Recovery
|
||||||
|
1. Fetch broken backlinks via `site-explorer-broken-backlinks`
|
||||||
|
2. Sort by DR (highest value first)
|
||||||
|
3. Recommend 301 redirects or content recreation
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Link Building Audit: [domain]
|
||||||
|
|
||||||
|
### Overview
|
||||||
|
- Domain Rating: [DR]
|
||||||
|
- Referring Domains: [count]
|
||||||
|
- Dofollow Ratio: [ratio]
|
||||||
|
- Toxic Links: [count] ([risk level])
|
||||||
|
|
||||||
|
### Anchor Distribution
|
||||||
|
| Type | Count | % |
|
||||||
|
|------|-------|---|
|
||||||
|
| Branded | [n] | [%] |
|
||||||
|
| Exact Match | [n] | [%] |
|
||||||
|
| Generic | [n] | [%] |
|
||||||
|
| Naked URL | [n] | [%] |
|
||||||
|
|
||||||
|
### Toxic Links (Top 10)
|
||||||
|
| Domain | Risk Score | Reason |
|
||||||
|
|--------|-----------|--------|
|
||||||
|
|
||||||
|
### Korean Platform Links
|
||||||
|
| Platform | Count |
|
||||||
|
|----------|-------|
|
||||||
|
|
||||||
|
### Link Velocity
|
||||||
|
| Period | New | Lost |
|
||||||
|
|--------|-----|------|
|
||||||
|
|
||||||
|
### Recommendations
|
||||||
|
1. [Priority actions]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Properties**: Issue (title), Site (url), Category (Link Building), Priority, Found Date, Audit ID
|
||||||
|
- **Language**: Korean with English technical terms
|
||||||
|
- **Audit ID Format**: LINK-YYYYMMDD-NNN
|
||||||
8
custom-skills/22-seo-link-building/desktop/skill.yaml
Normal file
8
custom-skills/22-seo-link-building/desktop/skill.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: seo-link-building
|
||||||
|
description: |
|
||||||
|
Link building diagnosis and backlink analysis. Triggers: backlink audit, link building, referring domains, toxic links, link gap, broken backlinks.
|
||||||
|
allowed-tools:
|
||||||
|
- mcp__ahrefs__*
|
||||||
|
- mcp__notion__*
|
||||||
|
- WebSearch
|
||||||
|
- WebFetch
|
||||||
70
custom-skills/22-seo-link-building/desktop/tools/ahrefs.md
Normal file
70
custom-skills/22-seo-link-building/desktop/tools/ahrefs.md
Normal file
@@ -0,0 +1,70 @@
|
|||||||
|
# Ahrefs
|
||||||
|
|
||||||
|
## Tools Used
|
||||||
|
|
||||||
|
### site-explorer-all-backlinks
|
||||||
|
- **Purpose**: Get all backlinks for a target domain
|
||||||
|
- **Parameters**: target, mode (domain/prefix/url), limit, order_by
|
||||||
|
- **Returns**: List of backlinks with source URL, domain, DR, anchor, dofollow status
|
||||||
|
|
||||||
|
### site-explorer-backlinks-stats
|
||||||
|
- **Purpose**: Backlink statistics overview
|
||||||
|
- **Parameters**: target, mode
|
||||||
|
- **Returns**: Total backlinks, referring domains, dofollow/nofollow counts
|
||||||
|
|
||||||
|
### site-explorer-referring-domains
|
||||||
|
- **Purpose**: List all referring domains
|
||||||
|
- **Parameters**: target, mode, limit, order_by
|
||||||
|
- **Returns**: Domains with DR, backlinks count, traffic, country
|
||||||
|
|
||||||
|
### site-explorer-anchors
|
||||||
|
- **Purpose**: Anchor text distribution
|
||||||
|
- **Parameters**: target, mode, limit, order_by
|
||||||
|
- **Returns**: Anchor texts with backlink and referring domain counts
|
||||||
|
|
||||||
|
### site-explorer-broken-backlinks
|
||||||
|
- **Purpose**: Find broken backlinks for recovery
|
||||||
|
- **Parameters**: target, mode, limit, order_by
|
||||||
|
- **Returns**: Broken links with source, target URL, HTTP code, DR
|
||||||
|
|
||||||
|
### site-explorer-domain-rating
|
||||||
|
- **Purpose**: Get Domain Rating for a target
|
||||||
|
- **Parameters**: target
|
||||||
|
- **Returns**: Domain Rating value and Ahrefs rank
|
||||||
|
|
||||||
|
### site-explorer-domain-rating-history
|
||||||
|
- **Purpose**: DR trend over time
|
||||||
|
- **Parameters**: target, date_from
|
||||||
|
- **Returns**: Historical DR data points
|
||||||
|
|
||||||
|
### site-explorer-refdomains-history
|
||||||
|
- **Purpose**: Referring domains trend over time
|
||||||
|
- **Parameters**: target, mode, date_from
|
||||||
|
- **Returns**: Historical referring domain counts
|
||||||
|
|
||||||
|
### site-explorer-linked-domains
|
||||||
|
- **Purpose**: Domains linked from the target
|
||||||
|
- **Parameters**: target, mode, limit
|
||||||
|
- **Returns**: Outgoing linked domains with counts
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- Ahrefs MCP tools are available via `mcp__ahrefs__*` prefix
|
||||||
|
- No API key needed when using MCP (handled by tool server)
|
||||||
|
- Rate limits: Follow Ahrefs plan limits (typically 500 rows/request)
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
# Get backlink stats
|
||||||
|
mcp__ahrefs__site-explorer-backlinks-stats(target="example.com", mode="domain")
|
||||||
|
|
||||||
|
# Get referring domains sorted by DR
|
||||||
|
mcp__ahrefs__site-explorer-referring-domains(target="example.com", mode="domain", limit=500, order_by="domain_rating:desc")
|
||||||
|
|
||||||
|
# Get anchor text distribution
|
||||||
|
mcp__ahrefs__site-explorer-anchors(target="example.com", mode="domain", limit=200)
|
||||||
|
|
||||||
|
# Find broken backlinks
|
||||||
|
mcp__ahrefs__site-explorer-broken-backlinks(target="example.com", mode="domain", limit=100)
|
||||||
|
```
|
||||||
39
custom-skills/22-seo-link-building/desktop/tools/notion.md
Normal file
39
custom-skills/22-seo-link-building/desktop/tools/notion.md
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Notion
|
||||||
|
|
||||||
|
## Tools Used
|
||||||
|
|
||||||
|
### notion-create-pages
|
||||||
|
- **Purpose**: Save link building audit reports to SEO Audit Log
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Required Properties**:
|
||||||
|
- Issue (title): Report title in Korean with date
|
||||||
|
- Site (url): Audited website URL
|
||||||
|
- Category (select): "Link Building"
|
||||||
|
- Priority (select): Critical / High / Medium / Low
|
||||||
|
- Found Date (date): YYYY-MM-DD
|
||||||
|
- Audit ID (rich_text): LINK-YYYYMMDD-NNN
|
||||||
|
|
||||||
|
### notion-update-page
|
||||||
|
- **Purpose**: Update existing audit entries with follow-up findings
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- Notion MCP tools available via `mcp__notion__*` prefix
|
||||||
|
- Authentication handled by MCP tool server
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
# Create a link building audit report
|
||||||
|
mcp__notion__notion-create-pages(
|
||||||
|
parent={"database_id": "2c8581e5-8a1e-8035-880b-e38cefc2f3ef"},
|
||||||
|
properties={
|
||||||
|
"Issue": {"title": [{"text": {"content": "백링크 프로필 분석 - example.com (2025-01-15)"}}]},
|
||||||
|
"Site": {"url": "https://example.com"},
|
||||||
|
"Category": {"select": {"name": "Link Building"}},
|
||||||
|
"Priority": {"select": {"name": "High"}},
|
||||||
|
"Found Date": {"date": {"start": "2025-01-15"}},
|
||||||
|
"Audit ID": {"rich_text": [{"text": {"content": "LINK-20250115-001"}}]}
|
||||||
|
}
|
||||||
|
)
|
||||||
|
```
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
# WebSearch
|
||||||
|
|
||||||
|
## Tools Used
|
||||||
|
|
||||||
|
### WebSearch
|
||||||
|
- **Purpose**: Research link building strategies, competitor insights, and industry best practices
|
||||||
|
- **Usage**: Supplement Ahrefs data with web research for context
|
||||||
|
|
||||||
|
### WebFetch
|
||||||
|
- **Purpose**: Fetch specific web pages for content analysis and link prospecting
|
||||||
|
- **Usage**: Verify link opportunities, check page content relevance
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
```
|
||||||
|
# Research link building strategies for a niche
|
||||||
|
WebSearch("link building strategies for SaaS companies 2025")
|
||||||
|
|
||||||
|
# Research Korean link building opportunities
|
||||||
|
WebSearch("네이버 블로그 백링크 전략 2025")
|
||||||
|
|
||||||
|
# Check if a target page is relevant for outreach
|
||||||
|
WebFetch("https://example.com/resources", "What topics does this page cover?")
|
||||||
|
```
|
||||||
140
custom-skills/23-seo-content-strategy/code/CLAUDE.md
Normal file
140
custom-skills/23-seo-content-strategy/code/CLAUDE.md
Normal file
@@ -0,0 +1,140 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Content strategy tool for SEO-driven content planning. Performs content inventory via sitemap crawl and our-seo-agent CLI, scores content performance, detects content decay, analyzes topic gaps vs competitors, maps topic clusters, and generates content briefs. Supports Korean content patterns (Naver Blog format, review/후기 content).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# Content audit
|
||||||
|
python scripts/content_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Content gap analysis
|
||||||
|
python scripts/content_gap_analyzer.py --target https://example.com --competitor https://competitor.com --json
|
||||||
|
|
||||||
|
# Generate content brief
|
||||||
|
python scripts/content_brief_generator.py --keyword "치과 임플란트 비용" --url https://example.com --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `content_auditor.py` | Content inventory, performance scoring, decay detection | Content inventory with scores and decay flags |
|
||||||
|
| `content_gap_analyzer.py` | Topic gap analysis and cluster mapping vs competitors | Missing topics, cluster map, editorial calendar |
|
||||||
|
| `content_brief_generator.py` | Generate SEO content briefs with outlines | Brief with outline, keywords, word count targets |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## Content Auditor
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full content audit
|
||||||
|
python scripts/content_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Detect decaying content
|
||||||
|
python scripts/content_auditor.py --url https://example.com --decay --json
|
||||||
|
|
||||||
|
# Filter by content type
|
||||||
|
python scripts/content_auditor.py --url https://example.com --type blog --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Content inventory via sitemap crawl + our-seo-agent CLI or pre-fetched data
|
||||||
|
- Performance scoring (traffic, rankings, backlinks)
|
||||||
|
- Content decay detection (pages losing traffic over time)
|
||||||
|
- Content type classification (blog, product, service, landing, resource)
|
||||||
|
- Word count and freshness assessment
|
||||||
|
- Korean content format analysis (Naver Blog style, 후기/review content)
|
||||||
|
|
||||||
|
## Content Gap Analyzer
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Gap analysis vs competitor
|
||||||
|
python scripts/content_gap_analyzer.py --target https://example.com --competitor https://comp1.com --json
|
||||||
|
|
||||||
|
# With topic cluster mapping
|
||||||
|
python scripts/content_gap_analyzer.py --target https://example.com --competitor https://comp1.com --clusters --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Topic gap identification vs competitors
|
||||||
|
- Topic cluster mapping (pillar + cluster pages)
|
||||||
|
- Content freshness comparison
|
||||||
|
- Content volume comparison
|
||||||
|
- Editorial calendar generation with priority scoring
|
||||||
|
- Korean content opportunity detection
|
||||||
|
|
||||||
|
## Content Brief Generator
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Generate brief for keyword
|
||||||
|
python scripts/content_brief_generator.py --keyword "치과 임플란트 비용" --url https://example.com --json
|
||||||
|
|
||||||
|
# With competitor analysis
|
||||||
|
python scripts/content_brief_generator.py --keyword "dental implant cost" --url https://example.com --competitors 5 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Content outline generation with H2/H3 structure
|
||||||
|
- Target keyword list (primary + secondary + LSI)
|
||||||
|
- Word count recommendation based on top-ranking pages
|
||||||
|
- Competitor content analysis (structure, word count, topics covered)
|
||||||
|
- Internal linking suggestions
|
||||||
|
- Korean content format recommendations
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (future); use `--input` for pre-fetched JSON |
|
||||||
|
| WebSearch / WebFetch | Supplementary live data |
|
||||||
|
| Notion MCP | Save audit report to database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"url": "https://example.com",
|
||||||
|
"content_inventory": {
|
||||||
|
"total_pages": 150,
|
||||||
|
"by_type": {"blog": 80, "product": 40, "service": 20, "other": 10},
|
||||||
|
"avg_performance_score": 45
|
||||||
|
},
|
||||||
|
"decaying_content": [...],
|
||||||
|
"top_performers": [...],
|
||||||
|
"gaps": [...],
|
||||||
|
"clusters": [...],
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Audited website URL |
|
||||||
|
| Category | Select | Content Strategy |
|
||||||
|
| Priority | Select | Based on gap severity |
|
||||||
|
| Found Date | Date | Audit date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: CONTENT-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is
|
||||||
|
- URLs and code remain unchanged
|
||||||
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from asyncio import Semaphore
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
retry_if_exception_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Logging setup
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
|
||||||
|
"""Rate limiter using token bucket algorithm."""
|
||||||
|
|
||||||
|
def __init__(self, rate: float, per: float = 1.0):
|
||||||
|
"""
|
||||||
|
Initialize rate limiter.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
rate: Number of requests allowed
|
||||||
|
per: Time period in seconds (default: 1 second)
|
||||||
|
"""
|
||||||
|
self.rate = rate
|
||||||
|
self.per = per
|
||||||
|
self.tokens = rate
|
||||||
|
self.last_update = datetime.now()
|
||||||
|
self._lock = asyncio.Lock()
|
||||||
|
|
||||||
|
async def acquire(self) -> None:
|
||||||
|
"""Acquire a token, waiting if necessary."""
|
||||||
|
async with self._lock:
|
||||||
|
now = datetime.now()
|
||||||
|
elapsed = (now - self.last_update).total_seconds()
|
||||||
|
self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
|
||||||
|
self.last_update = now
|
||||||
|
|
||||||
|
if self.tokens < 1:
|
||||||
|
wait_time = (1 - self.tokens) * (self.per / self.rate)
|
||||||
|
await asyncio.sleep(wait_time)
|
||||||
|
self.tokens = 0
|
||||||
|
else:
|
||||||
|
self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with concurrency and rate limiting.

    Combines a semaphore (bounds concurrent requests), a :class:`RateLimiter`
    (bounds request rate), and tenacity-based retry with exponential backoff.
    Subclasses wrap raw calls with ``_rate_limited_request`` or fan out with
    ``batch_requests``.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests
            requests_per_second: Rate limit
            logger: Logger instance
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # NOTE(review): "retries" is initialized but never incremented —
        # tenacity performs retries inside the decorator.  Wire a
        # before_sleep callback to count them, or drop the key.
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute one request under the semaphore and rate limiter.

        Retried up to 3 times with exponential backoff; the decorator
        re-runs the whole body, so each attempt re-acquires the semaphore
        and a token and is counted in ``stats["requests"]``.

        Args:
            coro: Zero-argument callable returning an awaitable.

        Raises:
            Exception: Re-raised (after logging) when an attempt fails.
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Results are returned in the same order as ``requests``; a request
        that still fails after retries yields ``{"error": str(exc)}``
        instead of raising.

        Args:
            requests: Zero-argument callables returning awaitables.
            desc: Progress-bar label (used only when tqdm is installed).
        """
        try:
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                return {"error": str(e)}

        tasks = [execute(req) for req in requests]

        if has_tqdm:
            # FIX: tqdm.gather preserves input order; the previous
            # tqdm.as_completed loop returned results in *completion* order,
            # so ordering silently differed depending on whether tqdm was
            # installed — breaking callers that zip results onto inputs.
            return await tqdm.gather(*tasks, desc=desc)
        # execute() already swallows exceptions; return_exceptions is a
        # belt-and-braces guard only.
        return await asyncio.gather(*tasks, return_exceptions=True)

    def print_stats(self) -> None:
        """Log request statistics at INFO level."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f" Total Requests: {self.stats['requests']}")
        self.logger.info(f" Successful: {self.stats['success']}")
        self.logger.info(f" Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton config instance shared by all scripts that import this module.
config = ConfigManager()
|
||||||
@@ -0,0 +1,716 @@
|
|||||||
|
"""
|
||||||
|
Content Auditor - SEO Content Inventory & Performance Analysis
|
||||||
|
==============================================================
|
||||||
|
Purpose: Build content inventory, score performance, detect decay,
|
||||||
|
classify content types, and analyze Korean content patterns.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class ContentPage:
    """Single content page with performance metrics.

    Populated incrementally: URL/title from the sitemap crawl, traffic and
    keyword metrics from Ahrefs, decay flags from detect_decay.
    """
    url: str  # canonical page URL
    title: str = ""  # page title (empty when not crawled)
    content_type: str = "other"  # blog / product / service / landing / resource / other
    word_count: int = 0  # body word count
    traffic: int = 0  # organic traffic metric (from Ahrefs top-pages data)
    keywords_count: int = 0  # number of ranking keywords
    backlinks: int = 0  # backlink count for the page
    performance_score: float = 0.0  # composite 0-100 score (see score_performance)
    last_modified: str = ""  # last-modified date string, ISO-like format
    is_decaying: bool = False  # set in place by detect_decay
    decay_rate: float = 0.0  # estimated decline percentage (negative = declining)
    korean_pattern: str = ""  # matched Korean content pattern name, if any
    topics: list[str] = field(default_factory=list)  # extracted topic labels
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContentInventory:
    """Aggregated content inventory summary across all audited pages."""
    total_pages: int = 0  # number of pages in the inventory
    by_type: dict[str, int] = field(default_factory=dict)  # page counts keyed by content_type
    avg_performance_score: float = 0.0  # mean performance_score across pages
    avg_word_count: float = 0.0  # mean word_count across pages
    pages: list[ContentPage] = field(default_factory=list)  # full page list
    freshness_distribution: dict[str, int] = field(default_factory=dict)  # fresh/aging/stale/unknown counts
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContentAuditResult:
    """Full content audit result for one site (top-level JSON payload)."""
    url: str  # audited site URL
    timestamp: str = ""  # audit timestamp, ISO format
    content_inventory: ContentInventory = field(default_factory=ContentInventory)  # aggregate summary
    top_performers: list[ContentPage] = field(default_factory=list)  # highest-scoring pages
    decaying_content: list[ContentPage] = field(default_factory=list)  # pages flagged by detect_decay
    korean_content_analysis: dict[str, Any] = field(default_factory=dict)  # Korean pattern summary
    recommendations: list[str] = field(default_factory=list)  # human-readable action items
    errors: list[str] = field(default_factory=list)  # non-fatal errors collected during the audit
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# URL pattern rules for content type classification
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# URL/title regex fragments per content type.  classify_content_type counts
# matches per type and picks the highest scorer.  Both English and Korean
# path segments are covered.
CONTENT_TYPE_PATTERNS = {
    # Editorial/article content (Korean: 블로그, 소식, 뉴스)
    "blog": [
        r"/blog/", r"/post/", r"/posts/", r"/article/", r"/articles/",
        r"/news/", r"/magazine/", r"/stories/", r"/insights/",
        r"/블로그/", r"/소식/", r"/뉴스/",
    ],
    # E-commerce pages (Korean: 제품, 상품, 쇼핑)
    "product": [
        r"/product/", r"/products/", r"/shop/", r"/store/",
        r"/item/", r"/goods/", r"/catalog/",
        r"/제품/", r"/상품/", r"/쇼핑/",
    ],
    # Service/solution pages, incl. medical-clinic paths (진료/시술/치료)
    "service": [
        r"/service/", r"/services/", r"/solutions/", r"/offering/",
        r"/진료/", r"/서비스/", r"/시술/", r"/치료/",
    ],
    # Campaign / landing pages (Korean: 이벤트, 프로모션)
    "landing": [
        r"/lp/", r"/landing/", r"/campaign/", r"/promo/",
        r"/event/", r"/이벤트/", r"/프로모션/",
    ],
    # Guides, downloads, and help content (Korean: 가이드, 자료)
    "resource": [
        r"/resource/", r"/resources/", r"/guide/", r"/guides/",
        r"/whitepaper/", r"/ebook/", r"/download/", r"/faq/",
        r"/help/", r"/support/", r"/가이드/", r"/자료/",
    ],
}
|
||||||
|
|
||||||
|
# Korean content-format regex fragments, matched against titles/URLs to tag
# common Korean SEO formats (used by identify_korean_patterns).
KOREAN_CONTENT_PATTERNS = {
    # Review-style posts typical of Naver Blog (후기 = "review/testimonial")
    "naver_blog_style": [
        r"후기", r"리뷰", r"체험", r"솔직후기", r"방문후기",
        r"사용후기", r"이용후기",
    ],
    # List-style posts: "top N", "N picks", roundups, comparisons
    "listicle": [
        r"추천", r"베스트", r"TOP\s*\d+", r"\d+선", r"\d+가지",
        r"모음", r"정리", r"비교",
    ],
    # How-to guides (방법 = "method/how to")
    "how_to": [
        r"방법", r"하는\s*법", r"하는\s*방법", r"가이드",
        r"따라하기", r"시작하기", r"알아보기",
    ],
    # Informational intent: definitions, differences, pros/cons, cost/price
    "informational": [
        r"이란", r"뜻", r"의미", r"차이", r"비교",
        r"장단점", r"효과", r"부작용", r"비용", r"가격",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ContentAuditor
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ContentAuditor(BaseAsyncClient):
|
||||||
|
"""Content auditor using Ahrefs API and sitemap crawling."""
|
||||||
|
|
||||||
|
    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        """Initialize the auditor with concurrency/rate limits (see BaseAsyncClient).

        Args:
            max_concurrent: Maximum simultaneous HTTP requests.
            requests_per_second: Request rate cap.
        """
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
        # Created lazily by _ensure_session on first use; released by close().
        self.session: aiohttp.ClientSession | None = None
|
||||||
|
|
||||||
|
async def _ensure_session(self) -> aiohttp.ClientSession:
|
||||||
|
if self.session is None or self.session.closed:
|
||||||
|
timeout = aiohttp.ClientTimeout(total=30)
|
||||||
|
self.session = aiohttp.ClientSession(timeout=timeout)
|
||||||
|
return self.session
|
||||||
|
|
||||||
|
async def close(self) -> None:
|
||||||
|
if self.session and not self.session.closed:
|
||||||
|
await self.session.close()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Ahrefs data retrieval
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def get_top_pages(self, url: str, limit: int = 100) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Retrieve top pages via Ahrefs site-explorer-top-pages.
|
||||||
|
|
||||||
|
Returns list of dicts with keys: url, traffic, keywords, value, top_keyword.
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Fetching top pages from Ahrefs for {url}")
|
||||||
|
target = urlparse(url).netloc or url
|
||||||
|
try:
|
||||||
|
# Ahrefs MCP call: site-explorer-top-pages
|
||||||
|
# In MCP context this would be called by the agent.
|
||||||
|
# Standalone fallback: use REST API if AHREFS_API_KEY is set.
|
||||||
|
api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
|
||||||
|
if not api_key:
|
||||||
|
self.logger.warning("AHREFS_API_KEY not set; returning empty top pages")
|
||||||
|
return []
|
||||||
|
|
||||||
|
resp = requests.get(
|
||||||
|
"https://api.ahrefs.com/v3/site-explorer/top-pages",
|
||||||
|
params={"target": target, "limit": limit, "select": "url,traffic,keywords,value,top_keyword"},
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
pages = data.get("pages", data.get("items", []))
|
||||||
|
self.logger.info(f"Retrieved {len(pages)} top pages")
|
||||||
|
return pages
|
||||||
|
except Exception as exc:
|
||||||
|
self.logger.warning(f"Ahrefs top-pages lookup failed: {exc}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def get_pages_by_traffic(self, url: str, limit: int = 100) -> list[dict]:
|
||||||
|
"""
|
||||||
|
Retrieve pages sorted by organic traffic via Ahrefs site-explorer-pages-by-traffic.
|
||||||
|
|
||||||
|
Returns list of dicts with keys: url, traffic, keywords, top_keyword.
|
||||||
|
"""
|
||||||
|
self.logger.info(f"Fetching pages-by-traffic from Ahrefs for {url}")
|
||||||
|
target = urlparse(url).netloc or url
|
||||||
|
try:
|
||||||
|
api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
|
||||||
|
if not api_key:
|
||||||
|
self.logger.warning("AHREFS_API_KEY not set; returning empty traffic pages")
|
||||||
|
return []
|
||||||
|
|
||||||
|
resp = requests.get(
|
||||||
|
"https://api.ahrefs.com/v3/site-explorer/pages-by-traffic",
|
||||||
|
params={"target": target, "limit": limit, "select": "url,traffic,keywords,top_keyword"},
|
||||||
|
headers={"Authorization": f"Bearer {api_key}"},
|
||||||
|
timeout=30,
|
||||||
|
)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
pages = data.get("pages", data.get("items", []))
|
||||||
|
self.logger.info(f"Retrieved {len(pages)} pages by traffic")
|
||||||
|
return pages
|
||||||
|
except Exception as exc:
|
||||||
|
self.logger.warning(f"Ahrefs pages-by-traffic lookup failed: {exc}")
|
||||||
|
return []
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Sitemap crawling
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def crawl_sitemap(self, url: str) -> list[str]:
|
||||||
|
"""Discover URLs from sitemap.xml."""
|
||||||
|
sitemap_urls_to_try = [
|
||||||
|
f"{url.rstrip('/')}/sitemap.xml",
|
||||||
|
f"{url.rstrip('/')}/sitemap_index.xml",
|
||||||
|
f"{url.rstrip('/')}/post-sitemap.xml",
|
||||||
|
]
|
||||||
|
discovered: list[str] = []
|
||||||
|
session = await self._ensure_session()
|
||||||
|
|
||||||
|
for sitemap_url in sitemap_urls_to_try:
|
||||||
|
try:
|
||||||
|
async with session.get(sitemap_url) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
continue
|
||||||
|
text = await resp.text()
|
||||||
|
soup = BeautifulSoup(text, "lxml-xml")
|
||||||
|
|
||||||
|
# Sitemap index
|
||||||
|
sitemaps = soup.find_all("sitemap")
|
||||||
|
if sitemaps:
|
||||||
|
for sm in sitemaps:
|
||||||
|
loc = sm.find("loc")
|
||||||
|
if loc:
|
||||||
|
child_urls = await self._parse_sitemap(session, loc.text.strip())
|
||||||
|
discovered.extend(child_urls)
|
||||||
|
else:
|
||||||
|
urls = soup.find_all("url")
|
||||||
|
for u in urls:
|
||||||
|
loc = u.find("loc")
|
||||||
|
if loc:
|
||||||
|
discovered.append(loc.text.strip())
|
||||||
|
|
||||||
|
if discovered:
|
||||||
|
self.logger.info(f"Discovered {len(discovered)} URLs from {sitemap_url}")
|
||||||
|
break
|
||||||
|
except Exception as exc:
|
||||||
|
self.logger.debug(f"Failed to fetch {sitemap_url}: {exc}")
|
||||||
|
|
||||||
|
return list(set(discovered))
|
||||||
|
|
||||||
|
async def _parse_sitemap(self, session: aiohttp.ClientSession, sitemap_url: str) -> list[str]:
|
||||||
|
"""Parse a single sitemap XML and return URLs."""
|
||||||
|
urls: list[str] = []
|
||||||
|
try:
|
||||||
|
async with session.get(sitemap_url) as resp:
|
||||||
|
if resp.status != 200:
|
||||||
|
return urls
|
||||||
|
text = await resp.text()
|
||||||
|
soup = BeautifulSoup(text, "lxml-xml")
|
||||||
|
for u in soup.find_all("url"):
|
||||||
|
loc = u.find("loc")
|
||||||
|
if loc:
|
||||||
|
urls.append(loc.text.strip())
|
||||||
|
except Exception as exc:
|
||||||
|
self.logger.debug(f"Failed to parse sitemap {sitemap_url}: {exc}")
|
||||||
|
return urls
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Content type classification
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def classify_content_type(url: str, title: str = "") -> str:
|
||||||
|
"""
|
||||||
|
Classify content type based on URL path patterns and title.
|
||||||
|
|
||||||
|
Returns one of: blog, product, service, landing, resource, other.
|
||||||
|
"""
|
||||||
|
combined = f"{url.lower()} {title.lower()}"
|
||||||
|
scores: dict[str, int] = {}
|
||||||
|
|
||||||
|
for ctype, patterns in CONTENT_TYPE_PATTERNS.items():
|
||||||
|
score = 0
|
||||||
|
for pattern in patterns:
|
||||||
|
if re.search(pattern, combined, re.IGNORECASE):
|
||||||
|
score += 1
|
||||||
|
if score > 0:
|
||||||
|
scores[ctype] = score
|
||||||
|
|
||||||
|
if not scores:
|
||||||
|
return "other"
|
||||||
|
return max(scores, key=scores.get)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Performance scoring
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def score_performance(page: ContentPage) -> float:
|
||||||
|
"""
|
||||||
|
Compute composite performance score (0-100) from traffic, keywords, backlinks.
|
||||||
|
|
||||||
|
Weights:
|
||||||
|
- Traffic: 50% (log-scaled, 10k+ traffic = max)
|
||||||
|
- Keywords count: 30% (log-scaled, 500+ = max)
|
||||||
|
- Backlinks: 20% (log-scaled, 100+ = max)
|
||||||
|
"""
|
||||||
|
import math
|
||||||
|
|
||||||
|
traffic_score = min(100, (math.log10(max(page.traffic, 1)) / math.log10(10000)) * 100)
|
||||||
|
keywords_score = min(100, (math.log10(max(page.keywords_count, 1)) / math.log10(500)) * 100)
|
||||||
|
backlinks_score = min(100, (math.log10(max(page.backlinks, 1)) / math.log10(100)) * 100)
|
||||||
|
|
||||||
|
composite = (traffic_score * 0.50) + (keywords_score * 0.30) + (backlinks_score * 0.20)
|
||||||
|
return round(min(100, max(0, composite)), 1)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Content decay detection
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def detect_decay(pages: list[ContentPage], threshold: float = -20.0) -> list[ContentPage]:
|
||||||
|
"""
|
||||||
|
Flag pages with declining traffic trend.
|
||||||
|
|
||||||
|
Uses a simple heuristic: pages with low performance score relative to
|
||||||
|
their keyword count indicate potential decay. In production, historical
|
||||||
|
traffic data from Ahrefs metrics-history would be used.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
pages: List of content pages with metrics.
|
||||||
|
threshold: Decay rate threshold (percentage decline).
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
List of pages flagged as decaying.
|
||||||
|
"""
|
||||||
|
decaying: list[ContentPage] = []
|
||||||
|
for page in pages:
|
||||||
|
# Heuristic: high keyword count but low traffic suggests decay
|
||||||
|
if page.keywords_count > 10 and page.traffic < 50:
|
||||||
|
page.is_decaying = True
|
||||||
|
page.decay_rate = -50.0 if page.traffic == 0 else round(
|
||||||
|
-((page.keywords_count * 10 - page.traffic) / max(page.keywords_count * 10, 1)) * 100, 1
|
||||||
|
)
|
||||||
|
if page.decay_rate <= threshold:
|
||||||
|
decaying.append(page)
|
||||||
|
elif page.performance_score < 20 and page.keywords_count > 5:
|
||||||
|
page.is_decaying = True
|
||||||
|
page.decay_rate = round(-max(30, 100 - page.performance_score * 2), 1)
|
||||||
|
if page.decay_rate <= threshold:
|
||||||
|
decaying.append(page)
|
||||||
|
|
||||||
|
decaying.sort(key=lambda p: p.decay_rate)
|
||||||
|
return decaying
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Freshness assessment
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def analyze_freshness(pages: list[ContentPage]) -> dict[str, int]:
|
||||||
|
"""
|
||||||
|
Categorize pages by freshness based on last_modified dates.
|
||||||
|
|
||||||
|
Returns distribution: fresh (< 3 months), aging (3-12 months),
|
||||||
|
stale (> 12 months), unknown (no date).
|
||||||
|
"""
|
||||||
|
now = datetime.now()
|
||||||
|
distribution = {"fresh": 0, "aging": 0, "stale": 0, "unknown": 0}
|
||||||
|
|
||||||
|
for page in pages:
|
||||||
|
if not page.last_modified:
|
||||||
|
distribution["unknown"] += 1
|
||||||
|
continue
|
||||||
|
try:
|
||||||
|
# Try common date formats
|
||||||
|
for fmt in ("%Y-%m-%dT%H:%M:%S", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S%z"):
|
||||||
|
try:
|
||||||
|
modified = datetime.strptime(
|
||||||
|
page.last_modified.replace("+00:00", "").replace("Z", ""), fmt.replace("%z", "")
|
||||||
|
)
|
||||||
|
break
|
||||||
|
except ValueError:
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
distribution["unknown"] += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
age = now - modified
|
||||||
|
if age < timedelta(days=90):
|
||||||
|
distribution["fresh"] += 1
|
||||||
|
elif age < timedelta(days=365):
|
||||||
|
distribution["aging"] += 1
|
||||||
|
else:
|
||||||
|
distribution["stale"] += 1
|
||||||
|
except Exception:
|
||||||
|
distribution["unknown"] += 1
|
||||||
|
|
||||||
|
return distribution
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Korean content pattern identification
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def identify_korean_patterns(pages: list[ContentPage]) -> dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Detect Korean content patterns across pages.
|
||||||
|
|
||||||
|
Identifies Naver Blog style review content, listicles,
|
||||||
|
how-to guides, and informational content patterns.
|
||||||
|
|
||||||
|
Returns summary with counts and example URLs per pattern.
|
||||||
|
"""
|
||||||
|
results: dict[str, Any] = {
|
||||||
|
"total_korean_content": 0,
|
||||||
|
"patterns": {},
|
||||||
|
}
|
||||||
|
|
||||||
|
for pattern_name, keywords in KOREAN_CONTENT_PATTERNS.items():
|
||||||
|
matches: list[dict[str, str]] = []
|
||||||
|
for page in pages:
|
||||||
|
combined = f"{page.url} {page.title}"
|
||||||
|
for keyword in keywords:
|
||||||
|
if re.search(keyword, combined, re.IGNORECASE):
|
||||||
|
matches.append({"url": page.url, "title": page.title, "matched_keyword": keyword})
|
||||||
|
break
|
||||||
|
|
||||||
|
results["patterns"][pattern_name] = {
|
||||||
|
"count": len(matches),
|
||||||
|
"examples": matches[:5],
|
||||||
|
}
|
||||||
|
|
||||||
|
korean_urls = set()
|
||||||
|
for pattern_data in results["patterns"].values():
|
||||||
|
for example in pattern_data["examples"]:
|
||||||
|
korean_urls.add(example["url"])
|
||||||
|
results["total_korean_content"] = len(korean_urls)
|
||||||
|
|
||||||
|
return results
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Orchestration
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    async def audit(
        self,
        url: str,
        detect_decay_flag: bool = False,
        content_type_filter: str | None = None,
        limit: int = 200,
    ) -> ContentAuditResult:
        """
        Run full content audit: inventory, scoring, decay, Korean patterns.

        Args:
            url: Target website URL.
            detect_decay_flag: Whether to run decay detection.
            content_type_filter: Filter by content type (blog, product, etc.).
            limit: Maximum pages to analyze.

        Returns:
            ContentAuditResult with inventory, top performers, decay, analysis.
        """
        result = ContentAuditResult(
            url=url,
            timestamp=datetime.now().isoformat(),
        )

        self.logger.info(f"Starting content audit for {url}")

        # 1. Gather pages from Ahrefs and sitemap (three sources, concurrently).
        top_pages_data, traffic_pages_data, sitemap_urls = await asyncio.gather(
            self.get_top_pages(url, limit=limit),
            self.get_pages_by_traffic(url, limit=limit),
            self.crawl_sitemap(url),
        )

        # 2. Merge and deduplicate pages, keyed by URL.
        page_map: dict[str, ContentPage] = {}

        for item in top_pages_data:
            page_url = item.get("url", "")
            if not page_url:
                continue
            page_map[page_url] = ContentPage(
                url=page_url,
                # Ahrefs top-pages rows carry no title; the top keyword is the
                # best available proxy here.
                title=item.get("top_keyword", ""),
                traffic=int(item.get("traffic", 0)),
                keywords_count=int(item.get("keywords", 0)),
                backlinks=int(item.get("value", 0)),
            )

        for item in traffic_pages_data:
            page_url = item.get("url", "")
            if not page_url:
                continue
            if page_url in page_map:
                # Same URL from both sources: keep the larger metric of each.
                existing = page_map[page_url]
                existing.traffic = max(existing.traffic, int(item.get("traffic", 0)))
                existing.keywords_count = max(existing.keywords_count, int(item.get("keywords", 0)))
            else:
                page_map[page_url] = ContentPage(
                    url=page_url,
                    title=item.get("top_keyword", ""),
                    traffic=int(item.get("traffic", 0)),
                    keywords_count=int(item.get("keywords", 0)),
                )

        # Add sitemap URLs not already present (zero-metric placeholders).
        for s_url in sitemap_urls:
            if s_url not in page_map:
                page_map[s_url] = ContentPage(url=s_url)

        # 3. Classify and score every merged page.
        all_pages: list[ContentPage] = []
        for page in page_map.values():
            page.content_type = self.classify_content_type(page.url, page.title)
            page.performance_score = self.score_performance(page)
            all_pages.append(page)

        # 4. Filter by content type if requested.
        if content_type_filter:
            all_pages = [p for p in all_pages if p.content_type == content_type_filter]

        # 5. Build inventory (type histogram, averages, freshness).
        by_type: dict[str, int] = {}
        for page in all_pages:
            by_type[page.content_type] = by_type.get(page.content_type, 0) + 1

        avg_score = (
            sum(p.performance_score for p in all_pages) / len(all_pages)
            if all_pages else 0.0
        )
        avg_word_count = (
            sum(p.word_count for p in all_pages) / len(all_pages)
            if all_pages else 0.0
        )

        freshness = self.analyze_freshness(all_pages)

        result.content_inventory = ContentInventory(
            total_pages=len(all_pages),
            by_type=by_type,
            avg_performance_score=round(avg_score, 1),
            avg_word_count=round(avg_word_count, 1),
            pages=sorted(all_pages, key=lambda p: p.performance_score, reverse=True)[:limit],
            freshness_distribution=freshness,
        )

        # 6. Top performers (best 20 by performance score).
        result.top_performers = sorted(all_pages, key=lambda p: p.performance_score, reverse=True)[:20]

        # 7. Decay detection (optional; mutates page.is_decaying/decay_rate).
        if detect_decay_flag:
            result.decaying_content = self.detect_decay(all_pages)

        # 8. Korean content analysis.
        result.korean_content_analysis = self.identify_korean_patterns(all_pages)

        # 9. Recommendations derived from everything computed above, so this
        # must stay last.
        result.recommendations = self._generate_recommendations(result)

        self.logger.info(
            f"Audit complete: {len(all_pages)} pages, "
            f"{len(result.top_performers)} top performers, "
            f"{len(result.decaying_content)} decaying"
        )

        return result
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_recommendations(result: ContentAuditResult) -> list[str]:
|
||||||
|
"""Generate actionable recommendations from audit data."""
|
||||||
|
recs: list[str] = []
|
||||||
|
inv = result.content_inventory
|
||||||
|
|
||||||
|
# Low average score
|
||||||
|
if inv.avg_performance_score < 30:
|
||||||
|
recs.append(
|
||||||
|
"전체 콘텐츠 평균 성과 점수가 낮습니다 ({:.0f}/100). "
|
||||||
|
"상위 콘텐츠 패턴을 분석하여 저성과 페이지를 개선하세요.".format(inv.avg_performance_score)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Stale content
|
||||||
|
stale = inv.freshness_distribution.get("stale", 0)
|
||||||
|
total = inv.total_pages or 1
|
||||||
|
if stale / total > 0.3:
|
||||||
|
recs.append(
|
||||||
|
f"오래된 콘텐츠가 {stale}개 ({stale * 100 // total}%)입니다. "
|
||||||
|
"콘텐츠 업데이트 또는 통합을 고려하세요."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Decaying content
|
||||||
|
if len(result.decaying_content) > 5:
|
||||||
|
recs.append(
|
||||||
|
f"트래픽이 감소하는 콘텐츠가 {len(result.decaying_content)}개 감지되었습니다. "
|
||||||
|
"상위 감소 페이지부터 콘텐츠 리프레시를 진행하세요."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Content type balance
|
||||||
|
blog_count = inv.by_type.get("blog", 0)
|
||||||
|
if blog_count == 0:
|
||||||
|
recs.append(
|
||||||
|
"블로그 콘텐츠가 없습니다. SEO 트래픽 확보를 위해 "
|
||||||
|
"블로그 콘텐츠 전략을 수립하세요."
|
||||||
|
)
|
||||||
|
|
||||||
|
# Korean content opportunities
|
||||||
|
korean = result.korean_content_analysis
|
||||||
|
review_count = korean.get("patterns", {}).get("naver_blog_style", {}).get("count", 0)
|
||||||
|
if review_count == 0:
|
||||||
|
recs.append(
|
||||||
|
"후기/리뷰 콘텐츠가 없습니다. 한국 시장에서 후기 콘텐츠는 "
|
||||||
|
"전환율에 큰 영향을 미치므로 후기 콘텐츠 생성을 권장합니다."
|
||||||
|
)
|
||||||
|
|
||||||
|
if not recs:
|
||||||
|
recs.append("현재 콘텐츠 전략이 양호합니다. 지속적인 모니터링을 권장합니다.")
|
||||||
|
|
||||||
|
return recs
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line argument parser for the content auditor CLI."""
    parser = argparse.ArgumentParser(
        description="SEO Content Auditor - inventory, scoring, and decay detection",
    )
    parser.add_argument(
        "--url",
        required=True,
        help="Target website URL",
    )
    parser.add_argument(
        "--decay",
        action="store_true",
        help="Enable content decay detection",
    )
    parser.add_argument(
        "--type",
        dest="content_type",
        help="Filter by content type (blog, product, service, landing, resource)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=200,
        help="Maximum pages to analyze (default: 200)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output as JSON",
    )
    parser.add_argument(
        "--output",
        help="Save output to file",
    )
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def format_text_report(result: ContentAuditResult) -> str:
    """
    Format an audit result as a human-readable Markdown report.

    Sections: inventory summary, top performers, decaying content (if any),
    Korean pattern counts, and numbered recommendations.
    """
    lines: list[str] = []
    lines.append(f"## Content Audit: {result.url}")
    # timestamp is ISO format; first 10 chars are the YYYY-MM-DD date.
    lines.append(f"**Date**: {result.timestamp[:10]}")
    lines.append("")

    inv = result.content_inventory
    # FIX: was an f-string with no placeholders (ruff F541).
    lines.append("### Content Inventory")
    lines.append(f"- Total pages: {inv.total_pages}")
    lines.append(f"- Average performance score: {inv.avg_performance_score}/100")
    # ensure_ascii=False keeps Korean type names readable in the report.
    lines.append(f"- Content types: {json.dumps(inv.by_type, ensure_ascii=False)}")
    lines.append(f"- Freshness: {json.dumps(inv.freshness_distribution, ensure_ascii=False)}")
    lines.append("")

    lines.append("### Top Performers")
    for i, page in enumerate(result.top_performers[:10], 1):
        lines.append(f" {i}. [{page.performance_score:.0f}] {page.url} (traffic: {page.traffic})")
    lines.append("")

    if result.decaying_content:
        lines.append("### Decaying Content")
        for i, page in enumerate(result.decaying_content[:10], 1):
            lines.append(f" {i}. [{page.decay_rate:+.0f}%] {page.url} (traffic: {page.traffic})")
        lines.append("")

    if result.korean_content_analysis.get("patterns"):
        lines.append("### Korean Content Patterns")
        for pattern_name, data in result.korean_content_analysis["patterns"].items():
            lines.append(f" - {pattern_name}: {data['count']} pages")
        lines.append("")

    lines.append("### Recommendations")
    for i, rec in enumerate(result.recommendations, 1):
        lines.append(f" {i}. {rec}")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """CLI entry point: parse args, run the audit, emit text or JSON output."""
    parser = build_parser()
    args = parser.parse_args()

    auditor = ContentAuditor()
    try:
        result = await auditor.audit(
            url=args.url,
            detect_decay_flag=args.decay,
            content_type_filter=args.content_type,
            limit=args.limit,
        )

        # Serialize: JSON dump of the dataclass tree, or the Markdown report.
        # default=str stringifies any non-JSON-native values in the result.
        if args.json:
            output = json.dumps(asdict(result), ensure_ascii=False, indent=2, default=str)
        else:
            output = format_text_report(result)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            logger.info(f"Output saved to {args.output}")
        else:
            print(output)

    finally:
        # Always release network resources and report stats, even on error.
        await auditor.close()
        auditor.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: drive the async CLI workflow to completion.
    asyncio.run(main())
|
||||||
@@ -0,0 +1,738 @@
|
|||||||
|
"""
|
||||||
|
Content Brief Generator - SEO Content Brief Creation
|
||||||
|
=====================================================
|
||||||
|
Purpose: Generate detailed SEO content briefs with outlines,
|
||||||
|
keyword lists, word count targets, and internal linking suggestions.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class OutlineSection:
    """A single heading section in the content outline."""

    heading: str  # Section heading text
    level: int = 2  # Heading level: 2 (H2) or 3 (H3)
    talking_points: list[str] = field(default_factory=list)  # Points to cover in the section
    target_words: int = 200  # Suggested word budget for this section
    keywords_to_include: list[str] = field(default_factory=list)  # Keywords to weave into the copy
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CompetitorPageAnalysis:
    """Analysis of a single competitor page for the target keyword."""

    url: str  # Competitor page URL
    title: str = ""  # Page <title> (or SERP title)
    word_count: int = 0  # Visible-text word count
    headings: list[dict[str, str]] = field(default_factory=list)  # [{"level": "H2", "text": ...}, ...]
    topics_covered: list[str] = field(default_factory=list)  # H2 heading texts (topic proxies)
    content_type: str = ""  # Classified content type, if determined
    has_images: bool = False  # More than two <img> tags
    has_video: bool = False  # <video> or YouTube/Vimeo iframe present
    has_faq: bool = False  # FAQ text or FAQPage schema detected
    has_table: bool = False  # At least one <table> present
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContentBrief:
    """Complete SEO content brief."""

    primary_keyword: str  # Main target keyword
    secondary_keywords: list[str] = field(default_factory=list)  # Supporting keywords
    lsi_keywords: list[str] = field(default_factory=list)  # Semantically related terms
    target_word_count: int = 1500  # Recommended article length
    word_count_range: tuple[int, int] = (1200, 1800)  # Acceptable min/max length
    suggested_title: str = ""  # Proposed page title
    meta_description: str = ""  # Proposed meta description
    outline: list[OutlineSection] = field(default_factory=list)  # Recommended H2/H3 structure
    competitor_analysis: list[CompetitorPageAnalysis] = field(default_factory=list)  # Top-ranking page analyses
    internal_links: list[dict[str, str]] = field(default_factory=list)  # Internal link suggestions
    content_format: str = "blog"  # Target content format
    korean_format_recommendations: list[str] = field(default_factory=list)  # Korea-market formatting tips
    search_intent: str = "informational"  # transactional / navigational / commercial / informational
    notes: list[str] = field(default_factory=list)  # Free-form notes for the writer
    timestamp: str = ""  # ISO timestamp of brief generation
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Search intent patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Regex fragments (English + Korean) used to classify keyword search intent.
# NOTE: insertion order matters for tie-breaking — detect_search_intent picks
# the max-scoring intent via max(), which returns the first top-scoring key
# in this dict's order.
INTENT_PATTERNS = {
    "transactional": [
        r"buy", r"purchase", r"price", r"cost", r"order", r"shop",
        r"구매", r"주문", r"가격", r"비용", r"할인", r"쿠폰",
    ],
    "navigational": [
        r"login", r"sign in", r"official", r"website",
        r"로그인", r"공식", r"홈페이지",
    ],
    "commercial": [
        r"best", r"top", r"review", r"compare", r"vs",
        r"추천", r"비교", r"후기", r"리뷰", r"순위",
    ],
    "informational": [
        r"what", r"how", r"why", r"guide", r"tutorial",
        r"이란", r"방법", r"가이드", r"효과", r"원인",
    ],
}
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Korean content format recommendations
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Korea-market formatting recommendations, keyed by search intent.
# Keys mirror INTENT_PATTERNS; values are writer-facing Korean-language tips
# attached to generated briefs.
KOREAN_FORMAT_TIPS = {
    "transactional": [
        "가격 비교표를 포함하세요 (경쟁사 가격 대비)",
        "실제 비용 사례를 3개 이상 제시하세요",
        "결제 방법 및 할인 정보를 명확히 안내하세요",
        "CTA(행동 유도) 버튼을 여러 위치에 배치하세요",
    ],
    "commercial": [
        "네이버 블로그 스타일의 솔직한 후기 톤을 사용하세요",
        "장단점을 균형 있게 비교하세요",
        "실제 사용 사진 또는 전후 비교 이미지를 포함하세요",
        "별점 또는 점수 평가 체계를 추가하세요",
        "FAQ 섹션을 포함하세요 (네이버 검색 노출에 유리)",
    ],
    "informational": [
        "핵심 정보를 글 상단에 요약하세요 (두괄식 구성)",
        "전문 용어는 쉬운 설명을 병기하세요",
        "인포그래픽 또는 도표를 활용하세요",
        "관련 콘텐츠 내부 링크를 3-5개 포함하세요",
        "전문가 인용 또는 출처를 명시하세요 (E-E-A-T 강화)",
    ],
    "navigational": [
        "공식 정보와 연락처를 최상단에 배치하세요",
        "지도 임베드를 포함하세요 (네이버 지도/구글 맵)",
        "영업시간, 주소, 전화번호를 명확히 표시하세요",
    ],
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ContentBriefGenerator
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ContentBriefGenerator(BaseAsyncClient):
|
||||||
|
"""Generate comprehensive SEO content briefs."""
|
||||||
|
|
||||||
|
    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        """
        Initialize the brief generator with rate-limited async client settings.

        Args:
            max_concurrent: Maximum concurrent requests (passed to base client).
            requests_per_second: Request rate limit (passed to base client).
        """
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
        # HTTP session is created lazily by _ensure_session() and released by close().
        self.session: aiohttp.ClientSession | None = None
|
||||||
|
|
||||||
|
    async def _ensure_session(self) -> aiohttp.ClientSession:
        """Return the shared aiohttp session, creating a new one if absent or closed."""
        if self.session is None or self.session.closed:
            # 30s total timeout per request; identify ourselves with a stable UA.
            timeout = aiohttp.ClientTimeout(total=30)
            headers = {
                "User-Agent": "Mozilla/5.0 (compatible; SEOContentBrief/1.0)",
            }
            self.session = aiohttp.ClientSession(timeout=timeout, headers=headers)
        return self.session
|
||||||
|
|
||||||
|
async def close(self) -> None:
|
||||||
|
if self.session and not self.session.closed:
|
||||||
|
await self.session.close()
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Analyze top ranking results
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    async def analyze_top_results(
        self,
        keyword: str,
        site_url: str | None = None,
        num_competitors: int = 5,
    ) -> list[CompetitorPageAnalysis]:
        """
        Analyze top ranking pages for a keyword using Ahrefs SERP data.

        Falls back to fetching pages directly if Ahrefs data is unavailable.

        Args:
            keyword: Target keyword to look up in the SERP.
            site_url: Currently unused by this method (reserved for filtering
                own-site results) — TODO confirm intent with caller.
            num_competitors: Maximum competitor pages to analyze.

        Returns:
            CompetitorPageAnalysis entries, possibly empty on API failure;
            each is filled in-place by _analyze_page_content when fetchable.
        """
        self.logger.info(f"Analyzing top results for: {keyword}")
        results: list[CompetitorPageAnalysis] = []

        # Try Ahrefs SERP overview to find the ranking pages.
        # NOTE(review): requests.get is a blocking call inside an async method
        # and will stall the event loop for up to 30s — consider aiohttp or
        # asyncio.to_thread; confirm before changing.
        try:
            # config.get_required may raise when the key is missing; the broad
            # except below treats that as "no Ahrefs data" and falls through.
            api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
            if api_key:
                resp = requests.get(
                    "https://api.ahrefs.com/v3/serp-overview",
                    params={"keyword": keyword, "select": "url,title,position,traffic"},
                    headers={"Authorization": f"Bearer {api_key}"},
                    timeout=30,
                )
                if resp.status_code == 200:
                    data = resp.json()
                    # Accept either response shape ("positions" or "items").
                    serp_items = data.get("positions", data.get("items", []))[:num_competitors]
                    for item in serp_items:
                        analysis = CompetitorPageAnalysis(
                            url=item.get("url", ""),
                            title=item.get("title", ""),
                        )
                        results.append(analysis)
        except Exception as exc:
            self.logger.warning(f"Ahrefs SERP lookup failed: {exc}")

        # Fetch and analyze each page; failures are logged and skipped so one
        # unreachable competitor doesn't abort the whole analysis.
        session = await self._ensure_session()
        for analysis in results[:num_competitors]:
            if not analysis.url:
                continue
            try:
                async with session.get(analysis.url) as resp:
                    if resp.status != 200:
                        continue
                    html = await resp.text()
                    self._analyze_page_content(analysis, html)
            except Exception as exc:
                self.logger.debug(f"Failed to fetch {analysis.url}: {exc}")

        self.logger.info(f"Analyzed {len(results)} competitor pages")
        return results
|
||||||
|
|
||||||
|
    @staticmethod
    def _analyze_page_content(analysis: CompetitorPageAnalysis, html: str) -> None:
        """
        Parse HTML and fill content metrics on *analysis* in place.

        Populates title (if not already set), word_count, headings,
        has_images/has_video/has_faq/has_table flags, and topics_covered.
        """
        soup = BeautifulSoup(html, "html.parser")

        # Title: only fill if the SERP lookup didn't already provide one.
        title_tag = soup.find("title")
        if title_tag and not analysis.title:
            analysis.title = title_tag.get_text(strip=True)

        # Word count (visible text only): strip non-content elements first.
        # NOTE: whitespace-split word counting; may undercount for text
        # without inter-word spaces — confirm acceptable for target locales.
        for tag in soup(["script", "style", "nav", "header", "footer"]):
            tag.decompose()
        visible_text = soup.get_text(separator=" ", strip=True)
        analysis.word_count = len(visible_text.split())

        # Headings: collect H1-H6 in document order per level.
        headings: list[dict[str, str]] = []
        for level in range(1, 7):
            for h in soup.find_all(f"h{level}"):
                text = h.get_text(strip=True)
                if text:
                    headings.append({"level": f"H{level}", "text": text})
        analysis.headings = headings

        # Content features.
        analysis.has_images = len(soup.find_all("img")) > 2
        analysis.has_video = bool(soup.find("video") or soup.find("iframe", src=re.compile(r"youtube|vimeo")))
        # FAQ: visible FAQ text (English or Korean) or FAQPage JSON-LD schema.
        analysis.has_faq = bool(
            soup.find(string=re.compile(r"FAQ|자주\s*묻는\s*질문|Q\s*&\s*A", re.IGNORECASE))
            or soup.find("script", type="application/ld+json", string=re.compile(r"FAQPage"))
        )
        analysis.has_table = bool(soup.find("table"))

        # Topics covered: first 15 H2 headings serve as topic proxies.
        analysis.topics_covered = [
            h["text"] for h in headings if h["level"] == "H2"
        ][:15]
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Extract content outline
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def extract_outline(
|
||||||
|
self,
|
||||||
|
keyword: str,
|
||||||
|
top_results: list[CompetitorPageAnalysis],
|
||||||
|
) -> list[OutlineSection]:
|
||||||
|
"""
|
||||||
|
Build recommended H2/H3 outline by aggregating competitor headings.
|
||||||
|
|
||||||
|
Identifies common topics across top-ranking pages and structures
|
||||||
|
them into a logical outline.
|
||||||
|
"""
|
||||||
|
# Collect all H2 headings
|
||||||
|
h2_topics: dict[str, int] = {}
|
||||||
|
h3_by_h2: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
for result in top_results:
|
||||||
|
current_h2 = ""
|
||||||
|
for heading in result.headings:
|
||||||
|
text = heading["text"].strip()
|
||||||
|
if heading["level"] == "H2":
|
||||||
|
current_h2 = text
|
||||||
|
h2_topics[text] = h2_topics.get(text, 0) + 1
|
||||||
|
elif heading["level"] == "H3" and current_h2:
|
||||||
|
if current_h2 not in h3_by_h2:
|
||||||
|
h3_by_h2[current_h2] = []
|
||||||
|
h3_by_h2[current_h2].append(text)
|
||||||
|
|
||||||
|
# Sort H2s by frequency (most common topics first)
|
||||||
|
sorted_h2s = sorted(h2_topics.items(), key=lambda x: x[1], reverse=True)
|
||||||
|
|
||||||
|
# Build outline
|
||||||
|
outline: list[OutlineSection] = []
|
||||||
|
target_word_count = self.calculate_word_count(top_results)
|
||||||
|
words_per_section = target_word_count // max(len(sorted_h2s), 5)
|
||||||
|
|
||||||
|
for h2_text, frequency in sorted_h2s[:8]:
|
||||||
|
section = OutlineSection(
|
||||||
|
heading=h2_text,
|
||||||
|
level=2,
|
||||||
|
target_words=words_per_section,
|
||||||
|
talking_points=[],
|
||||||
|
)
|
||||||
|
|
||||||
|
# Add H3 subtopics
|
||||||
|
if h2_text in h3_by_h2:
|
||||||
|
unique_h3s = list(dict.fromkeys(h3_by_h2[h2_text]))[:5]
|
||||||
|
for h3_text in unique_h3s:
|
||||||
|
subsection = OutlineSection(
|
||||||
|
heading=h3_text,
|
||||||
|
level=3,
|
||||||
|
target_words=words_per_section // 3,
|
||||||
|
)
|
||||||
|
section.talking_points.append(h3_text)
|
||||||
|
|
||||||
|
outline.append(section)
|
||||||
|
|
||||||
|
# Ensure FAQ section if common
|
||||||
|
faq_count = sum(1 for r in top_results if r.has_faq)
|
||||||
|
if faq_count >= 2 and not any("FAQ" in s.heading or "질문" in s.heading for s in outline):
|
||||||
|
outline.append(OutlineSection(
|
||||||
|
heading="자주 묻는 질문 (FAQ)",
|
||||||
|
level=2,
|
||||||
|
target_words=300,
|
||||||
|
talking_points=[
|
||||||
|
f"{keyword} 관련 자주 묻는 질문 5-7개",
|
||||||
|
"Schema markup (FAQPage) 적용 권장",
|
||||||
|
],
|
||||||
|
))
|
||||||
|
|
||||||
|
return outline
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Keyword suggestions
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    async def suggest_keywords(self, primary_keyword: str) -> dict[str, list[str]]:
        """
        Generate primary, secondary, and LSI keyword suggestions.

        Uses Ahrefs matching-terms (secondary) and related-terms (LSI) data.
        Returns {"primary": [...], "secondary": [...], "lsi": [...]}; the
        secondary/lsi lists are empty when no API key is configured or the
        lookups fail.
        """
        self.logger.info(f"Generating keyword suggestions for: {primary_keyword}")
        result = {
            "primary": [primary_keyword],
            "secondary": [],
            "lsi": [],
        }

        # NOTE(review): both requests.get calls below are blocking inside an
        # async method and will stall the event loop for up to 30s each —
        # consider aiohttp or asyncio.to_thread; confirm before changing.
        try:
            # config.get_required may raise when the key is missing; caught below.
            api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
            if not api_key:
                self.logger.warning("AHREFS_API_KEY not set; returning basic keywords only")
                return result

            # Matching terms -> secondary keywords (primary itself excluded).
            resp = requests.get(
                "https://api.ahrefs.com/v3/keywords-explorer/matching-terms",
                params={"keyword": primary_keyword, "limit": 20, "select": "keyword,volume,difficulty"},
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp.status_code == 200:
                data = resp.json()
                # Accept either response shape ("keywords" or "items").
                terms = data.get("keywords", data.get("items", []))
                for term in terms:
                    kw = term.get("keyword", "")
                    if kw and kw.lower() != primary_keyword.lower():
                        result["secondary"].append(kw)

            # Related terms -> LSI keywords (terms already in secondary excluded).
            resp2 = requests.get(
                "https://api.ahrefs.com/v3/keywords-explorer/related-terms",
                params={"keyword": primary_keyword, "limit": 15, "select": "keyword,volume"},
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp2.status_code == 200:
                data2 = resp2.json()
                related = data2.get("keywords", data2.get("items", []))
                for term in related:
                    kw = term.get("keyword", "")
                    if kw and kw not in result["secondary"]:
                        result["lsi"].append(kw)

        except Exception as exc:
            # Best-effort: any API failure degrades to primary-only output.
            self.logger.warning(f"Keyword suggestion lookup failed: {exc}")

        return result
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Word count calculation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def calculate_word_count(top_results: list[CompetitorPageAnalysis]) -> int:
|
||||||
|
"""
|
||||||
|
Calculate target word count based on top 5 ranking pages.
|
||||||
|
|
||||||
|
Returns the average word count of top 5 with +/- 20% range.
|
||||||
|
"""
|
||||||
|
word_counts = [r.word_count for r in top_results[:5] if r.word_count > 100]
|
||||||
|
|
||||||
|
if not word_counts:
|
||||||
|
return 1500 # Default fallback
|
||||||
|
|
||||||
|
avg = sum(word_counts) / len(word_counts)
|
||||||
|
# Round to nearest 100
|
||||||
|
target = round(avg / 100) * 100
|
||||||
|
return max(800, min(5000, target))
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Internal linking suggestions
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    async def suggest_internal_links(
        self,
        keyword: str,
        site_url: str,
    ) -> list[dict[str, str]]:
        """
        Find related existing pages on the site for internal linking.

        Uses Ahrefs organic keywords to find pages ranking for related terms
        (word overlap with the target keyword). Returns up to 10 suggestions
        sorted by current traffic, each with url / anchor_text / relevance /
        current_traffic keys; empty on missing API key or failure.
        """
        self.logger.info(f"Finding internal link opportunities for {keyword} on {site_url}")
        links: list[dict[str, str]] = []
        # Ahrefs targets by hostname; fall back to the raw string when the
        # input isn't a full URL.
        target = urlparse(site_url).netloc or site_url

        # NOTE(review): requests.get is a blocking call inside an async method
        # and will stall the event loop for up to 30s — consider aiohttp or
        # asyncio.to_thread; confirm before changing.
        try:
            # config.get_required may raise when the key is missing; caught below.
            api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
            if not api_key:
                return links

            resp = requests.get(
                "https://api.ahrefs.com/v3/site-explorer/organic-keywords",
                params={
                    "target": target,
                    "limit": 50,
                    "select": "keyword,url,position,traffic",
                },
                headers={"Authorization": f"Bearer {api_key}"},
                timeout=30,
            )
            if resp.status_code != 200:
                return links

            data = resp.json()
            # Accept either response shape ("keywords" or "items").
            keywords_data = data.get("keywords", data.get("items", []))

            # Relevance = how many words of the target keyword appear in the
            # ranking keyword (exact-keyword matches are excluded).
            keyword_lower = keyword.lower()
            keyword_words = set(keyword_lower.split())

            seen_urls: set[str] = set()  # One suggestion per URL.
            for item in keywords_data:
                kw = item.get("keyword", "").lower()
                url = item.get("url", "")

                if not url or url in seen_urls:
                    continue

                # Check keyword relevance via word overlap.
                kw_words = set(kw.split())
                overlap = keyword_words & kw_words
                if overlap and kw != keyword_lower:
                    links.append({
                        "url": url,
                        "anchor_text": kw,
                        "relevance": f"{len(overlap)}/{len(keyword_words)} word overlap",
                        "current_traffic": str(item.get("traffic", 0)),
                    })
                    seen_urls.add(url)

            # Highest-traffic pages first.
            links.sort(key=lambda l: int(l.get("current_traffic", "0")), reverse=True)

        except Exception as exc:
            # Best-effort: failures degrade to an empty suggestion list.
            self.logger.warning(f"Internal link suggestion failed: {exc}")

        return links[:10]
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Search intent detection
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
def detect_search_intent(keyword: str) -> str:
    """Classify the search intent of *keyword*.

    Each intent in INTENT_PATTERNS is scored by how many of its regex
    patterns match the lowercased keyword; the best-scoring intent wins,
    with "informational" as the fallback when nothing matches.
    """
    text = keyword.lower()
    tally: dict[str, int] = {}

    for intent, patterns in INTENT_PATTERNS.items():
        hits = sum(1 for pat in patterns if re.search(pat, text, re.IGNORECASE))
        if hits:
            tally[intent] = hits

    if not tally:
        return "informational"
    return max(tally, key=tally.get)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Orchestration
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def generate(
    self,
    keyword: str,
    site_url: str,
    num_competitors: int = 5,
) -> ContentBrief:
    """
    Generate a comprehensive SEO content brief.

    Args:
        keyword: Primary target keyword.
        site_url: Target website URL.
        num_competitors: Number of competitor pages to analyze.

    Returns:
        ContentBrief with outline, keywords, and recommendations.
    """
    self.logger.info(f"Generating content brief for: {keyword}")

    # Intent drives the title/meta templates and the format suggestion.
    intent = self.detect_search_intent(keyword)

    # SERP analysis, keyword research and internal-link mining are
    # independent of one another, so run them concurrently.
    top_results, keyword_data, internal_links = await asyncio.gather(
        self.analyze_top_results(keyword, site_url, num_competitors),
        self.suggest_keywords(keyword),
        self.suggest_internal_links(keyword, site_url),
    )

    # Word-count target with a +/-20% acceptable band around it.
    target_word_count = self.calculate_word_count(top_results)
    wc_range = (int(target_word_count * 0.8), int(target_word_count * 1.2))

    outline = self.extract_outline(keyword, top_results)

    brief = ContentBrief(
        primary_keyword=keyword,
        secondary_keywords=keyword_data.get("secondary", [])[:10],
        lsi_keywords=keyword_data.get("lsi", [])[:10],
        target_word_count=target_word_count,
        word_count_range=wc_range,
        suggested_title=self._generate_title(keyword, intent),
        meta_description=self._generate_meta_description(keyword, intent),
        outline=outline,
        competitor_analysis=top_results,
        internal_links=internal_links,
        content_format=self._suggest_format(intent, top_results),
        korean_format_recommendations=KOREAN_FORMAT_TIPS.get(
            intent, KOREAN_FORMAT_TIPS["informational"]
        ),
        search_intent=intent,
        timestamp=datetime.now().isoformat(),
    )

    self.logger.info(
        f"Brief generated: {len(outline)} sections, "
        f"{target_word_count} target words, "
        f"{len(keyword_data.get('secondary', []))} secondary keywords"
    )

    return brief
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_title(keyword: str, intent: str) -> str:
|
||||||
|
"""Generate a suggested title based on keyword and intent."""
|
||||||
|
templates = {
|
||||||
|
"informational": "{keyword} - 완벽 가이드 (2025년 최신)",
|
||||||
|
"commercial": "{keyword} 추천 TOP 10 비교 (전문가 리뷰)",
|
||||||
|
"transactional": "{keyword} 가격 비교 및 구매 가이드",
|
||||||
|
"navigational": "{keyword} - 공식 안내",
|
||||||
|
}
|
||||||
|
template = templates.get(intent, templates["informational"])
|
||||||
|
return template.format(keyword=keyword)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_meta_description(keyword: str, intent: str) -> str:
|
||||||
|
"""Generate a suggested meta description."""
|
||||||
|
templates = {
|
||||||
|
"informational": (
|
||||||
|
f"{keyword}에 대해 알아야 할 모든 것을 정리했습니다. "
|
||||||
|
"전문가가 알려주는 핵심 정보와 실용적인 가이드를 확인하세요."
|
||||||
|
),
|
||||||
|
"commercial": (
|
||||||
|
f"{keyword} 비교 분석! 장단점, 가격, 실제 후기를 "
|
||||||
|
"한눈에 비교하고 최적의 선택을 하세요."
|
||||||
|
),
|
||||||
|
"transactional": (
|
||||||
|
f"{keyword} 최저가 비교 및 구매 방법을 안내합니다. "
|
||||||
|
"합리적인 가격으로 구매하는 팁을 확인하세요."
|
||||||
|
),
|
||||||
|
"navigational": (
|
||||||
|
f"{keyword} 공식 정보 및 이용 안내. "
|
||||||
|
"정확한 정보를 빠르게 확인하세요."
|
||||||
|
),
|
||||||
|
}
|
||||||
|
return templates.get(intent, templates["informational"])
|
||||||
|
|
||||||
|
@staticmethod
def _suggest_format(intent: str, results: list[CompetitorPageAnalysis]) -> str:
    """Suggest a content format from the intent, else from competitor depth."""
    by_intent = {
        "commercial": "listicle",
        "informational": "guide",
        "transactional": "landing",
    }
    if intent in by_intent:
        return by_intent[intent]

    # No direct mapping (e.g. navigational): long competitor pages imply a
    # comprehensive guide, otherwise a standard blog post.
    if results:
        avg_word_count = sum(page.word_count for page in results) / len(results)
    else:
        avg_word_count = 0
    return "comprehensive_guide" if avg_word_count > 3000 else "blog"
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Create the CLI argument parser for the content-brief generator."""
    parser = argparse.ArgumentParser(description="SEO Content Brief Generator")
    parser.add_argument("--keyword", required=True, help="Primary target keyword")
    parser.add_argument("--url", required=True, help="Target website URL")
    parser.add_argument(
        "--competitors",
        type=int,
        default=5,
        help="Number of competitor pages to analyze (default: 5)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", help="Save output to file")
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def format_text_report(brief: ContentBrief) -> str:
    """Format content brief as human-readable text.

    Renders a markdown-style report: header metadata, target metrics,
    keywords, outline, competitor analysis, internal-link suggestions and
    Korean format tips, in that order.
    """
    lines: list[str] = []
    # Header: keyword, date (ISO timestamp truncated to YYYY-MM-DD), intent, format.
    lines.append(f"## Content Brief: {brief.primary_keyword}")
    lines.append(f"**Date**: {brief.timestamp[:10]}")
    lines.append(f"**Search Intent**: {brief.search_intent}")
    lines.append(f"**Content Format**: {brief.content_format}")
    lines.append("")

    lines.append("### Target Metrics")
    lines.append(f"- Word count: {brief.target_word_count} ({brief.word_count_range[0]}-{brief.word_count_range[1]})")
    lines.append(f"- Suggested title: {brief.suggested_title}")
    lines.append(f"- Meta description: {brief.meta_description}")
    lines.append("")

    # Keyword sections are optional: secondary/LSI lists may be empty.
    lines.append("### Keywords")
    lines.append(f"- **Primary**: {brief.primary_keyword}")
    if brief.secondary_keywords:
        lines.append(f"- **Secondary**: {', '.join(brief.secondary_keywords[:8])}")
    if brief.lsi_keywords:
        lines.append(f"- **LSI**: {', '.join(brief.lsi_keywords[:8])}")
    lines.append("")

    lines.append("### Content Outline")
    for section in brief.outline:
        # H2 sections render as "##", everything else as "###".
        prefix = "##" if section.level == 2 else "###"
        lines.append(f"  {prefix} {section.heading} (~{section.target_words}w)")
        for point in section.talking_points:
            lines.append(f"    - {point}")
    lines.append("")

    if brief.competitor_analysis:
        lines.append(f"### Competitor Analysis ({len(brief.competitor_analysis)} pages)")
        for comp in brief.competitor_analysis:
            lines.append(f"  - **{comp.title or comp.url}**")
            lines.append(f"    Word count: {comp.word_count} | Headings: {len(comp.headings)}")
            # Only list the media/structure features the page actually has.
            features = []
            if comp.has_images:
                features.append("images")
            if comp.has_video:
                features.append("video")
            if comp.has_faq:
                features.append("FAQ")
            if comp.has_table:
                features.append("table")
            if features:
                lines.append(f"    Features: {', '.join(features)}")
        lines.append("")

    if brief.internal_links:
        lines.append(f"### Internal Linking Suggestions ({len(brief.internal_links)})")
        # Cap at 7 links to keep the report readable.
        for link in brief.internal_links[:7]:
            lines.append(f"  - [{link['anchor_text']}]({link['url']})")
        lines.append("")

    if brief.korean_format_recommendations:
        lines.append("### Korean Content Format Recommendations")
        for tip in brief.korean_format_recommendations:
            lines.append(f"  - {tip}")

    return "\n".join(lines)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """CLI entry point: parse args, generate the brief, emit text or JSON."""
    args = build_parser().parse_args()

    generator = ContentBriefGenerator()
    try:
        brief = await generator.generate(
            keyword=args.keyword,
            site_url=args.url,
            num_competitors=args.competitors,
        )

        if args.json:
            rendered = json.dumps(asdict(brief), ensure_ascii=False, indent=2, default=str)
        else:
            rendered = format_text_report(brief)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as fh:
                fh.write(rendered)
            logger.info(f"Output saved to {args.output}")
        else:
            print(rendered)

    finally:
        # Always release client resources and report request statistics,
        # even when generation fails.
        await generator.close()
        generator.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this module directly as a script.
if __name__ == "__main__":
    asyncio.run(main())
|
||||||
@@ -0,0 +1,694 @@
|
|||||||
|
"""
|
||||||
|
Content Gap Analyzer - Topic Gap Detection & Cluster Mapping
|
||||||
|
=============================================================
|
||||||
|
Purpose: Identify content gaps vs competitors, build topic clusters,
|
||||||
|
and generate prioritized editorial calendars.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import math
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from collections import defaultdict
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from sklearn.feature_extraction.text import TfidfVectorizer
|
||||||
|
from sklearn.cluster import AgglomerativeClustering
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class TopicGap:
    """A topic present in competitors but missing from target."""
    # Gap topic, normalized to the competitor page's top keyword (lowercase).
    topic: str
    # Competitor pages ranking for this topic.
    competitor_urls: list[str] = field(default_factory=list)
    competitor_keywords: list[str] = field(default_factory=list)
    # Highest observed competitor traffic for the topic.
    estimated_traffic: int = 0
    # 0-100 blend of traffic potential (60%) and competitor coverage (40%).
    priority_score: float = 0.0
    # "low" / "medium" / "high", derived from how many competitors cover it.
    difficulty: str = "medium"
    # Suggested format, see _suggest_content_type (guide/listicle/...).
    content_type_suggestion: str = "blog"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class TopicCluster:
    """Topic cluster with pillar and supporting cluster pages."""
    # Broadest topic in the cluster; anchors the internal-link structure.
    pillar_topic: str
    pillar_keyword: str = ""
    # Supporting topics/keywords (capped at 20 by build_topic_clusters).
    cluster_topics: list[str] = field(default_factory=list)
    cluster_keywords: list[str] = field(default_factory=list)
    # NOTE(review): build_topic_clusters always sets these to 0/0.0 —
    # presumably enriched elsewhere; confirm before relying on them.
    total_volume: int = 0
    coverage_score: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class CalendarEntry:
    """Prioritized editorial calendar entry."""
    topic: str
    # "high" / "medium" / "low", derived from TopicGap.priority_score.
    priority: str = "medium"
    # Publication target date, formatted "YYYY-MM-DD".
    target_date: str = ""
    content_type: str = "blog"
    target_word_count: int = 1500
    primary_keyword: str = ""
    estimated_traffic: int = 0
    # Pillar topic this entry belongs to, or "uncategorized".
    cluster_name: str = ""
    notes: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ContentGapResult:
    """Full content gap analysis result."""
    # Site under analysis and the competitor set it was compared against.
    target_url: str
    competitor_urls: list[str] = field(default_factory=list)
    timestamp: str = ""
    # Keyword counts: the target's own vs. the competitors' combined pages.
    target_topics_count: int = 0
    competitor_topics_count: int = 0
    gaps: list[TopicGap] = field(default_factory=list)
    clusters: list[TopicCluster] = field(default_factory=list)
    calendar: list[CalendarEntry] = field(default_factory=list)
    # URL -> number of ranking keywords / top pages found.
    content_volume_comparison: dict[str, int] = field(default_factory=dict)
    # BUG FIX: annotated as list[...] but previously used
    # field(default_factory=dict), so the default was {} instead of [] —
    # breaking list semantics (append/sort/iteration of dict entries).
    korean_opportunities: list[dict[str, Any]] = field(default_factory=list)
    recommendations: list[str] = field(default_factory=list)
    errors: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Korean opportunity patterns
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# Regex cues that flag Korean-market content opportunities.  Each entry maps
# a keyword pattern to a machine label and a human-readable description;
# detect_korean_opportunities records only the first matching entry per topic.
KOREAN_OPPORTUNITY_PATTERNS = [
    {"pattern": r"후기|리뷰", "label": "review_content", "description": "후기/리뷰 콘텐츠"},
    {"pattern": r"비용|가격|견적", "label": "pricing_content", "description": "비용/가격 정보 콘텐츠"},
    {"pattern": r"비교|차이", "label": "comparison_content", "description": "비교 콘텐츠"},
    {"pattern": r"추천|베스트|TOP", "label": "recommendation_content", "description": "추천/리스트 콘텐츠"},
    {"pattern": r"방법|하는\s*법|가이드", "label": "how_to_content", "description": "가이드/방법 콘텐츠"},
    {"pattern": r"부작용|주의|위험", "label": "safety_content", "description": "안전/부작용 정보"},
    {"pattern": r"효과|결과|전후", "label": "results_content", "description": "효과/결과 콘텐츠"},
]
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# ContentGapAnalyzer
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ContentGapAnalyzer(BaseAsyncClient):
    """Analyze content gaps between target and competitor sites."""

    def __init__(self, max_concurrent: int = 5, requests_per_second: float = 2.0):
        # Both limits are forwarded to BaseAsyncClient, which presumably
        # enforces concurrency and rate limiting — confirm in base_client.
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Ahrefs data retrieval
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def get_competitor_topics(self, competitor_url: str, limit: int = 100) -> list[dict]:
    """
    Get top pages and keywords for a competitor via Ahrefs.

    Args:
        competitor_url: Competitor site URL (a bare domain also works).
        limit: Maximum number of top pages to request.

    Returns:
        List of page dicts (url, traffic, keywords, value, top_keyword);
        empty list when the API key is missing or the request fails.
    """
    self.logger.info(f"Fetching competitor topics for {competitor_url}")
    # Ahrefs targets are domains; fall back to the raw value when the
    # input has no scheme (urlparse then yields an empty netloc).
    target = urlparse(competitor_url).netloc or competitor_url

    try:
        api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
        if not api_key:
            self.logger.warning("AHREFS_API_KEY not set; returning empty competitor topics")
            return []

        # FIX: requests.get is blocking; running it directly in this
        # coroutine stalls the event loop (and the parallel gather() in
        # find_topic_gaps).  Off-load it to a worker thread instead.
        resp = await asyncio.to_thread(
            requests.get,
            "https://api.ahrefs.com/v3/site-explorer/top-pages",
            params={
                "target": target,
                "limit": limit,
                "select": "url,traffic,keywords,value,top_keyword",
            },
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()
        # The response list key has appeared as both "pages" and "items".
        pages = data.get("pages", data.get("items", []))
        self.logger.info(f"Retrieved {len(pages)} competitor topics from {competitor_url}")
        return pages
    except Exception as exc:
        # Best-effort source: gap analysis degrades gracefully without it.
        self.logger.warning(f"Failed to get competitor topics for {competitor_url}: {exc}")
        return []
|
||||||
|
|
||||||
|
async def get_target_keywords(self, target_url: str, limit: int = 200) -> set[str]:
    """
    Get the set of keywords the target site already ranks for.

    Args:
        target_url: Target site URL (a bare domain also works).
        limit: Maximum number of organic keywords to request.

    Returns:
        Lowercased keyword set; empty set when the API key is missing or
        the request fails.
    """
    self.logger.info(f"Fetching target keywords for {target_url}")
    target = urlparse(target_url).netloc or target_url

    try:
        api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
        if not api_key:
            return set()

        # FIX: off-load the blocking requests call to a worker thread so
        # the event loop keeps making progress (consistent with the other
        # Ahrefs fetchers in this class).
        resp = await asyncio.to_thread(
            requests.get,
            "https://api.ahrefs.com/v3/site-explorer/organic-keywords",
            params={"target": target, "limit": limit, "select": "keyword,position,traffic"},
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()
        keywords = data.get("keywords", data.get("items", []))
        # Normalize to lowercase and drop entries with no keyword text.
        return {kw.get("keyword", "").lower() for kw in keywords if kw.get("keyword")}
    except Exception as exc:
        self.logger.warning(f"Failed to get target keywords: {exc}")
        return set()
|
||||||
|
|
||||||
|
async def get_organic_competitors(self, target_url: str, limit: int = 10) -> list[str]:
    """
    Discover organic competitor domains via Ahrefs.

    Args:
        target_url: Target site URL (a bare domain also works).
        limit: Maximum number of competitors to request.

    Returns:
        List of competitor domains; empty list when the API key is missing
        or the request fails.
    """
    self.logger.info(f"Discovering organic competitors for {target_url}")
    target = urlparse(target_url).netloc or target_url

    try:
        api_key = config.get_required("AHREFS_API_KEY") if hasattr(config, "get_required") else None
        if not api_key:
            return []

        # FIX: run the blocking HTTP request in a worker thread so this
        # coroutine does not stall the event loop.
        resp = await asyncio.to_thread(
            requests.get,
            "https://api.ahrefs.com/v3/site-explorer/organic-competitors",
            params={"target": target, "limit": limit},
            headers={"Authorization": f"Bearer {api_key}"},
            timeout=30,
        )
        resp.raise_for_status()
        data = resp.json()
        competitors = data.get("competitors", data.get("items", []))
        # Keep only entries that actually carry a domain.
        return [c.get("domain", "") for c in competitors if c.get("domain")]
    except Exception as exc:
        self.logger.warning(f"Failed to discover competitors: {exc}")
        return []
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Gap analysis
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def find_topic_gaps(
    self,
    target_url: str,
    competitor_urls: list[str],
) -> tuple[list[TopicGap], set[str], dict[str, int]]:
    """
    Identify topics covered by competitors but missing from target.

    Returns:
        - List of TopicGap objects, sorted by priority_score descending.
        - Set of target keywords (for reference).
        - Content volume comparison dict (url -> keyword/page count).
    """
    # Gather target keywords
    target_keywords = await self.get_target_keywords(target_url)

    # Gather competitor data in parallel; return_exceptions=True keeps one
    # failing competitor from aborting the whole gather.
    competitor_tasks = [self.get_competitor_topics(c_url) for c_url in competitor_urls]
    competitor_results = await asyncio.gather(*competitor_tasks, return_exceptions=True)

    # Build competitor topic map, keyed by each page's top keyword.
    competitor_topic_map: dict[str, TopicGap] = {}
    content_volume: dict[str, int] = {target_url: len(target_keywords)}

    for c_url, c_result in zip(competitor_urls, competitor_results):
        if isinstance(c_result, Exception):
            self.logger.warning(f"Error fetching {c_url}: {c_result}")
            continue

        pages = c_result if isinstance(c_result, list) else []
        content_volume[c_url] = len(pages)

        for page in pages:
            top_keyword = page.get("top_keyword", "").strip().lower()
            if not top_keyword:
                continue

            # Skip if target already covers this keyword
            if top_keyword in target_keywords:
                continue

            # Check for fuzzy matches (keyword contained in target set);
            # the len > 3 guard keeps very short keywords from matching
            # everything by substring.
            is_covered = any(
                top_keyword in tk or tk in top_keyword
                for tk in target_keywords
                if len(tk) > 3
            )
            if is_covered:
                continue

            if top_keyword not in competitor_topic_map:
                competitor_topic_map[top_keyword] = TopicGap(
                    topic=top_keyword,
                    estimated_traffic=int(page.get("traffic", 0)),
                )

            # Accumulate evidence: every competitor page ranking for the
            # keyword is recorded; traffic keeps the maximum observed.
            gap = competitor_topic_map[top_keyword]
            gap.competitor_urls.append(page.get("url", c_url))
            gap.competitor_keywords.append(top_keyword)
            gap.estimated_traffic = max(gap.estimated_traffic, int(page.get("traffic", 0)))

    # Score gaps
    gaps = list(competitor_topic_map.values())
    for gap in gaps:
        competitor_count = len(set(gap.competitor_urls))
        # Log-scaled traffic score saturating at 100 around 10k traffic.
        traffic_score = min(100, math.log10(max(gap.estimated_traffic, 1)) / math.log10(10000) * 100)
        # Share of the competitor set covering this topic, as a percentage.
        competition_score = (competitor_count / max(len(competitor_urls), 1)) * 100
        gap.priority_score = round((traffic_score * 0.6) + (competition_score * 0.4), 1)

        # Difficulty estimation: more competitors covering a topic implies
        # a harder SERP to enter.
        if competitor_count >= 3:
            gap.difficulty = "high"
        elif competitor_count >= 2:
            gap.difficulty = "medium"
        else:
            gap.difficulty = "low"

        # Content type suggestion
        gap.content_type_suggestion = self._suggest_content_type(gap.topic)

    gaps.sort(key=lambda g: g.priority_score, reverse=True)
    return gaps, target_keywords, content_volume
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _suggest_content_type(topic: str) -> str:
|
||||||
|
"""Suggest content type based on topic keywords."""
|
||||||
|
topic_lower = topic.lower()
|
||||||
|
if any(w in topic_lower for w in ["how to", "guide", "tutorial", "방법", "가이드"]):
|
||||||
|
return "guide"
|
||||||
|
if any(w in topic_lower for w in ["best", "top", "review", "추천", "후기", "비교"]):
|
||||||
|
return "listicle"
|
||||||
|
if any(w in topic_lower for w in ["what is", "이란", "뜻", "의미"]):
|
||||||
|
return "informational"
|
||||||
|
if any(w in topic_lower for w in ["cost", "price", "비용", "가격"]):
|
||||||
|
return "landing"
|
||||||
|
return "blog"
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Topic cluster mapping
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_topic_clusters(
    self,
    topics: list[str],
    n_clusters: int | None = None,
    min_cluster_size: int = 3,
) -> list[TopicCluster]:
    """
    Group topics into pillar/cluster structure using TF-IDF + hierarchical clustering.

    Args:
        topics: List of topic strings.
        n_clusters: Number of clusters (auto-detected if None).
        min_cluster_size: Minimum topics per cluster.

    Returns:
        List of TopicCluster objects, largest clusters first.
    """
    if len(topics) < min_cluster_size:
        self.logger.warning("Too few topics for clustering")
        return []

    # Represent each topic as a TF-IDF vector over uni- and bi-grams.
    try:
        matrix = TfidfVectorizer(
            max_features=500,
            stop_words="english",
            ngram_range=(1, 2),
        ).fit_transform(topics)
    except ValueError as exc:
        self.logger.warning(f"TF-IDF vectorization failed: {exc}")
        return []

    # Heuristic cluster count: ~5 topics per cluster, clamped to [2, 15],
    # and never more clusters than topics - 1.
    if n_clusters is None:
        n_clusters = max(2, min(len(topics) // 5, 15))
    n_clusters = min(n_clusters, len(topics) - 1)

    model = AgglomerativeClustering(
        n_clusters=n_clusters,
        metric="cosine",
        linkage="average",
    )
    assignments = model.fit_predict(matrix.toarray())

    # Bucket topics by cluster label.
    grouped: dict[int, list[str]] = defaultdict(list)
    for topic, cluster_id in zip(topics, assignments):
        grouped[cluster_id].append(topic)

    result: list[TopicCluster] = []
    for cluster_id, members in sorted(grouped.items()):
        if len(members) < min_cluster_size:
            continue

        # The longest topic is usually the broadest: treat it as the pillar.
        pillar = max(members, key=len)
        supporting = [t for t in members if t != pillar]

        result.append(
            TopicCluster(
                pillar_topic=pillar,
                pillar_keyword=pillar,
                cluster_topics=supporting[:20],
                cluster_keywords=list(supporting[:20]),
                total_volume=0,
                coverage_score=0.0,
            )
        )

    result.sort(key=lambda c: len(c.cluster_topics), reverse=True)
    return result
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Editorial calendar generation
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def generate_calendar(
    self,
    gaps: list[TopicGap],
    clusters: list[TopicCluster],
    weeks_ahead: int = 12,
    entries_per_week: int = 2,
) -> list[CalendarEntry]:
    """
    Generate prioritized editorial calendar from gaps and clusters.

    Args:
        gaps: List of topic gaps (sorted by priority).
        clusters: List of topic clusters.
        weeks_ahead: Number of weeks to plan.
        entries_per_week: Content pieces per week.

    Returns:
        List of CalendarEntry objects.
    """
    # Target word counts per content type.  Loop-invariant: previously this
    # dict was rebuilt on every iteration of the scheduling loop.
    word_count_map = {
        "guide": 2500,
        "listicle": 2000,
        "informational": 1800,
        "landing": 1200,
        "blog": 1500,
    }

    calendar: list[CalendarEntry] = []
    today = datetime.now()

    # Map every topic (pillar and supporting) to its pillar name.
    topic_to_cluster: dict[str, str] = {}
    for cluster in clusters:
        for topic in cluster.cluster_topics:
            topic_to_cluster[topic] = cluster.pillar_topic
        topic_to_cluster[cluster.pillar_topic] = cluster.pillar_topic

    # Pillar topics are scheduled first (they anchor internal linking),
    # then the remaining gaps in their existing priority order.
    pillar_topics = {c.pillar_topic for c in clusters}
    pillar_gaps = [g for g in gaps if g.topic in pillar_topics]
    other_gaps = [g for g in gaps if g.topic not in pillar_topics]
    ordered_gaps = pillar_gaps + other_gaps

    max_entries = weeks_ahead * entries_per_week
    week_offset = 0
    slot_in_week = 0

    for gap in ordered_gaps[:max_entries]:
        # Spread slots within a week ~3 days apart.
        target_date = today + timedelta(weeks=week_offset, days=slot_in_week * 3)

        # Map the numeric priority score to a coarse label.
        if gap.priority_score >= 70:
            priority = "high"
        elif gap.priority_score >= 40:
            priority = "medium"
        else:
            priority = "low"

        entry = CalendarEntry(
            topic=gap.topic,
            priority=priority,
            target_date=target_date.strftime("%Y-%m-%d"),
            content_type=gap.content_type_suggestion,
            target_word_count=word_count_map.get(gap.content_type_suggestion, 1500),
            primary_keyword=gap.topic,
            estimated_traffic=gap.estimated_traffic,
            cluster_name=topic_to_cluster.get(gap.topic, "uncategorized"),
        )
        calendar.append(entry)

        slot_in_week += 1
        if slot_in_week >= entries_per_week:
            slot_in_week = 0
            week_offset += 1

    return calendar
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Korean opportunity detection
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
@staticmethod
def detect_korean_opportunities(gaps: list[TopicGap]) -> list[dict[str, Any]]:
    """Detect Korean-market content opportunities among the topic gaps.

    Each gap contributes at most one opportunity: the first entry of
    KOREAN_OPPORTUNITY_PATTERNS whose pattern matches its topic.
    """
    found: list[dict[str, Any]] = []

    for gap in gaps:
        # Record only the first matching pattern per topic.
        match = next(
            (
                info
                for info in KOREAN_OPPORTUNITY_PATTERNS
                if re.search(info["pattern"], gap.topic, re.IGNORECASE)
            ),
            None,
        )
        if match is not None:
            found.append({
                "topic": gap.topic,
                "pattern": match["label"],
                "description": match["description"],
                "estimated_traffic": gap.estimated_traffic,
                "priority_score": gap.priority_score,
            })

    found.sort(key=lambda o: o["priority_score"], reverse=True)
    return found
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Orchestration
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def analyze(
    self,
    target_url: str,
    competitor_urls: list[str],
    build_clusters: bool = False,
) -> ContentGapResult:
    """
    Run full content gap analysis.

    Args:
        target_url: Target website URL.
        competitor_urls: List of competitor URLs.
        build_clusters: Whether to build topic clusters.

    Returns:
        ContentGapResult with gaps, clusters, and calendar.
    """
    result = ContentGapResult(
        target_url=target_url,
        competitor_urls=competitor_urls,
        timestamp=datetime.now().isoformat(),
    )

    self.logger.info(
        f"Starting gap analysis: {target_url} vs {len(competitor_urls)} competitors"
    )

    # 1. Topic gaps plus supporting statistics.
    gaps, target_keywords, content_volume = await self.find_topic_gaps(
        target_url, competitor_urls
    )
    result.gaps = gaps
    result.target_topics_count = len(target_keywords)
    result.competitor_topics_count = sum(content_volume.get(c, 0) for c in competitor_urls)
    result.content_volume_comparison = content_volume

    # 2. Optional pillar/cluster mapping over the gap topics.
    if build_clusters and gaps:
        result.clusters = self.build_topic_clusters([g.topic for g in gaps])

    # 3. Editorial calendar built from gaps + clusters.
    result.calendar = self.generate_calendar(gaps, result.clusters)

    # 4. Korean-market opportunity flags.
    result.korean_opportunities = self.detect_korean_opportunities(gaps)

    # 5. Human-readable strategy notes.
    result.recommendations = self._generate_recommendations(result)

    self.logger.info(
        f"Gap analysis complete: {len(gaps)} gaps, "
        f"{len(result.clusters)} clusters, "
        f"{len(result.calendar)} calendar entries"
    )

    return result
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _generate_recommendations(result: ContentGapResult) -> list[str]:
|
||||||
|
"""Generate strategic recommendations from gap analysis."""
|
||||||
|
recs: list[str] = []
|
||||||
|
|
||||||
|
gap_count = len(result.gaps)
|
||||||
|
if gap_count > 50:
|
||||||
|
recs.append(
|
||||||
|
f"경쟁사 대비 {gap_count}개의 콘텐츠 격차가 발견되었습니다. "
|
||||||
|
"우선순위 상위 20개 주제부터 콘텐츠 생성을 시작하세요."
|
||||||
|
)
|
||||||
|
elif gap_count > 20:
|
||||||
|
recs.append(
|
||||||
|
f"{gap_count}개의 콘텐츠 격차가 있습니다. "
|
||||||
|
"높은 트래픽 기회부터 순차적으로 콘텐츠를 생성하세요."
|
||||||
|
)
|
||||||
|
elif gap_count > 0:
|
||||||
|
recs.append(
|
||||||
|
f"{gap_count}개의 콘텐츠 격차가 발견되었습니다. "
|
||||||
|
"비교적 적은 격차이므로 빠른 시일 내 모두 커버할 수 있습니다."
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.clusters:
|
||||||
|
recs.append(
|
||||||
|
f"{len(result.clusters)}개의 토픽 클러스터를 구성했습니다. "
|
||||||
|
"필러 콘텐츠부터 작성하여 내부 링크 구조를 강화하세요."
|
||||||
|
)
|
||||||
|
|
||||||
|
if result.korean_opportunities:
|
||||||
|
recs.append(
|
||||||
|
f"한국어 시장 기회가 {len(result.korean_opportunities)}개 발견되었습니다. "
|
||||||
|
"후기, 비용, 비교 콘텐츠는 한국 검색 시장에서 높은 전환율을 보입니다."
|
||||||
|
)
|
||||||
|
|
||||||
|
high_priority = [g for g in result.gaps if g.priority_score >= 70]
|
||||||
|
if high_priority:
|
||||||
|
top_topics = ", ".join(g.topic for g in high_priority[:3])
|
||||||
|
recs.append(
|
||||||
|
f"최우선 주제: {top_topics}. "
|
||||||
|
"이 주제들은 높은 트래픽 잠재력과 경쟁사 커버리지를 가지고 있습니다."
|
||||||
|
)
|
||||||
|
|
||||||
|
if not recs:
|
||||||
|
recs.append("경쟁사 대비 콘텐츠 커버리지가 양호합니다. 기존 콘텐츠 최적화에 집중하세요.")
|
||||||
|
|
||||||
|
return recs
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the content gap analyzer."""
    parser = argparse.ArgumentParser(
        description="SEO Content Gap Analyzer - topic gaps, clusters, calendar",
    )
    # Required inputs: one target site plus one or more competitors.
    parser.add_argument("--target", required=True, help="Target website URL")
    parser.add_argument(
        "--competitor",
        action="append",
        dest="competitors",
        required=True,
        help="Competitor URL (can be repeated)",
    )
    # Optional behavior flags and output destination.
    parser.add_argument("--clusters", action="store_true", help="Build topic clusters")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", help="Save output to file")
    return parser
|
||||||
|
|
||||||
|
|
||||||
|
def format_text_report(result: ContentGapResult) -> str:
    """Format a gap-analysis result as a human-readable markdown report.

    Sections: header, content-volume comparison, top-20 topic gaps, then
    (when present) clusters, a 15-entry calendar slice, Korean-market
    opportunities, and finally the recommendation list.
    """
    out: list[str] = []
    add = out.append

    add(f"## Content Gap Analysis: {result.target_url}")
    add(f"**Date**: {result.timestamp[:10]}")
    add(f"**Competitors**: {', '.join(result.competitor_urls)}")
    add("")

    add("### Content Volume Comparison")
    for site, count in result.content_volume_comparison.items():
        add(f" - {site}: {count} topics")
    add("")

    add(f"### Topic Gaps ({len(result.gaps)} found)")
    for rank, gap in enumerate(result.gaps[:20], start=1):
        add(
            f" {rank}. [{gap.priority_score:.0f}] {gap.topic} "
            f"(traffic: {gap.estimated_traffic}, difficulty: {gap.difficulty})"
        )
    add("")

    if result.clusters:
        add(f"### Topic Clusters ({len(result.clusters)})")
        for rank, cluster in enumerate(result.clusters, start=1):
            add(f" {rank}. **{cluster.pillar_topic}** ({len(cluster.cluster_topics)} subtopics)")
            # Show at most five subtopics per cluster to keep the report short.
            for sub in cluster.cluster_topics[:5]:
                add(f" - {sub}")
        add("")

    if result.calendar:
        add(f"### Editorial Calendar ({len(result.calendar)} entries)")
        for entry in result.calendar[:15]:
            add(
                f" - [{entry.target_date}] {entry.topic} "
                f"({entry.content_type}, {entry.target_word_count}w, priority: {entry.priority})"
            )
        add("")

    if result.korean_opportunities:
        add(f"### Korean Market Opportunities ({len(result.korean_opportunities)})")
        for opp in result.korean_opportunities[:10]:
            add(f" - {opp['topic']} ({opp['description']})")
        add("")

    add("### Recommendations")
    for rank, rec in enumerate(result.recommendations, start=1):
        add(f" {rank}. {rec}")

    return "\n".join(out)
|
||||||
|
|
||||||
|
|
||||||
|
async def main() -> None:
    """CLI entry point: parse arguments, run the analysis, emit the report."""
    args = build_parser().parse_args()

    analyzer = ContentGapAnalyzer()
    result = await analyzer.analyze(
        target_url=args.target,
        competitor_urls=args.competitors,
        build_clusters=args.clusters,
    )

    # Render either machine-readable JSON or the markdown text report.
    if args.json:
        rendered = json.dumps(asdict(result), ensure_ascii=False, indent=2, default=str)
    else:
        rendered = format_text_report(result)

    # Write to the requested file, otherwise print to stdout.
    if args.output:
        with open(args.output, "w", encoding="utf-8") as fh:
            fh.write(rendered)
        logger.info(f"Output saved to {args.output}")
    else:
        print(rendered)

    analyzer.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the async CLI under asyncio's event loop.
if __name__ == "__main__":
    asyncio.run(main())
|
||||||
@@ -0,0 +1,11 @@
|
|||||||
|
# 23-seo-content-strategy dependencies
|
||||||
|
requests>=2.31.0
|
||||||
|
aiohttp>=3.9.0
|
||||||
|
beautifulsoup4>=4.12.0
|
||||||
|
lxml>=5.1.0
|
||||||
|
pandas>=2.1.0
|
||||||
|
scikit-learn>=1.3.0
|
||||||
|
tenacity>=8.2.0
|
||||||
|
tqdm>=4.66.0
|
||||||
|
python-dotenv>=1.0.0
|
||||||
|
rich>=13.7.0
|
||||||
132
custom-skills/23-seo-content-strategy/desktop/SKILL.md
Normal file
132
custom-skills/23-seo-content-strategy/desktop/SKILL.md
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
---
|
||||||
|
name: seo-content-strategy
|
||||||
|
description: |
|
||||||
|
Content strategy and planning for SEO. Triggers: content audit, content strategy, content gap, topic clusters, content brief, editorial calendar, content decay, 콘텐츠 전략, 콘텐츠 감사.
|
||||||
|
---
|
||||||
|
|
||||||
|
# SEO Content Strategy
|
||||||
|
|
||||||
|
## Purpose
|
||||||
|
|
||||||
|
Audit existing content performance, identify topic gaps vs competitors, map topic clusters, detect content decay, and generate SEO content briefs. Supports Korean content patterns (Naver Blog format, 후기/review content, 추천 listicles).
|
||||||
|
|
||||||
|
## Core Capabilities
|
||||||
|
|
||||||
|
1. **Content Audit** - Inventory, performance scoring, decay detection
|
||||||
|
2. **Content Gap Analysis** - Topic gaps vs competitors, cluster mapping
|
||||||
|
3. **Content Brief Generation** - Outlines, keywords, word count targets
|
||||||
|
4. **Editorial Calendar** - Prioritized content creation schedule
|
||||||
|
5. **Korean Content Patterns** - Naver Blog style, 후기, 추천 format analysis
|
||||||
|
|
||||||
|
## MCP Tool Usage
|
||||||
|
|
||||||
|
### SEO Data
|
||||||
|
```
|
||||||
|
our-seo-agent CLI: Primary content/traffic data source (future); use --input for pre-fetched JSON
|
||||||
|
WebSearch / WebFetch: Supplementary content data
|
||||||
|
```
|
||||||
|
|
||||||
|
### WebSearch for Content Research
|
||||||
|
```
|
||||||
|
WebSearch: Research content topics and competitor strategies
|
||||||
|
WebFetch: Analyze competitor page content and structure
|
||||||
|
```
|
||||||
|
|
||||||
|
### Notion for Report Storage
|
||||||
|
```
|
||||||
|
notion-create-pages: Save audit reports to SEO Audit Log
|
||||||
|
```
|
||||||
|
|
||||||
|
## Workflow
|
||||||
|
|
||||||
|
### 1. Content Audit
|
||||||
|
1. Crawl sitemap to discover all content URLs
|
||||||
|
2. Fetch top pages data via our-seo-agent CLI, pre-fetched JSON, or WebSearch
|
||||||
|
3. Classify content types (blog, product, service, landing, resource)
|
||||||
|
4. Score each page performance (0-100 composite)
|
||||||
|
5. Detect decaying content (traffic decline patterns)
|
||||||
|
6. Analyze freshness distribution (fresh/aging/stale)
|
||||||
|
7. Identify Korean content patterns (후기, 추천, 방법 formats)
|
||||||
|
8. Generate recommendations
|
||||||
|
|
||||||
|
### 2. Content Gap Analysis
|
||||||
|
1. Gather target site keywords via our-seo-agent CLI or pre-fetched data
|
||||||
|
2. Gather competitor top pages and keywords
|
||||||
|
3. Identify topics present in competitors but missing from target
|
||||||
|
4. Score gaps by priority (traffic potential + competition coverage)
|
||||||
|
5. Build topic clusters using TF-IDF + hierarchical clustering
|
||||||
|
6. Generate editorial calendar with priority and dates
|
||||||
|
7. Detect Korean market content opportunities
|
||||||
|
|
||||||
|
### 3. Content Brief Generation
|
||||||
|
1. Analyze top 5-10 ranking pages for target keyword
|
||||||
|
2. Extract headings, word counts, content features (FAQ, images, video)
|
||||||
|
3. Build recommended H2/H3 outline from competitor patterns
|
||||||
|
4. Suggest primary, secondary, and LSI keywords
|
||||||
|
5. Calculate target word count (avg of top 5 +/- 20%)
|
||||||
|
6. Find internal linking opportunities on the target site
|
||||||
|
7. Detect search intent (informational, commercial, transactional, navigational)
|
||||||
|
8. Add Korean format recommendations based on intent
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```markdown
|
||||||
|
## Content Audit: [domain]
|
||||||
|
|
||||||
|
### Content Inventory
|
||||||
|
- Total pages: [count]
|
||||||
|
- By type: blog [n], product [n], service [n], other [n]
|
||||||
|
- Average performance score: [score]/100
|
||||||
|
|
||||||
|
### Top Performers
|
||||||
|
1. [score] [url] (traffic: [n])
|
||||||
|
...
|
||||||
|
|
||||||
|
### Decaying Content
|
||||||
|
1. [decay rate] [url] (traffic: [n])
|
||||||
|
...
|
||||||
|
|
||||||
|
### Content Gaps vs Competitors
|
||||||
|
1. [priority] [topic] (est. traffic: [n], difficulty: [level])
|
||||||
|
...
|
||||||
|
|
||||||
|
### Topic Clusters
|
||||||
|
1. **[Pillar Topic]** ([n] subtopics)
|
||||||
|
- [subtopic 1]
|
||||||
|
- [subtopic 2]
|
||||||
|
|
||||||
|
### Editorial Calendar
|
||||||
|
- [date] [topic] ([type], [word count], priority: [level])
|
||||||
|
...
|
||||||
|
|
||||||
|
### Recommendations
|
||||||
|
1. [Priority actions]
|
||||||
|
```
|
||||||
|
|
||||||
|
## Common Issues
|
||||||
|
|
||||||
|
| Issue | Impact | Fix |
|
||||||
|
|-------|--------|-----|
|
||||||
|
| No blog content | High | Build blog content strategy with topic clusters |
|
||||||
|
| Content decay (traffic loss) | High | Refresh and update declining pages |
|
||||||
|
| Missing competitor topics | Medium | Create content for high-priority gaps |
|
||||||
|
| No 후기/review content | Medium | Add Korean review-style content for conversions |
|
||||||
|
| Stale content (>12 months) | Medium | Update or consolidate outdated pages |
|
||||||
|
| No topic clusters | Medium | Organize content into pillar/cluster structure |
|
||||||
|
| Missing FAQ sections | Low | Add FAQ schema for featured snippet opportunities |
|
||||||
|
|
||||||
|
## Limitations
|
||||||
|
|
||||||
|
- our-seo-agent CLI or pre-fetched JSON required for traffic and keyword data
|
||||||
|
- Competitor analysis limited to publicly available content
|
||||||
|
- Content decay detection uses heuristic without historical data in standalone mode
|
||||||
|
- Topic clustering requires minimum 3 topics per cluster
|
||||||
|
- Word count analysis requires accessible competitor pages (no JS rendering)
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
All audit reports MUST be saved to OurDigital SEO Audit Log:
|
||||||
|
- **Database ID**: `2c8581e5-8a1e-8035-880b-e38cefc2f3ef`
|
||||||
|
- **Properties**: Issue (title), Site (url), Category, Priority, Found Date, Audit ID
|
||||||
|
- **Language**: Korean with English technical terms
|
||||||
|
- **Audit ID Format**: CONTENT-YYYYMMDD-NNN
|
||||||
8
custom-skills/23-seo-content-strategy/desktop/skill.yaml
Normal file
8
custom-skills/23-seo-content-strategy/desktop/skill.yaml
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
name: seo-content-strategy
|
||||||
|
description: |
|
||||||
|
Content strategy and planning for SEO. Triggers: content audit, content strategy, content gap, topic clusters, content brief, editorial calendar, content decay.
|
||||||
|
allowed-tools:
|
||||||
|
- mcp__ahrefs__*
|
||||||
|
- mcp__notion__*
|
||||||
|
- WebSearch
|
||||||
|
- WebFetch
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Ahrefs
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# Notion
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
@@ -0,0 +1,15 @@
|
|||||||
|
# WebSearch
|
||||||
|
|
||||||
|
> TODO: Document tool usage for this skill
|
||||||
|
|
||||||
|
## Available Commands
|
||||||
|
|
||||||
|
- [ ] List commands
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
- [ ] Add configuration details
|
||||||
|
|
||||||
|
## Examples
|
||||||
|
|
||||||
|
- [ ] Add usage examples
|
||||||
137
custom-skills/24-seo-ecommerce/code/CLAUDE.md
Normal file
137
custom-skills/24-seo-ecommerce/code/CLAUDE.md
Normal file
@@ -0,0 +1,137 @@
|
|||||||
|
# CLAUDE.md
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
E-commerce SEO audit tool for product page optimization, product schema validation, category taxonomy analysis, and marketplace presence checking. Supports Naver Smart Store optimization and Korean marketplace platforms (Coupang, Gmarket, 11번가).
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pip install -r scripts/requirements.txt
|
||||||
|
|
||||||
|
# E-commerce SEO audit
|
||||||
|
python scripts/ecommerce_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Product schema validation
|
||||||
|
python scripts/product_schema_checker.py --url https://example.com/product --json
|
||||||
|
```
|
||||||
|
|
||||||
|
## Scripts
|
||||||
|
|
||||||
|
| Script | Purpose | Key Output |
|
||||||
|
|--------|---------|------------|
|
||||||
|
| `ecommerce_auditor.py` | Full e-commerce SEO audit | Product page issues, category structure, marketplace presence |
|
||||||
|
| `product_schema_checker.py` | Validate product structured data | Schema completeness, errors, rich result eligibility |
|
||||||
|
| `base_client.py` | Shared utilities | RateLimiter, ConfigManager, BaseAsyncClient |
|
||||||
|
|
||||||
|
## E-Commerce Auditor
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Full audit
|
||||||
|
python scripts/ecommerce_auditor.py --url https://example.com --json
|
||||||
|
|
||||||
|
# Product page audit only
|
||||||
|
python scripts/ecommerce_auditor.py --url https://example.com --scope products --json
|
||||||
|
|
||||||
|
# Category taxonomy analysis
|
||||||
|
python scripts/ecommerce_auditor.py --url https://example.com --scope categories --json
|
||||||
|
|
||||||
|
# Check Korean marketplace presence
|
||||||
|
python scripts/ecommerce_auditor.py --url https://example.com --korean-marketplaces --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Product page SEO audit (titles, meta descriptions, image alt text, H1 structure)
|
||||||
|
- Category taxonomy analysis (depth, breadcrumb implementation, faceted navigation)
|
||||||
|
- Duplicate content detection (parameter URLs, product variants, pagination)
|
||||||
|
- Pagination/infinite scroll SEO validation (rel=prev/next, canonical tags)
|
||||||
|
- Internal linking structure for product discovery
|
||||||
|
- Naver Smart Store optimization checks
|
||||||
|
- Korean marketplace presence (Coupang, Gmarket, 11번가 product listing detection)
|
||||||
|
|
||||||
|
## Product Schema Checker
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Validate single product page
|
||||||
|
python scripts/product_schema_checker.py --url https://example.com/product/123 --json
|
||||||
|
|
||||||
|
# Batch validate from sitemap
|
||||||
|
python scripts/product_schema_checker.py --sitemap https://example.com/product-sitemap.xml --sample 50 --json
|
||||||
|
```
|
||||||
|
|
||||||
|
**Capabilities**:
|
||||||
|
- Product schema validation (Product, Offer, AggregateRating, Review, BreadcrumbList)
|
||||||
|
- Required property completeness check (name, image, description, offers, price, availability)
|
||||||
|
- Optional property recommendations (brand, sku, gtin, mpn, review, aggregateRating)
|
||||||
|
- Rich result eligibility assessment
|
||||||
|
- Price and availability markup validation
|
||||||
|
- Merchant listing schema support
|
||||||
|
- Korean market: Naver Shopping structured data requirements
|
||||||
|
|
||||||
|
## Data Sources
|
||||||
|
|
||||||
|
| Source | Purpose |
|
||||||
|
|--------|---------|
|
||||||
|
| `our-seo-agent` CLI | Primary data source (future); use `--input` for pre-fetched JSON |
|
||||||
|
| WebSearch / WebFetch | Supplementary live data |
|
||||||
|
| Notion MCP | Save audit report to database |
|
||||||
|
|
||||||
|
## Output Format
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"url": "https://example.com",
|
||||||
|
"product_pages_audited": 50,
|
||||||
|
"issues": {
|
||||||
|
"critical": [...],
|
||||||
|
"high": [...],
|
||||||
|
"medium": [...],
|
||||||
|
"low": [...]
|
||||||
|
},
|
||||||
|
"category_structure": {
|
||||||
|
"max_depth": 4,
|
||||||
|
"breadcrumbs_present": true,
|
||||||
|
"faceted_nav_issues": [...]
|
||||||
|
},
|
||||||
|
"schema_validation": {
|
||||||
|
"pages_with_schema": 42,
|
||||||
|
"pages_without_schema": 8,
|
||||||
|
"common_errors": [...]
|
||||||
|
},
|
||||||
|
"korean_marketplaces": {
|
||||||
|
"naver_smart_store": {"found": true, "url": "..."},
|
||||||
|
"coupang": {"found": false},
|
||||||
|
"gmarket": {"found": false}
|
||||||
|
},
|
||||||
|
"score": 65,
|
||||||
|
"timestamp": "2025-01-01T00:00:00"
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Notion Output (Required)
|
||||||
|
|
||||||
|
**IMPORTANT**: All audit reports MUST be saved to the OurDigital SEO Audit Log database.
|
||||||
|
|
||||||
|
### Database Configuration
|
||||||
|
|
||||||
|
| Field | Value |
|
||||||
|
|-------|-------|
|
||||||
|
| Database ID | `2c8581e5-8a1e-8035-880b-e38cefc2f3ef` |
|
||||||
|
| URL | https://www.notion.so/dintelligence/2c8581e58a1e8035880be38cefc2f3ef |
|
||||||
|
|
||||||
|
### Required Properties
|
||||||
|
|
||||||
|
| Property | Type | Description |
|
||||||
|
|----------|------|-------------|
|
||||||
|
| Issue | Title | Report title (Korean + date) |
|
||||||
|
| Site | URL | Audited website URL |
|
||||||
|
| Category | Select | E-Commerce SEO |
|
||||||
|
| Priority | Select | Based on issue severity |
|
||||||
|
| Found Date | Date | Audit date (YYYY-MM-DD) |
|
||||||
|
| Audit ID | Rich Text | Format: ECOM-YYYYMMDD-NNN |
|
||||||
|
|
||||||
|
### Language Guidelines
|
||||||
|
|
||||||
|
- Report content in Korean (한국어)
|
||||||
|
- Keep technical English terms as-is (e.g., Schema Markup, Product, Offer)
|
||||||
|
- URLs and code remain unchanged
|
||||||
207
custom-skills/24-seo-ecommerce/code/scripts/base_client.py
Normal file
207
custom-skills/24-seo-ecommerce/code/scripts/base_client.py
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
"""
|
||||||
|
Base Client - Shared async client utilities
|
||||||
|
===========================================
|
||||||
|
Purpose: Rate-limited async operations for API clients
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
from asyncio import Semaphore
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any, Callable, TypeVar
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from tenacity import (
|
||||||
|
retry,
|
||||||
|
stop_after_attempt,
|
||||||
|
wait_exponential,
|
||||||
|
retry_if_exception_type,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Load environment variables
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
# Logging setup
|
||||||
|
logging.basicConfig(
|
||||||
|
level=logging.INFO,
|
||||||
|
format="%(asctime)s - %(levelname)s - %(message)s",
|
||||||
|
)
|
||||||
|
|
||||||
|
T = TypeVar("T")
|
||||||
|
|
||||||
|
|
||||||
|
class RateLimiter:
    """Rate limiter using a token bucket algorithm.

    Tokens refill continuously at ``rate`` per ``per`` seconds, capped at a
    burst capacity of ``rate`` tokens. Each :meth:`acquire` consumes one
    token, sleeping when the bucket is empty.
    """

    def __init__(self, rate: float, per: float = 1.0):
        """
        Initialize rate limiter.

        Args:
            rate: Number of requests allowed per period (also the burst capacity).
            per: Time period in seconds (default: 1 second).
        """
        self.rate = rate
        self.per = per
        self.tokens = rate  # start with a full bucket
        self.last_update = datetime.now()
        self._lock = asyncio.Lock()  # serializes bookkeeping across tasks

    async def acquire(self) -> None:
        """Acquire a token, waiting if necessary."""
        async with self._lock:
            now = datetime.now()
            elapsed = (now - self.last_update).total_seconds()
            # Refill proportionally to elapsed time, capped at burst capacity.
            self.tokens = min(self.rate, self.tokens + elapsed * (self.rate / self.per))
            self.last_update = now

            if self.tokens < 1:
                wait_time = (1 - self.tokens) * (self.per / self.rate)
                await asyncio.sleep(wait_time)
                # BUG FIX: advance last_update past the sleep. Previously it
                # stayed at the pre-sleep timestamp, so the next acquire()
                # re-credited the sleep interval on top of the token already
                # consumed here, over-filling the bucket and letting callers
                # exceed the configured rate.
                self.last_update = datetime.now()
                self.tokens = 0  # the token accrued during the sleep is spent
            else:
                self.tokens -= 1
|
||||||
|
|
||||||
|
|
||||||
|
class BaseAsyncClient:
    """Base class for async API clients with rate limiting.

    Combines a concurrency cap (semaphore), a token-bucket rate limiter,
    and tenacity-based retries, and tracks simple request statistics in
    ``self.stats``.
    """

    def __init__(
        self,
        max_concurrent: int = 5,
        requests_per_second: float = 3.0,
        logger: logging.Logger | None = None,
    ):
        """
        Initialize base client.

        Args:
            max_concurrent: Maximum concurrent requests.
            requests_per_second: Rate limit (tokens per second).
            logger: Logger instance; defaults to one named after the subclass.
        """
        self.semaphore = Semaphore(max_concurrent)
        self.rate_limiter = RateLimiter(requests_per_second)
        self.logger = logger or logging.getLogger(self.__class__.__name__)
        # Mutated by _rate_limited_request. NOTE(review): "retries" is
        # declared but never incremented anywhere in this class.
        self.stats = {
            "requests": 0,
            "success": 0,
            "errors": 0,
            "retries": 0,
        }

    # NOTE(review): retrying on bare Exception also retries programming
    # errors (TypeError, KeyError, ...), not just transient network
    # failures — confirm this breadth is intentional.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=2, max=10),
        retry=retry_if_exception_type(Exception),
    )
    async def _rate_limited_request(
        self,
        coro: Callable[[], Any],
    ) -> Any:
        """Execute a request with rate limiting and retry.

        Each retry attempt re-acquires the semaphore and a rate-limiter
        token, and counts again in stats["requests"].
        """
        async with self.semaphore:
            await self.rate_limiter.acquire()
            self.stats["requests"] += 1
            try:
                result = await coro()
                self.stats["success"] += 1
                return result
            except Exception as e:
                self.stats["errors"] += 1
                self.logger.error(f"Request failed: {e}")
                # Re-raise so tenacity can retry (up to 3 attempts total).
                raise

    async def batch_requests(
        self,
        requests: list[Callable[[], Any]],
        desc: str = "Processing",
    ) -> list[Any]:
        """Execute multiple requests concurrently.

        Failures (after retries are exhausted) are converted to
        ``{"error": str(e)}`` dicts instead of raising. Results come back in
        completion order when tqdm is available, otherwise in submission
        order via asyncio.gather.
        """
        try:
            # Progress bar is optional; degrade silently when missing.
            from tqdm.asyncio import tqdm
            has_tqdm = True
        except ImportError:
            has_tqdm = False

        async def execute(req: Callable) -> Any:
            # Shield the batch from individual failures.
            try:
                return await self._rate_limited_request(req)
            except Exception as e:
                return {"error": str(e)}

        tasks = [execute(req) for req in requests]

        if has_tqdm:
            results = []
            for coro in tqdm.as_completed(tasks, total=len(tasks), desc=desc):
                result = await coro
                results.append(result)
            return results
        else:
            # return_exceptions=True is redundant (execute already catches
            # everything) but harmless.
            return await asyncio.gather(*tasks, return_exceptions=True)

    def print_stats(self) -> None:
        """Log request statistics via the client logger."""
        self.logger.info("=" * 40)
        self.logger.info("Request Statistics:")
        self.logger.info(f"  Total Requests: {self.stats['requests']}")
        self.logger.info(f"  Successful: {self.stats['success']}")
        self.logger.info(f"  Errors: {self.stats['errors']}")
        self.logger.info("=" * 40)
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigManager:
|
||||||
|
"""Manage API configuration and credentials."""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
@property
|
||||||
|
def google_credentials_path(self) -> str | None:
|
||||||
|
"""Get Google service account credentials path."""
|
||||||
|
# Prefer SEO-specific credentials, fallback to general credentials
|
||||||
|
seo_creds = os.path.expanduser("~/.credential/ourdigital-seo-agent.json")
|
||||||
|
if os.path.exists(seo_creds):
|
||||||
|
return seo_creds
|
||||||
|
return os.getenv("GOOGLE_APPLICATION_CREDENTIALS")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def pagespeed_api_key(self) -> str | None:
|
||||||
|
"""Get PageSpeed Insights API key."""
|
||||||
|
return os.getenv("PAGESPEED_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_api_key(self) -> str | None:
|
||||||
|
"""Get Custom Search API key."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_API_KEY")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def custom_search_engine_id(self) -> str | None:
|
||||||
|
"""Get Custom Search Engine ID."""
|
||||||
|
return os.getenv("CUSTOM_SEARCH_ENGINE_ID")
|
||||||
|
|
||||||
|
@property
|
||||||
|
def notion_token(self) -> str | None:
|
||||||
|
"""Get Notion API token."""
|
||||||
|
return os.getenv("NOTION_TOKEN") or os.getenv("NOTION_API_KEY")
|
||||||
|
|
||||||
|
def validate_google_credentials(self) -> bool:
|
||||||
|
"""Validate Google credentials are configured."""
|
||||||
|
creds_path = self.google_credentials_path
|
||||||
|
if not creds_path:
|
||||||
|
return False
|
||||||
|
return os.path.exists(creds_path)
|
||||||
|
|
||||||
|
def get_required(self, key: str) -> str:
|
||||||
|
"""Get required environment variable or raise error."""
|
||||||
|
value = os.getenv(key)
|
||||||
|
if not value:
|
||||||
|
raise ValueError(f"Missing required environment variable: {key}")
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
# Singleton config instance shared by the audit scripts in this package.
config = ConfigManager()
|
||||||
1046
custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py
Normal file
1046
custom-skills/24-seo-ecommerce/code/scripts/ecommerce_auditor.py
Normal file
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,805 @@
|
|||||||
|
"""
|
||||||
|
Product Schema Checker
|
||||||
|
======================
|
||||||
|
Purpose: Validate Product structured data (JSON-LD, Microdata, RDFa)
|
||||||
|
for Google and Naver rich result eligibility.
|
||||||
|
Python: 3.10+
|
||||||
|
"""
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from dataclasses import asdict, dataclass, field
|
||||||
|
from datetime import datetime
|
||||||
|
from typing import Any
|
||||||
|
from urllib.parse import urljoin, urlparse
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from rich.console import Console
|
||||||
|
from rich.table import Table
|
||||||
|
|
||||||
|
from base_client import BaseAsyncClient, config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
console = Console()
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Data classes
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@dataclass
class SchemaProperty:
    """Single property within a schema object (e.g. Product.name).

    Attributes:
        name: Property name as it appears in the structured data.
        value: Raw value extracted from the markup.
        required: Whether the property is required (vs. recommended).
        valid: Whether the value passed validation.
        error: Validation error message; empty string when valid.
    """
    name: str
    value: Any
    required: bool
    valid: bool
    error: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ProductSchema:
    """Validation result for one product schema on a page.

    Attributes:
        url: Page URL the schema was extracted from.
        schema_type: Schema.org type name.
        properties: Validated properties (SchemaProperty instances as dicts).
        is_valid: Overall validity flag (set by the checker).
        rich_result_eligible: Rich-result eligibility flag (set by the checker).
        errors: Blocking validation problems.
        warnings: Non-blocking issues.
    """
    url: str
    schema_type: str  # Product, Offer, AggregateRating, etc.
    properties: list[dict]  # list of SchemaProperty as dicts
    is_valid: bool = False
    rich_result_eligible: bool = False
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SchemaCheckResult:
    """Complete schema check result for one or more pages."""
    urls_checked: int = 0
    pages_with_schema: int = 0
    pages_without_schema: int = 0
    schemas: list[dict] = field(default_factory=list)
    common_errors: list[str] = field(default_factory=list)
    common_warnings: list[str] = field(default_factory=list)
    naver_shopping_issues: list[dict] = field(default_factory=list)
    score: int = 0
    timestamp: str = ""

    def calculate_score(self) -> int:
        """Compute and store a 0-100 completeness score.

        Weighting: 40% schema coverage across checked URLs, 35% schema
        validity, 25% rich-result eligibility. Returns 0 when no URLs
        were checked.
        """
        if not self.urls_checked:
            self.score = 0
            return 0
        denom = max(len(self.schemas), 1)  # avoid division by zero
        coverage = self.pages_with_schema / self.urls_checked
        validity = sum(1 for s in self.schemas if s.get("is_valid")) / denom
        eligibility = sum(1 for s in self.schemas if s.get("rich_result_eligible")) / denom
        self.score = int(40 * coverage + 35 * validity + 25 * eligibility)
        return self.score
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
# Schema requirements
# ---------------------------------------------------------------------------
# Property sets per schema.org type: *_REQUIRED must be present for a schema
# to validate; *_RECOMMENDED are optional but improve rich-result coverage.

PRODUCT_REQUIRED = {"name", "image", "description"}
PRODUCT_RECOMMENDED = {
    "brand", "sku", "gtin", "gtin8", "gtin13", "gtin14", "mpn",
    "offers", "review", "aggregateRating", "color", "material",
}

OFFER_REQUIRED = {"price", "priceCurrency", "availability"}
OFFER_RECOMMENDED = {
    "url", "priceValidUntil", "itemCondition", "seller",
    "shippingDetails", "hasMerchantReturnPolicy",
}

AGGREGATE_RATING_REQUIRED = {"ratingValue", "reviewCount"}
AGGREGATE_RATING_RECOMMENDED = {"bestRating", "worstRating", "ratingCount"}

REVIEW_REQUIRED = {"author", "reviewRating"}
REVIEW_RECOMMENDED = {"datePublished", "reviewBody", "name"}

BREADCRUMB_REQUIRED = {"itemListElement"}

# Accepted Offer.availability values. Both https:// and http:// schema.org
# URIs plus bare enum names are accepted because real-world markup uses all
# three forms.
AVAILABILITY_VALUES = {
    "https://schema.org/InStock",
    "https://schema.org/OutOfStock",
    "https://schema.org/PreOrder",
    "https://schema.org/BackOrder",
    "https://schema.org/Discontinued",
    "https://schema.org/InStoreOnly",
    "https://schema.org/OnlineOnly",
    "https://schema.org/LimitedAvailability",
    "https://schema.org/SoldOut",
    "http://schema.org/InStock",
    "http://schema.org/OutOfStock",
    "http://schema.org/PreOrder",
    "http://schema.org/BackOrder",
    "http://schema.org/Discontinued",
    "InStock", "OutOfStock", "PreOrder", "BackOrder", "Discontinued",
}

# Accepted Offer.itemCondition values, same three-form leniency as above.
ITEM_CONDITION_VALUES = {
    "https://schema.org/NewCondition",
    "https://schema.org/UsedCondition",
    "https://schema.org/RefurbishedCondition",
    "https://schema.org/DamagedCondition",
    "http://schema.org/NewCondition",
    "http://schema.org/UsedCondition",
    "http://schema.org/RefurbishedCondition",
    "NewCondition", "UsedCondition", "RefurbishedCondition",
}
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main checker
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
class ProductSchemaChecker(BaseAsyncClient):
|
||||||
|
"""Validate Product structured data on e-commerce pages."""
|
||||||
|
|
||||||
|
    def __init__(
        self,
        max_concurrent: int = 10,
        requests_per_second: float = 5.0,
        timeout: int = 30,
    ):
        """Initialize the checker.

        Args:
            max_concurrent: Maximum number of concurrent requests
                (forwarded to BaseAsyncClient).
            requests_per_second: Rate limit (forwarded to BaseAsyncClient).
            timeout: Total per-request timeout in seconds.
        """
        super().__init__(max_concurrent=max_concurrent, requests_per_second=requests_per_second)
        # Total (connect + read) budget applied to every request.
        self.timeout = aiohttp.ClientTimeout(total=timeout)
        # Self-identifying UA; Korean-first Accept-Language because the
        # checker also runs Naver Shopping-specific checks (see
        # check_naver_shopping_requirements).
        self.headers = {
            "User-Agent": (
                "Mozilla/5.0 (compatible; ProductSchemaChecker/1.0; "
                "+https://ourdigital.org)"
            ),
            "Accept": "text/html,application/xhtml+xml",
            "Accept-Language": "ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7",
        }
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Page fetching
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
async def _fetch_page(self, session: aiohttp.ClientSession, url: str) -> str:
|
||||||
|
"""Fetch page HTML."""
|
||||||
|
try:
|
||||||
|
async with session.get(url, headers=self.headers, timeout=self.timeout,
|
||||||
|
allow_redirects=True, ssl=False) as resp:
|
||||||
|
return await resp.text(errors="replace")
|
||||||
|
except Exception as exc:
|
||||||
|
self.logger.warning(f"Failed to fetch {url}: {exc}")
|
||||||
|
return ""
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Schema extraction
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def extract_schemas(self, html: str, page_url: str) -> list[dict]:
|
||||||
|
"""Extract all structured data from HTML (JSON-LD, Microdata, RDFa)."""
|
||||||
|
schemas: list[dict] = []
|
||||||
|
soup = BeautifulSoup(html, "lxml")
|
||||||
|
|
||||||
|
# --- JSON-LD ---
|
||||||
|
for script in soup.find_all("script", attrs={"type": "application/ld+json"}):
|
||||||
|
try:
|
||||||
|
text = script.string or script.get_text()
|
||||||
|
if not text:
|
||||||
|
continue
|
||||||
|
data = json.loads(text)
|
||||||
|
if isinstance(data, list):
|
||||||
|
for item in data:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
schemas.append(item)
|
||||||
|
elif isinstance(data, dict):
|
||||||
|
# Handle @graph
|
||||||
|
if "@graph" in data:
|
||||||
|
for item in data["@graph"]:
|
||||||
|
if isinstance(item, dict):
|
||||||
|
schemas.append(item)
|
||||||
|
else:
|
||||||
|
schemas.append(data)
|
||||||
|
except (json.JSONDecodeError, TypeError) as exc:
|
||||||
|
self.logger.debug(f"JSON-LD parse error on {page_url}: {exc}")
|
||||||
|
|
||||||
|
# --- Microdata ---
|
||||||
|
for item_scope in soup.find_all(attrs={"itemscope": True}):
|
||||||
|
item_type = item_scope.get("itemtype", "")
|
||||||
|
if "Product" in item_type or "Offer" in item_type:
|
||||||
|
microdata = self._parse_microdata(item_scope)
|
||||||
|
if microdata:
|
||||||
|
schemas.append(microdata)
|
||||||
|
|
||||||
|
return schemas
|
||||||
|
|
||||||
|
def _parse_microdata(self, element) -> dict:
|
||||||
|
"""Parse microdata from an itemscope element."""
|
||||||
|
result: dict[str, Any] = {}
|
||||||
|
item_type = element.get("itemtype", "")
|
||||||
|
if item_type:
|
||||||
|
type_name = item_type.rstrip("/").split("/")[-1]
|
||||||
|
result["@type"] = type_name
|
||||||
|
|
||||||
|
for prop in element.find_all(attrs={"itemprop": True}, recursive=True):
|
||||||
|
name = prop.get("itemprop", "")
|
||||||
|
if not name:
|
||||||
|
continue
|
||||||
|
# Nested itemscope
|
||||||
|
if prop.get("itemscope") is not None:
|
||||||
|
result[name] = self._parse_microdata(prop)
|
||||||
|
elif prop.name == "meta":
|
||||||
|
result[name] = prop.get("content", "")
|
||||||
|
elif prop.name == "link":
|
||||||
|
result[name] = prop.get("href", "")
|
||||||
|
elif prop.name == "img":
|
||||||
|
result[name] = prop.get("src", "")
|
||||||
|
elif prop.name == "time":
|
||||||
|
result[name] = prop.get("datetime", prop.get_text(strip=True))
|
||||||
|
else:
|
||||||
|
result[name] = prop.get_text(strip=True)
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Validation methods
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    def validate_product_schema(self, schema_data: dict, page_url: str) -> ProductSchema:
        """Validate a Product schema object.

        Checks required/recommended properties, then delegates nested
        validation of offers, aggregateRating and (up to 5) reviews.

        Args:
            schema_data: Parsed Product schema dict (JSON-LD or microdata).
            page_url: URL the schema came from (recorded on the result).

        Returns:
            A ProductSchema with properties/errors/warnings populated and
            is_valid / rich_result_eligible flags set.
        """
        ps = ProductSchema(
            url=page_url,
            schema_type="Product",
            properties=[],
        )

        # Required properties: absence (or any falsy value) is an error.
        for prop_name in PRODUCT_REQUIRED:
            value = schema_data.get(prop_name)
            valid = bool(value)
            error = "" if valid else f"Missing required property: {prop_name}"
            sp = SchemaProperty(
                name=prop_name, value=value, required=True, valid=valid, error=error,
            )
            # Properties are stored as plain dicts for JSON serialization.
            ps.properties.append(asdict(sp))
            if not valid:
                ps.errors.append(error)

        # Recommended properties: absence only downgrades to a warning.
        for prop_name in PRODUCT_RECOMMENDED:
            value = schema_data.get(prop_name)
            sp = SchemaProperty(
                name=prop_name, value=value if value else None,
                required=False, valid=bool(value),
                error="" if value else f"Missing recommended property: {prop_name}",
            )
            ps.properties.append(asdict(sp))
            if not value:
                ps.warnings.append(f"Missing recommended property: {prop_name}")

        # Offers may be a single object or a list; a missing 'offers' is an
        # error because offers carry the price data rich results need.
        offers = schema_data.get("offers")
        if offers:
            if isinstance(offers, list):
                for offer in offers:
                    offer_errors = self.validate_offer_schema(offer)
                    ps.errors.extend(offer_errors["errors"])
                    ps.warnings.extend(offer_errors["warnings"])
            elif isinstance(offers, dict):
                offer_errors = self.validate_offer_schema(offers)
                ps.errors.extend(offer_errors["errors"])
                ps.warnings.extend(offer_errors["warnings"])
        else:
            ps.errors.append("Missing 'offers' property (required for rich results)")

        # aggregateRating is optional; validate only when it is an object.
        agg_rating = schema_data.get("aggregateRating")
        if agg_rating and isinstance(agg_rating, dict):
            rating_result = self.validate_aggregate_rating(agg_rating)
            ps.errors.extend(rating_result["errors"])
            ps.warnings.extend(rating_result["warnings"])

        # Reviews: single object or list; only the first few are sampled to
        # bound work on pages with many reviews.
        review = schema_data.get("review")
        if review:
            reviews = review if isinstance(review, list) else [review]
            for r in reviews[:5]:  # Check up to 5 reviews
                if isinstance(r, dict):
                    review_result = self.validate_review_schema(r)
                    ps.errors.extend(review_result["errors"])
                    ps.warnings.extend(review_result["warnings"])

        # Warnings do not affect validity; eligibility is a separate check.
        ps.is_valid = len(ps.errors) == 0
        ps.rich_result_eligible = self.check_rich_result_eligibility(schema_data)

        return ps
|
||||||
|
|
||||||
|
def validate_offer_schema(self, offer_data: dict) -> dict[str, list[str]]:
|
||||||
|
"""Validate an Offer schema object."""
|
||||||
|
errors: list[str] = []
|
||||||
|
warnings: list[str] = []
|
||||||
|
|
||||||
|
for prop_name in OFFER_REQUIRED:
|
||||||
|
value = offer_data.get(prop_name)
|
||||||
|
if not value:
|
||||||
|
errors.append(f"Offer missing required property: {prop_name}")
|
||||||
|
|
||||||
|
# Validate price format
|
||||||
|
price = offer_data.get("price")
|
||||||
|
if price is not None:
|
||||||
|
price_str = str(price).replace(",", "").strip()
|
||||||
|
if not re.match(r"^\d+(\.\d+)?$", price_str):
|
||||||
|
errors.append(f"Invalid price format: '{price}' (must be numeric)")
|
||||||
|
elif float(price_str) <= 0:
|
||||||
|
warnings.append(f"Price is zero or negative: {price}")
|
||||||
|
|
||||||
|
# Validate priceCurrency
|
||||||
|
currency = offer_data.get("priceCurrency", "")
|
||||||
|
valid_currencies = {"KRW", "USD", "EUR", "JPY", "CNY", "GBP"}
|
||||||
|
if currency and currency.upper() not in valid_currencies:
|
||||||
|
warnings.append(f"Unusual currency code: {currency}")
|
||||||
|
|
||||||
|
# Validate availability
|
||||||
|
availability = offer_data.get("availability", "")
|
||||||
|
if availability and availability not in AVAILABILITY_VALUES:
|
||||||
|
errors.append(
|
||||||
|
f"Invalid availability value: '{availability}'. "
|
||||||
|
f"Use schema.org values like https://schema.org/InStock"
|
||||||
|
)
|
||||||
|
|
||||||
|
# Validate itemCondition
|
||||||
|
condition = offer_data.get("itemCondition", "")
|
||||||
|
if condition and condition not in ITEM_CONDITION_VALUES:
|
||||||
|
warnings.append(f"Invalid itemCondition: '{condition}'")
|
||||||
|
|
||||||
|
# Check recommended
|
||||||
|
for prop_name in OFFER_RECOMMENDED:
|
||||||
|
if not offer_data.get(prop_name):
|
||||||
|
warnings.append(f"Offer missing recommended property: {prop_name}")
|
||||||
|
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
def validate_aggregate_rating(self, rating_data: dict) -> dict[str, list[str]]:
|
||||||
|
"""Validate AggregateRating schema."""
|
||||||
|
errors: list[str] = []
|
||||||
|
warnings: list[str] = []
|
||||||
|
|
||||||
|
for prop_name in AGGREGATE_RATING_REQUIRED:
|
||||||
|
value = rating_data.get(prop_name)
|
||||||
|
if value is None:
|
||||||
|
errors.append(f"AggregateRating missing required: {prop_name}")
|
||||||
|
|
||||||
|
# Validate ratingValue range
|
||||||
|
rating_value = rating_data.get("ratingValue")
|
||||||
|
best_rating = rating_data.get("bestRating", 5)
|
||||||
|
worst_rating = rating_data.get("worstRating", 1)
|
||||||
|
if rating_value is not None:
|
||||||
|
try:
|
||||||
|
rv = float(rating_value)
|
||||||
|
br = float(best_rating)
|
||||||
|
wr = float(worst_rating)
|
||||||
|
if rv < wr or rv > br:
|
||||||
|
errors.append(
|
||||||
|
f"ratingValue ({rv}) outside range [{wr}, {br}]"
|
||||||
|
)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
errors.append(f"Invalid ratingValue format: {rating_value}")
|
||||||
|
|
||||||
|
# Validate reviewCount
|
||||||
|
review_count = rating_data.get("reviewCount")
|
||||||
|
if review_count is not None:
|
||||||
|
try:
|
||||||
|
rc = int(review_count)
|
||||||
|
if rc < 0:
|
||||||
|
errors.append(f"Negative reviewCount: {rc}")
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
errors.append(f"Invalid reviewCount format: {review_count}")
|
||||||
|
|
||||||
|
for prop_name in AGGREGATE_RATING_RECOMMENDED:
|
||||||
|
if not rating_data.get(prop_name):
|
||||||
|
warnings.append(f"AggregateRating missing recommended: {prop_name}")
|
||||||
|
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
def validate_review_schema(self, review_data: dict) -> dict[str, list[str]]:
|
||||||
|
"""Validate Review schema."""
|
||||||
|
errors: list[str] = []
|
||||||
|
warnings: list[str] = []
|
||||||
|
|
||||||
|
# Author validation
|
||||||
|
author = review_data.get("author")
|
||||||
|
if not author:
|
||||||
|
errors.append("Review missing required: author")
|
||||||
|
elif isinstance(author, dict):
|
||||||
|
author_name = author.get("name", "")
|
||||||
|
if not author_name:
|
||||||
|
errors.append("Review author missing 'name' property")
|
||||||
|
elif isinstance(author, str):
|
||||||
|
if len(author.strip()) == 0:
|
||||||
|
errors.append("Review author is empty string")
|
||||||
|
|
||||||
|
# reviewRating validation
|
||||||
|
review_rating = review_data.get("reviewRating")
|
||||||
|
if not review_rating:
|
||||||
|
errors.append("Review missing required: reviewRating")
|
||||||
|
elif isinstance(review_rating, dict):
|
||||||
|
rv = review_rating.get("ratingValue")
|
||||||
|
if rv is None:
|
||||||
|
errors.append("reviewRating missing ratingValue")
|
||||||
|
|
||||||
|
for prop_name in REVIEW_RECOMMENDED:
|
||||||
|
if not review_data.get(prop_name):
|
||||||
|
warnings.append(f"Review missing recommended: {prop_name}")
|
||||||
|
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
def validate_breadcrumb(self, schema_data: dict) -> dict[str, list[str]]:
|
||||||
|
"""Validate BreadcrumbList schema."""
|
||||||
|
errors: list[str] = []
|
||||||
|
warnings: list[str] = []
|
||||||
|
|
||||||
|
items = schema_data.get("itemListElement")
|
||||||
|
if not items:
|
||||||
|
errors.append("BreadcrumbList missing itemListElement")
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
if not isinstance(items, list):
|
||||||
|
errors.append("itemListElement should be an array")
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
for i, item in enumerate(items):
|
||||||
|
if not isinstance(item, dict):
|
||||||
|
errors.append(f"Breadcrumb item {i} is not an object")
|
||||||
|
continue
|
||||||
|
position = item.get("position")
|
||||||
|
if position is None:
|
||||||
|
errors.append(f"Breadcrumb item {i} missing 'position'")
|
||||||
|
name = item.get("name") or (item.get("item", {}).get("name") if isinstance(item.get("item"), dict) else None)
|
||||||
|
if not name:
|
||||||
|
warnings.append(f"Breadcrumb item {i} missing 'name'")
|
||||||
|
|
||||||
|
return {"errors": errors, "warnings": warnings}
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Rich result eligibility
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def check_rich_result_eligibility(self, schema_data: dict) -> bool:
|
||||||
|
"""Assess Google rich result eligibility for Product schema."""
|
||||||
|
# Must have name, image, and offers with price
|
||||||
|
if not schema_data.get("name"):
|
||||||
|
return False
|
||||||
|
if not schema_data.get("image"):
|
||||||
|
return False
|
||||||
|
|
||||||
|
offers = schema_data.get("offers")
|
||||||
|
if not offers:
|
||||||
|
return False
|
||||||
|
|
||||||
|
offer_list = offers if isinstance(offers, list) else [offers]
|
||||||
|
for offer in offer_list:
|
||||||
|
if not isinstance(offer, dict):
|
||||||
|
continue
|
||||||
|
if offer.get("price") and offer.get("priceCurrency") and offer.get("availability"):
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Naver Shopping requirements
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
def check_naver_shopping_requirements(self, schema_data: dict, page_url: str) -> list[dict]:
|
||||||
|
"""Check Naver Shopping specific schema requirements."""
|
||||||
|
issues: list[dict] = []
|
||||||
|
|
||||||
|
# Naver Shopping requires Product name in Korean for Korean market
|
||||||
|
name = schema_data.get("name", "")
|
||||||
|
korean_chars = len(re.findall(r"[\uac00-\ud7af]", str(name)))
|
||||||
|
if korean_chars == 0 and name:
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_product_name",
|
||||||
|
"severity": "medium",
|
||||||
|
"message": "Product name has no Korean characters",
|
||||||
|
"recommendation": "Include Korean product name for Naver Shopping visibility.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Naver prefers specific category mapping
|
||||||
|
if not schema_data.get("category"):
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_category",
|
||||||
|
"severity": "low",
|
||||||
|
"message": "Missing 'category' property for Naver Shopping categorization",
|
||||||
|
"recommendation": "Add category property matching Naver Shopping category taxonomy.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Naver requires image
|
||||||
|
image = schema_data.get("image")
|
||||||
|
if not image:
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_image",
|
||||||
|
"severity": "high",
|
||||||
|
"message": "Missing product image (required for Naver Shopping)",
|
||||||
|
"recommendation": "Add at least one high-quality product image URL.",
|
||||||
|
})
|
||||||
|
elif isinstance(image, str):
|
||||||
|
if not image.startswith("http"):
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_image_url",
|
||||||
|
"severity": "medium",
|
||||||
|
"message": "Product image URL is relative (should be absolute)",
|
||||||
|
"recommendation": "Use absolute URLs for product images.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Naver requires price in KRW
|
||||||
|
offers = schema_data.get("offers")
|
||||||
|
if offers:
|
||||||
|
offer_list = offers if isinstance(offers, list) else [offers]
|
||||||
|
for offer in offer_list:
|
||||||
|
if isinstance(offer, dict):
|
||||||
|
currency = offer.get("priceCurrency", "")
|
||||||
|
if currency and currency.upper() != "KRW":
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_currency",
|
||||||
|
"severity": "medium",
|
||||||
|
"message": f"Price currency is {currency}, not KRW",
|
||||||
|
"recommendation": "For Naver Shopping, provide price in KRW.",
|
||||||
|
})
|
||||||
|
|
||||||
|
# Check brand/manufacturer
|
||||||
|
if not schema_data.get("brand") and not schema_data.get("manufacturer"):
|
||||||
|
issues.append({
|
||||||
|
"url": page_url,
|
||||||
|
"type": "naver_brand",
|
||||||
|
"severity": "low",
|
||||||
|
"message": "Missing brand/manufacturer (helpful for Naver Shopping filters)",
|
||||||
|
"recommendation": "Add brand or manufacturer property.",
|
||||||
|
})
|
||||||
|
|
||||||
|
return issues
|
||||||
|
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
# Orchestrator
|
||||||
|
# ------------------------------------------------------------------
|
||||||
|
|
||||||
|
    async def check(
        self,
        urls: list[str] | None = None,
        sitemap_url: str | None = None,
        sample_size: int = 50,
    ) -> SchemaCheckResult:
        """Run schema validation on URLs or sitemap.

        Args:
            urls: Explicit page URLs to check (merged with sitemap URLs).
            sitemap_url: Optional sitemap (or sitemap index) to expand.
            sample_size: Cap on the total number of URLs checked.

        Returns:
            A SchemaCheckResult with per-schema results, aggregated common
            errors/warnings, Naver Shopping issues, and a computed score.
        """
        result = SchemaCheckResult(timestamp=datetime.now().isoformat())
        target_urls: list[str] = []

        async with aiohttp.ClientSession() as session:
            if sitemap_url:
                # Fetch URLs from sitemap
                target_urls = await self._urls_from_sitemap(session, sitemap_url, sample_size)
            if urls:
                target_urls.extend(urls)

            # Deduplicate, then cap; note set() makes the final order
            # non-deterministic across runs.
            target_urls = list(set(target_urls))[:sample_size]
            result.urls_checked = len(target_urls)
            self.logger.info(f"Checking {len(target_urls)} URLs for Product schema")

            # Frequency counters used to surface the most common messages.
            error_counter: dict[str, int] = {}
            warning_counter: dict[str, int] = {}

            for url in target_urls:
                html = await self._fetch_page(session, url)
                if not html:
                    # Unfetchable pages are counted as schema-less.
                    result.pages_without_schema += 1
                    continue

                schemas = self.extract_schemas(html, url)
                product_schemas = [
                    s for s in schemas
                    if self._get_schema_type(s) in ("Product", "ProductGroup")
                ]
                breadcrumb_schemas = [
                    s for s in schemas
                    if self._get_schema_type(s) == "BreadcrumbList"
                ]

                if not product_schemas:
                    result.pages_without_schema += 1
                    continue

                result.pages_with_schema += 1

                for ps_data in product_schemas:
                    ps = self.validate_product_schema(ps_data, url)
                    result.schemas.append(asdict(ps))

                    for err in ps.errors:
                        error_counter[err] = error_counter.get(err, 0) + 1
                    for warn in ps.warnings:
                        warning_counter[warn] = warning_counter.get(warn, 0) + 1

                    # Naver Shopping checks
                    naver_issues = self.check_naver_shopping_requirements(ps_data, url)
                    result.naver_shopping_issues.extend(naver_issues)

                # Validate breadcrumbs; their errors only feed the common
                # error counter, not per-schema results.
                for bc_data in breadcrumb_schemas:
                    bc_result = self.validate_breadcrumb(bc_data)
                    for err in bc_result["errors"]:
                        error_counter[err] = error_counter.get(err, 0) + 1

        # Aggregate common errors/warnings (top 20 by frequency).
        result.common_errors = sorted(
            error_counter.keys(),
            key=lambda k: error_counter[k],
            reverse=True,
        )[:20]
        result.common_warnings = sorted(
            warning_counter.keys(),
            key=lambda k: warning_counter[k],
            reverse=True,
        )[:20]

        result.calculate_score()
        return result
|
||||||
|
|
||||||
|
    async def _urls_from_sitemap(
        self,
        session: aiohttp.ClientSession,
        sitemap_url: str,
        limit: int,
    ) -> list[str]:
        """Fetch product URLs from sitemap.

        Handles both plain sitemaps (<url> entries) and sitemap indexes
        (<sitemap> entries, recursed one level per child).  Any failure is
        logged and an empty/partial list is returned.

        Args:
            session: Shared aiohttp session.
            sitemap_url: Sitemap or sitemap-index URL.
            limit: Maximum number of URLs to return.
        """
        urls: list[str] = []
        try:
            async with session.get(sitemap_url, headers=self.headers,
                                   timeout=self.timeout, ssl=False) as resp:
                if resp.status != 200:
                    return urls
                text = await resp.text(errors="replace")
                # lxml-xml keeps tag case, which plain lxml-html would fold.
                soup = BeautifulSoup(text, "lxml-xml")

                # Handle sitemap index: only the first 3 child sitemaps are
                # followed to bound the number of requests.
                sitemapindex = soup.find_all("sitemap")
                if sitemapindex:
                    for sm in sitemapindex[:3]:
                        loc = sm.find("loc")
                        if loc:
                            child_urls = await self._urls_from_sitemap(session, loc.text.strip(), limit)
                            urls.extend(child_urls)
                            if len(urls) >= limit:
                                break
                else:
                    for tag in soup.find_all("url"):
                        loc = tag.find("loc")
                        if loc:
                            urls.append(loc.text.strip())
                            if len(urls) >= limit:
                                break
        except Exception as exc:
            # Best-effort: a broken sitemap should not abort the whole run.
            self.logger.warning(f"Sitemap parse failed: {exc}")

        # Children may have pushed past the cap; trim before returning.
        return urls[:limit]
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def _get_schema_type(schema: dict) -> str:
|
||||||
|
"""Get the @type from a schema dict, handling various formats."""
|
||||||
|
schema_type = schema.get("@type", "")
|
||||||
|
if isinstance(schema_type, list):
|
||||||
|
return schema_type[0] if schema_type else ""
|
||||||
|
return str(schema_type)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI output helpers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def print_rich_report(result: SchemaCheckResult) -> None:
    """Print a rich-formatted report to the console.

    Renders coverage, score, a summary table, the most common errors and
    warnings (top 10 shown of the top 20 collected), and deduplicated
    Naver Shopping issues.
    """
    console.print(f"\n[bold cyan]Product Schema Validation Report[/bold cyan]")
    console.print(f"Timestamp: {result.timestamp}")
    console.print(f"URLs checked: {result.urls_checked}")

    # Coverage: share of checked pages that had any Product schema;
    # max(..., 1) guards against division by zero when nothing was checked.
    coverage = (result.pages_with_schema / max(result.urls_checked, 1)) * 100
    cov_color = "green" if coverage >= 90 else "yellow" if coverage >= 50 else "red"
    console.print(f"Schema coverage: [{cov_color}]{coverage:.0f}%[/{cov_color}] "
                  f"({result.pages_with_schema}/{result.urls_checked})")

    # Score: traffic-light coloring at the 80/50 thresholds.
    score_color = "green" if result.score >= 80 else "yellow" if result.score >= 50 else "red"
    console.print(f"[bold {score_color}]Score: {result.score}/100[/bold {score_color}]")

    # Validity summary (schemas are stored as plain dicts).
    valid = sum(1 for s in result.schemas if s.get("is_valid"))
    eligible = sum(1 for s in result.schemas if s.get("rich_result_eligible"))
    total = len(result.schemas)

    table = Table(title="Schema Summary")
    table.add_column("Metric", style="bold")
    table.add_column("Value", justify="right")
    table.add_row("Total schemas found", str(total))
    table.add_row("Valid schemas", str(valid))
    table.add_row("Rich result eligible", str(eligible))
    table.add_row("Pages without schema", str(result.pages_without_schema))
    console.print(table)

    # Common errors (top 10 of the collected list).
    if result.common_errors:
        console.print(f"\n[bold red]Common Errors ({len(result.common_errors)}):[/bold red]")
        for err in result.common_errors[:10]:
            console.print(f"  [red]-[/red] {err}")

    # Common warnings (top 10 of the collected list).
    if result.common_warnings:
        console.print(f"\n[bold yellow]Common Warnings ({len(result.common_warnings)}):[/bold yellow]")
        for warn in result.common_warnings[:10]:
            console.print(f"  [yellow]-[/yellow] {warn}")

    # Naver Shopping issues, deduplicated by (type, message) so the same
    # problem found on many pages prints only once.
    if result.naver_shopping_issues:
        console.print(f"\n[bold magenta]Naver Shopping Issues ({len(result.naver_shopping_issues)}):[/bold magenta]")
        seen: set[str] = set()
        for issue in result.naver_shopping_issues:
            key = f"{issue['type']}:{issue['message']}"
            if key not in seen:
                seen.add(key)
                console.print(f"  [{issue.get('severity', 'medium')}] {issue['message']}")
                console.print(f"    [dim]{issue['recommendation']}[/dim]")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Main
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
def main() -> None:
    """CLI entry point: parse args, run the checker, render/save results.

    Modes:
        --json            print JSON to stdout (or save when --output given)
        default           print a rich console report
        --output PATH     save JSON results to PATH in either mode
    """
    parser = argparse.ArgumentParser(
        description="Product Schema Checker - Validate e-commerce structured data",
    )
    # Exactly one input source: explicit URLs or a sitemap.
    group = parser.add_mutually_exclusive_group(required=True)
    group.add_argument("--url", nargs="+", help="Product page URL(s) to validate")
    group.add_argument("--sitemap", help="Sitemap URL to fetch product pages from")
    parser.add_argument(
        "--sample",
        type=int,
        default=50,
        help="Max URLs to check from sitemap (default: 50)",
    )
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", type=str, help="Save output to file")
    args = parser.parse_args()

    checker = ProductSchemaChecker()
    result = asyncio.run(
        checker.check(
            urls=args.url,
            sitemap_url=args.sitemap,
            sample_size=args.sample,
        )
    )

    def dump_json() -> str:
        # default=str stringifies non-JSON types (e.g. datetimes) on purpose.
        return json.dumps(asdict(result), indent=2, ensure_ascii=False, default=str)

    def save_json(path: str) -> None:
        # Shared by both output modes (this block was duplicated inline before).
        with open(path, "w", encoding="utf-8") as f:
            f.write(dump_json())

    if args.json:
        if args.output:
            save_json(args.output)
            console.print(f"[green]Results saved to {args.output}[/green]")
        else:
            print(dump_json())
    else:
        print_rich_report(result)
        if args.output:
            save_json(args.output)
            console.print(f"\n[green]JSON results also saved to {args.output}[/green]")

    checker.print_stats()
|
||||||
|
|
||||||
|
|
||||||
|
# Allow running this module directly as a CLI script.
if __name__ == "__main__":
    main()
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user