Add SEO skills 19-28, 31-32 with full Python implementations
12 new skills: Keyword Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy, E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25 domain scripts. Updated skill 11 pipeline table and repo CLAUDE.md. Enhanced skill 18 local SEO workflow from jamie.clinic audit. Note: Skill 26 hreflang_validator.py pending (content filter block). Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,782 @@
|
||||
"""
|
||||
Knowledge Graph Analyzer
|
||||
=========================
|
||||
Purpose: Analyze entity presence in Google Knowledge Graph, Knowledge Panels,
|
||||
Wikipedia, Wikidata, and Korean equivalents (Naver encyclopedia, 지식iN).
|
||||
Python: 3.10+
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import re
|
||||
import sys
|
||||
from dataclasses import asdict, dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
from urllib.parse import quote, urljoin
|
||||
|
||||
import aiohttp
|
||||
from bs4 import BeautifulSoup
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from base_client import BaseAsyncClient, ConfigManager, config
|
||||
|
||||
# Module-level logger and Rich console shared by the analyzer and CLI helpers.
logger = logging.getLogger(__name__)
console = Console()
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Data classes
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Canonical list of Knowledge Panel attributes this tool probes for.
# NOTE(review): score_completeness() maintains a parallel weights dict over
# these same 19 keys — keep the two in sync when adding attributes here.
EXPECTED_ATTRIBUTES = [
    "name",
    "type",
    "description",
    "logo",
    "website",
    "founded",
    "ceo",
    "headquarters",
    "parent_organization",
    "subsidiaries",
    "social_twitter",
    "social_facebook",
    "social_linkedin",
    "social_youtube",
    "social_instagram",
    "stock_ticker",
    "industry",
    "employees",
    "revenue",
]
|
||||
|
||||
|
||||
@dataclass
class KnowledgePanelAttribute:
    """Single attribute extracted from a Knowledge Panel."""
    name: str  # attribute key, e.g. "ceo" or "social_twitter"
    value: str | None = None  # extracted value, or None when not found
    present: bool = False  # True when the attribute was detected at all
|
||||
|
||||
|
||||
@dataclass
class KnowledgePanel:
    """Represents a detected Knowledge Panel."""
    detected: bool = False  # True when panel markup or indicator text was found
    entity_type: str | None = None  # parsed from the "type" attribute when present
    attributes: list[KnowledgePanelAttribute] = field(default_factory=list)  # per-attribute findings
    completeness_score: float = 0.0  # 0-100 weighted coverage (see score_completeness)
    raw_snippet: str | None = None  # first 500 chars of panel text; mined by extract_attributes
|
||||
|
||||
|
||||
@dataclass
class WikiPresence:
    """Wikipedia or Wikidata presence record."""
    platform: str = ""  # "wikipedia" or "wikidata"
    present: bool = False  # True when an article/entity exists
    url: str | None = None  # canonical page URL when found
    qid: str | None = None  # Wikidata QID (e.g. Q20710); unused for Wikipedia
    language: str = "en"  # language edition checked (meaningful for Wikipedia)
|
||||
|
||||
|
||||
@dataclass
class NaverPresence:
    """Naver encyclopedia and 지식iN presence."""
    encyclopedia_present: bool = False  # entry found on terms.naver.com
    encyclopedia_url: str | None = None  # first matching entry URL, or the search URL
    knowledge_in_present: bool = False  # any 지식iN (Q&A) results found
    knowledge_in_count: int = 0  # reported or counted number of 지식iN results
    knowledge_in_url: str | None = None  # 지식iN search URL for the entity
|
||||
|
||||
|
||||
@dataclass
class KnowledgeGraphResult:
    """Full Knowledge Graph analysis result."""
    entity: str = ""  # entity name as queried
    language: str = "en"  # SERP / Wikipedia language used
    knowledge_panel: KnowledgePanel = field(default_factory=KnowledgePanel)
    wikipedia: WikiPresence = field(default_factory=lambda: WikiPresence(platform="wikipedia"))
    wikidata: WikiPresence = field(default_factory=lambda: WikiPresence(platform="wikidata"))
    naver: NaverPresence = field(default_factory=NaverPresence)
    competitors: list[dict[str, Any]] = field(default_factory=list)  # NOTE(review): never populated by analyze(); reserved?
    overall_score: float = 0.0  # 0-100 composite (weighting in analyze(), step 6)
    recommendations: list[str] = field(default_factory=list)
    # NOTE(review): naive local time — consider a timezone-aware timestamp if
    # results are compared across machines/zones.
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable dict of the full result tree."""
        return asdict(self)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Knowledge Graph Analyzer
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class KnowledgeGraphAnalyzer(BaseAsyncClient):
    """Analyze entity presence in Knowledge Graph and related platforms."""

    # Endpoint templates for each platform probed by this analyzer.
    GOOGLE_SEARCH_URL = "https://www.google.com/search"
    WIKIPEDIA_API_URL = "https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}"
    WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
    NAVER_SEARCH_URL = "https://search.naver.com/search.naver"
    NAVER_ENCYCLOPEDIA_URL = "https://terms.naver.com/search.naver"
    NAVER_KIN_URL = "https://kin.naver.com/search/list.naver"

    # Desktop-browser headers; sent with every request so the probed endpoints
    # serve their normal browser-facing markup.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }

    def __init__(self, **kwargs):
        """Forward *kwargs* to BaseAsyncClient and keep the shared config handle.

        NOTE(review): `config` is imported from base_client; assumed to be a
        module-level ConfigManager instance — confirm in base_client.
        """
        super().__init__(**kwargs)
        self.config = config
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Google entity search
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def search_entity(
|
||||
self,
|
||||
entity_name: str,
|
||||
language: str = "en",
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Search Google for entity to detect Knowledge Panel signals."""
|
||||
params = {"q": entity_name, "hl": language, "gl": "us" if language == "en" else "kr"}
|
||||
headers = {**self.HEADERS}
|
||||
if language == "ko":
|
||||
headers["Accept-Language"] = "ko-KR,ko;q=0.9"
|
||||
params["gl"] = "kr"
|
||||
|
||||
own_session = session is None
|
||||
if own_session:
|
||||
session = aiohttp.ClientSession()
|
||||
|
||||
try:
|
||||
async with session.get(
|
||||
self.GOOGLE_SEARCH_URL, params=params, headers=headers, timeout=aiohttp.ClientTimeout(total=20)
|
||||
) as resp:
|
||||
if resp.status != 200:
|
||||
logger.warning("Google search returned status %d", resp.status)
|
||||
return {"html": "", "status": resp.status}
|
||||
html = await resp.text()
|
||||
return {"html": html, "status": resp.status}
|
||||
except Exception as exc:
|
||||
logger.error("Google search failed: %s", exc)
|
||||
return {"html": "", "status": 0, "error": str(exc)}
|
||||
finally:
|
||||
if own_session:
|
||||
await session.close()
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Knowledge Panel detection
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def detect_knowledge_panel(self, search_data: dict[str, Any]) -> KnowledgePanel:
|
||||
"""Parse search results HTML for Knowledge Panel indicators."""
|
||||
html = search_data.get("html", "")
|
||||
if not html:
|
||||
return KnowledgePanel(detected=False)
|
||||
|
||||
soup = BeautifulSoup(html, "lxml")
|
||||
kp = KnowledgePanel()
|
||||
|
||||
# Knowledge Panel is typically in a div with class 'kp-wholepage' or 'knowledge-panel'
|
||||
kp_selectors = [
|
||||
"div.kp-wholepage",
|
||||
"div.knowledge-panel",
|
||||
"div[data-attrid='title']",
|
||||
"div.kp-header",
|
||||
"div[class*='kno-']",
|
||||
"div.osrp-blk",
|
||||
]
|
||||
|
||||
kp_element = None
|
||||
for selector in kp_selectors:
|
||||
kp_element = soup.select_one(selector)
|
||||
if kp_element:
|
||||
break
|
||||
|
||||
if kp_element:
|
||||
kp.detected = True
|
||||
kp.raw_snippet = kp_element.get_text(separator=" ", strip=True)[:500]
|
||||
else:
|
||||
# Fallback: check for common KP text patterns
|
||||
text = soup.get_text(separator=" ", strip=True).lower()
|
||||
kp_indicators = [
|
||||
"wikipedia", "description", "founded", "ceo",
|
||||
"headquarters", "subsidiaries", "parent organization",
|
||||
]
|
||||
matches = sum(1 for ind in kp_indicators if ind in text)
|
||||
if matches >= 3:
|
||||
kp.detected = True
|
||||
kp.raw_snippet = text[:500]
|
||||
|
||||
return kp
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Attribute extraction
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def extract_attributes(self, kp: KnowledgePanel, html: str = "") -> list[KnowledgePanelAttribute]:
|
||||
"""Extract entity attributes from Knowledge Panel data."""
|
||||
attributes: list[KnowledgePanelAttribute] = []
|
||||
text = (kp.raw_snippet or "").lower()
|
||||
|
||||
# Parse HTML for structured attribute data
|
||||
soup = BeautifulSoup(html, "lxml") if html else None
|
||||
|
||||
attribute_patterns = {
|
||||
"name": r"^(.+?)(?:\s+is\s+|\s*[-|]\s*)",
|
||||
"type": r"(?:is\s+(?:a|an)\s+)(\w[\w\s]+?)(?:\.|,|\s+based)",
|
||||
"description": r"(?:is\s+)(.{20,200}?)(?:\.\s)",
|
||||
"founded": r"(?:founded|established|incorporated)\s*(?:in|:)?\s*(\d{4})",
|
||||
"ceo": r"(?:ceo|chief executive|chairman)\s*(?::|is)?\s*([A-Z][\w\s.]+?)(?:,|\.|;|\s{2})",
|
||||
"headquarters": r"(?:headquarters?|hq|based in)\s*(?::|is|in)?\s*([A-Z][\w\s,]+?)(?:\.|;|\s{2})",
|
||||
"stock_ticker": r"(?:stock|ticker|symbol)\s*(?::|is)?\s*([A-Z]{1,5}(?:\s*:\s*[A-Z]{1,5})?)",
|
||||
"employees": r"(?:employees?|staff|workforce)\s*(?::|is)?\s*([\d,]+)",
|
||||
"revenue": r"(?:revenue|sales)\s*(?::|is)?\s*([\$\d,.]+\s*(?:billion|million|B|M)?)",
|
||||
"industry": r"(?:industry|sector)\s*(?::|is)?\s*([\w\s&]+?)(?:\.|,|;)",
|
||||
}
|
||||
|
||||
social_patterns = {
|
||||
"social_twitter": r"(?:twitter\.com|x\.com)/(\w+)",
|
||||
"social_facebook": r"facebook\.com/([\w.]+)",
|
||||
"social_linkedin": r"linkedin\.com/(?:company|in)/([\w-]+)",
|
||||
"social_youtube": r"youtube\.com/(?:@|channel/|user/)([\w-]+)",
|
||||
"social_instagram": r"instagram\.com/([\w.]+)",
|
||||
}
|
||||
|
||||
full_text = kp.raw_snippet or ""
|
||||
html_text = ""
|
||||
if soup:
|
||||
html_text = soup.get_text(separator=" ", strip=True)
|
||||
|
||||
combined = f"{full_text} {html_text}"
|
||||
|
||||
for attr_name, pattern in attribute_patterns.items():
|
||||
match = re.search(pattern, combined, re.IGNORECASE)
|
||||
present = match is not None
|
||||
value = match.group(1).strip() if match else None
|
||||
attributes.append(KnowledgePanelAttribute(name=attr_name, value=value, present=present))
|
||||
|
||||
# Social profiles
|
||||
for attr_name, pattern in social_patterns.items():
|
||||
match = re.search(pattern, combined, re.IGNORECASE)
|
||||
present = match is not None
|
||||
value = match.group(1).strip() if match else None
|
||||
attributes.append(KnowledgePanelAttribute(name=attr_name, value=value, present=present))
|
||||
|
||||
# Logo detection from HTML
|
||||
logo_present = False
|
||||
if soup:
|
||||
logo_img = soup.select_one("img[data-atf], g-img img, img.kno-fb-img, img[alt*='logo']")
|
||||
if logo_img:
|
||||
logo_present = True
|
||||
attributes.append(KnowledgePanelAttribute(name="logo", value=None, present=logo_present))
|
||||
|
||||
# Website detection
|
||||
website_present = False
|
||||
if soup:
|
||||
site_link = soup.select_one("a[data-attrid*='website'], a.ab_button[href*='http']")
|
||||
if site_link:
|
||||
website_present = True
|
||||
value = site_link.get("href", "")
|
||||
attributes.append(KnowledgePanelAttribute(name="website", value=value if website_present else None, present=website_present))
|
||||
|
||||
return attributes
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Completeness scoring
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def score_completeness(self, attributes: list[KnowledgePanelAttribute]) -> float:
|
||||
"""Score attribute completeness (0-100) based on filled vs expected."""
|
||||
if not attributes:
|
||||
return 0.0
|
||||
|
||||
weights = {
|
||||
"name": 10, "type": 8, "description": 10, "logo": 8, "website": 10,
|
||||
"founded": 5, "ceo": 5, "headquarters": 5, "parent_organization": 3,
|
||||
"subsidiaries": 3, "social_twitter": 4, "social_facebook": 4,
|
||||
"social_linkedin": 4, "social_youtube": 3, "social_instagram": 3,
|
||||
"stock_ticker": 3, "industry": 5, "employees": 3, "revenue": 4,
|
||||
}
|
||||
|
||||
total_weight = sum(weights.values())
|
||||
earned = 0.0
|
||||
|
||||
attr_map = {a.name: a for a in attributes}
|
||||
for attr_name, weight in weights.items():
|
||||
attr = attr_map.get(attr_name)
|
||||
if attr and attr.present:
|
||||
earned += weight
|
||||
|
||||
return round((earned / total_weight) * 100, 1) if total_weight > 0 else 0.0
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Wikipedia check
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def check_wikipedia(
|
||||
self,
|
||||
entity_name: str,
|
||||
language: str = "en",
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
) -> WikiPresence:
|
||||
"""Check Wikipedia article existence for entity."""
|
||||
wiki = WikiPresence(platform="wikipedia", language=language)
|
||||
title = entity_name.replace(" ", "_")
|
||||
url = self.WIKIPEDIA_API_URL.format(lang=language, title=quote(title))
|
||||
|
||||
own_session = session is None
|
||||
if own_session:
|
||||
session = aiohttp.ClientSession()
|
||||
|
||||
try:
|
||||
async with session.get(url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=15)) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
wiki.present = data.get("type") != "disambiguation"
|
||||
wiki.url = data.get("content_urls", {}).get("desktop", {}).get("page", "")
|
||||
if not wiki.url:
|
||||
wiki.url = f"https://{language}.wikipedia.org/wiki/{quote(title)}"
|
||||
logger.info("Wikipedia article found for '%s' (%s)", entity_name, language)
|
||||
elif resp.status == 404:
|
||||
wiki.present = False
|
||||
logger.info("No Wikipedia article for '%s' (%s)", entity_name, language)
|
||||
else:
|
||||
logger.warning("Wikipedia API returned status %d", resp.status)
|
||||
except Exception as exc:
|
||||
logger.error("Wikipedia check failed: %s", exc)
|
||||
finally:
|
||||
if own_session:
|
||||
await session.close()
|
||||
|
||||
return wiki
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Wikidata check
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def check_wikidata(
|
||||
self,
|
||||
entity_name: str,
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
) -> WikiPresence:
|
||||
"""Check Wikidata QID existence for entity."""
|
||||
wiki = WikiPresence(platform="wikidata")
|
||||
params = {
|
||||
"action": "wbsearchentities",
|
||||
"search": entity_name,
|
||||
"language": "en",
|
||||
"format": "json",
|
||||
"limit": 5,
|
||||
}
|
||||
|
||||
own_session = session is None
|
||||
if own_session:
|
||||
session = aiohttp.ClientSession()
|
||||
|
||||
try:
|
||||
async with session.get(
|
||||
self.WIKIDATA_API_URL, params=params, headers=self.HEADERS,
|
||||
timeout=aiohttp.ClientTimeout(total=15),
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
data = await resp.json()
|
||||
results = data.get("search", [])
|
||||
if results:
|
||||
top = results[0]
|
||||
wiki.present = True
|
||||
wiki.qid = top.get("id", "")
|
||||
wiki.url = top.get("concepturi", f"https://www.wikidata.org/wiki/{wiki.qid}")
|
||||
logger.info("Wikidata entity found: %s (%s)", wiki.qid, entity_name)
|
||||
else:
|
||||
wiki.present = False
|
||||
logger.info("No Wikidata entity for '%s'", entity_name)
|
||||
else:
|
||||
logger.warning("Wikidata API returned status %d", resp.status)
|
||||
except Exception as exc:
|
||||
logger.error("Wikidata check failed: %s", exc)
|
||||
finally:
|
||||
if own_session:
|
||||
await session.close()
|
||||
|
||||
return wiki
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Naver encyclopedia
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def check_naver_encyclopedia(
|
||||
self,
|
||||
entity_name: str,
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Check Naver encyclopedia (네이버 백과사전) presence."""
|
||||
result = {"present": False, "url": None}
|
||||
params = {"query": entity_name, "searchType": 0}
|
||||
headers = {
|
||||
**self.HEADERS,
|
||||
"Accept-Language": "ko-KR,ko;q=0.9",
|
||||
}
|
||||
|
||||
own_session = session is None
|
||||
if own_session:
|
||||
session = aiohttp.ClientSession()
|
||||
|
||||
try:
|
||||
async with session.get(
|
||||
self.NAVER_ENCYCLOPEDIA_URL, params=params, headers=headers,
|
||||
timeout=aiohttp.ClientTimeout(total=15),
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
html = await resp.text()
|
||||
soup = BeautifulSoup(html, "lxml")
|
||||
# Look for search result entries
|
||||
entries = soup.select("ul.content_list li, div.search_result a, a.title")
|
||||
if entries:
|
||||
result["present"] = True
|
||||
first_link = entries[0].find("a")
|
||||
if first_link and first_link.get("href"):
|
||||
href = first_link["href"]
|
||||
if not href.startswith("http"):
|
||||
href = urljoin("https://terms.naver.com", href)
|
||||
result["url"] = href
|
||||
else:
|
||||
result["url"] = f"https://terms.naver.com/search.naver?query={quote(entity_name)}"
|
||||
logger.info("Naver encyclopedia entry found for '%s'", entity_name)
|
||||
else:
|
||||
# Fallback: check page text for result indicators
|
||||
text = soup.get_text()
|
||||
if entity_name in text and "검색결과가 없습니다" not in text:
|
||||
result["present"] = True
|
||||
result["url"] = f"https://terms.naver.com/search.naver?query={quote(entity_name)}"
|
||||
else:
|
||||
logger.warning("Naver encyclopedia returned status %d", resp.status)
|
||||
except Exception as exc:
|
||||
logger.error("Naver encyclopedia check failed: %s", exc)
|
||||
finally:
|
||||
if own_session:
|
||||
await session.close()
|
||||
|
||||
return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Naver knowledge iN
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
async def check_naver_knowledge_in(
|
||||
self,
|
||||
entity_name: str,
|
||||
session: aiohttp.ClientSession | None = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Check Naver knowledge iN (지식iN) entries."""
|
||||
result = {"present": False, "count": 0, "url": None}
|
||||
params = {"query": entity_name}
|
||||
headers = {
|
||||
**self.HEADERS,
|
||||
"Accept-Language": "ko-KR,ko;q=0.9",
|
||||
}
|
||||
|
||||
own_session = session is None
|
||||
if own_session:
|
||||
session = aiohttp.ClientSession()
|
||||
|
||||
try:
|
||||
async with session.get(
|
||||
self.NAVER_KIN_URL, params=params, headers=headers,
|
||||
timeout=aiohttp.ClientTimeout(total=15),
|
||||
) as resp:
|
||||
if resp.status == 200:
|
||||
html = await resp.text()
|
||||
soup = BeautifulSoup(html, "lxml")
|
||||
|
||||
# Extract total result count
|
||||
count_el = soup.select_one("span.number, em.total_count, span.result_count")
|
||||
count = 0
|
||||
if count_el:
|
||||
count_text = count_el.get_text(strip=True).replace(",", "")
|
||||
count_match = re.search(r"(\d+)", count_text)
|
||||
if count_match:
|
||||
count = int(count_match.group(1))
|
||||
|
||||
# Also check for list items
|
||||
entries = soup.select("ul.basic1 li, ul._list li, div.search_list li")
|
||||
if count > 0 or entries:
|
||||
result["present"] = True
|
||||
result["count"] = count if count > 0 else len(entries)
|
||||
result["url"] = f"https://kin.naver.com/search/list.naver?query={quote(entity_name)}"
|
||||
logger.info("Naver 지식iN: %d entries for '%s'", result["count"], entity_name)
|
||||
else:
|
||||
logger.info("No Naver 지식iN entries for '%s'", entity_name)
|
||||
else:
|
||||
logger.warning("Naver 지식iN returned status %d", resp.status)
|
||||
except Exception as exc:
|
||||
logger.error("Naver 지식iN check failed: %s", exc)
|
||||
finally:
|
||||
if own_session:
|
||||
await session.close()
|
||||
|
||||
return result
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Recommendations
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def generate_recommendations(self, result: KnowledgeGraphResult) -> list[str]:
|
||||
"""Generate actionable recommendations based on analysis."""
|
||||
recs: list[str] = []
|
||||
|
||||
kp = result.knowledge_panel
|
||||
if not kp.detected:
|
||||
recs.append(
|
||||
"Knowledge Panel이 감지되지 않았습니다. Google에 엔티티 등록을 위해 "
|
||||
"Wikipedia 페이지 생성, Wikidata 항목 추가, 구조화된 데이터(Organization schema) 구현을 권장합니다."
|
||||
)
|
||||
elif kp.completeness_score < 50:
|
||||
recs.append(
|
||||
f"Knowledge Panel 완성도가 {kp.completeness_score}%로 낮습니다. "
|
||||
"누락된 속성(소셜 프로필, 설명, 로고 등)을 보강하세요."
|
||||
)
|
||||
|
||||
if not result.wikipedia.present:
|
||||
recs.append(
|
||||
"Wikipedia 문서가 없습니다. 주목할 만한 출처(reliable sources)를 확보한 후 "
|
||||
"Wikipedia 문서 생성을 고려하세요."
|
||||
)
|
||||
|
||||
if not result.wikidata.present:
|
||||
recs.append(
|
||||
"Wikidata 항목이 없습니다. Wikidata에 엔티티를 등록하여 "
|
||||
"Knowledge Graph 인식을 강화하세요."
|
||||
)
|
||||
|
||||
if not result.naver.encyclopedia_present:
|
||||
recs.append(
|
||||
"네이버 백과사전에 등록되어 있지 않습니다. 한국 시장 SEO를 위해 "
|
||||
"네이버 백과사전 등재를 검토하세요."
|
||||
)
|
||||
|
||||
if result.naver.knowledge_in_count < 5:
|
||||
recs.append(
|
||||
"네이버 지식iN에 관련 콘텐츠가 부족합니다. Q&A 콘텐츠를 통해 "
|
||||
"브랜드 엔티티 인지도를 높이세요."
|
||||
)
|
||||
|
||||
# Check social profile completeness
|
||||
attr_map = {a.name: a for a in kp.attributes}
|
||||
missing_social = []
|
||||
for soc in ["social_twitter", "social_facebook", "social_linkedin", "social_youtube"]:
|
||||
attr = attr_map.get(soc)
|
||||
if not attr or not attr.present:
|
||||
missing_social.append(soc.replace("social_", "").title())
|
||||
if missing_social:
|
||||
recs.append(
|
||||
f"소셜 프로필 연결 누락: {', '.join(missing_social)}. "
|
||||
"웹사이트 schema의 sameAs 속성에 소셜 프로필을 추가하세요."
|
||||
)
|
||||
|
||||
if not recs:
|
||||
recs.append("Knowledge Graph 엔티티 상태가 양호합니다. 현재 수준을 유지하세요.")
|
||||
|
||||
return recs
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Main orchestrator
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
    async def analyze(
        self,
        entity_name: str,
        language: str = "en",
        include_wiki: bool = True,
        include_naver: bool = True,
    ) -> KnowledgeGraphResult:
        """Orchestrate full Knowledge Graph analysis.

        Runs the Google SERP probe, Knowledge Panel detection/extraction,
        optional Wikipedia/Wikidata checks, and optional Naver checks, then
        derives a composite 0-100 score and recommendations.

        Args:
            entity_name: Entity to analyze (brand, company, person, ...).
            language: SERP and Wikipedia language code (e.g. "en", "ko").
            include_wiki: Run Wikipedia + Wikidata checks when True.
            include_naver: Run Naver encyclopedia + 지식iN checks when True.

        Returns:
            Fully populated KnowledgeGraphResult.
        """
        result = KnowledgeGraphResult(entity=entity_name, language=language)
        logger.info("Starting Knowledge Graph analysis for '%s' (lang=%s)", entity_name, language)

        # One shared HTTP session for all network probes in this run.
        async with aiohttp.ClientSession() as session:
            # Step 1: Search entity on Google
            search_data = await self.search_entity(entity_name, language, session)

            # Step 2: Detect Knowledge Panel
            kp = self.detect_knowledge_panel(search_data)

            # Step 3: Extract attributes (only meaningful when a panel exists)
            if kp.detected:
                kp.attributes = self.extract_attributes(kp, search_data.get("html", ""))
                kp.completeness_score = self.score_completeness(kp.attributes)

                # Detect entity type from attributes
                for attr in kp.attributes:
                    if attr.name == "type" and attr.present:
                        kp.entity_type = attr.value
                        break

            result.knowledge_panel = kp

            # Step 4: Wikipedia and Wikidata checks (parallel)
            if include_wiki:
                wiki_task = self.check_wikipedia(entity_name, language, session)
                wikidata_task = self.check_wikidata(entity_name, session)
                result.wikipedia, result.wikidata = await asyncio.gather(wiki_task, wikidata_task)

            # Step 5: Naver checks (parallel)
            if include_naver:
                enc_task = self.check_naver_encyclopedia(entity_name, session)
                kin_task = self.check_naver_knowledge_in(entity_name, session)
                enc_result, kin_result = await asyncio.gather(enc_task, kin_task)

                result.naver = NaverPresence(
                    encyclopedia_present=enc_result.get("present", False),
                    encyclopedia_url=enc_result.get("url"),
                    knowledge_in_present=kin_result.get("present", False),
                    knowledge_in_count=kin_result.get("count", 0),
                    knowledge_in_url=kin_result.get("url"),
                )

        # Step 6: Compute overall score.
        # Weighting: panel completeness up to 35, Wikipedia 20, Wikidata 15,
        # Naver encyclopedia 15, Naver 지식iN 15 — 100 max in total.
        scores = []
        if kp.detected:
            scores.append(kp.completeness_score * 0.35)
        else:
            scores.append(0)
        scores.append(20.0 if result.wikipedia.present else 0)
        scores.append(15.0 if result.wikidata.present else 0)
        scores.append(15.0 if result.naver.encyclopedia_present else 0)
        scores.append(15.0 if result.naver.knowledge_in_present else 0)
        result.overall_score = round(sum(scores), 1)

        # Step 7: Recommendations
        result.recommendations = self.generate_recommendations(result)

        logger.info("Analysis complete. Overall score: %.1f", result.overall_score)
        return result
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI display helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def display_result(result: KnowledgeGraphResult) -> None:
    """Display analysis result in a rich table."""
    kp = result.knowledge_panel

    console.print()
    console.print(f"[bold cyan]Knowledge Graph Analysis: {result.entity}[/bold cyan]")
    console.print(f"Language: {result.language} | Score: {result.overall_score}/100")
    console.print()

    # --- Knowledge Panel summary -----------------------------------------
    panel_table = Table(title="Knowledge Panel", show_header=True)
    panel_table.add_column("Property", style="bold")
    panel_table.add_column("Value")
    panel_table.add_column("Status")

    summary_rows = [
        ("Detected", str(kp.detected),
         "[green]OK[/]" if kp.detected else "[red]Missing[/]"),
        ("Entity Type", kp.entity_type or "-",
         "[green]OK[/]" if kp.entity_type else "[yellow]Unknown[/]"),
        ("Completeness", f"{kp.completeness_score}%",
         "[green]OK[/]" if kp.completeness_score >= 50 else "[red]Low[/]"),
    ]
    for row in summary_rows:
        panel_table.add_row(*row)

    # One indented row per extracted attribute.
    for attr in kp.attributes:
        panel_table.add_row(
            f" {attr.name}",
            attr.value or "-",
            "[green]Present[/]" if attr.present else "[red]Missing[/]",
        )

    console.print(panel_table)
    console.print()

    # --- Platform presence ------------------------------------------------
    plat_table = Table(title="Platform Presence", show_header=True)
    plat_table.add_column("Platform", style="bold")
    plat_table.add_column("Present")
    plat_table.add_column("Details")

    platform_rows = [
        ("Wikipedia", result.wikipedia.present, result.wikipedia.url or "-"),
        ("Wikidata", result.wikidata.present, result.wikidata.qid or "-"),
        ("Naver Encyclopedia", result.naver.encyclopedia_present,
         result.naver.encyclopedia_url or "-"),
        ("Naver 지식iN", result.naver.knowledge_in_present,
         f"{result.naver.knowledge_in_count} entries"
         if result.naver.knowledge_in_present else "-"),
    ]
    for platform, present, details in platform_rows:
        plat_table.add_row(platform, "[green]Yes[/]" if present else "[red]No[/]", details)

    console.print(plat_table)
    console.print()

    # --- Recommendations ---------------------------------------------------
    console.print("[bold yellow]Recommendations:[/bold yellow]")
    for i, rec in enumerate(result.recommendations, 1):
        console.print(f" {i}. {rec}")
    console.print()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Main
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def parse_args() -> argparse.Namespace:
    """Parse CLI arguments for the Knowledge Graph analyzer.

    Returns:
        Namespace with: entity (str, required), language (str), wiki (bool,
        always True — see note below), no_wiki (bool), no_naver (bool),
        json (bool), output (str | None).
    """
    parser = argparse.ArgumentParser(
        description="Knowledge Graph & Entity Analyzer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--entity", required=True, help="Entity name to analyze")
    parser.add_argument("--language", default="en", choices=["en", "ko", "ja", "zh"], help="Language (default: en)")
    # NOTE: --wiki is a no-op kept for backward compatibility. With
    # action="store_true" and default=True the value is always True, and
    # main() keys off --no-wiki only. The help text now says so.
    parser.add_argument("--wiki", action="store_true", default=True,
                        help="Include Wikipedia/Wikidata check (enabled by default; use --no-wiki to disable)")
    parser.add_argument("--no-wiki", action="store_true", help="Skip Wikipedia/Wikidata check")
    parser.add_argument("--no-naver", action="store_true", help="Skip Naver checks")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", type=str, help="Output file path")
    return parser.parse_args()
|
||||
|
||||
|
||||
def _save_json(result: KnowledgeGraphResult, path: str) -> None:
    """Write *result* to *path* as UTF-8 JSON and confirm on the console."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(result.to_dict(), f, ensure_ascii=False, indent=2)
    console.print(f"[green]Output saved to {path}[/green]")


async def main() -> None:
    """CLI entry point: run the analysis, then render or persist the result.

    Fixes: the JSON-persist logic was duplicated in both output branches
    (one via f.write(json.dumps(...)), one via json.dump); both now share
    the _save_json helper.
    """
    args = parse_args()

    analyzer = KnowledgeGraphAnalyzer()
    result = await analyzer.analyze(
        entity_name=args.entity,
        language=args.language,
        include_wiki=not args.no_wiki,
        include_naver=not args.no_naver,
    )

    if args.json:
        # JSON mode: write to the requested file, otherwise dump to stdout.
        if args.output:
            _save_json(result, args.output)
        else:
            print(json.dumps(result.to_dict(), ensure_ascii=False, indent=2))
    else:
        # Human-readable tables; optionally persist the JSON alongside.
        display_result(result)
        if args.output:
            _save_json(result, args.output)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user