# Commit note (moved out of executable position): 12 new skills — Keyword
# Strategy, SERP Analysis, Position Tracking, Link Building, Content Strategy,
# E-Commerce SEO, KPI Framework, International SEO, AI Visibility, Knowledge
# Graph, Competitor Intel, and Crawl Budget. ~20K lines of Python across 25
# domain scripts. Skill 26 hreflang_validator.py pending.
"""
|
|
Knowledge Graph Analyzer
|
|
=========================
|
|
Purpose: Analyze entity presence in Google Knowledge Graph, Knowledge Panels,
|
|
Wikipedia, Wikidata, and Korean equivalents (Naver encyclopedia, 지식iN).
|
|
Python: 3.10+
|
|
"""
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import logging
|
|
import re
|
|
import sys
|
|
from dataclasses import asdict, dataclass, field
|
|
from datetime import datetime
|
|
from typing import Any
|
|
from urllib.parse import quote, urljoin
|
|
|
|
import aiohttp
|
|
from bs4 import BeautifulSoup
|
|
from rich.console import Console
|
|
from rich.table import Table
|
|
|
|
from base_client import BaseAsyncClient, ConfigManager, config
|
|
|
|
# Module-level logger; handlers/levels are expected to be configured by the
# embedding application, not here.
logger = logging.getLogger(__name__)

# Shared rich console used by the CLI display helpers below.
console = Console()


# ---------------------------------------------------------------------------
# Data classes
# ---------------------------------------------------------------------------
|
|
|
|
# Attributes a "complete" organization Knowledge Panel is expected to expose.
# Order mirrors the weighting table in KnowledgeGraphAnalyzer.score_completeness.
EXPECTED_ATTRIBUTES = [
    # Core identity
    "name", "type", "description", "logo", "website",
    # Organization facts
    "founded", "ceo", "headquarters", "parent_organization", "subsidiaries",
    # Social profiles
    "social_twitter", "social_facebook", "social_linkedin",
    "social_youtube", "social_instagram",
    # Financial / scale
    "stock_ticker", "industry", "employees", "revenue",
]
|
|
|
|
|
|
@dataclass
|
|
class KnowledgePanelAttribute:
|
|
"""Single attribute extracted from a Knowledge Panel."""
|
|
name: str
|
|
value: str | None = None
|
|
present: bool = False
|
|
|
|
|
|
@dataclass
class KnowledgePanel:
    """Outcome of Knowledge Panel detection for one Google results page."""

    detected: bool = False
    entity_type: str | None = None  # taken from the extracted "type" attribute
    attributes: list[KnowledgePanelAttribute] = field(default_factory=list)
    completeness_score: float = 0.0  # 0-100 weighted attribute coverage
    raw_snippet: str | None = None  # first 500 chars of panel text, for debugging
|
|
|
|
|
|
@dataclass
|
|
class WikiPresence:
|
|
"""Wikipedia or Wikidata presence record."""
|
|
platform: str = "" # "wikipedia" or "wikidata"
|
|
present: bool = False
|
|
url: str | None = None
|
|
qid: str | None = None # Wikidata QID (e.g. Q20710)
|
|
language: str = "en"
|
|
|
|
|
|
@dataclass
|
|
class NaverPresence:
|
|
"""Naver encyclopedia and 지식iN presence."""
|
|
encyclopedia_present: bool = False
|
|
encyclopedia_url: str | None = None
|
|
knowledge_in_present: bool = False
|
|
knowledge_in_count: int = 0
|
|
knowledge_in_url: str | None = None
|
|
|
|
|
|
@dataclass
class KnowledgeGraphResult:
    """Aggregated result of one full Knowledge Graph analysis run."""

    entity: str = ""
    language: str = "en"
    knowledge_panel: KnowledgePanel = field(default_factory=KnowledgePanel)
    wikipedia: WikiPresence = field(default_factory=lambda: WikiPresence(platform="wikipedia"))
    wikidata: WikiPresence = field(default_factory=lambda: WikiPresence(platform="wikidata"))
    naver: NaverPresence = field(default_factory=NaverPresence)
    competitors: list[dict[str, Any]] = field(default_factory=list)
    overall_score: float = 0.0  # 0-100 composite computed by the analyzer
    recommendations: list[str] = field(default_factory=list)
    timestamp: str = field(default_factory=lambda: datetime.now().isoformat())

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-serializable plain-dict copy (dataclasses.asdict)."""
        return asdict(self)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Knowledge Graph Analyzer
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class KnowledgeGraphAnalyzer(BaseAsyncClient):
    """Analyze entity presence in Knowledge Graph and related platforms."""

    # Endpoints queried by the individual search/check methods below.
    GOOGLE_SEARCH_URL = "https://www.google.com/search"
    WIKIPEDIA_API_URL = "https://{lang}.wikipedia.org/api/rest_v1/page/summary/{title}"
    WIKIDATA_API_URL = "https://www.wikidata.org/w/api.php"
    NAVER_SEARCH_URL = "https://search.naver.com/search.naver"
    NAVER_ENCYCLOPEDIA_URL = "https://terms.naver.com/search.naver"
    NAVER_KIN_URL = "https://kin.naver.com/search/list.naver"

    # Desktop-browser request headers (presumably to avoid bot-detection
    # responses — TODO confirm). Accept-Language is overridden per call for
    # Korean-language requests.
    HEADERS = {
        "User-Agent": (
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0.0.0 Safari/537.36"
        ),
        "Accept-Language": "en-US,en;q=0.9",
    }

    def __init__(self, **kwargs):
        """Forward **kwargs to BaseAsyncClient and keep the shared config handle."""
        super().__init__(**kwargs)
        self.config = config
|
|
|
|
# ------------------------------------------------------------------
|
|
# Google entity search
|
|
# ------------------------------------------------------------------
|
|
|
|
async def search_entity(
|
|
self,
|
|
entity_name: str,
|
|
language: str = "en",
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> dict[str, Any]:
|
|
"""Search Google for entity to detect Knowledge Panel signals."""
|
|
params = {"q": entity_name, "hl": language, "gl": "us" if language == "en" else "kr"}
|
|
headers = {**self.HEADERS}
|
|
if language == "ko":
|
|
headers["Accept-Language"] = "ko-KR,ko;q=0.9"
|
|
params["gl"] = "kr"
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
self.GOOGLE_SEARCH_URL, params=params, headers=headers, timeout=aiohttp.ClientTimeout(total=20)
|
|
) as resp:
|
|
if resp.status != 200:
|
|
logger.warning("Google search returned status %d", resp.status)
|
|
return {"html": "", "status": resp.status}
|
|
html = await resp.text()
|
|
return {"html": html, "status": resp.status}
|
|
except Exception as exc:
|
|
logger.error("Google search failed: %s", exc)
|
|
return {"html": "", "status": 0, "error": str(exc)}
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
# ------------------------------------------------------------------
|
|
# Knowledge Panel detection
|
|
# ------------------------------------------------------------------
|
|
|
|
def detect_knowledge_panel(self, search_data: dict[str, Any]) -> KnowledgePanel:
|
|
"""Parse search results HTML for Knowledge Panel indicators."""
|
|
html = search_data.get("html", "")
|
|
if not html:
|
|
return KnowledgePanel(detected=False)
|
|
|
|
soup = BeautifulSoup(html, "lxml")
|
|
kp = KnowledgePanel()
|
|
|
|
# Knowledge Panel is typically in a div with class 'kp-wholepage' or 'knowledge-panel'
|
|
kp_selectors = [
|
|
"div.kp-wholepage",
|
|
"div.knowledge-panel",
|
|
"div[data-attrid='title']",
|
|
"div.kp-header",
|
|
"div[class*='kno-']",
|
|
"div.osrp-blk",
|
|
]
|
|
|
|
kp_element = None
|
|
for selector in kp_selectors:
|
|
kp_element = soup.select_one(selector)
|
|
if kp_element:
|
|
break
|
|
|
|
if kp_element:
|
|
kp.detected = True
|
|
kp.raw_snippet = kp_element.get_text(separator=" ", strip=True)[:500]
|
|
else:
|
|
# Fallback: check for common KP text patterns
|
|
text = soup.get_text(separator=" ", strip=True).lower()
|
|
kp_indicators = [
|
|
"wikipedia", "description", "founded", "ceo",
|
|
"headquarters", "subsidiaries", "parent organization",
|
|
]
|
|
matches = sum(1 for ind in kp_indicators if ind in text)
|
|
if matches >= 3:
|
|
kp.detected = True
|
|
kp.raw_snippet = text[:500]
|
|
|
|
return kp
|
|
|
|
# ------------------------------------------------------------------
|
|
# Attribute extraction
|
|
# ------------------------------------------------------------------
|
|
|
|
def extract_attributes(self, kp: KnowledgePanel, html: str = "") -> list[KnowledgePanelAttribute]:
|
|
"""Extract entity attributes from Knowledge Panel data."""
|
|
attributes: list[KnowledgePanelAttribute] = []
|
|
text = (kp.raw_snippet or "").lower()
|
|
|
|
# Parse HTML for structured attribute data
|
|
soup = BeautifulSoup(html, "lxml") if html else None
|
|
|
|
attribute_patterns = {
|
|
"name": r"^(.+?)(?:\s+is\s+|\s*[-|]\s*)",
|
|
"type": r"(?:is\s+(?:a|an)\s+)(\w[\w\s]+?)(?:\.|,|\s+based)",
|
|
"description": r"(?:is\s+)(.{20,200}?)(?:\.\s)",
|
|
"founded": r"(?:founded|established|incorporated)\s*(?:in|:)?\s*(\d{4})",
|
|
"ceo": r"(?:ceo|chief executive|chairman)\s*(?::|is)?\s*([A-Z][\w\s.]+?)(?:,|\.|;|\s{2})",
|
|
"headquarters": r"(?:headquarters?|hq|based in)\s*(?::|is|in)?\s*([A-Z][\w\s,]+?)(?:\.|;|\s{2})",
|
|
"stock_ticker": r"(?:stock|ticker|symbol)\s*(?::|is)?\s*([A-Z]{1,5}(?:\s*:\s*[A-Z]{1,5})?)",
|
|
"employees": r"(?:employees?|staff|workforce)\s*(?::|is)?\s*([\d,]+)",
|
|
"revenue": r"(?:revenue|sales)\s*(?::|is)?\s*([\$\d,.]+\s*(?:billion|million|B|M)?)",
|
|
"industry": r"(?:industry|sector)\s*(?::|is)?\s*([\w\s&]+?)(?:\.|,|;)",
|
|
}
|
|
|
|
social_patterns = {
|
|
"social_twitter": r"(?:twitter\.com|x\.com)/(\w+)",
|
|
"social_facebook": r"facebook\.com/([\w.]+)",
|
|
"social_linkedin": r"linkedin\.com/(?:company|in)/([\w-]+)",
|
|
"social_youtube": r"youtube\.com/(?:@|channel/|user/)([\w-]+)",
|
|
"social_instagram": r"instagram\.com/([\w.]+)",
|
|
}
|
|
|
|
full_text = kp.raw_snippet or ""
|
|
html_text = ""
|
|
if soup:
|
|
html_text = soup.get_text(separator=" ", strip=True)
|
|
|
|
combined = f"{full_text} {html_text}"
|
|
|
|
for attr_name, pattern in attribute_patterns.items():
|
|
match = re.search(pattern, combined, re.IGNORECASE)
|
|
present = match is not None
|
|
value = match.group(1).strip() if match else None
|
|
attributes.append(KnowledgePanelAttribute(name=attr_name, value=value, present=present))
|
|
|
|
# Social profiles
|
|
for attr_name, pattern in social_patterns.items():
|
|
match = re.search(pattern, combined, re.IGNORECASE)
|
|
present = match is not None
|
|
value = match.group(1).strip() if match else None
|
|
attributes.append(KnowledgePanelAttribute(name=attr_name, value=value, present=present))
|
|
|
|
# Logo detection from HTML
|
|
logo_present = False
|
|
if soup:
|
|
logo_img = soup.select_one("img[data-atf], g-img img, img.kno-fb-img, img[alt*='logo']")
|
|
if logo_img:
|
|
logo_present = True
|
|
attributes.append(KnowledgePanelAttribute(name="logo", value=None, present=logo_present))
|
|
|
|
# Website detection
|
|
website_present = False
|
|
if soup:
|
|
site_link = soup.select_one("a[data-attrid*='website'], a.ab_button[href*='http']")
|
|
if site_link:
|
|
website_present = True
|
|
value = site_link.get("href", "")
|
|
attributes.append(KnowledgePanelAttribute(name="website", value=value if website_present else None, present=website_present))
|
|
|
|
return attributes
|
|
|
|
# ------------------------------------------------------------------
|
|
# Completeness scoring
|
|
# ------------------------------------------------------------------
|
|
|
|
def score_completeness(self, attributes: list[KnowledgePanelAttribute]) -> float:
|
|
"""Score attribute completeness (0-100) based on filled vs expected."""
|
|
if not attributes:
|
|
return 0.0
|
|
|
|
weights = {
|
|
"name": 10, "type": 8, "description": 10, "logo": 8, "website": 10,
|
|
"founded": 5, "ceo": 5, "headquarters": 5, "parent_organization": 3,
|
|
"subsidiaries": 3, "social_twitter": 4, "social_facebook": 4,
|
|
"social_linkedin": 4, "social_youtube": 3, "social_instagram": 3,
|
|
"stock_ticker": 3, "industry": 5, "employees": 3, "revenue": 4,
|
|
}
|
|
|
|
total_weight = sum(weights.values())
|
|
earned = 0.0
|
|
|
|
attr_map = {a.name: a for a in attributes}
|
|
for attr_name, weight in weights.items():
|
|
attr = attr_map.get(attr_name)
|
|
if attr and attr.present:
|
|
earned += weight
|
|
|
|
return round((earned / total_weight) * 100, 1) if total_weight > 0 else 0.0
|
|
|
|
# ------------------------------------------------------------------
|
|
# Wikipedia check
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_wikipedia(
|
|
self,
|
|
entity_name: str,
|
|
language: str = "en",
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> WikiPresence:
|
|
"""Check Wikipedia article existence for entity."""
|
|
wiki = WikiPresence(platform="wikipedia", language=language)
|
|
title = entity_name.replace(" ", "_")
|
|
url = self.WIKIPEDIA_API_URL.format(lang=language, title=quote(title))
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(url, headers=self.HEADERS, timeout=aiohttp.ClientTimeout(total=15)) as resp:
|
|
if resp.status == 200:
|
|
data = await resp.json()
|
|
wiki.present = data.get("type") != "disambiguation"
|
|
wiki.url = data.get("content_urls", {}).get("desktop", {}).get("page", "")
|
|
if not wiki.url:
|
|
wiki.url = f"https://{language}.wikipedia.org/wiki/{quote(title)}"
|
|
logger.info("Wikipedia article found for '%s' (%s)", entity_name, language)
|
|
elif resp.status == 404:
|
|
wiki.present = False
|
|
logger.info("No Wikipedia article for '%s' (%s)", entity_name, language)
|
|
else:
|
|
logger.warning("Wikipedia API returned status %d", resp.status)
|
|
except Exception as exc:
|
|
logger.error("Wikipedia check failed: %s", exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return wiki
|
|
|
|
# ------------------------------------------------------------------
|
|
# Wikidata check
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_wikidata(
|
|
self,
|
|
entity_name: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> WikiPresence:
|
|
"""Check Wikidata QID existence for entity."""
|
|
wiki = WikiPresence(platform="wikidata")
|
|
params = {
|
|
"action": "wbsearchentities",
|
|
"search": entity_name,
|
|
"language": "en",
|
|
"format": "json",
|
|
"limit": 5,
|
|
}
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
self.WIKIDATA_API_URL, params=params, headers=self.HEADERS,
|
|
timeout=aiohttp.ClientTimeout(total=15),
|
|
) as resp:
|
|
if resp.status == 200:
|
|
data = await resp.json()
|
|
results = data.get("search", [])
|
|
if results:
|
|
top = results[0]
|
|
wiki.present = True
|
|
wiki.qid = top.get("id", "")
|
|
wiki.url = top.get("concepturi", f"https://www.wikidata.org/wiki/{wiki.qid}")
|
|
logger.info("Wikidata entity found: %s (%s)", wiki.qid, entity_name)
|
|
else:
|
|
wiki.present = False
|
|
logger.info("No Wikidata entity for '%s'", entity_name)
|
|
else:
|
|
logger.warning("Wikidata API returned status %d", resp.status)
|
|
except Exception as exc:
|
|
logger.error("Wikidata check failed: %s", exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return wiki
|
|
|
|
# ------------------------------------------------------------------
|
|
# Naver encyclopedia
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_naver_encyclopedia(
|
|
self,
|
|
entity_name: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> dict[str, Any]:
|
|
"""Check Naver encyclopedia (네이버 백과사전) presence."""
|
|
result = {"present": False, "url": None}
|
|
params = {"query": entity_name, "searchType": 0}
|
|
headers = {
|
|
**self.HEADERS,
|
|
"Accept-Language": "ko-KR,ko;q=0.9",
|
|
}
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
self.NAVER_ENCYCLOPEDIA_URL, params=params, headers=headers,
|
|
timeout=aiohttp.ClientTimeout(total=15),
|
|
) as resp:
|
|
if resp.status == 200:
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
# Look for search result entries
|
|
entries = soup.select("ul.content_list li, div.search_result a, a.title")
|
|
if entries:
|
|
result["present"] = True
|
|
first_link = entries[0].find("a")
|
|
if first_link and first_link.get("href"):
|
|
href = first_link["href"]
|
|
if not href.startswith("http"):
|
|
href = urljoin("https://terms.naver.com", href)
|
|
result["url"] = href
|
|
else:
|
|
result["url"] = f"https://terms.naver.com/search.naver?query={quote(entity_name)}"
|
|
logger.info("Naver encyclopedia entry found for '%s'", entity_name)
|
|
else:
|
|
# Fallback: check page text for result indicators
|
|
text = soup.get_text()
|
|
if entity_name in text and "검색결과가 없습니다" not in text:
|
|
result["present"] = True
|
|
result["url"] = f"https://terms.naver.com/search.naver?query={quote(entity_name)}"
|
|
else:
|
|
logger.warning("Naver encyclopedia returned status %d", resp.status)
|
|
except Exception as exc:
|
|
logger.error("Naver encyclopedia check failed: %s", exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Naver knowledge iN
|
|
# ------------------------------------------------------------------
|
|
|
|
async def check_naver_knowledge_in(
|
|
self,
|
|
entity_name: str,
|
|
session: aiohttp.ClientSession | None = None,
|
|
) -> dict[str, Any]:
|
|
"""Check Naver knowledge iN (지식iN) entries."""
|
|
result = {"present": False, "count": 0, "url": None}
|
|
params = {"query": entity_name}
|
|
headers = {
|
|
**self.HEADERS,
|
|
"Accept-Language": "ko-KR,ko;q=0.9",
|
|
}
|
|
|
|
own_session = session is None
|
|
if own_session:
|
|
session = aiohttp.ClientSession()
|
|
|
|
try:
|
|
async with session.get(
|
|
self.NAVER_KIN_URL, params=params, headers=headers,
|
|
timeout=aiohttp.ClientTimeout(total=15),
|
|
) as resp:
|
|
if resp.status == 200:
|
|
html = await resp.text()
|
|
soup = BeautifulSoup(html, "lxml")
|
|
|
|
# Extract total result count
|
|
count_el = soup.select_one("span.number, em.total_count, span.result_count")
|
|
count = 0
|
|
if count_el:
|
|
count_text = count_el.get_text(strip=True).replace(",", "")
|
|
count_match = re.search(r"(\d+)", count_text)
|
|
if count_match:
|
|
count = int(count_match.group(1))
|
|
|
|
# Also check for list items
|
|
entries = soup.select("ul.basic1 li, ul._list li, div.search_list li")
|
|
if count > 0 or entries:
|
|
result["present"] = True
|
|
result["count"] = count if count > 0 else len(entries)
|
|
result["url"] = f"https://kin.naver.com/search/list.naver?query={quote(entity_name)}"
|
|
logger.info("Naver 지식iN: %d entries for '%s'", result["count"], entity_name)
|
|
else:
|
|
logger.info("No Naver 지식iN entries for '%s'", entity_name)
|
|
else:
|
|
logger.warning("Naver 지식iN returned status %d", resp.status)
|
|
except Exception as exc:
|
|
logger.error("Naver 지식iN check failed: %s", exc)
|
|
finally:
|
|
if own_session:
|
|
await session.close()
|
|
|
|
return result
|
|
|
|
# ------------------------------------------------------------------
|
|
# Recommendations
|
|
# ------------------------------------------------------------------
|
|
|
|
def generate_recommendations(self, result: KnowledgeGraphResult) -> list[str]:
|
|
"""Generate actionable recommendations based on analysis."""
|
|
recs: list[str] = []
|
|
|
|
kp = result.knowledge_panel
|
|
if not kp.detected:
|
|
recs.append(
|
|
"Knowledge Panel이 감지되지 않았습니다. Google에 엔티티 등록을 위해 "
|
|
"Wikipedia 페이지 생성, Wikidata 항목 추가, 구조화된 데이터(Organization schema) 구현을 권장합니다."
|
|
)
|
|
elif kp.completeness_score < 50:
|
|
recs.append(
|
|
f"Knowledge Panel 완성도가 {kp.completeness_score}%로 낮습니다. "
|
|
"누락된 속성(소셜 프로필, 설명, 로고 등)을 보강하세요."
|
|
)
|
|
|
|
if not result.wikipedia.present:
|
|
recs.append(
|
|
"Wikipedia 문서가 없습니다. 주목할 만한 출처(reliable sources)를 확보한 후 "
|
|
"Wikipedia 문서 생성을 고려하세요."
|
|
)
|
|
|
|
if not result.wikidata.present:
|
|
recs.append(
|
|
"Wikidata 항목이 없습니다. Wikidata에 엔티티를 등록하여 "
|
|
"Knowledge Graph 인식을 강화하세요."
|
|
)
|
|
|
|
if not result.naver.encyclopedia_present:
|
|
recs.append(
|
|
"네이버 백과사전에 등록되어 있지 않습니다. 한국 시장 SEO를 위해 "
|
|
"네이버 백과사전 등재를 검토하세요."
|
|
)
|
|
|
|
if result.naver.knowledge_in_count < 5:
|
|
recs.append(
|
|
"네이버 지식iN에 관련 콘텐츠가 부족합니다. Q&A 콘텐츠를 통해 "
|
|
"브랜드 엔티티 인지도를 높이세요."
|
|
)
|
|
|
|
# Check social profile completeness
|
|
attr_map = {a.name: a for a in kp.attributes}
|
|
missing_social = []
|
|
for soc in ["social_twitter", "social_facebook", "social_linkedin", "social_youtube"]:
|
|
attr = attr_map.get(soc)
|
|
if not attr or not attr.present:
|
|
missing_social.append(soc.replace("social_", "").title())
|
|
if missing_social:
|
|
recs.append(
|
|
f"소셜 프로필 연결 누락: {', '.join(missing_social)}. "
|
|
"웹사이트 schema의 sameAs 속성에 소셜 프로필을 추가하세요."
|
|
)
|
|
|
|
if not recs:
|
|
recs.append("Knowledge Graph 엔티티 상태가 양호합니다. 현재 수준을 유지하세요.")
|
|
|
|
return recs
|
|
|
|
# ------------------------------------------------------------------
|
|
# Main orchestrator
|
|
# ------------------------------------------------------------------
|
|
|
|
async def analyze(
|
|
self,
|
|
entity_name: str,
|
|
language: str = "en",
|
|
include_wiki: bool = True,
|
|
include_naver: bool = True,
|
|
) -> KnowledgeGraphResult:
|
|
"""Orchestrate full Knowledge Graph analysis."""
|
|
result = KnowledgeGraphResult(entity=entity_name, language=language)
|
|
logger.info("Starting Knowledge Graph analysis for '%s' (lang=%s)", entity_name, language)
|
|
|
|
async with aiohttp.ClientSession() as session:
|
|
# Step 1: Search entity on Google
|
|
search_data = await self.search_entity(entity_name, language, session)
|
|
|
|
# Step 2: Detect Knowledge Panel
|
|
kp = self.detect_knowledge_panel(search_data)
|
|
|
|
# Step 3: Extract attributes
|
|
if kp.detected:
|
|
kp.attributes = self.extract_attributes(kp, search_data.get("html", ""))
|
|
kp.completeness_score = self.score_completeness(kp.attributes)
|
|
|
|
# Detect entity type from attributes
|
|
for attr in kp.attributes:
|
|
if attr.name == "type" and attr.present:
|
|
kp.entity_type = attr.value
|
|
break
|
|
|
|
result.knowledge_panel = kp
|
|
|
|
# Step 4: Wikipedia and Wikidata checks (parallel)
|
|
if include_wiki:
|
|
wiki_task = self.check_wikipedia(entity_name, language, session)
|
|
wikidata_task = self.check_wikidata(entity_name, session)
|
|
result.wikipedia, result.wikidata = await asyncio.gather(wiki_task, wikidata_task)
|
|
|
|
# Step 5: Naver checks (parallel)
|
|
if include_naver:
|
|
enc_task = self.check_naver_encyclopedia(entity_name, session)
|
|
kin_task = self.check_naver_knowledge_in(entity_name, session)
|
|
enc_result, kin_result = await asyncio.gather(enc_task, kin_task)
|
|
|
|
result.naver = NaverPresence(
|
|
encyclopedia_present=enc_result.get("present", False),
|
|
encyclopedia_url=enc_result.get("url"),
|
|
knowledge_in_present=kin_result.get("present", False),
|
|
knowledge_in_count=kin_result.get("count", 0),
|
|
knowledge_in_url=kin_result.get("url"),
|
|
)
|
|
|
|
# Step 6: Compute overall score
|
|
scores = []
|
|
if kp.detected:
|
|
scores.append(kp.completeness_score * 0.35)
|
|
else:
|
|
scores.append(0)
|
|
scores.append(20.0 if result.wikipedia.present else 0)
|
|
scores.append(15.0 if result.wikidata.present else 0)
|
|
scores.append(15.0 if result.naver.encyclopedia_present else 0)
|
|
scores.append(15.0 if result.naver.knowledge_in_present else 0)
|
|
result.overall_score = round(sum(scores), 1)
|
|
|
|
# Step 7: Recommendations
|
|
result.recommendations = self.generate_recommendations(result)
|
|
|
|
logger.info("Analysis complete. Overall score: %.1f", result.overall_score)
|
|
return result
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI display helpers
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def display_result(result: KnowledgeGraphResult) -> None:
    """Display analysis result in a rich table."""
    console.print()
    console.print(f"[bold cyan]Knowledge Graph Analysis: {result.entity}[/bold cyan]")
    console.print(f"Language: {result.language} | Score: {result.overall_score}/100")
    console.print()

    # --- Knowledge Panel section ---
    panel = result.knowledge_panel
    kp_table = Table(title="Knowledge Panel", show_header=True)
    kp_table.add_column("Property", style="bold")
    kp_table.add_column("Value")
    kp_table.add_column("Status")

    kp_table.add_row("Detected", str(panel.detected), "[green]OK[/]" if panel.detected else "[red]Missing[/]")
    kp_table.add_row("Entity Type", panel.entity_type or "-", "[green]OK[/]" if panel.entity_type else "[yellow]Unknown[/]")
    kp_table.add_row("Completeness", f"{panel.completeness_score}%", "[green]OK[/]" if panel.completeness_score >= 50 else "[red]Low[/]")

    for attr in panel.attributes:
        kp_table.add_row(
            f" {attr.name}",
            attr.value or "-",
            "[green]Present[/]" if attr.present else "[red]Missing[/]",
        )

    console.print(kp_table)
    console.print()

    # --- Platform presence section ---
    plat_table = Table(title="Platform Presence", show_header=True)
    plat_table.add_column("Platform", style="bold")
    plat_table.add_column("Present")
    plat_table.add_column("Details")

    platform_rows = [
        ("Wikipedia", result.wikipedia.present, result.wikipedia.url or "-"),
        ("Wikidata", result.wikidata.present, result.wikidata.qid or "-"),
        ("Naver Encyclopedia", result.naver.encyclopedia_present, result.naver.encyclopedia_url or "-"),
        (
            "Naver 지식iN",
            result.naver.knowledge_in_present,
            f"{result.naver.knowledge_in_count} entries" if result.naver.knowledge_in_present else "-",
        ),
    ]
    for platform, present, details in platform_rows:
        plat_table.add_row(platform, "[green]Yes[/]" if present else "[red]No[/]", details)

    console.print(plat_table)
    console.print()

    # --- Recommendations ---
    console.print("[bold yellow]Recommendations:[/bold yellow]")
    for i, rec in enumerate(result.recommendations, 1):
        console.print(f" {i}. {rec}")
    console.print()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Main
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def parse_args() -> argparse.Namespace:
    """Define and parse the analyzer's command-line arguments."""
    parser = argparse.ArgumentParser(
        description="Knowledge Graph & Entity Analyzer",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument("--entity", required=True, help="Entity name to analyze")
    parser.add_argument("--language", default="en", choices=["en", "ko", "ja", "zh"], help="Language (default: en)")
    # NOTE(review): --wiki is effectively always True and main() reads only
    # --no-wiki; the flag is kept for CLI backward compatibility.
    parser.add_argument("--wiki", action="store_true", default=True, help="Include Wikipedia/Wikidata check (default: True)")
    parser.add_argument("--no-wiki", action="store_true", help="Skip Wikipedia/Wikidata check")
    parser.add_argument("--no-naver", action="store_true", help="Skip Naver checks")
    parser.add_argument("--json", action="store_true", help="Output as JSON")
    parser.add_argument("--output", type=str, help="Output file path")
    return parser.parse_args()
|
|
|
|
|
|
async def main() -> None:
    """CLI entry point: run the analysis, then emit JSON or a rich report."""
    args = parse_args()

    analyzer = KnowledgeGraphAnalyzer()
    result = await analyzer.analyze(
        entity_name=args.entity,
        language=args.language,
        include_wiki=not args.no_wiki,
        include_naver=not args.no_naver,
    )

    payload = result.to_dict()
    if args.json:
        rendered = json.dumps(payload, ensure_ascii=False, indent=2)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as fh:
                fh.write(rendered)
            console.print(f"[green]Output saved to {args.output}[/green]")
        else:
            print(rendered)
    else:
        display_result(result)
        if args.output:
            with open(args.output, "w", encoding="utf-8") as fh:
                json.dump(payload, fh, ensure_ascii=False, indent=2)
            console.print(f"[green]Output saved to {args.output}[/green]")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry: drive the async CLI through the event loop.
    asyncio.run(main())
|