directory changes and restructuring
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,490 @@
|
||||
"""
|
||||
Schema Generator - Generate JSON-LD structured data markup
==========================================================
Purpose: Generate schema.org structured data in JSON-LD format
Python: 3.10+
Usage:
    python schema_generator.py --type organization --name "Company Name" --url "https://example.com"
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
import re
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
# Configure the root logger once at import time so CLI runs emit timestamped
# INFO-level messages; ``logger`` is the module-scoped logger used below.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
logger = logging.getLogger(__name__)

# Template directory relative to this script: <script dir>/../templates/schema_templates.
# ``resolve()`` makes the path absolute so it does not depend on the current
# working directory when the module is run via a relative path.
TEMPLATE_DIR = Path(__file__).resolve().parent.parent / "templates" / "schema_templates"
|
||||
|
||||
|
||||
class SchemaGenerator:
|
||||
"""Generate JSON-LD schema markup from templates."""
|
||||
|
||||
SCHEMA_TYPES = {
|
||||
"organization": "organization.json",
|
||||
"local_business": "local_business.json",
|
||||
"product": "product.json",
|
||||
"article": "article.json",
|
||||
"faq": "faq.json",
|
||||
"breadcrumb": "breadcrumb.json",
|
||||
"website": "website.json",
|
||||
}
|
||||
|
||||
# Business type mappings for LocalBusiness
|
||||
BUSINESS_TYPES = {
|
||||
"restaurant": "Restaurant",
|
||||
"cafe": "CafeOrCoffeeShop",
|
||||
"bar": "BarOrPub",
|
||||
"hotel": "Hotel",
|
||||
"store": "Store",
|
||||
"medical": "MedicalBusiness",
|
||||
"dental": "Dentist",
|
||||
"legal": "LegalService",
|
||||
"real_estate": "RealEstateAgent",
|
||||
"auto": "AutoRepair",
|
||||
"beauty": "BeautySalon",
|
||||
"gym": "HealthClub",
|
||||
"spa": "DaySpa",
|
||||
}
|
||||
|
||||
# Article type mappings
|
||||
ARTICLE_TYPES = {
|
||||
"article": "Article",
|
||||
"blog": "BlogPosting",
|
||||
"news": "NewsArticle",
|
||||
"tech": "TechArticle",
|
||||
"scholarly": "ScholarlyArticle",
|
||||
}
|
||||
|
||||
def __init__(self, template_dir: Path = TEMPLATE_DIR):
|
||||
self.template_dir = template_dir
|
||||
|
||||
def load_template(self, schema_type: str) -> dict:
|
||||
"""Load a schema template file."""
|
||||
if schema_type not in self.SCHEMA_TYPES:
|
||||
raise ValueError(f"Unknown schema type: {schema_type}. "
|
||||
f"Available: {list(self.SCHEMA_TYPES.keys())}")
|
||||
|
||||
template_file = self.template_dir / self.SCHEMA_TYPES[schema_type]
|
||||
if not template_file.exists():
|
||||
raise FileNotFoundError(f"Template not found: {template_file}")
|
||||
|
||||
with open(template_file, "r", encoding="utf-8") as f:
|
||||
return json.load(f)
|
||||
|
||||
def fill_template(self, template: dict, data: dict[str, Any]) -> dict:
|
||||
"""Fill template placeholders with actual data."""
|
||||
template_str = json.dumps(template, ensure_ascii=False)
|
||||
|
||||
# Replace placeholders {{key}} with values
|
||||
for key, value in data.items():
|
||||
placeholder = f"{{{{{key}}}}}"
|
||||
if value is not None:
|
||||
template_str = template_str.replace(placeholder, str(value))
|
||||
|
||||
# Remove unfilled placeholders and their parent objects if empty
|
||||
result = json.loads(template_str)
|
||||
return self._clean_empty_values(result)
|
||||
|
||||
def _clean_empty_values(self, obj: Any) -> Any:
|
||||
"""Remove empty values and unfilled placeholders."""
|
||||
if isinstance(obj, dict):
|
||||
cleaned = {}
|
||||
for key, value in obj.items():
|
||||
cleaned_value = self._clean_empty_values(value)
|
||||
# Skip if value is empty, None, or unfilled placeholder
|
||||
if cleaned_value is None:
|
||||
continue
|
||||
if isinstance(cleaned_value, str) and cleaned_value.startswith("{{"):
|
||||
continue
|
||||
if isinstance(cleaned_value, (list, dict)) and not cleaned_value:
|
||||
continue
|
||||
cleaned[key] = cleaned_value
|
||||
return cleaned if cleaned else None
|
||||
elif isinstance(obj, list):
|
||||
cleaned = []
|
||||
for item in obj:
|
||||
cleaned_item = self._clean_empty_values(item)
|
||||
if cleaned_item is not None:
|
||||
if isinstance(cleaned_item, str) and cleaned_item.startswith("{{"):
|
||||
continue
|
||||
cleaned.append(cleaned_item)
|
||||
return cleaned if cleaned else None
|
||||
elif isinstance(obj, str):
|
||||
if obj.startswith("{{") and obj.endswith("}}"):
|
||||
return None
|
||||
return obj
|
||||
return obj
|
||||
|
||||
def generate_organization(
|
||||
self,
|
||||
name: str,
|
||||
url: str,
|
||||
logo_url: str | None = None,
|
||||
description: str | None = None,
|
||||
founding_date: str | None = None,
|
||||
phone: str | None = None,
|
||||
address: dict | None = None,
|
||||
social_links: list[str] | None = None,
|
||||
) -> dict:
|
||||
"""Generate Organization schema."""
|
||||
template = self.load_template("organization")
|
||||
|
||||
data = {
|
||||
"name": name,
|
||||
"url": url,
|
||||
"logo_url": logo_url,
|
||||
"description": description,
|
||||
"founding_date": founding_date,
|
||||
"phone": phone,
|
||||
}
|
||||
|
||||
if address:
|
||||
data.update({
|
||||
"street_address": address.get("street"),
|
||||
"city": address.get("city"),
|
||||
"region": address.get("region"),
|
||||
"postal_code": address.get("postal_code"),
|
||||
"country": address.get("country", "KR"),
|
||||
})
|
||||
|
||||
if social_links:
|
||||
# Handle social links specially
|
||||
pass
|
||||
|
||||
return self.fill_template(template, data)
|
||||
|
||||
def generate_local_business(
|
||||
self,
|
||||
name: str,
|
||||
business_type: str,
|
||||
address: dict,
|
||||
phone: str | None = None,
|
||||
url: str | None = None,
|
||||
description: str | None = None,
|
||||
hours: dict | None = None,
|
||||
geo: dict | None = None,
|
||||
price_range: str | None = None,
|
||||
rating: float | None = None,
|
||||
review_count: int | None = None,
|
||||
) -> dict:
|
||||
"""Generate LocalBusiness schema."""
|
||||
template = self.load_template("local_business")
|
||||
|
||||
schema_business_type = self.BUSINESS_TYPES.get(
|
||||
business_type.lower(), "LocalBusiness"
|
||||
)
|
||||
|
||||
data = {
|
||||
"business_type": schema_business_type,
|
||||
"name": name,
|
||||
"url": url,
|
||||
"description": description,
|
||||
"phone": phone,
|
||||
"price_range": price_range,
|
||||
"street_address": address.get("street"),
|
||||
"city": address.get("city"),
|
||||
"region": address.get("region"),
|
||||
"postal_code": address.get("postal_code"),
|
||||
"country": address.get("country", "KR"),
|
||||
}
|
||||
|
||||
if geo:
|
||||
data["latitude"] = geo.get("lat")
|
||||
data["longitude"] = geo.get("lng")
|
||||
|
||||
if hours:
|
||||
data.update({
|
||||
"weekday_opens": hours.get("weekday_opens", "09:00"),
|
||||
"weekday_closes": hours.get("weekday_closes", "18:00"),
|
||||
"weekend_opens": hours.get("weekend_opens"),
|
||||
"weekend_closes": hours.get("weekend_closes"),
|
||||
})
|
||||
|
||||
if rating is not None:
|
||||
data["rating"] = str(rating)
|
||||
data["review_count"] = str(review_count or 0)
|
||||
|
||||
return self.fill_template(template, data)
|
||||
|
||||
def generate_product(
|
||||
self,
|
||||
name: str,
|
||||
description: str,
|
||||
price: float,
|
||||
currency: str = "KRW",
|
||||
brand: str | None = None,
|
||||
sku: str | None = None,
|
||||
images: list[str] | None = None,
|
||||
availability: str = "InStock",
|
||||
condition: str = "NewCondition",
|
||||
rating: float | None = None,
|
||||
review_count: int | None = None,
|
||||
url: str | None = None,
|
||||
seller: str | None = None,
|
||||
) -> dict:
|
||||
"""Generate Product schema."""
|
||||
template = self.load_template("product")
|
||||
|
||||
data = {
|
||||
"name": name,
|
||||
"description": description,
|
||||
"price": str(int(price)),
|
||||
"currency": currency,
|
||||
"brand_name": brand,
|
||||
"sku": sku,
|
||||
"product_url": url,
|
||||
"availability": availability,
|
||||
"condition": condition,
|
||||
"seller_name": seller,
|
||||
}
|
||||
|
||||
if images:
|
||||
for i, img in enumerate(images[:3], 1):
|
||||
data[f"image_url_{i}"] = img
|
||||
|
||||
if rating is not None:
|
||||
data["rating"] = str(rating)
|
||||
data["review_count"] = str(review_count or 0)
|
||||
|
||||
return self.fill_template(template, data)
|
||||
|
||||
def generate_article(
|
||||
self,
|
||||
headline: str,
|
||||
description: str,
|
||||
author_name: str,
|
||||
date_published: str,
|
||||
publisher_name: str,
|
||||
article_type: str = "article",
|
||||
date_modified: str | None = None,
|
||||
images: list[str] | None = None,
|
||||
page_url: str | None = None,
|
||||
publisher_logo: str | None = None,
|
||||
author_url: str | None = None,
|
||||
section: str | None = None,
|
||||
word_count: int | None = None,
|
||||
keywords: str | None = None,
|
||||
) -> dict:
|
||||
"""Generate Article schema."""
|
||||
template = self.load_template("article")
|
||||
|
||||
schema_article_type = self.ARTICLE_TYPES.get(
|
||||
article_type.lower(), "Article"
|
||||
)
|
||||
|
||||
data = {
|
||||
"article_type": schema_article_type,
|
||||
"headline": headline,
|
||||
"description": description,
|
||||
"author_name": author_name,
|
||||
"author_url": author_url,
|
||||
"date_published": date_published,
|
||||
"date_modified": date_modified or date_published,
|
||||
"publisher_name": publisher_name,
|
||||
"publisher_logo_url": publisher_logo,
|
||||
"page_url": page_url,
|
||||
"section": section,
|
||||
"word_count": str(word_count) if word_count else None,
|
||||
"keywords": keywords,
|
||||
}
|
||||
|
||||
if images:
|
||||
for i, img in enumerate(images[:2], 1):
|
||||
data[f"image_url_{i}"] = img
|
||||
|
||||
return self.fill_template(template, data)
|
||||
|
||||
def generate_faq(self, questions: list[dict[str, str]]) -> dict:
|
||||
"""Generate FAQPage schema."""
|
||||
schema = {
|
||||
"@context": "https://schema.org",
|
||||
"@type": "FAQPage",
|
||||
"mainEntity": [],
|
||||
}
|
||||
|
||||
for qa in questions:
|
||||
schema["mainEntity"].append({
|
||||
"@type": "Question",
|
||||
"name": qa["question"],
|
||||
"acceptedAnswer": {
|
||||
"@type": "Answer",
|
||||
"text": qa["answer"],
|
||||
},
|
||||
})
|
||||
|
||||
return schema
|
||||
|
||||
def generate_breadcrumb(self, items: list[dict[str, str]]) -> dict:
|
||||
"""Generate BreadcrumbList schema."""
|
||||
schema = {
|
||||
"@context": "https://schema.org",
|
||||
"@type": "BreadcrumbList",
|
||||
"itemListElement": [],
|
||||
}
|
||||
|
||||
for i, item in enumerate(items, 1):
|
||||
schema["itemListElement"].append({
|
||||
"@type": "ListItem",
|
||||
"position": i,
|
||||
"name": item["name"],
|
||||
"item": item["url"],
|
||||
})
|
||||
|
||||
return schema
|
||||
|
||||
def generate_website(
|
||||
self,
|
||||
name: str,
|
||||
url: str,
|
||||
search_url_template: str | None = None,
|
||||
description: str | None = None,
|
||||
language: str = "ko-KR",
|
||||
publisher_name: str | None = None,
|
||||
logo_url: str | None = None,
|
||||
alternate_name: str | None = None,
|
||||
) -> dict:
|
||||
"""Generate WebSite schema."""
|
||||
template = self.load_template("website")
|
||||
|
||||
data = {
|
||||
"site_name": name,
|
||||
"url": url,
|
||||
"description": description,
|
||||
"language": language,
|
||||
"search_url_template": search_url_template,
|
||||
"publisher_name": publisher_name or name,
|
||||
"logo_url": logo_url,
|
||||
"alternate_name": alternate_name,
|
||||
}
|
||||
|
||||
return self.fill_template(template, data)
|
||||
|
||||
def to_json_ld(self, schema: dict, pretty: bool = True) -> str:
|
||||
"""Convert schema dict to JSON-LD string."""
|
||||
indent = 2 if pretty else None
|
||||
return json.dumps(schema, ensure_ascii=False, indent=indent)
|
||||
|
||||
def to_html_script(self, schema: dict) -> str:
|
||||
"""Wrap schema in HTML script tag."""
|
||||
json_ld = self.to_json_ld(schema)
|
||||
return f'<script type="application/ld+json">\n{json_ld}\n</script>'
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the schema generator."""
    parser = argparse.ArgumentParser(
        description="Generate JSON-LD schema markup",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
    # Generate Organization schema
    python schema_generator.py --type organization --name "My Company" --url "https://example.com"

    # Generate Product schema
    python schema_generator.py --type product --name "Widget" --price 29900 --currency KRW

    # Generate Article schema
    python schema_generator.py --type article --headline "Article Title" --author "John Doe"
""",
    )

    parser.add_argument(
        "--type", "-t",
        required=True,
        choices=list(SchemaGenerator.SCHEMA_TYPES),
        help="Schema type to generate",
    )
    parser.add_argument("--name", help="Name/title")
    parser.add_argument("--url", help="URL")
    parser.add_argument("--description", help="Description")
    parser.add_argument("--price", type=float, help="Price (for product)")
    parser.add_argument("--currency", default="KRW", help="Currency code")
    parser.add_argument("--headline", help="Headline (for article)")
    parser.add_argument("--author", help="Author name")
    parser.add_argument("--output", "-o", help="Output file path")
    parser.add_argument("--html", action="store_true", help="Output as HTML script tag")
    return parser


def _build_schema(generator, args: argparse.Namespace) -> dict:
    """Build the schema selected by ``args.type``, using placeholder defaults
    for every value not supplied on the command line."""
    if args.type == "organization":
        return generator.generate_organization(
            name=args.name or "Organization Name",
            url=args.url or "https://example.com",
            description=args.description,
        )
    if args.type == "product":
        return generator.generate_product(
            name=args.name or "Product Name",
            description=args.description or "Product description",
            price=args.price or 0,
            currency=args.currency,
        )
    if args.type == "article":
        return generator.generate_article(
            headline=args.headline or args.name or "Article Title",
            description=args.description or "Article description",
            author_name=args.author or "Author",
            date_published=datetime.now().strftime("%Y-%m-%d"),
            publisher_name="Publisher",
        )
    if args.type == "website":
        return generator.generate_website(
            name=args.name or "Website Name",
            url=args.url or "https://example.com",
            description=args.description,
        )
    if args.type == "faq":
        # Example FAQ
        return generator.generate_faq([
            {"question": "Question 1?", "answer": "Answer 1"},
            {"question": "Question 2?", "answer": "Answer 2"},
        ])
    if args.type == "breadcrumb":
        # Example breadcrumb
        return generator.generate_breadcrumb([
            {"name": "Home", "url": "https://example.com/"},
            {"name": "Category", "url": "https://example.com/category/"},
        ])
    if args.type == "local_business":
        return generator.generate_local_business(
            name=args.name or "Business Name",
            business_type="store",
            address={"street": "123 Main St", "city": "Seoul", "country": "KR"},
            url=args.url,
            description=args.description,
        )
    raise ValueError(f"Unsupported type: {args.type}")


def main(argv: list[str] | None = None) -> None:
    """Main entry point for CLI usage.

    Parses the command line, generates the requested schema and either
    prints it or writes it to the ``--output`` file, as raw JSON-LD or,
    with ``--html``, wrapped in a script tag.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, so calling ``main()`` works as before.

    Raises:
        Exception: Any error raised while generating or writing the schema
            is logged and then re-raised unchanged.
    """
    args = _build_parser().parse_args(argv)

    generator = SchemaGenerator()

    try:
        schema = _build_schema(generator, args)

        if args.html:
            output = generator.to_html_script(schema)
        else:
            output = generator.to_json_ld(schema)

        if args.output:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(output)
            logger.info("Schema written to %s", args.output)
        else:
            print(output)

    except Exception as e:
        # Top-level boundary: record the failure, then let it propagate so
        # the process exits with a traceback and non-zero status.
        logger.error("Error generating schema: %s", e)
        raise
|
||||
|
||||
|
||||
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user