""" Schema Generator - Generate JSON-LD structured data markup ========================================================== Purpose: Generate schema.org structured data in JSON-LD format Python: 3.10+ Usage: python schema_generator.py --type organization --name "Company Name" --url "https://example.com" """ import argparse import json import logging import os import re from datetime import datetime from pathlib import Path from typing import Any logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) # Template directory relative to this script TEMPLATE_DIR = Path(__file__).parent.parent / "templates" / "schema_templates" class SchemaGenerator: """Generate JSON-LD schema markup from templates.""" SCHEMA_TYPES = { "organization": "organization.json", "local_business": "local_business.json", "product": "product.json", "article": "article.json", "faq": "faq.json", "breadcrumb": "breadcrumb.json", "website": "website.json", } # Business type mappings for LocalBusiness BUSINESS_TYPES = { "restaurant": "Restaurant", "cafe": "CafeOrCoffeeShop", "bar": "BarOrPub", "hotel": "Hotel", "store": "Store", "medical": "MedicalBusiness", "dental": "Dentist", "legal": "LegalService", "real_estate": "RealEstateAgent", "auto": "AutoRepair", "beauty": "BeautySalon", "gym": "HealthClub", "spa": "DaySpa", } # Article type mappings ARTICLE_TYPES = { "article": "Article", "blog": "BlogPosting", "news": "NewsArticle", "tech": "TechArticle", "scholarly": "ScholarlyArticle", } def __init__(self, template_dir: Path = TEMPLATE_DIR): self.template_dir = template_dir def load_template(self, schema_type: str) -> dict: """Load a schema template file.""" if schema_type not in self.SCHEMA_TYPES: raise ValueError(f"Unknown schema type: {schema_type}. " f"Available: {list(self.SCHEMA_TYPES.keys())}") template_file = self.template_dir / self.SCHEMA_TYPES[schema_type] if not template_file.exists(): raise FileNotFoundError(f"Template not found: {template_file}") with open(template_file, "r", encoding="utf-8") as f: return json.load(f) def fill_template(self, template: dict, data: dict[str, Any]) -> dict: """Fill template placeholders with actual data.""" template_str = json.dumps(template, ensure_ascii=False) # Replace placeholders {{key}} with values for key, value in data.items(): placeholder = f"{{{{{key}}}}}" if value is not None: template_str = template_str.replace(placeholder, str(value)) # Remove unfilled placeholders and their parent objects if empty result = json.loads(template_str) return self._clean_empty_values(result) def _clean_empty_values(self, obj: Any) -> Any: """Remove empty values and unfilled placeholders.""" if isinstance(obj, dict): cleaned = {} for key, value in obj.items(): cleaned_value = self._clean_empty_values(value) # Skip if value is empty, None, or unfilled placeholder if cleaned_value is None: continue if isinstance(cleaned_value, str) and cleaned_value.startswith("{{"): continue if isinstance(cleaned_value, (list, dict)) and not cleaned_value: continue cleaned[key] = cleaned_value return cleaned if cleaned else None elif isinstance(obj, list): cleaned = [] for item in obj: cleaned_item = self._clean_empty_values(item) if cleaned_item is not None: if isinstance(cleaned_item, str) and cleaned_item.startswith("{{"): continue cleaned.append(cleaned_item) return cleaned if cleaned else None elif isinstance(obj, str): if obj.startswith("{{") and obj.endswith("}}"): return None return obj return obj def generate_organization( self, name: str, url: str, logo_url: str | None = None, description: str | None = None, founding_date: str | None = None, phone: str | None = None, address: dict | None = None, social_links: list[str] | None = None, ) -> dict: """Generate Organization schema.""" template = self.load_template("organization") data = { "name": name, "url": url, "logo_url": logo_url, "description": description, "founding_date": founding_date, "phone": phone, } if address: data.update({ "street_address": address.get("street"), "city": address.get("city"), "region": address.get("region"), "postal_code": address.get("postal_code"), "country": address.get("country", "KR"), }) if social_links: # Handle social links specially pass return self.fill_template(template, data) def generate_local_business( self, name: str, business_type: str, address: dict, phone: str | None = None, url: str | None = None, description: str | None = None, hours: dict | None = None, geo: dict | None = None, price_range: str | None = None, rating: float | None = None, review_count: int | None = None, ) -> dict: """Generate LocalBusiness schema.""" template = self.load_template("local_business") schema_business_type = self.BUSINESS_TYPES.get( business_type.lower(), "LocalBusiness" ) data = { "business_type": schema_business_type, "name": name, "url": url, "description": description, "phone": phone, "price_range": price_range, "street_address": address.get("street"), "city": address.get("city"), "region": address.get("region"), "postal_code": address.get("postal_code"), "country": address.get("country", "KR"), } if geo: data["latitude"] = geo.get("lat") data["longitude"] = geo.get("lng") if hours: data.update({ "weekday_opens": hours.get("weekday_opens", "09:00"), "weekday_closes": hours.get("weekday_closes", "18:00"), "weekend_opens": hours.get("weekend_opens"), "weekend_closes": hours.get("weekend_closes"), }) if rating is not None: data["rating"] = str(rating) data["review_count"] = str(review_count or 0) return self.fill_template(template, data) def generate_product( self, name: str, description: str, price: float, currency: str = "KRW", brand: str | None = None, sku: str | None = None, images: list[str] | None = None, availability: str = "InStock", condition: str = "NewCondition", rating: float | None = None, review_count: int | None = None, url: str | None = None, seller: str | None = None, ) -> dict: """Generate Product schema.""" template = self.load_template("product") data = { "name": name, "description": description, "price": str(int(price)), "currency": currency, "brand_name": brand, "sku": sku, "product_url": url, "availability": availability, "condition": condition, "seller_name": seller, } if images: for i, img in enumerate(images[:3], 1): data[f"image_url_{i}"] = img if rating is not None: data["rating"] = str(rating) data["review_count"] = str(review_count or 0) return self.fill_template(template, data) def generate_article( self, headline: str, description: str, author_name: str, date_published: str, publisher_name: str, article_type: str = "article", date_modified: str | None = None, images: list[str] | None = None, page_url: str | None = None, publisher_logo: str | None = None, author_url: str | None = None, section: str | None = None, word_count: int | None = None, keywords: str | None = None, ) -> dict: """Generate Article schema.""" template = self.load_template("article") schema_article_type = self.ARTICLE_TYPES.get( article_type.lower(), "Article" ) data = { "article_type": schema_article_type, "headline": headline, "description": description, "author_name": author_name, "author_url": author_url, "date_published": date_published, "date_modified": date_modified or date_published, "publisher_name": publisher_name, "publisher_logo_url": publisher_logo, "page_url": page_url, "section": section, "word_count": str(word_count) if word_count else None, "keywords": keywords, } if images: for i, img in enumerate(images[:2], 1): data[f"image_url_{i}"] = img return self.fill_template(template, data) def generate_faq(self, questions: list[dict[str, str]]) -> dict: """Generate FAQPage schema.""" schema = { "@context": "https://schema.org", "@type": "FAQPage", "mainEntity": [], } for qa in questions: schema["mainEntity"].append({ "@type": "Question", "name": qa["question"], "acceptedAnswer": { "@type": "Answer", "text": qa["answer"], }, }) return schema def generate_breadcrumb(self, items: list[dict[str, str]]) -> dict: """Generate BreadcrumbList schema.""" schema = { "@context": "https://schema.org", "@type": "BreadcrumbList", "itemListElement": [], } for i, item in enumerate(items, 1): schema["itemListElement"].append({ "@type": "ListItem", "position": i, "name": item["name"], "item": item["url"], }) return schema def generate_website( self, name: str, url: str, search_url_template: str | None = None, description: str | None = None, language: str = "ko-KR", publisher_name: str | None = None, logo_url: str | None = None, alternate_name: str | None = None, ) -> dict: """Generate WebSite schema.""" template = self.load_template("website") data = { "site_name": name, "url": url, "description": description, "language": language, "search_url_template": search_url_template, "publisher_name": publisher_name or name, "logo_url": logo_url, "alternate_name": alternate_name, } return self.fill_template(template, data) def to_json_ld(self, schema: dict, pretty: bool = True) -> str: """Convert schema dict to JSON-LD string.""" indent = 2 if pretty else None return json.dumps(schema, ensure_ascii=False, indent=indent) def to_html_script(self, schema: dict) -> str: """Wrap schema in HTML script tag.""" json_ld = self.to_json_ld(schema) return f'' def main(): """Main entry point for CLI usage.""" parser = argparse.ArgumentParser( description="Generate JSON-LD schema markup", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" Examples: # Generate Organization schema python schema_generator.py --type organization --name "My Company" --url "https://example.com" # Generate Product schema python schema_generator.py --type product --name "Widget" --price 29900 --currency KRW # Generate Article schema python schema_generator.py --type article --headline "Article Title" --author "John Doe" """, ) parser.add_argument( "--type", "-t", required=True, choices=SchemaGenerator.SCHEMA_TYPES.keys(), help="Schema type to generate", ) parser.add_argument("--name", help="Name/title") parser.add_argument("--url", help="URL") parser.add_argument("--description", help="Description") parser.add_argument("--price", type=float, help="Price (for product)") parser.add_argument("--currency", default="KRW", help="Currency code") parser.add_argument("--headline", help="Headline (for article)") parser.add_argument("--author", help="Author name") parser.add_argument("--output", "-o", help="Output file path") parser.add_argument("--html", action="store_true", help="Output as HTML script tag") args = parser.parse_args() generator = SchemaGenerator() try: if args.type == "organization": schema = generator.generate_organization( name=args.name or "Organization Name", url=args.url or "https://example.com", description=args.description, ) elif args.type == "product": schema = generator.generate_product( name=args.name or "Product Name", description=args.description or "Product description", price=args.price or 0, currency=args.currency, ) elif args.type == "article": schema = generator.generate_article( headline=args.headline or args.name or "Article Title", description=args.description or "Article description", author_name=args.author or "Author", date_published=datetime.now().strftime("%Y-%m-%d"), publisher_name="Publisher", ) elif args.type == "website": schema = generator.generate_website( name=args.name or "Website Name", url=args.url or "https://example.com", description=args.description, ) elif args.type == "faq": # Example FAQ schema = generator.generate_faq([ {"question": "Question 1?", "answer": "Answer 1"}, {"question": "Question 2?", "answer": "Answer 2"}, ]) elif args.type == "breadcrumb": # Example breadcrumb schema = generator.generate_breadcrumb([ {"name": "Home", "url": "https://example.com/"}, {"name": "Category", "url": "https://example.com/category/"}, ]) elif args.type == "local_business": schema = generator.generate_local_business( name=args.name or "Business Name", business_type="store", address={"street": "123 Main St", "city": "Seoul", "country": "KR"}, url=args.url, description=args.description, ) else: raise ValueError(f"Unsupported type: {args.type}") if args.html: output = generator.to_html_script(schema) else: output = generator.to_json_ld(schema) if args.output: with open(args.output, "w", encoding="utf-8") as f: f.write(output) logger.info(f"Schema written to {args.output}") else: print(output) except Exception as e: logger.error(f"Error generating schema: {e}") raise if __name__ == "__main__": main()