Files
our-claude-skills/ourdigital-custom-skills/20-gtm-audit/code/scripts/gtm_audit.py
Andrew Yim eea49f9f8c refactor(skills): Restructure skills to dual-platform architecture
Major refactoring of ourdigital-custom-skills with new numbering system:

## Structure Changes
- Each skill now has code/ (Claude Code) and desktop/ (Claude Desktop) versions
- New progressive numbering: 01-09 General, 10-19 SEO, 20-29 GTM, 30-39 OurDigital, 40-49 Jamie

## Skill Reorganization
- 01-notion-organizer (from 02)
- 10-18: SEO tools split into focused skills (technical, on-page, local, schema, vitals, gsc, gateway)
- 20-21: GTM audit and manager
- 30-32: OurDigital designer, research, presentation
- 40-41: Jamie brand editor and audit

## New Files
- .claude/commands/: Slash command definitions for all skills
- CLAUDE.md: Updated with new skill structure documentation
- REFACTORING_PLAN.md: Migration documentation
- COMPATIBILITY_REPORT.md, SKILLS_COMPARISON.md: Analysis docs

## Removed
- Old skill directories (02-05, 10-14, 20-21 old numbering)
- Consolidated into new structure with _archive/ for reference

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2025-12-22 01:58:24 +09:00

1114 lines
42 KiB
Python

#!/usr/bin/env python3
"""
GTM Audit Script - Comprehensive Google Tag Manager audit with form tracking,
e-commerce checkout flow, and advanced dataLayer validation.
Usage:
python gtm_audit.py --url "https://example.com" [options]
Options:
--url Target URL to audit (required)
--container Expected GTM container ID (e.g., GTM-XXXXXX)
--journey Journey type: pageview, scroll, click, form, checkout, datalayer, full
--output Output file path (default: gtm_audit_report.json)
--timeout Page load timeout in ms (default: 30000)
--headless Run in headless mode (default: True)
"""
import argparse
import json
import re
import sys
from datetime import datetime
from urllib.parse import urlparse, parse_qs
from playwright.sync_api import sync_playwright
# Vendor name -> regular-expression fragments identifying that vendor's
# collection endpoints. A captured request is attributed to a vendor when any
# of its fragments is found in the request URL with ``re.search``.
TAG_DESTINATIONS = {
    "GA4": [
        r"google-analytics\.com/g/collect",
        r"analytics\.google\.com/g/collect",
    ],
    "Universal Analytics": [
        r"google-analytics\.com/collect",
        r"google-analytics\.com/r/collect",
    ],
    "Google Ads": [
        r"googleads\.g\.doubleclick\.net",
        r"google\.com/pagead",
        r"googleadservices\.com/pagead",
    ],
    "Meta Pixel": [
        r"facebook\.com/tr",
        r"connect\.facebook\.net",
    ],
    "LinkedIn": [
        r"px\.ads\.linkedin\.com",
        r"snap\.licdn\.com",
    ],
    "TikTok": [r"analytics\.tiktok\.com"],
    "Twitter/X": [
        r"ads-twitter\.com",
        r"t\.co/i/adsct",
    ],
    "Kakao": [r"pixel\.kakao\.com"],
    "Naver": [r"wcs\.naver\.com"],
}
# Per-event GA4 parameter expectations, keyed by event name.
#   "required"       - parameters that must be present on the event
#   "items_required" - keys every entry of the ``items`` array must carry
#   "recommended"    - optional parameters (listed for reference)
GA4_EVENT_REQUIREMENTS = {
    "purchase": {
        "required": ["transaction_id", "value", "currency"],
        "items_required": ["item_id", "item_name"],
    },
    "add_to_cart": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "begin_checkout": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "add_shipping_info": {
        "required": ["currency", "value"],
        "recommended": ["shipping_tier"],
    },
    "add_payment_info": {
        "required": ["currency", "value"],
        "recommended": ["payment_type"],
    },
    "view_item": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "view_cart": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "generate_lead": {"recommended": ["currency", "value"]},
    "form_submit": {"recommended": ["form_id", "form_name"]},
}
# Checkout funnel events in the order they are expected to fire; an event's
# position in this list is used when checking the firing sequence.
CHECKOUT_SEQUENCE = [
    "view_cart", "begin_checkout",
    "add_shipping_info", "add_payment_info",
    "purchase",
]
class DataLayerValidator:
    """Advanced dataLayer validation and monitoring.

    Every ``validate_*`` / ``check_*`` method returns a new list of issue
    dicts. Malformed input (non-dict pushes, ``ecommerce: None``, a non-list
    ``items`` value, non-dict items) is tolerated instead of raising, because
    a real ``window.dataLayer`` may contain arbitrary pushed values.
    """

    def __init__(self):
        self.events = []
        self.issues = []
        self.snapshots = []

    def validate_event(self, event_data):
        """Validate a single dataLayer event against GA4 specs.

        Args:
            event_data: One dataLayer push. Anything that is not a dict, or
                has no string ``event`` name, yields no issues.

        Returns:
            list[dict]: Issues found; each has ``type``, ``event`` and
            ``message`` keys plus ``field`` / ``item_index`` where relevant.
        """
        if not isinstance(event_data, dict):
            return []
        event_name = event_data.get("event")
        if not event_name or not isinstance(event_name, str):
            return []
        reqs = GA4_EVENT_REQUIREMENTS.get(event_name)
        if reqs is None:
            return []

        issues = []
        ecommerce = event_data.get("ecommerce")
        if not isinstance(ecommerce, dict):
            # ``ecommerce: None`` or a malformed value: nothing to read from it.
            ecommerce = {}

        # Required parameters may live inside ``ecommerce`` or at top level.
        for field in reqs.get("required", []):
            if field not in ecommerce and field not in event_data:
                issues.append({
                    "type": "missing_required",
                    "event": event_name,
                    "field": field,
                    "message": f"Missing required field: {field}",
                })

        items = ecommerce.get("items", [])
        if not isinstance(items, list):
            issues.append({
                "type": "wrong_type",
                "event": event_name,
                "field": "items",
                "message": f"'items' should be an array, got {type(items).__name__}",
            })
            items = []
        elif reqs.get("items_required") and not items:
            issues.append({
                "type": "missing_items",
                "event": event_name,
                "message": "E-commerce event missing 'items' array",
            })

        item_fields = reqs.get("items_required", [])
        for i, item in enumerate(items):
            if not isinstance(item, dict):
                issues.append({
                    "type": "wrong_type",
                    "event": event_name,
                    "item_index": i,
                    "field": "items",
                    "message": f"Item {i} should be an object, got {type(item).__name__}",
                })
                continue
            for field in item_fields:
                if field not in item:
                    issues.append({
                        "type": "item_missing_field",
                        "event": event_name,
                        "item_index": i,
                        "field": field,
                        "message": f"Item {i} missing required field: {field}",
                    })

        # Type-check ``value`` wherever the required-field check accepted it.
        value_holder = ecommerce if "value" in ecommerce else event_data
        if "value" in value_holder:
            value = value_holder["value"]
            # bool is a subclass of int, so it must be rejected explicitly.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                issues.append({
                    "type": "wrong_type",
                    "event": event_name,
                    "field": "value",
                    "message": f"'value' should be number, got {type(value).__name__}",
                })

        if event_name == "purchase":
            tid_holder = ecommerce if "transaction_id" in ecommerce else event_data
            if "transaction_id" in tid_holder:
                tid = tid_holder["transaction_id"]
                # Stringified JS ``undefined`` / ``null`` count as empty too.
                if not tid or str(tid).strip().lower() in {"", "undefined", "null"}:
                    issues.append({
                        "type": "invalid_transaction_id",
                        "event": event_name,
                        "message": "transaction_id is empty or invalid",
                    })
        return issues

    def validate_sequence(self, events):
        """Validate checkout event sequence.

        Walks the pushes in order and reports each checkout event whose
        position in ``CHECKOUT_SEQUENCE`` is lower than that of the checkout
        event seen immediately before it. Non-dict entries are ignored.

        Returns:
            list[dict]: ``sequence_error`` issues, possibly empty.
        """
        issues = []
        last_idx = -1
        for entry in events or []:
            if not isinstance(entry, dict):
                continue
            name = entry.get("event")
            if not name or name not in CHECKOUT_SEQUENCE:
                continue
            idx = CHECKOUT_SEQUENCE.index(name)
            if idx < last_idx:
                issues.append({
                    "type": "sequence_error",
                    "message": f"Event '{name}' fired out of order",
                })
            last_idx = idx
        return issues

    def check_ecommerce_clear(self, events):
        """Check if ecommerce object is cleared before new pushes.

        Only a push that carries an explicit ``"ecommerce": None`` counts as
        a clear. Pushes without an ``ecommerce`` key (and non-dict entries)
        neither clear nor set the pending state, so an unrelated event
        between two e-commerce pushes does not hide a missing clear.

        Returns:
            list[dict]: ``missing_ecommerce_clear`` issues; ``index`` is the
            position of the offending push in ``events``.
        """
        issues = []
        uncleared = False
        for i, event in enumerate(events or []):
            if not isinstance(event, dict) or "ecommerce" not in event:
                continue
            if event["ecommerce"] is None:
                uncleared = False
                continue
            if uncleared:
                issues.append({
                    "type": "missing_ecommerce_clear",
                    "index": i,
                    "event": event.get("event"),
                    "message": "E-commerce data should be cleared before new push",
                })
            uncleared = True
        return issues
class FormAnalyzer:
    """Form discovery, analysis, and interaction tracking.

    ``page`` is the Playwright page being audited; it is only used by
    ``discover_forms`` and ``simulate_form_interaction``.
    """

    # Sample value typed into a field, keyed by the field's reported type.
    TEST_VALUES = {
        "text": "Test User",
        "email": "test@example.com",
        "tel": "010-1234-5678",
        "number": "100",
        "password": "TestPass123!",
    }

    # Upper bound (ms) for each simulated focus/fill/blur so one field that
    # cannot be acted on does not hold the audit for the page default timeout.
    ACTION_TIMEOUT_MS = 5000

    def __init__(self, page):
        self.page = page
        self.forms = []
        self.interactions = []
        self.issues = []

    @staticmethod
    def _attr_selector(attribute, value):
        """Return a CSS selector matching ``attribute`` exactly equal to ``value``.

        An attribute selector with an escaped, double-quoted value is used
        instead of ``#id`` so ids/names containing dots, colons, quotes or a
        leading digit still form a valid selector.
        """
        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
        return f'[{attribute}="{escaped}"]'

    def discover_forms(self):
        """Find and analyze all forms on the page.

        Returns:
            list[dict]: One entry per ``<form>`` in document order, with its
            identifying attributes, field descriptions and whether a submit
            control is present. Also stored on ``self.forms``.
        """
        # Attributes are read with getAttribute(): on a form, properties such
        # as form.id / form.name / form.action resolve to a child control that
        # has that name, which would return an element instead of a string.
        # A <button> without a type attribute submits by default, so it is
        # counted as a submit control.
        forms_data = self.page.evaluate("""
            () => {
                const attr = (el, name) => el.getAttribute(name) || null;
                const forms = Array.from(document.querySelectorAll('form'));
                return forms.map((form, idx) => {
                    const fields = Array.from(
                        form.querySelectorAll('input, select, textarea')
                    );
                    return {
                        index: idx,
                        id: attr(form, 'id'),
                        name: attr(form, 'name'),
                        action: attr(form, 'action'),
                        method: (attr(form, 'method') || 'get').toLowerCase(),
                        className: attr(form, 'class'),
                        fieldCount: fields.length,
                        fields: fields.map(field => ({
                            type: field.type || field.tagName.toLowerCase(),
                            name: field.name || null,
                            id: field.id || null,
                            required: field.required || false,
                            placeholder: field.placeholder || null,
                            validation: field.pattern || null,
                            maxLength: field.maxLength > 0 ? field.maxLength : null,
                        })),
                        hasSubmitButton: form.querySelector(
                            'button[type="submit"], button:not([type]), ' +
                            'input[type="submit"], input[type="image"]'
                        ) !== null,
                    };
                });
            }
        """)
        self.forms = forms_data
        return forms_data

    def analyze_form_tracking_readiness(self):
        """Check if forms are ready for GTM tracking.

        Inspects ``self.forms`` (as produced by ``discover_forms``).

        Returns:
            list[dict]: Issues of type ``form_no_identifier``,
            ``field_no_identifier`` or ``form_no_submit``. Also stored on
            ``self.issues``.
        """
        issues = []
        for form in self.forms:
            form_index = form["index"]
            if not form["id"] and not form["name"]:
                issues.append({
                    "type": "form_no_identifier",
                    "form_index": form_index,
                    "message": f"Form {form['index']} has no id or name attribute",
                    "recommendation": "Add id or name attribute for reliable form tracking",
                })
            for field in form["fields"]:
                is_text_like = field["type"] in ["text", "email", "tel"]
                if is_text_like and not field["name"] and not field["id"]:
                    issues.append({
                        "type": "field_no_identifier",
                        "form_index": form_index,
                        "field_type": field["type"],
                        "message": "Input field missing name/id for tracking",
                    })
            if not form["hasSubmitButton"]:
                issues.append({
                    "type": "form_no_submit",
                    "form_index": form_index,
                    "message": "Form has no submit button - may use JS submission",
                    "recommendation": "Verify form submission triggers dataLayer push",
                })
        self.issues = issues
        return issues

    def simulate_form_interaction(self, form_index=0):
        """Simulate user interaction with a form.

        Focuses, optionally fills (for types in ``TEST_VALUES``) and blurs
        every field of the chosen form that has an id or name. The form is
        never submitted.

        Args:
            form_index: Position of the form in ``self.forms``.

        Returns:
            list[dict] of recorded interactions (failures for a single field
            are recorded as ``{"action": "error", ...}`` entries), or
            ``{"error": ...}`` when the index is invalid or setup fails.
        """
        if not 0 <= form_index < len(self.forms):
            return {"error": "Form index out of range"}
        form = self.forms[form_index]
        interactions = []
        try:
            # discover_forms indexes forms in document order, which is what
            # locator("form").nth(i) selects; fields are resolved inside that
            # form so a same-named field elsewhere on the page is not used.
            form_locator = self.page.locator("form").nth(form_index)
            for field in form["fields"]:
                if field["id"]:
                    field_selector = self._attr_selector("id", field["id"])
                elif field["name"]:
                    field_selector = self._attr_selector("name", field["name"])
                else:
                    continue
                label = field["name"] or field["id"]
                try:
                    field_element = form_locator.locator(field_selector).first
                    field_element.focus(timeout=self.ACTION_TIMEOUT_MS)
                    interactions.append({
                        "action": "focus",
                        "field": label,
                        "timestamp": datetime.now().isoformat(),
                    })
                    self.page.wait_for_timeout(200)

                    sample = self.TEST_VALUES.get(field["type"])
                    if sample is not None:
                        field_element.fill(sample, timeout=self.ACTION_TIMEOUT_MS)
                        interactions.append({
                            "action": "input",
                            "field": label,
                            "type": field["type"],
                            "timestamp": datetime.now().isoformat(),
                        })
                        self.page.wait_for_timeout(200)

                    field_element.blur(timeout=self.ACTION_TIMEOUT_MS)
                    interactions.append({
                        "action": "blur",
                        "field": label,
                        "timestamp": datetime.now().isoformat(),
                    })
                except Exception as e:
                    # Best effort: note the failure and move to the next field.
                    interactions.append({
                        "action": "error",
                        "field": label,
                        "error": str(e),
                    })
            self.interactions = interactions
            return interactions
        except Exception as e:
            return {"error": str(e)}

    def check_form_events(self, datalayer_events):
        """Check if expected form events are in dataLayer.

        Args:
            datalayer_events: Iterable of dataLayer pushes; entries that are
                not dicts are ignored.

        Returns:
            dict: ``found`` and ``missing`` event-name lists plus a
            ``recommendation`` string (``None`` when nothing is missing).
        """
        expected_events = ["form_start", "form_submit", "generate_lead"]
        event_names = [
            e.get("event") for e in datalayer_events or [] if isinstance(e, dict)
        ]
        found_events = [name for name in expected_events if name in event_names]
        missing_events = [name for name in expected_events if name not in event_names]
        recommendation = None
        if missing_events:
            recommendation = "Consider implementing: " + ", ".join(missing_events)
        return {
            "found": found_events,
            "missing": missing_events,
            "recommendation": recommendation,
        }
class CheckoutFlowAnalyzer:
    """E-commerce checkout flow simulation and validation."""

    # Element category -> list of (css, text) probes. ``text`` is None for a
    # plain CSS probe; otherwise matching elements must also contain that text.
    ELEMENT_SELECTORS = {
        "cart": [
            ('[class*="cart"]', None), ('[id*="cart"]', None),
            ('[class*="basket"]', None), ('[id*="basket"]', None),
        ],
        "checkout": [
            ('[class*="checkout"]', None), ('[id*="checkout"]', None),
            ("button", "Checkout"), ("a", "Checkout"),
            ("button", "결제"), ("a", "결제"),
        ],
        "addToCart": [
            ("button", "Add to Cart"), ("button", "Add to Bag"),
            ("button", "장바구니"), ("button", "담기"),
            ('[class*="add-to-cart"]', None), ('[id*="add-to-cart"]', None),
        ],
        "quantity": [
            ('[class*="quantity"]', None), ('[name*="quantity"]', None),
            ('[class*="qty"]', None), ('[name*="qty"]', None),
        ],
        "removeItem": [
            ('[class*="remove"]', None), ("button", "Remove"),
            ("button", "삭제"), ('[class*="delete"]', None),
        ],
        "promoCode": [
            ('[name*="promo"]', None), ('[name*="coupon"]', None),
            ('[id*="coupon"]', None),
            ('[placeholder*="promo"]', None), ('[placeholder*="coupon"]', None),
        ],
    }

    def __init__(self, page):
        self.page = page
        self.steps_completed = []
        self.events_captured = []
        self.issues = []

    @classmethod
    def _selector_payload(cls):
        """Build the JSON-serialisable probe list sent to the page.

        Each probe carries the plain ``css`` selector, the optional ``text``
        filter and a ``label`` used in the report. For text probes the label
        is written as ``css:has-text("text")``.
        """
        payload = {}
        for kind, probes in cls.ELEMENT_SELECTORS.items():
            payload[kind] = [
                {
                    "css": css,
                    "text": text,
                    "label": f'{css}:has-text("{text}")' if text else css,
                }
                for css, text in probes
            ]
        return payload

    def detect_checkout_elements(self):
        """Find checkout-related elements on page.

        ``:has-text()`` is a Playwright selector extension, not CSS, so it
        cannot be handed to ``document.querySelectorAll``. Text probes are
        therefore run as a plain CSS query followed by a case-insensitive
        ``textContent`` filter inside the page.

        Returns:
            dict: category -> list of ``{selector, tag, text, visible}``.
        """
        elements = self.page.evaluate("""
            (selectors) => {
                const found = {};
                for (const [type, probes] of Object.entries(selectors)) {
                    found[type] = [];
                    for (const probe of probes) {
                        let nodes = [];
                        try {
                            nodes = Array.from(document.querySelectorAll(probe.css));
                        } catch (e) {
                            continue;
                        }
                        if (probe.text) {
                            const needle = probe.text.toLowerCase();
                            nodes = nodes.filter(el =>
                                (el.textContent || '').toLowerCase().includes(needle)
                            );
                        }
                        nodes.forEach(el => {
                            found[type].push({
                                selector: probe.label,
                                tag: el.tagName.toLowerCase(),
                                text: el.textContent?.slice(0, 50) || null,
                                visible: el.offsetParent !== null,
                            });
                        });
                    }
                }
                return found;
            }
        """, self._selector_payload())
        return elements

    def _click_first_visible(self, step, selectors, settle_ms):
        """Click the first visible match among ``selectors`` and record the step.

        A selector that fails to resolve or click is skipped (best effort).
        Returns True once a click succeeded, False when none did.
        """
        for selector in selectors:
            try:
                btn = self.page.locator(selector).first
                if not btn.is_visible():
                    continue
                btn.click()
            except Exception:
                continue
            self.steps_completed.append({
                "step": step,
                "selector": selector,
                "timestamp": datetime.now().isoformat(),
            })
            # Give the page time to react (and push events) after the click.
            self.page.wait_for_timeout(settle_ms)
            return True
        return False

    def simulate_add_to_cart(self):
        """Attempt to simulate add-to-cart action.

        Returns:
            bool: True when a button was clicked. Unexpected errors are
            recorded on ``self.issues`` and reported as False.
        """
        selectors = [
            'button:has-text("Add to Cart")',
            'button:has-text("Add to Bag")',
            'button:has-text("장바구니")',
            '[class*="add-to-cart"]:visible',
            '[id*="add-to-cart"]:visible',
        ]
        try:
            return self._click_first_visible("add_to_cart", selectors, 1500)
        except Exception as e:
            self.issues.append({"step": "add_to_cart", "error": str(e)})
            return False

    def simulate_begin_checkout(self):
        """Attempt to click checkout button.

        Returns:
            bool: True when a button was clicked. Unexpected errors are
            recorded on ``self.issues`` and reported as False.
        """
        selectors = [
            'button:has-text("Checkout")',
            'a:has-text("Checkout")',
            'button:has-text("결제하기")',
            'button:has-text("주문하기")',
            '[class*="checkout-btn"]:visible',
        ]
        try:
            return self._click_first_visible("begin_checkout", selectors, 2000)
        except Exception as e:
            self.issues.append({"step": "begin_checkout", "error": str(e)})
            return False

    def validate_checkout_events(self, datalayer_events):
        """Validate checkout-related events in dataLayer.

        Args:
            datalayer_events: Iterable of dataLayer pushes; entries that are
                not dicts are ignored.

        Returns:
            dict: ``events_found`` / ``events_missing`` (in funnel order),
            ``sequence_valid`` and the collected ``issues``. A checkout event
            that simply fires more than once is not treated as out of order.
        """
        results = {
            "events_found": [],
            "events_missing": [],
            "sequence_valid": True,
            "issues": [],
        }
        pushes = [e for e in datalayer_events or [] if isinstance(e, dict)]
        event_names = [e.get("event") for e in pushes]
        validator = DataLayerValidator()

        for step in CHECKOUT_SEQUENCE:
            matching = [e for e in pushes if e.get("event") == step]
            if not matching:
                results["events_missing"].append(step)
                continue
            results["events_found"].append(step)
            for event in matching:
                results["issues"].extend(validator.validate_event(event))

        found_sequence = [name for name in event_names if name in CHECKOUT_SEQUENCE]
        # Stable sort by funnel position keeps duplicates, so the comparison
        # fails only when a later-stage event precedes an earlier-stage one.
        expected_order = sorted(found_sequence, key=CHECKOUT_SEQUENCE.index)
        if found_sequence != expected_order:
            results["sequence_valid"] = False
            results["issues"].append({
                "type": "sequence_error",
                "message": f"Events out of order. Found: {found_sequence}, Expected: {expected_order}",
            })
        return results
class GTMAuditor:
    """Main GTM audit orchestrator.

    Opens the target URL in Chromium through Playwright, records requests to
    known tag endpoints, inspects the GTM container and ``window.dataLayer``,
    runs the journey-specific analyzers and collects everything in
    ``self.report``.
    """

    def __init__(self, url, container_id=None, timeout=30000, headless=True):
        """Store audit settings and create the empty report skeleton.

        Args:
            url: Page to audit.
            container_id: Expected GTM container ID, or None to skip the
                container-match check.
            timeout: Page-load timeout in milliseconds (passed to ``goto``).
            headless: Whether Chromium is launched headless.
        """
        self.url = url
        self.expected_container = container_id
        self.timeout = timeout
        self.headless = headless
        self.report = {
            "audit_metadata": {
                "url": url,
                "timestamp": datetime.now().isoformat(),
                "expected_container": container_id,
            },
            "container_status": {},
            "datalayer_analysis": {
                "events": [],
                "validation_issues": [],
                "sequence_issues": [],
            },
            "form_analysis": {
                "forms_found": [],
                "tracking_issues": [],
                "events_status": {},
            },
            "checkout_analysis": {
                "elements_found": {},
                "events_status": {},
                "flow_issues": [],
            },
            "network_requests": [],
            "tags_fired": [],
            "issues": [],
            "recommendations": [],
            "checklist": {},
        }
        self.network_requests = []
        self.datalayer_history = []
        self.page = None

    @staticmethod
    def _sanitize_datalayer(datalayer):
        """Return ``datalayer`` with every non-dict entry replaced by ``{}``.

        Positions are preserved so indexes reported by the validators still
        refer to the original dataLayer, while code that calls ``.get`` on
        each entry cannot fail on strings, arrays or ``None``.
        """
        return [entry if isinstance(entry, dict) else {} for entry in datalayer]

    def _setup_network_monitoring(self, page):
        """Intercept and log network requests to tag destinations.

        One record is appended to ``self.network_requests`` per destination
        whose patterns match the request URL.
        """
        def handle_request(request):
            url = request.url
            for destination, patterns in TAG_DESTINATIONS.items():
                if not any(re.search(pattern, url) for pattern in patterns):
                    continue
                params = parse_qs(urlparse(url).query)
                self.network_requests.append({
                    "destination": destination,
                    "url": url[:200],
                    "method": request.method,
                    # Collapse single-valued query parameters to a scalar.
                    "params": {k: v[0] if len(v) == 1 else v for k, v in params.items()},
                    "timestamp": datetime.now().isoformat(),
                })

        page.on("request", handle_request)

    def _setup_datalayer_monitoring(self, page):
        """Inject dataLayer monitoring script.

        Wraps ``window.dataLayer.push`` so later pushes are copied into
        ``window.__gtmAuditEvents``. Nothing is installed when the page has
        no dataLayer array (one is deliberately not created, so the container
        check still reports the page's real state) or when the hook is
        already present. A push that cannot be JSON-serialised is logged as a
        placeholder; the failure never reaches the site's own push call.

        Returns:
            bool: True when the hook is in place.
        """
        return page.evaluate("""
            () => {
                if (!Array.isArray(window.dataLayer)) {
                    return false;
                }
                if (window.__gtmAuditInstalled) {
                    return true;
                }
                window.__gtmAuditInstalled = true;
                window.__gtmAuditEvents = window.__gtmAuditEvents || [];
                const originalPush = window.dataLayer.push;
                window.dataLayer.push = function() {
                    const result = originalPush.apply(this, arguments);
                    for (let i = 0; i < arguments.length; i++) {
                        let data;
                        try {
                            data = JSON.parse(JSON.stringify(arguments[i]));
                        } catch (e) {
                            data = {__unserializable: String(e)};
                        }
                        window.__gtmAuditEvents.push({
                            data: data,
                            timestamp: new Date().toISOString()
                        });
                    }
                    return result;
                };
                return true;
            }
        """)

    def _capture_datalayer(self, page):
        """Capture current dataLayer state.

        DOM nodes are replaced by a short marker string and circular
        references by ``'[Circular]'`` so the snapshot can always be
        serialised.

        Returns:
            list | None: A JSON copy of ``window.dataLayer``, or None when the
            page has no dataLayer array or the snapshot could not be taken.
        """
        try:
            datalayer = page.evaluate("""
                () => {
                    if (!Array.isArray(window.dataLayer)) {
                        return null;
                    }
                    const ancestors = [];
                    const replacer = function(key, value) {
                        if (typeof value !== 'object' || value === null) {
                            return value;
                        }
                        if (typeof Node !== 'undefined' && value instanceof Node) {
                            return '[DOM ' + value.nodeName + ']';
                        }
                        // `this` is the direct parent of `value`; unwind the
                        // stack to it so only true ancestors are compared.
                        while (ancestors.length > 0 &&
                               ancestors[ancestors.length - 1] !== this) {
                            ancestors.pop();
                        }
                        if (ancestors.includes(value)) {
                            return '[Circular]';
                        }
                        ancestors.push(value);
                        return value;
                    };
                    return JSON.parse(JSON.stringify(window.dataLayer, replacer));
                }
            """)
        except Exception:
            return None
        return datalayer if isinstance(datalayer, list) else None

    def _capture_monitored_events(self, page):
        """Capture events logged by our monitoring.

        Returns:
            list: Entries recorded by the push hook; empty when the hook is
            not installed or the page cannot be queried.
        """
        try:
            return page.evaluate("""
                () => window.__gtmAuditEvents || []
            """)
        except Exception:
            return []

    def _check_gtm_container(self, page):
        """Verify GTM container installation.

        Scans ``<script>`` and ``<noscript>`` elements for container IDs, the
        loader position and the dataLayer initialisation order, stores the
        result in ``report["container_status"]`` and appends any problems to
        ``report["issues"]``.

        Returns:
            dict: The container status that was stored in the report.
        """
        # NOTE(review): ordering and position are heuristics. When an inline
        # snippet that loads gtm.js is present, its own location is used,
        # because a script node it injects can sit elsewhere in the document
        # than the snippet. The first inline script mentioning both
        # 'dataLayer' and '[]' is taken as the initialisation (which can be
        # the snippet itself).
        result = page.evaluate("""
            () => {
                const scripts = document.querySelectorAll('script');
                const gtmInfo = {
                    installed: false,
                    containers: [],
                    position: null,
                    noscript: false,
                    dataLayerInit: false,
                    dataLayerInitBeforeGTM: false,
                };
                gtmInfo.dataLayerInit = typeof window.dataLayer !== 'undefined' &&
                    Array.isArray(window.dataLayer);
                let gtmScriptIndex = -1;
                let gtmSnippetIndex = -1;
                let dataLayerInitIndex = -1;
                let scriptPosition = null;
                let snippetPosition = null;
                scripts.forEach((script, index) => {
                    const src = script.src || '';
                    const innerHTML = script.innerHTML || '';
                    const where = script.closest('head') ? 'head' : 'body';
                    if (dataLayerInitIndex === -1 &&
                        innerHTML.includes('dataLayer') && innerHTML.includes('[]')) {
                        dataLayerInitIndex = index;
                    }
                    if (gtmSnippetIndex === -1 &&
                        innerHTML.includes('googletagmanager.com/gtm.js')) {
                        gtmSnippetIndex = index;
                        snippetPosition = where;
                    }
                    const gtmMatch = src.match(/gtm\\.js\\?id=(GTM-[A-Z0-9]+)/);
                    if (gtmMatch) {
                        gtmInfo.installed = true;
                        gtmInfo.containers.push(gtmMatch[1]);
                        if (gtmScriptIndex === -1) {
                            gtmScriptIndex = index;
                            scriptPosition = where;
                        }
                    }
                    const inlineMatch = innerHTML.match(/GTM-[A-Z0-9]+/g);
                    if (inlineMatch) {
                        gtmInfo.installed = true;
                        inlineMatch.forEach(id => {
                            if (!gtmInfo.containers.includes(id)) {
                                gtmInfo.containers.push(id);
                            }
                        });
                    }
                });
                gtmInfo.position = snippetPosition || scriptPosition;
                const gtmOrderIndex = gtmSnippetIndex !== -1 ? gtmSnippetIndex : gtmScriptIndex;
                gtmInfo.dataLayerInitBeforeGTM = dataLayerInitIndex !== -1 &&
                    gtmOrderIndex !== -1 && dataLayerInitIndex <= gtmOrderIndex;
                const noscripts = document.querySelectorAll('noscript');
                noscripts.forEach(ns => {
                    if (ns.innerHTML.includes('googletagmanager.com/ns.html')) {
                        gtmInfo.noscript = true;
                    }
                });
                return gtmInfo;
            }
        """)
        containers = result["containers"]
        status = {
            "installed": result["installed"],
            "containers": containers,
            "position": result["position"],
            "noscript_present": result["noscript"],
            "datalayer_initialized": result["dataLayerInit"],
            "datalayer_init_before_gtm": result["dataLayerInitBeforeGTM"],
            "issues": [],
        }
        report_issues = self.report["issues"]
        if not result["installed"]:
            status["issues"].append("GTM container not detected")
            report_issues.append({
                "severity": "critical",
                "type": "container_missing",
                "message": "GTM container script not found on page",
            })
        if len(containers) > 1:
            status["issues"].append(f"Multiple containers: {result['containers']}")
            report_issues.append({
                "severity": "warning",
                "type": "multiple_containers",
                "message": f"Multiple GTM containers found: {', '.join(result['containers'])}",
            })
        if self.expected_container and self.expected_container not in containers:
            report_issues.append({
                "severity": "error",
                "type": "container_mismatch",
                "message": f"Expected {self.expected_container}, found {result['containers']}",
            })
        if result["position"] == "body":
            report_issues.append({
                "severity": "warning",
                "type": "script_position",
                "message": "GTM script in body - may delay tag firing",
            })
        # Without a container there is no GTM script to order against.
        if result["installed"] and not result["dataLayerInitBeforeGTM"]:
            report_issues.append({
                "severity": "warning",
                "type": "datalayer_order",
                "message": "dataLayer should be initialized before GTM script",
            })
        self.report["container_status"] = status
        return status

    def _simulate_scroll(self, page):
        """Simulate scroll to trigger scroll-depth tags.

        Schedules scrolls to 25/50/75/90/100% of the document height, 500 ms
        apart, then waits 3 s so they all run.
        """
        page.evaluate("""
            () => {
                const heights = [0.25, 0.5, 0.75, 0.9, 1.0];
                const docHeight = document.documentElement.scrollHeight;
                heights.forEach((pct, i) => {
                    setTimeout(() => {
                        window.scrollTo(0, docHeight * pct);
                    }, i * 500);
                });
            }
        """)
        page.wait_for_timeout(3000)

    def _run_form_audit(self, page):
        """Execute form analysis and store it under ``report["form_analysis"]``."""
        print("📝 Analyzing forms...")
        form_analyzer = FormAnalyzer(page)
        forms = form_analyzer.discover_forms()
        tracking_issues = form_analyzer.analyze_form_tracking_readiness()
        section = self.report["form_analysis"]
        section["forms_found"] = forms
        section["tracking_issues"] = tracking_issues
        if not forms:
            print(" No forms found on page")
            return
        print(f" Found {len(forms)} form(s)")
        # Only the first form is exercised.
        section["interactions"] = form_analyzer.simulate_form_interaction(0)
        # Allow time for events triggered by the interaction.
        page.wait_for_timeout(2000)
        datalayer = self._capture_datalayer(page)
        if datalayer:
            section["events_status"] = form_analyzer.check_form_events(
                self._sanitize_datalayer(datalayer)
            )

    def _run_checkout_audit(self, page):
        """Execute e-commerce checkout flow analysis.

        Detects checkout-related elements and validates the checkout events
        already present in the dataLayer. No buttons are clicked.
        """
        print("🛒 Analyzing checkout flow...")
        checkout_analyzer = CheckoutFlowAnalyzer(page)
        elements = checkout_analyzer.detect_checkout_elements()
        section = self.report["checkout_analysis"]
        section["elements_found"] = elements
        for element_type, found in elements.items():
            if found:
                print(f" Found {len(found)} {element_type} element(s)")
        datalayer = self._capture_datalayer(page)
        if datalayer:
            results = checkout_analyzer.validate_checkout_events(
                self._sanitize_datalayer(datalayer)
            )
            section["events_status"] = {
                "events_found": results["events_found"],
                "events_missing": results["events_missing"],
                "sequence_valid": results["sequence_valid"],
            }
            section["flow_issues"] = results["issues"]

    def _run_datalayer_audit(self, page):
        """Execute deep dataLayer analysis.

        Results are rebuilt from scratch and assigned on every call, so
        running this more than once does not duplicate issues.
        """
        print("📊 Analyzing dataLayer...")
        analysis = self.report["datalayer_analysis"]
        datalayer = self._capture_datalayer(page)
        analysis["monitored_events"] = self._capture_monitored_events(page)
        if not datalayer:
            analysis["issues"] = ["dataLayer not found"]
            return
        pushes = self._sanitize_datalayer(datalayer)
        validator = DataLayerValidator()

        validation_issues = []
        for push in pushes:
            validation_issues.extend(validator.validate_event(push))
        validation_issues.extend(validator.check_ecommerce_clear(pushes))
        analysis["validation_issues"] = validation_issues
        analysis["sequence_issues"] = validator.validate_sequence(pushes)

        events = [
            {
                "index": i,
                "event": item.get("event"),
                "has_ecommerce": "ecommerce" in item,
                "params": list(item.keys()),
            }
            for i, item in enumerate(pushes)
            if item.get("event")
        ]
        analysis["events"] = events
        print(f" Found {len(events)} events in dataLayer")

    def _generate_recommendations(self):
        """Generate recommendations based on findings.

        Reads the report sections and ``self.network_requests`` and replaces
        ``report["recommendations"]``.
        """
        recs = []
        for issue in self.report["issues"]:
            issue_type = issue["type"]
            if issue_type == "container_missing":
                recs.append({
                    "priority": "high",
                    "action": "Install GTM container",
                    "details": "Add GTM snippet to <head> section",
                })
            elif issue_type == "datalayer_order":
                recs.append({
                    "priority": "medium",
                    "action": "Initialize dataLayer before GTM",
                    "details": "Add 'window.dataLayer = window.dataLayer || [];' before GTM",
                })

        form_analysis = self.report["form_analysis"]
        # Form events are only worth recommending when the page has forms.
        if form_analysis["forms_found"]:
            missing = form_analysis.get("events_status", {}).get("missing")
            if missing:
                recs.append({
                    "priority": "medium",
                    "action": "Implement form tracking events",
                    "details": f"Missing events: {', '.join(missing)}",
                })

        validation_issues = self.report["datalayer_analysis"].get("validation_issues", [])
        if validation_issues:
            recs.append({
                "priority": "high",
                "action": "Fix dataLayer validation issues",
                "details": f"{len(validation_issues)} issue(s) found in event structure",
            })

        destinations = {r["destination"] for r in self.network_requests}
        if "GA4" not in destinations:
            recs.append({
                "priority": "high",
                "action": "Verify GA4 implementation",
                "details": "No GA4 requests detected",
            })
        self.report["recommendations"] = recs

    def _generate_checklist(self):
        """Generate audit checklist.

        Replaces ``report["checklist"]`` with pass/fail booleans grouped by
        container health, dataLayer quality, form tracking and tag firing.
        Form checks pass vacuously when no forms were found.
        """
        container = self.report["container_status"]
        datalayer = self.report["datalayer_analysis"]
        form_analysis = self.report["form_analysis"]
        containers = container.get("containers", [])
        forms = form_analysis.get("forms_found", [])

        if self.expected_container:
            correct_container = self.expected_container in containers
        else:
            correct_container = True

        if forms:
            forms_identifiable = all(f.get("id") or f.get("name") for f in forms)
            form_events_present = len(
                form_analysis.get("events_status", {}).get("found", [])
            ) > 0
        else:
            forms_identifiable = True
            form_events_present = True

        self.report["checklist"] = {
            "container_health": {
                "gtm_installed": container.get("installed", False),
                "correct_container": correct_container,
                "no_duplicates": len(containers) <= 1,
                "correct_position": container.get("position") == "head",
                "datalayer_init_order": container.get("datalayer_init_before_gtm", False),
            },
            "datalayer_quality": {
                "initialized": container.get("datalayer_initialized", False),
                "events_present": len(datalayer.get("events", [])) > 0,
                "no_validation_errors": len(datalayer.get("validation_issues", [])) == 0,
                "correct_sequence": len(datalayer.get("sequence_issues", [])) == 0,
            },
            "form_tracking": {
                "forms_identifiable": forms_identifiable,
                "form_events_present": form_events_present,
            },
            "tag_firing": {
                "ga4_active": any(r["destination"] == "GA4" for r in self.network_requests),
                "requests_captured": len(self.network_requests) > 0,
            },
        }

    def run_audit(self, journey="pageview"):
        """Execute the full audit workflow.

        Args:
            journey: ``scroll``/``pageview`` simulate scrolling, ``form`` and
                ``checkout`` run the matching analyzer, ``full`` runs all of
                them. Other values (for example ``click`` or ``datalayer``)
                add no extra step; the dataLayer analysis runs once at the
                end for every journey.

        Returns:
            dict: The completed report. Failures during the page session are
            recorded as a critical ``audit_error`` issue instead of raising.
        """
        print(f"🔍 Starting GTM audit for: {self.url}")
        print(f" Journey type: {journey}")
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=self.headless)
            try:
                context = browser.new_context(
                    viewport={"width": 1920, "height": 1080},
                    user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) GTMAudit/1.0"
                )
                page = context.new_page()
                self.page = page
                self._setup_network_monitoring(page)
                print("📄 Loading page...")
                page.goto(self.url, timeout=self.timeout, wait_until="networkidle")
                page.wait_for_timeout(2000)
                # The push hook is optional: the audit continues without it.
                try:
                    self._setup_datalayer_monitoring(page)
                except Exception as exc:
                    self.report["issues"].append({
                        "severity": "info",
                        "type": "monitoring_unavailable",
                        "message": f"dataLayer push monitoring not installed: {exc}",
                    })
                print("🏷️ Checking GTM container...")
                self._check_gtm_container(page)
                if journey in ["scroll", "pageview", "full"]:
                    print("📜 Simulating scroll...")
                    self._simulate_scroll(page)
                if journey in ["form", "full"]:
                    self._run_form_audit(page)
                if journey in ["checkout", "full"]:
                    self._run_checkout_audit(page)
                # Single dataLayer pass after a short settle delay.
                page.wait_for_timeout(2000)
                self._run_datalayer_audit(page)
            except Exception as e:
                self.report["issues"].append({
                    "severity": "critical",
                    "type": "audit_error",
                    "message": str(e),
                })
            finally:
                # Keep whatever traffic was captured, even after a failure.
                self.report["network_requests"] = self.network_requests
                self.report["tags_fired"] = sorted(
                    {r["destination"] for r in self.network_requests}
                )
                browser.close()
        self._generate_recommendations()
        self._generate_checklist()
        print("✅ Audit complete!")
        return self.report

    def save_report(self, filepath):
        """Save report to JSON file (UTF-8, non-ASCII characters kept as-is)."""
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(self.report, f, indent=2, ensure_ascii=False)
        print(f"📝 Report saved to: {filepath}")

    def print_summary(self):
        """Print audit summary to console.

        Shows container, dataLayer, form and tag information followed by the
        first five issues and the first three recommendations.
        """
        divider = "=" * 60
        print("\n" + divider)
        print("📋 GTM AUDIT SUMMARY")
        print(divider)

        cs = self.report["container_status"]
        print(f"\n🏷️ Container: {'✅ Installed' if cs.get('installed') else '❌ Not Found'}")
        if cs.get("containers"):
            print(f" IDs: {', '.join(cs['containers'])}")

        dl = self.report["datalayer_analysis"]
        print("\n📊 DataLayer:")
        print(f" Events found: {len(dl.get('events', []))}")
        print(f" Validation issues: {len(dl.get('validation_issues', []))}")

        fa = self.report["form_analysis"]
        if fa.get("forms_found"):
            print("\n📝 Forms:")
            print(f" Forms found: {len(fa['forms_found'])}")
            print(f" Tracking issues: {len(fa.get('tracking_issues', []))}")

        tags = self.report["tags_fired"]
        print(f"\n🔥 Tags Fired: {', '.join(tags) if tags else 'None detected'}")

        print(f"\n⚠️ Total Issues: {len(self.report['issues'])}")
        for issue in self.report["issues"][:5]:
            print(f" - [{issue['severity'].upper()}] {issue['message']}")

        print(f"\n💡 Recommendations: {len(self.report['recommendations'])}")
        for rec in self.report["recommendations"][:3]:
            print(f" - [{rec['priority'].upper()}] {rec['action']}")
        print("\n" + divider)
def main():
    """Command-line entry point: parse options, run the audit, save and print it.

    ``--headless`` stays the default; ``--no-headless`` is its opt-out.
    Because ``--headless`` was ``store_true`` with ``default=True``, there
    was previously no way to obtain ``headless=False`` from the command line.
    """
    parser = argparse.ArgumentParser(description="GTM Audit Tool")
    parser.add_argument("--url", required=True, help="Target URL to audit")
    parser.add_argument("--container", help="Expected GTM container ID (e.g., GTM-XXXXXX)")
    parser.add_argument("--journey", default="full",
                        choices=["pageview", "scroll", "click", "form", "checkout", "datalayer", "full"],
                        help="Journey type to simulate")
    parser.add_argument("--output", default="gtm_audit_report.json", help="Output file path")
    parser.add_argument("--timeout", type=int, default=30000, help="Page load timeout (ms)")
    parser.add_argument("--headless", dest="headless", action="store_true", default=True,
                        help="Run headless (default)")
    # Shares dest with --headless so either flag sets the same attribute.
    parser.add_argument("--no-headless", dest="headless", action="store_false",
                        help="Show the browser window while auditing")
    args = parser.parse_args()

    auditor = GTMAuditor(
        url=args.url,
        container_id=args.container,
        timeout=args.timeout,
        headless=args.headless,
    )
    auditor.run_audit(journey=args.journey)
    auditor.save_report(args.output)
    auditor.print_summary()


if __name__ == "__main__":
    main()