# - 13-ourdigital-gtm-audit: Lightweight audit-only tool (original)
#   - GTM container validation
#   - DataLayer event checking
#   - Form and checkout analysis
#   - No Notion integration, no inject mode
# - 14-ourdigital-gtm-manager: Comprehensive management toolkit
#   - Full audit capabilities
#   - DataLayerInjector for custom HTML tag generation
#   - Notion integration for audit logging
#   - 20+ GA4 event templates
#
# 🤖 Generated with [Claude Code](https://claude.com/claude-code)
#
# Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
#
# File info: 1114 lines, 42 KiB, Python
#!/usr/bin/env python3
"""
GTM Audit Script - Comprehensive Google Tag Manager audit with form tracking,
e-commerce checkout flow, and advanced dataLayer validation.

Usage:
    python gtm_audit.py --url "https://example.com" [options]

Options:
    --url        Target URL to audit (required)
    --container  Expected GTM container ID (e.g., GTM-XXXXXX)
    --journey    Journey type: pageview, scroll, click, form, checkout, datalayer, full
    --output     Output file path (default: gtm_audit_report.json)
    --timeout    Page load timeout in ms (default: 30000)
    --headless   Run in headless mode (default: True)
"""

import argparse
|
|
import json
|
|
import re
|
|
import sys
|
|
from datetime import datetime
|
|
from urllib.parse import urlparse, parse_qs
|
|
from playwright.sync_api import sync_playwright
|
|
|
|
# Tag destination patterns.
# Maps a vendor label to the regular expressions used to recognise that
# vendor's endpoints in a request URL (applied with re.search).
TAG_DESTINATIONS = {
    "GA4": [
        r"google-analytics\.com/g/collect",
        r"analytics\.google\.com/g/collect",
    ],
    "Universal Analytics": [
        r"google-analytics\.com/collect",
        r"google-analytics\.com/r/collect",
    ],
    "Google Ads": [
        r"googleads\.g\.doubleclick\.net",
        r"google\.com/pagead",
        r"googleadservices\.com/pagead",
    ],
    "Meta Pixel": [
        r"facebook\.com/tr",
        r"connect\.facebook\.net",
    ],
    "LinkedIn": [
        r"px\.ads\.linkedin\.com",
        r"snap\.licdn\.com",
    ],
    "TikTok": [
        r"analytics\.tiktok\.com",
    ],
    "Twitter/X": [
        r"ads-twitter\.com",
        r"t\.co/i/adsct",
    ],
    "Kakao": [
        r"pixel\.kakao\.com",
    ],
    "Naver": [
        r"wcs\.naver\.com",
    ],
}

# GA4 required parameters by event.
# Per event name:
#   "required"       - fields that must be present on the event
#   "items_required" - fields every entry of the ecommerce "items" array needs
#   "recommended"    - optional fields listed for reference
GA4_EVENT_REQUIREMENTS = {
    "purchase": {
        "required": ["transaction_id", "value", "currency"],
        "items_required": ["item_id", "item_name"],
    },
    "add_to_cart": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "begin_checkout": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "add_shipping_info": {
        "required": ["currency", "value"],
        "recommended": ["shipping_tier"],
    },
    "add_payment_info": {
        "required": ["currency", "value"],
        "recommended": ["payment_type"],
    },
    "view_item": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "view_cart": {
        "required": ["currency", "value"],
        "items_required": ["item_id", "item_name"],
    },
    "generate_lead": {
        "recommended": ["currency", "value"],
    },
    "form_submit": {
        "recommended": ["form_id", "form_name"],
    },
}

# Checkout flow sequence: the order in which checkout events are expected.
CHECKOUT_SEQUENCE = [
    "view_cart",
    "begin_checkout",
    "add_shipping_info",
    "add_payment_info",
    "purchase",
]


class DataLayerValidator:
    """Advanced dataLayer validation and monitoring.

    Every check takes the entries to inspect as an argument and returns a
    list of issue dicts. Entries that are not dicts are ignored rather than
    raising, because a captured dataLayer may contain arbitrary values.
    """

    def __init__(self):
        # Bookkeeping lists; the methods of this class do not write to them.
        self.events = []
        self.issues = []
        self.snapshots = []

    def validate_event(self, event_data):
        """Validate a single dataLayer event against GA4 specs.

        Args:
            event_data: One dataLayer entry. Entries that are not dicts, have
                no string ``event`` name, or name an event that has no entry
                in ``GA4_EVENT_REQUIREMENTS`` produce no issues.

        Returns:
            A list of issue dicts. Each has ``type``, ``event`` and
            ``message``; field-level issues add ``field`` and item-level
            issues add ``item_index``.
        """
        issues = []
        if not isinstance(event_data, dict):
            return issues

        event_name = event_data.get("event")
        if not isinstance(event_name, str) or not event_name:
            return issues

        reqs = GA4_EVENT_REQUIREMENTS.get(event_name)
        if reqs is None:
            return issues

        # "ecommerce" may be None (a clear push) or malformed; treat anything
        # that is not a dict as an empty payload instead of crashing on it.
        ecommerce = event_data.get("ecommerce")
        if not isinstance(ecommerce, dict):
            ecommerce = {}

        # Required fields may live inside "ecommerce" or at the top level.
        for field in reqs.get("required", []):
            if field not in ecommerce and field not in event_data:
                issues.append({
                    "type": "missing_required",
                    "event": event_name,
                    "field": field,
                    "message": f"Missing required field: {field}",
                })

        # Items array: must be a non-empty list when item fields are required.
        item_fields = reqs.get("items_required", [])
        items = ecommerce.get("items", [])
        if not isinstance(items, list):
            items = []
        if item_fields and not items:
            issues.append({
                "type": "missing_items",
                "event": event_name,
                "message": "E-commerce event missing 'items' array",
            })

        for index, item in enumerate(items):
            # A non-dict item carries none of the required fields.
            present = item if isinstance(item, dict) else {}
            for field in item_fields:
                if field not in present:
                    issues.append({
                        "type": "item_missing_field",
                        "event": event_name,
                        "item_index": index,
                        "field": field,
                        "message": f"Item {index} missing required field: {field}",
                    })

        # Data types: bool is a subclass of int, so reject it explicitly.
        if "value" in ecommerce:
            value = ecommerce["value"]
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                issues.append({
                    "type": "wrong_type",
                    "event": event_name,
                    "field": "value",
                    "message": f"'value' should be number, got {type(value).__name__}",
                })

        # transaction_id sanity check (empty, falsy or the literal "undefined").
        if event_name == "purchase" and "transaction_id" in ecommerce:
            tid = ecommerce["transaction_id"]
            if not tid or tid == "undefined":
                issues.append({
                    "type": "invalid_transaction_id",
                    "event": event_name,
                    "message": "transaction_id is empty or invalid",
                })

        return issues

    def validate_sequence(self, events):
        """Validate checkout event sequence.

        Args:
            events: dataLayer entries in push order; non-dict entries and
                events outside ``CHECKOUT_SEQUENCE`` are skipped.

        Returns:
            One ``sequence_error`` issue for each checkout event that fires
            after an event positioned later in ``CHECKOUT_SEQUENCE``.
        """
        order = {name: position for position, name in enumerate(CHECKOUT_SEQUENCE)}
        issues = []
        last_position = -1

        for entry in events or []:
            if not isinstance(entry, dict):
                continue
            name = entry.get("event")
            if not isinstance(name, str) or name not in order:
                continue

            position = order[name]
            if position < last_position:
                issues.append({
                    "type": "sequence_error",
                    "message": f"Event '{name}' fired out of order",
                })
            last_position = position

        return issues

    def check_ecommerce_clear(self, events):
        """Check if ecommerce object is cleared before new pushes.

        Only an explicit ``{"ecommerce": None}`` push counts as a clear.
        Entries without an ``ecommerce`` key (and non-dict entries) leave the
        pending state untouched, so an unrelated event between two e-commerce
        pushes does not hide a missing clear.

        Args:
            events: dataLayer entries in push order.

        Returns:
            One ``missing_ecommerce_clear`` issue per e-commerce push that
            follows another e-commerce push without a clear in between;
            ``index`` is the entry's position in ``events``.
        """
        issues = []
        pending_ecommerce = False

        for index, entry in enumerate(events or []):
            if not isinstance(entry, dict) or "ecommerce" not in entry:
                continue

            if entry["ecommerce"] is None:
                pending_ecommerce = False
                continue

            if pending_ecommerce:
                issues.append({
                    "type": "missing_ecommerce_clear",
                    "index": index,
                    "event": entry.get("event"),
                    "message": "E-commerce data should be cleared before new push",
                })
            pending_ecommerce = True

        return issues


class FormAnalyzer:
    """Form discovery, analysis, and interaction tracking.

    Wraps a Playwright page: ``discover_forms`` collects form metadata from
    the DOM, and the other methods work on that collected metadata.
    """

    # Sample values typed into a field, keyed by the field's reported type.
    # Field types that are not listed are focused and blurred but not filled.
    _TEST_VALUES = {
        "text": "Test User",
        "email": "test@example.com",
        "tel": "010-1234-5678",
        "number": "100",
        "password": "TestPass123!",
    }

    def __init__(self, page):
        self.page = page
        self.forms = []
        self.interactions = []
        self.issues = []

    def discover_forms(self):
        """Find and analyze all forms on the page.

        Returns:
            A list with one dict per ``<form>`` (in ``querySelectorAll``
            order) holding its identifiers, action/method, class name and a
            description of every input, select and textarea inside it. The
            list is also stored on ``self.forms``.
        """
        forms_data = self.page.evaluate("""
            () => {
                // Form controls shadow same-named form properties (an
                // <input name="id"> makes form.id return that element), so
                // identifiers are read from attributes and any other
                // property is accepted only when it is a non-empty string.
                const str = (value) => (typeof value === 'string' && value) ? value : null;
                const forms = document.querySelectorAll('form');
                return Array.from(forms).map((form, idx) => {
                    const fields = Array.from(form.querySelectorAll('input, select, textarea'));
                    return {
                        index: idx,
                        id: form.getAttribute('id') || null,
                        name: form.getAttribute('name') || null,
                        action: str(form.action),
                        method: str(form.method) || 'get',
                        className: str(form.className),
                        fieldCount: fields.length,
                        fields: fields.map(field => ({
                            type: field.type || field.tagName.toLowerCase(),
                            name: field.name || null,
                            id: field.id || null,
                            required: field.required || false,
                            placeholder: field.placeholder || null,
                            validation: field.pattern || null,
                            maxLength: field.maxLength > 0 ? field.maxLength : null,
                        })),
                        // A <button> without a type attribute submits its
                        // form, and so does <input type="image">.
                        hasSubmitButton: form.querySelector(
                            'button:not([type]), button[type="submit"], ' +
                            'input[type="submit"], input[type="image"]'
                        ) !== null,
                    };
                });
            }
        """)

        self.forms = forms_data
        return forms_data

    def analyze_form_tracking_readiness(self):
        """Check if forms are ready for GTM tracking.

        Flags forms without an id/name, text/email/tel inputs without a
        name/id, and forms without a submit button.

        Returns:
            A list of issue dicts, also stored on ``self.issues``.
        """
        issues = []

        for form in self.forms:
            form_index = form["index"]

            # Check for identifiers
            if not form["id"] and not form["name"]:
                issues.append({
                    "type": "form_no_identifier",
                    "form_index": form_index,
                    "message": f"Form {form_index} has no id or name attribute",
                    "recommendation": "Add id or name attribute for reliable form tracking",
                })

            # Check fields for tracking
            for field in form["fields"]:
                is_text_input = field["type"] in ("text", "email", "tel")
                if is_text_input and not field["name"] and not field["id"]:
                    issues.append({
                        "type": "field_no_identifier",
                        "form_index": form_index,
                        "field_type": field["type"],
                        "message": "Input field missing name/id for tracking",
                    })

            # Check for submit button
            if not form["hasSubmitButton"]:
                issues.append({
                    "type": "form_no_submit",
                    "form_index": form_index,
                    "message": "Form has no submit button - may use JS submission",
                    "recommendation": "Verify form submission triggers dataLayer push",
                })

        self.issues = issues
        return issues

    @staticmethod
    def _attr_selector(attribute, value):
        """Build a CSS attribute selector with the value safely quoted."""
        escaped = str(value).replace("\\", "\\\\").replace('"', '\\"')
        return f'[{attribute}="{escaped}"]'

    def _field_selector(self, field):
        """Return a selector for a discovered field, or None if it has no id/name."""
        # Attribute selectors are used instead of "#id" because ids may
        # contain characters (".", ":", a leading digit) that are not valid
        # in an unescaped id selector.
        if field["id"]:
            return self._attr_selector("id", field["id"])
        if field["name"]:
            return self._attr_selector("name", field["name"])
        return None

    def simulate_form_interaction(self, form_index=0):
        """Simulate user interaction with a form.

        Focuses, optionally fills (see ``_TEST_VALUES``) and blurs every
        field of the chosen form that has an id or name. Fields are looked
        up inside the form itself, so a same-named field elsewhere on the
        page is not touched by mistake.

        Args:
            form_index: Index into ``self.forms``. Negative indexes count
                from the end.

        Returns:
            A list of interaction records (``action`` is ``focus``,
            ``input``, ``blur`` or ``error``), also stored on
            ``self.interactions``; or ``{"error": ...}`` when the index is
            out of range or the form cannot be located.
        """
        total = len(self.forms)
        if not -total <= form_index < total:
            return {"error": "Form index out of range"}
        if form_index < 0:
            form_index += total

        form = self.forms[form_index]
        interactions = []

        try:
            # discover_forms() numbers forms in querySelectorAll order.
            # NOTE(review): Playwright locators also match inside open
            # shadow roots, which could shift this index on pages that keep
            # forms in shadow DOM - confirm if such pages are audited.
            form_scope = self.page.locator("form").nth(form_index)

            for field in form["fields"]:
                selector = self._field_selector(field)
                if selector is None:
                    continue

                label = field["name"] or field["id"]
                try:
                    element = form_scope.locator(selector).first

                    # Focus event
                    element.focus()
                    interactions.append({
                        "action": "focus",
                        "field": label,
                        "timestamp": datetime.now().isoformat(),
                    })
                    self.page.wait_for_timeout(200)

                    # Fill based on type
                    sample = self._TEST_VALUES.get(field["type"])
                    if sample is not None:
                        element.fill(sample)
                        interactions.append({
                            "action": "input",
                            "field": label,
                            "type": field["type"],
                            "timestamp": datetime.now().isoformat(),
                        })
                        self.page.wait_for_timeout(200)

                    # Blur event
                    element.blur()
                    interactions.append({
                        "action": "blur",
                        "field": label,
                        "timestamp": datetime.now().isoformat(),
                    })
                except Exception as e:
                    # Best effort: record the failure and continue with the
                    # remaining fields.
                    interactions.append({
                        "action": "error",
                        "field": label,
                        "error": str(e),
                    })

            self.interactions = interactions
            return interactions
        except Exception as e:
            return {"error": str(e)}

    def check_form_events(self, datalayer_events):
        """Check if expected form events are in dataLayer.

        Args:
            datalayer_events: Captured dataLayer entries. Non-dict entries
                are ignored, and a non-list argument is treated as empty.

        Returns:
            A dict with ``found`` and ``missing`` event-name lists and a
            ``recommendation`` string (``None`` when nothing is missing).
        """
        expected_events = ["form_start", "form_submit", "generate_lead"]

        if not isinstance(datalayer_events, list):
            datalayer_events = []
        event_names = [
            entry.get("event")
            for entry in datalayer_events
            if isinstance(entry, dict)
        ]

        found_events = [name for name in expected_events if name in event_names]
        missing_events = [name for name in expected_events if name not in event_names]

        recommendation = None
        if missing_events:
            recommendation = "Consider implementing: " + ", ".join(missing_events)

        return {
            "found": found_events,
            "missing": missing_events,
            "recommendation": recommendation,
        }


class CheckoutFlowAnalyzer:
    """E-commerce checkout flow simulation and validation.

    Wraps a Playwright page. Clicks that succeed are recorded in
    ``steps_completed`` and click failures in ``issues``.
    """

    def __init__(self, page):
        self.page = page
        self.steps_completed = []
        self.events_captured = []
        self.issues = []

    def detect_checkout_elements(self):
        """Find checkout-related elements on page.

        Returns:
            A dict keyed by element category (``cart``, ``checkout``,
            ``addToCart``, ``quantity``, ``removeItem``, ``promoCode``).
            Each value lists the matches as dicts with ``selector``,
            ``tag``, ``text`` (first 50 characters) and ``visible``.
        """
        return self.page.evaluate("""
            () => {
                const selectors = {
                    cart: [
                        '[class*="cart"]', '[id*="cart"]',
                        '[class*="basket"]', '[id*="basket"]',
                    ],
                    checkout: [
                        '[class*="checkout"]', '[id*="checkout"]',
                        'button:has-text("Checkout")', 'a:has-text("Checkout")',
                        'button:has-text("결제")', 'a:has-text("결제")',
                    ],
                    addToCart: [
                        'button:has-text("Add to Cart")', 'button:has-text("Add to Bag")',
                        'button:has-text("장바구니")', 'button:has-text("담기")',
                        '[class*="add-to-cart"]', '[id*="add-to-cart"]',
                    ],
                    quantity: [
                        '[class*="quantity"]', '[name*="quantity"]',
                        '[class*="qty"]', '[name*="qty"]',
                    ],
                    removeItem: [
                        '[class*="remove"]', 'button:has-text("Remove")',
                        'button:has-text("삭제")', '[class*="delete"]',
                    ],
                    promoCode: [
                        '[name*="promo"]', '[name*="coupon"]', '[id*="coupon"]',
                        '[placeholder*="promo"]', '[placeholder*="coupon"]',
                    ],
                };

                // ':has-text("...")' is a Playwright selector extension that
                // document.querySelectorAll rejects as invalid. Split it off
                // and filter the base selector's matches by their text
                // (case-insensitive, whitespace-collapsed substring).
                const hasText = /^(.*):has-text\\("(.*)"\\)$/;
                const normalize = (s) => (s || '').replace(/\\s+/g, ' ').trim().toLowerCase();
                const query = (sel) => {
                    const parts = sel.match(hasText);
                    if (!parts) {
                        return Array.from(document.querySelectorAll(sel));
                    }
                    const needle = normalize(parts[2]);
                    return Array.from(document.querySelectorAll(parts[1] || '*'))
                        .filter(el => normalize(el.textContent).includes(needle));
                };

                const found = {};
                for (const [type, selectorList] of Object.entries(selectors)) {
                    found[type] = [];
                    for (const sel of selectorList) {
                        try {
                            query(sel).forEach(el => {
                                found[type].push({
                                    selector: sel,
                                    tag: el.tagName.toLowerCase(),
                                    text: el.textContent?.slice(0, 50) || null,
                                    visible: el.offsetParent !== null,
                                });
                            });
                        } catch (e) {
                            // Skip selectors the browser cannot parse.
                        }
                    }
                }
                return found;
            }
        """)

    def _click_first_visible(self, step, selectors, settle_ms):
        """Click the first visible element among ``selectors`` and record the step.

        Never raises: a selector that fails is recorded in ``self.issues``
        and the next one is tried.

        Returns:
            True when an element was clicked, False when none was clickable.
        """
        for selector in selectors:
            try:
                button = self.page.locator(selector).first
                if not button.is_visible():
                    continue
                button.click()
            except Exception as e:
                self.issues.append({"step": step, "selector": selector, "error": str(e)})
                continue

            self.steps_completed.append({
                "step": step,
                "selector": selector,
                "timestamp": datetime.now().isoformat(),
            })
            try:
                # Give the page time to react (dataLayer pushes, navigation).
                self.page.wait_for_timeout(settle_ms)
            except Exception as e:
                self.issues.append({"step": step, "selector": selector, "error": str(e)})
            return True

        return False

    def simulate_add_to_cart(self):
        """Attempt to simulate add-to-cart action.

        Returns:
            True if an add-to-cart control was clicked, otherwise False.
        """
        return self._click_first_visible(
            "add_to_cart",
            [
                'button:has-text("Add to Cart")',
                'button:has-text("Add to Bag")',
                'button:has-text("장바구니")',
                '[class*="add-to-cart"]:visible',
                '[id*="add-to-cart"]:visible',
            ],
            1500,
        )

    def simulate_begin_checkout(self):
        """Attempt to click checkout button.

        Returns:
            True if a checkout control was clicked, otherwise False.
        """
        return self._click_first_visible(
            "begin_checkout",
            [
                'button:has-text("Checkout")',
                'a:has-text("Checkout")',
                'button:has-text("결제하기")',
                'button:has-text("주문하기")',
                '[class*="checkout-btn"]:visible',
            ],
            2000,
        )

    def validate_checkout_events(self, datalayer_events):
        """Validate checkout-related events in dataLayer.

        Args:
            datalayer_events: Captured dataLayer entries in push order.
                Non-dict entries are ignored, and a non-list argument is
                treated as empty.

        Returns:
            A dict with ``events_found``, ``events_missing``,
            ``sequence_valid`` and ``issues``. The sequence is valid when
            the checkout events never step backwards in
            ``CHECKOUT_SEQUENCE``; repeating an event is allowed.
        """
        results = {
            "events_found": [],
            "events_missing": [],
            "sequence_valid": True,
            "issues": [],
        }

        if not isinstance(datalayer_events, list):
            datalayer_events = []
        events = [entry for entry in datalayer_events if isinstance(entry, dict)]
        event_names = [entry.get("event") for entry in events]

        # Check each checkout step and validate the parameters of every hit.
        validator = DataLayerValidator()
        for step in CHECKOUT_SEQUENCE:
            hits = [entry for entry in events if entry.get("event") == step]
            if not hits:
                results["events_missing"].append(step)
                continue

            results["events_found"].append(step)
            for hit in hits:
                results["issues"].extend(validator.validate_event(hit))

        # Check sequence: positions must be non-decreasing. Comparing the
        # raw list with a de-duplicated expected list would report a
        # repeated event (for example view_cart firing twice) as out of order.
        found_sequence = [name for name in event_names if name in CHECKOUT_SEQUENCE]
        positions = [CHECKOUT_SEQUENCE.index(name) for name in found_sequence]
        in_order = all(a <= b for a, b in zip(positions, positions[1:]))

        if not in_order:
            expected_order = [name for name in CHECKOUT_SEQUENCE if name in found_sequence]
            results["sequence_valid"] = False
            results["issues"].append({
                "type": "sequence_error",
                "message": f"Events out of order. Found: {found_sequence}, Expected: {expected_order}",
            })

        return results


class GTMAuditor:
    """Main GTM audit orchestrator.

    Loads the target URL in Chromium via Playwright, inspects the GTM
    installation, optionally exercises scroll/form/checkout journeys,
    analyses the dataLayer and collects everything in ``self.report``.
    """

    def __init__(self, url, container_id=None, timeout=30000, headless=True):
        """Store the audit settings and create an empty report skeleton.

        Args:
            url: Page to audit.
            container_id: Expected GTM container ID, or None to skip the check.
            timeout: Page-load timeout in milliseconds.
            headless: Whether to launch the browser without a window.
        """
        self.url = url
        self.expected_container = container_id
        self.timeout = timeout
        self.headless = headless
        self.report = {
            "audit_metadata": {
                "url": url,
                "timestamp": datetime.now().isoformat(),
                "expected_container": container_id,
            },
            "container_status": {},
            "datalayer_analysis": {
                "events": [],
                "validation_issues": [],
                "sequence_issues": [],
            },
            "form_analysis": {
                "forms_found": [],
                "tracking_issues": [],
                "events_status": {},
            },
            "checkout_analysis": {
                "elements_found": {},
                "events_status": {},
                "flow_issues": [],
            },
            "network_requests": [],
            "tags_fired": [],
            "issues": [],
            "recommendations": [],
            "checklist": {},
        }
        self.network_requests = []
        self.datalayer_history = []
        self.page = None

    @staticmethod
    def _as_event_dicts(datalayer):
        """Return the dataLayer as a list of dicts, keeping entry positions.

        Non-dict entries become ``{}`` so indexes still line up with the
        captured dataLayer. Anything that is not a list (``None`` or the
        ``{"error": ...}`` dict from a failed capture) yields an empty list.
        """
        if not isinstance(datalayer, list):
            return []
        return [entry if isinstance(entry, dict) else {} for entry in datalayer]

    def _setup_network_monitoring(self, page):
        """Intercept and log network requests to tag destinations."""
        def handle_request(request):
            url = request.url
            for destination, patterns in TAG_DESTINATIONS.items():
                # One record per destination, even if several patterns match.
                if not any(re.search(pattern, url) for pattern in patterns):
                    continue
                params = parse_qs(urlparse(url).query)
                self.network_requests.append({
                    "destination": destination,
                    "url": url[:200],
                    "method": request.method,
                    # Single-valued parameters are unwrapped from their list.
                    "params": {k: v[0] if len(v) == 1 else v for k, v in params.items()},
                    "timestamp": datetime.now().isoformat(),
                })

        page.on("request", handle_request)

    def _setup_datalayer_monitoring(self, page):
        """Inject dataLayer monitoring script.

        Wraps ``window.dataLayer.push`` so every later push is copied into
        ``window.__gtmAuditEvents``. The page is left untouched when it has
        no dataLayer array, and the wrapper is installed at most once.

        Returns:
            True when monitoring is active, False when the page has no
            dataLayer array.
        """
        return page.evaluate("""
            () => {
                if (!Array.isArray(window.dataLayer)) {
                    return false;
                }
                if (Array.isArray(window.__gtmAuditEvents)) {
                    return true;
                }
                window.__gtmAuditEvents = [];
                const originalPush = window.dataLayer.push;
                window.dataLayer.push = function() {
                    const result = originalPush.apply(this, arguments);
                    for (let i = 0; i < arguments.length; i++) {
                        let data;
                        try {
                            data = JSON.parse(JSON.stringify(arguments[i]));
                        } catch (e) {
                            // Serialization can fail (for example on
                            // circular references); never let the audit
                            // hook break the page's own push.
                            data = { __unserializable: String(e) };
                        }
                        window.__gtmAuditEvents.push({
                            data: data,
                            timestamp: new Date().toISOString()
                        });
                    }
                    return result;
                };
                return true;
            }
        """)

    def _capture_datalayer(self, page):
        """Capture current dataLayer state.

        Returns:
            A list with a JSON-safe copy of every dataLayer entry (entries
            that cannot be serialized become ``None``), ``None`` when the
            page has no dataLayer array, or ``{"error": ...}`` when the
            page could not be evaluated.
        """
        try:
            return page.evaluate("""
                () => {
                    if (!Array.isArray(window.dataLayer)) {
                        return null;
                    }
                    // Copy entry by entry so one unserializable push does
                    // not discard the whole capture.
                    return Array.from(window.dataLayer, (entry) => {
                        try {
                            const json = JSON.stringify(entry);
                            return json === undefined ? null : JSON.parse(json);
                        } catch (e) {
                            return null;
                        }
                    });
                }
            """)
        except Exception as e:
            return {"error": str(e)}

    def _capture_monitored_events(self, page):
        """Capture events logged by our monitoring (empty list on failure)."""
        try:
            return page.evaluate("""
                () => window.__gtmAuditEvents || []
            """)
        except Exception:
            return []

    def _check_gtm_container(self, page):
        """Verify GTM container installation.

        Scans the page's ``<script>`` and ``<noscript>`` elements, stores the
        findings in ``report["container_status"]`` and appends any problems
        to ``report["issues"]``.

        Returns:
            The container status dict.
        """
        result = page.evaluate("""
            () => {
                const scripts = document.querySelectorAll('script');
                const gtmInfo = {
                    installed: false,
                    containers: [],
                    position: null,
                    noscript: false,
                    dataLayerInit: false,
                    dataLayerInitBeforeGTM: false,
                };

                gtmInfo.dataLayerInit = typeof window.dataLayer !== 'undefined' &&
                    Array.isArray(window.dataLayer);

                // Both indexes keep the FIRST matching script, so later
                // scripts that merely mention dataLayer do not move them.
                let gtmScriptIndex = -1;
                let dataLayerInitIndex = -1;

                scripts.forEach((script, index) => {
                    const src = script.src || '';
                    const innerHTML = script.innerHTML || '';

                    // Check for dataLayer init
                    if (dataLayerInitIndex === -1 &&
                            innerHTML.includes('dataLayer') && innerHTML.includes('[]')) {
                        dataLayerInitIndex = index;
                    }

                    const gtmMatch = src.match(/gtm\\.js\\?id=(GTM-[A-Z0-9]+)/);
                    if (gtmMatch) {
                        gtmInfo.installed = true;
                        if (!gtmInfo.containers.includes(gtmMatch[1])) {
                            gtmInfo.containers.push(gtmMatch[1]);
                        }
                        if (gtmScriptIndex === -1) {
                            gtmInfo.position = script.closest('head') ? 'head' : 'body';
                            gtmScriptIndex = index;
                        }
                    }

                    const inlineMatch = innerHTML.match(/GTM-[A-Z0-9]+/g);
                    if (inlineMatch) {
                        gtmInfo.installed = true;
                        inlineMatch.forEach(id => {
                            if (!gtmInfo.containers.includes(id)) {
                                gtmInfo.containers.push(id);
                            }
                        });
                    }
                });

                gtmInfo.dataLayerInitBeforeGTM = dataLayerInitIndex < gtmScriptIndex && dataLayerInitIndex !== -1;

                const noscripts = document.querySelectorAll('noscript');
                noscripts.forEach(ns => {
                    if (ns.innerHTML.includes('googletagmanager.com/ns.html')) {
                        gtmInfo.noscript = true;
                    }
                });

                return gtmInfo;
            }
        """)

        status = {
            "installed": result["installed"],
            "containers": result["containers"],
            "position": result["position"],
            "noscript_present": result["noscript"],
            "datalayer_initialized": result["dataLayerInit"],
            "datalayer_init_before_gtm": result["dataLayerInitBeforeGTM"],
            "issues": [],
        }

        if not result["installed"]:
            status["issues"].append("GTM container not detected")
            self.report["issues"].append({
                "severity": "critical",
                "type": "container_missing",
                "message": "GTM container script not found on page",
            })

        if len(result["containers"]) > 1:
            status["issues"].append(f"Multiple containers: {result['containers']}")
            self.report["issues"].append({
                "severity": "warning",
                "type": "multiple_containers",
                "message": f"Multiple GTM containers found: {', '.join(result['containers'])}",
            })

        if self.expected_container and self.expected_container not in result["containers"]:
            self.report["issues"].append({
                "severity": "error",
                "type": "container_mismatch",
                "message": f"Expected {self.expected_container}, found {result['containers']}",
            })

        if result["position"] == "body":
            self.report["issues"].append({
                "severity": "warning",
                "type": "script_position",
                "message": "GTM script in body - may delay tag firing",
            })

        # "position" is only set when a gtm.js <script src> was found; without
        # one there is no script order to compare, so no ordering warning.
        # NOTE(review): this is a DOM-order heuristic - confirm it behaves as
        # intended for loader snippets that insert gtm.js ahead of themselves.
        if result["position"] is not None and not result["dataLayerInitBeforeGTM"]:
            self.report["issues"].append({
                "severity": "warning",
                "type": "datalayer_order",
                "message": "dataLayer should be initialized before GTM script",
            })

        self.report["container_status"] = status
        return status

    def _simulate_scroll(self, page):
        """Simulate scroll to trigger scroll-depth tags.

        Scrolls to 25%, 50%, 75%, 90% and 100% of the document height at
        500 ms intervals, then waits 3 seconds.
        """
        page.evaluate("""
            () => {
                const heights = [0.25, 0.5, 0.75, 0.9, 1.0];
                const docHeight = document.documentElement.scrollHeight;
                heights.forEach((pct, i) => {
                    setTimeout(() => {
                        window.scrollTo(0, docHeight * pct);
                    }, i * 500);
                });
            }
        """)
        page.wait_for_timeout(3000)

    def _run_form_audit(self, page):
        """Execute form analysis and store the results in the report."""
        print("📝 Analyzing forms...")

        form_analyzer = FormAnalyzer(page)
        forms = form_analyzer.discover_forms()
        tracking_issues = form_analyzer.analyze_form_tracking_readiness()

        self.report["form_analysis"]["forms_found"] = forms
        self.report["form_analysis"]["tracking_issues"] = tracking_issues

        if not forms:
            print(" No forms found on page")
            return

        print(f" Found {len(forms)} form(s)")
        # Simulate interaction with first form
        interactions = form_analyzer.simulate_form_interaction(0)
        self.report["form_analysis"]["interactions"] = interactions

        # Allow time for events
        page.wait_for_timeout(2000)

        # Check form events. A failed capture returns an error dict, which
        # must not be passed on as if it were a list of events.
        datalayer = self._capture_datalayer(page)
        if isinstance(datalayer, list) and datalayer:
            events_status = form_analyzer.check_form_events(self._as_event_dicts(datalayer))
            self.report["form_analysis"]["events_status"] = events_status

    def _run_checkout_audit(self, page):
        """Execute e-commerce checkout flow analysis.

        Records the checkout-related elements found on the page and
        validates the checkout events already present in the dataLayer.
        No buttons are clicked.
        """
        print("🛒 Analyzing checkout flow...")

        checkout_analyzer = CheckoutFlowAnalyzer(page)
        elements = checkout_analyzer.detect_checkout_elements()

        self.report["checkout_analysis"]["elements_found"] = elements

        # Log what we found
        for element_type, found in elements.items():
            if found:
                print(f" Found {len(found)} {element_type} element(s)")

        # Fill the report's events_status / flow_issues slots from the
        # dataLayer as it stands.
        datalayer = self._capture_datalayer(page)
        if isinstance(datalayer, list):
            status = checkout_analyzer.validate_checkout_events(self._as_event_dicts(datalayer))
            self.report["checkout_analysis"]["flow_issues"] = status.pop("issues", [])
            self.report["checkout_analysis"]["events_status"] = status

    def _run_datalayer_audit(self, page):
        """Execute deep dataLayer analysis.

        Safe to call more than once: every call replaces the previous
        ``datalayer_analysis`` results instead of appending to them.
        """
        print("📊 Analyzing dataLayer...")

        datalayer = self._capture_datalayer(page)
        monitored_events = self._capture_monitored_events(page)
        analysis = self.report["datalayer_analysis"]

        # Start from a clean slate so repeated runs do not duplicate issues.
        analysis["events"] = []
        analysis["validation_issues"] = []
        analysis["sequence_issues"] = []
        analysis["monitored_events"] = monitored_events

        if isinstance(datalayer, dict) and "error" in datalayer:
            analysis["issues"] = [f"dataLayer capture failed: {datalayer['error']}"]
            return
        if not datalayer:
            analysis["issues"] = ["dataLayer not found"]
            return

        # Non-dict entries become {} so entry indexes stay aligned while the
        # validators only ever see dicts.
        entries = self._as_event_dicts(datalayer)
        validator = DataLayerValidator()

        # Validate each event
        validation_issues = []
        for entry in entries:
            validation_issues.extend(validator.validate_event(entry))

        # Check sequence
        analysis["sequence_issues"] = validator.validate_sequence(entries)

        # Check ecommerce clearing
        validation_issues.extend(validator.check_ecommerce_clear(entries))
        analysis["validation_issues"] = validation_issues

        # Store events
        events = [
            {
                "index": index,
                "event": entry.get("event"),
                "has_ecommerce": "ecommerce" in entry,
                "params": list(entry.keys()),
            }
            for index, entry in enumerate(entries)
            if entry.get("event")
        ]

        analysis["events"] = events
        print(f" Found {len(events)} events in dataLayer")

    def _generate_recommendations(self):
        """Generate recommendations based on findings."""
        recs = []

        for issue in self.report["issues"]:
            if issue["type"] == "container_missing":
                recs.append({
                    "priority": "high",
                    "action": "Install GTM container",
                    "details": "Add GTM snippet to <head> section",
                })
            elif issue["type"] == "datalayer_order":
                recs.append({
                    "priority": "medium",
                    "action": "Initialize dataLayer before GTM",
                    "details": "Add 'window.dataLayer = window.dataLayer || [];' before GTM",
                })

        # Form recommendations (only when forms exist and events are missing)
        form_report = self.report["form_analysis"]
        missing = form_report.get("events_status", {}).get("missing")
        if form_report["forms_found"] and missing:
            recs.append({
                "priority": "medium",
                "action": "Implement form tracking events",
                "details": f"Missing events: {', '.join(missing)}",
            })

        # DataLayer recommendations
        validation_issues = self.report["datalayer_analysis"].get("validation_issues", [])
        if validation_issues:
            recs.append({
                "priority": "high",
                "action": "Fix dataLayer validation issues",
                "details": f"{len(validation_issues)} issue(s) found in event structure",
            })

        # Tag coverage
        destinations = {r["destination"] for r in self.network_requests}
        if "GA4" not in destinations:
            recs.append({
                "priority": "high",
                "action": "Verify GA4 implementation",
                "details": "No GA4 requests detected",
            })

        self.report["recommendations"] = recs

    def _generate_checklist(self):
        """Generate audit checklist (pass/fail booleans grouped by area)."""
        container = self.report["container_status"]
        datalayer = self.report["datalayer_analysis"]
        form_report = self.report["form_analysis"]

        containers = container.get("containers", [])
        forms = form_report.get("forms_found", [])
        form_events_found = form_report.get("events_status", {}).get("found", [])

        if self.expected_container:
            correct_container = self.expected_container in containers
        else:
            correct_container = True

        self.report["checklist"] = {
            "container_health": {
                "gtm_installed": container.get("installed", False),
                "correct_container": correct_container,
                "no_duplicates": len(containers) <= 1,
                "correct_position": container.get("position") == "head",
                "datalayer_init_order": container.get("datalayer_init_before_gtm", False),
            },
            "datalayer_quality": {
                "initialized": container.get("datalayer_initialized", False),
                "events_present": len(datalayer.get("events", [])) > 0,
                "no_validation_errors": len(datalayer.get("validation_issues", [])) == 0,
                "correct_sequence": len(datalayer.get("sequence_issues", [])) == 0,
            },
            "form_tracking": {
                # Both checks pass trivially when the page has no forms.
                "forms_identifiable": all(
                    bool(f.get("id") or f.get("name")) for f in forms
                ) if forms else True,
                "form_events_present": len(form_events_found) > 0 if forms else True,
            },
            "tag_firing": {
                "ga4_active": any(r["destination"] == "GA4" for r in self.network_requests),
                "requests_captured": len(self.network_requests) > 0,
            },
        }

    def run_audit(self, journey="pageview"):
        """Execute the full audit workflow.

        Args:
            journey: One of ``pageview``, ``scroll``, ``click``, ``form``,
                ``checkout``, ``datalayer`` or ``full``. ``scroll``,
                ``pageview`` and ``full`` simulate scrolling; ``form`` and
                ``checkout`` add their analyses; ``full`` runs everything.
                The dataLayer analysis runs once for every journey.

        Returns:
            The report dict. Errors raised while auditing the page are
            recorded as a critical ``audit_error`` issue instead of raised.
        """
        print(f"🔍 Starting GTM audit for: {self.url}")
        print(f" Journey type: {journey}")

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=self.headless)
            context = browser.new_context(
                viewport={"width": 1920, "height": 1080},
                user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) GTMAudit/1.0",
            )
            page = context.new_page()
            self.page = page

            self._setup_network_monitoring(page)

            try:
                print("📄 Loading page...")
                page.goto(self.url, timeout=self.timeout, wait_until="networkidle")
                page.wait_for_timeout(2000)

                # Setup dataLayer monitoring after page load. Best effort:
                # the audit continues without it.
                try:
                    self._setup_datalayer_monitoring(page)
                except Exception as e:
                    print(f" dataLayer monitoring unavailable: {e}")

                print("🏷️ Checking GTM container...")
                self._check_gtm_container(page)

                # Run journey-specific audits
                if journey in ("scroll", "pageview", "full"):
                    print("📜 Simulating scroll...")
                    self._simulate_scroll(page)

                if journey in ("form", "full"):
                    self._run_form_audit(page)

                if journey in ("checkout", "full"):
                    self._run_checkout_audit(page)

                # The dataLayer analysis always runs, once, after a short
                # settle delay so late pushes are included.
                page.wait_for_timeout(2000)
                self._run_datalayer_audit(page)

            except Exception as e:
                self.report["issues"].append({
                    "severity": "critical",
                    "type": "audit_error",
                    "message": str(e),
                })
            finally:
                # Keep whatever traffic was captured, even after a failure;
                # sort for a stable report.
                self.report["network_requests"] = self.network_requests
                self.report["tags_fired"] = sorted(
                    {r["destination"] for r in self.network_requests}
                )
                browser.close()

        self._generate_recommendations()
        self._generate_checklist()

        print("✅ Audit complete!")
        return self.report

    def save_report(self, filepath):
        """Save report to JSON file."""
        with open(filepath, "w", encoding="utf-8") as f:
            json.dump(self.report, f, indent=2, ensure_ascii=False)
        print(f"📝 Report saved to: {filepath}")

    def print_summary(self):
        """Print audit summary to console."""
        separator = "=" * 60
        print("\n" + separator)
        print("📋 GTM AUDIT SUMMARY")
        print(separator)

        # Container
        cs = self.report["container_status"]
        print(f"\n🏷️ Container: {'✅ Installed' if cs.get('installed') else '❌ Not Found'}")
        if cs.get("containers"):
            print(f" IDs: {', '.join(cs['containers'])}")

        # DataLayer
        dl = self.report["datalayer_analysis"]
        print("\n📊 DataLayer:")
        print(f" Events found: {len(dl.get('events', []))}")
        print(f" Validation issues: {len(dl.get('validation_issues', []))}")

        # Forms
        fa = self.report["form_analysis"]
        if fa.get("forms_found"):
            print("\n📝 Forms:")
            print(f" Forms found: {len(fa['forms_found'])}")
            print(f" Tracking issues: {len(fa.get('tracking_issues', []))}")

        # Tags
        tags = self.report["tags_fired"]
        print(f"\n🔥 Tags Fired: {', '.join(tags) if tags else 'None detected'}")

        # Issues (first five)
        print(f"\n⚠️ Total Issues: {len(self.report['issues'])}")
        for issue in self.report["issues"][:5]:
            print(f" - [{issue['severity'].upper()}] {issue['message']}")

        # Recommendations (first three)
        print(f"\n💡 Recommendations: {len(self.report['recommendations'])}")
        for rec in self.report["recommendations"][:3]:
            print(f" - [{rec['priority'].upper()}] {rec['action']}")

        print("\n" + separator)


def main(argv=None):
    """Parse command-line options, run the audit, then save and print the report.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv``.
    """
    parser = argparse.ArgumentParser(description="GTM Audit Tool")
    parser.add_argument("--url", required=True, help="Target URL to audit")
    parser.add_argument("--container", help="Expected GTM container ID (e.g., GTM-XXXXXX)")
    parser.add_argument("--journey", default="full",
                        choices=["pageview", "scroll", "click", "form", "checkout", "datalayer", "full"],
                        help="Journey type to simulate")
    parser.add_argument("--output", default="gtm_audit_report.json", help="Output file path")
    parser.add_argument("--timeout", type=int, default=30000, help="Page load timeout (ms)")
    # "--headless" stays accepted for backward compatibility. Because it is a
    # store_true flag that already defaults to True, it could never switch
    # headless mode off, so "--no-headless" is the way to get a visible browser.
    parser.add_argument("--headless", dest="headless", action="store_true", default=True,
                        help="Run headless (default)")
    parser.add_argument("--no-headless", dest="headless", action="store_false",
                        help="Run with a visible browser window")

    args = parser.parse_args(argv)

    auditor = GTMAuditor(
        url=args.url,
        container_id=args.container,
        timeout=args.timeout,
        headless=args.headless,
    )

    auditor.run_audit(journey=args.journey)
    auditor.save_report(args.output)
    auditor.print_summary()


if __name__ == "__main__":
    main()