#!/usr/bin/env python3
"""
Synthesize research content and extract presentation topics
Analyzes research data to identify key themes, agenda items, and slide structure
"""

import json
import argparse
from typing import Dict, List, Any
from collections import Counter
import re

# A period ends a sentence unless a digit follows it, so decimals such as
# "3.5%" are kept whole instead of being cut into "3" and "5%".
_SENTENCE_END = re.compile(r"\.(?!\d)")

# Words that mark a sentence as a key point regardless of its length.
_KEY_POINT_INDICATORS = ("key", "important", "significant", "critical", "major")

# Words that mark a topic as needing a decision from the audience.
_DECISION_INDICATORS = ("recommend", "decide", "choose", "select", "approve")


def _split_sentences(text: str) -> List[str]:
    """Split *text* into stripped, non-empty sentences without trailing periods."""
    if not text:
        return []
    pieces = (piece.strip() for piece in _SENTENCE_END.split(text))
    return [piece for piece in pieces if piece]


class ContentSynthesizer:
    """Analyzes research and generates presentation structure.

    The input is a dict that may carry a ``"metadata"`` dict and a
    ``"content"`` dict holding a ``"sections"`` list. Every key is read
    defensively, so missing keys yield empty output rather than errors.
    """

    def __init__(self, research_data: Dict):
        self.research_data = research_data
        self.synthesis: Dict[str, Any] = {
            "metadata": {},
            "executive_summary": "",
            "key_topics": [],
            "agenda_items": [],
            "supporting_data": [],
            "recommendations": [],
            "slide_plan": [],
        }

    def _sections(self) -> List[Dict]:
        """Return the research sections, or an empty list when absent or null."""
        content = self.research_data.get("content") or {}
        return content.get("sections") or []

    def synthesize(self) -> Dict[str, Any]:
        """Execute complete synthesis pipeline.

        Each step rebuilds its own part of ``self.synthesis``, so calling this
        method more than once does not duplicate entries.

        Returns:
            The ``self.synthesis`` dict, populated in place.
        """
        self.extract_metadata()
        self.generate_executive_summary()
        self.extract_key_topics()
        self.derive_agenda_items()
        self.collect_supporting_data()
        self.extract_recommendations()
        self.create_slide_plan()
        return self.synthesis

    def extract_metadata(self):
        """Extract presentation metadata from research."""
        source = self.research_data.get("metadata") or {}
        self.synthesis["metadata"] = {
            "title": source.get("title", "Research Presentation"),
            "date": source.get("last_edited", ""),
            "author": source.get("created_by", ""),
            "tags": source.get("tags", []),
        }

    def generate_executive_summary(self):
        """Create concise executive summary.

        Uses the content of the first section whose title contains
        "executive"; otherwise joins the first sentence of each of the first
        three sections. Sections without content contribute nothing.
        """
        sections = self._sections()

        for section in sections:
            if "executive" in section.get("title", "").lower():
                self.synthesis["executive_summary"] = section.get("content", "")
                return

        summary_parts = []
        for section in sections[:3]:
            sentences = _split_sentences(section.get("content", ""))
            if sentences:
                summary_parts.append(sentences[0] + ".")
        self.synthesis["executive_summary"] = " ".join(summary_parts)

    def extract_key_topics(self):
        """Identify main topics from research, most important first."""
        topics = []
        for section in self._sections():
            topic = {
                "title": section.get("title", ""),
                "importance": self.calculate_importance(section),
                "key_points": self.extract_key_points(section),
                "data_points": section.get("data_points", []),
                "speaker_notes": self.generate_speaker_notes(section),
            }

            # Include subsections as subtopics
            subsections = section.get("subsections")
            if subsections:
                topic["subtopics"] = [
                    {
                        "title": sub.get("title", ""),
                        "key_points": self.extract_key_points(sub),
                    }
                    for sub in subsections
                ]

            topics.append(topic)

        # The sort is stable, so equally important topics keep document order.
        topics.sort(key=lambda item: item["importance"], reverse=True)
        self.synthesis["key_topics"] = topics

    def calculate_importance(self, section: Dict) -> float:
        """Calculate topic importance score.

        Starts at 1.0 and adds 0.5 for a level-1 section, 0.3 per data point,
        0.4 when action items exist, and 0.2 when the content is longer than
        500 characters.
        """
        score = 1.0

        if section.get("level") == 1:
            score += 0.5

        data_points = section.get("data_points")
        if data_points:
            score += 0.3 * len(data_points)

        if section.get("action_items"):
            score += 0.4

        if len(section.get("content", "")) > 500:
            score += 0.2

        return score

    def extract_key_points(self, section: Dict) -> List[str]:
        """Extract bullet points from section content.

        A sentence becomes a bullet when it contains an indicator word or is
        between 11 and 99 characters long. Action items are appended after the
        sentences, and the result is capped at five entries.
        """
        key_points = []
        for sentence in _split_sentences(section.get("content", "")):
            lowered = sentence.lower()
            has_indicator = any(word in lowered for word in _KEY_POINT_INDICATORS)
            if has_indicator or 10 < len(sentence) < 100:
                key_points.append(sentence + ".")

        key_points.extend(section.get("action_items") or [])

        return key_points[:5]  # Limit to 5 points per slide

    def generate_speaker_notes(self, section: Dict) -> str:
        """Generate speaker notes for section.

        The notes are the first three sentences of the content, followed by a
        "Key metrics to highlight" list when the section has data points.
        Data points lacking a ``metric`` or ``value`` render as empty text.
        """
        sentences = _split_sentences(section.get("content", ""))[:3]
        notes = " ".join(sentence + "." for sentence in sentences)

        data_points = section.get("data_points")
        if data_points:
            metrics = ", ".join(
                f"{dp.get('metric', '')}: {dp.get('value', '')}"
                for dp in data_points
            )
            highlight = f"Key metrics to highlight: {metrics}"
            notes = f"{notes} {highlight}" if notes else highlight

        return notes

    def derive_agenda_items(self):
        """Generate meeting agenda from the five highest-ranked topics."""
        self.synthesis["agenda_items"] = [
            {
                "order": order,
                "title": topic["title"],
                "duration": self.estimate_duration(topic),
                "discussion_points": topic["key_points"][:3],
                "decision_required": self.needs_decision(topic),
            }
            for order, topic in enumerate(self.synthesis["key_topics"][:5], start=1)
        ]

    def estimate_duration(self, topic: Dict) -> int:
        """Estimate discussion time in minutes.

        Five minutes base, two per subtopic, three more when the topic has
        data points, capped at 15.
        """
        minutes = 5
        minutes += 2 * len(topic.get("subtopics") or [])
        if topic.get("data_points"):
            minutes += 3
        return min(minutes, 15)

    def needs_decision(self, topic: Dict) -> bool:
        """Check if topic requires a decision, based on its key points."""
        text = " ".join(topic.get("key_points", [])).lower()
        return any(word in text for word in _DECISION_INDICATORS)

    def collect_supporting_data(self):
        """Aggregate all data points from research.

        Missing ``title``, ``metric`` or ``value`` keys become empty strings
        instead of raising ``KeyError``.
        """
        collected = []
        for section in self._sections():
            title = section.get("title", "")
            for data_point in section.get("data_points") or []:
                collected.append({
                    "source": title,
                    "metric": data_point.get("metric", ""),
                    "value": data_point.get("value", ""),
                    "context": title,
                })
        self.synthesis["supporting_data"] = collected

    def extract_recommendations(self):
        """Extract actionable recommendations.

        Only sections whose title contains "recommend" are considered. Their
        action items are used when present; otherwise every sentence of the
        content that mentions "recommend" is taken.
        """
        recommendations = []
        for section in self._sections():
            if "recommend" not in section.get("title", "").lower():
                continue

            action_items = section.get("action_items")
            if action_items:
                recommendations.extend(action_items)
                continue

            # No explicit action items: fall back to sentences in the content.
            for sentence in _split_sentences(section.get("content", "")):
                if "recommend" in sentence.lower():
                    recommendations.append(sentence + ".")

        self.synthesis["recommendations"] = recommendations

    def create_slide_plan(self):
        """Generate detailed slide-by-slide plan.

        Slides are numbered sequentially in the order they are added, so an
        omitted optional slide (agenda, metrics, recommendations) never leaves
        a gap in the numbering.
        """
        slides: List[Dict[str, Any]] = []

        def add_slide(fields: Dict[str, Any]) -> None:
            slides.append({"number": len(slides) + 1, **fields})

        metadata = self.synthesis["metadata"]
        summary = self.synthesis["executive_summary"]
        topics = self.synthesis["key_topics"]
        agenda = self.synthesis["agenda_items"]

        # Title slide
        add_slide({
            "type": "title",
            "title": metadata.get("title", "Research Presentation"),
            "subtitle": f"Research Synthesis - {metadata.get('date', '')}",
            "speaker_notes": summary,
        })

        # Executive Summary slide; there may be no topics to borrow points from.
        top_points = topics[0]["key_points"][:3] if topics else []
        add_slide({
            "type": "executive_summary",
            "title": "Executive Summary",
            "content": summary,
            "key_points": top_points,
            "speaker_notes": "Overview of key findings and recommendations",
        })

        # Agenda slide
        if agenda:
            add_slide({
                "type": "agenda",
                "title": "Agenda",
                "items": [item["title"] for item in agenda],
                "total_duration": sum(item["duration"] for item in agenda),
                "speaker_notes": "Today's discussion topics and time allocation",
            })

        # Content slides for each major topic
        for topic in topics[:6]:  # Limit to 6 main topics
            add_slide({
                "type": "content",
                "title": topic["title"],
                "bullets": topic["key_points"],
                "data": topic.get("data_points", []),
                "speaker_notes": topic["speaker_notes"],
            })

            # Add subtopic slides if important
            if topic.get("subtopics") and topic["importance"] > 1.5:
                for subtopic in topic["subtopics"][:2]:  # Max 2 subtopic slides
                    add_slide({
                        "type": "content",
                        "title": subtopic["title"],
                        "bullets": subtopic["key_points"],
                        "speaker_notes": f"Deep dive into {subtopic['title']}",
                    })

        # Data summary slide if we have metrics
        if self.synthesis["supporting_data"]:
            add_slide({
                "type": "data_visualization",
                "title": "Key Metrics",
                "data_points": self.synthesis["supporting_data"][:8],
                "chart_type": "dashboard",
                "speaker_notes": "Summary of key performance indicators",
            })

        # Recommendations slide
        if self.synthesis["recommendations"]:
            add_slide({
                "type": "recommendations",
                "title": "Recommendations",
                "items": self.synthesis["recommendations"][:5],
                "speaker_notes": "Proposed next steps based on research findings",
            })

        # Thank you / Questions slide
        add_slide({
            "type": "closing",
            "title": "Thank You",
            "subtitle": "Questions & Discussion",
            "speaker_notes": "Open floor for questions and discussion",
        })

        self.synthesis["slide_plan"] = slides


def main():
    """Command-line entry point: read research JSON, write synthesis JSON."""
    parser = argparse.ArgumentParser(
        description="Synthesize research content into presentation structure"
    )
    parser.add_argument(
        "research_file",
        help="Input research JSON file"
    )
    parser.add_argument(
        "--output",
        default="synthesis.json",
        help="Output synthesis JSON file"
    )
    parser.add_argument(
        "--max-slides",
        type=int,
        default=15,
        help="Maximum number of slides (default: 15)"
    )

    args = parser.parse_args()

    print(f"🔍 Synthesizing content from: {args.research_file}")

    # Load research data; report unreadable or malformed input as a usage
    # error instead of a traceback.
    try:
        with open(args.research_file, 'r', encoding='utf-8') as f:
            research_data = json.load(f)
    except (OSError, ValueError) as err:
        parser.error(f"cannot load research file {args.research_file!r}: {err}")

    if not isinstance(research_data, dict):
        parser.error("research file must contain a JSON object at the top level")

    # Synthesize content
    synthesizer = ContentSynthesizer(research_data)
    synthesis = synthesizer.synthesize()

    # Limit slides if specified. Zero or a negative value means "no limit".
    # Truncation keeps the first N slides, so trailing slides (including the
    # closing slide) are the ones dropped.
    if args.max_slides > 0 and len(synthesis["slide_plan"]) > args.max_slides:
        synthesis["slide_plan"] = synthesis["slide_plan"][:args.max_slides]

    # Save synthesis
    with open(args.output, 'w', encoding='utf-8') as f:
        json.dump(synthesis, f, indent=2, ensure_ascii=False)

    print(f"✅ Synthesis saved to: {args.output}")
    print(f"📊 Generated plan for {len(synthesis['slide_plan'])} slides")
    print(f"🎯 Identified {len(synthesis['key_topics'])} key topics")
    print(f"📝 Created {len(synthesis['agenda_items'])} agenda items")


if __name__ == "__main__":
    main()