#!/usr/bin/env python3
"""
Synthesize research content and extract presentation topics
Analyzes research data to identify key themes, agenda items, and slide structure
"""

import json
import argparse
from typing import Dict, List, Any
from collections import Counter
import re

class ContentSynthesizer:
    """Analyzes research and generates presentation structure.

    The research input is a dict that may contain a ``"metadata"`` mapping
    and a ``"content"`` mapping holding a list of ``"sections"``.  Each
    section is read with tolerant lookups for ``"title"``, ``"content"``,
    ``"level"``, ``"data_points"``, ``"action_items"`` and ``"subsections"``.

    The result is accumulated in ``self.synthesis``.  Every pipeline step
    rebuilds its own entry from scratch, so ``synthesize()`` can be called
    more than once without duplicating topics, agenda items or slides.
    """

    # Words that mark a sentence as a key point regardless of its length.
    _KEY_POINT_INDICATORS = ("key", "important", "significant", "critical", "major")
    # Words in a topic's key points that suggest a decision is being asked for.
    _DECISION_INDICATORS = ("recommend", "decide", "choose", "select", "approve")
    # A sentence ends at a period followed by whitespace or end of text, so
    # periods inside numbers ("3.5%") or versions ("v1.2") do not split.
    _SENTENCE_END = re.compile(r"\.(?=\s|$)")

    def __init__(self, research_data: Dict):
        """Store the research data and initialise an empty synthesis."""
        self.research_data = research_data
        self.synthesis = {
            "metadata": {},
            "executive_summary": "",
            "key_topics": [],
            "agenda_items": [],
            "supporting_data": [],
            "recommendations": [],
            "slide_plan": []
        }

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _text(mapping: Dict, key: str) -> str:
        """Return ``mapping[key]``, or ``""`` when it is missing or null."""
        return mapping.get(key) or ""

    def _sections(self) -> List[Dict]:
        """Return the list of research sections (empty when absent or null)."""
        content = self.research_data.get("content") or {}
        return content.get("sections") or []

    @classmethod
    def _split_sentences(cls, text: str) -> List[str]:
        """Split *text* into stripped, non-empty sentences without the final period."""
        fragments = (part.strip() for part in cls._SENTENCE_END.split(text))
        return [fragment for fragment in fragments if fragment]

    @staticmethod
    def _as_sentence(fragment: str) -> str:
        """Terminate *fragment* with a period unless it already ends a sentence."""
        return fragment if fragment.endswith((".", "!", "?")) else fragment + "."

    # ------------------------------------------------------------------
    # Pipeline
    # ------------------------------------------------------------------

    def synthesize(self) -> Dict[str, Any]:
        """Execute the complete synthesis pipeline and return ``self.synthesis``.

        The order matters: agenda items are derived from the key topics, and
        the slide plan reads every other entry of the synthesis.
        """
        self.extract_metadata()
        self.generate_executive_summary()
        self.extract_key_topics()
        self.derive_agenda_items()
        self.collect_supporting_data()
        self.extract_recommendations()
        self.create_slide_plan()

        return self.synthesis

    def extract_metadata(self) -> None:
        """Copy title, date, author and tags from the research metadata."""
        source = self.research_data.get("metadata") or {}
        self.synthesis["metadata"] = {
            "title": source.get("title", "Research Presentation"),
            "date": source.get("last_edited", ""),
            "author": source.get("created_by", ""),
            "tags": source.get("tags", [])
        }

    def generate_executive_summary(self) -> None:
        """Create a concise executive summary.

        Uses the content of the first section whose title contains
        "executive"; otherwise joins the first sentence of each of the first
        three sections, skipping sections that have no text.
        """
        sections = self._sections()

        for section in sections:
            if "executive" in self._text(section, "title").lower():
                self.synthesis["executive_summary"] = self._text(section, "content")
                return

        opening_sentences = []
        for section in sections[:3]:
            sentences = self._split_sentences(self._text(section, "content"))
            if sentences:  # an empty section must not contribute a lone "."
                opening_sentences.append(self._as_sentence(sentences[0]))

        self.synthesis["executive_summary"] = " ".join(opening_sentences)

    def extract_key_topics(self) -> None:
        """Build one topic per section, sorted by descending importance."""
        topics = []

        for section in self._sections():
            topic = {
                "title": self._text(section, "title"),
                "importance": self.calculate_importance(section),
                "key_points": self.extract_key_points(section),
                "data_points": section.get("data_points") or [],
                "speaker_notes": self.generate_speaker_notes(section)
            }

            # Include subsections as subtopics
            subsections = section.get("subsections")
            if subsections:
                topic["subtopics"] = [
                    {
                        "title": self._text(sub, "title"),
                        "key_points": self.extract_key_points(sub)
                    }
                    for sub in subsections
                ]

            topics.append(topic)

        # list.sort is stable, so equally important topics keep document order.
        topics.sort(key=lambda item: item["importance"], reverse=True)
        self.synthesis["key_topics"] = topics

    def calculate_importance(self, section: Dict) -> float:
        """Calculate a topic importance score (base 1.0, higher is more important)."""
        score = 1.0

        # Higher level sections are more important
        if section.get("level") == 1:
            score += 0.5

        # Sections with data are more important
        data_points = section.get("data_points")
        if data_points:
            score += 0.3 * len(data_points)

        # Sections with action items are important
        if section.get("action_items"):
            score += 0.4

        # Length indicates detail
        if len(self._text(section, "content")) > 500:
            score += 0.2

        return score

    def extract_key_points(self, section: Dict) -> List[str]:
        """Extract up to five bullet points from a section.

        A sentence qualifies when it contains one of the indicator words or
        when it is between 11 and 99 characters long.  Action items, if any,
        are appended after the sentences before the list is truncated.
        """
        key_points = []

        for sentence in self._split_sentences(self._text(section, "content")):
            lowered = sentence.lower()
            flagged = any(word in lowered for word in self._KEY_POINT_INDICATORS)
            if flagged or 10 < len(sentence) < 100:
                key_points.append(self._as_sentence(sentence))

        key_points.extend(section.get("action_items") or [])

        return key_points[:5]  # Limit to 5 points per slide

    def generate_speaker_notes(self, section: Dict) -> str:
        """Generate speaker notes: the first three sentences plus any metrics."""
        leading = self._split_sentences(self._text(section, "content"))[:3]
        parts = [self._as_sentence(sentence) for sentence in leading]

        # Add context about data if present
        data_points = section.get("data_points")
        if data_points:
            metrics = ", ".join(
                f"{dp.get('metric', '')}: {dp.get('value', '')}"
                for dp in data_points
            )
            parts.append(f"Key metrics to highlight: {metrics}")

        return " ".join(parts)

    def derive_agenda_items(self) -> None:
        """Generate the meeting agenda from the five most important topics."""
        self.synthesis["agenda_items"] = [
            {
                "order": position,
                "title": topic["title"],
                "duration": self.estimate_duration(topic),
                "discussion_points": topic["key_points"][:3],
                "decision_required": self.needs_decision(topic)
            }
            for position, topic in enumerate(self.synthesis["key_topics"][:5], start=1)
        ]

    def estimate_duration(self, topic: Dict) -> int:
        """Estimate discussion time in minutes (5 minimum, capped at 15)."""
        minutes = 5

        # Add time for subtopics
        if topic.get("subtopics"):
            minutes += 2 * len(topic["subtopics"])

        # Add time for data discussion
        if topic.get("data_points"):
            minutes += 3

        return min(minutes, 15)

    def needs_decision(self, topic: Dict) -> bool:
        """Return True when the topic's key points mention a decision word."""
        # str() keeps the join safe if an action item is not a plain string.
        combined = " ".join(str(point) for point in topic.get("key_points", []))
        lowered = combined.lower()

        return any(word in lowered for word in self._DECISION_INDICATORS)

    def collect_supporting_data(self) -> None:
        """Aggregate the data points of every section, tagged with its title."""
        collected = []

        for section in self._sections():
            title = self._text(section, "title")
            for data_point in section.get("data_points") or []:
                collected.append({
                    "source": title,
                    "metric": data_point.get("metric", ""),
                    "value": data_point.get("value", ""),
                    "context": title
                })

        self.synthesis["supporting_data"] = collected

    def extract_recommendations(self) -> None:
        """Extract actionable recommendations.

        Only sections whose title contains "recommend" are considered.  Their
        action items are used when present; otherwise every sentence of the
        section content that mentions "recommend" is taken.
        """
        recommendations = []

        for section in self._sections():
            if "recommend" not in self._text(section, "title").lower():
                continue

            action_items = section.get("action_items")
            if action_items:
                recommendations.extend(action_items)
                continue

            for sentence in self._split_sentences(self._text(section, "content")):
                if "recommend" in sentence.lower():
                    recommendations.append(self._as_sentence(sentence))

        self.synthesis["recommendations"] = recommendations

    def create_slide_plan(self) -> None:
        """Generate the slide-by-slide plan from the rest of the synthesis.

        Slides are numbered consecutively from 1 in the order they are added,
        so optional slides that are skipped never leave a gap in the numbers.
        """
        slides: List[Dict[str, Any]] = []

        def add_slide(slide_type: str, **fields: Any) -> None:
            # "number" and "type" come first to keep the output key order stable.
            slides.append({"number": len(slides) + 1, "type": slide_type, **fields})

        metadata = self.synthesis["metadata"]
        summary = self.synthesis["executive_summary"]
        topics = self.synthesis["key_topics"]
        agenda = self.synthesis["agenda_items"]

        # Title slide
        add_slide(
            "title",
            title=metadata.get("title", "Research Presentation"),
            subtitle=f"Research Synthesis - {metadata.get('date', '')}",
            speaker_notes=summary,
        )

        # Executive Summary slide; there may be no topics to borrow points from.
        add_slide(
            "executive_summary",
            title="Executive Summary",
            content=summary,
            key_points=topics[0]["key_points"][:3] if topics else [],
            speaker_notes="Overview of key findings and recommendations",
        )

        # Agenda slide
        if agenda:
            add_slide(
                "agenda",
                title="Agenda",
                items=[item["title"] for item in agenda],
                total_duration=sum(item["duration"] for item in agenda),
                speaker_notes="Today's discussion topics and time allocation",
            )

        # Content slides for each major topic
        for topic in topics[:6]:  # Limit to 6 main topics
            add_slide(
                "content",
                title=topic["title"],
                bullets=topic["key_points"],
                data=topic.get("data_points", []),
                speaker_notes=topic["speaker_notes"],
            )

            # Add subtopic slides if important
            if topic.get("subtopics") and topic["importance"] > 1.5:
                for subtopic in topic["subtopics"][:2]:  # Max 2 subtopic slides
                    add_slide(
                        "content",
                        title=subtopic["title"],
                        bullets=subtopic["key_points"],
                        speaker_notes=f"Deep dive into {subtopic['title']}",
                    )

        # Data summary slide if we have metrics
        if self.synthesis["supporting_data"]:
            add_slide(
                "data_visualization",
                title="Key Metrics",
                data_points=self.synthesis["supporting_data"][:8],
                chart_type="dashboard",
                speaker_notes="Summary of key performance indicators",
            )

        # Recommendations slide
        if self.synthesis["recommendations"]:
            add_slide(
                "recommendations",
                title="Recommendations",
                items=self.synthesis["recommendations"][:5],
                speaker_notes="Proposed next steps based on research findings",
            )

        # Thank you / Questions slide
        add_slide(
            "closing",
            title="Thank You",
            subtitle="Questions & Discussion",
            speaker_notes="Open floor for questions and discussion",
        )

        self.synthesis["slide_plan"] = slides

def main():
    """Command-line entry point.

    Reads the research JSON named on the command line, runs it through
    ``ContentSynthesizer``, trims the slide plan to ``--max-slides`` entries
    when it is longer, writes the synthesis as JSON to ``--output`` and
    prints a short summary of what was produced.
    """
    cli = argparse.ArgumentParser(
        description="Synthesize research content into presentation structure"
    )
    cli.add_argument("research_file", help="Input research JSON file")
    cli.add_argument("--output", default="synthesis.json",
                     help="Output synthesis JSON file")
    cli.add_argument("--max-slides", type=int, default=15,
                     help="Maximum number of slides (default: 15)")
    options = cli.parse_args()

    print(f"🔍 Synthesizing content from: {options.research_file}")

    # Read the research document.
    with open(options.research_file, 'r', encoding='utf-8') as source:
        research = json.load(source)

    # Run the whole synthesis pipeline.
    result = ContentSynthesizer(research).synthesize()

    # Keep only the leading slides when a (non-zero) limit is exceeded.
    slide_limit = options.max_slides
    planned = result["slide_plan"]
    if slide_limit and len(planned) > slide_limit:
        result["slide_plan"] = planned[:slide_limit]

    # Persist the synthesis.
    with open(options.output, 'w', encoding='utf-8') as sink:
        json.dump(result, sink, indent=2, ensure_ascii=False)

    print(f"✅ Synthesis saved to: {options.output}")
    print(f"📊 Generated plan for {len(result['slide_plan'])} slides")
    print(f"🎯 Identified {len(result['key_topics'])} key topics")
    print(f"📝 Created {len(result['agenda_items'])} agenda items")

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()