feat(reference-curator): Add Claude.ai Projects export format

Add claude-project/ folder with skill files formatted for upload to
Claude.ai Projects (web interface):

- reference-curator-complete.md: All 6 skills consolidated
- INDEX.md: Overview and workflow documentation
- Individual skill files (01-06) without YAML frontmatter

Add --claude-ai option to install.sh:
- Lists available files for upload
- Optionally copies to custom destination directory
- Provides upload instructions for Claude.ai

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

# Markdown Exporter
Exports approved content as structured markdown files for Claude Projects or fine-tuning.
## Export Configuration
```yaml
# ~/.config/reference-curator/export_config.yaml
output:
  base_path: ~/reference-library/exports/
  project_files:
    structure: nested_by_topic  # flat | nested_by_topic | nested_by_source
    index_file: INDEX.md
    include_metadata: true
  fine_tuning:
    format: jsonl
    max_tokens_per_sample: 4096
    include_system_prompt: true
  quality:
    min_score_for_export: 0.80
```
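A minimal loader sketch for this file, assuming PyYAML is available; the path and key names follow the example above:
```python
from pathlib import Path
import yaml

def load_export_config(path="~/.config/reference-curator/export_config.yaml"):
    """Read the export configuration; a sketch assuming PyYAML."""
    return yaml.safe_load(Path(path).expanduser().read_text())
```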
## Export Workflow
### Step 1: Query Approved Content
```python
def get_exportable_content(cursor, min_score=0.80, topic_filter=None):
    """Get all approved content meeting quality threshold."""
    sql = """
        SELECT d.doc_id, d.title, d.url,
               dc.summary, dc.key_concepts, dc.code_snippets, dc.structured_content,
               t.topic_slug, t.topic_name,
               rl.quality_score, s.credibility_tier, s.vendor
        FROM documents d
        JOIN distilled_content dc ON d.doc_id = dc.doc_id
        JOIN document_topics dt ON d.doc_id = dt.doc_id
        JOIN topics t ON dt.topic_id = t.topic_id
        JOIN review_logs rl ON dc.distill_id = rl.distill_id
        JOIN sources s ON d.source_id = s.source_id
        WHERE rl.decision = 'approve'
          AND rl.quality_score >= %s
          AND rl.review_id = (
              SELECT MAX(review_id) FROM review_logs
              WHERE distill_id = dc.distill_id
          )
    """
    params = [min_score]
    if topic_filter:
        # Expand one %s placeholder per topic slug.
        placeholders = ','.join(['%s'] * len(topic_filter))
        sql += f" AND t.topic_slug IN ({placeholders})"
        params.extend(topic_filter)
    sql += " ORDER BY t.topic_slug, rl.quality_score DESC"
    cursor.execute(sql, params)
    return cursor.fetchall()
```
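The downstream templates access rows as dictionaries (`doc['title']`), so the cursor should return mappings rather than tuples. The `%s` placeholder style matches psycopg2, but the driver isn't named here, so this is an assumption; with psycopg2 one way to get dict rows is:
```python
import psycopg2
import psycopg2.extras

# RealDictCursor makes fetchall() return dicts keyed by column name,
# which is what generate_document_file() and friends expect.
conn = psycopg2.connect("dbname=reference_curator")  # hypothetical DSN
cursor = conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
docs = get_exportable_content(cursor, min_score=0.80,
                              topic_filter=["prompt-engineering"])
```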
### Step 2: Organize by Structure
**Nested by Topic (recommended):**
```
exports/
├── INDEX.md
├── prompt-engineering/
│ ├── _index.md
│ ├── 01-chain-of-thought.md
│ ├── 02-few-shot-prompting.md
│ └── 03-system-prompts.md
├── claude-models/
│ ├── _index.md
│ ├── 01-model-comparison.md
│ └── 02-context-windows.md
└── agent-building/
├── _index.md
└── 01-tool-use.md
```
**Flat Structure:**
```
exports/
├── INDEX.md
├── prompt-engineering-chain-of-thought.md
├── prompt-engineering-few-shot.md
└── claude-models-comparison.md
```
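`slugify` is referenced by the generation code below but not defined in this file; a minimal sketch consistent with the filenames shown above:
```python
import re

def slugify(title):
    """Lowercase, replace runs of non-alphanumerics with hyphens."""
    slug = re.sub(r"[^a-z0-9]+", "-", title.lower())
    return slug.strip("-")
```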
### Step 3: Generate Files
**Document File Template:**
```python
from datetime import datetime

def generate_document_file(doc, include_metadata=True):
    content = []
    if include_metadata:
        # YAML frontmatter carrying provenance and quality metadata.
        content.append("---")
        content.append(f"title: {doc['title']}")
        content.append(f"source: {doc['url']}")
        content.append(f"vendor: {doc['vendor']}")
        content.append(f"tier: {doc['credibility_tier']}")
        content.append(f"quality_score: {doc['quality_score']:.2f}")
        content.append(f"exported: {datetime.now().isoformat()}")
        content.append("---")
        content.append("")
    content.append(doc['structured_content'])
    return "\n".join(content)
```
**Topic Index Template:**
```python
def generate_topic_index(topic_slug, topic_name, documents):
    content = [
        f"# {topic_name}",
        "",
        f"This section contains {len(documents)} reference documents.",
        "",
        "## Contents",
        ""
    ]
    for i, doc in enumerate(documents, 1):
        # Must match the filenames written in Step 4.
        filename = f"{i:02d}-{slugify(doc['title'])}.md"
        content.append(f"{i}. [{doc['title']}]({filename})")
    return "\n".join(content)
```
**Root INDEX Template:**
```python
def generate_root_index(topics_with_counts, export_date):
    content = [
        "# Reference Library",
        "",
        f"Exported: {export_date}",
        "",
        "## Topics",
        ""
    ]
    for topic in topics_with_counts:
        content.append(f"- [{topic['name']}]({topic['slug']}/) ({topic['count']} documents)")
    content.extend([
        "",
        "## Quality Standards",
        "",
        "All documents in this library have:",
        "- Passed quality review (score ≥ 0.80)",
        "- Been distilled for conciseness",
        "- Verified source attribution"
    ])
    return "\n".join(content)
```
### Step 4: Write Files
```python
from collections import defaultdict
from datetime import datetime
from pathlib import Path

def export_project_files(content_list, config):
    base_path = Path(config['output']['base_path']).expanduser()
    base_path.mkdir(parents=True, exist_ok=True)
    structure = config['output']['project_files']['structure']
    # Group by topic
    by_topic = defaultdict(list)
    for doc in content_list:
        by_topic[doc['topic_slug']].append(doc)
    # Create directories and files
    for topic_slug, docs in by_topic.items():
        if structure == 'nested_by_topic':
            topic_dir = base_path / topic_slug
            topic_dir.mkdir(parents=True, exist_ok=True)
            # Write topic index
            topic_index = generate_topic_index(topic_slug, docs[0]['topic_name'], docs)
            (topic_dir / '_index.md').write_text(topic_index)
            # Write document files
            for i, doc in enumerate(docs, 1):
                filename = f"{i:02d}-{slugify(doc['title'])}.md"
                file_content = generate_document_file(doc)
                (topic_dir / filename).write_text(file_content)
    # Write root INDEX
    topics_summary = [
        {"slug": slug, "name": docs[0]['topic_name'], "count": len(docs)}
        for slug, docs in by_topic.items()
    ]
    root_index = generate_root_index(topics_summary, datetime.now().isoformat())
    (base_path / 'INDEX.md').write_text(root_index)
```
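The function above only implements the `nested_by_topic` layout. A possible helper for the flat layout, not part of the original; filenames follow the flat example tree (`{topic}-{slug}.md`):
```python
def export_flat(by_topic, base_path):
    """Sketch of the flat layout: one file per document at the top level."""
    base_path.mkdir(parents=True, exist_ok=True)
    for topic_slug, docs in by_topic.items():
        for doc in docs:
            # Filenames follow the flat example above: {topic}-{slug}.md
            filename = f"{topic_slug}-{slugify(doc['title'])}.md"
            (base_path / filename).write_text(generate_document_file(doc))
```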
### Step 5: Fine-tuning Export (Optional)
```python
import json
from pathlib import Path

def export_fine_tuning_dataset(content_list, config):
    """Export as JSONL for fine-tuning."""
    output_path = Path(config['output']['base_path']).expanduser() / 'fine_tuning.jsonl'
    max_tokens = config['output']['fine_tuning']['max_tokens_per_sample']
    with open(output_path, 'w') as f:
        for doc in content_list:
            sample = {
                "messages": [
                    {
                        "role": "system",
                        "content": "You are an expert on AI and prompt engineering."
                    },
                    {
                        "role": "user",
                        "content": f"Explain {doc['title']}"
                    },
                    {
                        "role": "assistant",
                        "content": truncate_to_tokens(doc['structured_content'], max_tokens)
                    }
                ],
                "metadata": {
                    "source": doc['url'],
                    "topic": doc['topic_slug'],
                    "quality_score": doc['quality_score']
                }
            }
            f.write(json.dumps(sample) + '\n')
```
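`truncate_to_tokens` isn't defined here; one possible implementation uses tiktoken (an assumption, since the file doesn't specify a tokenizer):
```python
import tiktoken

def truncate_to_tokens(text, max_tokens, encoding_name="cl100k_base"):
    """Encode, cut at max_tokens, and decode back to text."""
    enc = tiktoken.get_encoding(encoding_name)
    tokens = enc.encode(text)
    if len(tokens) <= max_tokens:
        return text
    return enc.decode(tokens[:max_tokens])
```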
### Step 6: Log Export Job
```python
import json

def log_export_job(cursor, export_name, export_type, output_path,
                   topic_filter, total_docs, total_tokens):
    sql = """
        INSERT INTO export_jobs
            (export_name, export_type, output_format, topic_filter, output_path,
             total_documents, total_tokens, status, started_at, completed_at)
        VALUES (%s, %s, 'markdown', %s, %s, %s, %s, 'completed', NOW(), NOW())
    """
    cursor.execute(sql, (
        export_name, export_type,
        json.dumps(topic_filter) if topic_filter else None,
        str(output_path), total_docs, total_tokens
    ))
```
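Putting the steps together; a hypothetical driver, with the DSN and export name as placeholders and `load_export_config` reused from the configuration sketch:
```python
import psycopg2
import psycopg2.extras

config = load_export_config()  # sketch from the configuration section
min_score = config['output']['quality']['min_score_for_export']

conn = psycopg2.connect("dbname=reference_curator")  # hypothetical DSN
with conn, conn.cursor(cursor_factory=psycopg2.extras.RealDictCursor) as cur:
    docs = get_exportable_content(cur, min_score=min_score)
    export_project_files(docs, config)
    log_export_job(cur, "weekly-export", "project_files",
                   config['output']['base_path'], None,
                   len(docs), 0)  # token counting omitted in this sketch
```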
## Cross-Reference Generation
Link related documents:
```python
def add_cross_references(doc, all_docs):
    """Find and link related documents."""
    related = []
    doc_concepts = set(c['term'].lower() for c in doc['key_concepts'])
    for other in all_docs:
        if other['doc_id'] == doc['doc_id']:
            continue
        other_concepts = set(c['term'].lower() for c in other['key_concepts'])
        overlap = len(doc_concepts & other_concepts)
        if overlap >= 2:
            related.append({
                "title": other['title'],
                "path": generate_relative_path(doc, other),
                "overlap": overlap
            })
    # Keep the five most strongly related documents.
    return sorted(related, key=lambda x: x['overlap'], reverse=True)[:5]
```
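How the related list might be appended to an exported document (a sketch; `generate_relative_path` is a helper assumed by the code above and not defined in this file):
```python
def render_related_section(related):
    """Format add_cross_references() output as a markdown section."""
    if not related:
        return ""
    lines = ["", "## Related", ""]
    for item in related:
        lines.append(f"- [{item['title']}]({item['path']})")
    return "\n".join(lines)
```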
## Output Verification
After export, verify (a link-check sketch follows this list):
- [ ] All files readable and valid markdown
- [ ] INDEX.md links resolve correctly
- [ ] No broken cross-references
- [ ] Total token count matches expectation
- [ ] No duplicate content
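A minimal sketch for the link checks, assuming the nested layout and relative links produced above:
```python
import re
from pathlib import Path

def find_broken_links(base_path):
    """Scan exported markdown for relative links that don't resolve."""
    broken = []
    for md_file in Path(base_path).rglob("*.md"):
        text = md_file.read_text()
        for target in re.findall(r"\]\(([^)#]+)\)", text):
            if target.startswith(("http://", "https://")):
                continue  # external links aren't checked here
            if not (md_file.parent / target).exists():
                broken.append((str(md_file), target))
    return broken
```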
## Integration
| From | Input | To |
|------|-------|-----|
| quality-reviewer | Approved content IDs | markdown-exporter |
| markdown-exporter | Structured files | Project knowledge / Fine-tuning |