feat(reference-curator): Add pipeline orchestrator and refactor skill format

Pipeline Orchestrator:
- Add 07-pipeline-orchestrator skill with code/CLAUDE.md and desktop/SKILL.md
- Add /reference-curator-pipeline slash command for full workflow automation
- Add pipeline_runs and pipeline_iteration_tracker tables to schema.sql
- Add v_pipeline_status and v_pipeline_iterations views
- Add pipeline_config.yaml configuration template
- Update AGENTS.md with Reference Curator Skills section
- Update claude-project files with pipeline documentation

Skill Format Refactoring:
- Extract YAML frontmatter from SKILL.md files to separate skill.yaml
- Add tools/ directories with MCP tool documentation
- Update SKILL-FORMAT-REQUIREMENTS.md with new structure
- Add migrate-skill-structure.py script for format conversion

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
2026-01-29 01:01:02 +07:00
parent 243b9d851c
commit d1cd1298a8
91 changed files with 2475 additions and 281 deletions

View File

@@ -0,0 +1,40 @@
# Pipeline Orchestrator Configuration
# Copy to ~/.config/reference-curator/pipeline_config.yaml
#
# NOTE(review): values like ${STATE_BACKEND:-file} use shell-style parameter
# expansion, which is not native YAML — presumably the orchestrator expands
# them at load time; confirm against the config loader.

pipeline:
  # Discovery stage
  max_sources: 10
  # Crawler stage
  max_pages: 50
  # Auto-approve settings
  auto_approve: false
  approval_threshold: 0.85

qa_loop:
  # Maximum iterations before escalating to manual review
  max_refactor_iterations: 3
  max_deep_research_iterations: 2
  max_total_iterations: 5

export:
  # Default export format: project_files, fine_tuning, jsonl
  default_format: project_files
  # Include rejected documents in a separate folder
  include_rejected: false

state:
  # State management backend: mysql or file
  backend: ${STATE_BACKEND:-file}
  # File-based state directory (used when backend=file)
  state_directory: ${REFERENCE_LIBRARY_PATH:-~/reference-library}/pipeline_state/

logging:
  # Log level: DEBUG, INFO, WARNING, ERROR
  level: INFO
  # Save detailed logs for each run
  save_run_logs: true

View File

@@ -187,7 +187,91 @@ CREATE TABLE export_jobs (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- -----------------------------
-- 4. Tracking & Monitoring Tables
-- 4. Pipeline Orchestration Tables
-- -----------------------------
-- One row per end-to-end orchestrator execution, from discovery through export.
CREATE TABLE pipeline_runs (
    run_id INT AUTO_INCREMENT PRIMARY KEY,

    -- Input configuration: what kicked off the run, and its raw input payload
    -- (topic string, URL list, or manifest path, per run_type).
    run_type ENUM('topic', 'urls', 'manifest') NOT NULL,
    input_value TEXT NOT NULL,

    -- Status tracking: overall run state plus the stage currently executing.
    status ENUM('running', 'completed', 'failed', 'paused') DEFAULT 'running',
    current_stage ENUM('discovery', 'crawling', 'storing', 'distilling', 'reviewing', 'exporting') DEFAULT 'discovery',

    -- Per-run configuration options. Example:
    --   {"max_sources": 10, "max_pages": 50, "auto_approve": false,
    --    "threshold": 0.85, "max_iterations": 3,
    --    "export_format": "project_files"}
    options JSON,

    -- Aggregate pipeline statistics, updated as the run progresses. Example:
    --   {"sources_discovered": 5, "pages_crawled": 45,
    --    "documents_stored": 45, "documents_distilled": 45,
    --    "approved": 40, "refactored": 8, "deep_researched": 2,
    --    "rejected": 3, "needs_manual_review": 2}
    stats JSON,

    -- Export information (populated by the exporting stage).
    export_path VARCHAR(500),
    export_document_count INT,

    -- Timing: completed_at stays NULL while the run is still open.
    started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    completed_at TIMESTAMP NULL,

    -- Error handling: message plus the stage name where the failure occurred.
    error_message TEXT,
    error_stage VARCHAR(50),

    INDEX idx_status (status),
    INDEX idx_run_type (run_type),
    INDEX idx_started_at (started_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- Per-document QA-loop bookkeeping: how many refactor / deep-research passes
-- each document went through within a given pipeline run.
CREATE TABLE pipeline_iteration_tracker (
    tracker_id INT AUTO_INCREMENT PRIMARY KEY,
    run_id INT NOT NULL,
    doc_id INT NOT NULL,

    -- Iteration counts accumulated by the QA loop.
    refactor_count INT DEFAULT 0,
    deep_research_count INT DEFAULT 0,

    -- Outcome once the QA loop settles (NULL while still iterating).
    final_decision ENUM('approved', 'rejected', 'needs_manual_review'),

    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    -- Rows are owned by their run and document; cascade on either deletion.
    FOREIGN KEY (run_id) REFERENCES pipeline_runs(run_id) ON DELETE CASCADE,
    FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,
    -- At most one tracker row per (run, document) pair.
    UNIQUE INDEX idx_run_doc (run_id, doc_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- -----------------------------
-- 5. Tracking & Monitoring Tables
-- -----------------------------
CREATE TABLE crawl_schedule (
@@ -218,7 +302,7 @@ CREATE TABLE change_detection (
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
-- -----------------------------
-- 5. Default Data
-- 6. Default Data
-- -----------------------------
INSERT INTO topics (topic_name, topic_slug, description) VALUES
@@ -240,7 +324,7 @@ INSERT INTO sources (source_name, source_type, base_url, credibility_tier, vendo
('Google AI Docs', 'official_docs', 'https://ai.google.dev/docs', 'tier1_official', 'google');
-- -----------------------------
-- 6. Useful Views
-- 7. Useful Views
-- -----------------------------
CREATE OR REPLACE VIEW v_pending_reviews AS
@@ -283,3 +367,39 @@ AND rl.review_id = (
WHERE distill_id = dc.distill_id
)
ORDER BY t.topic_slug, rl.quality_score DESC;
-- Dashboard view: one row per pipeline run with headline stats and timing.
-- JSON_EXTRACT returns JSON-typed values, so each stat is CAST to UNSIGNED to
-- expose real integer columns (sortable/comparable without implicit coercion);
-- NULL stats stay NULL through the cast.
CREATE OR REPLACE VIEW v_pipeline_status AS
SELECT
    pr.run_id,
    pr.run_type,
    pr.input_value,
    pr.status,
    pr.current_stage,
    pr.started_at,
    pr.completed_at,
    -- Wall-clock minutes; open runs measure against NOW().
    TIMESTAMPDIFF(MINUTE, pr.started_at, COALESCE(pr.completed_at, NOW())) AS duration_minutes,
    CAST(JSON_EXTRACT(pr.stats, '$.sources_discovered') AS UNSIGNED) AS sources_discovered,
    CAST(JSON_EXTRACT(pr.stats, '$.pages_crawled') AS UNSIGNED) AS pages_crawled,
    CAST(JSON_EXTRACT(pr.stats, '$.documents_stored') AS UNSIGNED) AS documents_stored,
    CAST(JSON_EXTRACT(pr.stats, '$.approved') AS UNSIGNED) AS approved,
    CAST(JSON_EXTRACT(pr.stats, '$.rejected') AS UNSIGNED) AS rejected,
    CAST(JSON_EXTRACT(pr.stats, '$.needs_manual_review') AS UNSIGNED) AS needs_manual_review,
    pr.export_path,
    pr.error_message
FROM pipeline_runs pr
ORDER BY pr.started_at DESC;
-- Per-document QA-loop history across runs, busiest documents first.
-- refactor_count / deep_research_count are nullable in the tracker table
-- (DEFAULT 0 but no NOT NULL), so COALESCE guards the sum: an explicitly
-- NULL count would otherwise poison total_iterations to NULL.
CREATE OR REPLACE VIEW v_pipeline_iterations AS
SELECT
    pit.run_id,
    pr.input_value,
    d.title,
    d.url,
    COALESCE(pit.refactor_count, 0) AS refactor_count,
    COALESCE(pit.deep_research_count, 0) AS deep_research_count,
    (COALESCE(pit.refactor_count, 0) + COALESCE(pit.deep_research_count, 0)) AS total_iterations,
    pit.final_decision
FROM pipeline_iteration_tracker pit
JOIN pipeline_runs pr ON pit.run_id = pr.run_id
JOIN documents d ON pit.doc_id = d.doc_id
ORDER BY pit.run_id DESC, total_iterations DESC;