feat(reference-curator): Add pipeline orchestrator and refactor skill format
Pipeline Orchestrator: - Add 07-pipeline-orchestrator skill with code/CLAUDE.md and desktop/SKILL.md - Add /reference-curator-pipeline slash command for full workflow automation - Add pipeline_runs and pipeline_iteration_tracker tables to schema.sql - Add v_pipeline_status and v_pipeline_iterations views - Add pipeline_config.yaml configuration template - Update AGENTS.md with Reference Curator Skills section - Update claude-project files with pipeline documentation Skill Format Refactoring: - Extract YAML frontmatter from SKILL.md files to separate skill.yaml - Add tools/ directories with MCP tool documentation - Update SKILL-FORMAT-REQUIREMENTS.md with new structure - Add migrate-skill-structure.py script for format conversion Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,40 @@
|
||||
# Pipeline Orchestrator Configuration
# Copy to ~/.config/reference-curator/pipeline_config.yaml

pipeline:
  # Discovery stage
  max_sources: 10

  # Crawler stage
  max_pages: 50

  # Auto-approve settings
  auto_approve: false
  approval_threshold: 0.85

qa_loop:
  # Maximum iterations before escalating to manual review
  max_refactor_iterations: 3
  max_deep_research_iterations: 2
  max_total_iterations: 5

export:
  # Default export format: project_files, fine_tuning, jsonl
  default_format: project_files

  # Include rejected documents in a separate folder
  include_rejected: false

state:
  # State management backend: mysql or file
  # NOTE(review): ${VAR:-default} is shell-style substitution, not YAML —
  # assumes the config loader expands it; a plain YAML parser will keep
  # the literal string. Confirm against the loader.
  backend: ${STATE_BACKEND:-file}

  # File-based state directory (used when backend=file)
  state_directory: ${REFERENCE_LIBRARY_PATH:-~/reference-library}/pipeline_state/

logging:
  # Log level: DEBUG, INFO, WARNING, ERROR
  level: INFO

  # Save detailed logs for each run
  save_run_logs: true
|
||||
@@ -187,7 +187,91 @@ CREATE TABLE export_jobs (
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
|
||||
-- -----------------------------
-- 4. Pipeline Orchestration Tables
-- -----------------------------
||||
|
||||
-- One row per end-to-end pipeline execution (discovery through export).
CREATE TABLE pipeline_runs (
    run_id INT AUTO_INCREMENT PRIMARY KEY,

    -- What the run was started from: a topic query, an explicit URL list,
    -- or a manifest file; input_value holds the corresponding raw input.
    run_type ENUM('topic', 'urls', 'manifest') NOT NULL,
    input_value TEXT NOT NULL,

    -- Lifecycle state, and the stage currently executing while running.
    status ENUM('running', 'completed', 'failed', 'paused') DEFAULT 'running',
    current_stage ENUM('discovery', 'crawling', 'storing', 'distilling', 'reviewing', 'exporting') DEFAULT 'discovery',

    -- Per-run configuration overrides, e.g.:
    --   {"max_sources": 10, "max_pages": 50, "auto_approve": false,
    --    "threshold": 0.85, "max_iterations": 3,
    --    "export_format": "project_files"}
    options JSON,

    -- Counters accumulated as the run progresses, e.g.:
    --   {"sources_discovered": 5, "pages_crawled": 45,
    --    "documents_stored": 45, "documents_distilled": 45,
    --    "approved": 40, "refactored": 8, "deep_researched": 2,
    --    "rejected": 3, "needs_manual_review": 2}
    stats JSON,

    -- Where the export landed and how many documents it contained.
    export_path VARCHAR(500),
    export_document_count INT,

    -- Timing: completed_at stays NULL while the run is in flight.
    started_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    completed_at TIMESTAMP NULL,

    -- Populated when status = 'failed'; error_stage records where it broke.
    error_message TEXT,
    error_stage VARCHAR(50),

    INDEX idx_status (status),
    INDEX idx_run_type (run_type),
    INDEX idx_started_at (started_at)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
|
||||
-- Tracks how many QA-loop passes each document needed within a run.
-- One row per (run, document) pair, enforced by idx_run_doc.
CREATE TABLE pipeline_iteration_tracker (
    tracker_id INT AUTO_INCREMENT PRIMARY KEY,
    run_id INT NOT NULL,
    doc_id INT NOT NULL,

    -- Iteration counts. NOT NULL so that sums such as
    -- refactor_count + deep_research_count can never silently
    -- become NULL (the original columns were nullable).
    refactor_count INT NOT NULL DEFAULT 0,
    deep_research_count INT NOT NULL DEFAULT 0,

    -- NULL while the QA loop is still running for this document.
    final_decision ENUM('approved', 'rejected', 'needs_manual_review'),

    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,

    -- Tracker rows have no meaning without their run/document.
    FOREIGN KEY (run_id) REFERENCES pipeline_runs(run_id) ON DELETE CASCADE,
    FOREIGN KEY (doc_id) REFERENCES documents(doc_id) ON DELETE CASCADE,

    UNIQUE INDEX idx_run_doc (run_id, doc_id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
|
||||
-- -----------------------------
|
||||
-- 5. Tracking & Monitoring Tables
|
||||
-- -----------------------------
|
||||
|
||||
CREATE TABLE crawl_schedule (
|
||||
@@ -218,7 +302,7 @@ CREATE TABLE change_detection (
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
|
||||
|
||||
-- -----------------------------
-- 6. Default Data
-- -----------------------------
||||
|
||||
INSERT INTO topics (topic_name, topic_slug, description) VALUES
|
||||
@@ -240,7 +324,7 @@ INSERT INTO sources (source_name, source_type, base_url, credibility_tier, vendo
|
||||
('Google AI Docs', 'official_docs', 'https://ai.google.dev/docs', 'tier1_official', 'google');
|
||||
|
||||
-- -----------------------------
-- 7. Useful Views
-- -----------------------------
||||
|
||||
CREATE OR REPLACE VIEW v_pending_reviews AS
|
||||
@@ -283,3 +367,39 @@ AND rl.review_id = (
|
||||
WHERE distill_id = dc.distill_id
|
||||
)
|
||||
ORDER BY t.topic_slug, rl.quality_score DESC;
|
||||
|
||||
-- Dashboard view: one row per pipeline run with headline statistics
-- unpacked from the stats JSON column, newest runs first.
CREATE OR REPLACE VIEW v_pipeline_status AS
SELECT
    pr.run_id,
    pr.run_type,
    pr.input_value,
    pr.status,
    pr.current_stage,
    pr.started_at,
    pr.completed_at,
    -- Wall-clock duration; still-running runs are measured up to NOW().
    TIMESTAMPDIFF(MINUTE, pr.started_at, COALESCE(pr.completed_at, NOW())) AS duration_minutes,
    -- col->path is MySQL shorthand for JSON_EXTRACT(col, path).
    pr.stats->'$.sources_discovered' AS sources_discovered,
    pr.stats->'$.pages_crawled' AS pages_crawled,
    pr.stats->'$.documents_stored' AS documents_stored,
    pr.stats->'$.approved' AS approved,
    pr.stats->'$.rejected' AS rejected,
    pr.stats->'$.needs_manual_review' AS needs_manual_review,
    pr.export_path,
    pr.error_message
FROM pipeline_runs pr
ORDER BY pr.started_at DESC;
|
||||
|
||||
-- Per-document QA-loop history: how many refactor / deep-research passes
-- each document took, and the decision it ended with.
CREATE OR REPLACE VIEW v_pipeline_iterations AS
SELECT
    pit.run_id,
    pr.input_value,
    d.title,
    d.url,
    pit.refactor_count,
    pit.deep_research_count,
    (pit.refactor_count + pit.deep_research_count) AS total_iterations,
    pit.final_decision
FROM pipeline_iteration_tracker pit
INNER JOIN pipeline_runs pr ON pr.run_id = pit.run_id
INNER JOIN documents d ON d.doc_id = pit.doc_id
-- BUG FIX: the original ordered by `pit.total_iterations`, but
-- total_iterations is a SELECT-list alias, not a column of the tracker
-- table; the table-qualified reference makes CREATE VIEW fail with
-- "unknown column". Aliases must be referenced unqualified in ORDER BY.
ORDER BY pit.run_id DESC, total_iterations DESC;
|
||||
|
||||
Reference in New Issue
Block a user