-- ============================================================================
-- Matcher Learning System - Database Schema
-- ============================================================================
-- Purpose: Track iterative learning progress for PMS matcher
-- Author: Claude Code (Iterative Learning System)
-- Date: 2026-01-06
--
-- Tables:
--   1. matcher_iterations - Track each test cycle's metrics
--   2. matcher_results - Store every match result for analysis
--   3. matcher_patterns - Track learned patterns/rules
--   4. matcher_failures - Deep analysis of match failures
-- ============================================================================

-- Make `quantix` the default schema for this session so the unqualified
-- table names in the statements below resolve to it.
USE quantix;

-- ============================================================================
-- Table 1: matcher_iterations
-- Purpose: Track performance metrics for each iteration cycle
-- ============================================================================

-- One row per learning iteration (test cycle): the aggregate match
-- statistics, confidence/tier breakdown and learning counters for that run.
-- This table is the parent referenced by the other matcher_* tables.
CREATE TABLE IF NOT EXISTS matcher_iterations (
    -- Identity and timing
    iteration_id            INT          NOT NULL AUTO_INCREMENT,
    iteration_number        INT          NOT NULL,
    test_date               DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- PMS feed covered by the run
    source_pms              VARCHAR(20)  NOT NULL COMMENT 'cloudbeds, hostify, or both',

    -- Headline totals
    total_reservations      INT          NOT NULL,
    matched_count           INT          NOT NULL,
    unmatched_count         INT          NOT NULL,
    match_rate_percent      DECIMAL(5,2) NOT NULL,

    -- How the results split across confidence bands
    high_confidence_count   INT          NOT NULL COMMENT '≥80%',
    medium_confidence_count INT          NOT NULL COMMENT '60-79%',
    low_confidence_count    INT          NOT NULL COMMENT '<60%',
    avg_confidence          DECIMAL(5,2) NOT NULL,

    -- How the results split across matcher tiers (all default to zero)
    tier0_count             INT          NOT NULL DEFAULT 0 COMMENT 'Combo matches',
    tier1_count             INT          NOT NULL DEFAULT 0 COMMENT 'Perfect (100%)',
    tier2_count             INT          NOT NULL DEFAULT 0 COMMENT 'Contains (90%)',
    tier3_count             INT          NOT NULL DEFAULT 0 COMMENT 'Similarity (70%)',
    tier4_count             INT          NOT NULL DEFAULT 0 COMMENT 'Street+unit (50-65%)',

    -- Counters for rule changes recorded against this iteration
    patterns_added          INT          NOT NULL DEFAULT 0,
    patterns_removed        INT          NOT NULL DEFAULT 0,
    thresholds_adjusted     INT          NOT NULL DEFAULT 0,

    -- Deltas against earlier iterations; nullable
    improvement_vs_previous DECIMAL(5,2) NULL COMMENT 'Percentage points gained',
    cumulative_improvement  DECIMAL(5,2) NULL COMMENT 'vs iteration 1 baseline',

    -- Free-form notes; nullable
    iteration_notes         TEXT         NULL,

    PRIMARY KEY (iteration_id),
    KEY idx_iteration_number (iteration_number),
    KEY idx_test_date (test_date)
)
    ENGINE = InnoDB
    DEFAULT CHARSET = utf8mb4
    COMMENT = 'Track each learning iteration cycle';


-- ============================================================================
-- Table 2: matcher_results
-- Purpose: Store every single match result for deep analysis
-- ============================================================================

-- One row per reservation per iteration, recording the matcher's outcome.
-- (iteration_id, reservation_id) is unique, and rows are removed
-- automatically when their parent iteration is deleted (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS matcher_results (
    result_id                 BIGINT       NOT NULL AUTO_INCREMENT,
    iteration_id              INT          NOT NULL,

    -- Which reservation was evaluated
    reservation_id            VARCHAR(32)  NOT NULL COMMENT 'PK from reservation table',
    source_pms                VARCHAR(20)  NOT NULL COMMENT 'cloudbeds or hostify',
    reservation_text          VARCHAR(500) NOT NULL COMMENT 'Original anuncio text',

    -- Outcome of the match attempt; the detail columns are nullable
    matched                   BOOLEAN      NOT NULL COMMENT 'TRUE if matched, FALSE if unmatched',
    matched_property_id       VARCHAR(32)  NULL,
    matched_property_name     VARCHAR(255) NULL,

    match_tier                TINYINT      NULL COMMENT '0-4',
    match_confidence          TINYINT      NULL COMMENT '0-100',
    match_pattern             VARCHAR(150) NULL COMMENT 'Pattern description',
    match_explanation         TEXT         NULL COMMENT 'AI explanation',
    match_scores              JSON         NULL COMMENT '{"street":90, "building":85, ...}',

    -- Tokens extracted from the reservation text, kept for learning
    extracted_street          VARCHAR(255) NULL,
    extracted_unit            VARCHAR(50)  NULL,
    extracted_building_number VARCHAR(50)  NULL,
    semantic_tokens           JSON         NULL COMMENT 'Full token extraction',

    -- Up to three nearest candidate properties with their similarity values
    closest_property_1        VARCHAR(255) NULL,
    closest_similarity_1      TINYINT      NULL,
    closest_property_2        VARCHAR(255) NULL,
    closest_similarity_2      TINYINT      NULL,
    closest_property_3        VARCHAR(255) NULL,
    closest_similarity_3      TINYINT      NULL,

    -- Manual review / override
    user_verified             BOOLEAN      NULL COMMENT 'NULL=auto, TRUE=correct, FALSE=wrong',
    corrected_property_id     VARCHAR(32)  NULL,
    feedback_notes            TEXT         NULL,

    created_at                DATETIME     NOT NULL DEFAULT CURRENT_TIMESTAMP,

    PRIMARY KEY (result_id),
    UNIQUE KEY unique_iteration_reservation (iteration_id, reservation_id),
    KEY idx_matched (matched),
    KEY idx_confidence (match_confidence),
    KEY idx_source_pms (source_pms),
    KEY idx_property (matched_property_id),
    FOREIGN KEY (iteration_id)
        REFERENCES matcher_iterations (iteration_id)
        ON DELETE CASCADE
)
    ENGINE = InnoDB
    DEFAULT CHARSET = utf8mb4
    COMMENT = 'Store every match result for analysis';


-- ============================================================================
-- Table 3: matcher_patterns
-- Purpose: Track what patterns/rules were learned and applied
-- ============================================================================

-- One row per learned pattern/rule, with its definition, lifecycle and
-- effectiveness counters.
--
-- Requires matcher_iterations to exist (foreign key on iteration_discovered).
-- Deleting an iteration that discovered a pattern is rejected
-- (ON DELETE RESTRICT), unlike matcher_results / matcher_failures, whose rows
-- cascade away with their iteration.
CREATE TABLE IF NOT EXISTS matcher_patterns (
    pattern_id INT PRIMARY KEY AUTO_INCREMENT,
    iteration_discovered INT NOT NULL COMMENT 'When pattern was found',
    -- NOTE(review): iteration_applied / iteration_removed carry no foreign key,
    -- so nothing enforces that they point at an existing iteration — confirm
    -- whether that is intentional.
    iteration_applied INT DEFAULT NULL COMMENT 'When added to matcher',
    iteration_removed INT DEFAULT NULL COMMENT 'When removed (if ineffective)',

    -- Pattern Identity
    pattern_name VARCHAR(100) NOT NULL COMMENT 'e.g., tlacotalpan_numeric_unit',
    pattern_type VARCHAR(50) NOT NULL COMMENT 'abbreviation, unit_format, threshold, semantic, combo',
    pattern_category VARCHAR(50) NOT NULL COMMENT 'street, unit, building, combo, similarity',

    -- Pattern Description
    pattern_description TEXT NOT NULL,

    -- Pattern Logic
    match_logic TEXT NOT NULL COMMENT 'PHP code or description',
    regex_pattern VARCHAR(500) DEFAULT NULL,
    lookup_table JSON DEFAULT NULL COMMENT 'For abbreviation mappings',

    -- Threshold Adjustments
    threshold_field VARCHAR(50) DEFAULT NULL COMMENT 'similarity, confidence, etc.',
    threshold_old TINYINT DEFAULT NULL,
    threshold_new TINYINT DEFAULT NULL,

    -- Effectiveness Tracking
    matches_found INT NOT NULL DEFAULT 0 COMMENT 'How many matches this pattern created',
    matches_verified INT NOT NULL DEFAULT 0 COMMENT 'How many were correct',
    matches_rejected INT NOT NULL DEFAULT 0 COMMENT 'How many were wrong',
    success_rate DECIMAL(5,2) NOT NULL DEFAULT 0.00,

    -- Status
    status VARCHAR(20) NOT NULL DEFAULT 'pending' COMMENT 'pending, active, removed, testing',

    -- Metadata
    -- Defaults to 0 like the other counters, so an INSERT that omits it does
    -- not fail under strict SQL mode.
    discovered_from_failures INT NOT NULL DEFAULT 0 COMMENT 'Count of failures that led to this pattern',
    example_reservation_text VARCHAR(500) DEFAULT NULL,

    added_date DATETIME NOT NULL DEFAULT NOW(),
    removed_date DATETIME DEFAULT NULL,
    last_tested DATETIME DEFAULT NULL,

    INDEX idx_iteration (iteration_discovered),
    INDEX idx_pattern_type (pattern_type),
    INDEX idx_status (status),
    -- RESTRICT is what InnoDB applies when no action is given; it is spelled
    -- out here so the difference from the CASCADE tables is visible.
    FOREIGN KEY (iteration_discovered) REFERENCES matcher_iterations(iteration_id) ON DELETE RESTRICT
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='Track learned patterns and rules';


-- ============================================================================
-- Table 4: matcher_failures
-- Purpose: Deep analysis of why reservations failed to match
-- ============================================================================

-- One row per recorded match failure, with the extracted data, the closest
-- candidate, and suggested fixes.
--
-- Requires matcher_iterations and matcher_patterns to exist (foreign keys).
-- Rows are deleted with their iteration (ON DELETE CASCADE); the link to an
-- auto-generated pattern is cleared if that pattern is deleted
-- (ON DELETE SET NULL).
CREATE TABLE IF NOT EXISTS matcher_failures (
    failure_id BIGINT PRIMARY KEY AUTO_INCREMENT,
    iteration_id INT NOT NULL,

    -- What Failed
    reservation_id VARCHAR(32) NOT NULL,
    source_pms VARCHAR(20) NOT NULL,
    reservation_text VARCHAR(500) NOT NULL,

    -- Root Cause Analysis
    failure_reason VARCHAR(100) NOT NULL COMMENT 'street_not_found, unit_format_unknown, low_similarity, etc.',
    failure_category VARCHAR(50) NOT NULL COMMENT 'extraction, matching, threshold, missing_property',

    -- Extracted Data (what we got)
    extracted_street VARCHAR(255) DEFAULT NULL,
    extracted_unit VARCHAR(50) DEFAULT NULL,
    extracted_building VARCHAR(50) DEFAULT NULL,
    semantic_tokens JSON DEFAULT NULL,

    -- Closest Matches
    closest_match_property VARCHAR(255) DEFAULT NULL,
    closest_match_similarity TINYINT DEFAULT NULL,
    -- The apostrophe is escaped by doubling it (standard SQL) rather than with
    -- a backslash, so the statement also parses when the NO_BACKSLASH_ESCAPES
    -- SQL mode is enabled. The stored comment text is unchanged.
    closest_match_why_failed TEXT DEFAULT NULL COMMENT 'Why didn''t this match?',

    -- Learning Insights
    suggested_pattern_type VARCHAR(50) DEFAULT NULL COMMENT 'What kind of fix would help?',
    suggested_rule TEXT DEFAULT NULL COMMENT 'Specific fix suggestion',
    required_threshold_adjustment TINYINT DEFAULT NULL,

    -- Pattern Generation
    auto_generated_pattern_id INT DEFAULT NULL COMMENT 'If a pattern was created from this',

    -- Status
    resolved BOOLEAN NOT NULL DEFAULT FALSE COMMENT 'TRUE if later iteration matched it',
    -- NOTE(review): resolved_iteration / resolved_by_pattern carry no foreign
    -- key, unlike auto_generated_pattern_id — confirm whether that is
    -- intentional.
    resolved_iteration INT DEFAULT NULL,
    resolved_by_pattern INT DEFAULT NULL,

    created_at DATETIME NOT NULL DEFAULT NOW(),

    INDEX idx_iteration (iteration_id),
    INDEX idx_failure_reason (failure_reason),
    INDEX idx_resolved (resolved),
    INDEX idx_source_pms (source_pms),
    FOREIGN KEY (iteration_id) REFERENCES matcher_iterations(iteration_id) ON DELETE CASCADE,
    FOREIGN KEY (auto_generated_pattern_id) REFERENCES matcher_patterns(pattern_id) ON DELETE SET NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='Deep analysis of match failures';


-- ============================================================================
-- Verification Queries
-- ============================================================================

-- Check tables were created.
-- The four expected tables are looked up by exact name in the current schema.
-- A LIKE 'matcher_%' pattern is looser than it looks: `_` is a single-character
-- wildcard, so it would also list any table whose name merely starts with
-- "matcher" plus one arbitrary character. Expect four rows.
SELECT table_name AS matcher_table
FROM information_schema.tables
WHERE table_schema = DATABASE()
  AND table_name IN (
      'matcher_iterations',
      'matcher_results',
      'matcher_patterns',
      'matcher_failures'
  )
ORDER BY table_name;

-- Display table structures
DESCRIBE matcher_iterations;
DESCRIBE matcher_results;
DESCRIBE matcher_patterns;
DESCRIBE matcher_failures;

-- ============================================================================
-- Success Message
-- ============================================================================

-- Return a single confirmation row with a suggested next step.
-- The row consists of literals only, so it is returned whenever this
-- statement runs; it does not itself check that the tables exist.
SELECT
    'Matcher Learning Tables Created Successfully!' AS Status,
    'Run Iteration 1 to start learning!'            AS NextStep;
