-- ============================================================================
-- CFDI Matcher Database Schema
-- Created: 2026-01-14
-- Purpose: Invoice-Payment reconciliation system with iterative learning
-- ============================================================================

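-- To recreate the schema from scratch, uncomment the DROP statements below.
-- They are ordered so that tables holding foreign keys are dropped before the
-- tables they reference.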
-- DROP TABLE IF EXISTS cfdi_matcher_results;
-- DROP TABLE IF EXISTS cfdi_matcher_failures;
-- DROP TABLE IF EXISTS cfdi_matcher_patterns;
-- DROP TABLE IF EXISTS cfdi_matcher_iterations;

-- ============================================================================
-- Table: cfdi_matcher_iterations
-- Purpose: Track each matching run with aggregate statistics
-- ============================================================================

-- Run-level ledger for the matcher: each row records one execution's scope,
-- headline counts, confidence/tier distribution, learning deltas and money
-- totals.
-- NOTE(review): iteration_number is indexed but not UNIQUE, so the schema
-- itself does not stop two rows from sharing the same number.
CREATE TABLE IF NOT EXISTS cfdi_matcher_iterations (
    -- Identity of the run
    iteration_id             INTEGER        NOT NULL AUTO_INCREMENT,
    iteration_number         INTEGER        NOT NULL,
    test_date                DATETIME       NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- What the run covered
    source_type              VARCHAR(20)    NOT NULL DEFAULT 'both'
        COMMENT 'invoices, deposits, both',
    date_range_start         DATE           DEFAULT NULL
        COMMENT 'Filter invoices/deposits from this date',
    date_range_end           DATE           DEFAULT NULL
        COMMENT 'Filter invoices/deposits until this date',

    -- Volume counters (all start at zero)
    total_invoices           INTEGER        NOT NULL DEFAULT 0,
    total_deposits           INTEGER        NOT NULL DEFAULT 0,
    matched_count            INTEGER        NOT NULL DEFAULT 0,
    unmatched_invoices       INTEGER        NOT NULL DEFAULT 0,
    unmatched_deposits       INTEGER        NOT NULL DEFAULT 0,

    -- How good the matches were, bucketed by confidence
    match_rate_percent       DECIMAL(5, 2)  NOT NULL DEFAULT 0,
    high_confidence_count    INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Confidence >= 80',
    medium_confidence_count  INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Confidence 50-79',
    low_confidence_count     INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Confidence < 50',
    avg_confidence           DECIMAL(5, 2)  NOT NULL DEFAULT 0,

    -- Same matches, bucketed by tier
    tier0_count              INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Exact matches 95-100%',
    tier1_count              INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Strong matches 80-94%',
    tier2_count              INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Probable matches 65-79%',
    tier3_count              INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Possible matches 50-64%',
    tier4_count              INTEGER        NOT NULL DEFAULT 0
        COMMENT 'Weak matches 40-49%',

    -- What the learning step changed, and the resulting improvement
    patterns_added           INTEGER        NOT NULL DEFAULT 0,
    patterns_removed         INTEGER        NOT NULL DEFAULT 0,
    thresholds_adjusted      INTEGER        NOT NULL DEFAULT 0,
    improvement_vs_previous  DECIMAL(5, 2)  DEFAULT NULL
        COMMENT 'Percentage point improvement',
    cumulative_improvement   DECIMAL(5, 2)  DEFAULT NULL
        COMMENT 'Total improvement vs iteration 1',

    -- Money totals (nullable: left empty until computed)
    total_invoice_amount     DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'Sum of all invoice totals',
    total_deposit_amount     DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'Sum of all deposits',
    matched_invoice_amount   DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'Sum of matched invoice amounts',
    matched_deposit_amount   DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'Sum of matched deposit amounts',
    reconciliation_gap       DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'Difference between matched amounts',

    -- Free-form notes and bookkeeping
    iteration_notes          TEXT,
    execution_time_seconds   INTEGER        DEFAULT NULL,
    created_by               VARCHAR(32)    DEFAULT 'system',

    -- Keys: surrogate primary key plus lookup indexes
    PRIMARY KEY (iteration_id),
    KEY idx_iteration_number (iteration_number),
    KEY idx_test_date (test_date),
    KEY idx_match_rate (match_rate_percent)
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci
COMMENT = 'Tracks each CFDI matcher iteration run with statistics';

-- ============================================================================
-- Table: cfdi_matcher_results
-- Purpose: Store individual match results (invoice → deposit mapping)
-- ============================================================================

-- Per-invoice outcome of an iteration. The invoice columns are always
-- present; the deposit columns are nullable and are described by the file as
-- filled "if matched". Rows are deleted together with their parent iteration
-- (ON DELETE CASCADE).
CREATE TABLE IF NOT EXISTS cfdi_matcher_results (
    result_id                     BIGINT         NOT NULL AUTO_INCREMENT,
    iteration_id                  INTEGER        NOT NULL,

    -- Snapshot of the invoice being reconciled
    invoice_id                    VARCHAR(36)    NOT NULL
        COMMENT 'eleyeme_cfdi_emitido_id',
    invoice_uuid                  VARCHAR(50)    DEFAULT NULL,
    invoice_date                  DATE           NOT NULL,
    invoice_amount                DECIMAL(18, 2) NOT NULL,
    invoice_client_name           VARCHAR(200)   DEFAULT NULL,
    invoice_client_rfc            VARCHAR(13)    DEFAULT NULL,

    -- Snapshot of the deposit chosen for it (empty when matched = 0)
    matched                       BOOLEAN        NOT NULL DEFAULT 0,
    deposit_id                    VARCHAR(32)    DEFAULT NULL
        COMMENT 'banco_cuenta_mov_id',
    deposit_date                  DATETIME       DEFAULT NULL,
    deposit_amount                DECIMAL(16, 2) DEFAULT NULL,
    deposit_reference             TEXT
        COMMENT 'numero field from banco_cuenta_mov',

    -- How the match was graded
    match_tier                    TINYINT        DEFAULT NULL
        COMMENT '0-4, lower is better',
    match_confidence              TINYINT        DEFAULT NULL
        COMMENT '0-100',
    match_pattern                 VARCHAR(150)   DEFAULT NULL
        COMMENT 'Which pattern matched',
    match_explanation             TEXT
        COMMENT 'Human-readable explanation',

    -- Per-dimension score breakdown, stored as a JSON document
    match_scores                  JSON
        COMMENT 'Breakdown: amount_score, date_score, client_score, text_score',

    -- Derived date and amount deltas
    days_between_invoice_deposit  INTEGER        DEFAULT NULL
        COMMENT 'Payment delay in days',
    amount_difference             DECIMAL(18, 2) DEFAULT NULL
        COMMENT 'deposit - invoice',
    amount_difference_percent     DECIMAL(5, 2)  DEFAULT NULL
        COMMENT 'Percentage variance',

    -- Values parsed out of the deposit reference text
    extracted_client_name         VARCHAR(200)   DEFAULT NULL
        COMMENT 'Extracted from deposit reference',
    extracted_invoice_number      VARCHAR(50)    DEFAULT NULL
        COMMENT 'Extracted from deposit reference',
    text_similarity_score         TINYINT        DEFAULT NULL
        COMMENT '0-100 text match score',

    -- Up to three runner-up deposits, kept when there is no clear winner
    alternative_deposit_1_id      VARCHAR(32)    DEFAULT NULL,
    alternative_deposit_1_score   TINYINT        DEFAULT NULL,
    alternative_deposit_2_id      VARCHAR(32)    DEFAULT NULL,
    alternative_deposit_2_score   TINYINT        DEFAULT NULL,
    alternative_deposit_3_id      VARCHAR(32)    DEFAULT NULL,
    alternative_deposit_3_score   TINYINT        DEFAULT NULL,

    -- Human verification; user_verified is tri-state, NULL means not reviewed
    user_verified                 BOOLEAN        DEFAULT NULL
        COMMENT 'NULL=pending, 1=correct, 0=incorrect',
    corrected_deposit_id          VARCHAR(32)    DEFAULT NULL
        COMMENT 'If user corrected the match',
    feedback_notes                TEXT,
    verified_by                   VARCHAR(32)    DEFAULT NULL,
    verified_at                   DATETIME       DEFAULT NULL,

    -- Workflow flags
    auto_applied                  BOOLEAN        NOT NULL DEFAULT 0
        COMMENT 'Was this auto-linked to deposit?',
    needs_review                  BOOLEAN        NOT NULL DEFAULT 0
        COMMENT 'Flag for manual review',

    created_at                    DATETIME       NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- Keys: surrogate primary key plus single-column lookup indexes
    PRIMARY KEY (result_id),
    KEY idx_iteration (iteration_id),
    KEY idx_invoice (invoice_id),
    KEY idx_deposit (deposit_id),
    KEY idx_matched (matched),
    KEY idx_confidence (match_confidence),
    KEY idx_tier (match_tier),
    KEY idx_date_invoice (invoice_date),
    KEY idx_verified (user_verified),
    KEY idx_needs_review (needs_review),

    -- Results cannot outlive the iteration that produced them
    FOREIGN KEY (iteration_id)
        REFERENCES cfdi_matcher_iterations (iteration_id)
        ON DELETE CASCADE
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci
COMMENT = 'Individual invoice-deposit match results';

-- ============================================================================
-- Table: cfdi_matcher_patterns
-- Purpose: Discovered matching patterns with success rates
-- ============================================================================

-- Catalogue of matching patterns, one row per uniquely named pattern, with
-- its definition, parameters and verification counters.
-- NOTE(review): the foreign key on iteration_discovered uses ON DELETE
-- CASCADE, so deleting that iteration also deletes the pattern row.
-- iteration_applied and iteration_removed carry no foreign key.
CREATE TABLE IF NOT EXISTS cfdi_matcher_patterns (
    pattern_id                INTEGER       NOT NULL AUTO_INCREMENT,

    -- Lifecycle, expressed as iteration references
    iteration_discovered      INTEGER       NOT NULL,
    iteration_applied         INTEGER       DEFAULT NULL
        COMMENT 'When pattern was first applied',
    iteration_removed         INTEGER       DEFAULT NULL
        COMMENT 'If pattern was deprecated',

    -- Naming and classification; pattern_name is unique
    pattern_name              VARCHAR(100)  NOT NULL UNIQUE,
    pattern_type              VARCHAR(50)   NOT NULL
        COMMENT 'exact_amount, date_window, client_fuzzy, etc',
    pattern_category          VARCHAR(50)   NOT NULL
        COMMENT 'amount, date, client, text, hybrid',
    pattern_description       TEXT          NOT NULL,

    -- How the pattern decides a match
    match_logic               TEXT          NOT NULL
        COMMENT 'Pseudocode or description',

    -- Tunable settings, stored as a JSON document
    parameters                JSON
        COMMENT 'e.g., {"date_window_days": 7, "amount_tolerance_pct": 1}',

    -- Text-matching inputs
    regex_pattern             VARCHAR(500)  DEFAULT NULL
        COMMENT 'If text-based matching',
    lookup_table              JSON
        COMMENT 'Client name variations, etc.',

    -- Before/after values of an adjusted threshold
    threshold_field           VARCHAR(50)   DEFAULT NULL
        COMMENT 'confidence, similarity, etc.',
    threshold_old             TINYINT       DEFAULT NULL,
    threshold_new             TINYINT       DEFAULT NULL,

    -- Effectiveness counters
    matches_found             INTEGER       NOT NULL DEFAULT 0,
    matches_verified          INTEGER       NOT NULL DEFAULT 0
        COMMENT 'User confirmed correct',
    matches_rejected          INTEGER       NOT NULL DEFAULT 0
        COMMENT 'User marked incorrect',
    success_rate              DECIMAL(5, 2) NOT NULL DEFAULT 0
        COMMENT 'verified / (verified + rejected) * 100',

    -- Current state of the pattern
    status                    VARCHAR(20)   NOT NULL DEFAULT 'pending'
        COMMENT 'pending, active, deprecated',

    -- Where the pattern came from, with one illustrative example
    discovered_from_failures  INTEGER       DEFAULT NULL
        COMMENT 'How many failures led to this pattern',
    example_invoice_id        VARCHAR(36)   DEFAULT NULL,
    example_deposit_id        VARCHAR(32)   DEFAULT NULL,
    example_explanation       TEXT,

    -- Bookkeeping timestamps and notes
    added_date                DATETIME      NOT NULL DEFAULT CURRENT_TIMESTAMP,
    removed_date              DATETIME      DEFAULT NULL,
    last_tested               DATETIME      DEFAULT NULL,
    notes                     TEXT,

    -- Keys: surrogate primary key plus lookup indexes
    PRIMARY KEY (pattern_id),
    KEY idx_iteration_discovered (iteration_discovered),
    KEY idx_pattern_type (pattern_type),
    KEY idx_status (status),
    KEY idx_success_rate (success_rate),

    FOREIGN KEY (iteration_discovered)
        REFERENCES cfdi_matcher_iterations (iteration_id)
        ON DELETE CASCADE
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci
COMMENT = 'Discovered matching patterns and their success rates';

-- ============================================================================
-- Table: cfdi_matcher_failures
-- Purpose: Track failed matches for analysis and pattern discovery
-- ============================================================================

-- Items an iteration could not match, kept for diagnosis and pattern
-- discovery. failure_type says which side failed; the invoice and deposit
-- column groups are both nullable, so only the relevant side need be filled.
-- Rows are deleted with their iteration (ON DELETE CASCADE); pattern_id is
-- cleared if the linked pattern is deleted (ON DELETE SET NULL).
CREATE TABLE IF NOT EXISTS cfdi_matcher_failures (
    failure_id                 BIGINT         NOT NULL AUTO_INCREMENT,
    iteration_id               INTEGER        NOT NULL,

    -- Which kind of item went unmatched
    failure_type               VARCHAR(20)    NOT NULL
        COMMENT 'unmatched_invoice, unmatched_deposit',

    -- Invoice snapshot (used for an unmatched invoice)
    invoice_id                 VARCHAR(36)    DEFAULT NULL,
    invoice_uuid               VARCHAR(50)    DEFAULT NULL,
    invoice_date               DATE           DEFAULT NULL,
    invoice_amount             DECIMAL(18, 2) DEFAULT NULL,
    invoice_client_name        VARCHAR(200)   DEFAULT NULL,
    invoice_client_rfc         VARCHAR(13)    DEFAULT NULL,

    -- Deposit snapshot (used for an unmatched deposit)
    deposit_id                 VARCHAR(32)    DEFAULT NULL,
    deposit_date               DATETIME       DEFAULT NULL,
    deposit_amount             DECIMAL(16, 2) DEFAULT NULL,
    deposit_reference          TEXT,
    deposit_client_name        VARCHAR(100)   DEFAULT NULL,
    deposit_client_rfc         VARCHAR(13)    DEFAULT NULL,

    -- Best near-miss candidate
    closest_match_id           VARCHAR(36)    DEFAULT NULL
        COMMENT 'Closest invoice/deposit ID',
    closest_match_score        TINYINT        DEFAULT NULL,
    closest_match_reason       TEXT
        COMMENT 'Why it almost matched',

    -- Diagnostics, stored as JSON documents
    failure_reasons            JSON
        COMMENT 'Array of diagnostic reasons',
    suggested_patterns         JSON
        COMMENT 'Potential patterns to implement',

    -- Manual resolution by a user
    manually_resolved          BOOLEAN        NOT NULL DEFAULT 0,
    resolved_match_id          VARCHAR(36)    DEFAULT NULL
        COMMENT 'User-provided correct match',
    resolved_by                VARCHAR(32)    DEFAULT NULL,
    resolved_at                DATETIME       DEFAULT NULL,
    resolution_notes           TEXT,

    -- Link to a pattern derived from this failure, if any
    pattern_created_from_this  BOOLEAN        NOT NULL DEFAULT 0,
    pattern_id                 INTEGER        DEFAULT NULL
        COMMENT 'If a pattern was created from this failure',

    created_at                 DATETIME       NOT NULL DEFAULT CURRENT_TIMESTAMP,

    -- Keys: surrogate primary key plus lookup indexes
    PRIMARY KEY (failure_id),
    KEY idx_iteration (iteration_id),
    KEY idx_failure_type (failure_type),
    KEY idx_invoice (invoice_id),
    KEY idx_deposit (deposit_id),
    KEY idx_resolved (manually_resolved),
    KEY idx_pattern_created (pattern_created_from_this),

    FOREIGN KEY (iteration_id)
        REFERENCES cfdi_matcher_iterations (iteration_id)
        ON DELETE CASCADE,
    FOREIGN KEY (pattern_id)
        REFERENCES cfdi_matcher_patterns (pattern_id)
        ON DELETE SET NULL
)
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_unicode_ci
COMMENT = 'Failed matches for analysis and learning';

-- ============================================================================
-- Initial Data
-- ============================================================================

-- Seed iteration 0 (baseline before any matching) exactly once.
-- cfdi_matcher_iterations has no unique key on iteration_number (only a plain
-- index) and its primary key is AUTO_INCREMENT, so an
-- INSERT ... ON DUPLICATE KEY UPDATE here could never hit a duplicate: every
-- run of this script would append another baseline row. The NOT EXISTS guard
-- makes the seed idempotent without requiring a schema change.
-- An existing iteration 0 row is left untouched, including its notes.
INSERT INTO cfdi_matcher_iterations (iteration_number, iteration_notes, created_by)
SELECT
    0,
    'Baseline iteration - no matching performed yet',
    'system'
FROM DUAL
WHERE NOT EXISTS (
    SELECT 1
    FROM cfdi_matcher_iterations
    WHERE iteration_number = 0
);

-- ============================================================================
-- End of schema
-- ============================================================================
