<?php
/**
 * CFDI Matcher Library
 * Core matching functions for invoice-payment reconciliation
 *
 * Purpose: Reusable matching algorithms and utilities
 * Author: Claude Code (Invoice-Payment Matcher)
 * Date: 2026-01-14
 *
 * Based on PMS Matcher architecture with adaptations for financial matching
 */

// ============================================================================
// ESTADO_DE_CUENTA PARSING (SUPERVISED LEARNING - ITERATION 2)
// ============================================================================

/**
 * Parse an Estado_de_Cuenta value into deposit sequence, month, year and bank.
 *
 * Expected format (case-insensitive): "ING [SEQ] [MES] [YY] [BANCO]"
 * - SEQ   = 1- or 2-digit deposit sequence number within the month (1-indexed)
 * - MES   = three-letter Spanish month abbreviation (ENE, FEB, ..., DIC)
 * - YY    = two-digit year, expanded to 20YY
 * - BANCO = SANTANDER or BBVA
 *
 * The pattern may appear anywhere inside the text, but "ING" must start a word,
 * so an unrelated token that merely ends in "ING" is not accepted.
 *
 * Examples:
 *   "ING 13 MAR 24 SANTANDER" → sequence 13, month 3, year 2024, bank_id 3
 *   "ING 01 OCT 24 BBVA"      → sequence 1, month 10, year 2024, bank_id 2
 *
 * @param string $estado_texto - Estado_de_Cuenta field value
 * @return array|null - ['sequence' => int, 'month' => int, 'month_name' => string,
 *                       'year' => int, 'bank_id' => int, 'bank_name' => string,
 *                       'batch_id' => string] or null when the text does not parse
 */
function parse_estado_cuenta($estado_texto) {
    // Anything shorter than 10 characters cannot hold the full pattern.
    if (!is_string($estado_texto) || strlen($estado_texto) < 10) {
        return null;
    }

    // Spanish month abbreviations → month number
    $meses = [
        'ENE' => 1, 'FEB' => 2, 'MAR' => 3, 'ABR' => 4,
        'MAY' => 5, 'JUN' => 6, 'JUL' => 7, 'AGO' => 8,
        'SEP' => 9, 'OCT' => 10, 'NOV' => 11, 'DIC' => 12
    ];

    // Month number → full Spanish month name (used in messages and patterns)
    $month_names = [
        1 => 'Enero', 2 => 'Febrero', 3 => 'Marzo', 4 => 'Abril',
        5 => 'Mayo', 6 => 'Junio', 7 => 'Julio', 8 => 'Agosto',
        9 => 'Septiembre', 10 => 'Octubre', 11 => 'Noviembre', 12 => 'Diciembre'
    ];

    // Bank name → banco_cuenta_id. An explicit map keeps an unknown bank from
    // being silently treated as BBVA if the pattern is ever extended.
    $bank_ids = ['SANTANDER' => 3, 'BBVA' => 2];

    // \b before ING rejects words such as "BOEING" that only end in "ING".
    $pattern = '/\bING\s+(\d{1,2})\s+([A-Z]{3})\s+(\d{2})\s+(SANTANDER|BBVA)/i';

    if (!preg_match($pattern, $estado_texto, $matches)) {
        return null;
    }

    $sequence = (int)$matches[1];
    $mes_abbr = strtoupper($matches[2]);
    $year = (int)('20' . $matches[3]); // 24 → 2024
    $banco_texto = strtoupper($matches[4]);

    // Three letters that are not a known month abbreviation
    if (!isset($meses[$mes_abbr])) {
        return null;
    }

    // The regex caps the sequence at two digits; this also rejects "0" / "00".
    if ($sequence < 1 || $sequence > 99) {
        return null;
    }

    if (!isset($bank_ids[$banco_texto])) {
        return null;
    }

    $month = $meses[$mes_abbr];
    $bank_id = $bank_ids[$banco_texto];

    return [
        'sequence' => $sequence,              // Deposit sequence within the month
        'month' => $month,                    // Month number (1-12)
        'month_name' => $month_names[$month], // Full Spanish month name
        'year' => $year,                      // Four-digit year
        'bank_id' => $bank_id,                // banco_cuenta_id
        'bank_name' => $banco_texto,          // SANTANDER or BBVA (upper-cased)
        'batch_id' => sprintf('%d_%d_%d_%d', $bank_id, $year, $month, $sequence) // bank_year_month_sequence
    ];
}

/**
 * Sanity-check the structure returned by parse_estado_cuenta().
 *
 * Accepts the data only when year is within 2020-2030, month within 1-12 and
 * sequence within 1-99. Fields are checked in that order and the first
 * out-of-range field ends the check.
 *
 * @param array|null $estado_data - Parsed Estado data (expects 'year', 'month', 'sequence' keys)
 * @return bool - true when every field is inside its allowed range
 */
function validate_estado_data($estado_data) {
    if (!$estado_data) {
        return false;
    }

    // Field name → [lowest allowed, highest allowed]
    $allowed_ranges = [
        'year' => [2020, 2030],
        'month' => [1, 12],
        'sequence' => [1, 99],
    ];

    foreach ($allowed_ranges as $field => $range) {
        $value = $estado_data[$field];
        if ($value < $range[0] || $value > $range[1]) {
            return false;
        }
    }

    return true;
}

// ============================================================================
// MONTHLY DEPOSIT INDEXING (Tier -1 Sequence Matching)
// ============================================================================

/**
 * Build the monthly deposit index for one bank account, year and month.
 *
 * Keeps only rows for the given bank whose 'deposit' amount is positive and
 * whose 'fecha' falls in the requested year/month, orders them by 'fecha'
 * ascending and tags each one with a 1-based '_sequence' number.
 *
 * Deposits sharing the same timestamp keep their input order. The tie-break is
 * explicit because usort() is not a stable sort before PHP 8.0, and an
 * unstable order would make sequence numbers non-deterministic for same-day
 * deposits. Rows whose 'fecha' cannot be parsed are skipped.
 *
 * @param array $deposits - All deposits (rows with 'banco_cuenta_id', 'deposit', 'fecha')
 * @param int $banco_id - Bank account ID (2=BBVA, 3=SANTANDER)
 * @param int $year - Year (e.g., 2024)
 * @param int $month - Month (1-12)
 * @return array - 0-indexed list of deposits sorted by fecha ASC, each with '_sequence'
 */
function build_monthly_deposit_index($deposits, $banco_id, $year, $month) {
    // Numeric strings are accepted for year/month as well as ints.
    $year = (int)$year;
    $month = (int)$month;

    $candidates = [];
    $position = 0;

    foreach ($deposits as $deposit) {
        $position++; // Input position, used only to break timestamp ties

        if ($deposit['banco_cuenta_id'] != $banco_id) {
            continue;
        }

        // Only real deposits (amount > 0) take part in the sequence
        if ($deposit['deposit'] <= 0) {
            continue;
        }

        // Parse once here instead of on every sort comparison
        $timestamp = strtotime($deposit['fecha']);
        if ($timestamp === false) {
            continue;
        }

        // 'n' = month number without leading zero
        if ((int)date('Y', $timestamp) !== $year || (int)date('n', $timestamp) !== $month) {
            continue;
        }

        $candidates[] = ['ts' => $timestamp, 'pos' => $position, 'row' => $deposit];
    }

    // Chronological order, falling back to input order for equal timestamps
    usort($candidates, function ($a, $b) {
        return [$a['ts'], $a['pos']] <=> [$b['ts'], $b['pos']];
    });

    $indexed = [];
    foreach ($candidates as $offset => $candidate) {
        $deposit = $candidate['row'];
        $deposit['_sequence'] = $offset + 1; // 1-based position within the month
        $indexed[] = $deposit;
    }

    return $indexed;
}

/**
 * Look up the deposit stored at a 1-based sequence position of a monthly index.
 *
 * @param array $monthly_index - List produced by build_monthly_deposit_index()
 * @param int $sequence - 1-based sequence number
 * @return array|null - The deposit at that position, or null when there is no such entry
 */
function get_deposit_by_sequence($monthly_index, $sequence) {
    // Shift the 1-based sequence onto the 0-based list offset.
    return $monthly_index[$sequence - 1] ?? null;
}

/**
 * Summarise a monthly deposit index for debugging.
 *
 * The result always has the same five keys, so callers can read 'sequences'
 * without first checking whether the index was empty.
 *
 * @param array $monthly_index - Result from build_monthly_deposit_index()
 * @return array - ['count' => int, 'total_amount' => number, 'first_date' => string|null,
 *                  'last_date' => string|null, 'sequences' => int[]]
 */
function get_monthly_index_stats($monthly_index) {
    // Reindex so the first/last lookups work even if the caller passed a keyed array.
    $rows = array_values((array)$monthly_index);
    $count = count($rows);

    if ($count === 0) {
        return [
            'count' => 0,
            'total_amount' => 0,
            'first_date' => null,
            'last_date' => null,
            'sequences' => []
        ];
    }

    $total = 0;
    foreach ($rows as $deposit) {
        $total += $deposit['deposit'];
    }

    return [
        'count' => $count,
        'total_amount' => $total,
        'first_date' => $rows[0]['fecha'],
        'last_date' => $rows[$count - 1]['fecha'],
        'sequences' => range(1, $count) // Valid 1-based sequence numbers
    ];
}

// ============================================================================
// NORMALIZATION & TEXT UTILITIES
// ============================================================================

/**
 * Normalize text for comparison.
 *
 * Replaces the accented Spanish vowels, Ñ/ñ and Ü/ü with their plain ASCII
 * letters, lowercases the result as UTF-8, collapses every whitespace run to
 * a single space and trims both ends. Empty input yields ''.
 *
 * @param string $text - Text to normalize
 * @return string - Normalized text
 */
function normalize_text_cfdi($text) {
    if (empty($text)) {
        return '';
    }

    // Accented character → unaccented replacement
    $plain_letters = [
        'Á'=>'A', 'É'=>'E', 'Í'=>'I', 'Ó'=>'O', 'Ú'=>'U',
        'á'=>'a', 'é'=>'e', 'í'=>'i', 'ó'=>'o', 'ú'=>'u',
        'Ñ'=>'N', 'ñ'=>'n', 'Ü'=>'U', 'ü'=>'u'
    ];

    $unaccented = strtr($text, $plain_letters);
    $lowered = mb_strtolower($unaccented, 'UTF-8');
    $single_spaced = preg_replace('/\s+/', ' ', $lowered);

    return trim($single_spaced);
}

/**
 * Extract potential client names from bank reference text.
 *
 * Candidates are collected in this order:
 *   1. The letters/spaces that follow a "REF" keyword
 *   2. Everything after "SPEI RECIBIDO" up to the end of the text
 *   3. Every all-uppercase word longer than three characters
 *
 * Duplicates are removed and the result is reindexed, so it is always a
 * 0-based list. array_unique() alone would leave gaps in the keys.
 *
 * @param string|null $reference_text - Bank reference text
 * @return array - List of distinct candidate name strings (empty when none found)
 */
function extract_client_from_reference($reference_text) {
    // Cast first: trim(null) is deprecated on newer PHP versions.
    $reference_text = trim((string)$reference_text);
    if (empty($reference_text)) {
        return [];
    }

    $potential_names = [];

    // Pattern 1: After "REF" keyword
    if (preg_match('/REF\s+([A-Z][A-Za-z\s]+)/i', $reference_text, $matches)) {
        $potential_names[] = trim($matches[1]);
    }

    // Pattern 2: After SPEI RECIBIDO
    if (preg_match('/SPEI RECIBIDO\s+(.+)$/i', $reference_text, $matches)) {
        $potential_names[] = trim($matches[1]);
    }

    // Pattern 3: All-uppercase words (likely company names)
    if (preg_match_all('/\b[A-Z][A-Z]+\b/', $reference_text, $matches)) {
        foreach ($matches[0] as $word) {
            if (strlen($word) > 3) { // Skip short acronyms
                $potential_names[] = $word;
            }
        }
    }

    return array_values(array_unique($potential_names));
}

/**
 * Extract potential invoice/folio numbers from reference text.
 *
 * Candidates are collected in this order:
 *   1. The first number that follows a FOLIO / FAC / FACTURA / INV keyword
 *      (an optional "#" is allowed between keyword and number)
 *   2. Every stand-alone number of six or more digits
 *
 * Duplicates are removed and the result is reindexed, so it is always a
 * 0-based list. array_unique() alone would leave gaps in the keys.
 *
 * @param string|null $reference_text - Bank reference text
 * @return array - List of distinct digit strings (empty when none found)
 */
function extract_invoice_number_from_reference($reference_text) {
    // Cast first: passing null to preg_match() is deprecated on newer PHP versions.
    $reference_text = (string)$reference_text;
    $numbers = [];

    // Pattern 1: Folio/invoice keywords followed by number
    if (preg_match('/(?:FOLIO|FAC|FACTURA|INV)\s*#?\s*(\d+)/i', $reference_text, $matches)) {
        $numbers[] = $matches[1];
    }

    // Pattern 2: Stand-alone numbers (6+ digits)
    if (preg_match_all('/\b(\d{6,})\b/', $reference_text, $matches)) {
        $numbers = array_merge($numbers, $matches[1]);
    }

    return array_values(array_unique($numbers));
}

/**
 * Score how alike two texts are, as a rounded percentage (0-100).
 *
 * Both texts are passed through normalize_text_cfdi() before comparison.
 * Identical normalized texts score 100; otherwise the percentage reported by
 * similar_text() is rounded. An empty argument scores 0.
 *
 * @param string $text1 - First text
 * @param string $text2 - Second text
 * @return int|float - Similarity percentage
 */
function text_similarity($text1, $text2) {
    if (empty($text1) || empty($text2)) {
        return 0;
    }

    $left = normalize_text_cfdi($text1);
    $right = normalize_text_cfdi($text2);

    if ($left === $right) {
        return 100;
    }

    // similar_text() writes the percentage into its third (by-reference) argument.
    similar_text($left, $right, $similarity_pct);

    return round($similarity_pct);
}

// ============================================================================
// DATE & AMOUNT UTILITIES
// ============================================================================

/**
 * Calculate the signed number of calendar days between two dates.
 *
 * Only the calendar date of each argument is compared; any time-of-day part is
 * ignored. Without that, an invoice stamped 10:22 and a deposit dated the next
 * day (midnight) would be under 24 hours apart and count as 0 days.
 * Both dates are re-anchored at midnight UTC so the difference is always a
 * whole number of days, unaffected by daylight-saving shifts.
 *
 * @param string $date1 - Start date (any format the DateTime constructor accepts)
 * @param string $date2 - End date
 * @return int - Positive if $date2 is after $date1, negative if before, 0 for the same day
 * @throws Exception - When either string cannot be parsed as a date
 */
function days_between($date1, $date2) {
    $utc = new DateTimeZone('UTC');

    // Reduce each input to its own calendar date, then compare those dates in UTC.
    $start = new DateTimeImmutable((new DateTimeImmutable($date1))->format('Y-m-d'), $utc);
    $end = new DateTimeImmutable((new DateTimeImmutable($date2))->format('Y-m-d'), $utc);

    $diff = $start->diff($end);

    // 'days' is always non-negative; 'invert' is set when $end is before $start.
    return $diff->invert ? -$diff->days : $diff->days;
}

/**
 * Tell whether two dates lie no more than N days apart, in either direction.
 *
 * @param string $date1 - First date
 * @param string $date2 - Second date
 * @param int|float $days_window - Largest allowed distance in days (inclusive)
 * @return bool - true when the absolute day difference is within the window
 */
function dates_within_window($date1, $date2, $days_window) {
    $distance = abs(days_between($date1, $date2));

    return !($distance > $days_window);
}

/**
 * Calculate the difference between two amounts as a percentage of the first.
 *
 * The result is positive when $amount2 is greater than $amount1 and negative
 * when it is smaller, rounded to two decimals. The magnitude of $amount1 is
 * used as the base so this sign rule also holds when $amount1 is negative.
 *
 * A zero base cannot be expressed as a percentage: the function returns 0 when
 * both amounts are zero and the sentinel 999 otherwise.
 *
 * @param int|float|string $amount1 - Base amount
 * @param int|float|string $amount2 - Amount compared against the base
 * @return int|float - Signed percentage difference, or the 0 / 999 sentinel for a zero base
 */
function amount_difference_percent($amount1, $amount2) {
    if ($amount1 == 0) {
        return ($amount2 == 0) ? 0 : 999;
    }

    // abs() on the base keeps the sign of the result equal to the sign of ($amount2 - $amount1).
    return round((($amount2 - $amount1) / abs($amount1)) * 100, 2);
}

/**
 * Tell whether two amounts differ by no more than a percentage tolerance.
 *
 * The difference is measured by amount_difference_percent() relative to
 * $amount1, and its absolute value is compared against the tolerance.
 *
 * @param int|float $amount1 - Base amount
 * @param int|float $amount2 - Amount compared against the base
 * @param int|float $tolerance_percent - Largest allowed difference in percent (inclusive)
 * @return bool - true when the absolute percentage difference is within tolerance
 */
function amounts_within_tolerance($amount1, $amount2, $tolerance_percent) {
    $deviation = abs(amount_difference_percent($amount1, $amount2));

    return !($deviation > $tolerance_percent);
}

// ============================================================================
// TIER 0: EXACT MATCHES (Confidence: 95-100%)
// ============================================================================

/**
 * Tier 0.1: Exact amount (within 0.01%) and deposit within ±3 days of the invoice.
 *
 * Confidence starts at 100 and drops one point per day of distance; the
 * date score drops ten points per day.
 *
 * @param array $invoice - Invoice row (uses 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier0_exact_amount_date($invoice, $deposit) {
    $no_match = ['match' => false];

    if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 0.01)) {
        return $no_match;
    }

    $days_diff = days_between($invoice['Fecha_Emision'], $deposit['fecha']);
    $days_apart = abs($days_diff);

    if ($days_apart > 3) {
        return $no_match;
    }

    return [
        'match' => true,
        'tier' => 0,
        'confidence' => 100 - $days_apart, // 100 same day, 99 one day apart, ...
        'pattern' => 'exact_amount_close_date',
        'amount_score' => 100,
        'date_score' => 100 - ($days_apart * 10),
        'client_score' => 0,
        'text_score' => 0,
        'days_diff' => $days_diff,
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

/**
 * Tier 0.2: Invoice UUID found in the deposit reference.
 *
 * Dashes are stripped and both values upper-cased before comparing.
 *   - Whole UUID present in the reference       → confidence 100, 'uuid_in_reference'
 *   - Only its first 8 characters are present   → confidence 95, 'uuid_partial_in_reference'
 *
 * A missing UUID, a missing reference, or a UUID that is empty once the dashes
 * are removed never matches. The last case matters because strpos() treats an
 * empty needle as found on PHP 8.
 *
 * @param array $invoice - Invoice row (uses 'UUID', 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'numero', 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier0_uuid_in_reference($invoice, $deposit) {
    $no_match = ['match' => false];

    $uuid = $invoice['UUID'] ?? '';
    $reference = $deposit['numero'] ?? '';

    if (empty($uuid) || empty($reference)) {
        return $no_match;
    }

    $uuid_normalized = strtoupper(str_replace('-', '', $uuid));
    $reference_normalized = strtoupper(str_replace('-', '', $reference));

    // Nothing left to search for (e.g. the UUID held only dashes)
    if ($uuid_normalized === '') {
        return $no_match;
    }

    if (strpos($reference_normalized, $uuid_normalized) !== false) {
        $confidence = 100;
        $pattern = 'uuid_in_reference';
    } else {
        // Fall back to the first 8 characters of the UUID
        $uuid_partial = substr($uuid_normalized, 0, 8);
        if (strlen($uuid_partial) < 8 || strpos($reference_normalized, $uuid_partial) === false) {
            return $no_match;
        }
        $confidence = 95;
        $pattern = 'uuid_partial_in_reference';
    }

    return [
        'match' => true,
        'tier' => 0,
        'confidence' => $confidence,
        'pattern' => $pattern,
        'amount_score' => 0,
        'date_score' => 0,
        'client_score' => 0,
        'text_score' => $confidence, // Text score mirrors the confidence (100 full, 95 partial)
        'days_diff' => days_between($invoice['Fecha_Emision'], $deposit['fecha']),
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

/**
 * Tier 0.3: Same RFC on invoice and deposit, with amounts within 1%.
 *
 * RFCs are compared case-insensitively. Both must be present.
 *
 * @param array $invoice - Invoice row (uses 'RFC_Receptor', 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'cliente_rfc', 'deposit', 'fecha')
 * @return array - Match result (confidence 98) or ['match' => false]
 */
function match_cfdi_tier0_rfc_amount($invoice, $deposit) {
    $no_match = ['match' => false];

    $invoice_rfc = $invoice['RFC_Receptor'] ?? '';
    $deposit_rfc = $deposit['cliente_rfc'] ?? '';

    // Both sides need an RFC, and the two must be equal ignoring case
    $both_present = !empty($invoice_rfc) && !empty($deposit_rfc);
    if (!$both_present || strtoupper($invoice_rfc) !== strtoupper($deposit_rfc)) {
        return $no_match;
    }

    // Amounts may differ by at most 1%
    if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 1)) {
        return $no_match;
    }

    return [
        'match' => true,
        'tier' => 0,
        'confidence' => 98,
        'pattern' => 'rfc_exact_amount',
        'amount_score' => 95,
        'date_score' => 0,
        'client_score' => 100,
        'text_score' => 0,
        'days_diff' => days_between($invoice['Fecha_Emision'], $deposit['fecha']),
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

// ============================================================================
// TIER -1 & TIER 0.5: ESTADO-GUIDED MATCHES (Confidence: 90-100%) - SUPERVISED LEARNING
// ============================================================================

/**
 * Tier -1: Estado sequence matching.
 *
 * Reads the invoice's Estado_de_Cuenta (e.g. "ING 13 MAR 24 SANTANDER"), builds
 * the chronological deposit index for that bank/month/year and selects the
 * deposit sitting at the referenced 1-based sequence position.
 *
 * Confidence is 99, lowered to 95 when the invoice total and the deposit differ
 * by more than 10%. Amounts and dates do not gate the match; they are only
 * reported as quality metrics:
 *   - 'amount_diff' / 'amount_diff_pct' are absolute (unsigned) values here
 *   - 'days_diff' is deposit date minus invoice date in whole days (int)
 *
 * @param array $invoice - Invoice with Estado_de_Cuenta field (also uses 'Total', 'Fecha_Emision')
 * @param array $deposits - All deposits (will be filtered and indexed)
 * @return array - Match result including the matched row under 'deposit',
 *                 ['match' => false], or ['match' => false, 'reason' => string]
 *                 when the sequence exceeds the number of deposits in the month
 */
function match_cfdi_tier_minus1_estado_sequence($invoice, $deposits) {
    $estado_data = parse_estado_cuenta($invoice['Estado_de_Cuenta'] ?? '');

    if (!$estado_data || !validate_estado_data($estado_data)) {
        return ['match' => false]; // No Estado or invalid
    }

    // Chronological deposit list for this bank/month/year
    $monthly_index = build_monthly_deposit_index(
        $deposits,
        $estado_data['bank_id'],
        $estado_data['year'],
        $estado_data['month']
    );
    $available = count($monthly_index);

    // The referenced position must exist in the month
    if ($available < $estado_data['sequence']) {
        return [
            'match' => false,
            'reason' => sprintf(
                'Sequence %d out of range (only %d deposits in %s %d for %s)',
                $estado_data['sequence'],
                $available,
                $estado_data['month_name'],
                $estado_data['year'],
                $estado_data['bank_name']
            )
        ];
    }

    $matched_deposit = get_deposit_by_sequence($monthly_index, $estado_data['sequence']);

    if (!$matched_deposit) {
        return ['match' => false];
    }

    // Match quality metrics (informational only)
    $amount_diff = abs($invoice['Total'] - $matched_deposit['deposit']);
    $amount_diff_pct = ($invoice['Total'] > 0) ?
        round($amount_diff / $invoice['Total'] * 100, 2) : 0;

    // round() returns a float and can produce -0.0; cast so days_diff is a
    // plain int like the value the other tiers get from days_between().
    $days_diff = (int)round(
        (strtotime($matched_deposit['fecha']) - strtotime($invoice['Fecha_Emision'])) / 86400
    );

    // Estado names the deposit explicitly, so confidence stays high even when
    // the amounts are off; it is only reduced a little in that case.
    $confidence = ($amount_diff_pct > 10) ? 95 : 99;

    return [
        'match' => true,
        'tier' => -1,
        'confidence' => $confidence,
        'deposit' => $matched_deposit,
        'pattern' => sprintf('estado_sequence_%d_%s_%d_%s',
            $estado_data['sequence'],
            $estado_data['month_name'],
            $estado_data['year'],
            $estado_data['bank_name']
        ),
        'explanation' => sprintf(
            'Invoice explicitly references deposit #%d in %s %d (%s bank)',
            $estado_data['sequence'],
            $estado_data['month_name'],
            $estado_data['year'],
            $estado_data['bank_name']
        ),
        'amount_diff' => $amount_diff,
        'amount_diff_pct' => $amount_diff_pct,
        'days_diff' => $days_diff,
        'estado_data' => $estado_data, // Include for batch detection
        'monthly_index_stats' => get_monthly_index_stats($monthly_index)
    ];
}

/**
 * Tier 0.5: Match using the bank and month named in Estado_de_Cuenta.
 *
 * Fallback for invoices whose Estado parses but whose sequence position could
 * not be resolved by Tier -1. parse_estado_cuenta() yields a bank, month and
 * year (no single date), so the target is the whole Estado month:
 *   - the deposit must belong to the Estado bank
 *   - its amount must be within 1% of the invoice total
 *   - its date must fall inside the Estado month, or at most 7 days outside it
 *
 * 'days_from_estado' is 0 for a deposit inside the month, negative for one
 * before the month starts and positive for one after it ends.
 * 'estado_target_date' holds the first day of the Estado month (Y-m-d).
 *
 * Candidates are scored 70% on closeness to the month and 30% on amount. The
 * highest score wins and the first candidate wins a tie.
 *
 * @param array $invoice - Invoice with Estado_de_Cuenta field (also uses 'Total', 'Fecha_Emision')
 * @param array $deposits - All deposits (will be filtered by bank)
 * @return array - Match result including the matched row under 'deposit', or ['match' => false]
 */
function match_cfdi_tier0_5_estado_guided($invoice, $deposits) {
    $parsed = parse_estado_cuenta($invoice['Estado_de_Cuenta'] ?? '');

    if (!$parsed || !validate_estado_data($parsed)) {
        return ['match' => false]; // No Estado, parse failed, or values out of range
    }

    $target_bank_id = $parsed['bank_id'];

    // First and last calendar day of the Estado month ('t' = days in month)
    $window_start = sprintf('%04d-%02d-01', $parsed['year'], $parsed['month']);
    $window_end = date('Y-m-t', strtotime($window_start));

    $best_match = null;
    $best_score = 0;

    foreach ($deposits as $deposit) {
        // Only deposits of the bank named in Estado
        if (($deposit['banco_cuenta_id'] ?? null) != $target_bank_id) {
            continue;
        }

        // Amount must be very close (±1%)
        if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 1)) {
            continue;
        }

        $deposit_timestamp = strtotime($deposit['fecha']);
        if ($deposit_timestamp === false) {
            continue; // Unparseable date cannot be placed relative to the month
        }
        $deposit_day = date('Y-m-d', $deposit_timestamp);

        // Signed distance to the Estado month; Y-m-d strings compare chronologically
        if ($deposit_day < $window_start) {
            $days_from_estado = days_between($window_start, $deposit_day);
        } elseif ($deposit_day > $window_end) {
            $days_from_estado = days_between($window_end, $deposit_day);
        } else {
            $days_from_estado = 0;
        }

        // Must be within ±7 days of the Estado month
        if (abs($days_from_estado) > 7) {
            continue;
        }

        // Closer to the Estado month = higher score (-5 per day outside it)
        $date_score = 100 - (abs($days_from_estado) * 5);

        // Penalize amount difference (-10 per percentage point)
        $amount_diff_pct = abs(amount_difference_percent($invoice['Total'], $deposit['deposit']));
        $amount_score = 100 - ($amount_diff_pct * 10);

        // Combined score (70% date, 30% amount)
        $combined_score = ($date_score * 0.7) + ($amount_score * 0.3);

        if ($combined_score > $best_score) {
            $best_score = $combined_score;
            $best_match = [
                'match' => true,
                'tier' => 0.5, // Between Tier 0 and Tier 1
                'confidence' => round(min(100, 90 + ($best_score / 10))), // 90-100 range
                'pattern' => 'estado_guided_' . $parsed['bank_name'],
                'amount_score' => round($amount_score),
                'date_score' => round($date_score),
                'client_score' => 0,
                'text_score' => 100, // High because Estado is filled in manually
                'days_diff' => days_between($invoice['Fecha_Emision'], $deposit['fecha']),
                'days_from_estado' => $days_from_estado,
                'estado_target_date' => $window_start,
                'amount_diff' => $deposit['deposit'] - $invoice['Total'],
                'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit']),
                'deposit' => $deposit, // Store the matched deposit
            ];
        }
    }

    if ($best_match) {
        return $best_match;
    }

    return ['match' => false];
}

// ============================================================================
// TIER 1: STRONG MATCHES (Confidence: 80-94%)
// ============================================================================

/**
 * Tier 1.1: Amount within 1% and deposit within ±7 days of the invoice.
 *
 * Confidence starts at 94 and loses two points per day of distance, never
 * dropping below 80.
 *
 * @param array $invoice - Invoice row (uses 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier1_amount_week($invoice, $deposit) {
    $no_match = ['match' => false];

    if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 1)) {
        return $no_match;
    }

    $days_diff = days_between($invoice['Fecha_Emision'], $deposit['fecha']);

    if (abs($days_diff) > 7) {
        return $no_match;
    }

    $date_penalty = abs($days_diff) * 2; // -2 per day

    return [
        'match' => true,
        'tier' => 1,
        'confidence' => max(80, 94 - $date_penalty),
        'pattern' => 'amount_1pct_date_week',
        'amount_score' => 95,
        'date_score' => 100 - $date_penalty,
        'client_score' => 0,
        'text_score' => 0,
        'days_diff' => $days_diff,
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

/**
 * Tier 1.2: Client name similarity of at least 70% and amount within 5%.
 *
 * Confidence grows from 80 by one point per three points of similarity above
 * 70, and is capped at 94.
 *
 * @param array $invoice - Invoice row (uses 'Nombre_Receptor', 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'cliente', 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier1_client_fuzzy($invoice, $deposit) {
    $no_match = ['match' => false];

    $invoice_client = $invoice['Nombre_Receptor'] ?? '';
    $deposit_client = $deposit['cliente'] ?? '';

    // Both names are required for a comparison
    if (empty($invoice_client) || empty($deposit_client)) {
        return $no_match;
    }

    $similarity = text_similarity($invoice_client, $deposit_client);

    // Names must be at least 70% alike and amounts within 5%
    if ($similarity < 70 || !amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 5)) {
        return $no_match;
    }

    $confidence = round(80 + ($similarity - 70) / 3); // 80-90 range

    return [
        'match' => true,
        'tier' => 1,
        'confidence' => min(94, $confidence),
        'pattern' => 'client_fuzzy_amount_5pct',
        'amount_score' => 85,
        'date_score' => 0,
        'client_score' => $similarity,
        'text_score' => $similarity,
        'days_diff' => days_between($invoice['Fecha_Emision'], $deposit['fecha']),
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

/**
 * Tier 1.3: Same RFC on invoice and deposit, with the deposit within ±14 days.
 *
 * RFCs are compared case-insensitively; amounts are not checked by this tier.
 * Confidence starts at 94 and loses one point per day of distance, never
 * dropping below 80. The date score loses two points per day.
 *
 * @param array $invoice - Invoice row (uses 'RFC_Receptor', 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'cliente_rfc', 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier1_rfc_twoweeks($invoice, $deposit) {
    $no_match = ['match' => false];

    $invoice_rfc = $invoice['RFC_Receptor'] ?? '';
    $deposit_rfc = $deposit['cliente_rfc'] ?? '';

    // Both sides need an RFC, and the two must be equal ignoring case
    $both_present = !empty($invoice_rfc) && !empty($deposit_rfc);
    if (!$both_present || strtoupper($invoice_rfc) !== strtoupper($deposit_rfc)) {
        return $no_match;
    }

    $days_diff = days_between($invoice['Fecha_Emision'], $deposit['fecha']);

    if (abs($days_diff) > 14) {
        return $no_match;
    }

    $date_penalty = abs($days_diff) * 1; // -1 per day

    return [
        'match' => true,
        'tier' => 1,
        'confidence' => max(80, 94 - $date_penalty),
        'pattern' => 'rfc_exact_date_twoweeks',
        'amount_score' => 0,
        'date_score' => 100 - $date_penalty * 2,
        'client_score' => 100,
        'text_score' => 0,
        'days_diff' => $days_diff,
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

// ============================================================================
// TIER 2: PROBABLE MATCHES (Confidence: 65-79%)
// ============================================================================

/**
 * Tier 2.1: Amount within 5% and deposit within ±30 days of the invoice.
 *
 * Confidence starts at 79 and loses half a point per day of distance (rounded),
 * never dropping below 65. The date score loses one point per day with a
 * floor of 50.
 *
 * @param array $invoice - Invoice row (uses 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier2_amount_month($invoice, $deposit) {
    $no_match = ['match' => false];

    if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 5)) {
        return $no_match;
    }

    $days_diff = days_between($invoice['Fecha_Emision'], $deposit['fecha']);

    if (abs($days_diff) > 30) {
        return $no_match;
    }

    $date_penalty = abs($days_diff) * 0.5; // -0.5 per day
    $raw_confidence = 79 - $date_penalty;

    return [
        'match' => true,
        'tier' => 2,
        'confidence' => max(65, round($raw_confidence)),
        'pattern' => 'amount_5pct_date_month',
        'amount_score' => 80,
        'date_score' => max(50, 100 - $date_penalty * 2),
        'client_score' => 0,
        'text_score' => 0,
        'days_diff' => $days_diff,
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

/**
 * Tier 2.2: Client name similarity of at least 60% and amount within 10%.
 *
 * Confidence grows from 65 by one point per three points of similarity above
 * 60, and is capped at 79.
 *
 * @param array $invoice - Invoice row (uses 'Nombre_Receptor', 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'cliente', 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier2_client_amount($invoice, $deposit) {
    $no_match = ['match' => false];

    $invoice_client = $invoice['Nombre_Receptor'] ?? '';
    $deposit_client = $deposit['cliente'] ?? '';

    // Both names are required for a comparison
    if (empty($invoice_client) || empty($deposit_client)) {
        return $no_match;
    }

    $similarity = text_similarity($invoice_client, $deposit_client);

    // Names must be at least 60% alike and amounts within 10%
    if ($similarity < 60 || !amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 10)) {
        return $no_match;
    }

    $confidence = round(65 + ($similarity - 60) / 3);

    return [
        'match' => true,
        'tier' => 2,
        'confidence' => min(79, $confidence),
        'pattern' => 'client_similarity_amount_10pct',
        'amount_score' => 70,
        'date_score' => 0,
        'client_score' => $similarity,
        'text_score' => $similarity,
        'days_diff' => days_between($invoice['Fecha_Emision'], $deposit['fecha']),
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

// ============================================================================
// TIER 3: POSSIBLE MATCHES (Confidence: 50-64%)
// ============================================================================

/**
 * Tier 3.1: Amount within 10% and deposit within ±60 days of the invoice.
 *
 * Confidence starts at 64 and loses 0.2 points per day of distance (rounded),
 * never dropping below 50. The date score has a floor of 30.
 *
 * @param array $invoice - Invoice row (uses 'Total', 'Fecha_Emision')
 * @param array $deposit - Deposit row (uses 'deposit', 'fecha')
 * @return array - Match result or ['match' => false]
 */
function match_cfdi_tier3_amount_twomonths($invoice, $deposit) {
    $no_match = ['match' => false];

    if (!amounts_within_tolerance($invoice['Total'], $deposit['deposit'], 10)) {
        return $no_match;
    }

    $days_diff = days_between($invoice['Fecha_Emision'], $deposit['fecha']);

    if (abs($days_diff) > 60) {
        return $no_match;
    }

    $date_penalty = abs($days_diff) * 0.2; // -0.2 per day
    $raw_confidence = 64 - $date_penalty;

    return [
        'match' => true,
        'tier' => 3,
        'confidence' => max(50, round($raw_confidence)),
        'pattern' => 'amount_10pct_date_twomonths',
        'amount_score' => 65,
        'date_score' => max(30, 100 - $date_penalty * 3),
        'client_score' => 0,
        'text_score' => 0,
        'days_diff' => $days_diff,
        'amount_diff' => $deposit['deposit'] - $invoice['Total'],
        'amount_diff_pct' => amount_difference_percent($invoice['Total'], $deposit['deposit'])
    ];
}

// ============================================================================
// MASTER MATCHING FUNCTION
// ============================================================================

/**
 * Match an invoice against the full deposit list using the Estado-guided tiers.
 *
 * Tiers are tried in order and the first hit is returned:
 *   1. Tier -1  (Estado sequence, e.g. "ING 13 MAR 24 SANTANDER" = 13th deposit
 *      in March 2024 for SANTANDER)
 *   2. Tier 0.5 (Estado bank/date fallback)
 *
 * On a hit, the tier's 'deposit' entry is moved to 'matched_deposit'.
 *
 * When neither tier matches, the call falls through to
 * match_invoice_to_deposit() with a null deposit, which reports no match. The
 * per-deposit tiers (0-3) are therefore not evaluated from this entry point.
 *
 * @param array $invoice - Invoice to match
 * @param array $deposits - Array of ALL deposits (the Estado tiers filter by bank)
 * @return array - Match result with 'matched_deposit', or ['match' => false]
 */
function match_invoice_to_all_deposits($invoice, $deposits) {
    // Estado-based matchers, highest confidence first
    $estado_tiers = [
        'match_cfdi_tier_minus1_estado_sequence',
        'match_cfdi_tier0_5_estado_guided',
    ];

    foreach ($estado_tiers as $tier_matcher) {
        $result = $tier_matcher($invoice, $deposits);

        if (!$result['match']) {
            continue;
        }

        // Re-key the matched row so every result exposes it as 'matched_deposit'
        $matched_deposit = $result['deposit'];
        unset($result['deposit']);
        $result['matched_deposit'] = $matched_deposit;

        return $result;
    }

    // No single deposit is selected here, so this yields ['match' => false]
    return match_invoice_to_deposit($invoice, null);
}

/**
 * Run every per-deposit tier for one invoice/deposit pair.
 *
 * Matchers are tried from the most to the least confident tier (0, 1, 2, 3)
 * and the first one that reports a match decides the result.
 *
 * @param array $invoice - Invoice to match
 * @param array|null $deposit - Single deposit to match against (null yields no match)
 * @return array - First successful match result, or ['match' => false]
 */
function match_invoice_to_deposit($invoice, $deposit) {
    $no_match = ['match' => false];

    if ($deposit === null) {
        return $no_match;
    }

    // Evaluation order matters: earlier entries take precedence.
    $tier_matchers = [
        // Tier 0 (exact matches)
        'match_cfdi_tier0_exact_amount_date',
        'match_cfdi_tier0_uuid_in_reference',
        'match_cfdi_tier0_rfc_amount',
        // Tier 1 (strong matches)
        'match_cfdi_tier1_amount_week',
        'match_cfdi_tier1_client_fuzzy',
        'match_cfdi_tier1_rfc_twoweeks',
        // Tier 2 (probable matches)
        'match_cfdi_tier2_amount_month',
        'match_cfdi_tier2_client_amount',
        // Tier 3 (possible matches)
        'match_cfdi_tier3_amount_twomonths',
    ];

    foreach ($tier_matchers as $tier_matcher) {
        $result = $tier_matcher($invoice, $deposit);
        if ($result['match']) {
            return $result;
        }
    }

    return $no_match;
}

// ============================================================================
// HELPER FUNCTIONS FOR ITERATION MANAGEMENT
// ============================================================================

/**
 * Compute the next iteration number.
 *
 * Queries cfdi_matcher_iterations for MAX(iteration_number) + 1; the COALESCE
 * in the query turns a NULL maximum into 0, so the result is then 1.
 *
 * @return int - Query result cast to int
 */
function get_next_cfdi_iteration_number() {
    $next_number_sql = "SELECT COALESCE(MAX(iteration_number), 0) + 1 as next_num
            FROM cfdi_matcher_iterations";

    return (int)ia_singleton($next_number_sql);
}

/**
 * Create a new iteration record and return its iteration_id.
 *
 * Inserts a row into cfdi_matcher_iterations and then reads back the newest
 * iteration_id stored for that iteration number.
 *
 * @param int $iteration_number - Iteration number (cast to int before use)
 * @param string $source_type - Stored in the source_type column (default 'both')
 * @param string|null $date_start - Stored in date_range_start
 * @param string|null $date_end - Stored in date_range_end
 * @return mixed - iteration_id as returned by ia_singleton(), or false if the insert failed
 */
function create_cfdi_iteration($iteration_number, $source_type = 'both', $date_start = null, $date_end = null) {
    // Normalise once: the value is interpolated unquoted into the SELECT below,
    // so it must be a plain integer.
    $iteration_number = (int)$iteration_number;

    $values = [
        'iteration_number' => $iteration_number,
        'source_type' => $source_type,
        'date_range_start' => $date_start,
        'date_range_end' => $date_end,
        'created_by' => $_SESSION['usuario_id'] ?? 'system'
    ];

    // Without this check a failed insert would fall through to the SELECT and
    // could hand back the id of an older row that has the same iteration number.
    if (!ia_insert('cfdi_matcher_iterations', $values)) {
        error_log("Failed to create cfdi_matcher_iterations record for iteration {$iteration_number}");
        return false;
    }

    // Get the inserted ID (newest row for this iteration number)
    $sql = "SELECT iteration_id FROM cfdi_matcher_iterations
            WHERE iteration_number = {$iteration_number}
            ORDER BY iteration_id DESC LIMIT 1";

    return ia_singleton($sql);
}

/**
 * Persist final statistics for an iteration.
 *
 * The iteration_id is written into a copy of the stats array (replacing any
 * value already stored under that key) and the array is passed to ia_update().
 * Presumably ia_update() uses that key to locate the row - confirm against the helper.
 *
 * @param mixed $iteration_id - Iteration to update
 * @param array $stats - Column => value pairs to store
 * @return void
 */
function update_cfdi_iteration_stats($iteration_id, $stats) {
    $row = $stats;
    $row['iteration_id'] = $iteration_id;

    ia_update('cfdi_matcher_iterations', $row);
}

/**
 * Log a match result into cfdi_matcher_results.
 *
 * The invoice columns and the 'matched' flag are always written. Deposit columns,
 * tier/confidence/pattern, day and amount differences, the JSON score breakdown,
 * the generated explanation and the needs_review flag are added only when
 * $match_result['match'] is truthy.
 *
 * @param mixed $iteration_id - Iteration the result belongs to
 * @param array $invoice - Invoice row
 * @param array $deposit - Deposit row (only read when the result is a match)
 * @param array $match_result - Result produced by the matching tiers
 * @return void
 */
function log_cfdi_match_result($iteration_id, $invoice, $deposit, $match_result) {
    $is_match = (bool)$match_result['match'];

    $row = [
        'iteration_id' => $iteration_id,
        'invoice_id' => $invoice['eleyeme_cfdi_emitido_id'],
        'invoice_uuid' => $invoice['UUID'],
        'invoice_date' => $invoice['Fecha_Emision'],
        'invoice_amount' => $invoice['Total'],
        'invoice_client_name' => $invoice['Nombre_Receptor'],
        'invoice_client_rfc' => $invoice['RFC_Receptor'],
        'matched' => $is_match ? 1 : 0,
    ];

    // Unmatched results carry invoice data only
    if (!$is_match) {
        ia_insert('cfdi_matcher_results', $row);
        return;
    }

    // None of these keys exist in $row yet, so the union simply appends them
    $row += [
        'deposit_id' => $deposit['banco_cuenta_mov_id'],
        'deposit_date' => $deposit['fecha'],
        'deposit_amount' => $deposit['deposit'],
        'deposit_reference' => $deposit['numero'],
        'match_tier' => $match_result['tier'],
        'match_confidence' => $match_result['confidence'],
        'match_pattern' => $match_result['pattern'],
        'days_between_invoice_deposit' => $match_result['days_diff'],
        'amount_difference' => $match_result['amount_diff'],
        'amount_difference_percent' => $match_result['amount_diff_pct'],
        // Detailed scores are stored as a JSON document
        'match_scores' => json_encode([
            'amount_score' => $match_result['amount_score'],
            'date_score' => $match_result['date_score'],
            'client_score' => $match_result['client_score'],
            'text_score' => $match_result['text_score'],
        ]),
        'match_explanation' => generate_match_explanation($match_result, $invoice, $deposit),
        // Anything under 80 confidence is flagged for review
        'needs_review' => ($match_result['confidence'] < 80) ? 1 : 0,
    ];

    ia_insert('cfdi_matcher_results', $row);
}

/**
 * Log a failure (unmatched invoice or deposit) into cfdi_matcher_failures.
 *
 * When $type is exactly 'unmatched_invoice' the invoice columns are filled from
 * $item; for any other $type, $item is read as a deposit row. If $closest_matches
 * is non-empty, the 'id' and 'score' of its first element are recorded as well
 * (null when absent).
 *
 * @param mixed $iteration_id - Iteration the failure belongs to
 * @param string $type - Failure type; 'unmatched_invoice' selects the invoice columns
 * @param array $item - Invoice or deposit row, depending on $type
 * @param array $closest_matches - Optional candidates; only element 0 is used
 * @return void
 */
function log_cfdi_failure($iteration_id, $type, $item, $closest_matches = []) {
    if ($type === 'unmatched_invoice') {
        $item_columns = [
            'invoice_id' => $item['eleyeme_cfdi_emitido_id'],
            'invoice_uuid' => $item['UUID'],
            'invoice_date' => $item['Fecha_Emision'],
            'invoice_amount' => $item['Total'],
            'invoice_client_name' => $item['Nombre_Receptor'],
            'invoice_client_rfc' => $item['RFC_Receptor'],
        ];
    } else {
        $item_columns = [
            'deposit_id' => $item['banco_cuenta_mov_id'],
            'deposit_date' => $item['fecha'],
            'deposit_amount' => $item['deposit'],
            'deposit_reference' => $item['numero'],
            'deposit_client_name' => $item['cliente'],
            'deposit_client_rfc' => $item['cliente_rfc'],
        ];
    }

    // Common columns first, then the invoice- or deposit-specific ones
    $row = [
        'iteration_id' => $iteration_id,
        'failure_type' => $type,
    ] + $item_columns;

    if (!empty($closest_matches)) {
        $row['closest_match_id'] = $closest_matches[0]['id'] ?? null;
        $row['closest_match_score'] = $closest_matches[0]['score'] ?? null;
    }

    ia_insert('cfdi_matcher_failures', $row);
}

/**
 * Generate human-readable match explanation
 *
 * Builds a multi-line text with the tier label and confidence, the matching
 * pattern, the four component scores, the invoice/deposit amounts with their
 * difference, the day gap, and a low-confidence warning when confidence is below 70.
 *
 * @param array $match_result - Reads tier, confidence, pattern, amount_score, date_score,
 *                              client_score, text_score, amount_diff, amount_diff_pct, days_diff
 * @param array $invoice - Reads 'Total'
 * @param array $deposit - Reads 'deposit'
 * @return string - Explanation lines joined with "\n"
 */
function generate_match_explanation($match_result, $invoice, $deposit) {
    $lines = [];

    // Keys are written as strings so the fractional tier 0.5 can have its own entry;
    // PHP still normalises '-1', '0', '1', ... to integer keys.
    // NOTE(review): the -1 and 0.5 labels assume the Estado tiers report those tier
    // values, as their function names (tier_minus1 / tier0_5) suggest - confirm.
    $tier_names = [
        '-1' => 'Estado Sequence Match',
        '0' => 'Exact Match',
        '0.5' => 'Estado Guided Match',
        '1' => 'Strong Match',
        '2' => 'Probable Match',
        '3' => 'Possible Match',
        '4' => 'Weak Match'
    ];

    // Look the tier up by its string form: a float used directly as an array key
    // is truncated to an int, which would label tier 0.5 as tier 0 ("Exact Match").
    $tier_key = (string)$match_result['tier'];
    $tier_name = $tier_names[$tier_key] ?? 'Unknown';
    $lines[] = "Tier {$match_result['tier']}: {$tier_name} ({$match_result['confidence']}%)";
    $lines[] = "Pattern: {$match_result['pattern']}";

    $lines[] = "\nScore Breakdown:";
    $lines[] = "  Amount: {$match_result['amount_score']}%";
    $lines[] = "  Date: {$match_result['date_score']}%";
    $lines[] = "  Client: {$match_result['client_score']}%";
    $lines[] = "  Text: {$match_result['text_score']}%";

    $lines[] = "\nDetails:";
    $lines[] = "  Invoice: " . number_format($invoice['Total'], 2);
    $lines[] = "  Deposit: " . number_format($deposit['deposit'], 2);
    $lines[] = "  Difference: " . number_format($match_result['amount_diff'], 2) .
               " ({$match_result['amount_diff_pct']}%)";
    $lines[] = "  Days between: {$match_result['days_diff']} days";

    if ($match_result['confidence'] < 70) {
        $lines[] = "\n⚠️  LOW CONFIDENCE - Recommend manual review";
    }

    return implode("\n", $lines);
}

// ============================================================================
// LINK MANAGEMENT FUNCTIONS (Production Data Integration)
// ============================================================================

/**
 * Get active link for an invoice
 *
 * Looks in cfdi_matcher_links for a row with the given invoice_id and
 * link_status = 'active' (at most one row is requested).
 *
 * @param string $invoice_id - eleyeme_cfdi_emitido_id
 * @return array|null - First row returned by the query, or null when the result is empty
 */
function get_active_invoice_link($invoice_id) {
    // Escape through the shared mysqli connection before embedding in the query
    $escaped_invoice_id = mysqli_real_escape_string($GLOBALS['gIAsql_link'], $invoice_id);

    $sql = "SELECT
                cml.cfdi_matcher_link_id,
                cml.banco_cuenta_mov_link_id,
                cml.deposit_id,
                cml.match_tier,
                cml.match_confidence,
                cml.match_pattern,
                cml.created_by_iteration,
                cml.last_confirmed_iteration
            FROM cfdi_matcher_links cml
            WHERE cml.invoice_id = '{$escaped_invoice_id}'
              AND cml.link_status = 'active'
            LIMIT 1";

    $rows = ia_sqlArrayIndx($sql);
    if (empty($rows)) {
        return null;
    }

    return $rows[0];
}

/**
 * Create new invoice-deposit link
 *
 * Creates entries in both banco_cuenta_mov_link and cfdi_matcher_links. The two
 * inserts are not wrapped in a transaction here: if the second insert fails, the
 * first row is removed again with an explicit DELETE.
 *
 * @param int $iteration_id - Current iteration ID
 * @param int $iteration_number - Current iteration number (used in the link_por label)
 * @param array $invoice - Invoice data from eleyeme_cfdi_emitidos
 * @param array $deposit - Deposit data from banco_cuenta_mov
 * @param array $match_result - Match algorithm results ('tier' and 'confidence' are
 *                              required; 'pattern' and 'explanation' default to '')
 * @return string|false - cfdi_matcher_link_id on success, false on failure
 */
function create_cfdi_link($iteration_id, $iteration_number, $invoice, $deposit, $match_result) {
    // Generate UUIDs (one per table row)
    $link_id = generateUUID();
    $matcher_link_id = generateUUID();

    $invoice_id = $invoice['eleyeme_cfdi_emitido_id'];
    $deposit_id = $deposit['banco_cuenta_mov_id'];

    // 1. Create entry in banco_cuenta_mov_link (production link table)
    $link_data = [
        'banco_cuenta_mov_link_id' => $link_id,
        'banco_cuenta_mov_id' => $deposit_id,
        'link_to' => 'eleyeme_cfdi_emitidos',
        'link' => $invoice_id,
        'tienda_id' => $deposit['tienda_id'] ?? null,
        'link_por' => 'cfdi_matcher_iteration_' . $iteration_number,
        'link_el' => date('Y-m-d H:i:s')
    ];

    if (!ia_insert('banco_cuenta_mov_link', $link_data)) {
        error_log("Failed to create banco_cuenta_mov_link for invoice {$invoice_id}");
        return false;
    }

    // 2. Create entry in cfdi_matcher_links (matcher metadata table)
    $matcher_data = [
        'cfdi_matcher_link_id' => $matcher_link_id,
        'banco_cuenta_mov_link_id' => $link_id,
        'iteration_id' => $iteration_id,
        'invoice_id' => $invoice_id,
        'deposit_id' => $deposit_id,
        'match_tier' => $match_result['tier'],
        'match_confidence' => $match_result['confidence'],
        'match_pattern' => $match_result['pattern'] ?? '',
        'match_explanation' => $match_result['explanation'] ?? '',
        'link_status' => 'active',
        'created_by_iteration' => $iteration_id,
        'last_confirmed_iteration' => $iteration_id,
        'user_verified' => 0
    ];

    if (!ia_insert('cfdi_matcher_links', $matcher_data)) {
        error_log("Failed to create cfdi_matcher_links for invoice {$invoice_id}");
        // Rollback banco_cuenta_mov_link so no production link is left without metadata.
        // $link_id was generated above, not taken from caller input.
        ia_query("DELETE FROM banco_cuenta_mov_link WHERE banco_cuenta_mov_link_id = '$link_id'");
        return false;
    }

    return $matcher_link_id;
}

/**
 * Confirm an existing link (same deposit matched again).
 *
 * Writes the current iteration into last_confirmed_iteration and refreshes
 * updated_at for the given cfdi_matcher_links row.
 *
 * @param string $matcher_link_id - cfdi_matcher_link_id to update
 * @param int $iteration_id - Current iteration ID
 * @return bool - Result of ia_update()
 */
function update_cfdi_link($matcher_link_id, $iteration_id) {
    return ia_update('cfdi_matcher_links', [
        'cfdi_matcher_link_id' => $matcher_link_id,
        'last_confirmed_iteration' => $iteration_id,
        'updated_at' => date('Y-m-d H:i:s'),
    ]);
}

/**
 * Supersede existing link with new one
 *
 * Marks the old cfdi_matcher_links row as superseded and then creates a new
 * active link through create_cfdi_link().
 *
 * NOTE(review): the two steps are not atomic. If creating the replacement fails,
 * the old link stays 'superseded'; that case is logged here but not undone.
 * This function also does not touch the old link's banco_cuenta_mov_link row -
 * confirm whether that row is cleaned up elsewhere.
 *
 * @param string $old_matcher_link_id - Link to supersede
 * @param int $iteration_id - Current iteration ID
 * @param int $iteration_number - Current iteration number
 * @param array $invoice - Invoice data
 * @param array $new_deposit - New deposit to link to
 * @param array $match_result - New match results
 * @return string|false - New cfdi_matcher_link_id or false
 */
function supersede_cfdi_link($old_matcher_link_id, $iteration_id, $iteration_number, $invoice, $new_deposit, $match_result) {
    // 1. Mark old link as superseded
    $supersede_data = [
        'cfdi_matcher_link_id' => $old_matcher_link_id,
        'link_status' => 'superseded',
        'superseded_by_iteration' => $iteration_id,
        'updated_at' => date('Y-m-d H:i:s')
    ];

    if (!ia_update('cfdi_matcher_links', $supersede_data)) {
        error_log("Failed to supersede link $old_matcher_link_id");
        return false;
    }

    // 2. Create new active link
    $new_matcher_link_id = create_cfdi_link($iteration_id, $iteration_number, $invoice, $new_deposit, $match_result);

    if ($new_matcher_link_id === false) {
        // Make the half-finished state visible instead of failing silently
        error_log("Link $old_matcher_link_id was superseded but its replacement could not be created");
    }

    return $new_matcher_link_id;
}

/**
 * Deactivate a link (soft delete).
 *
 * Sets link_status to 'deleted', records which iteration removed the match and
 * refreshes updated_at; this function does not remove the row.
 *
 * @param string $matcher_link_id - Link to delete
 * @param int $iteration_id - Iteration that removed the match
 * @return bool - Result of ia_update()
 */
function delete_cfdi_link($matcher_link_id, $iteration_id) {
    return ia_update('cfdi_matcher_links', [
        'cfdi_matcher_link_id' => $matcher_link_id,
        'link_status' => 'deleted',
        'deleted_by_iteration' => $iteration_id,
        'updated_at' => date('Y-m-d H:i:s'),
    ]);
}

/**
 * Get link management statistics for iteration
 *
 * Runs one COUNT(*) query per statistic against cfdi_matcher_links:
 *   - created:    active links whose created_by_iteration is this iteration
 *   - updated:    active links confirmed by this iteration but created by another
 *   - superseded: links whose superseded_by_iteration is this iteration
 *   - deleted:    links whose deleted_by_iteration is this iteration
 *
 * @param int $iteration_id - Iteration to analyze (cast to int before use in SQL)
 * @return array - ['created' => int, 'updated' => int, 'superseded' => int, 'deleted' => int]
 */
function get_iteration_link_stats($iteration_id) {
    // The id is interpolated unquoted into every query, so force it to an integer
    $iteration_id = (int)$iteration_id;

    // WHERE clause per statistic, in the order the result array is documented
    $conditions = [
        'created' => "created_by_iteration = $iteration_id
              AND link_status = 'active'",
        'updated' => "last_confirmed_iteration = $iteration_id
              AND created_by_iteration != $iteration_id
              AND link_status = 'active'",
        'superseded' => "superseded_by_iteration = $iteration_id",
        'deleted' => "deleted_by_iteration = $iteration_id",
    ];

    $stats = [];
    foreach ($conditions as $stat_name => $where) {
        $sql = "SELECT COUNT(*) FROM cfdi_matcher_links
            WHERE $where";
        // Cast so the documented int contract holds even when the helper
        // returns a numeric string, null or false
        $stats[$stat_name] = (int)ia_singleton($sql);
    }

    return $stats;
}

?>
