<?php
/**
 * Iteration 5 - Fix VS Abbreviation Bug + Add Missing Patterns
 */

require_once("../../inc/config.php");

// Emit the page header and the static list of changes shipped in this iteration.
// The fragments are joined with an empty separator, so the markup is sent as one
// unbroken run of text (no newlines between the tags).
echo implode('', [
    "<!DOCTYPE html><html><head><title>Iteration 5</title></head><body>",
    "<h1>Iteration 5 - Bug Fixes & Pattern Expansion</h1>",
    "<p><strong>Fixes Applied:</strong></p>",
    "<ul>",
    "<li>✓ Fixed VS abbreviation: 'versalles' → 'vicente suarez' (BUG FIX!)</li>",
    "<li>✓ Added P.E. handling (Paseo de la Reforma with periods)</li>",
    "<li>✓ Improved Ver pattern matching (Ver 4, Ver PH 7)</li>",
    "<li>✓ Added GH unit format (Guest House: GH1, GH2, GH3)</li>",
    "</ul>",
    "<hr>",
]);

/**
 * Normalize a property name for comparison.
 *
 * Replaces the Spanish accented vowels, "ñ" and "ü" (both cases) with their
 * plain ASCII letters, lower-cases the result as UTF-8, collapses every run of
 * whitespace into one space and trims both ends.
 *
 * @param string $text Raw text.
 * @return string Normalized text.
 */
function normalize_iter5($text) {
    // Built once; strtr() applies all pairs in a single pass.
    static $ascii_for = [
        'á' => 'a', 'Á' => 'A',
        'é' => 'e', 'É' => 'E',
        'í' => 'i', 'Í' => 'I',
        'ó' => 'o', 'Ó' => 'O',
        'ú' => 'u', 'Ú' => 'U',
        'ü' => 'u', 'Ü' => 'U',
        'ñ' => 'n', 'Ñ' => 'N',
    ];

    $unaccented = strtr($text, $ascii_for);
    $lowered = mb_strtolower($unaccented, 'UTF-8');

    return trim(preg_replace('/\s+/', ' ', $lowered));
}

/**
 * Match a Hostify listing title ("anuncio") against the property catalogue.
 *
 * The title is lower-cased and every character other than a-z, 0-9, whitespace
 * and "-" is replaced by a space; the result is then tried against three
 * prefix patterns, in this order:
 *
 *   - Pattern 0   : ABBR##-###            e.g. "VS146-301"    (confidence 90)
 *   - Pattern 0.3 : ABBR## - unit / GH#   e.g. "SLP64 - GH1"  (confidence 90)
 *   - Pattern 0.5 : ABBR # / ABBR PH #    e.g. "Ver PH 7"     (confidence 85)
 *
 * A property qualifies when its normalized name contains the street the
 * abbreviation expands to and the unit number as a whole number (unit "30"
 * does not match "301", unit "2" does not match the "26" of "veracruz 26").
 * For patterns 0 and 0.3 a property whose name also contains the building
 * number is preferred; if none does, the first qualifying property is used.
 *
 * @param string|null $text        Raw listing title; null/blank yields no match.
 * @param array       $propiedades Property rows, each with 'nombre_propiedad'. Taken by
 *                                 reference for signature compatibility; never modified.
 * @return array ['match' => false], or
 *               ['match' => true, 'prop' => row, 'confidence' => int, 'pattern' => string].
 */
function match_iter5($text, &$propiedades) {
    $no_match = ['match' => false];

    // Guard: a NULL/blank title or an unusable catalogue can never match.
    $text = (string) $text;
    if (trim($text) === '' || !is_array($propiedades)) {
        return $no_match;
    }

    // "P.E." must be collapsed before punctuation is stripped, otherwise it
    // becomes "p e" and no pattern recognises it. Case-insensitive so that
    // "p.e." is handled as well.
    $text_clean = str_ireplace('P.E.', 'PE', $text);

    $norm = strtolower(trim(preg_replace('/[^a-z0-9\s\-]/i', ' ', $text_clean)));

    // Abbreviation => street name as it appears in normalized property names.
    // The single-letter 'p' entry cannot be reached by the patterns below
    // (they all require 2-4 letters); it is kept for reference.
    $abbr_map = [
        'slp' => 'san luis potosi',
        'vs'  => 'vicente suarez',
        'ver' => 'veracruz',
        'pe'  => 'paseo de la reforma',
        'p'   => 'paseo de la reforma',
    ];

    // Pattern 0: ABBR##-###
    if (preg_match('/^([a-z]{2,4})(\d{2,3})\s*[\-]\s*(\d{2,4})/i', $norm, $matches)) {
        // Unknown abbreviations are searched for literally.
        $street = $abbr_map[$matches[1]] ?? $matches[1];

        $hit = iter5_find_property(
            $propiedades,
            $street,
            ['pattern_0_fixed' => iter5_unit_regex($matches[3], true)],
            $matches[2]
        );
        if ($hit !== null) {
            return ['match' => true, 'prop' => $hit['prop'], 'confidence' => 90, 'pattern' => $hit['pattern']];
        }
    }

    // Pattern 0.3: ABBR## - unit or ABBR## - GH# (space-dash-space).
    // Any "#" in the title has already been turned into a space above, so the
    // optional "\#" never consumes anything; the surrounding \s+ absorbs it.
    if (preg_match('/^([a-z]{2,4})(\d{2,3})\s+\-\s+\#?([a-z]*\d+)/i', $norm, $matches)) {
        $unit_num = $matches[3];
        $street = $abbr_map[$matches[1]] ?? $matches[1];

        // Checked in order for each property: exact unit first, then "GH1" -> "gh 1".
        $unit_checks = ['pattern_0_3_fixed' => iter5_unit_regex($unit_num, false)];
        if (preg_match('/gh(\d+)/', $unit_num, $gh_match)) {
            $unit_checks['pattern_0_3_gh'] = iter5_unit_regex('gh ' . $gh_match[1], false);
        }

        $hit = iter5_find_property($propiedades, $street, $unit_checks, $matches[2]);
        if ($hit !== null) {
            return ['match' => true, 'prop' => $hit['prop'], 'confidence' => 90, 'pattern' => $hit['pattern']];
        }
    }

    // Pattern 0.5: ABBR # or ABBR PH #  (only for known abbreviations)
    if (preg_match('/^([a-z]{2,4})\s+(ph\s+)?(\d+)/i', $norm, $matches)) {
        $abbr = $matches[1];
        $street = $abbr_map[$abbr] ?? null;

        if ($street !== null) {
            // Titles of the form "Ver N" carry no building number; building 26 is assumed.
            if ($abbr === 'ver') {
                $street = 'veracruz 26';
            }

            $pattern_name = !empty($matches[2]) ? 'pattern_0_5_ph' : 'pattern_0_5';
            $hit = iter5_find_property(
                $propiedades,
                $street,
                [$pattern_name => iter5_unit_regex($matches[3], true)]
            );
            if ($hit !== null) {
                return ['match' => true, 'prop' => $hit['prop'], 'confidence' => 85, 'pattern' => $hit['pattern']];
            }
        }
    }

    return $no_match;
}

/**
 * Build a regex that finds a unit token in a normalized property name without
 * matching a longer number that merely contains it.
 *
 * @param string $unit              Unit token from the listing title, e.g. "301" or "gh1".
 * @param bool   $require_separator True: the unit must directly follow a space or "|".
 *                                  False: it only must not be glued to a preceding digit.
 * @return string PCRE pattern (delimited with "/").
 */
function iter5_unit_regex($unit, $require_separator) {
    $lead = $require_separator ? '[ |]' : '(?<![0-9])';

    return '/' . $lead . preg_quote($unit, '/') . '(?![0-9])/';
}

/**
 * Find the first property whose normalized name contains $street and satisfies
 * one of the unit regexes, preferring one that also contains $building_num.
 *
 * @param array       $propiedades  Property rows with 'nombre_propiedad'.
 * @param string      $street       Lower-case street text to look for.
 * @param array       $unit_checks  Ordered map: pattern label => unit regex.
 * @param string|null $building_num Building number from the title, or null to skip the preference.
 * @return array|null ['prop' => row, 'pattern' => label], or null when nothing qualifies.
 */
function iter5_find_property($propiedades, $street, $unit_checks, $building_num = null) {
    $building_regex = $building_num === null
        ? null
        : '/(?<![0-9])' . preg_quote($building_num, '/') . '(?![0-9])/';

    $fallback = null;

    foreach ($propiedades as $prop) {
        $prop_norm = normalize_iter5((string) ($prop['nombre_propiedad'] ?? ''));

        if (strpos($prop_norm, $street) === false) {
            continue;
        }

        $label = null;
        foreach ($unit_checks as $candidate_label => $unit_regex) {
            if (preg_match($unit_regex, $prop_norm) === 1) {
                $label = $candidate_label;
                break;
            }
        }
        if ($label === null) {
            continue;
        }

        $hit = ['prop' => $prop, 'pattern' => $label];

        // Street + unit + building all agree: best possible candidate.
        if ($building_regex === null || preg_match($building_regex, $prop_norm) === 1) {
            return $hit;
        }

        // Street + unit only: remember the first one in case no better candidate exists.
        if ($fallback === null) {
            $fallback = $hit;
        }
    }

    return $fallback;
}

// Load the property catalogue the listing titles are matched against.
$sql_props = "SELECT * FROM propiedad ORDER BY nombre_propiedad";
$propiedades = ia_sqlArrayIndx($sql_props);
// NOTE(review): what ia_sqlArrayIndx() returns on failure is not visible here;
// coerce anything that is not an array to an empty list so count()/foreach are safe.
if (!is_array($propiedades)) {
    $propiedades = [];
}

echo "<p>✓ Loaded " . count($propiedades) . " properties</p>";

// Hostify reservations from 2025-01-01 onwards that have no property yet
// or whose stored match confidence is below 80.
$sql_hostify = "SELECT *
                FROM hostify_reserva
                WHERE check_in >= '2025-01-01'
                  AND (propiedad_id IS NULL OR match_confidence < 80)";
$unmatched_hf = ia_sqlArrayIndx($sql_hostify);
if (!is_array($unmatched_hf)) {
    $unmatched_hf = [];
}

echo "<p>✓ Found " . count($unmatched_hf) . " Hostify reservations to process</p>";
echo "<p><strong>Re-running matcher with FIXED patterns...</strong></p>";

$newly_matched = 0;   // reservations that had no propiedad_id before this run
$improved = 0;        // reservations that already had one and were re-matched
$pattern_counts = []; // pattern label => number of reservations it matched

echo "<h3>Live Matching Progress:</h3>";
echo "<table border='1' cellpadding='5' style='border-collapse: collapse; font-size: 0.9em;'>";
echo "<tr><th>Anuncio</th><th>Pattern</th><th>Matched Property</th></tr>";

$max_rows_shown = 20; // only the first matches are listed in the progress table
$shown = 0;
foreach ($unmatched_hf as $reserva) {
    $result = match_iter5($reserva['anuncio'], $propiedades);

    if (!$result['match']) {
        continue;
    }

    $propiedad_id = $result['prop']['propiedad_id'];
    $confidence = (int) $result['confidence']; // interpolated into SQL below, so force an integer
    $pattern = $result['pattern'];
    $reserva_id = $reserva['hostify_reserva_id'];

    $sql_update = "UPDATE hostify_reserva
                  SET propiedad_id = " . strit($propiedad_id) . ",
                      match_tier = 0,
                      match_confidence = $confidence,
                      match_pattern = " . strit($pattern) . ",
                      match_timestamp = NOW()
                  WHERE hostify_reserva_id = " . strit($reserva_id);
    // NOTE(review): the result of ia_query() is not checked, so a failed UPDATE
    // is still counted below — confirm how ia_query() reports errors.
    ia_query($sql_update);

    if (empty($reserva['propiedad_id'])) {
        $newly_matched++;
    } else {
        $improved++;
    }

    // Track pattern usage
    $pattern_counts[$pattern] = ($pattern_counts[$pattern] ?? 0) + 1;

    if ($shown < $max_rows_shown) {
        // Listing titles and property names come from the database: escape them
        // so markup characters in the data cannot break (or inject into) the page.
        $anuncio_html = htmlspecialchars((string) $reserva['anuncio'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $pattern_html = htmlspecialchars((string) $pattern, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $nombre_html = htmlspecialchars((string) $result['prop']['nombre_propiedad'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        echo "<tr>";
        echo "<td>{$anuncio_html}</td>";
        echo "<td style='color: blue;'><strong>{$pattern_html}</strong></td>";
        echo "<td>{$nombre_html}</td>";
        echo "</tr>";
        $shown++;
    }
}

// Summarise the matches that did not fit in the table.
$total_matched = $newly_matched + $improved;
if ($total_matched > $shown) {
    echo "<tr><td colspan='3' style='text-align: center; color: #666;'>... and " . ($total_matched - $shown) . " more ...</td></tr>";
}

echo "</table>";

// Summary of this run: how many reservations were matched for the first time,
// how many already had a property and were re-matched, and the sum of both.
echo "<hr>",
     "<h2>📊 Iteration 5 Results:</h2>",
     "<ul>",
     sprintf("<li style='color: green;'><strong>Newly Matched:</strong> %s</li>", $newly_matched),
     sprintf("<li style='color: blue;'><strong>Improved Confidence:</strong> %s</li>", $improved),
     sprintf("<li><strong>Total Gain:</strong> %s improvements</li>", $newly_matched + $improved),
     "</ul>";

// One table row per pattern label with the number of reservations it matched.
echo "<h3>Pattern Usage Breakdown:</h3>",
     "<table border='1' cellpadding='8'>",
     "<tr><th>Pattern</th><th>Matches</th></tr>";
foreach ($pattern_counts as $pattern => $count) {
    printf(
        "<tr><td><strong>%s</strong></td><td style='text-align: center;'>%s</td></tr>",
        $pattern,
        $count
    );
}
echo "</table>";

// Overall statistics (both booking sources, reservations from 2025-01-01 on):
// total rows, rows with a property assigned, rows with confidence >= 80.
$sql_hf = "SELECT COUNT(*) as total,
           SUM(CASE WHEN propiedad_id IS NOT NULL THEN 1 ELSE 0 END) as matched,
           SUM(CASE WHEN match_confidence >= 80 THEN 1 ELSE 0 END) as high_conf
           FROM hostify_reserva
           WHERE check_in >= '2025-01-01'";
$hf_stats = ia_sqlArrayIndx($sql_hf)[0];

$sql_cb = "SELECT COUNT(*) as total,
           SUM(CASE WHEN propiedad_id IS NOT NULL THEN 1 ELSE 0 END) as matched,
           SUM(CASE WHEN match_confidence >= 80 THEN 1 ELSE 0 END) as high_conf
           FROM cloudbeds_reserva
           WHERE check_in_date >= '2025-01-01'";
$cb_stats = ia_sqlArrayIndx($sql_cb)[0];

$total = $cb_stats['total'] + $hf_stats['total'];
$matched = $cb_stats['matched'] + $hf_stats['matched'];
$high_conf = $cb_stats['high_conf'] + $hf_stats['high_conf'];
$match_rate = $total > 0 ? ($matched / $total) * 100 : 0;

// High-confidence counts this run is compared against.
$iter4_high_conf = 529;    // value used for the "Gain vs Iteration 4" row
$baseline_high_conf = 420; // value labelled "baseline" in the cumulative line

// '%+d' prints the sign itself ("+12", "+0", "-5"); prefixing a literal "+"
// would render a negative difference as "+-5".
$gain_vs_iter4 = sprintf('%+d', $high_conf - $iter4_high_conf);
$gain_vs_baseline = sprintf('%+d', $high_conf - $baseline_high_conf);

echo "<hr>";
echo "<h2>🎯 Final System State:</h2>";
echo "<table border='1' cellpadding='10'>";
echo "<tr><th>Metric</th><th>Value</th></tr>";
echo "<tr><td>Total Reservations</td><td>$total</td></tr>";
echo "<tr><td>Matched</td><td>$matched</td></tr>";
echo "<tr><td><strong>Match Rate</strong></td><td><strong>" . number_format($match_rate, 2) . "%</strong></td></tr>";
echo "<tr><td><strong>High Confidence (≥80%)</strong></td><td><strong>$high_conf</strong></td></tr>";
echo "<tr><td style='color: green;'><strong>Gain vs Iteration 4</strong></td><td style='color: green;'><strong>" . $gain_vs_iter4 . "</strong></td></tr>";
echo "</table>";

echo "<h2>✅ Iteration 5 Complete!</h2>";
echo "<p><strong>Bug Fix Impact:</strong> The VS abbreviation fix alone should unlock ~50+ matches!</p>";
echo "<p><strong>Cumulative Gain:</strong> From $baseline_high_conf (baseline) to $high_conf (" . $gain_vs_baseline . ")</p>";

echo "</body></html>";
