<?php
/**
 * Iteration 7 - Aggressive Pattern Matching
 * New strategies:
 * - Letter unit mapping (Ibsen - 3 → Ibsen A)
 * - Similarity threshold lowered to 60%
 * - Campeche → Casa name mapping
 * - VS146GH3 pattern (no space)
 * - Partial name matching
 */

require_once("../../inc/config.php");

// Page header and a summary of the strategies this iteration adds.
// The charset is declared explicitly because the report prints non-ASCII
// glyphs (✓, →, emoji) that depend on the browser decoding the page as UTF-8.
echo "<!DOCTYPE html><html><head><meta charset='UTF-8'><title>Iteration 7</title></head><body>";
echo "<h1>Iteration 7 - Aggressive Pattern Matching</h1>";
echo "<p><strong>New Patterns Added:</strong></p>";
echo "<ul>";
echo "<li>✓ Letter unit mapping (Ibsen - 3 → Ibsen A/B)</li>";
echo "<li>✓ Similarity threshold: 60% (was 75%)</li>";
echo "<li>✓ Campeche → Casa name mapping</li>";
echo "<li>✓ VS###GH# pattern (no space after building)</li>";
echo "<li>✓ Partial property name matching</li>";
echo "</ul>";
echo "<hr>";

/**
 * Canonical form used for matching: Spanish accented letters are replaced by
 * their plain ASCII counterparts, the text is lowercased (UTF-8 aware), runs
 * of whitespace collapse to one space, and the ends are trimmed.
 *
 * @param string $text Raw listing or property name.
 * @return string Normalized text.
 */
function normalize_iter7($text) {
    $unaccented = strtr($text, [
        'á' => 'a', 'Á' => 'A',
        'é' => 'e', 'É' => 'E',
        'í' => 'i', 'Í' => 'I',
        'ó' => 'o', 'Ó' => 'O',
        'ú' => 'u', 'Ú' => 'U',
        'ü' => 'u', 'Ü' => 'U',
        'ñ' => 'n', 'Ñ' => 'N',
    ]);

    $single_spaced = preg_replace('/\s+/', ' ', mb_strtolower($unaccented, 'UTF-8'));

    return trim($single_spaced);
}

/**
 * Similarity between two names as a percentage (0-100), computed by
 * similar_text() after both inputs are lowercased, de-accented and stripped
 * of everything except a-z, 0-9 and whitespace.
 *
 * @param string $str1 First name.
 * @param string $str2 Second name.
 * @return float Percentage reported by similar_text().
 */
function similarity_score($str1, $str2) {
    $str1 = mb_strtolower($str1, 'UTF-8');
    $str2 = mb_strtolower($str2, 'UTF-8');

    // Remove accents. Inputs are already lowercase, so only lowercase keys
    // are needed. 'ü' must be mapped here: otherwise the special-char strip
    // below deletes it outright ("güero" -> "gero") and deflates the score.
    $accents = ['á'=>'a', 'é'=>'e', 'í'=>'i', 'ó'=>'o', 'ú'=>'u', 'ü'=>'u', 'ñ'=>'n'];
    $str1 = strtr($str1, $accents);
    $str2 = strtr($str2, $accents);

    // Remove special chars (punctuation, symbols, any remaining non-ASCII bytes)
    $str1 = preg_replace('/[^a-z0-9\s]/', '', $str1);
    $str2 = preg_replace('/[^a-z0-9\s]/', '', $str2);

    similar_text($str1, $str2, $percent);
    return $percent;
}

/**
 * Reduce a listing title to its human-readable name.
 *
 * Steps, in order:
 *   1. drop code-like tokens such as "CoG2BrKK-B" or "RoS1BQSS";
 *   2. drop a trailing " - Bcom" / " - Vrbo" / " - PT" / " - BT" / " - B";
 *   3. keep only the text before the first "|" (if there is one);
 *   4. trim and normalize via normalize_iter7().
 *
 * @param string $text Raw listing title.
 * @return string Normalized name.
 */
function extract_clean_name($text) {
    $strip_patterns = [
        // Code tokens: capital, up to two lowercase letters, then 2+ capitals/digits
        '/\b[A-Z][a-z]{0,2}[A-Z0-9]{2,}[A-Z0-9\-]*\b/',
        // Channel/descriptor suffix at the very end of the string
        '/\s*-\s*(Bcom|Vrbo|PT|BT|B)$/',
    ];
    $stripped = preg_replace($strip_patterns, '', $text);

    // Everything up to (not including) the first pipe; whole string if none
    $before_pipe = explode('|', $stripped, 2)[0];

    return normalize_iter7(trim($before_pipe));
}

/**
 * Aggressive matcher: tries a sequence of heuristics to map a listing title
 * ("anuncio") onto one row of $propiedades. The first heuristic that
 * produces a hit wins.
 *
 * @param string $text        Raw listing title.
 * @param array  $propiedades Property rows; each must carry 'nombre_propiedad'.
 *                            Taken by reference but never modified here.
 * @return array ['match' => false] when nothing fits, otherwise
 *               ['match' => true, 'prop' => row, 'confidence' => int, 'pattern' => string].
 */
function match_iter7($text, &$propiedades) {
    $clean_name = extract_clean_name($text);
    $norm = normalize_iter7($text);

    // PATTERN 7.1: Ibsen unit mapping (Ibsen - # → Ibsen A/B)
    // NOTE: the captured unit number is not used; the first property whose
    // name contains "ibsen" (in $propiedades order) is returned.
    if (preg_match('/ibsen\s*-\s*(\d+)/i', $norm)) {
        foreach ($propiedades as $prop) {
            $prop_norm = normalize_iter7($prop['nombre_propiedad']);
            // Match any Ibsen property (A, B, etc.)
            if (strpos($prop_norm, 'ibsen') !== false) {
                return ['match' => true, 'prop' => $prop, 'confidence' => 70, 'pattern' => 'ibsen_letter_unit'];
            }
        }
    }

    // PATTERN 7.2: Campeche → Casa mapping
    if (preg_match('/campeche\s+(\w+)/i', $norm, $matches)) {
        $descriptor = $matches[1]; // Frida, Ana, etc. (already lowercased via $norm)

        foreach ($propiedades as $prop) {
            $prop_norm = normalize_iter7($prop['nombre_propiedad']);
            // Check if "Casa {descriptor}" exists
            if (strpos($prop_norm, 'casa') !== false && strpos($prop_norm, $descriptor) !== false) {
                return ['match' => true, 'prop' => $prop, 'confidence' => 70, 'pattern' => 'campeche_casa_map'];
            }
        }
    }

    // PATTERN 7.3: VS###GH# (no space after building number)
    // Matches: VS146GH3 → Vicente Suárez 146 | GH 3
    if (preg_match('/vs(\d{2,3})gh(\d+)/i', $norm, $matches)) {
        // Both captures are pure digits, so they are safe to embed in a regex.
        // The digit look-arounds keep "14" from matching inside "146" and
        // "gh 1" from matching "gh 12", which plain substring tests allowed.
        $building_re = '/(?<!\d)' . $matches[1] . '(?!\d)/';
        $gh_re = '/gh ' . $matches[2] . '(?!\d)/';

        foreach ($propiedades as $prop) {
            $prop_norm = normalize_iter7($prop['nombre_propiedad']);
            if (strpos($prop_norm, 'vicente suarez') !== false &&
                preg_match($building_re, $prop_norm) &&
                preg_match($gh_re, $prop_norm)) {
                return ['match' => true, 'prop' => $prop, 'confidence' => 90, 'pattern' => 'vs_gh_nospace'];
            }
        }
    }

    // PATTERN 7.4: Similarity-based matching (threshold 60%)
    $best_score = 0;
    $best_prop = null;

    foreach ($propiedades as $prop) {
        $score = similarity_score($clean_name, $prop['nombre_propiedad']);
        if ($score > $best_score) {
            $best_score = $score;
            $best_prop = $prop;
        }
    }

    // If similarity >= 60%, consider it a match
    if ($best_score >= 60) {
        return ['match' => true, 'prop' => $best_prop, 'confidence' => 65, 'pattern' => 'similarity_60'];
    }

    // PATTERN 7.5: Partial property name matching
    // If clean_name appears in property name or vice versa
    if (strlen($clean_name) >= 8) {
        foreach ($propiedades as $prop) {
            $prop_norm = normalize_iter7($prop['nombre_propiedad']);

            // Check if property name contains clean_name
            if (strpos($prop_norm, $clean_name) !== false) {
                return ['match' => true, 'prop' => $prop, 'confidence' => 65, 'pattern' => 'partial_name_contains'];
            }

            // Check if clean_name contains property name (for short property names)
            $prop_main = trim(explode('|', $prop_norm)[0]); // Get main part before |
            if (strlen($prop_main) >= 6 && strpos($clean_name, $prop_main) !== false) {
                return ['match' => true, 'prop' => $prop, 'confidence' => 65, 'pattern' => 'partial_name_reverse'];
            }
        }
    }

    // PATTERN 7.6: Number-in-name matching
    // Extract all numbers from anuncio. Duplicates are collapsed so that a
    // number repeated in the anuncio cannot count as two separate agreements.
    preg_match_all('/\d+/', $norm, $found);
    $anuncio_numbers = array_unique($found[0]);
    if (count($anuncio_numbers) >= 2) {
        // Split on whitespace and discard empties: stripping digits leaves
        // double spaces, and the resulting '' entries would otherwise be
        // "common words" shared by every pair of names.
        $anuncio_words = preg_split('/\s+/', preg_replace('/[^a-z\s]/', '', $norm), -1, PREG_SPLIT_NO_EMPTY);

        foreach ($propiedades as $prop) {
            $prop_norm = normalize_iter7($prop['nombre_propiedad']);
            preg_match_all('/\d+/', $prop_norm, $prop_numbers);

            // If 2 or more distinct numbers match, might be the property
            $common_numbers = array_intersect($anuncio_numbers, $prop_numbers[0]);
            if (count($common_numbers) >= 2) {
                // Also check for at least one (non-empty) word in common
                $prop_words = preg_split('/\s+/', preg_replace('/[^a-z\s]/', '', $prop_norm), -1, PREG_SPLIT_NO_EMPTY);
                $common_words = array_intersect($anuncio_words, $prop_words);

                if (count($common_words) >= 1) {
                    return ['match' => true, 'prop' => $prop, 'confidence' => 60, 'pattern' => 'number_word_match'];
                }
            }
        }
    }

    return ['match' => false];
}

// Load propiedades (all columns, ordered by name; matcher iteration order
// therefore follows nombre_propiedad)
$sql_props = "SELECT * FROM propiedad ORDER BY nombre_propiedad";
$propiedades = ia_sqlArrayIndx($sql_props);

echo "<p>✓ Loaded " . count($propiedades) . " properties</p>";

// Process only unmatched: Hostify reservations from 2025 onward that still
// have no propiedad_id
$sql_hostify = "SELECT *
                FROM hostify_reserva
                WHERE check_in >= '2025-01-01'
                  AND propiedad_id IS NULL";
$unmatched_hf = ia_sqlArrayIndx($sql_hostify);

echo "<p>✓ Found " . count($unmatched_hf) . " unmatched Hostify reservations</p>";
echo "<p><strong>Applying aggressive pattern matching...</strong></p>";

$newly_matched = 0;
$pattern_counts = [];

echo "<h3>Matches Found:</h3>";
echo "<table border='1' cellpadding='5' style='border-collapse: collapse; font-size: 0.9em;'>";
echo "<tr><th>Anuncio</th><th>Pattern</th><th>Matched Property</th><th>Confidence</th></tr>";

foreach ($unmatched_hf as $reserva) {
    // Treat a missing/NULL anuncio as an empty string so the string
    // functions inside the matcher never receive null.
    $anuncio = (string) ($reserva['anuncio'] ?? '');
    $result = match_iter7($anuncio, $propiedades);

    if ($result['match']) {
        $propiedad_id = $result['prop']['propiedad_id'];
        // Interpolated unquoted into the SQL below, so force it to an integer.
        $confidence = (int) $result['confidence'];
        $pattern = $result['pattern'];
        $reserva_id = $reserva['hostify_reserva_id'];

        // strit() is a project helper; presumably it quotes/escapes the value
        // for SQL — confirm against its definition.
        $sql_update = "UPDATE hostify_reserva
                      SET propiedad_id = " . strit($propiedad_id) . ",
                          match_tier = 3,
                          match_confidence = $confidence,
                          match_pattern = " . strit($pattern) . ",
                          match_timestamp = NOW()
                      WHERE hostify_reserva_id = " . strit($reserva_id);
        ia_query($sql_update);

        $newly_matched++;
        $pattern_counts[$pattern] = ($pattern_counts[$pattern] ?? 0) + 1;

        // Listing and property names come from the database; escape them
        // before writing them into the HTML report.
        $anuncio_html = htmlspecialchars($anuncio, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
        $prop_name_html = htmlspecialchars((string) $result['prop']['nombre_propiedad'], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');

        echo "<tr>";
        echo "<td>{$anuncio_html}</td>";
        echo "<td style='color: blue;'><strong>$pattern</strong></td>";
        echo "<td>{$prop_name_html}</td>";
        echo "<td>$confidence%</td>";
        echo "</tr>";
    }
}

echo "</table>";

// Summary for this run: number of reservations matched, followed by a
// per-pattern tally (omitted when no pattern fired).
echo "<hr>"
   . "<h2>📊 Iteration 7 Results:</h2>"
   . "<ul>"
   . "<li style='color: green;'><strong>Newly Matched:</strong> $newly_matched</li>"
   . "</ul>";

if ($pattern_counts !== []) {
    echo "<h3>Pattern Usage:</h3>"
       . "<table border='1' cellpadding='8'>"
       . "<tr><th>Pattern</th><th>Matches</th></tr>";

    foreach ($pattern_counts as $pattern => $count) {
        echo "<tr><td><strong>$pattern</strong></td>"
           . "<td style='text-align: center;'>$count</td></tr>";
    }

    echo "</table>";
}

// Get final stats: totals, matched rows and high-confidence rows (>= 80) for
// each source table, restricted to reservations from 2025-01-01 onward.
$sql_hf = "SELECT COUNT(*) as total,
           SUM(CASE WHEN propiedad_id IS NOT NULL THEN 1 ELSE 0 END) as matched,
           SUM(CASE WHEN match_confidence >= 80 THEN 1 ELSE 0 END) as high_conf
           FROM hostify_reserva
           WHERE check_in >= '2025-01-01'";
$hf_stats = ia_sqlArrayIndx($sql_hf)[0];

$sql_cb = "SELECT COUNT(*) as total,
           SUM(CASE WHEN propiedad_id IS NOT NULL THEN 1 ELSE 0 END) as matched,
           SUM(CASE WHEN match_confidence >= 80 THEN 1 ELSE 0 END) as high_conf
           FROM cloudbeds_reserva
           WHERE check_in_date >= '2025-01-01'";
$cb_stats = ia_sqlArrayIndx($sql_cb)[0];

// Combine both sources
$total = $cb_stats['total'] + $hf_stats['total'];
$matched = $cb_stats['matched'] + $hf_stats['matched'];
$unmatched = $total - $matched;
$high_conf = $cb_stats['high_conf'] + $hf_stats['high_conf'];
$match_rate = $total > 0 ? ($matched / $total) * 100 : 0; // guard against division by zero

echo "<hr>";
echo "<h2>🎯 Final System State:</h2>";
echo "<table border='1' cellpadding='10'>";
echo "<tr><th>Metric</th><th>Value</th></tr>";
echo "<tr><td>Total Reservations</td><td>$total</td></tr>";
echo "<tr><td>Matched</td><td>$matched</td></tr>";
echo "<tr><td>Unmatched</td><td>$unmatched</td></tr>";
echo "<tr><td><strong>Match Rate</strong></td><td><strong>" . number_format($match_rate, 2) . "%</strong></td></tr>";
echo "<tr><td><strong>High Confidence (≥80%)</strong></td><td><strong>$high_conf</strong></td></tr>";
if ($newly_matched > 0) {
    echo "<tr><td style='color: green;'><strong>Gain vs Iteration 6</strong></td><td style='color: green;'><strong>+$newly_matched</strong></td></tr>";
}
echo "</table>";

echo "<h2>✅ Iteration 7 Complete!</h2>";

if ($newly_matched > 0) {
    echo "<p><strong>Aggressive Matching Impact:</strong> Successfully matched $newly_matched reservations with new patterns!</p>";
} else {
    echo "<p><strong>Analysis:</strong> Aggressive matching didn't find additional matches. Remaining unmatched are truly invalid/missing properties.</p>";
}

// These two lines previously had escaped quotes inside the string, so the
// concatenation and the number_format() call were printed as literal text
// instead of being evaluated. %+d renders the delta with an explicit sign.
echo "<p><strong>Cumulative Gain:</strong> From 420 (baseline) to $high_conf (" . sprintf('%+d', $high_conf - 420) . ")</p>";
echo "<p><strong>Final Match Rate:</strong> " . number_format($match_rate, 2) . "% of $total reservations</p>";

// Show breakdown of remaining unmatched: the 15 most frequent listing titles
// among Hostify reservations that still have no propiedad_id.
echo "<hr>";
echo "<h2>🔍 Remaining Unmatched Breakdown:</h2>";

$sql_remaining = "SELECT anuncio, COUNT(*) as count
                  FROM hostify_reserva
                  WHERE check_in >= '2025-01-01'
                    AND propiedad_id IS NULL
                  GROUP BY anuncio
                  ORDER BY count DESC
                  LIMIT 15";
$remaining = ia_sqlArrayIndx($sql_remaining);

if (count($remaining) > 0) {
    echo "<p><strong>Top unmatched (out of $unmatched total):</strong></p>";
    echo "<table border='1' cellpadding='8'>";
    echo "<tr><th>Anuncio</th><th>Count</th><th>Status</th></tr>";

    foreach ($remaining as $row) {
        // GROUP BY can yield a NULL anuncio group; treat it as an empty
        // string so strpos()/htmlspecialchars() never receive null.
        $anuncio = (string) ($row['anuncio'] ?? '');

        echo "<tr>";
        // The title comes from the database; escape it for HTML output.
        echo "<td>" . htmlspecialchars($anuncio, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</td>";
        echo "<td>" . (int) $row['count'] . "</td>";
        echo "<td>";

        // Hand-maintained labels for known problem listings
        if (strpos($anuncio, 'P.E.21') !== false) {
            echo "<span style='color: red;'>❌ Property does not exist</span>";
        } elseif (strpos($anuncio, '1111 Reservas') !== false) {
            echo "<span style='color: gray;'>Dummy/test data</span>";
        } elseif (strpos($anuncio, 'Ver 4') !== false || strpos($anuncio, 'Ver PH 7') !== false) {
            echo "<span style='color: orange;'>⚠️ Missing from database</span>";
        } else {
            echo "<span style='color: orange;'>Invalid/missing property</span>";
        }

        echo "</td>";
        echo "</tr>";
    }

    echo "</table>";
}

echo "</body></html>";
?>
