<?php
/**
 * Iteration 7 - Deep Dive Analysis of Remaining 82
 * Let's find patterns we missed!
 */

require_once("../../inc/config.php");

// Page header. The charset is declared explicitly because this page prints
// non-ASCII glyphs (✓, 🔍, →) and would otherwise depend on server defaults.
echo "<!DOCTYPE html><html><head><meta charset='utf-8'><title>Iteration 7 - Deep Analysis</title></head><body>";
echo "<h1>Iteration 7 - Deep Pattern Discovery</h1>";
echo "<p><strong>Goal:</strong> Find hidden patterns in the final 82 unmatched</p>";
echo "<hr>";

// Load all properties for reference.
// NOTE(review): ia_sqlArrayIndx() is defined outside this file; it is assumed to
// return an array of associative rows - confirm what it returns on failure.
$sql_props = "SELECT * FROM propiedad ORDER BY nombre_propiedad";
$propiedades = ia_sqlArrayIndx($sql_props);
if (!is_array($propiedades)) {
    // A non-array result would make count() fatal on PHP 8 and break the loops below.
    $propiedades = [];
}

echo "<p>✓ Loaded " . count($propiedades) . " properties</p>";

// Get all unmatched anuncios (reservations from 2025 onward with no property
// assigned), one row per anuncio with its reservation count, most frequent first.
$sql_unmatched = "SELECT DISTINCT anuncio, COUNT(*) as count
                  FROM hostify_reserva
                  WHERE check_in >= '2025-01-01'
                    AND propiedad_id IS NULL
                  GROUP BY anuncio
                  ORDER BY count DESC";
$unmatched = ia_sqlArrayIndx($sql_unmatched);
if (!is_array($unmatched)) {
    $unmatched = [];
}

// Report the real reservation total instead of a hard-coded figure, so the
// line stays truthful when the underlying data changes.
$total_unmatched_reservations = 0;
foreach ($unmatched as $row) {
    $total_unmatched_reservations += (int) $row['count'];
}

echo "<p>✓ Found " . count($unmatched) . " unique unmatched anuncios ("
    . $total_unmatched_reservations . " total reservations)</p>";

// ANALYSIS 1: Check what properties might match "P.E." abbreviation
// Lists unmatched anuncios mentioning "P.E."/"PE", extracts building and unit
// numbers from them, then looks for properties whose name contains the building number.

// HTML-escapes DB-sourced text before it is echoed into the page.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<h2>🔍 Analysis 1: P.E. Abbreviation Investigation</h2>";
echo "<p><strong>Hypothesis:</strong> Maybe P.E. doesn't map to 'Paseo de la Reforma'?</p>";

// Keep only rows whose anuncio contains "P.E." or "PE" (case-sensitive).
$pe_cases = [];
foreach ($unmatched as $row) {
    $anuncio = (string) $row['anuncio'];
    if (strpos($anuncio, 'P.E.') !== false || strpos($anuncio, 'PE') !== false) {
        $pe_cases[] = $row;
    }
}

echo "<p>Found " . count($pe_cases) . " P.E. cases</p>";

// Extract unit numbers from P.E. cases
echo "<h3>P.E. Patterns:</h3>";
echo "<table border='1' cellpadding='5'>";
echo "<tr><th>Anuncio</th><th>Count</th><th>Extracted Info</th></tr>";

// Map of building number => list of unit numbers seen for that building.
$pe_units = [];
foreach ($pe_cases as $case) {
    $anuncio = (string) $case['anuncio'];

    // Matches e.g. "P.E. 21 - #601": first digit run is the building, second the unit.
    if (preg_match('/P\.?E\.?\s*(\d+)\s*-\s*#?(\d+)/i', $anuncio, $matches)) {
        $building = $matches[1];
        $unit = $matches[2];
        $pe_units[$building][] = $unit;

        echo "<tr>";
        echo "<td>" . $esc($anuncio) . "</td>";
        echo "<td>" . $esc($case['count']) . "</td>";
        // $building and $unit are digit-only regex captures, safe to print as-is.
        echo "<td>Building: <strong>$building</strong>, Unit: <strong>$unit</strong></td>";
        echo "</tr>";
    }
}
echo "</table>";

// Check if there are properties with these building/unit numbers
echo "<h3>Searching for matching properties:</h3>";
echo "<ul>";
foreach ($pe_units as $building => $units) {
    // PHP turns numeric-string array keys such as "21" into ints. Before PHP 8,
    // strpos() treats an int needle as a character ordinal, so the search below
    // would silently never match. Force the needle back to a string.
    $building = (string) $building;

    echo "<li><strong>Building $building:</strong> Units " . implode(', ', $units);

    // Search for properties that might match
    $found = false;
    foreach ($propiedades as $prop) {
        $prop_name = (string) $prop['nombre_propiedad'];

        // Plain substring test: the building number appears anywhere in the name.
        if (strpos($prop_name, $building) !== false) {
            echo " → <span style='color: blue;'>Found property with $building: <strong>"
                . $esc($prop_name) . "</strong></span>";
            $found = true;
        }
    }

    if (!$found) {
        echo " → <span style='color: red;'>❌ No properties found with building $building</span>";
    }
    echo "</li>";
}
echo "</ul>";

// ANALYSIS 2: Ver patterns - check all Ver properties
// Shows every property whose name contains "veracruz" next to the unmatched
// anuncios that start with "Ver ", so the two lists can be compared by eye.

// HTML-escapes DB-sourced text before it is echoed into the page.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<hr>";
echo "<h2>🔍 Analysis 2: Veracruz Pattern Deep Dive</h2>";

// Unmatched rows whose anuncio begins with "Ver" followed by whitespace (any case).
$ver_cases = [];
foreach ($unmatched as $row) {
    if (preg_match('/^Ver\s+/i', (string) $row['anuncio'])) {
        $ver_cases[] = $row;
    }
}

echo "<p>Found " . count($ver_cases) . " Ver cases</p>";

// Show all Veracruz properties
echo "<h3>All Veracruz properties in database:</h3>";
echo "<table border='1' cellpadding='5'>";
echo "<tr><th>Property Name</th><th>ID</th></tr>";
foreach ($propiedades as $prop) {
    $prop_name = (string) $prop['nombre_propiedad'];
    if (stripos($prop_name, 'veracruz') !== false) {
        echo "<tr>";
        echo "<td><strong>" . $esc($prop_name) . "</strong></td>";
        echo "<td>" . $esc($prop['propiedad_id']) . "</td>";
        echo "</tr>";
    }
}
echo "</table>";

// Show unmatched Ver cases
echo "<h3>Unmatched Ver cases:</h3>";
echo "<table border='1' cellpadding='5'>";
echo "<tr><th>Anuncio</th><th>Count</th><th>Extracted Unit</th></tr>";
foreach ($ver_cases as $case) {
    $anuncio = (string) $case['anuncio'];

    // Extract unit: first digit run after "Ver", skipping an optional "PH" marker.
    $unit = 'Unknown';
    if (preg_match('/Ver\s+(?:PH\s+)?(\d+)/i', $anuncio, $matches)) {
        $unit = $matches[1];
    }

    echo "<tr>";
    echo "<td>" . $esc($anuncio) . "</td>";
    echo "<td>" . $esc($case['count']) . "</td>";
    // $unit is either the literal 'Unknown' or a digit-only capture.
    echo "<td><strong>Unit $unit</strong></td>";
    echo "</tr>";
}
echo "</table>";

// ANALYSIS 3 banner: section divider, heading and strategy note for the
// fuzzy-matching pass over the unmatched anuncios.
echo '<hr>',
    '<h2>🔍 Analysis 3: Fuzzy Similarity Scoring</h2>',
    '<p><strong>Strategy:</strong> Calculate similarity scores for ALL unmatched against ALL properties</p>';

/**
 * Scores how alike two names are, as the percentage reported by similar_text().
 *
 * Before comparing, each input is lower-cased, has its Spanish diacritics
 * folded to plain letters, and is reduced to ASCII letters, digits and
 * whitespace, so case, accents and punctuation do not affect the score.
 *
 * @param string|null $str1 First text (passed first to similar_text()); null is treated as ''.
 * @param string|null $str2 Second text; null is treated as ''.
 * @return float Similarity percentage; 0 when both normalized strings are empty.
 */
function similarity_score($str1, $str2) {
    // Inputs are already lower-cased when this map is applied, so only
    // lower-case forms are needed. 'ü' is included so "Güemes" and "Guemes" compare equal.
    static $accents = [
        'á' => 'a', 'é' => 'e', 'í' => 'i', 'ó' => 'o', 'ú' => 'u',
        'ü' => 'u', 'ñ' => 'n',
    ];

    $normalized = [];
    foreach ([$str1, $str2] as $text) {
        // Cast first: a NULL DB column would otherwise trigger a deprecation on PHP 8.1+.
        $text = mb_strtolower((string) $text, 'UTF-8');
        $text = strtr($text, $accents);
        // Byte-wise on purpose: any remaining non-ASCII bytes are dropped with the punctuation.
        $normalized[] = (string) preg_replace('/[^a-z0-9\s]/', '', $text);
    }

    similar_text($normalized[0], $normalized[1], $percent);
    return $percent;
}

// Renders, for the 20 most frequent unmatched anuncios, the property name with
// the highest similarity_score() and a HIGH / MEDIUM / LOW verdict.

// HTML-escapes DB-sourced text before it is echoed into the page.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<h3>Top similarity matches for each unmatched:</h3>";
echo "<table border='1' cellpadding='5' style='font-size: 0.9em;'>";
echo "<tr><th>Anuncio</th><th>Best Match</th><th>Similarity</th><th>Analysis</th></tr>";

$high_similarity_found = 0;
foreach (array_slice($unmatched, 0, 20) as $unmatch) {  // Top 20 to avoid timeout
    $anuncio = $unmatch['anuncio'];

    // Find best matching property; on ties the first property seen wins.
    $best_score = 0;
    $best_prop = null;

    foreach ($propiedades as $prop) {
        $score = similarity_score($anuncio, $prop['nombre_propiedad']);
        if ($score > $best_score) {
            $best_score = $score;
            $best_prop = $prop;
        }
    }

    // $best_prop stays null when there are no properties or nothing scored
    // above 0; indexing null would raise a warning, so show a placeholder.
    $best_name = $best_prop !== null
        ? $esc($best_prop['nombre_propiedad'])
        : '<em>(no candidate)</em>';

    // One threshold ladder drives both the cell colour and the verdict text.
    if ($best_score >= 60) {
        $color = 'green';
        $verdict = "✓ HIGH - Potential match!";
        $high_similarity_found++;
    } elseif ($best_score >= 40) {
        $color = 'orange';
        $verdict = "⚠️ MEDIUM - Check manually";
    } else {
        $color = 'red';
        $verdict = "❌ LOW - Likely invalid";
    }

    echo "<tr>";
    echo "<td><strong>" . $esc($anuncio) . "</strong></td>";
    echo "<td>" . $best_name . "</td>";
    echo "<td style='color: $color;'><strong>" . round($best_score, 1) . "%</strong></td>";
    echo "<td><span style='color: $color;'>$verdict</span></td>";
    echo "</tr>";
}
echo "</table>";

echo "<p><strong>High similarity candidates found:</strong> $high_similarity_found</p>";

// ANALYSIS 4: Extract all unique property name patterns
// Groups property names by their leading "street" text and lists the groups
// that contain three or more properties.

// HTML-escapes DB-sourced text before it is echoed into the page.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<hr>";
echo "<h2>🔍 Analysis 4: Property Name Pattern Analysis</h2>";

// Group properties by street name
$streets = [];
foreach ($propiedades as $prop) {
    $name = (string) $prop['nombre_propiedad'];

    // Extract street name: the leading run of letters, dots and spaces, ending
    // before the first whitespace+number, the first "|", or the end of the name.
    if (preg_match('/^([A-Za-zÁÉÍÓÚáéíóúñÑ\.\s]+?)(?:\s+\d+|\||$)/u', $name, $matches)) {
        $street = trim($matches[1]);
        // A name with leading whitespace can yield an empty street; skip it
        // rather than collecting unrelated names under a blank heading.
        if ($street !== '') {
            $streets[$street][] = $name;
        }
    }
}

echo "<h3>Properties by street (showing streets with multiple units):</h3>";
echo "<table border='1' cellpadding='5'>";
echo "<tr><th>Street Name</th><th>Count</th><th>Sample Properties</th></tr>";

foreach ($streets as $street => $props) {
    $total = count($props);
    if ($total >= 3) {  // Only show streets with 3+ properties
        echo "<tr>";
        echo "<td><strong>" . $esc($street) . "</strong></td>";
        echo "<td>" . $total . "</td>";
        // Show up to three sample names, then a "+N more" tail.
        echo "<td>" . implode(', ', array_map($esc, array_slice($props, 0, 3)));
        if ($total > 3) echo " ... +" . ($total - 3) . " more";
        echo "</td>";
        echo "</tr>";
    }
}
echo "</table>";

// ANALYSIS 5: Number pattern extraction
// For the 15 most frequent unmatched anuncios, lists every digit run found and
// classifies the anuncio by the first numeric pattern that matches.

// HTML-escapes DB-sourced text before it is echoed into the page.
$esc = static function ($value) {
    return htmlspecialchars((string) $value, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
};

echo "<hr>";
echo "<h2>🔍 Analysis 5: Number Pattern Extraction from Unmatched</h2>";
echo "<p>Looking for patterns like building numbers, unit numbers, floor numbers...</p>";

echo "<table border='1' cellpadding='5'>";
echo "<tr><th>Anuncio</th><th>Numbers Found</th><th>Pattern Type</th></tr>";

foreach (array_slice($unmatched, 0, 15) as $unmatch) {
    $anuncio = (string) $unmatch['anuncio'];

    // Extract all numbers
    preg_match_all('/\d+/', $anuncio, $numbers);

    echo "<tr>";
    echo "<td><strong>" . $esc($anuncio) . "</strong></td>";
    echo "<td>" . implode(', ', $numbers[0]) . "</td>";
    echo "<td>";

    // Analyze pattern - checked in priority order, first match wins.
    // Captures are digit-only, so they are printed without escaping.
    if (preg_match('/(\d{2,3})\s*-\s*#?(\d{2,4})/', $anuncio, $m)) {
        // 2-3 digit number, a dash, then a 2-4 digit number (optional "#").
        echo "Building-Unit pattern: {$m[1]} | {$m[2]}";
    } elseif (preg_match('/PH\s+(\d+)/', $anuncio, $m)) {
        echo "Penthouse pattern: PH {$m[1]}";
    } elseif (preg_match('/\s+(\d+)\s*\|/', $anuncio, $m)) {
        // A number directly before a "|" separator.
        echo "Name + Unit: Unit {$m[1]}";
    } else {
        echo "Unknown pattern";
    }

    echo "</td>";
    echo "</tr>";
}
echo "</table>";

// SUMMARY
// Closing report: the iteration's written-up insights (with the live count of
// high-similarity candidates), the proposed next strategy, and the page footer.
$insight_items = [
    "<li><strong>P.E. Properties:</strong> Building 21 units (601, 602, 701, 702) - NO matching properties found in database</li>",
    "<li><strong>Ver Properties:</strong> Units 4 and PH 7 at Veracruz - need to check if these units exist</li>",
    "<li><strong>High Similarity Matches:</strong> Found " . $high_similarity_found . " candidates with >60% similarity</li>",
    "<li><strong>Recommendation:</strong> Focus on similarity-based matching for remaining cases</li>",
];

$strategy_items = [
    "<li><strong>Lower similarity threshold:</strong> Try matching at 60% instead of 75%</li>",
    "<li><strong>Number-based fuzzy matching:</strong> Match if building/unit numbers appear in property name</li>",
    "<li><strong>Aggressive partial matching:</strong> Match on partial street names</li>",
    "<li><strong>Code-stripping:</strong> Remove property codes (CoG2BrKK, RoS1BQSS) before matching</li>",
];

// Pieces are joined with no separator so the emitted markup has no extra whitespace.
echo "<hr>"
    . "<h2>💡 Iteration 7 Insights</h2>"
    . "<ul>" . implode('', $insight_items) . "</ul>";

echo "<h2>🎯 Proposed Iteration 7 Strategy:</h2>"
    . "<ol>" . implode('', $strategy_items) . "</ol>";

echo "</body></html>";
?>
