<?php
/**
 * Fill Postal Code Data in propiedad Table - Web UI
 *
 * Purpose: Extract colonia names from propiedad.direccion and match them
 *          against the codigo_postal government catalog to fill postal code fields
 *
 * Author: Claude Code
 * Date: 2026-01-03
 *
 * Usage:
 *   - Preview: ?action=preview (default)
 *   - Apply high confidence: ?action=apply_high
 *   - Apply all: ?action=apply_all
 *   - Export CSV: ?action=export_csv
 */

require_once("../../inc/config.php");

// Settings
// Minimum confidence (in percent) for a match to count as "high confidence";
// also used as the cut-off for the apply_high action.
$HIGH_CONFIDENCE_THRESHOLD = 90;

// Request parameters
// ?action=... selects the mode; a missing parameter means preview.
$action = isset($_GET['action']) ? $_GET['action'] : 'preview';
// True whenever a "debug" query parameter is present.
// NOTE(review): nothing in this file reads $debug — confirm whether it is still needed.
$debug = isset($_GET['debug']);

?>
<!DOCTYPE html>
<html>
<head>
    <title>Fill Postal Codes - Propiedades</title>
    <style>
        body {
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
            margin: 20px;
            background: #f5f5f5;
        }
        .container {
            max-width: 1600px;
            margin: 0 auto;
            background: white;
            padding: 30px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        h1 {
            color: #2c3e50;
            border-bottom: 3px solid #27ae60;
            padding-bottom: 10px;
        }

        .stats {
            background: #ecf0f1;
            padding: 20px;
            border-radius: 5px;
            margin: 20px 0;
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
            gap: 15px;
        }
        .stat-box {
            background: white;
            padding: 15px;
            border-radius: 5px;
            text-align: center;
            border-left: 4px solid #27ae60;
        }
        .stat-box h3 { margin: 0 0 5px 0; font-size: 2em; color: #27ae60; }
        .stat-box p { margin: 0; color: #7f8c8d; font-size: 0.9em; }
        .stat-box.exact { border-color: #28a745; }
        .stat-box.exact h3 { color: #28a745; }
        .stat-box.fuzzy { border-color: #17a2b8; }
        .stat-box.fuzzy h3 { color: #17a2b8; }
        .stat-box.manual { border-color: #ffc107; }
        .stat-box.manual h3 { color: #ffc107; }
        .stat-box.notfound { border-color: #dc3545; }
        .stat-box.notfound h3 { color: #dc3545; }

        table {
            width: 100%;
            border-collapse: collapse;
            margin: 20px 0;
            font-size: 0.85em;
        }
        th {
            background: #34495e;
            color: white;
            padding: 12px;
            text-align: left;
            position: sticky;
            top: 0;
        }
        td {
            padding: 10px;
            border-bottom: 1px solid #ecf0f1;
        }
        tr:hover { background: #f8f9fa; }

        .match-exact { background-color: #d4edda; border-left: 4px solid #28a745; }
        .match-fuzzy { background-color: #d1ecf1; border-left: 4px solid #17a2b8; }
        .match-manual { background-color: #fff3cd; border-left: 4px solid #ffc107; }
        .match-direccion_fallback { background-color: #ffe5cc; border-left: 4px solid #fd7e14; }
        .match-ambiguous { background-color: #f8d7da; border-left: 4px solid #dc3545; }
        .match-none { background-color: #f8f9fa; border-left: 4px solid #6c757d; }

        .badge {
            display: inline-block;
            padding: 4px 10px;
            border-radius: 12px;
            font-size: 0.85em;
            font-weight: bold;
            color: white;
        }
        .badge-exact { background: #28a745; }
        .badge-fuzzy { background: #17a2b8; }
        .badge-manual { background: #ffc107; color: #333; }
        .badge-direccion_fallback { background: #fd7e14; color: white; }
        .badge-ambiguous { background: #dc3545; }
        .badge-none { background: #6c757d; }

        .actions {
            margin: 30px 0;
            padding: 20px;
            background: #e8f8f0;
            border-radius: 5px;
        }
        .btn {
            display: inline-block;
            padding: 12px 24px;
            margin: 5px;
            border: none;
            border-radius: 5px;
            cursor: pointer;
            font-size: 1em;
            text-decoration: none;
            transition: all 0.3s;
        }
        .btn-primary { background: #3498db; color: white; }
        .btn-primary:hover { background: #2980b9; }
        .btn-success { background: #28a745; color: white; }
        .btn-success:hover { background: #218838; }
        .btn-warning { background: #ffc107; color: #333; }
        .btn-warning:hover { background: #e0a800; }

        .alert {
            padding: 15px;
            margin: 20px 0;
            border-radius: 5px;
            border-left: 4px solid;
        }
        .alert-success { background: #d4edda; border-color: #28a745; color: #155724; }
        .alert-info { background: #d1ecf1; border-color: #17a2b8; color: #0c5460; }
        .alert-warning { background: #fff3cd; border-color: #ffc107; color: #856404; }

        code {
            background: #f4f4f4;
            padding: 2px 6px;
            border-radius: 3px;
            font-family: 'Courier New', monospace;
        }

        .detail {
            font-size: 0.8em;
            color: #7f8c8d;
        }
    </style>
</head>
<body>
<div class="container">
    <h1>📮 Fill Postal Codes - Propiedades</h1>
    <p style="color: #7f8c8d; font-style: italic;">4-Tier Matching: Manual Overrides → Exact Match → Fuzzy Match → Direccion Fallback</p>

<?php

/**
 * Escape a value for output inside HTML text or a quoted attribute.
 *
 * Both single and double quotes are converted (ENT_QUOTES) and the input is
 * treated as UTF-8. A null value is rendered as an empty string.
 *
 * @param mixed $value Value to escape; null is allowed.
 * @return string HTML-escaped text.
 */
function esc($value) {
    $text = isset($value) ? $value : '';

    return htmlspecialchars($text, ENT_QUOTES, 'UTF-8');
}

// ============================================================================
// MANUAL OVERRIDE TABLE
// ============================================================================

/**
 * Hand-maintained colonia → postal data table consulted before the catalog.
 *
 * Keys are the colonia spellings to intercept. Each value is a positional
 * list in the order findManualOverride() unpacks it:
 *   [codigo_postal, colonia, municipio, municipio_descripcion, estado, estado_descripcion]
 *
 * @return array<string, array<int, string>>
 */
function getManualOverrides() {
    // Every entry shares the same state columns; only the municipio differs.
    $inCuauhtemoc = function ($codigoPostal, $coloniaOficial) {
        return [$codigoPostal, $coloniaOficial, '015', 'CUAUHTEMOC', 'DIF', 'CIUDAD DE MEXICO'];
    };
    $inMiguelHidalgo = function ($codigoPostal, $coloniaOficial) {
        return [$codigoPostal, $coloniaOficial, '016', 'MIGUEL HIDALGO', 'DIF', 'CIUDAD DE MEXICO'];
    };

    // Rows that are registered under more than one spelling.
    $cuauhtemoc = $inCuauhtemoc('06000', 'CUAUHTEMOC');
    $hipodromoCondesa = $inCuauhtemoc('06170', 'HIPODROMO CONDESA');
    $santaMariaLaRibera = $inCuauhtemoc('06400', 'SANTA MARIA LA RIBERA');

    return [
        'CUAUHTEMOC' => $cuauhtemoc,
        'CUAUTÉMOC' => $cuauhtemoc,
        'POLANCO' => $inMiguelHidalgo('11510', 'POLANCO I SECCION'),
        'HIPODROMO CONDESA' => $hipodromoCondesa,
        'HIPÓDROMO CONDESA' => $hipodromoCondesa,
        'STA MARIA LA RIBERA' => $santaMariaLaRibera,
        'SANTA MARIA LA RIBERA' => $santaMariaLaRibera,
        'CHAPULTEPEC MORALES' => $inMiguelHidalgo('11520', 'GRANADA'),
    ];
}

// ============================================================================
// MATCHING FUNCTIONS
// ============================================================================

/**
 * Take the text after the last comma of an address as the colonia.
 *
 * @param string|null $direccion Free-text address.
 * @return string|null Trimmed trailing segment, or null when the address is
 *                     empty, has no comma, or the trailing segment is empty.
 */
function extractColoniaFromDireccion($direccion) {
    if (!$direccion) {
        return null;
    }

    $lastComma = strrpos($direccion, ',');
    if ($lastComma === false) {
        return null;
    }

    $tail = trim(substr($direccion, $lastComma + 1));

    // An empty tail (and the literal "0") counts as "no colonia".
    return $tail ?: null;
}

/**
 * Canonical form of a colonia name used for comparisons: trimmed,
 * upper-cased (UTF-8 aware), with Spanish accented letters folded to ASCII.
 *
 * @param string|null $colonia Raw colonia text.
 * @return string Normalized name; '' for empty input.
 */
function normalizeColoniaName($colonia) {
    if (!$colonia) {
        return '';
    }

    // Only upper-case forms are needed because folding happens after upper-casing.
    $accentMap = [
        'Á' => 'A',
        'É' => 'E',
        'Í' => 'I',
        'Ó' => 'O',
        'Ú' => 'U',
        'Ñ' => 'N',
        'Ü' => 'U',
    ];

    $upper = mb_strtoupper(trim($colonia), 'UTF-8');

    return strtr($upper, $accentMap);
}

/**
 * Tier 1: look the colonia up in the manual override table.
 *
 * Both the input and the table keys are passed through
 * normalizeColoniaName() before comparing. Comparing the normalized input
 * against raw keys would make any key written with accents (e.g.
 * 'CUAUTÉMOC') unreachable, because normalization strips accents from the
 * input only.
 *
 * @param string|null $colonia Colonia text extracted from an address.
 * @return array|null ['match_type' => 'manual', 'confidence' => 100, 'data' => [...]]
 *                    or null when no override applies.
 */
function findManualOverride($colonia) {
    $normalized = normalizeColoniaName($colonia);
    if ($normalized === '') {
        return null;
    }

    foreach (getManualOverrides() as $key => $data) {
        if (normalizeColoniaName($key) !== $normalized) {
            continue;
        }

        // Override rows are positional; name the columns like a catalog row.
        return [
            'match_type' => 'manual',
            'confidence' => 100,
            'data' => [
                'codigo_postal' => $data[0],
                'colonia' => $data[1],
                'municipio' => $data[2],
                'municipio_descripcion' => $data[3],
                'estado' => $data[4],
                'estado_descripcion' => $data[5]
            ]
        ];
    }

    return null;
}

/**
 * Tier 2: exact catalog match on the normalized colonia name.
 *
 * Only rows whose municipio_descripcion is CUAUHTEMOC, MIGUEL HIDALGO or
 * BENITO JUAREZ are considered; the lowest codigo_postal wins.
 *
 * @param string $colonia Colonia text extracted from an address.
 * @return array|null ['match_type' => 'exact', 'confidence' => 100, 'data' => row]
 *                    or null when the catalog has no such colonia.
 */
function findPostalCodeExact($colonia) {
    // strit() is used here as the SQL string-literal quoting helper
    // (presumably quote + escape — defined outside this file).
    $quoted = strit(normalizeColoniaName($colonia));

    $sql = "SELECT
                codigo_postal, colonia, municipio, municipio_descripcion, estado, estado_descripcion
            FROM codigo_postal
            WHERE UPPER(TRIM(colonia)) = {$quoted}
              AND municipio_descripcion IN ('CUAUHTEMOC', 'MIGUEL HIDALGO', 'BENITO JUAREZ')
            ORDER BY codigo_postal
            LIMIT 1";

    $rows = ia_sqlArrayIndx($sql);
    if (empty($rows)) {
        return null;
    }

    return [
        'match_type' => 'exact',
        'confidence' => 100,
        'data' => $rows[0]
    ];
}

/**
 * Tier 3: substring ("contains") catalog match on the normalized colonia.
 *
 * Up to five candidates are fetched, shortest colonia name first. When they
 * span more than one postal code the result is flagged as ambiguous
 * (confidence 50) and all candidates are returned under 'alternatives';
 * otherwise it is a fuzzy match (confidence 90). In both cases 'data' is
 * the first candidate.
 *
 * @param string $colonia Colonia text extracted from an address.
 * @return array|null Match descriptor, or null when nothing matched.
 */
function findPostalCodeFuzzy($colonia) {
    // strit() is used here as the SQL string-literal quoting helper
    // (presumably quote + escape — defined outside this file).
    $needle = strit(normalizeColoniaName($colonia));

    $sql = "SELECT
                codigo_postal, colonia, municipio, municipio_descripcion, estado, estado_descripcion
            FROM codigo_postal
            WHERE UPPER(colonia) LIKE CONCAT('%', {$needle}, '%')
              AND municipio_descripcion IN ('CUAUHTEMOC', 'MIGUEL HIDALGO', 'BENITO JUAREZ')
            ORDER BY LENGTH(colonia) ASC, codigo_postal ASC
            LIMIT 5";

    $candidates = ia_sqlArrayIndx($sql);
    if (empty($candidates)) {
        return null;
    }

    $distinctPostalCodes = array_unique(array_column($candidates, 'codigo_postal'));
    $best = $candidates[0];

    if (count($distinctPostalCodes) <= 1) {
        return [
            'match_type' => 'fuzzy',
            'confidence' => 90,
            'data' => $best
        ];
    }

    return [
        'match_type' => 'ambiguous',
        'confidence' => 50,
        'data' => $best,
        'alternatives' => $candidates
    ];
}

/**
 * Tier 4: fallback that searches the catalog using the address text itself.
 *
 * Strategy 1 (confidence 75): take the leading run of letters/whitespace of
 * the first comma-separated segment (the street name before any number) and
 * look for catalog colonias containing it. Skipped when that name is shorter
 * than 5 characters.
 * Strategy 2 (confidence 70): look for catalog colonias containing the whole
 * normalized address.
 *
 * @param string|null $direccion Full free-text address.
 * @return array|null ['match_type' => 'direccion_fallback', 'confidence' => 70|75,
 *                     'data' => row, 'note' => string] or null when nothing matched.
 */
function findPostalCodeFromFullDireccion($direccion) {
    if (empty($direccion)) return null;

    // Shared catalog lookup: colonias containing $needle, restricted to the
    // three municipios this tool covers, shortest colonia name first.
    // strit() is the file's SQL string-literal quoting helper (defined elsewhere).
    $searchCatalog = function ($needle) {
        $sql = "SELECT
                    codigo_postal, colonia, municipio, municipio_descripcion, estado, estado_descripcion
                FROM codigo_postal
                WHERE UPPER(colonia) LIKE CONCAT('%', " . strit($needle) . ", '%')
                  AND municipio_descripcion IN ('CUAUHTEMOC', 'MIGUEL HIDALGO', 'BENITO JUAREZ')
                ORDER BY LENGTH(colonia) ASC, codigo_postal ASC
                LIMIT 3";

        return ia_sqlArrayIndx($sql);
    };

    // Strategy 1: Try first part (street name) if comma exists
    if (strpos($direccion, ',') !== false) {
        $parts = explode(',', $direccion);
        $street = trim($parts[0]);

        // The "u" modifier makes the accented letters in the class real
        // characters. Without it the class matches individual bytes, so an
        // upper-case accented letter (e.g. "Á") is cut in half and the street
        // name comes back truncated. With "u", preg_match() returns false on
        // invalid UTF-8 input and this strategy is simply skipped.
        if (preg_match('/^([a-záéíóúñü\s]+)/iu', $street, $found)) {
            $street_normalized = normalizeColoniaName(trim($found[1]));

            // Very short names would match far too many colonias.
            if (mb_strlen($street_normalized, 'UTF-8') >= 5) {
                $results = $searchCatalog($street_normalized);

                if (!empty($results)) {
                    return [
                        'match_type' => 'direccion_fallback',
                        'confidence' => 75,
                        'data' => $results[0],
                        'note' => 'Matched using street name from direccion'
                    ];
                }
            }
        }
    }

    // Strategy 2: Search using full direccion as last resort
    $results = $searchCatalog(normalizeColoniaName($direccion));

    if (!empty($results)) {
        return [
            'match_type' => 'direccion_fallback',
            'confidence' => 70,
            'data' => $results[0],
            'note' => 'Matched using full direccion'
        ];
    }

    return null;
}

/**
 * Run the matching tiers in order and return the first hit.
 *
 * With a colonia: manual override, then exact catalog match, then fuzzy
 * catalog match. If none of those hit (or there is no colonia) and a full
 * address is available, the address-based fallback is tried last.
 *
 * @param string|null $colonia        Colonia extracted from the address, if any.
 * @param string|null $full_direccion Complete address text for the fallback tier.
 * @return array|null Match descriptor from the winning tier, or null.
 */
function findPostalCodeData($colonia, $full_direccion = null) {
    $hasColonia = !empty($colonia);
    $hasDireccion = !empty($full_direccion);

    if (!$hasColonia && !$hasDireccion) {
        return null;
    }

    if ($hasColonia) {
        // Tiers 1-3; each later tier runs only when the previous one found nothing.
        $hit = findManualOverride($colonia)
            ?: findPostalCodeExact($colonia)
            ?: findPostalCodeFuzzy($colonia);

        if ($hit) {
            return $hit;
        }
    }

    if ($hasDireccion) {
        // Tier 4: fallback on the complete address text.
        $hit = findPostalCodeFromFullDireccion($full_direccion);

        if ($hit) {
            return $hit;
        }
    }

    return null;
}

// ============================================================================
// LOAD AND PROCESS DATA
// ============================================================================

// Load every property and classify it; $matches keeps one entry per
// property (matched or not) and $stats accumulates the summary counters.
$propiedades = ia_sqlArrayIndx("SELECT * FROM propiedad ORDER BY nombre_propiedad");

$matches = [];
$stats = [
    'total' => count($propiedades),
    'exact_match' => 0,
    'fuzzy_match' => 0,
    'manual_override' => 0,
    'direccion_fallback' => 0,
    'ambiguous' => 0,
    'not_found' => 0,
    'skipped_no_direccion' => 0,
    'high_confidence' => 0,
];

// Which counter each match_type feeds. Building the key as
// "<match_type>_match" only works for 'exact' and 'fuzzy'; the other types
// would land on keys the page never displays (or that do not exist).
$stat_key_by_match_type = [
    'exact' => 'exact_match',
    'fuzzy' => 'fuzzy_match',
    'manual' => 'manual_override',
    'direccion_fallback' => 'direccion_fallback',
    'ambiguous' => 'ambiguous',
];

foreach ($propiedades as $prop) {
    if (empty($prop['direccion'])) {
        $stats['skipped_no_direccion']++;
        $matches[] = [
            'propiedad' => $prop,
            'match' => null,
            'colonia' => null,
            'reason' => 'No direccion'
        ];
        continue;
    }

    $colonia = extractColoniaFromDireccion($prop['direccion']);

    // Colonia-based tiers first; the full direccion is the last-resort tier
    // whenever they find nothing (including when no colonia was extracted).
    $match = findPostalCodeData($colonia, $prop['direccion']);

    if ($match) {
        $match_type = $match['match_type'];
        if (isset($stat_key_by_match_type[$match_type])) {
            $stats[$stat_key_by_match_type[$match_type]]++;
        }
        if ($match['confidence'] >= $HIGH_CONFIDENCE_THRESHOLD) {
            $stats['high_confidence']++;
        }
    } else {
        $stats['not_found']++;
    }

    $matches[] = [
        'propiedad' => $prop,
        'match' => $match,
        'colonia' => $colonia,
        'reason' => null
    ];
}

// ============================================================================
// ACTION HANDLERS
// ============================================================================

// Apply handler: writes the matched postal data back to propiedad.
// apply_high only writes matches at or above the high-confidence threshold;
// apply_all writes every match regardless of confidence.
// NOTE(review): this mutation is reachable through a plain GET link, so any
// visit to ?action=apply_all changes data — consider requiring POST plus a
// CSRF token.
if ($action === 'apply_high' || $action === 'apply_all') {
    $threshold = ($action === 'apply_high') ? $HIGH_CONFIDENCE_THRESHOLD : 0;
    $updates = 0;

    echo "<div class='alert alert-warning'>";
    echo "<strong>Applying updates (confidence ≥ {$threshold}%)...</strong>";
    echo "</div>";

    foreach ($matches as $match_data) {
        $prop = $match_data['propiedad'];
        $match = $match_data['match'];

        if (!$match || $match['confidence'] < $threshold) {
            continue;
        }

        $data = $match['data'];

        // Values go through strit(), the same SQL-literal quoting helper the
        // catalog lookups in this file use, instead of being pasted between
        // quotes: a catalog name containing an apostrophe would otherwise
        // break (or inject into) the statement.
        // NOTE(review): strit() is defined outside this file — confirm it
        // quotes and escapes, and how it renders null.
        $sql = "UPDATE propiedad SET
                    codigo_postal = " . strit($data['codigo_postal']) . ",
                    colonia = " . strit($data['colonia']) . ",
                    estado = " . strit($data['estado']) . ",
                    estado_descripcion = " . strit($data['estado_descripcion']) . ",
                    municipio = " . strit($data['municipio']) . ",
                    municipio_descripcion = " . strit($data['municipio_descripcion']) . "
                WHERE propiedad_id = " . strit($prop['propiedad_id']);
        ia_query($sql);
        $updates++;
    }

    echo "<div class='alert alert-success'>";
    echo "<strong>✓ Success!</strong><br>";
    echo "Updated <strong>{$updates}</strong> properties with postal code data";
    echo "</div>";

    echo "<a href='fill_postal_codes.php' class='btn btn-primary'>← Back to Preview</a>";
    // Close the page markup opened before this script block, then stop.
    echo "</div></body></html>";
    exit;
}

// CSV export handler: streams one row per matched property as a download.
// By the time this runs the page has already emitted its HTML head and the
// opening of the body, so two situations have to be handled:
//   - output already flushed to the client: headers can no longer be set,
//     so report that in the page instead of dumping CSV text into the HTML;
//   - output still sitting in output buffers: discard it, otherwise the
//     downloaded file would start with the page markup.
if ($action === 'export_csv') {
    if (headers_sent()) {
        echo "<div class='alert alert-warning'>";
        echo "<strong>CSV export unavailable:</strong> page output was already sent, so the download headers cannot be set. ";
        echo "Enable output buffering for this page or run the export before any HTML is printed.";
        echo "</div>";
        echo "</div></body></html>";
        exit;
    }

    // Drop the buffered HTML; stop if a buffer refuses to be removed.
    while (ob_get_level() > 0) {
        if (!ob_end_clean()) {
            break;
        }
    }

    header('Content-Type: text/csv; charset=utf-8');
    header('Content-Disposition: attachment; filename=postal_codes_fill_' . date('Y-m-d_His') . '.csv');

    $output = fopen('php://output', 'w');
    fputcsv($output, [
        'propiedad_id', 'nombre_propiedad', 'direccion', 'colonia_extracted',
        'match_type', 'confidence', 'codigo_postal', 'colonia_oficial',
        'municipio', 'estado'
    ]);

    foreach ($matches as $match_data) {
        $prop = $match_data['propiedad'];
        $match = $match_data['match'];

        // Unmatched and skipped properties are not exported.
        if (!$match) {
            continue;
        }

        fputcsv($output, [
            $prop['propiedad_id'],
            $prop['nombre_propiedad'],
            $prop['direccion'],
            $match_data['colonia'],
            $match['match_type'],
            $match['confidence'],
            $match['data']['codigo_postal'],
            $match['data']['colonia'],
            $match['data']['municipio_descripcion'],
            $match['data']['estado_descripcion']
        ]);
    }

    fclose($output);
    exit;
}

// ============================================================================
// DISPLAY RESULTS
// ============================================================================

// Summary banner with the number of properties loaded.
echo "<div class='alert alert-info'><strong>Data Loaded:</strong> " . $stats['total'] . " properties</div>";

echo "<h2>Statistics</h2>";

// One entry per stat box: [markup up to and including the opening <h3>, $stats key, caption].
$stat_boxes = [
    ["<div class='stat-box'><h3>", 'total', 'Total Properties'],
    ["<div class='stat-box exact'><h3>", 'exact_match', 'Exact Matches'],
    ["<div class='stat-box fuzzy'><h3>", 'fuzzy_match', 'Fuzzy Matches'],
    ["<div class='stat-box manual'><h3>", 'manual_override', 'Manual Overrides'],
    ["<div class='stat-box' style='border-color: #fd7e14;'><h3 style='color: #fd7e14;'>", 'direccion_fallback', 'Direccion Fallback (Tier 4)'],
    ["<div class='stat-box'><h3>", 'ambiguous', 'Ambiguous (needs review)'],
    ["<div class='stat-box notfound'><h3>", 'not_found', 'Not Found'],
    ["<div class='stat-box'><h3>", 'skipped_no_direccion', 'Skipped (no address)'],
    ["<div class='stat-box'><h3>", 'high_confidence', 'High Confidence (≥90%)'],
];

echo "<div class='stats'>";
foreach ($stat_boxes as $stat_box) {
    list($box_opening, $stat_key, $caption) = $stat_box;
    echo $box_opening . $stats[$stat_key] . "</h3><p>" . $caption . "</p></div>";
}
echo "</div>";

// Action links; the two apply links ask for confirmation in the browser first.
$high_confidence_total = $stats['high_confidence'];
echo "<div class='actions'>"
    . "<h3>Actions</h3>"
    . "<a href='?action=apply_high' class='btn btn-success' onclick='return confirm(\"Apply " . $high_confidence_total . " high-confidence matches?\")'>✓ Apply High Confidence (≥90%)</a>"
    . "<a href='?action=apply_all' class='btn btn-warning' onclick='return confirm(\"Apply ALL matches including low confidence?\")'>⚠️ Apply All Matches</a>"
    . "<a href='?action=export_csv' class='btn btn-primary'>📥 Export CSV</a>"
    . "</div>";

// Results table: at most the first 100 entries of $matches, one row each.
echo "<h2>Match Results (showing first 100)</h2>";
echo "<table>";
echo "<thead><tr>";
echo "<th>Propiedad</th>";
echo "<th>Dirección</th>";
echo "<th>Colonia Extraída</th>";
echo "<th>→</th>";
echo "<th>Código Postal</th>";
echo "<th>Colonia Oficial</th>";
echo "<th>Municipio</th>";
echo "<th>Match Type</th>";
echo "<th>Conf</th>";
echo "</tr></thead>";
echo "<tbody>";

$display_count = 0;
foreach ($matches as $match_data) {
    if ($display_count >= 100) break;
    $display_count++;

    $prop = $match_data['propiedad'];
    $match = $match_data['match'];
    $colonia = $match_data['colonia'];

    if ($match) {
        // match_type doubles as the CSS suffix for the row and badge colours.
        $row_class = "match-" . $match['match_type'];
        $badge_class = "badge-" . $match['match_type'];
        $data = $match['data'];

        echo "<tr class='" . esc($row_class) . "'>";
        echo "<td><strong>" . esc($prop['nombre_propiedad']) . "</strong></td>";
        echo "<td><small>" . esc($prop['direccion']) . "</small></td>";
        echo "<td><code>" . esc($colonia) . "</code></td>";
        echo "<td>→</td>";
        // Catalog values are database text too, so they are escaped like the
        // property columns (a colonia name may contain & or quotes).
        echo "<td><strong>" . esc($data['codigo_postal']) . "</strong></td>";
        echo "<td>" . esc($data['colonia']) . "</td>";
        echo "<td><small>" . esc($data['municipio_descripcion']) . "</small></td>";
        echo "<td><span class='badge " . esc($badge_class) . "'>" . esc(ucfirst($match['match_type'])) . "</span></td>";
        echo "<td><strong>" . esc($match['confidence']) . "%</strong></td>";
        echo "</tr>";
    } else {
        // No match: show the skip reason when one was recorded.
        echo "<tr class='match-none'>";
        echo "<td><strong>" . esc($prop['nombre_propiedad']) . "</strong></td>";
        echo "<td><small>" . esc($prop['direccion']) . "</small></td>";
        echo "<td><code>" . esc($colonia) . "</code></td>";
        echo "<td colspan='5' style='color: #999; font-style: italic;'>" . esc($match_data['reason'] ?? 'No match found') . "</td>";
        echo "<td><span class='badge badge-none'>0%</span></td>";
        echo "</tr>";
    }
}

echo "</tbody></table>";
// Report the number of rows actually rendered, which is below 100 when
// fewer properties exist.
echo "<p class='detail'>Showing first " . $display_count . " of " . count($matches) . " total properties</p>";

?>

</div>
</body>
</html>
