<?php
/**
 * Fill Postal Code Data in propiedad Table via Fuzzy Matching
 *
 * This script extracts colonia names from propiedad.direccion and matches them
 * against the codigo_postal government catalog to fill postal code fields.
 *
 * Usage:
 *   Preview mode: php fill_postal_codes_propiedad.php --preview
 *   Execute mode: php fill_postal_codes_propiedad.php --execute
 *
 * @author Claude Code
 * @date 2025-01-01
 */

// Configuration
// Preview is the safe default: the database is only modified when --execute
// is passed and --preview is not.
$preview_mode = in_array('--preview', $argv, true) || !in_array('--execute', $argv, true);
$verbose = in_array('--verbose', $argv, true) || in_array('-v', $argv, true);

// Database connection - minimal bootstrap
// NOTE(review): credentials are hard-coded in source control; consider moving
// them to the environment or an untracked config file.
$gIAsql = [
    'host' => 'localhost',
    'port' => '3306', // not used below: the connection goes through the unix socket
    'user' => 'root',
    'pwd' => 'M@chiavell1',
    'dbname' => 'quantix',
    'link' => null
];

// Connect to database via socket.
// When mysqli exception reporting is enabled (the default since PHP 8.1) a
// failed connect throws instead of returning false, so both paths are funneled
// into the same failure branch below.
$connect_error = null;
try {
    $gIAsql['link'] = mysqli_connect(
        $gIAsql['host'],
        $gIAsql['user'],
        $gIAsql['pwd'],
        $gIAsql['dbname'],
        null,
        '/lamp/mysql/mysql.sock'
    );
} catch (mysqli_sql_exception $e) {
    $gIAsql['link'] = false;
    $connect_error = $e->getMessage();
}

if (!$gIAsql['link']) {
    $connect_error = $connect_error ?? mysqli_connect_error();
    // Report on STDERR and use a non-zero status so shells/cron see the failure.
    fwrite(STDERR, "Database connection failed: " . $connect_error . "\n");
    exit(1);
}

mysqli_set_charset($gIAsql['link'], 'utf8mb4');

// Helper functions
/**
 * Run a SQL statement on the shared connection held in $gIAsql['link'].
 *
 * @param string $sql Statement to execute.
 * @return mysqli_result|bool The value returned by mysqli_query().
 */
function ia_query($sql) {
    $connection = $GLOBALS['gIAsql']['link'];

    return mysqli_query($connection, $sql);
}

/**
 * Return the first column of the first row produced by a query.
 *
 * @param string $sql     Query to run through ia_query().
 * @param mixed  $default Value returned when the query fails or yields no row.
 * @return mixed First column of the first row, or $default.
 */
function ia_singleton($sql, $default = null) {
    $resultset = ia_query($sql);
    if (!$resultset) {
        return $default;
    }

    $first_row = mysqli_fetch_row($resultset);

    return $first_row ? $first_row[0] : $default;
}

/**
 * Run a query and collect every row as an associative array.
 *
 * @param string $sql Query to run through ia_query().
 * @return array List of rows (column name => value); empty when the query
 *               fails or returns no rows.
 */
function ia_sqlArrayIndx($sql) {
    $rows = [];

    $resultset = ia_query($sql);
    if (!$resultset) {
        return $rows;
    }

    for ($row = mysqli_fetch_assoc($resultset); $row; $row = mysqli_fetch_assoc($resultset)) {
        $rows[] = $row;
    }

    return $rows;
}

/**
 * Escape a value with the shared connection and wrap it in single quotes so
 * it can be embedded in a SQL statement as a string literal.
 *
 * @param string $value Raw value.
 * @return string Quoted, escaped SQL literal.
 */
function strit($value) {
    $escaped = mysqli_real_escape_string($GLOBALS['gIAsql']['link'], $value);

    return sprintf("'%s'", $escaped);
}

/**
 * Build (but do not execute) an UPDATE statement.
 *
 * The first key of $where is treated as the primary key and is left out of
 * the SET list even if it is present in $values.
 *
 * @param string $table  Table name (inserted verbatim).
 * @param array  $values Column => new value; values are quoted via strit().
 * @param array  $where  Column => value conditions, joined with AND.
 * @return string The UPDATE statement.
 */
function ia_update($table, $values, $where) {
    $primary_key = array_key_first($where);

    // Shared "column = 'value'" formatter for both SET and WHERE.
    $pair = static function ($column, $value) {
        return $column . ' = ' . strit($value);
    };

    $assignments = [];
    foreach ($values as $column => $value) {
        if ($column === $primary_key) {
            continue; // Don't update PK
        }
        $assignments[] = $pair($column, $value);
    }

    $conditions = [];
    foreach ($where as $column => $value) {
        $conditions[] = $pair($column, $value);
    }

    return sprintf(
        'UPDATE %s SET %s WHERE %s',
        $table,
        implode(', ', $assignments),
        implode(' AND ', $conditions)
    );
}

// Statistics
// Every counter starts at zero; the key order is the order in which the
// counters appear when $stats is serialized into the JSON report.
$stats = array_fill_keys(
    [
        'total',
        'processed',
        'exact_match',
        'fuzzy_match',
        'manual_override',
        'ambiguous',
        'not_found',
        'skipped_no_direccion',
        'updated',
        'errors',
    ],
    0
);

// Filled by processProperties(): one entry per matched property / per failure.
$matches = [];
$errors = [];

/**
 * Hand-maintained postal data for colonias that are ambiguous or are known
 * edge cases.
 *
 * Each key is a colonia name and each value is a positional list:
 *   [codigo_postal, colonia_oficial, municipio, municipio_desc, estado, estado_desc]
 *
 * NOTE(review): findManualOverride() looks keys up after normalizeColoniaName(),
 * which strips accents, so the accented keys below look unreachable from that
 * path - confirm before relying on them.
 *
 * @return array Map of colonia name => 6-element list described above.
 */
function getManualOverrides() {
    // Municipality (code, name) pairs and the state pair shared by all rows.
    $cuauhtemoc = ['015', 'CUAUHTEMOC'];
    $miguel_hidalgo = ['016', 'MIGUEL HIDALGO'];
    $cdmx = ['DIF', 'CIUDAD DE MEXICO'];

    $row = static function ($codigo_postal, $colonia_oficial, array $municipio) use ($cdmx) {
        return array_merge([$codigo_postal, $colonia_oficial], $municipio, $cdmx);
    };

    return [
        // Default for ambiguous
        'CUAUHTEMOC' => $row('06000', 'CUAUHTEMOC', $cuauhtemoc),
        'CUAUTÉMOC' => $row('06000', 'CUAUHTEMOC', $cuauhtemoc),
        'CUAUTEMOC' => $row('06000', 'CUAUHTEMOC', $cuauhtemoc),
        // Default to section I
        'POLANCO' => $row('11510', 'POLANCO I SECCION', $miguel_hidalgo),
        'HIPODROMO CONDESA' => $row('06170', 'HIPODROMO CONDESA', $cuauhtemoc),
        'HIPÓDROMO CONDESA' => $row('06170', 'HIPODROMO CONDESA', $cuauhtemoc),
        // Generic CDMX fallback (for "Astronomos 62")
        'CDMX' => $row('11800', 'ESCANDON I SECCION', $miguel_hidalgo),
        'STA MARIA LA RIBERA' => $row('06400', 'SANTA MARIA LA RIBERA', $cuauhtemoc),
        'SANTA MARIA LA RIBERA' => $row('06400', 'SANTA MARIA LA RIBERA', $cuauhtemoc),
        // Near Ejercito Nacional - Granada area
        'CHAPULTEPEC MORALES' => $row('11520', 'GRANADA', $miguel_hidalgo),
    ];
}

/**
 * Pull the colonia out of an address string.
 *
 * The address is expected to look like "Street Address, Colonia"; the colonia
 * is whatever follows the last comma, with surrounding whitespace removed.
 *
 * @param string|null $direccion Raw address.
 * @return string|null The colonia, or null when the address is empty, has no
 *                     comma, or has nothing usable after the last comma.
 */
function extractColoniaFromDireccion($direccion) {
    if (empty($direccion)) {
        return null;
    }

    // Without a comma there is no "Street, Colonia" structure to split on.
    $last_comma = strrpos($direccion, ',');
    if ($last_comma === false) {
        return null;
    }

    $candidate = trim(substr($direccion, $last_comma + 1));
    if (empty($candidate)) {
        return null;
    }

    return $candidate;
}

/**
 * Produce the canonical form of a colonia name used for matching.
 *
 * The name is trimmed, upper-cased as UTF-8, and the accented characters
 * Á É Í Ó Ú Ñ Ü are replaced by their unaccented ASCII letters. Other
 * accented characters are left as they are.
 *
 * @param string|null $colonia Raw colonia name.
 * @return string Normalized name, or '' for empty input.
 */
function normalizeColoniaName($colonia) {
    if (empty($colonia)) {
        return '';
    }

    // Upper-case accented letter => plain letter.
    static $plain_letters = [
        'Á' => 'A',
        'É' => 'E',
        'Í' => 'I',
        'Ó' => 'O',
        'Ú' => 'U',
        'Ñ' => 'N',
        'Ü' => 'U',
    ];

    $upper = mb_strtoupper(trim($colonia), 'UTF-8');

    return strtr($upper, $plain_letters);
}

/**
 * Tier 1: Find postal code with exact match
 *
 * Looks for a catalog row whose upper-cased, trimmed colonia equals the
 * normalized input, restricted to the CUAUHTEMOC, MIGUEL HIDALGO and
 * BENITO JUAREZ municipalities. When several rows qualify, the one with the
 * lowest codigo_postal is used.
 *
 * @param string $colonia Colonia name as extracted from the address.
 * @return array|null ['match_type' => 'exact', 'confidence' => 100,
 *                     'data' => catalog row] or null when nothing matches.
 */
function findPostalCodeExact($colonia) {
    global $verbose;

    $normalized = normalizeColoniaName($colonia);

    // Try exact match in CDMX municipalities.
    // The query is run once; the fetched row is used both for the existence
    // check and as the result.
    $sql = "SELECT
                cp.codigo_postal,
                cp.colonia,
                cp.municipio,
                cp.municipio_descripcion,
                cp.estado,
                cp.estado_descripcion
            FROM codigo_postal cp
            WHERE UPPER(TRIM(cp.colonia)) = " . strit($normalized) . "
              AND cp.municipio_descripcion IN ('CUAUHTEMOC', 'MIGUEL HIDALGO', 'BENITO JUAREZ')
            ORDER BY cp.codigo_postal
            LIMIT 1";

    $rows = ia_sqlArrayIndx($sql);

    // A row without a usable postal code is treated as "no match".
    if (empty($rows) || empty($rows[0]['codigo_postal'])) {
        return null;
    }

    $row = $rows[0];

    if ($verbose) {
        echo "  ✓ Exact match found: {$row['colonia']} → {$row['codigo_postal']}\n";
    }

    return [
        'match_type' => 'exact',
        'confidence' => 100,
        'data' => $row
    ];
}

/**
 * Tier 2: Find postal code with fuzzy (LIKE) match
 *
 * Searches the catalog for colonias that contain the normalized input as a
 * substring (same three municipalities as the exact tier), looking at up to
 * five candidates ordered by shortest colonia name, then postal code.
 *
 * @param string $colonia Colonia name as extracted from the address.
 * @return array|null null when nothing matches; otherwise an array with
 *                    'match_type' ('fuzzy' or 'ambiguous'), 'confidence'
 *                    (90 or 50), 'data' (first candidate row) and, for
 *                    ambiguous matches, 'alternatives' (all candidate rows).
 */
function findPostalCodeFuzzy($colonia) {
    global $verbose;

    $normalized = normalizeColoniaName($colonia);

    // An empty term would turn the pattern into '%%' and match every row.
    if ($normalized === '') {
        return null;
    }

    // Escape LIKE wildcards so '%' and '_' in the address are matched
    // literally. An explicit '|' escape character is used so the pattern does
    // not depend on backslash handling.
    $like_term = strtr($normalized, ['|' => '||', '%' => '|%', '_' => '|_']);

    // Try LIKE match - prefer shortest colonia name (most specific)
    $sql = "SELECT
                cp.codigo_postal,
                cp.colonia,
                cp.municipio,
                cp.municipio_descripcion,
                cp.estado,
                cp.estado_descripcion,
                LENGTH(cp.colonia) as colonia_length
            FROM codigo_postal cp
            WHERE UPPER(cp.colonia) LIKE CONCAT('%', " . strit($like_term) . ", '%') ESCAPE '|'
              AND cp.municipio_descripcion IN ('CUAUHTEMOC', 'MIGUEL HIDALGO', 'BENITO JUAREZ')
            ORDER BY LENGTH(cp.colonia) ASC, cp.codigo_postal ASC
            LIMIT 5";

    $results = ia_sqlArrayIndx($sql);

    if (empty($results)) {
        return null;
    }

    // If multiple results, check if they're all the same postal code
    $unique_cps = array_unique(array_column($results, 'codigo_postal'));

    if (count($unique_cps) > 1) {
        if ($verbose) {
            // Report the number of distinct postal codes, not the row count.
            echo "  ! Ambiguous match: " . count($unique_cps) . " different postal codes found\n";
            foreach ($results as $r) {
                echo "    - {$r['colonia']} → {$r['codigo_postal']}\n";
            }
        }
        return [
            'match_type' => 'ambiguous',
            'confidence' => 50,
            'data' => $results[0], // Return first one
            'alternatives' => $results
        ];
    }

    // Single match or multiple with same postal code
    if ($verbose) {
        echo "  ✓ Fuzzy match found: {$results[0]['colonia']} → {$results[0]['codigo_postal']}\n";
    }

    return [
        'match_type' => 'fuzzy',
        'confidence' => 90,
        'data' => $results[0]
    ];
}

/**
 * Tier 3: look the colonia up in the manual override table.
 *
 * The colonia is normalized with normalizeColoniaName() and used as the key
 * into getManualOverrides().
 *
 * @param string $colonia Colonia name as extracted from the address.
 * @return array|null ['match_type' => 'manual', 'confidence' => 100,
 *                     'data' => postal fields] or null when there is no entry.
 */
function findManualOverride($colonia) {
    global $verbose;

    $lookup_key = normalizeColoniaName($colonia);
    $table = getManualOverrides();

    if (!isset($table[$lookup_key])) {
        return null;
    }

    // Entries are positional; unpack them into named fields.
    [$codigo_postal, $colonia_oficial, $municipio, $municipio_desc, $estado, $estado_desc] = $table[$lookup_key];

    if ($verbose) {
        echo "  ✓ Manual override found: {$colonia} → {$codigo_postal}\n";
    }

    $fields = [
        'codigo_postal' => $codigo_postal,
        'colonia' => $colonia_oficial,
        'municipio' => $municipio,
        'municipio_descripcion' => $municipio_desc,
        'estado' => $estado,
        'estado_descripcion' => $estado_desc
    ];

    return [
        'match_type' => 'manual',
        'confidence' => 100,
        'data' => $fields
    ];
}

/**
 * Find postal code data for a colonia using 3-tier matching.
 *
 * The finders are tried in this order and the first non-empty result wins:
 *   1. manual overrides (numbered "tier 3" but given priority for edge cases)
 *   2. exact catalog match (tier 1)
 *   3. fuzzy catalog match (tier 2)
 *
 * @param string $colonia Colonia name as extracted from the address.
 * @return array|null Match array from the winning finder, or null.
 */
function findPostalCodeData($colonia) {
    if (empty($colonia)) {
        return null;
    }

    $finders = ['findManualOverride', 'findPostalCodeExact', 'findPostalCodeFuzzy'];

    foreach ($finders as $finder) {
        $found = $finder($colonia);
        if ($found) {
            return $found;
        }
    }

    return null;
}

/**
 * Write postal code data onto one propiedad row.
 *
 * In preview mode nothing is written and the call always succeeds. Otherwise
 * an UPDATE keyed on propiedad_id is executed and $stats['updated'] or
 * $stats['errors'] is incremented according to the outcome.
 *
 * @param mixed $propiedad_id Primary key of the row to update.
 * @param array $cp_data      Postal fields: codigo_postal, colonia, estado,
 *                            estado_descripcion, municipio, municipio_descripcion.
 * @param bool  $preview      When true, only report what would happen.
 * @return bool True on success (or preview), false when the query failed.
 */
function updatePropiedadPostalData($propiedad_id, $cp_data, $preview = true) {
    global $stats, $verbose;

    if (!$preview) {
        // propiedad_id leads the list; ia_update() leaves the WHERE key out of SET.
        $values = ['propiedad_id' => $propiedad_id];
        $columns = [
            'codigo_postal',
            'colonia',
            'estado',
            'estado_descripcion',
            'municipio',
            'municipio_descripcion',
        ];
        foreach ($columns as $column) {
            $values[$column] = $cp_data[$column];
        }

        $statement = ia_update('propiedad', $values, ['propiedad_id' => $propiedad_id]);
        $succeeded = (bool) ia_query($statement);

        $stats[$succeeded ? 'updated' : 'errors']++;

        return $succeeded;
    }

    if ($verbose) {
        echo "  [PREVIEW] Would update propiedad_id: {$propiedad_id}\n";
    }

    return true;
}

/**
 * Main processing function
 *
 * Loads every propiedad row, extracts the colonia from its direccion, looks
 * up postal data via findPostalCodeData() and hands each match to
 * updatePropiedadPostalData(). Progress is echoed per property; results are
 * accumulated in the globals $stats, $matches and $errors.
 *
 * Note: ambiguous matches are applied too, using their first candidate; a
 * warning is printed for them.
 *
 * @param bool $preview When true, no database changes are made.
 * @return void
 */
function processProperties($preview = true) {
    global $stats, $matches, $errors;

    echo "\n";
    echo "================================================================================\n";
    echo "  Postal Code Fill Script for propiedad Table\n";
    echo "  Mode: " . ($preview ? "PREVIEW (no changes will be made)" : "EXECUTE (will update database)") . "\n";
    echo "================================================================================\n\n";

    // Fetch all properties; rows without a direccion are skipped in the loop
    $sql = "SELECT
                propiedad_id,
                nombre_propiedad,
                direccion
            FROM propiedad
            ORDER BY nombre_propiedad";

    $propiedades = ia_sqlArrayIndx($sql);
    $stats['total'] = count($propiedades);

    echo "Total properties found: {$stats['total']}\n\n";

    // match_type => counter in $stats
    $stat_for_type = [
        'exact' => 'exact_match',
        'fuzzy' => 'fuzzy_match',
        'manual' => 'manual_override',
        'ambiguous' => 'ambiguous'
    ];

    // Process each property
    foreach ($propiedades as $prop) {
        $stats['processed']++;

        echo "[{$stats['processed']}/{$stats['total']}] Processing: {$prop['nombre_propiedad']}\n";

        // Check if direccion exists
        if (empty($prop['direccion'])) {
            echo "  ⚠ Skipped: No direccion data\n\n";
            $stats['skipped_no_direccion']++;
            continue;
        }

        // Extract colonia (counted under the same "skipped" statistic when it fails)
        $colonia = extractColoniaFromDireccion($prop['direccion']);
        if (empty($colonia)) {
            echo "  ⚠ Skipped: Could not extract colonia from: {$prop['direccion']}\n\n";
            $stats['skipped_no_direccion']++;
            continue;
        }

        echo "  Direccion: {$prop['direccion']}\n";
        echo "  Extracted colonia: {$colonia}\n";

        // Find postal code data
        $match = findPostalCodeData($colonia);

        if ($match) {
            // Record match
            $matches[] = [
                'propiedad_id' => $prop['propiedad_id'],
                'nombre_propiedad' => $prop['nombre_propiedad'],
                'direccion' => $prop['direccion'],
                'colonia_extraida' => $colonia,
                'match_type' => $match['match_type'],
                'confidence' => $match['confidence'],
                'cp_data' => $match['data']
            ];

            // Update statistics
            if (isset($stat_for_type[$match['match_type']])) {
                $stats[$stat_for_type[$match['match_type']]]++;
            }

            // Display match info
            echo "  ✓ Match found ({$match['match_type']}, {$match['confidence']}% confidence)\n";
            echo "    → CP: {$match['data']['codigo_postal']}\n";
            echo "    → Colonia: {$match['data']['colonia']}\n";
            echo "    → Municipio: {$match['data']['municipio_descripcion']}\n";

            if (isset($match['alternatives']) && count($match['alternatives']) > 1) {
                echo "    ⚠ WARNING: Multiple matches found, using first one\n";
            }

            // Update database
            $success = updatePropiedadPostalData($prop['propiedad_id'], $match['data'], $preview);

            if (!$success && !$preview) {
                echo "    ✗ ERROR: Failed to update database\n";
                // Same shape as the "not found" entries so the final report
                // and the CSV export can read every field.
                $errors[] = [
                    'propiedad_id' => $prop['propiedad_id'],
                    'nombre_propiedad' => $prop['nombre_propiedad'],
                    'direccion' => $prop['direccion'],
                    'colonia_extraida' => $colonia,
                    'error' => 'Database update failed'
                ];
            }
        } else {
            echo "  ✗ No match found\n";
            $stats['not_found']++;

            $errors[] = [
                'propiedad_id' => $prop['propiedad_id'],
                'nombre_propiedad' => $prop['nombre_propiedad'],
                'direccion' => $prop['direccion'],
                'colonia_extraida' => $colonia,
                'error' => 'No matching postal code found'
            ];
        }

        echo "\n";
    }
}

/**
 * Display final statistics and reports
 *
 * Prints the counters from $stats, lists every entry of $errors, writes a
 * JSON report (statistics, matches, errors) next to this script and, when
 * there are errors, a CSV of them for manual review.
 *
 * Missing fields in an error entry are printed as empty strings, and file
 * write failures are reported as warnings instead of aborting the run.
 *
 * @param bool $preview Whether the run was a preview (controls the report
 *                      mode label and whether update counts are shown).
 * @return void
 */
function displayFinalReport($preview = true) {
    global $stats, $matches, $errors;

    echo "\n";
    echo "================================================================================\n";
    echo "  FINAL REPORT\n";
    echo "================================================================================\n\n";

    echo "STATISTICS:\n";
    echo "  Total properties: {$stats['total']}\n";
    echo "  Processed: {$stats['processed']}\n";
    echo "  Skipped (no direccion): {$stats['skipped_no_direccion']}\n";
    echo "\n";
    echo "MATCHING RESULTS:\n";
    echo "  ✓ Exact matches: {$stats['exact_match']}\n";
    echo "  ✓ Fuzzy matches: {$stats['fuzzy_match']}\n";
    echo "  ✓ Manual overrides: {$stats['manual_override']}\n";
    echo "  ! Ambiguous matches: {$stats['ambiguous']}\n";
    echo "  ✗ Not found: {$stats['not_found']}\n";
    echo "\n";

    // Matched (any tier, including ambiguous) over all processed rows.
    $success_rate = $stats['processed'] > 0
        ? round(($stats['exact_match'] + $stats['fuzzy_match'] + $stats['manual_override'] + $stats['ambiguous']) / $stats['processed'] * 100, 1)
        : 0;

    echo "SUCCESS RATE: {$success_rate}% ({$stats['exact_match']} + {$stats['fuzzy_match']} + {$stats['manual_override']} + {$stats['ambiguous']} of {$stats['processed']})\n";

    if (!$preview) {
        echo "\nDATABASE UPDATES:\n";
        echo "  Updated records: {$stats['updated']}\n";
        echo "  Errors: {$stats['errors']}\n";
    }

    // Show unmatched properties
    if (!empty($errors)) {
        echo "\n";
        echo "UNMATCHED PROPERTIES (require manual intervention):\n";
        echo "---------------------------------------------------\n";
        foreach ($errors as $err) {
            // Not every producer is guaranteed to fill every field.
            $nombre = $err['nombre_propiedad'] ?? '';
            $direccion = $err['direccion'] ?? '';
            $colonia = $err['colonia_extraida'] ?? '';
            $message = $err['error'] ?? '';

            echo "  • {$nombre}\n";
            echo "    Direccion: {$direccion}\n";
            echo "    Colonia extracted: {$colonia}\n";
            echo "    Error: {$message}\n\n";
        }
    }

    // One timestamp for both output files so they can be paired up.
    $file_stamp = date('Y-m-d_His');

    // Save detailed report to JSON
    $report_file = __DIR__ . '/postal_codes_fill_report_' . $file_stamp . '.json';
    $report_data = [
        'timestamp' => date('Y-m-d H:i:s'),
        'mode' => $preview ? 'preview' : 'execute',
        'statistics' => $stats,
        'matches' => $matches,
        'errors' => $errors
    ];

    $json = json_encode($report_data, JSON_PRETTY_PRINT | JSON_UNESCAPED_UNICODE);
    if ($json === false) {
        echo "\nWARNING: Could not encode report as JSON: " . json_last_error_msg() . "\n";
    } elseif (file_put_contents($report_file, $json) === false) {
        echo "\nWARNING: Could not write report to: {$report_file}\n";
    } else {
        echo "\nDetailed report saved to: {$report_file}\n";
    }

    // Save CSV of unmatched for manual review
    if (!empty($errors)) {
        $csv_file = __DIR__ . '/unmatched_properties_' . $file_stamp . '.csv';
        $fp = fopen($csv_file, 'w');
        if ($fp === false) {
            echo "WARNING: Could not open CSV for writing: {$csv_file}\n";
        } else {
            fputcsv($fp, ['Propiedad ID', 'Nombre', 'Direccion', 'Colonia Extraida', 'Error']);
            foreach ($errors as $err) {
                fputcsv($fp, [
                    $err['propiedad_id'] ?? '',
                    $err['nombre_propiedad'] ?? '',
                    $err['direccion'] ?? '',
                    $err['colonia_extraida'] ?? '',
                    $err['error'] ?? ''
                ]);
            }
            fclose($fp);
            echo "Unmatched properties CSV saved to: {$csv_file}\n";
        }
    }

    echo "\n";
}

// ============================================================================
// MAIN EXECUTION
// ============================================================================

// Run the fill, print the report, and exit with a status that reflects the
// outcome: 0 for a clean preview/execute run, 1 when any update failed or an
// unexpected error was thrown.
try {
    processProperties($preview_mode);
    displayFinalReport($preview_mode);

    if ($preview_mode) {
        echo "================================================================================\n";
        echo "  This was a PREVIEW. No changes were made to the database.\n";
        echo "  To execute updates, run: php " . basename(__FILE__) . " --execute\n";
        echo "================================================================================\n\n";
    } elseif ($stats['errors'] > 0) {
        // Do not claim success when some UPDATE statements failed.
        echo "================================================================================\n";
        echo "  Updates completed with {$stats['errors']} error(s).\n";
        echo "  Review the report above and check unmatched properties.\n";
        echo "================================================================================\n\n";
        exit(1);
    } else {
        echo "================================================================================\n";
        echo "  Updates completed successfully!\n";
        echo "  Review the report above and check unmatched properties.\n";
        echo "================================================================================\n\n";
    }

    exit(0);

} catch (Throwable $e) {
    // Throwable also covers Error subclasses (e.g. TypeError), not just Exception.
    echo "\n\n";
    echo "================================================================================\n";
    echo "  FATAL ERROR\n";
    echo "================================================================================\n";
    echo "Error: " . $e->getMessage() . "\n";
    echo "File: " . $e->getFile() . ":" . $e->getLine() . "\n";
    echo "Trace:\n" . $e->getTraceAsString() . "\n";
    exit(1);
}
