<?php
/**
 * PMS Matcher Library - Core Matching Functions
 * 
 * Extracted from link_pms_propiedades.php for CLI and testing use
 * Contains all matching algorithms for Cloudbeds and Hostify
 * 
 * Created: 2026-01-08
 */

// ============================================================================
// CORE NORMALIZATION FUNCTIONS
// ============================================================================

/**
 * Normalize free text so two labels can be compared loosely.
 *
 * Steps: replace the accented Spanish letters listed below with their ASCII
 * base letter, lowercase (UTF-8 aware), collapse every run of whitespace into
 * a single space, and trim both ends.
 *
 * @param mixed $text Label to normalize. null and arrays are treated as empty.
 * @return string Normalized text, or '' when there is nothing to normalize.
 */
function normalize_text($text) {
    // empty() is deliberately avoided: it reports the valid label "0" as empty.
    if ($text === null || is_array($text)) return '';
    $text = (string) $text;
    if ($text === '') return '';

    $accents = [
        'Á'=>'A', 'É'=>'E', 'Í'=>'I', 'Ó'=>'O', 'Ú'=>'U',
        'á'=>'a', 'é'=>'e', 'í'=>'i', 'ó'=>'o', 'ú'=>'u',
        'Ñ'=>'N', 'ñ'=>'n', 'Ü'=>'U', 'ü'=>'u'
    ];
    $text = strtr($text, $accents);
    $text = mb_strtolower($text, 'UTF-8');

    // Match whitespace in UTF-8 mode so a multi-byte character can never be
    // split by a byte-wise \s match. In UTF-8 mode preg_replace() returns null
    // for subjects that are not valid UTF-8, so fall back to byte-wise matching
    // instead of losing the text.
    $collapsed = preg_replace('/\s+/u', ' ', $text);
    if ($collapsed === null) {
        $collapsed = preg_replace('/\s+/', ' ', $text);
    }

    return trim($collapsed === null ? $text : $collapsed);
}

/**
 * Extract the street-name part of a property or listing label.
 *
 * The label is normalized, at most one known building abbreviation is expanded
 * to its full street name, and everything from the first digit onwards is
 * dropped (street number, unit, and anything after them).
 *
 * @param mixed $text Property name, address, or listing title.
 * @return string Lowercase street name; '' when the label is empty or starts
 *                with a digit. Labels without any digit are returned whole.
 */
function extract_street_name($text) {
    $normalized = normalize_text($text);

    // Expand known abbreviations
    $abbr_map = [
        'slp37' => 'san luis potosi 37',
        'slp 37' => 'san luis potosi 37',
        'vs146' => 'vicente suarez 146',
        'vs 146' => 'vicente suarez 146',
        'ver 2' => 'veracruz 26',
        'ver2' => 'veracruz 26',
    ];

    foreach ($abbr_map as $abbr => $full) {
        // Only expand an abbreviation that starts a token. A plain substring
        // search also fires inside unrelated words ("denver 204" contains
        // "ver 2") and would corrupt the street name.
        // The "abbr-" form needs no separate handling: everything after the
        // first digit is discarded below, so the separator never survives.
        $pattern = '/(?<![a-z0-9])' . preg_quote($abbr, '/') . '/';
        $expanded = preg_replace($pattern, $full, $normalized, -1, $count);
        if ($expanded !== null && $count > 0) {
            $normalized = $expanded;
            break; // first matching abbreviation wins
        }
    }

    // Offsets from PREG_OFFSET_CAPTURE are byte offsets, matching substr().
    if (preg_match('/\d/', $normalized, $matches, PREG_OFFSET_CAPTURE)) {
        return trim(substr($normalized, 0, $matches[0][1]));
    }

    return $normalized;
}

/**
 * Extract a unit identifier (apartment / penthouse / suite) from a label.
 *
 * Patterns are tried in order and the first hit wins:
 *   1. the first standalone 2-4 digit number (note: this may be a street
 *      number if one precedes the unit in the label);
 *   2. a penthouse marker, returned as "ph" + identifier;
 *   3. a suite marker with a numeric-led identifier, returned as "su" + identifier;
 *   4. a standalone uppercase letter in the ORIGINAL text, returned lowercase.
 *
 * @param mixed $text Property name, room number, or listing title.
 * @return string Lowercase unit identifier, or '' when none is found.
 */
function extract_unit_number($text) {
    $normalized = normalize_text($text);

    // Pattern 1: Pure numbers (103, 401, 302)
    if (preg_match('/\b(\d{2,4})\b/', $normalized, $matches)) {
        return $matches[1];
    }

    // Pattern 2: Penthouse (PH1, PH2)
    // The marker must not be preceded by a letter, so "ph" inside a word
    // (e.g. "josephine") is ignored while "37ph1" / "slp37-ph1" still match.
    // The capture comes from already-normalized text, so it is used as-is.
    if (preg_match('/(?<![a-z])ph\s*(\w+)/', $normalized, $matches)) {
        return 'ph' . $matches[1];
    }

    // Pattern 3: Suite (SU4, Suite 4)
    // Require a digit-led identifier and a non-letter before the marker;
    // otherwise ordinary words such as "suarez", "sur" or "su casa" are
    // reported as suites. Letter-only units are left to Pattern 4.
    if (preg_match('/(?<![a-z])su(?:ite)?\s*(\d\w*)/', $normalized, $matches)) {
        return 'su' . $matches[1];
    }

    // Pattern 4: Single letter suffix (A, B, C)
    // Runs on the original text on purpose: only an UPPERCASE standalone
    // letter counts, which the lowercased text cannot distinguish.
    $raw = ($text === null) ? '' : $text;
    if (preg_match('/\b([A-Z])\b/', $raw, $matches)) {
        return strtolower($matches[1]);
    }

    return '';
}

// ============================================================================
// CLOUDBEDS MATCHING FUNCTIONS
// ============================================================================

/**
 * Score how well a local property matches a Cloudbeds property/room pair.
 *
 * Confidence = 60% building score + 40% unit score, rounded.
 *   - Building: 100 when the normalized Cloudbeds property name (>= 5 bytes)
 *     is contained in the property name or address; otherwise the
 *     similar_text() percentage between the two extracted street names
 *     (both >= 5 bytes); otherwise 0.
 *   - Unit: 100 on identical unit identifiers, similar_text() percentage when
 *     they differ, the building score when Cloudbeds has no usable room
 *     number, and 0 when only the property side lacks a unit.
 *
 * Tiers: 1 (>= 95), 2 (>= 80), 3 (>= 65), 4 (>= 40). Below 40 is not a match.
 *
 * @param mixed $propiedad_nombre    Local property name.
 * @param mixed $propiedad_direccion Local property address (street fallback).
 * @param mixed $cb_property         Cloudbeds property name.
 * @param mixed $cb_room_number      Cloudbeds room number, or 'N/A' when absent.
 * @return array ['match' => false], or ['match' => true, 'confidence', 'tier',
 *               'building_score', 'unit_score', 'prop_unit', 'cb_unit', 'pattern'].
 */
function match_cloudbeds($propiedad_nombre, $propiedad_direccion, $cb_property, $cb_room_number) {
    $prop_street = extract_street_name($propiedad_nombre);
    if (strlen($prop_street) < 3) {
        // Name gave no usable street; fall back to the address.
        $prop_street = extract_street_name($propiedad_direccion);
    }

    $cb_street = extract_street_name($cb_property);

    // PART 1: Building match (60% weight)
    $building_score = 0;
    $norm_cb_property = normalize_text($cb_property);
    $norm_prop_nombre = normalize_text($propiedad_nombre);
    $norm_prop_dir = normalize_text($propiedad_direccion);

    if (strlen($norm_cb_property) >= 5) {
        if (strpos($norm_prop_nombre, $norm_cb_property) !== false ||
            strpos($norm_prop_dir, $norm_cb_property) !== false) {
            $building_score = 100;
        } elseif (strlen($prop_street) >= 5 && strlen($cb_street) >= 5) {
            similar_text($prop_street, $cb_street, $percent);
            $building_score = $percent;
        }
    }

    // PART 2: Unit match (40% weight)
    $unit_score = 0;
    $prop_unit = extract_unit_number($propiedad_nombre);

    // The 'N/A' placeholder must be recognised BEFORE unit extraction:
    // extract_unit_number('N/A') yields 'n' (standalone uppercase letter),
    // which would otherwise be compared against the property unit and score
    // ~0 instead of falling back to the building score.
    $cb_is_placeholder = strcasecmp(trim((string) $cb_room_number), 'N/A') === 0;
    $cb_unit = $cb_is_placeholder ? '' : extract_unit_number($cb_room_number);

    if (empty($cb_unit)) {
        // No room number on the Cloudbeds side: the building alone decides.
        $unit_score = $building_score;
    } elseif (!empty($prop_unit)) {
        if ($prop_unit === $cb_unit) {
            $unit_score = 100;
        } else {
            similar_text($prop_unit, $cb_unit, $percent);
            $unit_score = $percent;
        }
    }
    // else: Cloudbeds names a unit but the property has none -> unit score 0.

    $confidence = round(($building_score * 0.6) + ($unit_score * 0.4));

    $tier = 0;
    if ($confidence >= 95) $tier = 1;
    elseif ($confidence >= 80) $tier = 2;
    elseif ($confidence >= 65) $tier = 3;
    elseif ($confidence >= 40) $tier = 4;

    if ($tier > 0) {
        return [
            'match' => true,
            'confidence' => $confidence,
            'tier' => $tier,
            'building_score' => round($building_score),
            'unit_score' => round($unit_score),
            'prop_unit' => $prop_unit,
            'cb_unit' => $cb_unit,
            'pattern' => "cloudbeds_hybrid"
        ];
    }

    return ['match' => false];
}

// ============================================================================
// HOSTIFY MATCHING FUNCTIONS
// ============================================================================

/**
 * Tier 1: exact match between the normalized property name and listing title.
 *
 * @param mixed $propiedad_name Local property name.
 * @param mixed $anuncio        Hostify listing title.
 * @return array ['match' => true, 'confidence' => 100, 'tier' => 1,
 *               'pattern' => 'tier1_exact'] or ['match' => false].
 */
function match_hostify_tier1($propiedad_name, $anuncio) {
    $norm_prop = normalize_text($propiedad_name);

    // Two blank labels are trivially identical; that is missing data, not an
    // exact match, and must never be linked with 100% confidence.
    if ($norm_prop === '') {
        return ['match' => false];
    }

    if ($norm_prop === normalize_text($anuncio)) {
        return ['match' => true, 'confidence' => 100, 'tier' => 1, 'pattern' => 'tier1_exact'];
    }
    return ['match' => false];
}

/**
 * Tier 2: containment match against the listing title's segments.
 *
 * The normalized listing title is split on "|" and ","; each trimmed segment
 * of at least 5 bytes is tested for containment (in either direction) against
 * the normalized property name and then against the normalized address.
 *
 * @param mixed $propiedad_name      Local property name.
 * @param mixed $propiedad_direccion Local property address.
 * @param mixed $anuncio             Hostify listing title.
 * @return array Match with confidence 90 / tier 2 and pattern 'tier2_contains'
 *               or 'tier2_contains_dir', or ['match' => false].
 */
function match_hostify_tier2($propiedad_name, $propiedad_direccion, $anuncio) {
    $name = normalize_text($propiedad_name);

    // A property name shorter than 5 bytes disables this tier entirely,
    // including the address comparison.
    // NOTE(review): gating the address check on the name length may be
    // unintentional — confirm before relaxing.
    if (strlen($name) < 5) {
        return ['match' => false];
    }

    $address = normalize_text($propiedad_direccion);
    $address_usable = strlen($address) >= 5;

    foreach (preg_split('/[\|,]/', normalize_text($anuncio)) as $piece) {
        $piece = trim($piece);
        if (strlen($piece) < 5) {
            continue; // too short to be a meaningful segment
        }

        $name_overlaps = strpos($piece, $name) !== false || strpos($name, $piece) !== false;
        if ($name_overlaps) {
            return ['match' => true, 'confidence' => 90, 'tier' => 2, 'pattern' => 'tier2_contains'];
        }

        if (!$address_usable) {
            continue;
        }
        $address_overlaps = strpos($piece, $address) !== false || strpos($address, $piece) !== false;
        if ($address_overlaps) {
            return ['match' => true, 'confidence' => 90, 'tier' => 2, 'pattern' => 'tier2_contains_dir'];
        }
    }

    return ['match' => false];
}

/**
 * Tier 3: fuzzy match using similar_text() percentages.
 *
 * Checked in order, returning on the first percentage >= $threshold:
 *   1. property name vs. whole listing title   ('tier3_similarity')
 *   2. property address vs. whole listing title ('tier3_similarity_dir')
 *   3. property name vs. each "|" / "," separated title segment of at least
 *      5 bytes                                   ('tier3_similarity_segment')
 *
 * @param mixed     $propiedad_name      Local property name.
 * @param mixed     $propiedad_direccion Local property address.
 * @param mixed     $anuncio             Hostify listing title.
 * @param int|float $threshold           Minimum similarity percentage (default 85).
 * @return array Match with confidence 70 / tier 3, or ['match' => false].
 */
function match_hostify_tier3($propiedad_name, $propiedad_direccion, $anuncio, $threshold = 85) {
    $name = normalize_text($propiedad_name);
    $address = normalize_text($propiedad_direccion);
    $title = normalize_text($anuncio);

    // Whole-title comparisons, keyed by the pattern label they report.
    $whole_title_checks = [
        'tier3_similarity' => $name,
        'tier3_similarity_dir' => $address,
    ];
    foreach ($whole_title_checks as $label => $candidate) {
        similar_text($candidate, $title, $score);
        if ($score >= $threshold) {
            return ['match' => true, 'confidence' => 70, 'tier' => 3, 'pattern' => $label];
        }
    }

    // Segment comparisons: only the property name is compared here.
    foreach (preg_split('/[\|,]/', $title) as $piece) {
        $piece = trim($piece);
        if (strlen($piece) < 5) {
            continue;
        }
        similar_text($name, $piece, $score);
        if ($score >= $threshold) {
            return ['match' => true, 'confidence' => 70, 'tier' => 3, 'pattern' => 'tier3_similarity_segment'];
        }
    }

    return ['match' => false];
}

/**
 * Tier 4: street-name containment match.
 *
 * The property street comes from the name, falling back to the address when
 * the name yields fewer than 3 bytes. Both streets must be at least 5 bytes
 * and one must contain the other. Confidence is 50, raised to 65 when both
 * labels carry the same non-empty unit identifier.
 *
 * @param mixed $propiedad_name      Local property name.
 * @param mixed $propiedad_direccion Local property address.
 * @param mixed $anuncio             Hostify listing title.
 * @return array Match with tier 4 and pattern 'tier4_street', or ['match' => false].
 */
function match_hostify_tier4($propiedad_name, $propiedad_direccion, $anuncio) {
    $no_match = ['match' => false];

    $own_street = extract_street_name($propiedad_name);
    if (empty($own_street) || strlen($own_street) < 3) {
        $own_street = extract_street_name($propiedad_direccion);
    }
    $listing_street = extract_street_name($anuncio);

    // Very short street names produce too many accidental containments.
    if (strlen($own_street) < 5 || strlen($listing_street) < 5) {
        return $no_match;
    }

    $streets_overlap = strpos($listing_street, $own_street) !== false
        || strpos($own_street, $listing_street) !== false;
    if (!$streets_overlap) {
        return $no_match;
    }

    $own_unit = extract_unit_number($propiedad_name);
    $listing_unit = extract_unit_number($anuncio);
    $same_unit = !empty($own_unit) && !empty($listing_unit) && $own_unit === $listing_unit;

    return [
        'match' => true,
        'confidence' => $same_unit ? 65 : 50,
        'tier' => 4,
        'pattern' => 'tier4_street',
    ];
}

// ============================================================================
// COMBO DETECTION (TIER 0 - HIGHEST PRIORITY)
// ============================================================================

/**
 * Detect listing titles that encode one or more units and expand them.
 *
 * Patterns are tried in order on the normalized title; the first hit wins:
 *   - 'abbr_code'   "ABBR##-###"  (SLP37-301, VS146-102)
 *   - 'abbr_unit'   "ABBR #" / "ABBR PH #" (Ver 2, Ver PH 7)
 *   - 'doble'       "Doble X y Y"
 *   - 'multi_suite' two or more "Suite N"
 *   - 'comma_list'  title contains a comma and two or more 2-4 digit numbers
 *
 * @param mixed $text Hostify listing title.
 * @return array ['units' => string[], 'type' => string, 'street' => string]
 *               on a hit; ['units' => []] (no other keys) otherwise.
 */
function expand_combo_anuncio($text) {
    $normalized = normalize_text($text);

    // Building codes and the street each one stands for.
    $streets_by_code = ['slp' => 'san luis potosi', 'vs' => 'vicente suarez', 'ver' => 'veracruz'];

    // Pattern 0: "ABBR##-###" (SLP37-301, VS146-102)
    if (preg_match('/^([a-z]{2,4})(\d{2,3})\s*[\-]\s*(\d{2,4})/i', $normalized, $m)) {
        $code = strtolower($m[1]);
        // Unknown codes are kept verbatim as the street.
        $base = isset($streets_by_code[$code]) ? $streets_by_code[$code] : $m[1];
        return ['units' => [$m[3]], 'type' => 'abbr_code', 'street' => $base . ' ' . $m[2]];
    }

    // Pattern 0.5: "ABBR #" or "ABBR PH #" (Ver 2, Ver PH 7)
    if (preg_match('/^([a-z]{2,4})\s+(ph\s+)?(\d+)/i', $normalized, $m)) {
        $code = strtolower($m[1]);
        if ($code === 'ver') {
            // "ver" carries no street number here; the code hard-wires 26.
            $street = 'veracruz 26';
        } elseif (isset($streets_by_code[$code])) {
            $street = $streets_by_code[$code];
        } else {
            $street = $m[1];
        }
        return ['units' => [$m[3]], 'type' => 'abbr_unit', 'street' => $street];
    }

    // Pattern 1: "Doble X y Y"
    if (preg_match('/doble\s+(\d+)\s+y\s+(\d+)/i', $normalized, $m)) {
        return [
            'units' => [$m[1], $m[2]],
            'type' => 'doble',
            'street' => extract_street_name($text),
        ];
    }

    // Pattern 5: "Suite 1, Suite 4, Suite 10" (a single suite is not a combo)
    if (preg_match_all('/suite\s+(\d+)/i', $normalized, $m) > 1) {
        return [
            'units' => $m[1],
            'type' => 'multi_suite',
            'street' => extract_street_name($text),
        ];
    }

    // Pattern 6: "204, 103, 401" (comma-separated)
    $has_comma = strpos($text, ',') !== false;
    if ($has_comma && preg_match_all('/\b(\d{2,4})\b/', $normalized, $m) >= 2) {
        return [
            'units' => $m[1],
            'type' => 'comma_list',
            'street' => extract_street_name($text),
        ];
    }

    return ['units' => []];
}

// ============================================================================
// MAIN HOSTIFY MATCHER (tries all tiers)
// ============================================================================

/**
 * Run every Hostify tier matcher and keep the highest-confidence hit.
 *
 * All four tiers are always evaluated, in order 1 to 4. A later tier replaces
 * the current best only when its confidence is strictly higher, so on equal
 * confidence the earlier tier is kept.
 *
 * @param mixed $propiedad_name      Local property name.
 * @param mixed $propiedad_direccion Local property address.
 * @param mixed $anuncio             Hostify listing title.
 * @return array The winning tier's result array, or
 *               ['match' => false, 'confidence' => 0] when no tier matches.
 */
function match_hostify($propiedad_name, $propiedad_direccion, $anuncio) {
    $tier_results = [
        match_hostify_tier1($propiedad_name, $anuncio),                        // exact
        match_hostify_tier2($propiedad_name, $propiedad_direccion, $anuncio),  // contains
        match_hostify_tier3($propiedad_name, $propiedad_direccion, $anuncio),  // similarity
        match_hostify_tier4($propiedad_name, $propiedad_direccion, $anuncio),  // street
    ];

    $winner = ['match' => false, 'confidence' => 0];
    foreach ($tier_results as $candidate) {
        if (!$candidate['match']) {
            continue; // misses carry no 'confidence' key
        }
        if ($candidate['confidence'] > $winner['confidence']) {
            $winner = $candidate;
        }
    }

    return $winner;
}
