<?php
/**
 * DateFormatDetector - The Chronologist
 * Intelligently detects date formats by analyzing data patterns
 *
 * Strategy:
 * 1. If any value has a component > 12, we know the format (a month can't exceed 12, so that component must be the day)
 * 2. Use column name hints (check_in, fecha) for ambiguous cases
 * 3. Apply detected format consistently to all values in the column
 */

class DateFormatDetector
{

    /**
     * Detect the date format used in a column of values.
     *
     * The decision is split in two independent questions:
     *  - Component order: only values where exactly one of the first two
     *    components is in 13..31 (so it must be the day) cast a vote.
     *    Ambiguous values (both <= 12) never vote, so they cannot out-vote
     *    real evidence. A tie (including "no evidence") falls back to the
     *    column-name hint and finally to European day-first order.
     *  - Year width: the majority of all non-ISO date-looking values decides
     *    between a 4-digit ('Y') and a 2-digit ('y') year.
     *
     * Year-first values (YYYY-MM-DD with '/', '-' or '.' separators) win when
     * they outnumber all other date-looking values.
     *
     * The returned format always uses '/' (or '-' for the year-first form);
     * parseDate() normalizes the separators of the value accordingly.
     *
     * @param array $values All values from the column (for pattern analysis)
     * @param string $columnName Column name (for hints like 'check_in', 'fecha')
     * @return string One of 'Y-m-d', 'd/m/Y', 'm/d/Y', 'd/m/y', 'm/d/y'
     */
    public static function detectFormat($values, $columnName = '')
    {
        $isoCount = 0;    // year-first values
        $dayFirst = 0;    // values that can only be day-first
        $monthFirst = 0;  // values that can only be month-first
        $longYear = 0;    // non-ISO values written with a 4-digit year
        $shortYear = 0;   // non-ISO values written with a 2-digit year

        foreach ($values as $value) {
            if (empty($value) || !is_string($value)) {
                continue;
            }

            $shape = self::analyzeValue(trim($value));
            if ($shape === null) {
                continue;
            }

            if ($shape['order'] === 'ymd') {
                $isoCount++;
                continue;
            }

            if ($shape['shortYear']) {
                $shortYear++;
            } else {
                $longYear++;
            }

            if ($shape['order'] === 'dmy') {
                $dayFirst++;
            } elseif ($shape['order'] === 'mdy') {
                $monthFirst++;
            }
        }

        // Priority 1: year-first data is unambiguous when it dominates the column
        if ($isoCount > 0 && $isoCount > $longYear + $shortYear) {
            return 'Y-m-d';
        }

        // Priority 2: unambiguous values decide the component order
        if ($dayFirst > $monthFirst) {
            $order = 'd/m/';
        } elseif ($monthFirst > $dayFirst) {
            $order = 'm/d/';
        } else {
            // Priority 3: no evidence or a tie - use the column name as tiebreaker,
            // otherwise default to European (common in Latin America)
            $hintedFormat = self::getFormatFromColumnName($columnName);
            $order = $hintedFormat !== null ? substr($hintedFormat, 0, 4) : 'd/m/';
        }

        // Year width follows the majority of the slash-style values
        $yearChar = $shortYear > $longYear ? 'y' : 'Y';

        return $order . $yearChar;
    }

    /**
     * Get format hint from column name
     *
     * Currently every recognized hint suggests the European day-first format.
     *
     * @param string $columnName Column name for hints
     * @return string|null Suggested format ('d/m/Y') or null when there is no hint
     */
    private static function getFormatFromColumnName($columnName)
    {
        if (empty($columnName)) {
            return null;
        }

        // Spanish/Latin American hints - suggest European format
        if (preg_match('/fecha|check.*in|check.*out|llegada|salida|alta|inicio|fin|nacimiento|creado|modificado/i', $columnName)) {
            return 'd/m/Y';
        }

        return null;
    }

    /**
     * Classify the shape of a single (already trimmed) value.
     *
     * @param string $value Date value to analyze
     * @return array|null Null when the value does not look like a date, otherwise
     *                    ['order' => 'ymd'|'dmy'|'mdy'|null, 'shortYear' => bool].
     *                    'order' is null when the value gives no usable evidence
     *                    (both leading components <= 12, or out of range).
     */
    private static function analyzeValue($value)
    {
        // Year-first: YYYY-MM-DD, YYYY/MM/DD or YYYY.MM.DD (checked first to avoid confusion)
        if (preg_match('/^\d{4}[\/\-\.]\d{1,2}[\/\-\.]\d{1,2}$/', $value)) {
            return ['order' => 'ymd', 'shortYear' => false];
        }

        // Day/month in either order followed by a 2- or 4-digit year
        if (!preg_match('/^(\d{1,2})[\/\-\.](\d{1,2})[\/\-\.](\d{2}|\d{4})$/', $value, $m)) {
            return null;
        }

        $first = (int) $m[1];
        $second = (int) $m[2];

        // A component in 13..31 can only be a day; the other one must then be a valid month
        $order = null;
        if ($first > 12 && $first <= 31 && $second >= 1 && $second <= 12) {
            $order = 'dmy';
        } elseif ($second > 12 && $second <= 31 && $first >= 1 && $first <= 12) {
            $order = 'mdy';
        }

        return [
            'order' => $order,
            'shortYear' => strlen($m[3]) === 2,
        ];
    }

    /**
     * Parse a date using the detected format
     *
     * - Separators '/', '-' and '.' in a plain date value are normalized to the
     *   separator used by $format, because createFromFormat() requires an exact
     *   separator match.
     * - Fields not present in $format are reset (time becomes 00:00:00) so the
     *   result does not depend on the current time.
     * - Values that only parse with warnings (e.g. month 13, 31st of February)
     *   are rejected instead of silently rolling over to another date.
     * - 2-digit years use the pivot 00-49 => 2000-2049, 50-99 => 1950-1999.
     *
     * @param string $value Date value
     * @param string $format Detected format
     * @param string $targetFormat 'DATE' or 'DATETIME'
     * @return string|null 'Y-m-d' (or 'Y-m-d H:i:s' for DATETIME) or null when unparseable
     */
    public static function parseDate($value, $format, $targetFormat = 'DATE')
    {
        if (empty($value) || empty($format) || !is_scalar($value)) {
            return null;
        }

        $value = trim((string) $value);
        if ($value === '') {
            return null;
        }

        // Align the value's separators with the format's separator. Only done for
        // plain three-part dates and formats that use a single kind of separator,
        // so time parts or unusual formats are never touched.
        if (preg_match('/^\d{1,4}[\/\-\.]\d{1,2}[\/\-\.]\d{1,4}$/', $value)
            && preg_match_all('/[\/\-\.]/', $format, $found) > 0
        ) {
            $separators = array_unique($found[0]);
            if (count($separators) === 1) {
                $value = str_replace(['/', '-', '.'], reset($separators), $value);
            }
        }

        // '!' resets every field that the format does not parse (no "current time" leak)
        $parseFormat = $format[0] === '!' ? $format : '!' . $format;

        $date = DateTime::createFromFormat($parseFormat, $value);
        if ($date === false) {
            return null;
        }

        // getLastErrors() returns false (PHP >= 8.2) or zero counts when the parse was clean
        $errors = DateTime::getLastErrors();
        if (is_array($errors) && ($errors['warning_count'] > 0 || $errors['error_count'] > 0)) {
            return null;
        }

        // PHP maps 'y' to 1970-2069; re-map with our own pivot.
        // The two digits are recovered from the parsed year, so this works
        // wherever the year sits in the format.
        if (in_array($format, ['d/m/y', 'm/d/y', 'd-m-y', 'm-d-y', 'y-m-d', 'y/m/d'], true)) {
            $twoDigitYear = ((int) $date->format('Y')) % 100;
            $adjustedYear = ($twoDigitYear <= 49) ? (2000 + $twoDigitYear) : (1900 + $twoDigitYear);
            $date->setDate($adjustedYear, (int) $date->format('n'), (int) $date->format('j'));
        }

        return $targetFormat === 'DATETIME'
            ? $date->format('Y-m-d H:i:s')
            : $date->format('Y-m-d');
    }

    /**
     * Parse an entire column of dates using detected format
     *
     * @param array $values All values in the column
     * @param string $columnName Column name for hints
     * @param string $targetFormat 'DATE' or 'DATETIME'
     * @return array ['parsed' => array of parsed dates (null where unparseable), 'format' => detected format]
     */
    public static function parseColumn($values, $columnName = '', $targetFormat = 'DATE')
    {
        $format = self::detectFormat($values, $columnName);
        $parsed = [];

        foreach ($values as $value) {
            $parsed[] = self::parseDate($value, $format, $targetFormat);
        }

        return [
            'parsed' => $parsed,
            'format' => $format,
        ];
    }

    /**
     * Detect if a value looks like a date (any format)
     *
     * Note: any numeric string is accepted, since it is treated as a
     * possible Excel serial date.
     *
     * @param mixed $value Value to check
     * @return bool True if value appears to be a date
     */
    public static function looksLikeDate($value)
    {
        if (empty($value) || !is_string($value)) {
            return false;
        }

        $value = trim($value);

        // Numeric (Excel serial date)
        if (is_numeric($value)) {
            return true;
        }

        // Common date patterns
        $datePatterns = [
            '/^\d{1,2}[\/\-\.]\d{1,2}[\/\-\.]\d{4}$/',   // DD/MM/YYYY or MM/DD/YYYY
            '/^\d{4}[\/\-\.]\d{1,2}[\/\-\.]\d{1,2}$/',   // YYYY-MM-DD
            '/^\d{1,2}[\/\-\.]\d{1,2}[\/\-\.]\d{2}$/',   // Short year
        ];

        foreach ($datePatterns as $pattern) {
            if (preg_match($pattern, $value)) {
                return true;
            }
        }

        return false;
    }
}
