<?php
/**
 * DataValidator - Type-Based Value Validator
 *
 * Validates and sanitizes values based on MySQL column types
 * Strategy:
 *   1. Try to fix/convert invalid values
 *   2. Fall back to NULL if fix fails and column is nullable
 *   3. Fall back to type-appropriate default if column is NOT NULL
 */

class DataValidator {

    /**
     * Currency symbols and codes stripped before numeric parsing.
     * Matching is case-sensitive.
     *
     * @var array
     */
    private static $currencyMarkers = ['$', '€', '£', '¥', 'MXN', 'USD', 'EUR'];

    /**
     * Normalized (upper-case) MySQL type name => validator family.
     * Single source of truth shared by validate() and getDefaultValue()
     * so the two can never disagree about which types are supported.
     *
     * @var array
     */
    private static $typeFamilies = [
        'INT'        => 'int',
        'INTEGER'    => 'int',
        'BIGINT'     => 'int',
        'MEDIUMINT'  => 'int',
        'SMALLINT'   => 'int',
        'TINYINT'    => 'int',
        'DECIMAL'    => 'decimal',
        'NUMERIC'    => 'decimal',
        'FLOAT'      => 'decimal',
        'DOUBLE'     => 'decimal',
        'REAL'       => 'decimal',
        'VARCHAR'    => 'varchar',
        'CHAR'       => 'varchar',
        'TEXT'       => 'text',
        'TINYTEXT'   => 'text',
        'MEDIUMTEXT' => 'text',
        'LONGTEXT'   => 'text',
        'DATE'       => 'date',
        'DATETIME'   => 'datetime',
        'TIMESTAMP'  => 'datetime',
        'BOOLEAN'    => 'bool',
        'BOOL'       => 'bool',
        'ENUM'       => 'enum',
    ];

    /**
     * Main validation entry point.
     * Routes the value to the validator matching the column type and
     * attaches the untouched input under 'original' for the audit trail.
     * Unknown types are passed through unchanged.
     *
     * @param mixed $value The value to validate
     * @param string $type MySQL column type name (VARCHAR, INT, DECIMAL, DATE, etc.); case and surrounding whitespace are ignored
     * @param mixed $length Column length (VARCHAR/CHAR) or "precision,scale" string (DECIMAL)
     * @param bool $nullable Whether column allows NULL
     * @param array|null $enumValues Allowed values for ENUM type
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null, 'original' => mixed]
     */
    public static function validate($value, $type, $length = null, $nullable = true, $enumValues = null) {
        switch (self::typeFamily($type)) {
            case 'int':
                $result = self::validateInt($value, $nullable);
                break;

            case 'decimal':
                $result = self::validateDecimal($value, $length, $nullable);
                break;

            case 'varchar':
                $result = self::validateVarchar($value, $length, $nullable);
                break;

            case 'text':
                $result = self::validateText($value, $nullable);
                break;

            case 'date':
                $result = self::validateDate($value, $nullable);
                break;

            case 'datetime':
                $result = self::validateDatetime($value, $nullable);
                break;

            case 'bool':
                $result = self::validateBoolean($value, $nullable);
                break;

            case 'enum':
                $result = self::validateEnum($value, $enumValues, $nullable);
                break;

            default:
                // Unknown type - pass through as-is
                $result = self::buildResult($value, false, null);
                break;
        }

        // Add original value to result for audit trail
        $result['original'] = $value;

        return $result;
    }

    /**
     * Validate INTEGER types.
     *
     * Accepts real integers as-is, converts booleans to 1/0, and otherwise
     * extracts the first integer found in the string form of the value
     * ("123", "123.45" → 123, "$123", "123 units").
     *
     * Thousands grouping is collapsed only when it is unambiguous
     * ("1,234,567", "1.234.567", "1,234.56", "1.234,56"). A single
     * separator such as "1,234" stays ambiguous and yields the digits
     * before it (1).
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL (fallback is NULL, otherwise 0)
     * @return array ['value' => int|null, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateInt($value, $nullable = true) {
        $default = $nullable ? null : 0;

        if (self::isBlank($value)) {
            return self::buildResult($default, false, null);
        }

        // Already a valid integer
        if (is_int($value)) {
            return self::buildResult($value, false, null);
        }

        // Real booleans: (string)false is '', which would otherwise be
        // reported as an invalid integer instead of 0.
        if (is_bool($value)) {
            return self::buildResult($value ? 1 : 0, true, 'Converted boolean to integer');
        }

        $stringValue = trim((string)$value);

        // Excel formulas (=A1+B1, =SUM(...), etc.) cannot be evaluated here
        if (strpos($stringValue, '=') === 0) {
            return self::buildResult($default, true, 'Excel formula detected, cannot evaluate');
        }

        // Drop thousands separators when the grouping is unambiguous so that
        // "1,234,567" becomes 1234567 rather than 1.
        $candidate = $stringValue;
        $grouping = self::detectThousandsGrouping($stringValue);
        if ($grouping === 'us') {
            $candidate = str_replace(',', '', $stringValue);
        } elseif ($grouping === 'european') {
            $candidate = str_replace('.', '', $stringValue);
        }

        // First run of digits (with optional sign) wins; any fraction is discarded
        if (preg_match('/-?\d+/', $candidate, $matches)) {
            $intValue = (int)$matches[0];
            $corrected = ($stringValue !== (string)$intValue);
            return self::buildResult(
                $intValue,
                $corrected,
                $corrected ? 'Extracted integer from string' : null
            );
        }

        // Boolean-like words. Digit forms ('1'/'0') never reach this point
        // because the digit extraction above already handled them.
        $lowerValue = strtolower($stringValue);
        if (in_array($lowerValue, ['true', 'yes', 'on'], true)) {
            return self::buildResult(1, true, 'Converted boolean to integer');
        }
        if (in_array($lowerValue, ['false', 'no', 'off'], true)) {
            return self::buildResult(0, true, 'Converted boolean to integer');
        }

        // Could not parse - use default
        return self::buildResult($default, true, 'Invalid integer format, using default');
    }

    /**
     * Validate DECIMAL/FLOAT types.
     *
     * Numeric input (per is_numeric) is returned untouched. Other input is
     * stripped of currency markers, normalized from European or US notation
     * to a plain "1234.56" form, and the first number found is extracted.
     * A leading-dot value such as "$.50" is read as 0.50.
     *
     * When $length is a "precision,scale" string (e.g. "10,2") the extracted
     * number is formatted to that scale; precision is not enforced here.
     *
     * @param mixed $value The value to validate
     * @param mixed $length Optional "precision,scale" string
     * @param bool $nullable Whether column allows NULL (fallback is NULL, otherwise '0.00')
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateDecimal($value, $length = null, $nullable = true) {
        $default = $nullable ? null : '0.00';

        if (self::isBlank($value)) {
            return self::buildResult($default, false, null);
        }

        // Already a valid number (int, float or numeric string)
        if (is_numeric($value)) {
            return self::buildResult($value, false, null);
        }

        $stringValue = trim((string)$value);

        // Excel formulas (=N60*P60, =A1+B1, =SUM(...), etc.) cannot be evaluated here
        if (strpos($stringValue, '=') === 0) {
            return self::buildResult($default, true, 'Excel formula detected, cannot evaluate');
        }

        // Normalize to "digits.digits" according to the detected notation
        if (self::detectNumberFormat($stringValue) === 'european') {
            $cleanedValue = self::normalizeEuropeanNumber($stringValue);
        } else {
            // US format: commas are thousands separators
            $cleanedValue = str_replace(self::$currencyMarkers, '', $stringValue);
            $cleanedValue = str_replace([',', ' '], '', $cleanedValue);
        }

        // First alternative: a leading-dot fraction at the very start (".50",
        // "-.5"). It is anchored so that a period belonging to preceding text
        // ("approx.5") is not mistaken for a decimal point.
        // Second alternative: the first signed number with optional fraction.
        if (!preg_match('/(?:^-?\.\d+|-?\d+\.?\d*)/', $cleanedValue, $matches)) {
            return self::buildResult($default, true, 'Invalid decimal format, using default');
        }

        // Restore the implied zero: ".50" → "0.50", "-.5" → "-0.5"
        $decimalValue = preg_replace('/^(-?)\./', '${1}0.', $matches[0]);

        // "10,2" means max 10 digits, 2 decimals: format to the scale part
        if (is_string($length) && strpos($length, ',') !== false) {
            $lengthParts = explode(',', $length);
            $scale = (int)$lengthParts[1];
            $decimalValue = number_format((float)$decimalValue, $scale, '.', '');
        }

        $corrected = ($stringValue !== $decimalValue);
        return self::buildResult(
            $decimalValue,
            $corrected,
            $corrected ? 'Extracted decimal from string' : null
        );
    }

    /**
     * Validate VARCHAR/CHAR types.
     *
     * Cleans the string through DataCleaner::clean() and truncates it to
     * $length characters (multibyte-aware) when a usable length is given.
     * A non-numeric or negative $length is treated as "no limit".
     *
     * @param mixed $value The value to validate
     * @param mixed $length Maximum number of characters, or null for no limit
     * @param bool $nullable Whether column allows NULL (fallback is NULL, otherwise '')
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateVarchar($value, $length = null, $nullable = true) {
        $default = $nullable ? null : '';

        if (self::isBlank($value)) {
            return self::buildResult($default, false, null);
        }

        $stringValue = (string)$value;
        $cleanedValue = self::cleanString($stringValue);

        // Handle NULL result from cleaning
        if ($cleanedValue === null) {
            return self::buildResult($default, true, 'Value cleaned to null');
        }

        // Truncate to max length if specified. The numeric guard keeps a
        // malformed length (e.g. "10,2" or -1) away from mb_substr().
        $truncated = false;
        if ($length !== null && is_numeric($length) && (int)$length >= 0) {
            $maxLength = (int)$length;
            if (mb_strlen($cleanedValue) > $maxLength) {
                $cleanedValue = mb_substr($cleanedValue, 0, $maxLength);
                $truncated = true;
            }
        }

        $corrected = $truncated || ($stringValue !== $cleanedValue);
        $reason = null;
        if ($truncated) {
            $reason = "Truncated to $length characters";
        } elseif ($corrected) {
            $reason = 'Cleaned special characters';
        }

        return self::buildResult($cleanedValue, $corrected, $reason);
    }

    /**
     * Validate TEXT types.
     * Same cleaning as VARCHAR but without a length limit.
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL (fallback is NULL, otherwise '')
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateText($value, $nullable = true) {
        $default = $nullable ? null : '';

        if (self::isBlank($value)) {
            return self::buildResult($default, false, null);
        }

        $stringValue = (string)$value;
        $cleanedValue = self::cleanString($stringValue);

        if ($cleanedValue === null) {
            return self::buildResult($default, true, 'Value cleaned to null');
        }

        $corrected = ($stringValue !== $cleanedValue);
        return self::buildResult(
            $cleanedValue,
            $corrected,
            $corrected ? 'Cleaned special characters' : null
        );
    }

    /**
     * Validate DATE types.
     * Delegates parsing to DataCleaner::parseDate($value, 'DATE').
     * The NOT NULL fallback is today's date (Y-m-d).
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateDate($value, $nullable = true) {
        return self::validateTemporal($value, $nullable, 'DATE', 'Y-m-d', 'date');
    }

    /**
     * Validate DATETIME/TIMESTAMP types.
     * Delegates parsing to DataCleaner::parseDate($value, 'DATETIME').
     * The NOT NULL fallback is the current date and time (Y-m-d H:i:s).
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateDatetime($value, $nullable = true) {
        return self::validateTemporal($value, $nullable, 'DATETIME', 'Y-m-d H:i:s', 'datetime');
    }

    /**
     * Validate BOOLEAN types.
     *
     * Real booleans and the integers 0/1 are accepted without correction.
     * Strings are matched case-insensitively against true/yes/on/t/y and
     * false/no/off/f/n; numeric strings count when they equal 1 or 0
     * ("1", "1.0", "0", "00"). Anything else falls back to the default.
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL (fallback is NULL, otherwise 0)
     * @return array ['value' => int|null, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateBoolean($value, $nullable = true) {
        $default = $nullable ? null : 0;

        if (self::isBlank($value)) {
            return self::buildResult($default, false, null);
        }

        // Already boolean
        if (is_bool($value)) {
            return self::buildResult($value ? 1 : 0, false, null);
        }

        // Already 0 or 1
        if ($value === 0 || $value === 1) {
            return self::buildResult($value, false, null);
        }

        $token = strtolower(trim((string)$value));

        $isTrue = in_array($token, ['true', 'yes', 'on', 't', 'y'], true);
        $isFalse = in_array($token, ['false', 'no', 'off', 'f', 'n'], true);

        // Numeric spellings of 1 and 0 ("1.0", "00", "1e0") are compared by
        // value, not by text.
        if (!$isTrue && !$isFalse && is_numeric($token)) {
            $number = $token + 0;
            $isTrue = ($number == 1);
            $isFalse = ($number == 0);
        }

        if ($isTrue) {
            return self::buildResult(1, true, 'Converted to boolean');
        }
        if ($isFalse) {
            return self::buildResult(0, true, 'Converted to boolean');
        }

        // Could not parse - use default
        return self::buildResult($default, true, 'Invalid boolean format, using default');
    }

    /**
     * Validate ENUM types.
     *
     * Matches the value case-insensitively against the allowed list and
     * returns the list's own spelling. An exact (untrimmed) match is tried
     * first; if none is found, the value is retried with surrounding
     * whitespace removed. When nothing matches, the fallback is NULL for
     * nullable columns, otherwise the first allowed value ('' if the list
     * is empty).
     *
     * @param mixed $value The value to validate
     * @param array|null $enumValues Allowed values; when not an array the value is passed through unchecked
     * @param bool $nullable Whether column allows NULL
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    public static function validateEnum($value, $enumValues = null, $nullable = true) {
        if (self::isBlank($value)) {
            return self::buildResult($nullable ? null : '', false, null);
        }

        // No enum values provided - cannot validate
        if (!is_array($enumValues)) {
            return self::buildResult($value, false, null);
        }

        $stringValue = (string)$value;

        // Exact spelling first, trimmed spelling second, so a member that
        // itself carries whitespace still wins over its trimmed twin.
        $candidates = [$stringValue];
        $trimmedValue = trim($stringValue);
        if ($trimmedValue !== $stringValue) {
            $candidates[] = $trimmedValue;
        }

        foreach ($candidates as $candidate) {
            foreach ($enumValues as $allowedValue) {
                if (strcasecmp($candidate, (string)$allowedValue) === 0) {
                    $corrected = ($stringValue !== $allowedValue);
                    return self::buildResult(
                        $allowedValue, // Use the canonical form
                        $corrected,
                        $corrected ? 'Normalized enum value' : null
                    );
                }
            }
        }

        // Value not in allowed list. array_values() makes "first member"
        // work for arrays that are not indexed from 0.
        $members = array_values($enumValues);
        $firstMember = isset($members[0]) ? $members[0] : '';

        return self::buildResult(
            $nullable ? null : $firstMember,
            true,
            'Value not in allowed ENUM values, using default'
        );
    }

    /**
     * Get default value for a given data type.
     * Used as fallback when validation fails and column is NOT NULL.
     *
     * @param string $type MySQL column type name; case and surrounding whitespace are ignored
     * @return mixed 0, '0.00', '', the current date/datetime string, or null for unknown types
     */
    public static function getDefaultValue($type) {
        switch (self::typeFamily($type)) {
            case 'int':
            case 'bool':
                return 0;

            case 'decimal':
                return '0.00';

            case 'varchar':
            case 'text':
            case 'enum':
                return '';

            case 'date':
                return date('Y-m-d');

            case 'datetime':
                return date('Y-m-d H:i:s');

            default:
                return null;
        }
    }

    /**
     * Resolve a column type name to its validator family.
     *
     * @param string $type MySQL column type name
     * @return string|null Family key from $typeFamilies, or null when the type is unknown
     */
    private static function typeFamily($type) {
        $normalized = strtoupper(trim((string)$type));

        return isset(self::$typeFamilies[$normalized]) ? self::$typeFamilies[$normalized] : null;
    }

    /**
     * Build the result array shared by every validator.
     *
     * @param mixed $value Validated value
     * @param bool $corrected Whether the input had to be changed
     * @param string|null $reason Human-readable explanation of the change
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    private static function buildResult($value, $corrected, $reason) {
        return ['value' => $value, 'corrected' => $corrected, 'reason' => $reason];
    }

    /**
     * Whether the input counts as "no value" (NULL or the empty string).
     * Strict comparison on purpose: 0, false and '0' are real values.
     *
     * @param mixed $value
     * @return bool
     */
    private static function isBlank($value) {
        return $value === null || $value === '';
    }

    /**
     * Run a string through DataCleaner::clean().
     * DataCleaner lives in DataCleaner.php next to this file and is loaded
     * on first use; callers treat a NULL return as "nothing usable left".
     *
     * @param string $text
     * @return mixed Whatever DataCleaner::clean() returns
     */
    private static function cleanString($text) {
        require_once __DIR__ . '/DataCleaner.php';

        return DataCleaner::clean($text);
    }

    /**
     * Shared implementation for DATE and DATETIME validation.
     *
     * @param mixed $value The value to validate
     * @param bool $nullable Whether column allows NULL
     * @param string $mode Mode passed to DataCleaner::parseDate() ('DATE' or 'DATETIME')
     * @param string $format date() format used for the NOT NULL fallback
     * @param string $label Word used in the reason messages ('date' or 'datetime')
     * @return array ['value' => mixed, 'corrected' => bool, 'reason' => string|null]
     */
    private static function validateTemporal($value, $nullable, $mode, $format, $label) {
        if (self::isBlank($value)) {
            return self::buildResult($nullable ? null : date($format), false, null);
        }

        require_once __DIR__ . '/DataCleaner.php';
        $parsed = DataCleaner::parseDate($value, $mode);

        // A NULL result is how parseDate() reports failure
        if ($parsed === null) {
            return self::buildResult(
                $nullable ? null : date($format),
                true,
                "Invalid {$label} format, using default"
            );
        }

        // Check if value was modified
        $corrected = ($value !== $parsed);
        return self::buildResult(
            $parsed,
            $corrected,
            $corrected ? "Parsed and formatted {$label}" : null
        );
    }

    /**
     * Detect thousands grouping that cannot be mistaken for a decimal point.
     *
     * A separator is unambiguous when it repeats between strict 3-digit
     * groups ("1,234,567") or when the groups are followed by the other
     * separator and a digit ("1,234.56"). A single separator ("1,234") is
     * ambiguous and reported as NULL.
     *
     * @param string $value The number string to analyze
     * @return string|null 'us' (comma groups), 'european' (period groups) or null
     */
    private static function detectThousandsGrouping($value) {
        // The lookbehind stops a match from starting in the middle of a
        // longer digit/separator run.
        if (preg_match('/(?<![\d.,])\d{1,3}(?:(?:,\d{3}){2,}(?![\d,])|(?:,\d{3})+\.\d)/', $value)) {
            return 'us';
        }
        if (preg_match('/(?<![\d.,])\d{1,3}(?:(?:\.\d{3}){2,}(?![\d.])|(?:\.\d{3})+,\d)/', $value)) {
            return 'european';
        }

        return null;
    }

    /**
     * Detect number format (European vs US/International)
     *
     * European format: 1.234,56 (period = thousands, comma = decimal)
     * US format: 1,234.56 (comma = thousands, period = decimal)
     *
     * Unambiguous thousands grouping decides first ("1,234,567" is US,
     * "1.234.567" is European). Otherwise a lone comma is read as a
     * European decimal ("17,012" → 17.012), a lone period as a US decimal,
     * and when both occur the one appearing last is the decimal separator.
     *
     * @param string $value The number string to analyze
     * @return string 'european' or 'us'
     */
    private static function detectNumberFormat($value) {
        $trimmed = trim($value);

        // Repeated separators can only be thousands grouping
        $grouping = self::detectThousandsGrouping($trimmed);
        if ($grouping !== null) {
            return $grouping;
        }

        // Remove spaces and currency symbols for analysis
        $cleaned = str_replace(array_merge([' '], self::$currencyMarkers), '', $trimmed);

        $lastCommaPos = strrpos($cleaned, ',');
        $lastPeriodPos = strrpos($cleaned, '.');

        // No comma at all: period-only or plain digits → US
        if ($lastCommaPos === false) {
            return 'us';
        }

        // Comma but no period → European decimal comma
        if ($lastPeriodPos === false) {
            return 'european';
        }

        // Both present: whichever comes last is the decimal separator
        return ($lastCommaPos > $lastPeriodPos) ? 'european' : 'us';
    }

    /**
     * Normalize European number format to US format
     * Converts: "1.234,56" → "1234.56" or "17,012" → "17.012"
     *
     * @param string $value European formatted number
     * @return string US formatted number
     */
    private static function normalizeEuropeanNumber($value) {
        // Remove spaces and currency symbols
        $cleaned = str_replace(array_merge([' '], self::$currencyMarkers), '', trim($value));

        // Periods are thousands separators → drop them;
        // the comma is the decimal separator → turn it into a period.
        return strtr(str_replace('.', '', $cleaned), ',', '.');
    }
}
