Requests_IDNAEncoder::utf8_to_codepoints( string $input )
Convert a UTF-8 string to a UCS-4 codepoint array
Description Description
Parameters Parameters
- $input
-
(string) (Required)
Return Return
(array) Unicode code points
Source Source
File: wp-includes/Requests/IDNAEncoder.php
protected static function utf8_to_codepoints($input) { $codepoints = array(); // Get number of bytes $strlen = strlen($input); for ($position = 0; $position < $strlen; $position++) { $value = ord($input[$position]); // One byte sequence: if ((~$value & 0x80) === 0x80) { $character = $value; $length = 1; $remaining = 0; } // Two byte sequence: elseif (($value & 0xE0) === 0xC0) { $character = ($value & 0x1F) << 6; $length = 2; $remaining = 1; } // Three byte sequence: elseif (($value & 0xF0) === 0xE0) { $character = ($value & 0x0F) << 12; $length = 3; $remaining = 2; } // Four byte sequence: elseif (($value & 0xF8) === 0xF0) { $character = ($value & 0x07) << 18; $length = 4; $remaining = 3; } // Invalid byte: else { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $value); } if ($remaining > 0) { if ($position + $length > $strlen) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } for ($position++; $remaining > 0; $position++) { $value = ord($input[$position]); // If it is invalid, count the sequence as invalid and reprocess the current byte: if (($value & 0xC0) !== 0x80) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } $character |= ($value & 0x3F) << (--$remaining * 6); } $position--; } if ( // Non-shortest form sequences are invalid $length > 1 && $character <= 0x7F || $length > 2 && $character <= 0x7FF || $length > 3 && $character <= 0xFFFF // Outside of range of ucschar codepoints // Noncharacters || ($character & 0xFFFE) === 0xFFFE || $character >= 0xFDD0 && $character <= 0xFDEF || ( // Everything else not in ucschar $character > 0xD7FF && $character < 0xF900 || $character < 0x20 || $character > 0x7E && $character < 0xA0 || $character > 0xEFFFD ) ) { throw new Requests_Exception('Invalid Unicode codepoint', 'idna.invalidcodepoint', $character); } $codepoints[] = $character; } return $codepoints; }
Expand full source code Collapse full source code View on Trac