From 1ffa1965f4163200b3418cf93adc357d88c81fff Mon Sep 17 00:00:00 2001 From: svollbehr Date: Wed, 25 Mar 2009 22:18:50 +0000 Subject: [PATCH] Fix a defect in unicode handling git-svn-id: http://php-reader.googlecode.com/svn/trunk@145 51a70ab9-7547-0410-9469-37e369ee0574 --- src/ASF/Object/CodecList.php | 8 +- src/ASF/Object/ContentDescription.php | 20 +-- src/ASF/Object/ExtendedContentDescription.php | 10 +- src/ASF/Object/ExtendedStreamProperties.php | 8 +- src/ASF/Object/LanguageList.php | 4 +- src/ASF/Object/Marker.php | 8 +- src/ASF/Object/Metadata.php | 8 +- src/ASF/Object/MetadataLibrary.php | 8 +- src/ASF/Object/ScriptCommand.php | 4 +- src/ID3/Frame/APIC.php | 9 +- src/ID3/Frame/AbstractText.php | 11 +- src/ID3/Frame/COMM.php | 16 +- src/ID3/Frame/COMR.php | 19 ++- src/ID3/Frame/GEOB.php | 19 ++- src/ID3/Frame/IPLS.php | 18 ++- src/ID3/Frame/OWNE.php | 15 +- src/ID3/Frame/SYLT.php | 20 ++- src/ID3/Frame/TXXX.php | 17 +- src/ID3/Frame/USER.php | 14 +- src/ID3/Frame/USLT.php | 17 +- src/ID3/Frame/WXXX.php | 11 +- src/Transform.php | 150 +++++++----------- tests/TestTransform.php | 45 +----- 23 files changed, 226 insertions(+), 233 deletions(-) diff --git a/src/ASF/Object/CodecList.php b/src/ASF/Object/CodecList.php index 3228bfd..2824b52 100644 --- a/src/ASF/Object/CodecList.php +++ b/src/ASF/Object/CodecList.php @@ -84,11 +84,11 @@ final class ASF_Object_CodecList extends ASF_Object $codecNameLength = $this->_reader->readUInt16LE() * 2; $entry["codecName"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($codecNameLength)); + $this->_reader->readString16($codecNameLength)); $codecDescriptionLength = $this->_reader->readUInt16LE() * 2; $entry["codecDescription"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($codecDescriptionLength)); + $this->_reader->readString16($codecDescriptionLength)); $codecInformationLength = $this->_reader->readUInt16LE(); $entry["codecInformation"] = 
$this->_reader->read($codecInformationLength); @@ -165,11 +165,11 @@ final class ASF_Object_CodecList extends ASF_Object Transform::toUInt16LE(strlen($codecName = iconv ($this->getOption("encoding"), "utf-16le", $this->_entries[$i]["codecName"]) . "\0\0") / 2) . - Transform::toString16LE($codecName) . + Transform::toString16($codecName) . Transform::toUInt16LE(strlen($codecDescription = iconv ($this->getOption("encoding"), "utf-16le", $this->_entries[$i]["codecDescription"]) . "\0\0") / 2) . - Transform::toString16LE($codecDescription) . + Transform::toString16($codecDescription) . Transform::toUInt16LE(strlen($this->_entries[$i]["codecInformation"])) . $this->_entries[$i]["codecInformation"]; } diff --git a/src/ASF/Object/ContentDescription.php b/src/ASF/Object/ContentDescription.php index 94ade54..12a4698 100644 --- a/src/ASF/Object/ContentDescription.php +++ b/src/ASF/Object/ContentDescription.php @@ -92,19 +92,19 @@ final class ASF_Object_ContentDescription extends ASF_Object $this->_title = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($titleLen)); + $this->_reader->readString16($titleLen)); $this->_author = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($authorLen)); + $this->_reader->readString16($authorLen)); $this->_copyright = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($copyrightLen)); + $this->_reader->readString16($copyrightLen)); $this->_description = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($descriptionLen)); + $this->_reader->readString16($descriptionLen)); $this->_rating = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($ratingLen)); + $this->_reader->readString16($ratingLen)); } /** @@ -225,11 +225,11 @@ final class ASF_Object_ContentDescription extends ASF_Object Transform::toUInt16LE(strlen($copyright)) . Transform::toUInt16LE(strlen($description)) . 
Transform::toUInt16LE(strlen($rating)) . - Transform::toString16LE($title) . - Transform::toString16LE($author) . - Transform::toString16LE($copyright) . - Transform::toString16LE($description) . - Transform::toString16LE($rating); + Transform::toString16($title) . + Transform::toString16($author) . + Transform::toString16($copyright) . + Transform::toString16($description) . + Transform::toString16($rating); $this->setSize(24 /* for header */ + strlen($data)); return Transform::toGUID($this->getIdentifier()) . diff --git a/src/ASF/Object/ExtendedContentDescription.php b/src/ASF/Object/ExtendedContentDescription.php index 625af54..c4f6253 100644 --- a/src/ASF/Object/ExtendedContentDescription.php +++ b/src/ASF/Object/ExtendedContentDescription.php @@ -45,6 +45,8 @@ require_once("ASF/Object.php"); * This object contains unlimited number of attribute fields giving more * information about the file. * + * @todo Implement better handling of various types of attributes + * according to http://msdn.microsoft.com/en-us/library/aa384495(VS.85).aspx * @package php-reader * @subpackage ASF * @author Sven Vollbehr @@ -76,7 +78,7 @@ final class ASF_Object_ExtendedContentDescription extends ASF_Object $nameLen = $this->_reader->readUInt16LE(); $name = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($nameLen)); + $this->_reader->readString16($nameLen)); $valueDataType = $this->_reader->readUInt16LE(); $valueLen = $this->_reader->readUInt16LE(); @@ -84,7 +86,7 @@ final class ASF_Object_ExtendedContentDescription extends ASF_Object case 0: // string $this->_contentDescriptors[$name] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($valueLen)); + $this->_reader->readString16($valueLen)); break; case 1: // byte array $this->_contentDescriptors[$name] = $this->_reader->read($valueLen); @@ -183,7 +185,7 @@ final class ASF_Object_ExtendedContentDescription extends ASF_Object $descriptor = iconv 
($this->getOption("encoding"), "utf-16le", $name ? $name . "\0" : ""); $data .= Transform::toUInt16LE(strlen($descriptor)) . - Transform::toString16LE($descriptor); + Transform::toString16($descriptor); if (is_string($value)) { /* There is no way to distinguish byte arrays from unicode strings and @@ -203,7 +205,7 @@ final class ASF_Object_ExtendedContentDescription extends ASF_Object ($this->getOption("encoding"), "utf-16le", $value) . "\0\0"; $data .= Transform::toUInt16LE(0) . Transform::toUInt16LE(strlen($value)) . - Transform::toString16LE($value); + Transform::toString16($value); } } else if (is_bool($value)) diff --git a/src/ASF/Object/ExtendedStreamProperties.php b/src/ASF/Object/ExtendedStreamProperties.php index 17770e5..065269f 100644 --- a/src/ASF/Object/ExtendedStreamProperties.php +++ b/src/ASF/Object/ExtendedStreamProperties.php @@ -206,7 +206,7 @@ final class ASF_Object_ExtendedStreamProperties extends ASF_Object $streamNameLength = $this->_reader->readUInt16LE(); $streamName["streamName"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($streamNameLength)); + $this->_reader->readString16($streamNameLength)); $this->_streamNames[] = $streamName; } for ($i = 0; $i < $payloadExtensionSystemCount; $i++) { @@ -216,7 +216,7 @@ final class ASF_Object_ExtendedStreamProperties extends ASF_Object $extensionSystemInfoLength = $this->_reader->readUInt32LE(); $payloadExtensionSystem["extensionSystemInfo"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($extensionSystemInfoLength)); + $this->_reader->readString16($extensionSystemInfoLength)); $this->_payloadExtensionSystems[] = $payloadExtensionSystem; } } @@ -680,7 +680,7 @@ final class ASF_Object_ExtendedStreamProperties extends ASF_Object Transform::toUInt16LE(strlen($streamName = iconv ($this->getOption("encoding"), "utf-16le", $this->_streamNames["streamName"]) . "\0\0")) . 
- Transform::toString16LE($streamName); + Transform::toString16($streamName); for ($i = 0; $i < $payloadExtensionSystemCount; $i++) $data .= Transform::toGUID($this->_streamNames["extensionSystemId"]) . @@ -688,7 +688,7 @@ final class ASF_Object_ExtendedStreamProperties extends ASF_Object Transform::toUInt16LE(strlen($extensionSystemInfo = iconv ($this->getOption("encoding"), "utf-16le", $this->_streamNames["extensionSystemInfo"]) . "\0\0")) . - Transform::toString16LE($extensionSystemInfo); + Transform::toString16($extensionSystemInfo); $this->setSize(24 /* for header */ + strlen($data)); return Transform::toGUID($this->getIdentifier()) . diff --git a/src/ASF/Object/LanguageList.php b/src/ASF/Object/LanguageList.php index a39e61d..7e355b4 100644 --- a/src/ASF/Object/LanguageList.php +++ b/src/ASF/Object/LanguageList.php @@ -74,7 +74,7 @@ final class ASF_Object_LanguageList extends ASF_Object $languageIdRecordsCount = $this->_reader->readUInt16LE(); for ($i = 0; $i < $languageIdRecordsCount; $i++) { $languageIdLength = $this->_reader->readInt8(); - $languageId = $this->_reader->readString16LE($languageIdLength); + $languageId = $this->_reader->readString16($languageIdLength); $this->_languages[] = iconv ("utf-16le", $this->getOption("encoding"), $languageId); } @@ -123,7 +123,7 @@ final class ASF_Object_LanguageList extends ASF_Object $data .= Transform::toInt8(strlen($languageId = iconv ($this->getOption("encoding"), "utf-16le", $this->_languages[$i]) . - "\0\0")) . Transform::toString16LE($languageId); + "\0\0")) . Transform::toString16($languageId); $this->setSize(24 /* for header */ + strlen($data)); return Transform::toGUID($this->getIdentifier()) . 
diff --git a/src/ASF/Object/Marker.php b/src/ASF/Object/Marker.php index 78ca2ec..8bbe301 100644 --- a/src/ASF/Object/Marker.php +++ b/src/ASF/Object/Marker.php @@ -82,7 +82,7 @@ final class ASF_Object_Marker extends ASF_Object $nameLength = $this->_reader->readUInt16LE(); $this->_name = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($nameLength)); + $this->_reader->readString16($nameLength)); for ($i = 0; $i < $markersCount; $i++) { $marker = array ("offset" => $this->_reader->readInt64LE(), @@ -93,7 +93,7 @@ final class ASF_Object_Marker extends ASF_Object $descriptionLength = $this->_reader->readUInt32LE(); $marker["description"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($descriptionLength)); + $this->_reader->readString16($descriptionLength)); $this->_markers[] = $marker; } } @@ -186,7 +186,7 @@ final class ASF_Object_Marker extends ASF_Object Transform::toUInt16LE (strlen($name = iconv ($this->getOption("encoding"), "utf-16le", $this->_name) . "\0\0")) . - Transform::toString16LE($name); + Transform::toString16($name); for ($i = 0; $i < $markersCount; $i++) $data .= Transform::toInt64LE($this->_markers[$i]["offset"]) . @@ -198,7 +198,7 @@ final class ASF_Object_Marker extends ASF_Object Transform::toUInt32LE($this->_markers[$i]["sendTime"]) . Transform::toUInt32LE($this->_markers[$i]["flags"]) . Transform::toUInt32LE($descriptionLength) . - Transform::toString16LE($description); + Transform::toString16($description); $this->setSize(24 /* for header */ + strlen($data)); return Transform::toGUID($this->getIdentifier()) . diff --git a/src/ASF/Object/Metadata.php b/src/ASF/Object/Metadata.php index 88a5130..93286f4 100644 --- a/src/ASF/Object/Metadata.php +++ b/src/ASF/Object/Metadata.php @@ -46,6 +46,8 @@ require_once("ASF/Object.php"); * Extended Content Description Object except that it also allows a * stream number to be specified. 
* + * @todo Implement better handling of various types of attributes + * according to http://msdn.microsoft.com/en-us/library/aa384495(VS.85).aspx * @package php-reader * @subpackage ASF * @author Sven Vollbehr @@ -81,12 +83,12 @@ final class ASF_Object_Metadata extends ASF_Object $dataLength = $this->_reader->readUInt32LE(); $descriptionRecord["name"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($nameLength)); + $this->_reader->readString16($nameLength)); switch ($dataType) { case 0: // Unicode string $descriptionRecord["data"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($dataLength)); + $this->_reader->readString16($dataLength)); break; case 1: // BYTE array $descriptionRecord["data"] = $this->_reader->read($dataLength); @@ -192,7 +194,7 @@ final class ASF_Object_Metadata extends ASF_Object $value = ($value ? $value . "\0\0" : ""); $data .= Transform::toUInt16LE(0) . Transform::toUInt32LE(strlen($value)) . $name . - Transform::toString16LE($value); + Transform::toString16($value); } } else if (is_bool($this->_descriptionRecords[$i]["data"])) { diff --git a/src/ASF/Object/MetadataLibrary.php b/src/ASF/Object/MetadataLibrary.php index 0f95381..882fa0f 100644 --- a/src/ASF/Object/MetadataLibrary.php +++ b/src/ASF/Object/MetadataLibrary.php @@ -50,6 +50,8 @@ require_once("ASF/Object.php"); * with language IDs, attributes that are defined more than once, large * attributes, and attributes with the GUID data type. 
* + * @todo Implement better handling of various types of attributes + * according to http://msdn.microsoft.com/en-us/library/aa384495(VS.85).aspx * @package php-reader * @subpackage ASF * @author Sven Vollbehr @@ -86,12 +88,12 @@ final class ASF_Object_MetadataLibrary extends ASF_Object $dataLength = $this->_reader->readUInt32LE(); $descriptionRecord["name"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($nameLength)); + $this->_reader->readString16($nameLength)); switch ($dataType) { case 0: // Unicode string $descriptionRecord["data"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($dataLength)); + $this->_reader->readString16($dataLength)); break; case 1: // BYTE array $descriptionRecord["data"] = $this->_reader->read($dataLength); @@ -224,7 +226,7 @@ final class ASF_Object_MetadataLibrary extends ASF_Object $value = ($value ? $value . "\0\0" : ""); $data .= Transform::toUInt16LE(0) . Transform::toUInt32LE(strlen($value)) . $name . 
- Transform::toString16LE($value); + Transform::toString16($value); } } } diff --git a/src/ASF/Object/ScriptCommand.php b/src/ASF/Object/ScriptCommand.php index 8e67b5a..91e7452 100644 --- a/src/ASF/Object/ScriptCommand.php +++ b/src/ASF/Object/ScriptCommand.php @@ -106,7 +106,7 @@ final class ASF_Object_ScriptCommand extends ASF_Object $commandTypeNameLength = $this->_reader->readUInt16LE(); $commandTypes[] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($commandTypeNameLength * 2)); + $this->_reader->readString16($commandTypeNameLength * 2)); } for ($i = 0; $i < $commandsCount; $i++) { $command = array @@ -115,7 +115,7 @@ final class ASF_Object_ScriptCommand extends ASF_Object $commandNameLength = $this->_reader->readUInt16LE(); $command["name"] = iconv ("utf-16le", $this->getOption("encoding"), - $this->_reader->readString16LE($commandNameLength * 2)); + $this->_reader->readString16($commandNameLength * 2)); $this->_commands[] = $command; } } diff --git a/src/ID3/Frame/APIC.php b/src/ID3/Frame/APIC.php index f5523a0..322a8d1 100644 --- a/src/ID3/Frame/APIC.php +++ b/src/ID3/Frame/APIC.php @@ -124,13 +124,13 @@ final class ID3_Frame_APIC extends ID3_Frame list ($this->_description, $this->_imageData) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); break; case self::UTF16BE: list ($this->_description, $this->_imageData) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); break; case self::UTF8: list ($this->_description, $this->_imageData) = @@ -259,11 +259,12 @@ final class ID3_Frame_APIC extends ID3_Frame Transform::toUInt8($this->_imageType); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . 
"\0\0"; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0"; + $data .= Transform::toString16($this->_description, false, 1); break; default: $data .= $this->_description . "\0"; diff --git a/src/ID3/Frame/AbstractText.php b/src/ID3/Frame/AbstractText.php index a9c64e3..cf7753f 100644 --- a/src/ID3/Frame/AbstractText.php +++ b/src/ID3/Frame/AbstractText.php @@ -89,11 +89,13 @@ abstract class ID3_Frame_AbstractText extends ID3_Frame switch ($encoding) { case self::UTF16: $this->_text = $this->_convertString - ($this->_explodeString16($this->_data), "utf-16"); + ($this->_explodeString16(Transform::fromString16($this->_data)), + "utf-16"); break; case self::UTF16BE: $this->_text = $this->_convertString - ($this->_explodeString16($this->_data), "utf-16be"); + ($this->_explodeString16(Transform::fromString16($this->_data)), + "utf-16be"); break; case self::UTF8: $this->_text = $this->_convertString @@ -169,7 +171,10 @@ abstract class ID3_Frame_AbstractText extends ID3_Frame $data = Transform::toUInt8($this->_encoding); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . implode(0xfeff . 
"\0\0", $this->_text); + $array = $this->_text; + foreach ($array as &$text) + $text = Transform::toString16($text, Transform::LITTLE_ENDIAN_ORDER); + $data .= implode("\0\0", $array); break; case self::UTF16: case self::UTF16BE: diff --git a/src/ID3/Frame/COMM.php b/src/ID3/Frame/COMM.php index 15e679e..d11e4ab 100644 --- a/src/ID3/Frame/COMM.php +++ b/src/ID3/Frame/COMM.php @@ -100,16 +100,17 @@ final class ID3_Frame_COMM extends ID3_Frame list ($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); - $this->_text = $this->_convertString($this->_text, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); + $this->_text = $this->_convertString + (Transform::fromString16($this->_text), "utf-16"); break; case self::UTF16BE: list ($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); $this->_text = $this->_convertString - ($this->_text, "utf-16be"); + (Transform::fromString16($this->_text), "utf-16be"); break; case self::UTF8: list ($this->_description, $this->_text) = @@ -236,11 +237,14 @@ final class ID3_Frame_COMM extends ID3_Frame $data = Transform::toUInt8($this->_encoding) . $this->_language; switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . "\0\0" . 0xfeff . $this->_text; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1) . + Transform::toString16($this->_text, Transform::LITTLE_ENDIAN_ORDER); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0" . $this->_text; + $data .= Transform::toString16($this->_description, false, 1) . + Transform::toString16($this->_text); break; default: $data .= $this->_description . "\0" . 
$this->_text; diff --git a/src/ID3/Frame/COMR.php b/src/ID3/Frame/COMR.php index 8ac3f15..29d54a1 100644 --- a/src/ID3/Frame/COMR.php +++ b/src/ID3/Frame/COMR.php @@ -135,16 +135,18 @@ final class ID3_Frame_COMR extends ID3_Frame case self::UTF16: list ($this->_seller, $this->_description, $this->_data) = $this->_explodeString16($this->_data, 3); - $this->_seller = $this->_convertString($this->_seller, "utf-16"); + $this->_seller = $this->_convertString + (Transform::fromString16($this->_seller), "utf-16"); $this->_description = $this->_convertString - ($this->_description, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); break; case self::UTF16BE: list ($this->_seller, $this->_description, $this->_data) = $this->_explodeString16($this->_data, 3); - $this->_seller = $this->_convertString($this->_seller, "utf-16be"); + $this->_seller = $this->_convertString + (Transform::fromString16($this->_seller), "utf-16be"); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); break; case self::UTF8: list ($this->_seller, $this->_description, $this->_data) = @@ -378,12 +380,15 @@ final class ID3_Frame_COMR extends ID3_Frame Transform::toUInt8($this->_delivery); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_seller . "\0\0" . 0xfeff . - $this->_description . "\0\0"; + $data .= Transform::toString16 + ($this->_seller, Transform::LITTLE_ENDIAN_ORDER, 1) . + Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_seller . "\0\0" . $this->_description . "\0\0"; + $data .= Transform::toString16($this->_seller, false, 1) . + Transform::toString16($this->_description, false, 1); break; default: $data .= $this->_seller . "\0" . $this->_description . 
"\0"; diff --git a/src/ID3/Frame/GEOB.php b/src/ID3/Frame/GEOB.php index 8f06cac..0a3c674 100644 --- a/src/ID3/Frame/GEOB.php +++ b/src/ID3/Frame/GEOB.php @@ -95,16 +95,18 @@ final class ID3_Frame_GEOB extends ID3_Frame case self::UTF16: list ($this->_filename, $this->_description, $this->_objectData) = $this->_explodeString16($this->_data, 3); - $this->_filename = $this->_convertString($this->_filename, "utf-16"); + $this->_filename = $this->_convertString + (Transform::fromString16($this->_filename), "utf-16"); $this->_description = $this->_convertString - ($this->_description, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); break; case self::UTF16BE: list ($this->_filename, $this->_description, $this->_objectData) = $this->_explodeString16($this->_data, 3); - $this->_filename = $this->_convertString($this->_filename, "utf-16be"); + $this->_filename = $this->_convertString + (Transform::fromString16($this->_filename), "utf-16be"); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); break; case self::UTF8: list ($this->_filename, $this->_description, $this->_objectData) = @@ -233,12 +235,15 @@ final class ID3_Frame_GEOB extends ID3_Frame $data = Transform::toUInt8($this->_encoding) . $this->_mimeType . "\0"; switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_filename . "\0\0" . 0xfeff . - $this->_description . "\0\0"; + $data .= Transform::toString16 + ($this->_filename, Transform::LITTLE_ENDIAN_ORDER, 1) . + Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_filename . "\0\0" . $this->_description . "\0\0"; + $data .= Transform::toString16($this->_filename, false, 1) . + Transform::toString16($this->_description, false, 1); break; default: $data .= $this->_filename . "\0" . $this->_description . 
"\0"; diff --git a/src/ID3/Frame/IPLS.php b/src/ID3/Frame/IPLS.php index c661224..b789605 100644 --- a/src/ID3/Frame/IPLS.php +++ b/src/ID3/Frame/IPLS.php @@ -82,23 +82,25 @@ final class ID3_Frame_IPLS extends ID3_Frame $encoding = Transform::fromUInt8($this->_data[0]); $data = substr($this->_data, 1); + $order = Transform::MACHINE_ENDIAN_ORDER; switch ($encoding) { case self::UTF16: $data = $this->_explodeString16($data); foreach ($data as &$str) - $str = $this->_convertString($str, "utf-16"); + $str = $this->_convertString + (Transform::fromString16($str, $order), "utf-16"); break; case self::UTF16BE: $data = $this->_explodeString16($data); foreach ($data as &$str) - $str = $this->_convertString($str, "utf-16be"); + $str = $this->_convertString + (Transform::fromString16($str), "utf-16be"); break; case self::UTF8: $data = $this->_convertString($this->_explodeString8($data), "utf-8"); break; default: - $data = $this->_convertString - ($this->_explodeString8($data), "iso-8859-1"); + $data = $this->_convertString($this->_explodeString8($data), "iso-8859-1"); } for ($i = 0; $i < count($data) - 1; $i += 2) @@ -171,11 +173,14 @@ final class ID3_Frame_IPLS extends ID3_Frame foreach ($entry as $key => $val) { switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $key . "\0\0" . 0xfeff . $val . "\0\0"; - break; + $data .= Transform::toString16 + ($key, Transform::LITTLE_ENDIAN_ORDER, 1) . + Transform::toString16($val, Transform::LITTLE_ENDIAN_ORDER, 1); + break; case self::UTF16: case self::UTF16BE: - $data .= $key . "\0\0" . $val . "\0\0"; + $data .= Transform::toString16($key, false, 1) . + Transform::toString16($val, false, 1); break; default: $data .= $key . "\0" . $val . 
"\0"; diff --git a/src/ID3/Frame/OWNE.php b/src/ID3/Frame/OWNE.php index 3cb7f77..4af0048 100644 --- a/src/ID3/Frame/OWNE.php +++ b/src/ID3/Frame/OWNE.php @@ -99,10 +99,12 @@ final class ID3_Frame_OWNE extends ID3_Frame switch ($encoding) { case self::UTF16: - $this->_seller = $this->_convertString($this->_data, "utf-16"); + $this->_seller = $this->_convertString + (Transform::fromString16($this->_data), "utf-16"); break; case self::UTF16BE: - $this->_seller = $this->_convertString($this->_data, "utf-16be"); + $this->_seller = $this->_convertString + (Transform::fromString16($this->_data), "utf-16be"); break; case self::UTF8: $this->_seller = $this->_convertString @@ -221,10 +223,15 @@ final class ID3_Frame_OWNE extends ID3_Frame $this->_price . "\0" . $this->_date; switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_seller; + $data .= Transform::toString16 + ($this->_seller, Transform::LITTLE_ENDIAN_ORDER); + break; + case self::UTF16: + case self::UTF16BE: + $data .= Transform::toString16($this->_seller); break; default: - $data .= $this->_seller; + $data .= Transform::toString8($this->_seller); } return $data; } diff --git a/src/ID3/Frame/SYLT.php b/src/ID3/Frame/SYLT.php index 009e57a..a92866c 100644 --- a/src/ID3/Frame/SYLT.php +++ b/src/ID3/Frame/SYLT.php @@ -118,13 +118,13 @@ final class ID3_Frame_SYLT extends ID3_Frame list($this->_description, $this->_data) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); break; case self::UTF16BE: list($this->_description, $this->_data) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); break; case self::UTF8: list($this->_description, $this->_data) = @@ -144,12 +144,14 @@ final class ID3_Frame_SYLT extends ID3_Frame case 
self::UTF16: list($syllable, $this->_data) = $this->_explodeString16($this->_data, 2); - $syllable = $this->_convertString($syllable, "utf-16"); + $syllable = $this->_convertString + (Transform::fromString16($syllable), "utf-16"); break; case self::UTF16BE: list($syllable, $this->_data) = $this->_explodeString16($this->_data, 2); - $syllable = $this->_convertString($syllable, "utf-16be"); + $syllable = $this->_convertString + (Transform::fromString16($syllable), "utf-16be"); break; case self::UTF8: list($syllable, $this->_data) = @@ -308,11 +310,12 @@ final class ID3_Frame_SYLT extends ID3_Frame Transform::toUInt8($this->_format) . Transform::toUInt8($this->_type); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . "\0\0"; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0"; + $data .= Transform::toString16($this->_description, false, 1); break; default: $data .= $this->_description . "\0"; @@ -320,11 +323,12 @@ final class ID3_Frame_SYLT extends ID3_Frame foreach ($this->_events as $timestamp => $syllable) { switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $syllable . "\0\0"; + $data .= Transform::toString16 + ($syllable, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $syllable . "\0\0"; + $data .= Transform::toString16($syllable, false, 1); break; default: $data .= $syllable . 
"\0"; diff --git a/src/ID3/Frame/TXXX.php b/src/ID3/Frame/TXXX.php index 36fefed..95ede88 100644 --- a/src/ID3/Frame/TXXX.php +++ b/src/ID3/Frame/TXXX.php @@ -86,16 +86,17 @@ final class ID3_Frame_TXXX extends ID3_Frame_AbstractText list($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); - $this->_text = $this->_convertString(array($this->_text), "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); + $this->_text = $this->_convertString + (array(Transform::fromString16($this->_text)), "utf-16"); break; case self::UTF16BE: list($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); $this->_text = $this->_convertString - (array($this->_text), "utf-16be"); + (array(Transform::fromString16($this->_text)), "utf-16be"); break; case self::UTF8: list($this->_description, $this->_text) = $this->_convertString @@ -139,11 +140,15 @@ final class ID3_Frame_TXXX extends ID3_Frame_AbstractText $data = Transform::toUInt8($this->_encoding); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . "\0\0" . 0xfeff . $this->_text[0]; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1) . + Transform::toString16 + ($this->_text[0], Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0" . $this->_text[0]; + $data .= Transform::toString16($this->_description, false, 1) . + Transform::toString16($this->_text[0], false, 1); break; default: $data .= $this->_description . "\0" . 
$this->_text[0]; diff --git a/src/ID3/Frame/USER.php b/src/ID3/Frame/USER.php index 4f598f3..ef365df 100644 --- a/src/ID3/Frame/USER.php +++ b/src/ID3/Frame/USER.php @@ -92,10 +92,12 @@ final class ID3_Frame_USER extends ID3_Frame switch ($encoding) { case self::UTF16: - $this->_text = $this->_convertString($this->_data, "utf-16"); + $this->_text = $this->_convertString + (Transform::fromString16($this->_data), "utf-16"); break; case self::UTF16BE: - $this->_text = $this->_convertString($this->_data, "utf-16be"); + $this->_text = $this->_convertString + (Transform::fromString16($this->_data), "utf-16be"); break; case self::UTF8: $this->_text = $this->_convertString @@ -188,7 +190,13 @@ final class ID3_Frame_USER extends ID3_Frame $data = Transform::toUInt8($this->_encoding) . $this->_language; switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_text; + $data .= Transform::toString16 + ($this->_text, Transform::LITTLE_ENDIAN_ORDER); + break; + case self::UTF16: + case self::UTF16BE: + $data .= Transform::toString16($this->_text); + break; default: $data .= $this->_text; } diff --git a/src/ID3/Frame/USLT.php b/src/ID3/Frame/USLT.php index f5d839e..33e2257 100644 --- a/src/ID3/Frame/USLT.php +++ b/src/ID3/Frame/USLT.php @@ -97,15 +97,17 @@ final class ID3_Frame_USLT extends ID3_Frame list ($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); - $this->_text = $this->_convertString($this->_text, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); + $this->_text = $this->_convertString + (Transform::fromString16($this->_text), "utf-16"); break; case self::UTF16BE: list ($this->_description, $this->_text) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); - $this->_text = $this->_convertString($this->_text, "utf-16be"); + 
(Transform::fromString16($this->_description), "utf-16be"); + $this->_text = $this->_convertString + (Transform::fromString16($this->_text), "utf-16be"); break; case self::UTF8: list ($this->_description, $this->_text) = @@ -232,11 +234,14 @@ final class ID3_Frame_USLT extends ID3_Frame $data = Transform::toUInt8($this->_encoding) . $this->_language; switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . "\0\0" . 0xfeff . $this->_text; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER) . "\0\0" . + Transform::toString16($this->_text, Transform::LITTLE_ENDIAN_ORDER); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0" . $this->_text; + $data .= Transform::toString16($this->_description) . "\0\0" . + Transform::toString16($this->_text); break; default: $data .= $this->_description . "\0" . $this->_text; diff --git a/src/ID3/Frame/WXXX.php b/src/ID3/Frame/WXXX.php index 9ad9ea7..e758079 100644 --- a/src/ID3/Frame/WXXX.php +++ b/src/ID3/Frame/WXXX.php @@ -88,13 +88,13 @@ final class ID3_Frame_WXXX extends ID3_Frame_AbstractLink list($this->_description, $this->_link) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16"); + (Transform::fromString16($this->_description), "utf-16"); break; case self::UTF16BE: list($this->_description, $this->_link) = $this->_explodeString16($this->_data, 2); $this->_description = $this->_convertString - ($this->_description, "utf-16be"); + (Transform::fromString16($this->_description), "utf-16be"); break; case self::UTF8: list($this->_description, $this->_link) = @@ -166,14 +166,15 @@ final class ID3_Frame_WXXX extends ID3_Frame_AbstractLink $data = Transform::toUInt8($this->_encoding); switch ($this->_encoding) { case self::UTF16LE: - $data .= 0xfeff . $this->_description . 
"\0\0"; + $data .= Transform::toString16 + ($this->_description, Transform::LITTLE_ENDIAN_ORDER, 1); break; case self::UTF16: case self::UTF16BE: - $data .= $this->_description . "\0\0"; + $data .= Transform::toString16($this->_description, false, 1); break; default: - $data .= $this->_description . "\0"; + $data .= Transform::toString8($this->_description, 1); } return $data . $this->_link; } diff --git a/src/Transform.php b/src/Transform.php index bab345e..339af14 100644 --- a/src/Transform.php +++ b/src/Transform.php @@ -2,7 +2,7 @@ /** * PHP Reader Library * - * Copyright (c) 2006-2008 The PHP Reader Project Workgroup. All rights + * Copyright (c) 2006-2009 The PHP Reader Project Workgroup. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -30,7 +30,7 @@ * POSSIBILITY OF SUCH DAMAGE. * * @package php-reader - * @copyright Copyright (c) 2006-2008 The PHP Reader Project Workgroup + * @copyright Copyright (c) 2006-2009 The PHP Reader Project Workgroup * @license http://code.google.com/p/php-reader/wiki/License New BSD License * @version $Id$ */ @@ -41,7 +41,7 @@ * @package php-reader * @author Sven Vollbehr * @author Ryan Butterfield - * @copyright Copyright (c) 2006-2008 The PHP Reader Project Workgroup + * @copyright Copyright (c) 2006-2009 The PHP Reader Project Workgroup * @license http://code.google.com/p/php-reader/wiki/License New BSD License * @version $Rev$ * @static @@ -530,13 +530,21 @@ final class Transform } /** - * Returns string as binary data padded to given length with zeros. + * Returns string as binary data padded to given length with zeros. If length + * is smaller than the length of the string, it is considered as the length of + * the padding. * - * @param string $value The input value. + * @param string $value The input value. + * @param integer $length The length to which to pad the value. + * @param string $padding The padding character. 
* @return string */ - public static function toString8($value, $length, $padding = "\0") + public static function toString8($value, $length = false, $padding = "\0") { + if ($length === false) + $length = strlen($value); + if ($length < ($tmp = strlen($value))) + $length = $tmp + $length; return str_pad($value, $length, $padding); } @@ -552,113 +560,71 @@ final class Transform } /** - * Returns machine-ordered multibyte string as UTF-16 defined-order binary - * data. The byte order is stored using a byte order mask (BOM) in the binary - * data string. + * Returns the multibyte string as binary data with given byte order mark + * (BOM) and padded to given length with zeros. Length is given in unicode + * characters so each character adds two zeros to the string. If length is + * smaller than the length of the string, it is considered as the length of + * the padding. + * + * If byte order mark is false no mark is inserted to the binary + * data. * - * @param string $value The input value. - * @param integer $order The byte order of the binary data string. + * @param string $value The input value. + * @param integer $order The byte order of the binary data string. + * @param integer $length The length to which to pad the value. + * @param string $padding The padding character. * @return string */ - public static function toString16($value, $order = self::MACHINE_ENDIAN_ORDER) + public static function toString16 + ($value, $order = false, $length = false, $padding = "\0") { - $format = $order == self::BIG_ENDIAN_ORDER ? "n" : - ($order == self::LITTLE_ENDIAN_ORDER ? "v" : "S"); - $string = pack($format, 0xfeff); - foreach (unpack("S*", $value) as $char) - $string .= pack($format, $char); - return $string; + if ($length === false) + $length = (int)(strlen($value) / 2); + if ($length < ($tmp = strlen($value) / 2)) + $length = $tmp + $length; + if ($order == self::BIG_ENDIAN_ORDER && + !(ord($value[0]) == 0xfe && ord($value[1]) == 0xff)) { + $value = 0xfeff . 
$value; + $length++; + } + if ($order == self::LITTLE_ENDIAN_ORDER && + !(ord($value[0]) == 0xff && ord($value[1]) == 0xfe)) { + $value = 0xfffe . $value; + $length++; + } + return str_pad($value, $length * 2, $padding); } /** - * Returns UTF-16 formatted binary data as machine-ordered multibyte string. - * The byte order is determined from the byte order mark included in the - * binary data string. The order parameter is updated if a BOM is found. + * Returns binary data as multibyte Unicode string. Removes terminating zero. + * + * The byte order is possibly determined from the byte order mark included in + * the binary data string. The order parameter is updated if the BOM is found. * * @param string $value The binary data string. - * @param integer $order The endian to decode using if no BOM was found. + * @param integer $order The endianness of the string. + * @param boolean $trimOrder Whether to remove the byte order mark from the + * string. * @return string */ public static function fromString16 - ($value, &$order = self::MACHINE_ENDIAN_ORDER) + ($value, &$order = false, $trimOrder = false) { if (strlen($value) < 2) return ""; if (ord($value[0]) == 0xfe && ord($value[1]) == 0xff) { $order = self::BIG_ENDIAN_ORDER; - return self::fromString16BE(substr($value, 2)); + if ($trimOrder) + $value = substr($value, 2); } - else if (ord($value[0]) == 0xff && ord($value[1]) == 0xfe) { + if (ord($value[0]) == 0xff && ord($value[1]) == 0xfe) { $order = self::LITTLE_ENDIAN_ORDER; - return self::fromString16LE(substr($value, 2)); + if ($trimOrder) + $value = substr($value, 2); } - else if ($order == self::BIG_ENDIAN_ORDER || - ($order == self::MACHINE_ENDIAN_ORDER && self::isBigEndian())) - return self::fromString16BE($value); - else - return self::fromString16LE($value); - } - - /** - * Returns machine-ordered multibyte string as little-endian ordered binary - * data. - * - * @param string $value The input value. 
- * @return string - */ - public static function toString16LE($value) - { - $string = ""; - foreach (unpack("S*", $value) as $char) - $string .= pack("v", $char); - return $string; - } - - /** - * Returns little-endian ordered binary data as machine ordered multibyte - * string. Removes terminating zero. - * - * @param string $value The binary data string. - * @return string - */ - public static function fromString16LE($value) - { - $string = ""; - foreach (unpack("v*", substr($value, -2) == "\0\0" ? - substr($value, 0, -2) : $value) as $char) - $string .= pack("S", $char); - return $string; - } - - /** - * Returns machine ordered multibyte string as big-endian ordered binary data. - * - * @param string $value The input value. - * @return string - */ - public static function toString16BE($value) - { - $string = ""; - foreach (unpack("S*", $value) as $char) - $string .= pack("n", $char); - return $string; - } - - /** - * Returns big-endian ordered binary data as machine ordered multibyte string. - * Removes terminating zero. - * - * @param string $value The binary data string. - * @return string - */ - public static function fromString16BE($value) - { - $string = ""; - foreach (unpack("n*", substr($value, -2) == "\0\0" ? - substr($value, 0, -2) : $value) as $char) - $string .= pack("S", $char); - return $string; + + return substr($value, -2) == "\0\0" ? substr($value, 0, -2) : $value; } /** diff --git a/tests/TestTransform.php b/tests/TestTransform.php index 30712eb..41d739f 100644 --- a/tests/TestTransform.php +++ b/tests/TestTransform.php @@ -2,7 +2,8 @@ /** * PHP Reader Library * - * Copyright (c) 2008 The PHP Reader Project Workgroup. All rights reserved. + * Copyright (c) 2008-2009 The PHP Reader Project Workgroup. All rights + * reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -30,7 +31,7 @@ * * @package php-reader * @subpackage Tests - * @copyright Copyright (c) 2008 The PHP Reader Project Workgroup + * @copyright Copyright (c) 2008-2009 The PHP Reader Project Workgroup * @license http://code.google.com/p/php-reader/wiki/License New BSD License * @version $Id$ */ @@ -47,7 +48,7 @@ require_once("Transform.php"); * @subpackage Tests * @author Sven Vollbehr * @author Ryan Butterfield - * @copyright Copyright (c) 2008 The PHP Reader Project Workgroup + * @copyright Copyright (c) 2008-2009 The PHP Reader Project Workgroup * @license http://code.google.com/p/php-reader/wiki/License New BSD License * @version $Rev$ */ @@ -160,43 +161,9 @@ final class TestTransform extends PHPUnit_Framework_TestCase $this->assertEquals("00e4", Transform::fromHHex (Transform::fromString16(Transform::toString16("\x00\xe4")))); $this->assertEquals - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::fromString16(Transform::toString16LE - ("\xff\xfe\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0."))); - $this->assertEquals - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::fromString16(Transform::toString16BE - ("\xff\xfe\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0."))); - $this->assertEquals - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", + ("\xff\xfe\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", Transform::fromString16(Transform::toString16 - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::LITTLE_ENDIAN_ORDER))); - $this->assertEquals - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::fromString16(Transform::toString16 - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::BIG_ENDIAN_ORDER))); - } - - function testString16LE() - { - $this->assertEquals - ("fffe", Transform::fromHHex(Transform::toString16LE("\xff\xfe"))); - $this->assertEquals - 
("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::fromString16LE(Transform::toString16LE - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0."))); - } - - function testString16BE() - { - $this->assertEquals - ("feff", Transform::fromHHex(Transform::toString16BE("\xff\xfe"))); - $this->assertEquals - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0.", - Transform::fromString16BE(Transform::toString16BE - ("\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0."))); + ("\xff\xfe\0T\0h\0i\0s\0 \0i\0s\0 \0a\0 \0t\0e\0s\0t\0."))); } function testHHex()