Add Zend_Mime_Magic class proposal

git-svn-id: http://php-reader.googlecode.com/svn/branches/zend@173 51a70ab9-7547-0410-9469-37e369ee0574
This commit is contained in:
svollbehr
2010-03-06 22:17:38 +00:00
parent 308466cbad
commit f43c6fee0e
3 changed files with 170 additions and 411 deletions

View File

@@ -1,177 +0,0 @@
<?php
/**
* PHP Reader Library
*
* Copyright (c) 2006-2008 The PHP Reader Project Workgroup. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the project workgroup nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package php-reader
* @copyright Copyright (c) 2006-2008 PHP Reader Project Workgroup
* @license http://code.google.com/p/php-reader/wiki/License New BSD License
* @version $Id$
*/
/**#@+ @ignore */
require_once("Reader.php");
/**#@-*/
/**
* This class is used to classify the given file using some magic bytes
* characteristic to a particular file type. The classification information can
* be a MIME type or just text describing the file.
*
* This method is slower than determining the type by file suffix but on the
* other hand reduces the risk of false positives during the test.
*
* The magic file consists of ASCII characters defining the magic numbers for
* different file types. Each row has 4 to 5 columns, empty and commented lines
* (those starting with a hash character) are ignored. Columns are described
* below.
*
* o <b>1</b> -- byte number to begin checking from. ">" indicates a dependency
* upon the previous non-">" line
* o <b>2</b> -- type of data to match. Can be one of following
* - <i>byte</i> (single character)
* - <i>short</i> (machine-order 16-bit integer)
* - <i>long</i> (machine-order 32-bit integer)
* - <i>string</i> (arbitrary-length string)
* - <i>date</i> (long integer date (seconds since Unix epoch/1970))
* - <i>beshort</i> (big-endian 16-bit integer)
* - <i>belong</i> (big-endian 32-bit integer)
* - <i>bedate</i> (big-endian 32-bit integer date)
* - <i>leshort</i> (little-endian 16-bit integer)
* - <i>lelong</i> (little-endian 32-bit integer)
* - <i>ledate</i> (little-endian 32-bit integer date)
* o <b>3</b> -- contents of data to match
* o <b>4</b> -- file description/MIME type if matched
* o <b>5</b> -- optional MIME encoding if matched and if above was a MIME type
*
* @package php-reader
* @author Sven Vollbehr <svollbehr@gmail.com>
* @copyright Copyright (c) 2006-2008 PHP Reader Project Workgroup
* @license http://code.google.com/p/php-reader/wiki/License New BSD License
* @version $Rev$
*/
final class Magic
{
    /** @var string Raw contents of the magic file given to the constructor. */
    private $_magic;

    /**
     * Reads the magic information from given magic file.
     *
     * The whole file is read into memory once; {@link getType()} re-parses it
     * on every call.
     *
     * @param string $filename The path to the magic file.
     */
    public function __construct($filename)
    {
        $reader = new Reader($filename);
        $this->_magic = $reader->read($reader->getSize());
    }

    /**
     * Returns the recognized MIME type/description of the given file. The type
     * is determined by the content using magic bytes characteristic for the
     * particular file type.
     *
     * The magic rules are tried in file order and the first rule whose data
     * matches and that carries a MIME type (or, failing that, a description)
     * wins. If the type could not be found, the function returns the default
     * value, or <var>false</var>.
     *
     * @param string $filename The file path whose type to determine.
     * @param mixed  $default  The value returned when no rule matches.
     * @return string|false The MIME type or description, or the default value.
     */
    public function getType($filename, $default = false)
    {
        $reader = new Reader($filename);
        $parentOffset = 0;
        foreach (preg_split("/^/m", $this->_magic) as $line) {
            $chunks = array();
            // Line terminators are stripped first so that magic files saved
            // with CRLF line endings parse the same way as LF ones. Empty and
            // comment lines simply fail the pattern and are skipped.
            // NOTE(review): MatchData is captured with \S+, so an escaped
            // space ("\ ") can never be part of it -- confirm whether the
            // pattern should accept escaped whitespace.
            if (!preg_match(
                "/^(?P<Dependant>>?)(?P<Byte>\d+)\s+(?P<MatchType>\S+)" .
                "\s+(?P<MatchData>\S+)(?:\s+(?P<MIMEType>[a-z]+\/[a-z-" .
                "0-9]+)?(?:\s+(?P<Description>.+))?)?$/",
                rtrim($line, "\r\n"), $chunks)) {
                continue;
            }

            $byte = (int) $chunks["Byte"];
            if ($chunks["Dependant"] === ">") {
                // Dependent rule: offset is relative to the last non-">" rule.
                // NOTE(review): dependent rules are evaluated even when their
                // parent rule did not match -- confirm this is intended.
                $reader->setOffset($parentOffset);
                $reader->skip($byte);
            } else {
                $parentOffset = $byte;
                $reader->setOffset($parentOffset);
            }

            $matchType = strtolower($chunks["MatchType"]);
            $matchData = $this->_decodeMatchData($chunks["MatchData"]);
            $data = $this->_readData($reader, $matchType, strlen($matchData));
            if ($data === null) {
                // Unknown match type: nothing to compare against.
                continue;
            }

            if (strcmp((string) $data, $matchData) == 0) {
                if (!empty($chunks["MIMEType"])) {
                    return $chunks["MIMEType"];
                }
                if (!empty($chunks["Description"])) {
                    return $chunks["Description"];
                }
            }
        }
        return $default;
    }

    /**
     * Reads the value to compare from the current reader position according to
     * the match type named in the second magic file column.
     *
     * @param Reader  $reader    The reader positioned at the value.
     * @param string  $matchType The lower-cased match type.
     * @param integer $length    The byte length to read for string matches.
     * @return mixed The value read, or <var>null</var> for an unknown type.
     */
    private function _readData($reader, $matchType, $length)
    {
        switch ($matchType) {
            case "byte": // single character
                return $reader->readInt8();
            case "short": // machine-order 16-bit integer
                return $reader->readInt16();
            case "long": // machine-order 32-bit integer
                return $reader->readInt32();
            case "string": // arbitrary-length string
                return $reader->readString8($length);
            case "date": // long integer date (seconds since Unix epoch/1970)
                // NOTE(review): magic(5) describes "date" as a four-byte
                // value; confirm that a 64-bit big-endian read is intended.
                return $reader->readInt64BE();
            case "beshort": // big-endian 16-bit integer
                return $reader->readUInt16BE();
            case "belong": // big-endian 32-bit integer
            case "bedate": // big-endian 32-bit integer date
                return $reader->readUInt32BE();
            case "leshort": // little-endian 16-bit integer
                return $reader->readUInt16LE();
            case "lelong": // little-endian 32-bit integer
            case "ledate": // little-endian 32-bit integer date
                return $reader->readUInt32LE();
            default:
                return null;
        }
    }

    /**
     * Decodes the escape notation of the third magic file column: "\ " and
     * "\\" become a space and a backslash, "\NNN" (octal) and "\xHH" (hex)
     * become the corresponding byte, and "0xHHHH" becomes its decimal value.
     *
     * The decoding is done in a single pass with a callback. This replaces the
     * former "/e" (eval) replacement, which no longer exists as of PHP 7, and
     * keeps the output of one escape from being re-read as another escape.
     *
     * @param string $matchData The raw match data column.
     * @return string The decoded match data.
     */
    private function _decodeMatchData($matchData)
    {
        return preg_replace_callback(
            "/\\\\( |\\\\|x[0-9A-Fa-f]{1,2}|[0-7]{1,3})|0x([0-9A-Fa-f]+)/",
            array($this, "_decodeEscape"), $matchData);
    }

    /**
     * Callback of {@link _decodeMatchData()} translating one matched escape.
     *
     * @param array $match The match groups: [1] is the text following a
     *                     backslash, [2] the digits of a "0x" literal.
     * @return string The replacement text.
     */
    private function _decodeEscape($match)
    {
        if (isset($match[2])) {
            return (string) hexdec($match[2]);
        }
        $escape = $match[1];
        if ($escape === " " || $escape === "\\") {
            return $escape;
        }
        if ($escape[0] === "x") {
            // chr() rather than pack("H*"): pack pads a single hex digit on
            // the wrong side, turning "\xA" into 0xA0 instead of 0x0A.
            return chr(hexdec(substr($escape, 1)));
        }
        return chr(octdec($escape) & 0xFF);
    }
}

View File

@@ -1,234 +0,0 @@
<?php
/**
* PHP Reader Library
*
* Copyright (c) 2006-2009 The PHP Reader Project Workgroup. All rights
* reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the project workgroup nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package php-reader
* @copyright Copyright (c) 2006-2009 The PHP Reader Project Workgroup
* @license http://code.google.com/p/php-reader/wiki/License New BSD License
* @version $Id$
*/
/**#@+ @ignore */
require_once("Reader/Exception.php");
require_once("Transform.php");
/**#@-*/
/**
* The Reader class encapsulates a file. It is hence responsible of upkeeping
* the connection to the file, keeping track of the cursor position and reading
* data from it.
*
* @package php-reader
* @author Sven Vollbehr <svollbehr@gmail.com>
* @author Ryan Butterfield <buttza@gmail.com>
* @copyright Copyright (c) 2006-2009 The PHP Reader Project Workgroup
* @license http://code.google.com/p/php-reader/wiki/License New BSD License
* @version $Rev$
*/
class Reader
{
    /** @var resource The handle of the underlying file or stream. */
    private $_fd;
    /** @var integer The size in bytes, measured once in the constructor. */
    private $_size;

    /**
     * Constructs the Reader class with given file.
     *
     * An already opened file or stream resource may be passed instead of a
     * path; it is then used as is. In both cases the size is determined by
     * seeking to the end, after which the cursor is rewound to the beginning.
     *
     * @param string|resource $filename The path to the file, or an open handle.
     * @param string          $mode     The type of access.
     * @throws Reader_Exception if the file cannot be read.
     */
    public function __construct($filename, $mode = "rb")
    {
        if (is_resource($filename) &&
            in_array(get_resource_type($filename), array("file", "stream"))) {
            $this->_fd = $filename;
        } else if (($this->_fd = fopen($filename, $mode)) === false) {
            throw new Reader_Exception("Unable to open file:" . $filename);
        }
        fseek($this->_fd, 0, SEEK_END);
        $this->_size = ftell($this->_fd);
        fseek($this->_fd, 0);
    }

    /**
     * Closes the file.
     *
     * The handle is checked first because the destructor also runs when the
     * constructor failed to open the file (the handle is then <var>false</var>),
     * and closing a non-resource raises a TypeError on PHP 8 that the error
     * suppression operator cannot silence.
     */
    public function __destruct()
    {
        if (is_resource($this->_fd)) {
            fclose($this->_fd);
        }
    }

    /**
     * Checks whether there is more to be read in the file. Returns
     * <var>true</var> if the end of the file has not yet been reached;
     * <var>false</var> otherwise.
     *
     * @return boolean
     */
    public function available()
    {
        return $this->getOffset() < $this->_size;
    }

    /**
     * Jumps <var>size</var> amount of bytes in the file stream.
     *
     * @param integer $size The amount of bytes.
     * @return void
     * @throws Reader_Exception if <var>size</var> attribute is negative.
     */
    public function skip($size)
    {
        if ($size < 0) {
            throw new Reader_Exception("Invalid argument");
        }
        if ($size == 0) {
            return;
        }
        fseek($this->_fd, $size, SEEK_CUR);
    }

    /**
     * Reads <var>length</var> amount of bytes from the file stream.
     *
     * @param integer $length The amount of bytes.
     * @return string The bytes read; an empty string for a zero length.
     * @throws Reader_Exception if <var>length</var> attribute is negative.
     */
    public function read($length)
    {
        if ($length < 0) {
            throw new Reader_Exception("Invalid argument");
        }
        if ($length == 0) {
            return "";
        }
        return fread($this->_fd, $length);
    }

    /**
     * Returns the current point of operation.
     *
     * @return integer
     */
    public function getOffset()
    {
        return ftell($this->_fd);
    }

    /**
     * Sets the point of operation, ie the cursor offset value. The offset can
     * also be set to a negative value when it is interpreted as an offset from
     * the end of the file instead of the beginning.
     *
     * @param integer $offset The new point of operation.
     * @return void
     */
    public function setOffset($offset)
    {
        fseek($this->_fd, $offset < 0 ? $this->_size + $offset : $offset);
    }

    /**
     * Returns the file size in bytes.
     *
     * @return integer
     */
    public function getSize()
    {
        return $this->_size;
    }

    /**
     * Magic function so that $obj->value will work. The access is routed to
     * the getter named "get" followed by the capitalized field name.
     *
     * @param string $name The field name.
     * @return mixed
     * @throws Reader_Exception if no such getter exists.
     */
    public function __get($name)
    {
        $getter = "get" . ucfirst(strtolower($name));
        if (!method_exists($this, $getter)) {
            throw new Reader_Exception("Unknown field: " . $name);
        }
        return call_user_func(array($this, $getter));
    }

    /**
     * Magic function so that assignments with $obj->value will work. The
     * assignment is routed to the setter named "set" followed by the
     * capitalized field name.
     *
     * @param string $name  The field name.
     * @param string $value The field value.
     * @return void
     * @throws Reader_Exception if no such setter exists.
     */
    public function __set($name, $value)
    {
        $setter = "set" . ucfirst(strtolower($name));
        if (!method_exists($this, $setter)) {
            throw new Reader_Exception("Unknown field: " . $name);
        }
        call_user_func(array($this, $setter), $value);
    }

    /**
     * Magic function to delegate the call to helper methods of
     * <var>Transform</var> class to transform read data in another format.
     *
     * A call to readXxx() reads the needed bytes and hands them to
     * Transform::fromXxx(). The read data length is determined from the helper
     * method name. For methods where arbitrary data lengths are accepted
     * (strings and hex dumps) a parameter can be used to specify the length;
     * it defaults to one byte.
     *
     * @param string $method The method to be called.
     * @param array  $params The parameters should the function accept them.
     * @return mixed
     * @throws Reader_Exception if no such transformer is implemented
     */
    public function __call($method, $params)
    {
        // To keep compatibility with PHP 5.0.0 we use a static array instead
        // of method_exists to check if a method of the Transform class can be
        // called.
        static $methods = array(
            "isLittleEndian", "isBigEndian", "toInt64LE", "fromInt64LE",
            "toInt64BE", "fromInt64BE", "toInt32", "fromInt32", "toInt32LE",
            "fromInt32LE", "toInt32BE", "fromInt32BE", "toUInt32LE",
            "fromUInt32LE", "toUInt32BE", "fromUInt32BE", "toInt16",
            "fromInt16", "toInt16LE", "fromInt16LE", "toInt16BE",
            "fromInt16BE", "toUInt16LE", "fromUInt16LE", "toUInt16BE",
            "fromUInt16BE", "toInt8", "fromInt8", "toUInt8", "fromUInt8",
            "toFloat", "fromFloat", "toFloatLE", "fromFloatLE", "toFloatBE",
            "fromFloatBE", "toString8", "fromString8", "toString16",
            "fromString16", "toString16LE", "fromString16LE", "toString16BE",
            "fromString16BE", "toHHex", "fromHHex", "toLHex", "fromLHex",
            "toGUID", "fromGUID"
        );

        $chunks = array();
        $transformer = preg_replace("/^read/", "from", $method);
        if (!preg_match
                ("/read([a-z]{3,6})?(\d{1,2})?(?:LE|BE)?/i", $method, $chunks) ||
            !in_array($transformer, $methods, true)) {
            throw new Reader_Exception("Unknown method: " . $method);
        }

        // Both groups are optional in the pattern, so guard against them
        // being absent from the match array.
        $type = isset($chunks[1]) ? $chunks[1] : "";
        $bits = isset($chunks[2]) ? (int) $chunks[2] : 0;

        if (preg_match("/String|(?:H|L)Hex/", $type)) {
            $length = isset($params[0]) ? $params[0] : 1;
        } else if ($type == "GUID") {
            $length = 16;
        } else if (stripos($type, "Float") === 0) {
            // The float method names carry no bit width, so the length cannot
            // be derived from the name; without this branch zero bytes were
            // read. Assumes Transform's float helpers work on 32-bit (4 byte)
            // values -- TODO confirm against Transform.
            $length = 4;
        } else {
            $length = $bits / 8;
        }

        return call_user_func
            (array("Transform", $transformer), $this->read($length));
    }
}

170
src/Zend/Mime/Magic.php Normal file
View File

@@ -0,0 +1,170 @@
<?php
/**
* Zend Framework
*
* LICENSE
*
* This source file is subject to the new BSD license that is bundled
* with this package in the file LICENSE.txt.
* It is also available through the world-wide-web at this URL:
* http://framework.zend.com/license/new-bsd
* If you did not receive a copy of the license and are unable to
* obtain it through the world-wide-web, please send an email
* to license@zend.com so we can send you a copy immediately.
*
* @category Zend
* @package Zend_Mime
* @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id$
*/
/**#@+ @ignore */
require_once 'Zend/Io/FileReader.php';
/**#@-*/
/**
* This class is used to classify the given file using some magic bytes
* characteristic to a particular file type. The classification information can
* be a MIME type or just text describing the file.
*
* This method is slower than determining the type by file suffix but on the
* other hand reduces the risk of false positives during the test.
*
* The magic file consists of ASCII characters defining the magic numbers for
* different file types. Each row has 4 to 5 columns, empty and commented lines
* (those starting with a hash character) are ignored. Columns are described
* below.
*
* o <b>1</b> -- byte number to begin checking from. '>' indicates a dependency
* upon the previous non-'>' line
* o <b>2</b> -- type of data to match. Can be one of following
* - <i>byte</i> (single character)
* - <i>short</i> (machine-order 16-bit integer)
* - <i>long</i> (machine-order 32-bit integer)
* - <i>string</i> (arbitrary-length string)
* - <i>date</i> (long integer date (seconds since Unix epoch/1970))
* - <i>beshort</i> (big-endian 16-bit integer)
* - <i>belong</i> (big-endian 32-bit integer)
* - <i>bedate</i> (big-endian 32-bit integer date)
* - <i>leshort</i> (little-endian 16-bit integer)
* - <i>lelong</i> (little-endian 32-bit integer)
* - <i>ledate</i> (little-endian 32-bit integer date)
* o <b>3</b> -- contents of data to match
* o <b>4</b> -- file description/MIME type if matched
* o <b>5</b> -- optional MIME encoding if matched and if above was a MIME type
*
* @category Zend
* @package Zend_Mime
* @author Sven Vollbehr <sven@vollbehr.eu>
* @copyright Copyright (c) 2005-2009 Zend Technologies USA Inc. (http://www.zend.com)
* @license http://framework.zend.com/license/new-bsd New BSD License
* @version $Id$
*/
final class Zend_Mime_Magic
{
    /** @var string Raw contents of the magic file given to the constructor. */
    private $_magic;

    /**
     * Reads the magic information from given magic file.
     *
     * The whole file is read into memory once; {@link getType()} re-parses it
     * on every call.
     *
     * @param string $filename The path to the magic file.
     */
    public function __construct($filename)
    {
        $reader = new Zend_Io_FileReader($filename);
        $this->_magic = $reader->read($reader->getSize());
    }

    /**
     * Returns the recognized MIME type/description of the given file. The type
     * is determined by the content using magic bytes characteristic for the
     * particular file type.
     *
     * The magic rules are tried in file order and the first rule whose data
     * matches and that carries a MIME type (or, failing that, a description)
     * wins. If the type could not be found, the function returns the default
     * value, or <var>null</var>.
     *
     * @param string $filename The file path whose type to determine.
     * @param mixed  $default  The value returned when no rule matches.
     * @return string|null The MIME type or description, or the default value.
     */
    public function getType($filename, $default = null)
    {
        $reader = new Zend_Io_FileReader($filename);
        $parentOffset = 0;
        foreach (preg_split('/^/m', $this->_magic) as $line) {
            $chunks = array();
            // Line terminators are stripped first so that magic files saved
            // with CRLF line endings parse the same way as LF ones. Empty and
            // comment lines simply fail the pattern and are skipped.
            // NOTE(review): MatchData is captured with \S+, so an escaped
            // space ("\ ") can never be part of it -- confirm whether the
            // pattern should accept escaped whitespace.
            if (!preg_match("/^(?P<Dependant>>?)(?P<Byte>\d+)\s+(?P<MatchType" .
                            ">\S+)\s+(?P<MatchData>\S+)(?:\s+(?P<MIMEType>[a-" .
                            "z]+\/[a-z-0-9]+)?(?:\s+(?P<Description>.+))?)?$/",
                            rtrim($line, "\r\n"), $chunks)) {
                continue;
            }

            $byte = (int) $chunks['Byte'];
            if ($chunks['Dependant'] === '>') {
                // Dependent rule: offset is relative to the last non-'>' rule.
                // NOTE(review): dependent rules are evaluated even when their
                // parent rule did not match -- confirm this is intended.
                $reader->setOffset($parentOffset);
                $reader->skip($byte);
            } else {
                $parentOffset = $byte;
                $reader->setOffset($parentOffset);
            }

            $matchType = strtolower($chunks['MatchType']);
            $matchData = $this->_decodeMatchData($chunks['MatchData']);
            $data = $this->_readData($reader, $matchType, strlen($matchData));
            if ($data === null) {
                // Unknown match type: nothing to compare against.
                continue;
            }

            if (strcmp((string) $data, $matchData) == 0) {
                if (!empty($chunks['MIMEType'])) {
                    return $chunks['MIMEType'];
                }
                if (!empty($chunks['Description'])) {
                    return $chunks['Description'];
                }
            }
        }
        return $default;
    }

    /**
     * Reads the value to compare from the current reader position according to
     * the match type named in the second magic file column.
     *
     * @param Zend_Io_FileReader $reader    The reader positioned at the value.
     * @param string             $matchType The lower-cased match type.
     * @param integer            $length    The byte length to read for string
     *                                      matches.
     * @return mixed The value read, or <var>null</var> for an unknown type.
     */
    private function _readData($reader, $matchType, $length)
    {
        switch ($matchType) {
            case 'byte': // single character
                return $reader->readInt8();
            case 'short': // machine-order 16-bit integer
                return $reader->readInt16();
            case 'long': // machine-order 32-bit integer
                return $reader->readInt32();
            case 'string': // arbitrary-length string
                return $reader->readString8($length);
            case 'date': // long integer date (seconds since Unix epoch)
                // NOTE(review): magic(5) describes 'date' as a four-byte
                // value; confirm that a 64-bit big-endian read is intended.
                return $reader->readInt64BE();
            case 'beshort': // big-endian 16-bit integer
                return $reader->readUInt16BE();
            case 'belong': // big-endian 32-bit integer
                // break intentionally omitted
            case 'bedate': // big-endian 32-bit integer date
                return $reader->readUInt32BE();
            case 'leshort': // little-endian 16-bit integer
                return $reader->readUInt16LE();
            case 'lelong': // little-endian 32-bit integer
                // break intentionally omitted
            case 'ledate': // little-endian 32-bit integer date
                return $reader->readUInt32LE();
            default:
                return null;
        }
    }

    /**
     * Decodes the escape notation of the third magic file column: '\ ' and
     * '\\' become a space and a backslash, '\NNN' (octal) and '\xHH' (hex)
     * become the corresponding byte, and '0xHHHH' becomes its decimal value.
     *
     * The decoding is done in a single pass with a callback. This replaces the
     * former '/e' (eval) replacement, which no longer exists as of PHP 7, and
     * keeps the output of one escape from being re-read as another escape.
     *
     * @param string $matchData The raw match data column.
     * @return string The decoded match data.
     */
    private function _decodeMatchData($matchData)
    {
        return preg_replace_callback(
            "/\\\\( |\\\\|x[0-9A-Fa-f]{1,2}|[0-7]{1,3})|0x([0-9A-Fa-f]+)/",
            array($this, '_decodeEscape'), $matchData);
    }

    /**
     * Callback of {@link _decodeMatchData()} translating one matched escape.
     *
     * @param array $match The match groups: [1] is the text following a
     *                     backslash, [2] the digits of a '0x' literal.
     * @return string The replacement text.
     */
    private function _decodeEscape($match)
    {
        if (isset($match[2])) {
            return (string) hexdec($match[2]);
        }
        $escape = $match[1];
        if ($escape === ' ' || $escape === '\\') {
            return $escape;
        }
        if ($escape[0] === 'x') {
            // chr() rather than pack('H*'): pack pads a single hex digit on
            // the wrong side, turning '\xA' into 0xA0 instead of 0x0A.
            return chr(hexdec(substr($escape, 1)));
        }
        return chr(octdec($escape) & 0xFF);
    }
}