diff --git a/docs/chs/README.md b/docs/chs/README.md index acc0c02..f33f8bd 100644 --- a/docs/chs/README.md +++ b/docs/chs/README.md @@ -503,7 +503,7 @@ $beanbun->afterDownloadPage = function($beanbun) { ``` php // 输出下载页面后发现的所有链接,不加入队列 $beanbun->discoverUrl = function($beanbun) { - $urls = Helper::getUrlbyHtml($beanbun->page, $beanbun->url); + $urls = Helper::getUrlByHtml($beanbun->page, $beanbun->url); print_r($urls); }; ``` @@ -727,7 +727,7 @@ $beanbun->beforDownloadPage = function($beanbun) { ``` php // 取出页面中所有的 url 加入队列,均以 POST 来请求 $beanbun->discoverUrl = function ($beanbun) { - $urls = Helper::getUrlbyHtml($beanbun->page, $beanbun->url); + $urls = Helper::getUrlByHtml($beanbun->page, $beanbun->url); foreach ($urls as $url) { $beanbun->queue()->add($url, [ 'method' => 'POST' @@ -913,7 +913,7 @@ Helper 类中定义了一些辅助方法,帮助用户更方便的爬取网页 ### 静态方法 -#### getUrlbyHtml +#### getUrlByHtml

返回网页中的完整链接。接受两个参数,第一个参数为网页 html,第二个参数为网页的 url

@@ -931,7 +931,7 @@ $html =<< STR; -$urls = Helper::getUrlbyHtml($html, $url); +$urls = Helper::getUrlByHtml($html, $url); print_r($urls); // Array diff --git a/src/Beanbun.php b/src/Beanbun.php index feb09f3..0f0b4d9 100644 --- a/src/Beanbun.php +++ b/src/Beanbun.php @@ -1,8 +1,8 @@ commands = $argv; $this->name = isset($config['name']) - ? $config['name'] - : current(explode('.', $this->commands[0])); + ? $config['name'] + : current(explode('.', $this->commands[0])); $this->logFile = isset($config['logFile']) ? $config['logFile'] : __DIR__ . '/' . $this->name . '_access.log'; $this->setQueue(); $this->setDownloader(); @@ -162,17 +162,17 @@ public function check() $error = false; $text = ''; $version_ok = $pcntl_loaded = $posix_loaded = true; - if(!version_compare(phpversion(), "5.3.3", ">=")) { + if (!version_compare(phpversion(), "5.3.3", ">=")) { $text .= "PHP Version >= 5.3.3 \033[31;40m [fail] \033[0m\n"; $error = true; } - if(!in_array("pcntl", get_loaded_extensions())) { + if (!in_array("pcntl", get_loaded_extensions())) { $text .= "Extension posix check \033[31;40m [fail] \033[0m\n"; $error = true; } - if(!in_array("posix", get_loaded_extensions())) { + if (!in_array("posix", get_loaded_extensions())) { $text .= "Extension posix check \033[31;40m [fail] \033[0m\n"; $error = true; } @@ -183,12 +183,12 @@ public function check() "pcntl_signal_dispatch", ); - if($disable_func_string = ini_get("disable_functions")) { + if ($disable_func_string = ini_get("disable_functions")) { $disable_func_map = array_flip(explode(",", $disable_func_string)); } - foreach($check_func_map as $func) { - if(isset($disable_func_map[$func])) { + foreach ($check_func_map as $func) { + if (isset($disable_func_map[$func])) { $text .= "\033[31;40mFunction " . implode(', ', $check_func_map) . "may be disabled. Please check disable_functions in php.ini\033[0m\n"; $error = true; break; @@ -203,7 +203,7 @@ public function check() public function initHooks() { - $this->startWorkerHooks[] = function($beanbun) { + $this->startWorkerHooks[] = function ($beanbun) { $beanbun->id = $beanbun->worker->id; $beanbun->log("Beanbun worker {$beanbun->id} is starting ..."); }; @@ -212,7 +212,7 @@ public function initHooks() $this->startWorkerHooks[] = $this->startWorker; } - $this->startWorkerHooks[] = function($beanbun) { + $this->startWorkerHooks[] = function ($beanbun) { $beanbun->queue()->maxQueueSize = $beanbun->max; $beanbun->timer_id = Beanbun::timer($beanbun->interval, [$beanbun, 'crawler']); }; @@ -244,7 +244,7 @@ public function initHooks() } if ($this->daemonize) { - $this->afterDiscoverHooks[] = function($beanbun) { + $this->afterDiscoverHooks[] = function ($beanbun) { $beanbun->queue()->queued($beanbun->queue); }; } @@ -261,7 +261,7 @@ public function initHooks() // 爬虫进程 public function onWorkerStart($worker) { - foreach($this->startWorkerHooks as $hook) { + foreach ($this->startWorkerHooks as $hook) { call_user_func($hook, $this); } } @@ -277,8 +277,7 @@ public function queue() public function setQueue($callback = null, $args = [ 'host' => '127.0.0.1', 'port' => '2207', - ]) - { + ]) { if ($callback === 'memory' || $callback === null) { $this->queueFactory = function ($args) { return new \Beanbun\Queue\MemoryQueue($args); @@ -290,7 +289,7 @@ public function setQueue($callback = null, $args = [ } else { $this->queueFactory = $callback; } - + $this->queueArgs = $args; } @@ -322,10 +321,10 @@ public function log($msg) public function setLog($callback = null) { $this->logFactory = $callback === null - ? function ($msg, $beanbun) { - echo date('Y-m-d H:i:s') . " {$beanbun->name} : $msg\n"; - } - : $callback; + ? function ($msg, $beanbun) { + echo date('Y-m-d H:i:s') . " {$beanbun->name} : $msg\n"; + } + : $callback; } public function error($msg = null) @@ -340,8 +339,8 @@ public function crawler() array_shift($allHooks); array_pop($allHooks); - foreach($allHooks as $hooks) { - foreach($this->$hooks as $hook) { + foreach ($allHooks as $hooks) { + foreach ($this->$hooks as $hook) { call_user_func($hook, $this); } } @@ -364,7 +363,7 @@ public function crawler() public function onWorkerStop($worker) { - foreach($this->stopWorkerHooks as $hook) { + foreach ($this->stopWorkerHooks as $hook) { call_user_func($hook, $this); } } @@ -407,7 +406,7 @@ public function defaultBeforeDownloadPage() if (!is_array($queue)) { $this->queue = $queue = [ 'url' => $queue, - 'options' => [] + 'options' => [], ]; } @@ -471,6 +470,6 @@ public function middleware($middleware, $action = 'handle') $middleware->$action($this); } else { call_user_func($middleware, $this); - } + } } } diff --git a/src/Lib/BloomFilter.php b/src/Lib/BloomFilter.php index f417a97..312b34d 100644 --- a/src/Lib/BloomFilter.php +++ b/src/Lib/BloomFilter.php @@ -1,165 +1,180 @@ bitField = $this->initializeBitFieldOfLength($m); - $this->m = (int)$m; - $this->k = (int)$k; - } - - /** - * Calculates the optimal number of k given m and a - * typical number of items to be stored. - * - * @param int $m Size of the bit field - * @param int $n Typical number of items to insert - * @return int Optimal number for k - */ - public static function getK($m, $n) { - return ceil(($m / $n) * log(2)); - } - - /** - * Returns an instance based on the bit field size and expected number of stored items. - * Automates the calculation of k. - * - * @param int $m Bit field size - * @param int $n Expected number of stored values - * @return BloomFilter - */ - public static function constructForTypicalSize($m, $n) { - return new self($m, self::getK($m, $n)); - } - - /** - * Unserializes in instance from an ASCII safe string representation produced by __toString. - * - * @param string $string String representation - * @return BloomFilter Unserialized instance - */ - public static function unserializeFromStringRepresentation($string) { - if (!preg_match('~k:(?P\d+)/m:(?P\d+)\((?P[0-9a-zA-Z+/=]+)\)~', $string, $matches)) { - throw new InvalidArgumentException('Invalid string representation'); - } - $bf = new self((int)$matches['m'], (int)$matches['k']); - $bf->bitField = base64_decode($matches['bitfield']); - return $bf; - } - protected function initializeBitFieldOfLength($length) { - return str_repeat("\x00", ceil($length / 8)); - } - - protected function setBitAtPosition($pos) { - list($char, $byte) = $this->position2CharAndByte($pos); - $this->bitField[$char] = $this->bitField[$char] | $byte; - } - - protected function getBitAtPosition($pos) { - list($char, $byte) = $this->position2CharAndByte($pos); - return ($this->bitField[$char] & $byte) === $byte; - } - - /** - * Returns a tuple with the char offset into the bitfield string - * in index 0 and a bitmask for the specific position in index 1. - * E.g.: Position 9 -> (1, "10000000") (2nd byte, "first" bit) - * - * @param int $pos The $pos'th bit in the bit field. - * @return array array(int $charOffset, string $bitmask) - */ - protected function position2CharAndByte($pos) { - if ($pos > $this->m) { - throw new InvalidArgumentException("\$pos of $pos beyond bitfield length of $this->m"); - } - static $positionMap = array( - 8 => "\x01", - 7 => "\x02", - 6 => "\x04", - 5 => "\x08", - 4 => "\x10", - 3 => "\x20", - 2 => "\x40", - 1 => "\x80" - ); - - $char = (int)ceil($pos / 8) - 1; - $byte = $positionMap[$pos % 8 ?: 8]; - return array($char, $byte); - } - - /** - * Calculates the positions a value hashes to in the bitfield. - * - * @param string $value The value to insert into the bitfield. - * @return SplFixedArray Array containing the numeric positions in the bitfield. - */ - protected function positions($value) { - mt_srand(crc32($value)); - $positions = new SplFixedArray($this->k); - for ($i = 0; $i < $this->k; $i++) { - $positions[$i] = mt_rand(1, $this->m); - } - return $positions; - } - - /** - * Add a value into the set. - * - * @param string $value - */ - public function add($value) { - foreach ($this->positions($value) as $position) { - $this->setBitAtPosition($position); - } - } - - /** - * Checks if the value may have been added to the set before. - * False positives are possible, false negatives are not. - * - * @param string $value - * @return boolean - */ - public function maybeInSet($value) { - foreach ($this->positions($value) as $position) { - if (!$this->getBitAtPosition($position)) { - return false; - } - } - return true; - } - - /** - * Returns an ASCII representation of the current bit field. - * - * @return string - */ - public function showBitField() { - return join(array_map(function ($chr) { return str_pad(base_convert(bin2hex($chr), 16, 2), 8, '0', STR_PAD_LEFT); }, str_split($this->bitField))); - } - - /** - * Returns an ASCII safe representation of the BloomFilter object. - * This representation can be unserialized using unserializeFromStringRepresentation(). - * - * @return string - */ - public function __toString() { - return "k:$this->k/m:$this->m(" . base64_encode($this->bitField) . ')'; - } - -} \ No newline at end of file +class BloomFilter +{ + protected $bitField = ''; + protected $m; + protected $k; + + /** + * @param int $m Size of the bit field. Actual memory used will be $m/8 bytes. + * @param int $k Number of hash functions + */ + public function __construct($m, $k) + { + if (!is_numeric($m) || !is_numeric($k)) { + throw new InvalidArgumentException('$m and $k must be integers'); + } + $this->bitField = $this->initializeBitFieldOfLength($m); + $this->m = (int) $m; + $this->k = (int) $k; + } + + /** + * Calculates the optimal number of k given m and a + * typical number of items to be stored. + * + * @param int $m Size of the bit field + * @param int $n Typical number of items to insert + * @return int Optimal number for k + */ + public static function getK($m, $n) + { + return ceil(($m / $n) * log(2)); + } + + /** + * Returns an instance based on the bit field size and expected number of stored items. + * Automates the calculation of k. + * + * @param int $m Bit field size + * @param int $n Expected number of stored values + * @return BloomFilter + */ + public static function constructForTypicalSize($m, $n) + { + return new self($m, self::getK($m, $n)); + } + + /** + * Unserializes in instance from an ASCII safe string representation produced by __toString. + * + * @param string $string String representation + * @return BloomFilter Unserialized instance + */ + public static function unserializeFromStringRepresentation($string) + { + if (!preg_match('~k:(?P\d+)/m:(?P\d+)\((?P[0-9a-zA-Z+/=]+)\)~', $string, $matches)) { + throw new InvalidArgumentException('Invalid string representation'); + } + $bf = new self((int) $matches['m'], (int) $matches['k']); + $bf->bitField = base64_decode($matches['bitfield']); + return $bf; + } + protected function initializeBitFieldOfLength($length) + { + return str_repeat("\x00", ceil($length / 8)); + } + + protected function setBitAtPosition($pos) + { + list($char, $byte) = $this->position2CharAndByte($pos); + $this->bitField[$char] = $this->bitField[$char] | $byte; + } + + protected function getBitAtPosition($pos) + { + list($char, $byte) = $this->position2CharAndByte($pos); + return ($this->bitField[$char] & $byte) === $byte; + } + + /** + * Returns a tuple with the char offset into the bitfield string + * in index 0 and a bitmask for the specific position in index 1. + * E.g.: Position 9 -> (1, "10000000") (2nd byte, "first" bit) + * + * @param int $pos The $pos'th bit in the bit field. + * @return array array(int $charOffset, string $bitmask) + */ + protected function position2CharAndByte($pos) + { + if ($pos > $this->m) { + throw new InvalidArgumentException("\$pos of $pos beyond bitfield length of $this->m"); + } + static $positionMap = array( + 8 => "\x01", + 7 => "\x02", + 6 => "\x04", + 5 => "\x08", + 4 => "\x10", + 3 => "\x20", + 2 => "\x40", + 1 => "\x80", + ); + + $char = (int) ceil($pos / 8) - 1; + $byte = $positionMap[$pos % 8 ?: 8]; + return array($char, $byte); + } + + /** + * Calculates the positions a value hashes to in the bitfield. + * + * @param string $value The value to insert into the bitfield. + * @return SplFixedArray Array containing the numeric positions in the bitfield. + */ + protected function positions($value) + { + mt_srand(crc32($value)); + $positions = new SplFixedArray($this->k); + for ($i = 0; $i < $this->k; $i++) { + $positions[$i] = mt_rand(1, $this->m); + } + return $positions; + } + + /** + * Add a value into the set. + * + * @param string $value + */ + public function add($value) + { + foreach ($this->positions($value) as $position) { + $this->setBitAtPosition($position); + } + } + + /** + * Checks if the value may have been added to the set before. + * False positives are possible, false negatives are not. + * + * @param string $value + * @return boolean + */ + public function maybeInSet($value) + { + foreach ($this->positions($value) as $position) { + if (!$this->getBitAtPosition($position)) { + return false; + } + } + return true; + } + + /** + * Returns an ASCII representation of the current bit field. + * + * @return string + */ + public function showBitField() + { + return join(array_map(function ($chr) {return str_pad(base_convert(bin2hex($chr), 16, 2), 8, '0', STR_PAD_LEFT);}, str_split($this->bitField))); + } + + /** + * Returns an ASCII safe representation of the BloomFilter object. + * This representation can be unserialized using unserializeFromStringRepresentation(). + * + * @return string + */ + public function __toString() + { + return "k:$this->k/m:$this->m(" . base64_encode($this->bitField) . ')'; + } + +} diff --git a/src/Lib/DbConnection.php b/src/Lib/DbConnection.php index 3ce3c48..404c91b 100644 --- a/src/Lib/DbConnection.php +++ b/src/Lib/DbConnection.php @@ -1,9 +1,9 @@ database_type = strtolower($options['database_type']); } - } - else - { + } else { return false; } - if (isset($options['prefix'])) - { + if (isset($options['prefix'])) { $this->prefix = $options['prefix']; } - if (isset($options['option'])) - { + if (isset($options['option'])) { $this->option = $options['option']; } - if (isset($options['command']) && is_array($options['command'])) - { + if (isset($options['command']) && is_array($options['command'])) { $commands = $options['command']; - } - else - { + } else { $commands = []; } - if (isset($options['dsn'])) - { - if (isset($options['dsn']['driver'])) - { + if (isset($options['dsn'])) { + if (isset($options['dsn']['driver'])) { $attr = $options['dsn']; - } - else - { + } else { return false; } - } - else - { + } else { if ( isset($options['port']) && is_int($options['port'] * 1) - ) - { + ) { $port = $options['port']; } $is_port = isset($port); - switch ($this->database_type) - { + switch ($this->database_type) { case 'mariadb': case 'mysql': $attr = [ 'driver' => 'mysql', - 'dbname' => $options['database_name'] + 'dbname' => $options['database_name'], ]; - if (isset($options['socket'])) - { + if (isset($options['socket'])) { $attr['unix_socket'] = $options['socket']; - } - else - { + } else { $attr['host'] = $options['server']; - if ($is_port) - { + if ($is_port) { $attr['port'] = $port; } } @@ -97,10 +76,9 @@ public function __construct($options = null) $attr = [ 'driver' => 'pgsql', 'host' => $options['server'], - 'dbname' => $options['database_name'] + 'dbname' => $options['database_name'], ]; - if ($is_port) - { + if ($is_port) { $attr['port'] = $port; } break; @@ -108,10 +86,9 @@ public function __construct($options = null) $attr = [ 'driver' => 'dblib', 'host' => $options['server'], - 'dbname' => $options['database_name'] + 'dbname' => $options['database_name'], ]; - if ($is_port) - { + if ($is_port) { $attr['port'] = $port; } break; @@ -119,33 +96,28 @@ public function __construct($options = null) $attr = [ 'driver' => 'oci', 'dbname' => $options['server'] ? - '//' . $options['server'] . ($is_port ? ':' . $port : ':1521') . '/' . $options['database_name'] : - $options['database_name'] + '//' . $options['server'] . ($is_port ? ':' . $port : ':1521') . '/' . $options['database_name'] : + $options['database_name'], ]; - if (isset($options['charset'])) - { + if (isset($options['charset'])) { $attr['charset'] = $options['charset']; } break; case 'mssql': - if (strstr(PHP_OS, 'WIN')) - { + if (strstr(PHP_OS, 'WIN')) { $attr = [ 'driver' => 'sqlsrv', 'server' => $options['server'], - 'database' => $options['database_name'] + 'database' => $options['database_name'], ]; - } - else - { + } else { $attr = [ 'driver' => 'dblib', 'host' => $options['server'], - 'dbname' => $options['database_name'] + 'dbname' => $options['database_name'], ]; } - if ($is_port) - { + if ($is_port) { $attr['port'] = $port; } // Keep MSSQL QUOTED_IDENTIFIER is ON for standard quoting @@ -161,14 +133,10 @@ public function __construct($options = null) $driver = $attr['driver']; unset($attr['driver']); $stack = []; - foreach ($attr as $key => $value) - { - if (is_int($key)) - { + foreach ($attr as $key => $value) { + if (is_int($key)) { $stack[] = $value; - } - else - { + } else { $stack[] = $key . '=' . $value; } } @@ -176,8 +144,7 @@ public function __construct($options = null) if ( in_array($this->database_type, ['mariadb', 'mysql', 'pgsql', 'sybase', 'mssql']) && $options['charset'] - ) - { + ) { $commands[] = "SET NAMES '" . $options['charset'] . "'"; } $this->pdo = new PDO( @@ -186,12 +153,10 @@ public function __construct($options = null) $options['password'], $this->option ); - foreach ($commands as $value) - { + foreach ($commands as $value) { $this->pdo->exec($value); } - } - catch (PDOException $e) { + } catch (PDOException $e) { throw new Exception($e->getMessage()); } } @@ -201,8 +166,7 @@ public function closeConnection() } public function query($query) { - if ($this->debug_mode) - { + if ($this->debug_mode) { echo $query; $this->debug_mode = false; return false; @@ -212,8 +176,7 @@ public function query($query) } public function exec($query) { - if ($this->debug_mode) - { + if ($this->debug_mode) { echo $query; $this->debug_mode = false; return false; @@ -232,40 +195,30 @@ protected function tableQuote($table) protected function columnQuote($string) { preg_match('/(\(JSON\)\s*|^#)?([a-zA-Z0-9_]*)\.([a-zA-Z0-9_]*)/', $string, $column_match); - if (isset($column_match[ 2 ], $column_match[ 3 ])) - { - return '"' . $this->prefix . $column_match[ 2 ] . '"."' . $column_match[ 3 ] . '"'; + if (isset($column_match[2], $column_match[3])) { + return '"' . $this->prefix . $column_match[2] . '"."' . $column_match[3] . '"'; } return '"' . $string . '"'; } protected function columnPush(&$columns) { - if ($columns == '*') - { + if ($columns == '*') { return $columns; } - if (is_string($columns)) - { + if (is_string($columns)) { $columns = [$columns]; } $stack = []; - foreach ($columns as $key => $value) - { - if (is_array($value)) - { + foreach ($columns as $key => $value) { + if (is_array($value)) { $stack[] = $this->columnPush($value); - } - else - { + } else { preg_match('/([a-zA-Z0-9_\-\.]*)\s*\(([a-zA-Z0-9_\-]*)\)/i', $value, $match); - if (isset($match[ 1 ], $match[ 2 ])) - { - $stack[] = $this->columnQuote( $match[ 1 ] ) . ' AS ' . $this->columnQuote( $match[ 2 ] ); - $columns[ $key ] = $match[ 2 ]; - } - else - { - $stack[] = $this->columnQuote( $value ); + if (isset($match[1], $match[2])) { + $stack[] = $this->columnQuote($match[1]) . ' AS ' . $this->columnQuote($match[2]); + $columns[$key] = $match[2]; + } else { + $stack[] = $this->columnQuote($value); } } } @@ -274,8 +227,7 @@ protected function columnPush(&$columns) protected function arrayQuote($array) { $temp = []; - foreach ($array as $value) - { + foreach ($array as $value) { $temp[] = is_int($value) ? $value : $this->pdo->quote($value); } return implode($temp, ','); @@ -283,8 +235,7 @@ protected function arrayQuote($array) protected function innerConjunct($data, $conjunctor, $outer_conjunctor) { $haystack = []; - foreach ($data as $value) - { + foreach ($data as $value) { $haystack[] = '(' . $this->dataImplode($value, $conjunctor) . ')'; } return implode($outer_conjunctor . ' ', $haystack); @@ -292,46 +243,36 @@ protected function innerConjunct($data, $conjunctor, $outer_conjunctor) protected function fnQuote($column, $string) { return (strpos($column, '#') === 0 && preg_match('/^[A-Z0-9\_]*\([^)]*\)$/', $string)) ? - $string : - $this->quote($string); + $string : + $this->quote($string); } protected function dataImplode($data, $conjunctor, $outer_conjunctor = null) { $wheres = []; - foreach ($data as $key => $value) - { + foreach ($data as $key => $value) { $type = gettype($value); if ( preg_match("/^(AND|OR)(\s+#.*)?$/i", $key, $relation_match) && $type == 'array' - ) - { + ) { $wheres[] = 0 !== count(array_diff_key($value, array_keys(array_keys($value)))) ? - '(' . $this->dataImplode($value, ' ' . $relation_match[ 1 ]) . ')' : - '(' . $this->innerConjunct($value, ' ' . $relation_match[ 1 ], $conjunctor) . ')'; - } - else - { + '(' . $this->dataImplode($value, ' ' . $relation_match[1]) . ')' : + '(' . $this->innerConjunct($value, ' ' . $relation_match[1], $conjunctor) . ')'; + } else { if ( is_int($key) && preg_match('/([\w\.\-]+)\[(\>|\>\=|\<|\<\=|\!|\=)\]([\w\.\-]+)/i', $value, $match) - ) - { - $operator = $match[ 2 ]; - - $wheres[] = $this->columnQuote($match[ 1 ]) . ' ' . $operator . ' ' . $this->columnQuote($match[ 3 ]); - } - else - { + ) { + $operator = $match[2]; + + $wheres[] = $this->columnQuote($match[1]) . ' ' . $operator . ' ' . $this->columnQuote($match[3]); + } else { preg_match('/(#?)([\w\.\-]+)(\[(\>|\>\=|\<|\<\=|\!|\<\>|\>\<|\!?~)\])?/i', $key, $match); - $column = $this->columnQuote($match[ 2 ]); - if (isset($match[ 4 ])) - { - $operator = $match[ 4 ]; - if ($operator == '!') - { - switch ($type) - { + $column = $this->columnQuote($match[2]); + if (isset($match[4])) { + $operator = $match[4]; + if ($operator == '!') { + switch ($type) { case 'NULL': $wheres[] = $column . ' IS NOT NULL'; break; @@ -350,74 +291,53 @@ protected function dataImplode($data, $conjunctor, $outer_conjunctor = null) break; } } - if ($operator == '<>' || $operator == '><') - { - if ($type == 'array') - { - if ($operator == '><') - { + if ($operator == '<>' || $operator == '><') { + if ($type == 'array') { + if ($operator == '><') { $column .= ' NOT'; } - if (is_numeric($value[ 0 ]) && is_numeric($value[ 1 ])) - { - $wheres[] = '(' . $column . ' BETWEEN ' . $value[ 0 ] . ' AND ' . $value[ 1 ] . ')'; - } - else - { - $wheres[] = '(' . $column . ' BETWEEN ' . $this->quote($value[ 0 ]) . ' AND ' . $this->quote($value[ 1 ]) . ')'; + if (is_numeric($value[0]) && is_numeric($value[1])) { + $wheres[] = '(' . $column . ' BETWEEN ' . $value[0] . ' AND ' . $value[1] . ')'; + } else { + $wheres[] = '(' . $column . ' BETWEEN ' . $this->quote($value[0]) . ' AND ' . $this->quote($value[1]) . ')'; } } } - if ($operator == '~' || $operator == '!~') - { - if ($type != 'array') - { + if ($operator == '~' || $operator == '!~') { + if ($type != 'array') { $value = [$value]; } $connector = ' OR '; $stack = array_values($value); - if (is_array($stack[0])) - { - if (isset($value['AND']) || isset($value['OR'])) - { + if (is_array($stack[0])) { + if (isset($value['AND']) || isset($value['OR'])) { $connector = ' ' . array_keys($value)[0] . ' '; $value = $stack[0]; } } $like_clauses = []; - foreach ($value as $item) - { + foreach ($value as $item) { $item = strval($item); - if (!preg_match('/(\[.+\]|_|%.+|.+%)/', $item)) - { + if (!preg_match('/(\[.+\]|_|%.+|.+%)/', $item)) { $item = '%' . $item . '%'; } $like_clauses[] = $column . ($operator === '!~' ? ' NOT' : '') . ' LIKE ' . $this->fnQuote($key, $item); } $wheres[] = '(' . implode($connector, $like_clauses) . ')'; } - if (in_array($operator, ['>', '>=', '<', '<='])) - { + if (in_array($operator, ['>', '>=', '<', '<='])) { $condition = $column . ' ' . $operator . ' '; - if (is_numeric($value)) - { + if (is_numeric($value)) { $condition .= $value; - } - elseif (strpos($key, '#') === 0) - { + } elseif (strpos($key, '#') === 0) { $condition .= $this->fnQuote($key, $value); - } - else - { + } else { $condition .= $this->quote($value); } $wheres[] = $condition; } - } - else - { - switch ($type) - { + } else { + switch ($type) { case 'NULL': $wheres[] = $column . ' IS NULL'; break; @@ -444,106 +364,78 @@ protected function dataImplode($data, $conjunctor, $outer_conjunctor = null) protected function whereClause($where) { $where_clause = ''; - if (is_array($where)) - { + if (is_array($where)) { $where_keys = array_keys($where); $where_AND = preg_grep("/^AND\s*#?$/i", $where_keys); $where_OR = preg_grep("/^OR\s*#?$/i", $where_keys); $single_condition = array_diff_key($where, array_flip( ['AND', 'OR', 'GROUP', 'ORDER', 'HAVING', 'LIMIT', 'LIKE', 'MATCH'] )); - if ($single_condition != []) - { + if ($single_condition != []) { $condition = $this->dataImplode($single_condition, ' AND'); - if ($condition != '') - { + if ($condition != '') { $where_clause = ' WHERE ' . $condition; } } - if (!empty($where_AND)) - { + if (!empty($where_AND)) { $value = array_values($where_AND); - $where_clause = ' WHERE ' . $this->dataImplode($where[ $value[ 0 ] ], ' AND'); + $where_clause = ' WHERE ' . $this->dataImplode($where[$value[0]], ' AND'); } - if (!empty($where_OR)) - { + if (!empty($where_OR)) { $value = array_values($where_OR); - $where_clause = ' WHERE ' . $this->dataImplode($where[ $value[ 0 ] ], ' OR'); + $where_clause = ' WHERE ' . $this->dataImplode($where[$value[0]], ' OR'); } - if (isset($where[ 'MATCH' ])) - { - $MATCH = $where[ 'MATCH' ]; - if (is_array($MATCH) && isset($MATCH[ 'columns' ], $MATCH[ 'keyword' ])) - { - $columns = str_replace('.', '"."', implode($MATCH[ 'columns' ], '", "')); - $keywords = $this->quote($MATCH[ 'keyword' ]); + if (isset($where['MATCH'])) { + $MATCH = $where['MATCH']; + if (is_array($MATCH) && isset($MATCH['columns'], $MATCH['keyword'])) { + $columns = str_replace('.', '"."', implode($MATCH['columns'], '", "')); + $keywords = $this->quote($MATCH['keyword']); $where_clause .= ($where_clause != '' ? ' AND ' : ' WHERE ') . ' MATCH ("' . $columns . '") AGAINST (' . $keywords . ')'; } } - if (isset($where[ 'GROUP' ])) - { - $where_clause .= ' GROUP BY ' . $this->columnQuote($where[ 'GROUP' ]); - if (isset($where[ 'HAVING' ])) - { - $where_clause .= ' HAVING ' . $this->dataImplode($where[ 'HAVING' ], ' AND'); + if (isset($where['GROUP'])) { + $where_clause .= ' GROUP BY ' . $this->columnQuote($where['GROUP']); + if (isset($where['HAVING'])) { + $where_clause .= ' HAVING ' . $this->dataImplode($where['HAVING'], ' AND'); } } - if (isset($where[ 'ORDER' ])) - { - $ORDER = $where[ 'ORDER' ]; - if (is_array($ORDER)) - { + if (isset($where['ORDER'])) { + $ORDER = $where['ORDER']; + if (is_array($ORDER)) { $stack = []; - foreach ($ORDER as $column => $value) - { - if (is_array($value)) - { + foreach ($ORDER as $column => $value) { + if (is_array($value)) { $stack[] = 'FIELD(' . $this->columnQuote($column) . ', ' . $this->arrayQuote($value) . ')'; - } - else if ($value === 'ASC' || $value === 'DESC') - { + } else if ($value === 'ASC' || $value === 'DESC') { $stack[] = $this->columnQuote($column) . ' ' . $value; - } - else if (is_int($column)) - { + } else if (is_int($column)) { $stack[] = $this->columnQuote($value); } } $where_clause .= ' ORDER BY ' . implode($stack, ','); - } - else - { + } else { $where_clause .= ' ORDER BY ' . $this->columnQuote($ORDER); } } - if (isset($where[ 'LIMIT' ])) - { - $LIMIT = $where[ 'LIMIT' ]; - if (is_numeric($LIMIT)) - { + if (isset($where['LIMIT'])) { + $LIMIT = $where['LIMIT']; + if (is_numeric($LIMIT)) { $where_clause .= ' LIMIT ' . $LIMIT; } if ( is_array($LIMIT) && - is_numeric($LIMIT[ 0 ]) && - is_numeric($LIMIT[ 1 ]) - ) - { - if ($this->database_type === 'pgsql') - { - $where_clause .= ' OFFSET ' . $LIMIT[ 0 ] . ' LIMIT ' . $LIMIT[ 1 ]; - } - else - { - $where_clause .= ' LIMIT ' . $LIMIT[ 0 ] . ',' . $LIMIT[ 1 ]; + is_numeric($LIMIT[0]) && + is_numeric($LIMIT[1]) + ) { + if ($this->database_type === 'pgsql') { + $where_clause .= ' OFFSET ' . $LIMIT[0] . ' LIMIT ' . $LIMIT[1]; + } else { + $where_clause .= ' LIMIT ' . $LIMIT[0] . ',' . $LIMIT[1]; } } } - } - else - { - if ($where != null) - { + } else { + if ($where != null) { $where_clause .= ' ' . $where; } } @@ -552,164 +444,121 @@ protected function whereClause($where) protected function selectContext($table, $join, &$columns = null, $where = null, $column_fn = null) { preg_match('/([a-zA-Z0-9_\-]*)\s*\(([a-zA-Z0-9_\-]*)\)/i', $table, $table_match); - if (isset($table_match[ 1 ], $table_match[ 2 ])) - { - $table = $this->tableQuote($table_match[ 1 ]); - $table_query = $table . ' AS ' . $this->tableQuote($table_match[ 2 ]); - } - else - { + if (isset($table_match[1], $table_match[2])) { + $table = $this->tableQuote($table_match[1]); + $table_query = $table . ' AS ' . $this->tableQuote($table_match[2]); + } else { $table = $this->tableQuote($table); $table_query = $table; } $join_key = is_array($join) ? array_keys($join) : null; if ( - isset($join_key[ 0 ]) && - strpos($join_key[ 0 ], '[') === 0 - ) - { + isset($join_key[0]) && + strpos($join_key[0], '[') === 0 + ) { $table_join = []; $join_array = [ '>' => 'LEFT', '<' => 'RIGHT', '<>' => 'FULL', - '><' => 'INNER' + '><' => 'INNER', ]; - foreach($join as $sub_table => $relation) - { + foreach ($join as $sub_table => $relation) { preg_match('/(\[(\<|\>|\>\<|\<\>)\])?([a-zA-Z0-9_\-]*)\s?(\(([a-zA-Z0-9_\-]*)\))?/', $sub_table, $match); - if ($match[ 2 ] != '' && $match[ 3 ] != '') - { - if (is_string($relation)) - { + if ($match[2] != '' && $match[3] != '') { + if (is_string($relation)) { $relation = 'USING ("' . $relation . '")'; } - if (is_array($relation)) - { + if (is_array($relation)) { // For ['column1', 'column2'] - if (isset($relation[ 0 ])) - { + if (isset($relation[0])) { $relation = 'USING ("' . implode($relation, '", "') . '")'; - } - else - { + } else { $joins = []; - foreach ($relation as $key => $value) - { + foreach ($relation as $key => $value) { $joins[] = ( strpos($key, '.') > 0 ? - // For ['tableB.column' => 'column'] - $this->columnQuote($key) : - // For ['column1' => 'column2'] - $table . '."' . $key . '"' + // For ['tableB.column' => 'column'] + $this->columnQuote($key) : + // For ['column1' => 'column2'] + $table . '."' . $key . '"' ) . ' = ' . - $this->tableQuote(isset($match[ 5 ]) ? $match[ 5 ] : $match[ 3 ]) . '."' . $value . '"'; + $this->tableQuote(isset($match[5]) ? $match[5] : $match[3]) . '."' . $value . '"'; } $relation = 'ON ' . implode($joins, ' AND '); } } - $table_name = $this->tableQuote($match[ 3 ]) . ' '; - if (isset($match[ 5 ])) - { - $table_name .= 'AS ' . $this->tableQuote($match[ 5 ]) . ' '; + $table_name = $this->tableQuote($match[3]) . ' '; + if (isset($match[5])) { + $table_name .= 'AS ' . $this->tableQuote($match[5]) . ' '; } - $table_join[] = $join_array[ $match[ 2 ] ] . ' JOIN ' . $table_name . $relation; + $table_join[] = $join_array[$match[2]] . ' JOIN ' . $table_name . $relation; } } $table_query .= ' ' . implode($table_join, ' '); - } - else - { - if (is_null($columns)) - { - if (is_null($where)) - { + } else { + if (is_null($columns)) { + if (is_null($where)) { if ( is_array($join) && isset($column_fn) - ) - { + ) { $where = $join; $columns = null; - } - else - { + } else { $where = null; $columns = $join; } - } - else - { + } else { $where = $join; $columns = null; } - } - else - { + } else { $where = $columns; $columns = $join; } } - if (isset($column_fn)) - { - if ($column_fn == 1) - { + if (isset($column_fn)) { + if ($column_fn == 1) { $column = '1'; - if (is_null($where)) - { + if (is_null($where)) { $where = $columns; } - } - else - { - if (empty($columns)) - { + } else { + if (empty($columns)) { $columns = '*'; $where = $join; } $column = $column_fn . '(' . $this->columnPush($columns) . ')'; } - } - else - { + } else { $column = $this->columnPush($columns); } return 'SELECT ' . $column . ' FROM ' . $table_query . $this->whereClause($where); } protected function dataMap($index, $key, $value, $data, &$stack) { - if (is_array($value)) - { + if (is_array($value)) { $sub_stack = []; - foreach ($value as $sub_key => $sub_value) - { - if (is_array($sub_value)) - { - $current_stack = $stack[ $index ][ $key ]; + foreach ($value as $sub_key => $sub_value) { + if (is_array($sub_value)) { + $current_stack = $stack[$index][$key]; $this->dataMap(false, $sub_key, $sub_value, $data, $current_stack); - $stack[ $index ][ $key ][ $sub_key ] = $current_stack[ 0 ][ $sub_key ]; - } - else - { + $stack[$index][$key][$sub_key] = $current_stack[0][$sub_key]; + } else { $this->dataMap(false, preg_replace('/^[\w]*\./i', "", $sub_value), $sub_key, $data, $sub_stack); - $stack[ $index ][ $key ] = $sub_stack; + $stack[$index][$key] = $sub_stack; } } - } - else - { - if ($index !== false) - { - $stack[ $index ][ $value ] = $data[ $value ]; - } - else - { - if (preg_match('/[a-zA-Z0-9_\-\.]*\s*\(([a-zA-Z0-9_\-]*)\)/i', $key, $key_match)) - { - $key = $key_match[ 1 ]; + } else { + if ($index !== false) { + $stack[$index][$value] = $data[$value]; + } else { + if (preg_match('/[a-zA-Z0-9_\-\.]*\s*\(([a-zA-Z0-9_\-]*)\)/i', $key, $key_match)) { + $key = $key_match[1]; } - $stack[ $key ] = $data[ $key ]; + $stack[$key] = $data[$key]; } } } @@ -717,28 +566,22 @@ public function select($table, $join, $columns = null, $where = null) { $column = $where == null ? $join : $columns; $is_single_column = (is_string($column) && $column !== '*'); - + $query = $this->query($this->selectContext($table, $join, $columns, $where)); $stack = []; $index = 0; - if (!$query) - { + if (!$query) { return false; } - if ($columns === '*') - { + if ($columns === '*') { return $query->fetchAll(PDO::FETCH_ASSOC); } - if ($is_single_column) - { + if ($is_single_column) { return $query->fetchAll(PDO::FETCH_COLUMN); } - while ($row = $query->fetch(PDO::FETCH_ASSOC)) - { - foreach ($columns as $key => $value) - { - if (!is_array($value)) - { + while ($row = $query->fetch(PDO::FETCH_ASSOC)) { + foreach ($columns as $key => $value) { + if (!is_array($value)) { $value = preg_replace('/^[\w]*\./i', "", $value); } $this->dataMap($index, $key, $value, $row, $stack); @@ -753,40 +596,31 @@ public function insert($table, $datas) $columns = []; $fields = []; // Check indexed or associative array - if (!isset($datas[ 0 ])) - { + if (!isset($datas[0])) { $datas = [$datas]; } - foreach ($datas as $data) - { - foreach ($data as $key => $value) - { + foreach ($datas as $data) { + foreach ($data as $key => $value) { $columns[] = $key; } } $columns = array_unique($columns); - foreach ($datas as $data) - { + foreach ($datas as $data) { $values = []; - foreach ($columns as $key) - { - if (!isset($data[$key])) - { + foreach ($columns as $key) { + if (!isset($data[$key])) { $values[] = 'NULL'; - } - else - { + } else { $value = $data[$key]; - switch (gettype($value)) - { + switch (gettype($value)) { case 'NULL': $values[] = 'NULL'; break; case 'array': preg_match("/\(JSON\)\s*([\w]+)/i", $key, $column_match); - $values[] = isset($column_match[ 0 ]) ? - $this->quote(json_encode($value)) : - $this->quote(serialize($value)); + $values[] = isset($column_match[0]) ? + $this->quote(json_encode($value)) : + $this->quote(serialize($value)); break; case 'boolean': $values[] = ($value ? '1' : '0'); @@ -801,8 +635,7 @@ public function insert($table, $datas) } $stack[] = '(' . implode($values, ', ') . ')'; } - foreach ($columns as $key) - { + foreach ($columns as $key) { $fields[] = $this->columnQuote(preg_replace("/^(\(JSON\)\s*|#)/i", "", $key)); } return $this->exec('INSERT INTO ' . $this->tableQuote($table) . ' (' . implode(', ', $fields) . ') VALUES ' . implode(', ', $stack)); @@ -810,29 +643,23 @@ public function insert($table, $datas) public function update($table, $data, $where = null) { $fields = []; - foreach ($data as $key => $value) - { + foreach ($data as $key => $value) { preg_match('/([\w]+)(\[(\+|\-|\*|\/)\])?/i', $key, $match); - if (isset($match[ 3 ])) - { - if (is_numeric($value)) - { - $fields[] = $this->columnQuote($match[ 1 ]) . ' = ' . $this->columnQuote($match[ 1 ]) . ' ' . $match[ 3 ] . ' ' . $value; + if (isset($match[3])) { + if (is_numeric($value)) { + $fields[] = $this->columnQuote($match[1]) . ' = ' . $this->columnQuote($match[1]) . ' ' . $match[3] . ' ' . $value; } - } - else - { + } else { $column = $this->columnQuote(preg_replace("/^(\(JSON\)\s*|#)/i", "", $key)); - switch (gettype($value)) - { + switch (gettype($value)) { case 'NULL': $fields[] = $column . ' = NULL'; break; case 'array': preg_match("/\(JSON\)\s*([\w]+)/i", $key, $column_match); $fields[] = $column . ' = ' . $this->quote( - isset($column_match[ 0 ]) ? json_encode($value) : serialize($value) - ); + isset($column_match[0]) ? json_encode($value) : serialize($value) + ); break; case 'boolean': $fields[] = $column . ' = ' . ($value ? '1' : '0'); @@ -853,33 +680,24 @@ public function delete($table, $where) } public function replace($table, $columns, $search = null, $replace = null, $where = null) { - if (is_array($columns)) - { + if (is_array($columns)) { $replace_query = []; - foreach ($columns as $column => $replacements) - { - foreach ($replacements as $replace_search => $replace_replacement) - { + foreach ($columns as $column => $replacements) { + foreach ($replacements as $replace_search => $replace_replacement) { $replace_query[] = $column . ' = REPLACE(' . $this->columnQuote($column) . ', ' . $this->quote($replace_search) . ', ' . $this->quote($replace_replacement) . ')'; } } $replace_query = implode(', ', $replace_query); $where = $search; - } - else - { - if (is_array($search)) - { + } else { + if (is_array($search)) { $replace_query = []; - foreach ($search as $replace_search => $replace_replacement) - { + foreach ($search as $replace_search => $replace_replacement) { $replace_query[] = $columns . ' = REPLACE(' . $this->columnQuote($columns) . ', ' . $this->quote($replace_search) . ', ' . $this->quote($replace_replacement) . ')'; } $replace_query = implode(', ', $replace_query); $where = $replace; - } - else - { + } else { $replace_query = $columns . ' = REPLACE(' . $this->columnQuote($columns) . ', ' . $this->quote($search) . ', ' . $this->quote($replace) . ')'; } } @@ -890,38 +708,28 @@ public function get($table, $join = null, $columns = null, $where = null) $column = $where == null ? $join : $columns; $is_single_column = (is_string($column) && $column !== '*'); $query = $this->query($this->selectContext($table, $join, $columns, $where) . ' LIMIT 1'); - if ($query) - { + if ($query) { $data = $query->fetchAll(PDO::FETCH_ASSOC); - if (isset($data[ 0 ])) - { - if ($is_single_column) - { - return $data[ 0 ][ preg_replace('/^[\w]*\./i', "", $column) ]; + if (isset($data[0])) { + if ($is_single_column) { + return $data[0][preg_replace('/^[\w]*\./i', "", $column)]; } - - if ($column === '*') - { - return $data[ 0 ]; + + if ($column === '*') { + return $data[0]; } $stack = []; - foreach ($columns as $key => $value) - { - if (!is_array($value)) - { + foreach ($columns as $key => $value) { + if (!is_array($value)) { $value = preg_replace('/^[\w]*\./i', "", $value); } - $this->dataMap(0, $key, $value, $data[ 0 ], $stack); + $this->dataMap(0, $key, $value, $data[0], $stack); } - return $stack[ 0 ]; - } - else - { + return $stack[0]; + } else { return false; } - } - else - { + } else { return false; } } @@ -929,12 +737,9 @@ public function has($table, $join, $where = null) { $column = null; $query = $this->query('SELECT EXISTS(' . $this->selectContext($table, $join, $column, $where, 1) . ')'); - if ($query) - { + if ($query) { return $query->fetchColumn() === '1'; - } - else - { + } else { return false; } } @@ -946,26 +751,20 @@ public function count($table, $join = null, $column = null, $where = null) public function max($table, $join, $column = null, $where = null) { $query = $this->query($this->selectContext($table, $join, $column, $where, 'MAX')); - if ($query) - { + if ($query) { $max = $query->fetchColumn(); return is_numeric($max) ? $max + 0 : $max; - } - else - { + } else { return false; } } public function min($table, $join, $column = null, $where = null) { $query = $this->query($this->selectContext($table, $join, $column, $where, 'MIN')); - if ($query) - { + if ($query) { $min = $query->fetchColumn(); return is_numeric($min) ? $min + 0 : $min; - } - else - { + } else { return false; } } @@ -981,32 +780,23 @@ public function sum($table, $join, $column = null, $where = null) } public function action($actions) { - if (is_callable($actions)) - { + if (is_callable($actions)) { $this->pdo->beginTransaction(); $result = $actions($this); - if ($result === false) - { + if ($result === false) { $this->pdo->rollBack(); - } - else - { + } else { $this->pdo->commit(); } - } - else - { + } else { return false; } } public function id() { - if ($this->database_type == 'oracle') - { + if ($this->database_type == 'oracle') { return 0; - } - elseif ($this->database_type == 'mssql') - { + } elseif ($this->database_type == 'mssql') { return $this->pdo->query('SELECT SCOPE_IDENTITY()')->fetchColumn(); } return $this->pdo->lastInsertId(); @@ -1035,11 +825,10 @@ public function info() 'driver' => 'DRIVER_NAME', 'client' => 'CLIENT_VERSION', 'version' => 'SERVER_VERSION', - 'connection' => 'CONNECTION_STATUS' + 'connection' => 'CONNECTION_STATUS', ]; - foreach ($output as $key => $value) - { - $output[ $key ] = @$this->pdo->getAttribute(constant('PDO::ATTR_' . $value)); + foreach ($output as $key => $value) { + $output[$key] = @$this->pdo->getAttribute(constant('PDO::ATTR_' . $value)); } return $output; } diff --git a/src/Lib/Helper.php b/src/Lib/Helper.php index 96b698c..f9aea48 100644 --- a/src/Lib/Helper.php +++ b/src/Lib/Helper.php @@ -1,122 +1,123 @@ [ - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', - 'Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0', - 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:29.0) Gecko/20100101 Firefox/29.0', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', - 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0', - 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', - 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)', - 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)', - 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)', - 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)', - 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', - 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', - ], - 'android' => [ - 'Mozilla/5.0 (Android; Mobile; rv:29.0) Gecko/29.0 Firefox/29.0', - 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36' - ], - 'ios' => [ - 'Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) CriOS/34.0.1847.18 Mobile/11B554a Safari/9537.53', - 'Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53', - 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4', - 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4' - ] - ]; + public static $userAgentArray = [ + 'pc' => [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36', + 'Mozilla/5.0 (X11; Linux x86_64; rv:29.0) Gecko/20100101 Firefox/29.0', + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:29.0) Gecko/20100101 Firefox/29.0', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/537.75.14', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20100101 Firefox/29.0', + 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36', + 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)', + 'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)', + 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0)', + 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)', + 'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)', + 'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko', + ], + 'android' => [ + 'Mozilla/5.0 (Android; Mobile; rv:29.0) Gecko/29.0 Firefox/29.0', + 'Mozilla/5.0 (Linux; Android 4.4.2; Nexus 4 Build/KOT49H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.114 Mobile Safari/537.36', + ], + 'ios' => [ + 'Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) CriOS/34.0.1847.18 Mobile/11B554a Safari/9537.53', + 'Mozilla/5.0 (iPad; CPU OS 7_0_4 like Mac OS X) AppleWebKit/537.51.1 (KHTML, like Gecko) Version/7.0 Mobile/11B554a Safari/9537.53', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4', + 'Mozilla/5.0 (iPhone; CPU iPhone OS 8_0 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12A366 Safari/600.1.4', + ], + ]; - public static function getUrlbyHtml($html, $url) - { - $pattern = "'<\s*a\s.*?href\s*=\s*([\"\'])?(?(1) (.*?)\\1 | ([^\s\>]+))'isx"; + public static function getUrlByHtml($html, $url) + { + $pattern = "'<\s*a\s.*?href\s*=\s*([\"\'])?(?(1) (.*?)\\1 | ([^\s\>]+))'isx"; preg_match_all($pattern, $html, $match); $match = array_merge($match[2], $match[3]); $hrefs = array_flip(array_flip(array_filter($match))); foreach ($hrefs as $key => $href) { - $hrefs[$key] = self::formatUrl($href, $url); + $hrefs[$key] = self::formatUrl($href, $url); } return array_flip(array_flip($hrefs)); - } + } - public static function formatUrl($l1, $l2) - { - if(strlen($l1) > 0){ - $I1 = str_replace([chr(34), chr(39)], '', $l1); - } else { - return $l1; - } - $url_parsed = parse_url($l2); - $scheme = $url_parsed['scheme']; - if($scheme != '') { - $scheme .= '://'; - } - $host = $url_parsed['host']; - $l3 = $scheme.$host; - if(strlen($l3) == 0) { - return $l1; - } - $path = dirname($url_parsed['path']); - if($path[0] == '\\') { - $path = ''; - } - $pos = strpos($I1, '#'); - if($pos>0) { - $I1 = substr($I1, 0, $pos); - } - //判断类型 - if(preg_match("/^(http|https|ftp):(\/\/|\\\\)(([\w\/\\\+\-~`@:%])+\.)+([\w\/\\\.\=\?\+\-~`@\':!%#]|(&)|&)+/i", $I1)) { - return $I1; - } elseif($I1[0] == '/') { - return $I1 = $l3.$I1; - } elseif (substr($I1,0,3) == '../') {//相对路径 - while(substr($I1,0,3) == '../') { - $I1 = substr($I1, strlen($I1)-(strlen($I1)-3), strlen($I1)-3); - if(strlen($path) > 0){ - $path = dirname($path); - } - } - return $I1 = $path=='/' ? $l3.$path.$I1 : $l3.$path."/".$I1; - } elseif(substr($I1, 0, 2) == './') { - return $I1 = $l3.$path.substr($I1, strlen($I1)-(strlen($I1)-1), strlen($I1)-1); - } elseif(strtolower(substr($I1, 0, 7))=='mailto:'||strtolower(substr($I1, 0, 11))=='javascript:') { - return false; - } else { - return $I1 = $l3.$path.'/'.$I1; - } - } + public static function formatUrl($l1, $l2) + { + if (strlen($l1) > 0) { + $I1 = str_replace([chr(34), chr(39)], '', $l1); + } else { + return $l1; + } + $url_parsed = parse_url($l2); + $scheme = $url_parsed['scheme']; + if ($scheme != '') { + $scheme .= '://'; + } + $host = $url_parsed['host']; + $l3 = $scheme . $host; + if (strlen($l3) == 0) { + return $l1; + } + $path = dirname($url_parsed['path']); + if ($path[0] == '\\') { + $path = ''; + } + $pos = strpos($I1, '#'); + if ($pos > 0) { + $I1 = substr($I1, 0, $pos); + } + //判断类型 + if (preg_match("/^(http|https|ftp):(\/\/|\\\\)(([\w\/\\\+\-~`@:%])+\.)+([\w\/\\\.\=\?\+\-~`@\':!%#]|(&)|&)+/i", $I1)) { + return $I1; + } elseif ($I1[0] == '/') { + return $I1 = $l3 . $I1; + } elseif (substr($I1, 0, 3) == '../') { + //相对路径 + while (substr($I1, 0, 3) == '../') { + $I1 = substr($I1, strlen($I1) - (strlen($I1) - 3), strlen($I1) - 3); + if (strlen($path) > 0) { + $path = dirname($path); + } + } + return $I1 = $path == '/' ? $l3 . $path . $I1 : $l3 . $path . "/" . $I1; + } elseif (substr($I1, 0, 2) == './') { + return $I1 = $l3 . $path . substr($I1, strlen($I1) - (strlen($I1) - 1), strlen($I1) - 1); + } elseif (strtolower(substr($I1, 0, 7)) == 'mailto:' || strtolower(substr($I1, 0, 11)) == 'javascript:') { + return false; + } else { + return $I1 = $l3 . $path . '/' . $I1; + } + } - public static function getDomain($url) - { - $parseUrl = parse_url($url); - $domain = $parseUrl['scheme'].'://'; + public static function getDomain($url) + { + $parseUrl = parse_url($url); + $domain = $parseUrl['scheme'] . '://'; - return $domain; - } + return $domain; + } - public static function randUserAgent($type = 'pc') - { - switch ($type) { - case 'pc': - return self::$userAgentArray['pc'][array_rand(self::$userAgentArray['pc'])].rand(0, 10000); - break; - case 'android': - return self::$userAgentArray['android'][array_rand(self::$userAgentArray['android'])].rand(0, 10000); - break; - case 'ios': - return self::$userAgentArray['ios'][array_rand(self::$userAgentArray['ios'])].rand(0, 10000); - break; - case 'mobile': - $userAgentArray = array_merge(self::$userAgentArray['android'], self::$userAgentArray['ios']); - return $userAgentArray[array_rand($userAgentArray)].rand(0, 10000); - default: - return $type; - break; - } - } -} \ No newline at end of file + public static function randUserAgent($type = 'pc') + { + switch ($type) { + case 'pc': + return self::$userAgentArray['pc'][array_rand(self::$userAgentArray['pc'])] . rand(0, 10000); + break; + case 'android': + return self::$userAgentArray['android'][array_rand(self::$userAgentArray['android'])] . rand(0, 10000); + break; + case 'ios': + return self::$userAgentArray['ios'][array_rand(self::$userAgentArray['ios'])] . rand(0, 10000); + break; + case 'mobile': + $userAgentArray = array_merge(self::$userAgentArray['android'], self::$userAgentArray['ios']); + return $userAgentArray[array_rand($userAgentArray)] . rand(0, 10000); + default: + return $type; + break; + } + } +} diff --git a/src/Queue/MemoryQueue.php b/src/Queue/MemoryQueue.php index 9edd8f3..2a19528 100644 --- a/src/Queue/MemoryQueue.php +++ b/src/Queue/MemoryQueue.php @@ -30,7 +30,7 @@ public static function server($ip = '0.0.0.0', $port = 2207) } echo "Memory queue is starting...\n"; fclose(STDOUT); - $STDOUT = fopen(__DIR__.'/server.log', "a"); + $STDOUT = fopen(__DIR__ . '/server.log', "a"); self::$server[$key] = ''; } elseif ($argv[1] == 'stop') { @@ -39,7 +39,7 @@ public static function server($ip = '0.0.0.0', $port = 2207) $globalServer = new Server($ip, $port); Worker::$daemonize = true; - Worker::$stdoutFile = __DIR__.'/server.log'; + Worker::$stdoutFile = __DIR__ . '/server.log'; @Worker::runAll(); } @@ -63,12 +63,12 @@ public function __construct($config) } else { $this->globalData->add($this->queuedKey, []); } - + $this->globalData->add('beanbun', []); if (!isset($this->globalData->beanbun[$this->name])) { $name = $this->name; - $this->globalData->up('beanbun', function($value) use($name) { + $this->globalData->up('beanbun', function ($value) use ($name) { if (!in_array($name, $value)) { $value[] = $name; } @@ -80,7 +80,7 @@ public function __construct($config) public function add($url, $options = []) { if ($this->maxQueueSize != 0 && $this->count() >= $this->maxQueueSize) { - return ; + return; } $queue = [ @@ -89,14 +89,14 @@ public function add($url, $options = []) ]; if ($this->isQueued($queue)) { - return ; + return; } - + $this->globalData->push($this->key, $queue); } public function next() - { + { if ($this->algorithm == 'depth') { $queue = $this->globalData->shift($this->key); } else { @@ -147,7 +147,7 @@ public function clean() unset($this->globalData->{$this->key}); unset($this->globalData->{$this->queuedKey}); $name = $this->name; - $this->globalData->up('beanbun', function($value) use($name){ + $this->globalData->up('beanbun', function ($value) use ($name) { $key = array_search($name, $value); if ($key !== false) { unset($value[$key]); diff --git a/src/Queue/RedisQueue.php b/src/Queue/RedisQueue.php index 0b0e2d0..df7ec30 100644 --- a/src/Queue/RedisQueue.php +++ b/src/Queue/RedisQueue.php @@ -47,7 +47,7 @@ public function getInstance() public function add($url, $options = []) { if ($this->maxQueueSize != 0 && $this->count() >= $this->maxQueueSize) { - return ; + return; } $queue = serialize([ @@ -56,7 +56,7 @@ public function add($url, $options = []) ]); if ($this->isQueued($queue)) { - return ; + return; } $this->getInstance()->rPush($this->key, $queue); @@ -135,7 +135,7 @@ protected function bfHas($item) while ($index < $this->bfHashCount) { $crc = $this->hash($item, $index); $pipe->getbit($this->queuedKey, $crc); - $index ++; + $index++; } $result = $pipe->exec(); return !in_array(0, $result);