当前位置: 首页 > 编程日记 > 正文

网页抓取及下载

downAndroidApk.php

<?php
/*
命令行
d:
cd ApacheServer\php
php.exe D:\ApacheServer\web\crawl\downAndroidApk.php --appidFile=D:\ApacheServer\web\crawl\youxi.txt --newDir=D:\ApacheServer\web\crawl\requestNewDir*/
// This script is only meant to be started from the command line (php-cli);
// refuse to run under any other SAPI.
$current_sapi = strtolower(php_sapi_name());
if ($current_sapi != 'cli') {
    // exit() with a string prints it and terminates, same as echo + exit.
    exit(" error : this script must run by php-cli mode.");
}

echo "\n\n\n =================== begin download =================== \n";
/** Parse the command-line arguments */

/**
 * Extract the options understood by this script from an argv-style array.
 *
 * Recognised arguments (the first occurrence of each one wins):
 *   --newDir=PATH     directory the downloaded apps are saved to
 *   --oldDir=PATH     directory of a previous download run
 *   --appidFile=PATH  text file holding one app id per line
 *   --sleep=SECONDS   pause between two apps; values <= 0 fall back to 3
 *
 * One trailing DIRECTORY_SEPARATOR is removed from both directory options.
 *
 * @param array $args argv-style list of command-line arguments
 * @return array keys: new_dir, old_dir, appid_file (strings), sleep_time (int)
 */
function parseCliOptions($args)
{
    $options = array(
        'new_dir'    => '',
        'old_dir'    => '',
        'appid_file' => '',
        'sleep_time' => 0,
    );

    foreach ($args as $arg) {
        $arg = (string) $arg;

        // strpos() === 1 means the option name starts right after the first
        // '-' of the leading '--'.
        if (!$options['new_dir'] && strpos($arg, '-newDir=') === 1) {
            $dir = (string) substr($arg, 9);
            if (substr($dir, -1) == DIRECTORY_SEPARATOR) {
                $dir = (string) substr($dir, 0, -1);
            }
            $options['new_dir'] = $dir;
        } elseif (!$options['old_dir'] && strpos($arg, '-oldDir=') === 1) {
            $dir = (string) substr($arg, 9);
            if (substr($dir, -1) == DIRECTORY_SEPARATOR) {
                // The original trimmed $new_dir here, which overwrote the
                // old directory with the new one.
                $dir = (string) substr($dir, 0, -1);
            }
            $options['old_dir'] = $dir;
        } elseif (!$options['appid_file'] && strpos($arg, '-appidFile=') === 1) {
            $options['appid_file'] = (string) substr($arg, 12);
        } elseif (!$options['sleep_time'] && strpos($arg, '-sleep=') === 1) {
            $seconds = intval(substr($arg, 8));
            $options['sleep_time'] = $seconds <= 0 ? 3 : $seconds;
        }
    }

    return $options;
}

// $argv holds the space-separated command-line arguments, index 0 being the
// script itself. It is guarded with isset() in case it is not registered.
$cli_options = parseCliOptions(isset($argv) ? $argv : array());

$new_dir    = $cli_options['new_dir'];
$old_dir    = $cli_options['old_dir'];
$appid_file = $cli_options['appid_file'];
$sleep_time = $cli_options['sleep_time'];

/** Validate the arguments */
// The file holding the app id list must exist; without it there is nothing
// to download, so stop with a non-zero exit status.
if (!$appid_file || !is_file($appid_file)) {
    echo 'error : --appidFile error. app id file does not exist!'."\n";
    exit(1);
}

// The target directory is not created by this script; it has to be an
// existing directory because log.txt and the per-app folders go inside it.
if (!$new_dir || !is_dir($new_dir)) {
    echo 'error: --newDir error. new app download directory does not exist, please create it.'."\n";
    exit(1);
}

// The previous download directory is optional: only warn, wait a moment so
// the warning can be read, then continue.
if (!$old_dir || !is_dir($old_dir)) {
    echo '-----warning: --oldDir warning. old app download directory does not exist, can not be compared.'."\n";
    sleep(3);
}
// Load the page-fetching class
require('Snoopy.class.php');
// 引入下载类
require('dedehttpdown.class.php');// 分析app列表文件
/*
 * The app id file (for example youxi.txt) stores one app id per line:
 *   appid1
 *   appid2
 *   appid3
 * file() returns the lines as a zero-indexed array. The flags drop the line
 * terminators and blank lines so that the ids can be used in URLs as-is.
 */
$app_id_array = file($appid_file, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
if ($app_id_array === false) {
    echo 'error : --appidFile error. app id file can not be read!'."\n";
    exit(1);
}

// Open the log file in append mode
$log = '';
$fp = fopen($new_dir.DIRECTORY_SEPARATOR.'log.txt', 'a');
if ($fp === false) {
    echo 'error : can not open log file in '.$new_dir."\n";
    exit(1);
}

foreach ($app_id_array as $k => $appid) {
    // Remove stray whitespace (e.g. the "\r" of Windows line endings).
    $appid = trim($appid);
    if ($appid === '') {
        continue;
    }

    // Announce the app id that is about to be downloaded
    echo '--begin appid: '.$appid."\n";
    $log = getAppInfo($appid);

    // Print and persist the regular log line
    echo $log[0]."\n";
    fwrite($fp, date('Y-m-d H:i:s', time()).' '.$log[0]."\n");

    // If the download failed, also persist the failure message
    if (isset($log[1]) && !empty($log[1])) {
        fwrite($fp, date('Y-m-d H:i:s', time()).' ----------'.$log[1]."----------\n");
    }

    echo '--end appid: '.$appid."\n";

    if ($sleep_time) {
        sleep($sleep_time);
    }
}
fclose($fp);

echo "\n =================== end download =================== \n\n\n";

/** Fetch the information of one app */
/**
 * Scrape the detail page of one app, then download its icon and APK.
 *
 * The page http://www.wandoujia.com/apps/<app_id> is fetched with Snoopy and
 * the icon URL, title, version and package size are extracted with regular
 * expressions. When icon URL, title and version were all found, a directory
 * <new_dir>/<app_id> is created (if missing), download() is called for the
 * icon and for the APK, and the summary line is stored in apkinfo.txt.
 *
 * @param string $app_id app id as used in the detail page URL
 * @return array index 0: "appid|title|version|size" log line (or "appid|"
 *               on failure); index 1: failure message, '' when none
 */
function getAppInfo($app_id)
{
    global $new_dir;

    // Page-fetching object
    $snoopy = new Snoopy();
    // 0 holds the regular download record, 1 holds the failure message
    $log = array(0 => '', 1 => '');
    echo "1 get $app_id info start\n";

    // Icon URL, app title, version and package size; all start out empty so
    // that a failed match never leaves a variable undefined.
    $bigicon_url = $title = $app_version = $apk_size = '';

    // Fetch the detail page
    $url = 'http://www.wandoujia.com/apps/'.$app_id;
    $snoopy->fetch($url);
    $content = $snoopy->results;

    // Replace line feeds and carriage returns with spaces so that the
    // patterns below can match across what used to be several lines.
    $content = preg_replace("/\n/", ' ', $content);
    $content = preg_replace("/\r/", ' ', $content);

    // Each field is only looked up when the previous one was found.
    // $found[1] is the text captured by the first group of the pattern.
    $i = preg_match('/class="app-icon".*?rc="(.*?)"/', $content, $found);
    if ($i) {
        $bigicon_url = $found[1];

        // App title
        $i = preg_match('/class="app-name".*?itemprop="name">(.*?)</', $content, $found);
        if ($i) {
            $title = trim($found[1]);
        }
    }
    // Version number
    if ($i) {
        $i = preg_match('/<dt>版本<\/dt>.*?<dd>(.*?)</', $content, $found);
        if ($i) {
            $app_version = trim($found[1]);
        }
    }
    // APK package size
    if ($i) {
        $i = preg_match('/<dt>大小<\/dt>.*?<dd>(.*?)</', $content, $found);
        if ($i) {
            $apk_size = trim($found[1]);
        }
    }

    // Without icon, title and version the page is treated as not retrieved.
    if (!($bigicon_url && $title && $app_version)) {
        $log[0] = $app_id.'|';
        $log[1] = '-----'.$app_id.' get html failed. -----';
        echo $log[1]."\n";
        return $log;
    }

    $app_dir = $new_dir.DIRECTORY_SEPARATOR.$app_id;
    if (!file_exists($app_dir)) {
        if (!mkdir($app_dir)) {
            // Nothing can be saved without the directory; report and stop.
            $log[0] = $app_id.'|';
            $log[1] = '-----'.$app_id.' can not create app directory. -----';
            echo $log[1]."\n";
            return $log;
        }
        chmod($app_dir, 0777);
    }

    $log[0] = $app_id.'|'.$title.'|'.$app_version.'|';

    // Download the icon and the APK package
    echo "2 get $app_id info finished\n";
    echo "3 get $app_id image start\n";
    $img_size = download($app_id, 'img', $bigicon_url);
    echo "4 get $app_id image finished\n";

    echo "5 get $app_id app start. size: $apk_size\n";
    // Strip the "M" unit so only the number is handed to download()
    $apk_size = str_ireplace('M', '', $apk_size);
    $app_size = download($app_id, 'app', '', $apk_size);
    echo "6 get $app_id app finished\n";

    // download() returns the bare unit 'B' when no file was saved
    if ($app_size == 'B') {
        $log[1] = '-----'. $app_id . ' downloaded apk failure.-----';
        echo $log[1]."\n";
    }
    $log[0] .= $app_size;

    // Keep the summary next to the downloaded files
    file_put_contents($app_dir.DIRECTORY_SEPARATOR.'apkinfo.txt', $log[0]);

    return $log;
}

/*
 * Download the icon or the APK package of an app.
 *   $type     'img' or 'app': icon or application package
 *   $img_url  icon address, only needed when $type is 'img'
 *   $apk_size package size, needed when the regular download does not work
 */
function download($app_id, $type='img', $img_url='', $apk_size=0)
{
    // Returns the size of the saved file followed by 'B' (e.g. "12345B"),
    // or the bare string 'B' when nothing was saved.
    global $new_dir;

    // Download helper object
    $httpdown = new DedeHttpDown();

    if ($type == 'app') {
        // Build the download address
        $app_url = 'http://apps.wandoujia.com/apps/'.$app_id.'/download?pos=www/detail';
        echo $app_url."\n";

        // Full path the package is saved to
        $app_file = $new_dir . DIRECTORY_SEPARATOR . $app_id . DIRECTORY_SEPARATOR . $app_id.'.apk';

        // The download sometimes fails: try twice, then fall through
        for ($i = 1; $i <= 2; $i++) {
            echo 'downloaded times: '.$i."\n";
            $httpdown->OpenUrl($app_url);
            $httpdown->SaveToBin($app_file);
            $httpdown->Close();
            if (file_exists($app_file)) {
                break;
            }
            // Wait three seconds so the site does not block the IP
            sleep(3);
        }

        // Last attempt with Snoopy, which keeps the whole body in memory.
        // Only do so when the package fits into the PHP memory limit with
        // 2 MB to spare, or when the limit is disabled.
        if (!file_exists($app_file)) {
            echo 'downloaded times: 3'."\n";
            $limit_mb = downloadMemoryLimitMb();
            $size_mb = (float) $apk_size;
            if ($size_mb > 0 && ($limit_mb < 0 || $limit_mb - 2 > $size_mb)) {
                $snoopy = new Snoopy();
                $snoopy->fetch($app_url);
                $content = $snoopy->results;
                // Do not create an empty file that would count as success
                if (is_string($content) && $content !== '') {
                    file_put_contents($app_file, $content);
                }
            }
        }

        unset($httpdown);

        return file_exists($app_file) ? filesize($app_file).'B' : 'B';
    }

    if ($type == 'img') {
        // Full path the icon is saved to
        $bigicon = $new_dir . DIRECTORY_SEPARATOR . $app_id . DIRECTORY_SEPARATOR . 'bigicon.png';
        $httpdown->OpenUrl($img_url);
        $httpdown->SaveToBin($bigicon);
        $httpdown->Close();
        unset($httpdown);

        return file_exists($bigicon) ? filesize($bigicon).'B' : 'B';
    }

    // Unknown type: nothing was downloaded
    return 'B';
}

/**
 * Return the PHP memory_limit setting in megabytes.
 *
 * The ini value may be a plain byte count or use the K/M/G shorthand, and
 * -1 disables the limit.
 *
 * @return float limit in MB, -1 when unlimited, 0 when the value is empty
 */
function downloadMemoryLimitMb()
{
    $raw = trim((string) ini_get('memory_limit'));
    if ($raw === '') {
        return 0.0;
    }

    // An explicit cast reads the leading number without a warning
    $value = (float) $raw;
    if ($value < 0) {
        return -1.0;
    }

    switch (strtoupper(substr($raw, -1))) {
        case 'G':
            return $value * 1024;
        case 'M':
            return $value;
        case 'K':
            return $value / 1024;
        default:
            // Plain number: bytes
            return $value / 1048576;
    }
}
?>

Snoopy.class.php

<?php

/*************************************************

Snoopy - the PHP net client
Author: Monte Ohrt <monte@ispi.net>
Copyright (c): 1999-2008 New Digital Group, all rights reserved
Version: 1.2.4

 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

You may contact the author of Snoopy by e-mail at:
monte@ohrt.com

The latest version of Snoopy can be obtained from:
http://snoopy.sourceforge.net/

*************************************************/

class Snoopy
{
    /**** Public variables ****/

    /* user definable vars */

    var $host            =    "www.php.net";        // host name we are connecting to
    var $port            =    80;                   // port we are connecting to
    var $proxy_host      =    "";                   // proxy host to use
    var $proxy_port      =    "";                   // proxy port to use
    var $proxy_user      =    "";                   // proxy user to use
    var $proxy_pass      =    "";                   // proxy password to use

    var $agent           =    "Snoopy v1.2.4";      // agent we masquerade as
    var $referer         =    "";                   // referer info to pass
    var $cookies         =    array();              // array of cookies to pass
                                                    // $cookies["username"]="joe";
    var $rawheaders      =    array();              // array of raw headers to send
                                                    // $rawheaders["Content-type"]="text/html";

    var $maxredirs       =    5;                    // http redirection depth maximum. 0 = disallow
    var $lastredirectaddr =   "";                   // contains address of last redirected address
    var $offsiteok       =    true;                 // allows redirection off-site
    var $maxframes       =    0;                    // frame content depth maximum. 0 = disallow
    var $expandlinks     =    true;                 // expand links to fully qualified URLs.
                                                    // this only applies to fetchlinks()
                                                    // submitlinks(), and submittext()
    var $passcookies     =    true;                 // pass set cookies back through redirects
                                                    // NOTE: this currently does not respect
                                                    // dates, domains or paths.

    var $user            =    "";                   // user for http authentication
    var $pass            =    "";                   // password for http authentication

    // http accept types
    var $accept          =    "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

    var $results         =    "";                   // where the content is put

    var $error           =    "";                   // error messages sent here
    var $response_code   =    "";                   // response code returned from server
    var $headers         =    array();              // headers returned from server sent here
    var $maxlength       =    500000;               // max return data length (body)
    var $read_timeout    =    0;                    // timeout on read operations, in seconds
                                                    // supported only since PHP 4 Beta 4
                                                    // set to 0 to disallow timeouts
    var $timed_out       =    false;                // if a read operation timed out
    var $status          =    0;                    // http request status

    var $temp_dir        =    "/tmp";               // temporary directory that the webserver
                                                    // has permission to write to.
                                                    // under Windows, this should be C:\temp

    var $curl_path       =    "/usr/local/bin/curl";
                                                    // Snoopy will use cURL for fetching
                                                    // SSL content if a full system path to
                                                    // the cURL binary is supplied here.
                                                    // set to false if you do not have
                                                    // cURL installed. See http://curl.haxx.se
                                                    // for details on installing cURL.
                                                    // Snoopy does *not* use the cURL
                                                    // library functions built into php,
                                                    // as these functions are not stable
                                                    // as of this Snoopy release.

    /**** Private variables ****/

    var $_maxlinelen     =    4096;                 // max line length (headers)

    var $_httpmethod     =    "GET";                // default http request method
    var $_httpversion    =    "HTTP/1.0";           // default http request version
    var $_submit_method  =    "POST";               // default submit method
    var $_submit_type    =    "application/x-www-form-urlencoded";    // default submit type
    var $_mime_boundary  =    "";                   // MIME boundary for multipart/form-data submit type
    var $_redirectaddr   =    false;                // will be set if page fetched is a redirect
    var $_redirectdepth  =    0;                    // increments on an http redirect
    var $_frameurls      =    array();              // frame src urls
    var $_framedepth     =    0;                    // increments on frame depth

    var $_isproxy        =    false;                // set if using a proxy server
    var $_fp_timeout     =    30;                   // timeout for socket connection

/*======================================================================*\
    Function:    fetch
    Purpose:     fetch the contents of a web page
                 (and possibly other protocols in the
                 future like ftp, nntp, gopher, etc.)
    Input:       $URI    the location of the page to fetch
    Output:      $this->results    the output text from the fetch
\*======================================================================*/

    /**
     * Fetch $URI over http (socket) or https (external curl binary), then
     * follow a pending redirect and fetch collected frame URLs.
     *
     * @param string $URI location of the page to fetch
     * @return bool false when the connection fails, curl is unavailable for
     *              https, or the scheme is unsupported; true otherwise
     */
    function fetch($URI)
    {
        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();    // parse_url() returns false for seriously malformed URLs

        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        // A URI without a scheme ends up in the "Invalid protocol" branch.
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch (strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if (!$this->_connect($fp))
                    return false;

                if ($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
                }

                $this->_disconnect($fp);
                break;

            case "https":
                if (!$this->curl_path)
                    return false;
                if (function_exists("is_executable"))
                    if (!is_executable($this->curl_path))
                        return false;

                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if ($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI, $URI, $this->_httpmethod);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httpsrequest($path, $URI, $this->_httpmethod);
                }
                break;

            default:
                // not a valid protocol
                $this->error    =    'Invalid protocol "'.$scheme.'"\n';
                return false;
        }

        // The steps below were duplicated verbatim in both protocol branches.

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // only follow redirect if it's on this site, or offsiteok is true
            if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
            {
                /* follow the redirect */
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                $this->fetch($this->_redirectaddr);
            }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            // foreach replaces while(list() = each()); each() no longer exists in PHP 8
            foreach ($frameurls as $frameurl)
            {
                if ($this->_framedepth < $this->maxframes)
                {
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
                else
                    break;
            }
        }

        return true;
    }

/*======================================================================*\
    Function:    submit
    Purpose:     submit an http form
    Input:       $URI    the location to post the data
                 $formvars    the formvars to use.
                     format: $formvars["var"] = "val";
                 $formfiles  an array of files to submit
                     format: $formfiles["var"] = "/dir/filename.ext";
    Output:      $this->results    the text output from the post
\*======================================================================*/

    /**
     * Submit form data to $URI over http (socket) or https (external curl
     * binary), then follow a pending redirect and fetch collected frame URLs.
     *
     * @param string $URI       location to post the data to
     * @param mixed  $formvars  form variables, $formvars["var"] = "val"
     * @param mixed  $formfiles files to submit, $formfiles["var"] = "/dir/filename.ext"
     * @return bool false when the connection fails, curl is unavailable for
     *              https, or the scheme is unsupported; true otherwise
     */
    function submit($URI, $formvars="", $formfiles="")
    {
        $postdata = $this->_prepare_post_body($formvars, $formfiles);

        $URI_PARTS = parse_url($URI);
        if (!is_array($URI_PARTS))
            $URI_PARTS = array();    // parse_url() returns false for seriously malformed URLs

        if (!empty($URI_PARTS["user"]))
            $this->user = $URI_PARTS["user"];
        if (!empty($URI_PARTS["pass"]))
            $this->pass = $URI_PARTS["pass"];
        if (empty($URI_PARTS["query"]))
            $URI_PARTS["query"] = '';
        if (empty($URI_PARTS["path"]))
            $URI_PARTS["path"] = '';

        // A URI without a scheme ends up in the "Invalid protocol" branch.
        $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

        switch (strtolower($scheme))
        {
            case "http":
                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if (!$this->_connect($fp))
                    return false;

                if ($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }

                $this->_disconnect($fp);
                break;

            case "https":
                if (!$this->curl_path)
                    return false;
                if (function_exists("is_executable"))
                    if (!is_executable($this->curl_path))
                        return false;

                $this->host = $URI_PARTS["host"];
                if (!empty($URI_PARTS["port"]))
                    $this->port = $URI_PARTS["port"];

                if ($this->_isproxy)
                {
                    // using proxy, send entire URI
                    $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                else
                {
                    $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                    // no proxy, send only the path
                    $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
                }
                break;

            default:
                // not a valid protocol
                $this->error    =    'Invalid protocol "'.$scheme.'"\n';
                return false;
        }

        // The steps below were duplicated verbatim in both protocol branches.

        /* url was redirected, check if we've hit the max depth */
        if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
        {
            // a redirect target without the request's scheme is treated as relative
            if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
                $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

            // only follow redirect if it's on this site, or offsiteok is true
            if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
            {
                /* follow the redirect */
                $this->_redirectdepth++;
                $this->lastredirectaddr = $this->_redirectaddr;
                if (strpos($this->_redirectaddr, "?") > 0)
                    $this->fetch($this->_redirectaddr);    // the redirect has changed the request method from post to get
                else
                    $this->submit($this->_redirectaddr, $formvars, $formfiles);
            }
        }

        if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
        {
            $frameurls = $this->_frameurls;
            $this->_frameurls = array();

            // foreach replaces while(list() = each()); each() no longer exists in PHP 8
            foreach ($frameurls as $frameurl)
            {
                if ($this->_framedepth < $this->maxframes)
                {
                    $this->fetch($frameurl);
                    $this->_framedepth++;
                }
                else
                    break;
            }
        }

        return true;
    }

/*======================================================================*\
    Function:    fetchlinks
    Purpose:     fetch the links from a web page
    Input:       $URI    where you are fetching from
    Output:      $this->results    an array of the URLs
\*======================================================================*/

    /**
     * Fetch $URI and replace $this->results with the links found in it.
     *
     * @param string $URI page to fetch
     * @return bool result of fetch(); false leaves $this->results untouched
     */
    function fetchlinks($URI)
    {
        if (!$this->fetch($URI))
            return false;

        // Relative links are expanded against the final address after redirects.
        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results))
        {
            for ($x = 0; $x < count($this->results); $x++)
                $this->results[$x] = $this->_striplinks($this->results[$x]);
        }
        else
            $this->results = $this->_striplinks($this->results);

        if ($this->expandlinks)
            $this->results = $this->_expandlinks($this->results, $URI);

        return true;
    }

/*======================================================================*\
    Function:    fetchform
    Purpose:     fetch the form elements from a web page
    Input:       $URI    where you are fetching from
    Output:      $this->results    the resulting html form
\*======================================================================*/

    /**
     * Fetch $URI and replace $this->results with its form elements.
     *
     * @param string $URI page to fetch
     * @return bool result of fetch(); false leaves $this->results untouched
     */
    function fetchform($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (is_array($this->results))
        {
            for ($x = 0; $x < count($this->results); $x++)
                $this->results[$x] = $this->_stripform($this->results[$x]);
        }
        else
            $this->results = $this->_stripform($this->results);

        return true;
    }

/*======================================================================*\
    Function:    fetchtext
    Purpose:     fetch the text from a web page, stripping the links
    Input:       $URI    where you are fetching from
    Output:      $this->results    the text from the web page
\*======================================================================*/

    /**
     * Fetch $URI and replace $this->results with its plain text.
     *
     * @param string $URI page to fetch
     * @return bool result of fetch(); false leaves $this->results untouched
     */
    function fetchtext($URI)
    {
        if (!$this->fetch($URI))
            return false;

        if (is_array($this->results))
        {
            for ($x = 0; $x < count($this->results); $x++)
                $this->results[$x] = $this->_striptext($this->results[$x]);
        }
        else
            $this->results = $this->_striptext($this->results);

        return true;
    }

/*======================================================================*\
    Function:    submitlinks
    Purpose:     grab links from a form submission
    Input:       $URI    where you are submitting from
    Output:      $this->results    an array of the links from the post
\*======================================================================*/

    /**
     * Submit a form and replace $this->results with the links of the response.
     *
     * @param string $URI       location to post the data to
     * @param mixed  $formvars  form variables passed on to submit()
     * @param mixed  $formfiles files passed on to submit()
     * @return bool result of submit(); false leaves $this->results untouched
     */
    function submitlinks($URI, $formvars="", $formfiles="")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        // Relative links are expanded against the final address after redirects.
        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results))
        {
            for ($x = 0; $x < count($this->results); $x++)
            {
                $this->results[$x] = $this->_striplinks($this->results[$x]);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
        else
        {
            $this->results = $this->_striplinks($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }

/*======================================================================*\
    Function:    submittext
    Purpose:     grab text from a form submission
    Input:       $URI    where you are submitting from
    Output:      $this->results    the text from the web page
\*======================================================================*/

    /**
     * Submit a form and replace $this->results with the text of the response.
     *
     * @param string $URI       location to post the data to
     * @param mixed  $formvars  form variables passed on to submit()
     * @param mixed  $formfiles files passed on to submit()
     * @return bool result of submit(); false leaves $this->results untouched
     */
    function submittext($URI, $formvars = "", $formfiles = "")
    {
        if (!$this->submit($URI, $formvars, $formfiles))
            return false;

        if ($this->lastredirectaddr)
            $URI = $this->lastredirectaddr;

        if (is_array($this->results))
        {
            for ($x = 0; $x < count($this->results); $x++)
            {
                $this->results[$x] = $this->_striptext($this->results[$x]);
                if ($this->expandlinks)
                    $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
            }
        }
        else
        {
            $this->results = $this->_striptext($this->results);
            if ($this->expandlinks)
                $this->results = $this->_expandlinks($this->results, $URI);
        }

        return true;
    }

/*======================================================================*\
    Function:    set_submit_multipart
    Purpose:     Set the form submission content type to
                 multipart/form-data
\*======================================================================*/

    /**
     * Select multipart/form-data as the content type of later submissions.
     */
    function set_submit_multipart()
    {
        $submit_type = "multipart/form-data";
        $this->_submit_type = $submit_type;
    }

/*======================================================================*\
    Function:    set_submit_normal
    Purpose:     Set the form submission content type to
                 application/x-www-form-urlencoded
\*======================================================================*/

    /**
     * Select application/x-www-form-urlencoded as the content type of later
     * submissions.
     */
    function set_submit_normal()
    {
        $submit_type = "application/x-www-form-urlencoded";
        $this->_submit_type = $submit_type;
    }

/*======================================================================*\
    Private functions
\*======================================================================*//*======================================================================*\Function:    _striplinksPurpose:    strip the hyperlinks from an html documentInput:        $document    document to strip.Output:        $match        an array of the links
\*======================================================================*/

    /**
     * Collect the href targets of all <a> tags in $document.
     *
     * Quoted targets (capture group 2) are listed first, unquoted targets
     * (capture group 3) after them; empty captures are dropped.
     *
     * @param string $document html document to scan
     * @return array list of link targets, empty when none were found
     */
    function _striplinks($document)
    {
        // The pattern uses the x modifier: whitespace is ignored and each
        // "#" comment runs to the end of its line, so the line breaks inside
        // the string are required.
        preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                        ([\"\'])?                       # find single or double quote
                        (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                        # quote, otherwise match up to next space
                        'isx", $document, $links);

        // Start from an empty list so that a page without links yields an
        // array instead of an undefined variable.
        $match = array();

        // catenate the non-empty matches from the conditional subpattern
        // (foreach replaces while(list() = each()); each() no longer exists in PHP 8)
        foreach ($links[2] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }

        foreach ($links[3] as $val)
        {
            if (!empty($val))
                $match[] = $val;
        }

        // return the links
        return $match;
    }

/*======================================================================*\
    Function:    _stripform
    Purpose:     strip the form elements from an html document
    Input:       $document    document to strip.
    Output:      $match       an array of the links
\*======================================================================*/

    /**
     * Extract the form-related tags (form, input, select, textarea, option)
     * from $document.
     *
     * @param string $document html document to scan
     * @return string the matched elements joined with "\r\n"
     */
    function _stripform($document)
    {
        preg_match_all("'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi",$document,$elements);

        // catenate the matches
        $match = implode("\r\n",$elements[0]);

        // return the form elements
        return $match;
    }

/*======================================================================*\
    Function:    _striptext
    Purpose:     strip the text from an html document
    Input:       $document    document to strip.
    Output:      $text        the resulting text
\*======================================================================*/

    /**
     * Reduce an html document to text: scripts and tags are removed,
     * whitespace after line breaks is collapsed and a fixed list of html
     * entities is replaced by single characters.
     *
     * @param string $document html document to strip
     * @return string the resulting text
     */
    function _striptext($document)
    {
        // I didn't use preg eval (//e) since that is only available in PHP 4.0.
        // so, list your entities one by one here. I included some of the
        // more common ones.

        // $search and $replace are parallel lists: entry n of $search is
        // replaced by entry n of $replace.
        $search = array("'<script[^>]*?>.*?</script>'si",    // strip out javascript
                        "'<[\/\!]*?[^<>]*?>'si",            // strip out html tags
                        "'([\r\n])[\s]+'",                  // strip out white space
                        "'&(quot|#34|#034|#x22);'i",        // replace html entities
                        "'&(amp|#38|#038|#x26);'i",         // added hexadecimal values
                        "'&(lt|#60|#060|#x3c);'i",
                        "'&(gt|#62|#062|#x3e);'i",
                        "'&(nbsp|#160|#xa0);'i",
                        "'&(iexcl|#161);'i",
                        "'&(cent|#162);'i",
                        "'&(pound|#163);'i",
                        "'&(copy|#169);'i",
                        "'&(reg|#174);'i",
                        "'&(deg|#176);'i",
                        "'&(#39|#039|#x27);'",
                        "'&(euro|#8364);'i",                // europe
                        "'&a(uml|UML);'",                   // german
                        "'&o(uml|UML);'",
                        "'&u(uml|UML);'",
                        "'&A(uml|UML);'",
                        "'&O(uml|UML);'",
                        "'&U(uml|UML);'",
                        // NOTE(review): this copy showed a literal sharp-s here,
                        // presumably a decoded "&szlig;" entity - confirm.
                        "'&szlig;'i",
                        );

        // NOTE(review): the seven German replacements were unreadable in this
        // copy. They are written as single-byte chr() codes to match the
        // neighbouring chr(161)...chr(176) entries - confirm the intended encoding.
        $replace = array(   "",
                            "",
                            "\\1",
                            "\"",
                            "&",
                            "<",
                            ">",
                            " ",
                            chr(161),
                            chr(162),
                            chr(163),
                            chr(169),
                            chr(174),
                            chr(176),
                            chr(39),
                            chr(128),
                            chr(228),    // a umlaut
                            chr(246),    // o umlaut
                            chr(252),    // u umlaut
                            chr(196),    // A umlaut
                            chr(214),    // O umlaut
                            chr(220),    // U umlaut
                            chr(223),    // sharp s
                        );

        $text = preg_replace($search,$replace,$document);

        return $text;
    }

/*======================================================================*\
    Function:    _expandlinks
    Purpose:     expand each link into a fully qualified URL
    Input:       $links    the links to qualify
                 $URI      the full URI to get the base from
    Output:      $expandedLinks    the expanded links
\*======================================================================*/

    /**
     * Turn the given link(s) into absolute URLs, using $URI as the base.
     *
     * @param mixed  $links one link or an array of links
     * @param string $URI   full URI the base is derived from
     * @return mixed the expanded link(s), as returned by preg_replace()
     */
    function _expandlinks($links,$URI)
    {
        // Base: the URI without query string, without a trailing
        // "name.ext" segment and without a trailing slash.
        preg_match("/^[^\?]+/", $URI, $without_query);
        $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $without_query[0]);
        $base = preg_replace("|/$|", "", $base);

        // Root: scheme and host of the base
        $base_parts = parse_url($base);
        $root = $base_parts["scheme"]."://".$base_parts["host"];

        // Parallel pattern/replacement lists, applied in this order.
        $patterns = array();
        $substitutes = array();

        $patterns[] = "|^http://".preg_quote($this->host)."|i";
        $substitutes[] = "";

        $patterns[] = "|^(\/)|i";
        $substitutes[] = $root."/";

        $patterns[] = "|^(?!http://)(?!mailto:)|i";
        $substitutes[] = $base."/";

        $patterns[] = "|/\./|";
        $substitutes[] = "/";

        $patterns[] = "|/[^\/]+/\.\./|";
        $substitutes[] = "/";

        return preg_replace($patterns, $substitutes, $links);
    }

/*======================================================================*\
    Function:    _httprequest
    Purpose:     go get the http data from the server
    Input:       $url     the url to fetch
                 $fp      the current open file pointer
                 $URI     the full URI
                 $body    body contents to send if any (POST)
    Output:
\*======================================================================*/

    /**
     * Send one HTTP request over the open socket $fp and read the response.
     *
     * Response headers are stored in $this->headers; status, response code
     * and a redirect target (Location/URI header or meta refresh) are
     * recorded; the body ends up in $this->results.
     *
     * @param string   $url          request target for the request line
     * @param resource $fp           open connection to the server
     * @param string   $URI          full URI of the request
     * @param string   $http_method  request method, e.g. "GET"
     * @param string   $content_type Content-type header value, "" for none
     * @param string   $body         request body, "" for none
     * @return bool false on a read timeout (status -100), true otherwise
     */
    function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
    {
        $cookie_headers = '';
        if ($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $URI_PARTS = parse_url($URI);
        if (empty($url))
            $url = "/";
        $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
        if (!empty($this->agent))
            $headers .= "User-Agent: ".$this->agent."\r\n";
        // a Host entry in rawheaders takes precedence over the generated one
        if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
            $headers .= "Host: ".$this->host;
            if (!empty($this->port))
                $headers .= ":".$this->port;
            $headers .= "\r\n";
        }
        if (!empty($this->accept))
            $headers .= "Accept: ".$this->accept."\r\n";
        if (!empty($this->referer))
            $headers .= "Referer: ".$this->referer."\r\n";
        if (!empty($this->cookies))
        {
            if (!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            reset($this->cookies);
            if (count($this->cookies) > 0) {
                $cookie_headers .= 'Cookie: ';
                foreach ($this->cookies as $cookieKey => $cookieVal) {
                    $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; "
                $headers .= substr($cookie_headers,0,-2) . "\r\n";
            }
        }
        if (!empty($this->rawheaders))
        {
            if (!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach replaces while(list() = each()); each() no longer exists in PHP 8
            foreach ($this->rawheaders as $headerKey => $headerVal)
                $headers .= $headerKey.": ".$headerVal."\r\n";
        }
        if (!empty($content_type)) {
            $headers .= "Content-type: $content_type";
            if ($content_type == "multipart/form-data")
                $headers .= "; boundary=".$this->_mime_boundary;
            $headers .= "\r\n";
        }
        if (!empty($body))
            $headers .= "Content-length: ".strlen($body)."\r\n";
        if (!empty($this->user) || !empty($this->pass))
            $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

        //add proxy auth headers
        if (!empty($this->proxy_user))
            $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

        $headers .= "\r\n";

        // set the read timeout if needed
        if ($this->read_timeout > 0)
            socket_set_timeout($fp, $this->read_timeout);
        $this->timed_out = false;

        fwrite($fp,$headers.$body,strlen($headers.$body));

        $this->_redirectaddr = false;
        $this->headers = array();

        while ($currentHeader = fgets($fp,$this->_maxlinelen))
        {
            if ($this->read_timeout > 0 && $this->_check_timeout($fp))
            {
                $this->status=-100;
                return false;
            }

            // an empty line ends the header section
            if ($currentHeader == "\r\n")
                break;

            // if a header begins with Location: or URI:, set the redirect
            if (preg_match("/^(Location:|URI:)/i",$currentHeader))
            {
                // get URL portion of the redirect
                preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
                // look for :// in the Location header to see if hostname is included
                if (!preg_match("|\:\/\/|",$matches[2]))
                {
                    // no host in the path, so prepend
                    $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                    // eliminate double slash
                    if (!preg_match("|^/|",$matches[2]))
                        $this->_redirectaddr .= "/".$matches[2];
                    else
                        $this->_redirectaddr .= $matches[2];
                }
                else
                    $this->_redirectaddr = $matches[2];
            }

            if (preg_match("|^HTTP/|",$currentHeader))
            {
                if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                {
                    $this->status= $status[1];
                }
                $this->response_code = $currentHeader;
            }

            $this->headers[] = $currentHeader;
        }

        // read the body in chunks of $this->maxlength until nothing is left
        $results = '';
        do {
            $_data = fread($fp, $this->maxlength);
            if ($_data === false || strlen($_data) == 0) {
                break;
            }
            $results .= $_data;
        } while(true);

        if ($this->read_timeout > 0 && $this->_check_timeout($fp))
        {
            $this->status=-100;
            return false;
        }

        // check if there is a a redirect meta tag
        if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            // $this->results starts out as a string; [] cannot be applied
            // to a string, so switch to a list first.
            if (!is_array($this->results))
                $this->results = array();
            $this->results[] = $results;
            for ($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif (is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        return true;
    }

/*======================================================================*\
    Function:    _httpsrequest
    Purpose:     go get the https data from the server using curl
    Input:       $url     the url to fetch
                 $URI     the full URI
                 $body    body contents to send if any (POST)
    Output:
\*======================================================================*/

    /**
     * Fetch an https URI by shelling out to the curl binary at $this->curl_path.
     *
     * Request headers are built from the object's configuration and passed to
     * curl with -H; the response head is written by curl to a temporary file
     * (-D) and parsed for redirects, the body is taken from curl's output.
     *
     * @param string $url          request target ("/" if empty); not passed to curl
     * @param string $URI          full URI handed to curl
     * @param string $http_method  accepted for signature parity; curl picks the
     *                             method itself (-d implies POST)
     * @param string $content_type Content-type header value, if any
     * @param string $body         request body, if any
     * @return bool true on success, false when curl failed ($this->error is set)
     */
    function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
    {
        if($this->passcookies && $this->_redirectaddr)
            $this->setcookies();

        $headers = array();
        // must exist before the ".=" appends below
        $cmdline_params = '';

        $URI_PARTS = parse_url($URI);
        if(empty($url))
            $url = "/";
        // GET ... header not needed for curl
        //$headers[] = $http_method." ".$url." ".$this->_httpversion;
        if(!empty($this->agent))
            $headers[] = "User-Agent: ".$this->agent;
        if(!empty($this->host))
        {
            if(!empty($this->port))
                $headers[] = "Host: ".$this->host.":".$this->port;
            else
                $headers[] = "Host: ".$this->host;
        }
        if(!empty($this->accept))
            $headers[] = "Accept: ".$this->accept;
        if(!empty($this->referer))
            $headers[] = "Referer: ".$this->referer;
        if(!empty($this->cookies))
        {
            if(!is_array($this->cookies))
                $this->cookies = (array)$this->cookies;

            reset($this->cookies);
            if ( count($this->cookies) > 0 ) {
                $cookie_str = 'Cookie: ';
                foreach ( $this->cookies as $cookieKey => $cookieVal ) {
                    $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
                }
                // drop the trailing "; " separator
                $headers[] = substr($cookie_str,0,-2);
            }
        }
        if(!empty($this->rawheaders))
        {
            if(!is_array($this->rawheaders))
                $this->rawheaders = (array)$this->rawheaders;
            // foreach instead of each(): each() no longer exists in PHP 8
            foreach($this->rawheaders as $headerKey => $headerVal)
                $headers[] = $headerKey.": ".$headerVal;
        }
        if(!empty($content_type)) {
            if ($content_type == "multipart/form-data")
                $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
            else
                $headers[] = "Content-type: $content_type";
        }
        if(!empty($body))
            $headers[] = "Content-length: ".strlen($body);
        if(!empty($this->user) || !empty($this->pass))
            $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

        // Every value that reaches the shell is quoted with escapeshellarg();
        // header values, the body and the URI can all carry remote or user data.
        foreach($headers as $header)
            $cmdline_params .= " -H ".escapeshellarg($header);

        if(!empty($body))
            $cmdline_params .= " -d ".escapeshellarg($body);

        if($this->read_timeout > 0)
            $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

        // use the configured temp dir when the object has one, else the system default
        $temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
        $headerfile = tempnam($temp_dir, "sno");
        if($headerfile === false)
        {
            $this->error = "Error: could not create a temporary file for the cURL response headers.";
            return false;
        }

        $results = array();
        exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

        if($return)
        {
            $this->error = "Error: cURL could not retrieve the document, error $return.";
            // do not leave the temp file behind on failure
            if(is_file($headerfile))
                unlink($headerfile);
            return false;
        }

        $results = implode("\r\n",$results);

        $result_headers = file($headerfile);
        if($result_headers === false)
            $result_headers = array();

        $this->_redirectaddr = false;
        // start from an empty list so later count()/foreach on it is always safe
        $this->headers = array();

        for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
        {
            // if a header begins with Location: or URI:, set the redirect
            if(preg_match("/^(Location:|URI:)/i",$result_headers[$currentHeader]))
            {
                // get URL portion of the redirect; only act when one is present
                if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($result_headers[$currentHeader]),$matches))
                {
                    // look for :// in the Location header to see if hostname is included
                    if(!preg_match("|\:\/\/|",$matches[2]))
                    {
                        // no host in the path, so prepend
                        $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                        // eliminate double slash
                        if(!preg_match("|^/|",$matches[2]))
                            $this->_redirectaddr .= "/".$matches[2];
                        else
                            $this->_redirectaddr .= $matches[2];
                    }
                    else
                        $this->_redirectaddr = $matches[2];
                }
            }

            if(preg_match("|^HTTP/|",$result_headers[$currentHeader]))
                $this->response_code = $result_headers[$currentHeader];

            $this->headers[] = $result_headers[$currentHeader];
        }

        // check if there is a redirect meta tag
        if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
        {
            $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
        }

        // have we hit our frame depth and is there frame src to fetch?
        if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
        {
            $this->results[] = $results;
            for($x=0; $x<count($match[1]); $x++)
                $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
        }
        // have we already fetched framed content?
        elseif(is_array($this->results))
            $this->results[] = $results;
        // no framed content
        else
            $this->results = $results;

        unlink($headerfile);

        return true;
    }

/*======================================================================*\
    Function:   setcookies()
    Purpose:    set cookies for a redirection
\*======================================================================*/

    /**
     * Copy cookies from the last response's Set-Cookie headers into
     * $this->cookies (name => url-decoded value) so they are sent on the
     * next request.
     *
     * @return void
     */
    function setcookies()
    {
        // The request methods unset() $this->headers before reading a
        // response, so it may be missing here; count() on that is a
        // TypeError on PHP 8.
        if(empty($this->headers) || !is_array($this->headers))
            return;

        foreach($this->headers as $header)
        {
            // only the leading name=value pair is kept; attributes after ";" are ignored
            if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $header, $match))
                $this->cookies[$match[1]] = urldecode($match[2]);
        }
    }

/*======================================================================*\
    Function:   _check_timeout
    Purpose:    checks whether timeout has occurred
    Input:      $fp file pointer
\*======================================================================*/

    /**
     * Report whether the last read on $fp timed out.
     *
     * Only consults the stream when a read timeout is configured; on a
     * timeout the object's timed_out flag is raised as well.
     *
     * @param resource $fp file pointer to inspect
     * @return bool true if the stream reports a timeout, false otherwise
     */
    function _check_timeout($fp)
    {
        // no timeout configured: nothing to check
        if (!($this->read_timeout > 0))
            return false;

        $stream_state = socket_get_status($fp);
        if (!$stream_state["timed_out"])
            return false;

        $this->timed_out = true;
        return true;
    }

/*======================================================================*\
    Function:   _connect
    Purpose:    make a socket connection
    Input:      $fp file pointer
\*======================================================================*/

    /**
     * Open a socket to the target host, or to the proxy when both
     * proxy_host and proxy_port are configured.
     *
     * @param resource $fp set (by reference) to the opened socket on success
     * @return bool true when connected; false otherwise, with $this->status
     *              holding the fsockopen() error number and $this->error a
     *              matching message
     */
    function _connect(&$fp)
    {
        if(!empty($this->proxy_host) && !empty($this->proxy_port))
        {
            $this->_isproxy = true;

            $host = $this->proxy_host;
            $port = $this->proxy_port;
        }
        else
        {
            $host = $this->host;
            $port = $this->port;
        }

        $this->status = 0;

        if($fp = fsockopen($host,$port,$errno,$errstr,$this->_fp_timeout))
        {
            // socket connection succeeded
            return true;
        }

        // socket connection failed
        $this->status = $errno;
        // each case needs its own break, otherwise every error falls through
        // to the generic default message
        switch($errno)
        {
            case -3:
                $this->error="socket creation failed (-3)";
                break;
            case -4:
                $this->error="dns lookup failure (-4)";
                break;
            case -5:
                $this->error="connection refused or timed out (-5)";
                break;
            default:
                $this->error="connection failed (".$errno.")";
        }
        return false;
    }
/*======================================================================*\
    Function:   _disconnect
    Purpose:    disconnect a socket connection
    Input:      $fp file pointer
\*======================================================================*/

    /**
     * Close the given socket.
     *
     * @param resource $fp file pointer to close
     * @return bool the result of fclose()
     */
    function _disconnect($fp)
    {
        $closed = fclose($fp);
        return $closed;
    }

/*======================================================================*\
    Function:   _prepare_post_body
    Purpose:    Prepare post body according to encoding type
    Input:      $formvars  - form variables
                $formfiles - form upload files
    Output:     post body
\*======================================================================*/

    /**
     * Build the request body for a form submission according to
     * $this->_submit_type.
     *
     * For "application/x-www-form-urlencoded" the variables are url-encoded
     * and joined with "&" (a trailing "&" is left in place).  For
     * "multipart/form-data" a fresh boundary is stored in
     * $this->_mime_boundary and each variable and readable file becomes one
     * part.  Array or object values are sent as repeated "name[]" fields.
     *
     * @param mixed $formvars  form variables (name => value or list of values)
     * @param mixed $formfiles upload files (field name => path or list of paths)
     * @return string|null the body; null when there is nothing to send;
     *                     an empty string for an unknown submit type
     */
    function _prepare_post_body($formvars, $formfiles)
    {
        settype($formvars, "array");
        settype($formfiles, "array");
        $postdata = '';

        if (count($formvars) == 0 && count($formfiles) == 0)
            return;

        switch ($this->_submit_type) {
            case "application/x-www-form-urlencoded":
                // foreach instead of each(): each() no longer exists in PHP 8
                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                        }
                    } else
                        $postdata .= urlencode($key)."=".urlencode($val)."&";
                }
                break;

            case "multipart/form-data":
                $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                foreach ($formvars as $key => $val) {
                    if (is_array($val) || is_object($val)) {
                        foreach ($val as $cur_val) {
                            $postdata .= "--".$this->_mime_boundary."\r\n";
                            // {$key}[] yields name="key[]"; the former \[\] put
                            // literal backslashes into the field name
                            $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                            $postdata .= "$cur_val\r\n";
                        }
                    } else {
                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                        $postdata .= "$val\r\n";
                    }
                }

                foreach ($formfiles as $field_name => $file_names) {
                    settype($file_names, "array");
                    foreach ($file_names as $file_name) {
                        if (!is_readable($file_name)) continue;

                        // file_get_contents() reads in binary mode and copes
                        // with empty files, unlike fread($fp, filesize(...))
                        $file_content = file_get_contents($file_name);
                        if ($file_content === false) continue;
                        $base_name = basename($file_name);

                        $postdata .= "--".$this->_mime_boundary."\r\n";
                        $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                        $postdata .= "$file_content\r\n";
                    }
                }
                // closing boundary
                $postdata .= "--".$this->_mime_boundary."--\r\n";
                break;
        }

        return $postdata;
    }
}?>

dedehttpdown.class.php

<?php
/*** 织梦HTTP下载类** @version        $Id: dedehttpdown.class.php 1 11:42 2010年7月6日Z tianya $* @package        DedeCMS.Libraries* @copyright      Copyright (c) 2007 - 2010, DesDev, Inc.* @license        http://help.dedecms.com/usersguide/license.html* @link           http://www.dedecms.com*/
@set_time_limit(0);

/**
 * Small socket-based HTTP client: opens a URL, parses the response head,
 * follows 3xx redirects and exposes the body as text or saves it to a file.
 *
 * Response headers are kept in $m_httphead under lower-cased names, plus the
 * pseudo entries "http-edition", "http-state" and "http-describe" taken from
 * the status line.  Extra request headers are kept in $m_puthead.
 */
class DedeHttpDown
{
    var $m_url = '';
    var $m_urlpath = '';
    var $m_scheme = 'http';
    var $m_host = '';
    var $m_port = '80';
    var $m_user = '';
    var $m_pass = '';
    var $m_path = '/';
    var $m_query = '';
    var $m_fp = '';
    var $m_error = '';
    // Both header maps must start as arrays: writing a string key into a
    // string property is an error on current PHP versions.
    var $m_httphead = array();
    var $m_html = '';
    var $m_puthead = array();
    var $BaseUrlPath = '';
    var $HomeUrl = '';
    var $reTry = 0;
    var $JumpCount = 0;

    /**
     * Split a URL into the connection fields (host, port, path, query, ...)
     * and derive HomeUrl / BaseUrlPath used for resolving relative links.
     *
     * @access    public
     * @param     string    $url   address to download
     * @return    void
     */
    function PrivateInit($url)
    {
        if($url=='') { return ; }
        $urls = parse_url($url);
        $this->m_url = $url;
        if(is_array($urls))
        {
            // host and path may be absent from parse_url()'s result
            $this->m_host = isset($urls["host"]) ? $urls["host"] : '';
            if(!empty($urls["scheme"]))
            {
                $this->m_scheme = $urls["scheme"];
            }
            if(!empty($urls["user"]))
            {
                $this->m_user = $urls["user"];
            }
            if(!empty($urls["pass"]))
            {
                $this->m_pass = $urls["pass"];
            }
            if(!empty($urls["port"]))
            {
                $this->m_port = $urls["port"];
            }
            if(!empty($urls["path"]))
            {
                $this->m_path = $urls["path"];
            }
            $this->m_urlpath = $this->m_path;
            if(!empty($urls["query"]))
            {
                $this->m_query = $urls["query"];
                $this->m_urlpath .= "?".$this->m_query;
            }
            $this->HomeUrl = $this->m_host;
            $this->BaseUrlPath = $this->HomeUrl.(isset($urls["path"]) ? $urls["path"] : '');
            // cut from the first "/name.ext" segment onwards, then drop a trailing slash
            $this->BaseUrlPath = preg_replace("/\/([^\/]*)\.(.*)$/","/",$this->BaseUrlPath);
            $this->BaseUrlPath = preg_replace("/\/$/","",$this->BaseUrlPath);
        }
    }

    /**
     * Reset the per-URL fields to their defaults.
     *
     * @access    public
     * @return    void
     */
    function ResetAny()
    {
        $this->m_url = "";
        $this->m_urlpath = "";
        $this->m_scheme = "http";
        $this->m_host = "";
        $this->m_port = "80";
        $this->m_user = "";
        $this->m_pass = "";
        $this->m_path = "/";
        $this->m_query = "";
        $this->m_error = "";
    }

    /**
     * Open the given URL and read its response head.
     *
     * @access    public
     * @param     string    $url           address
     * @param     string    $requestType   request type ("GET" or "POST")
     * @return    void
     */
    function OpenUrl($url,$requestType="GET")
    {
        $this->ResetAny();
        $this->JumpCount = 0;
        $this->m_httphead = Array() ;
        $this->m_html = '';
        $this->reTry = 0;
        $this->Close();

        // initialise the connection fields
        $this->PrivateInit($url);
        $this->PrivateStartSession($requestType);
    }

    /**
     * Follow a redirect: like OpenUrl(), but always GET and counted in JumpCount.
     *
     * @access    public
     * @param     string   $url   address
     * @return    void
     */
    function JumpOpenUrl($url)
    {
        $this->ResetAny();
        $this->JumpCount++;
        $this->m_httphead = Array() ;
        $this->m_html = "";
        $this->Close();

        // initialise the connection fields
        $this->PrivateInit($url);
        $this->PrivateStartSession('GET');
    }

    /**
     * Print the accumulated error text and the response headers.
     *
     * @access    public
     * @return    void
     */
    function printError()
    {
        echo "错误信息:".$this->m_error;
        echo "<br/>具体返回头:<br/>";
        foreach($this->m_httphead as $k=>$v){ echo "$k => $v <br/>\r\n"; }
    }

    /**
     * Tell whether the response status is 2xx.
     *
     * @access    public
     * @return    bool
     */
    function IsGetOK()
    {
        if( preg_match("/^2/",$this->GetHead("http-state")) )
        {
            return TRUE;
        }
        else
        {
            $this->m_error .= $this->GetHead("http-state")." - ".$this->GetHead("http-describe")."<br/>";
            return FALSE;
        }
    }

    /**
     * Tell whether the response is 2xx with a text or xml content type.
     *
     * @access    public
     * @return    bool
     */
    function IsText()
    {
        if( preg_match("/^2/",$this->GetHead("http-state")) && preg_match("/text|xml/i",$this->GetHead("content-type")) )
        {
            return TRUE;
        }
        else
        {
            $this->m_error .= "内容为非文本类型或网址重定向<br/>";
            return FALSE;
        }
    }

    /**
     * Tell whether the response is 2xx with exactly the given content type.
     *
     * @access    public
     * @param     string   $ctype   content type
     * @return    bool
     */
    function IsContentType($ctype)
    {
        if(preg_match("/^2/",$this->GetHead("http-state"))
        && $this->GetHead("content-type")==strtolower($ctype))
        {
            return TRUE;
        }
        else
        {
            $this->m_error .= "类型不对 ".$this->GetHead("content-type")."<br/>";
            return FALSE;
        }
    }

    /**
     * Stream the rest of the response body into a file, then close the socket.
     *
     * @access    public
     * @param     string    $savefilename  target file name
     * @return    bool      TRUE when the body was written, FALSE otherwise
     */
    function SaveToBin($savefilename)
    {
        if(!$this->IsGetOK())
        {
            return FALSE;
        }
        // feof() needs a live stream; anything else means there is no connection
        if(!is_resource($this->m_fp) || feof($this->m_fp))
        {
            $this->m_error = "连接已经关闭!";
            return FALSE;
        }
        $fp = fopen($savefilename,"wb");
        if(!$fp)
        {
            $this->m_error = "无法写入文件:".$savefilename;
            return FALSE;
        }
        while(!feof($this->m_fp))
        {
            $chunk = fread($this->m_fp, 1024);
            if($chunk === false)
            {
                break;
            }
            fwrite($fp, $chunk);
        }
        fclose($this->m_fp);
        fclose($fp);
        return TRUE;
    }

    /**
     * Save the response body to a file if the response is text.
     *
     * @access    public
     * @param     string    $savefilename  target file name
     * @return    bool|string  result of SaveToBin(), or "" for a non-text response
     */
    function SaveToText($savefilename)
    {
        if($this->IsText())
        {
            // SaveToBin() is the class's only file writer
            return $this->SaveToBin($savefilename);
        }
        else
        {
            return "";
        }
    }

    /**
     * Read and return the response body of a text response.
     *
     * The body is cached in m_html, so repeated calls return the same text.
     *
     * @access    public
     * @return    string
     */
    function GetHtml()
    {
        if(!$this->IsText())
        {
            return '';
        }
        if($this->m_html!='')
        {
            return $this->m_html;
        }
        if(!is_resource($this->m_fp) || feof($this->m_fp))
        {
            return '';
        }
        while(!feof($this->m_fp))
        {
            $this->m_html .= fgets($this->m_fp,256);
        }
        $this->Close();
        return $this->m_html;
    }

    /**
     * Connect, send the request head (and POST data) and parse the response
     * head into m_httphead; follows 3xx responses up to the jump limit.
     *
     * For POST the query string of the URL is sent as the body ("OK" when
     * there is none).
     *
     * @access    public
     * @param     string    $requestType    request type ("GET" or "POST")
     * @return    bool|null FALSE when the host cannot be opened or the retry
     *                      limit is hit; otherwise no meaningful value
     */
    function PrivateStartSession($requestType="GET")
    {
        if(!$this->PrivateOpenHost())
        {
            $this->m_error .= "打开远程主机出错!";
            return FALSE;
        }
        $this->reTry++;
        if($this->GetHead("http-edition")=="HTTP/1.1")
        {
            $httpv = "HTTP/1.1";
        }
        else
        {
            $httpv = "HTTP/1.0";
        }
        $ps = explode('?',$this->m_urlpath);

        $headString = '';

        // request line
        if($requestType=="GET")
        {
            $headString .= "GET ".$this->m_urlpath." $httpv\r\n";
        }
        else
        {
            $headString .= "POST ".$ps[0]." $httpv\r\n";
        }
        // a non-default port has to be part of the Host header
        $this->m_puthead["Host"] = $this->m_host;
        if($this->m_port != '' && $this->m_port != '80')
        {
            $this->m_puthead["Host"] .= ":".$this->m_port;
        }

        // defaults for headers the caller did not set
        if(!isset($this->m_puthead["Accept"]))
        {
            $this->m_puthead["Accept"] = "*/*";
        }
        if(!isset($this->m_puthead["User-Agent"]))
        {
            $this->m_puthead["User-Agent"] = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2)";
        }
        // the header name is "Referer"; "Refer" is not an HTTP header
        if(!isset($this->m_puthead["Referer"]))
        {
            $this->m_puthead["Referer"] = "http://".$this->m_puthead["Host"];
        }

        foreach($this->m_puthead as $k=>$v)
        {
            $k = trim($k);
            $v = trim($v);
            if($k!=""&&$v!="")
            {
                $headString .= "$k: $v\r\n";
            }
        }
        fputs($this->m_fp, $headString);
        if($requestType=="POST")
        {
            $postdata = "";
            if(count($ps)>1)
            {
                for($i=1;$i<count($ps);$i++)
                {
                    $postdata .= $ps[$i];
                }
            }
            else
            {
                $postdata = "OK";
            }
            $plen = strlen($postdata);
            fputs($this->m_fp,"Content-Type: application/x-www-form-urlencoded\r\n");
            fputs($this->m_fp,"Content-Length: $plen\r\n");
        }

        // End of the request head.  HTTP/1.1 must ask the server to close the
        // connection, otherwise feof() cannot be used to detect the end of the body.
        if($httpv=="HTTP/1.1")
        {
            fputs($this->m_fp,"Connection: Close\r\n\r\n");
        }
        else
        {
            fputs($this->m_fp,"\r\n");
        }
        if($requestType=="POST")
        {
            fputs($this->m_fp,$postdata);
        }

        // status line: "<version> <code> <reason...>"
        $statusLine = fgets($this->m_fp);
        $httpstas = explode(" ", $statusLine === false ? '' : $statusLine);
        $this->m_httphead["http-edition"] = trim($httpstas[0]);
        $this->m_httphead["http-state"] = isset($httpstas[1]) ? trim($httpstas[1]) : '';
        $this->m_httphead["http-describe"] = "";
        for($i=2;$i<count($httpstas);$i++)
        {
            $this->m_httphead["http-describe"] .= " ".trim($httpstas[$i]);
        }

        // Response headers.  Lines are read whole: a fixed 256-byte limit cut
        // long values (e.g. Location) into several bogus headers.
        while(!feof($this->m_fp))
        {
            $line = fgets($this->m_fp);
            if($line === false)
            {
                break;
            }
            $line = trim($line);
            if($line == "")
            {
                break;
            }
            // split at the first ":"; a line without one is a name with an empty value
            $sep = strpos($line, ":");
            if($sep === false)
            {
                $hkey = $line;
                $hvalue = "";
            }
            else
            {
                $hkey = substr($line, 0, $sep);
                $hvalue = (string)substr($line, $sep + 1);
            }
            $hkey = trim($hkey);
            if($hkey!="")
            {
                $this->m_httphead[strtolower($hkey)] = trim($hvalue);
            }
        }

        // connection closed abnormally: retry
        if(feof($this->m_fp))
        {
            if($this->reTry > 10)
            {
                return FALSE;
            }
            // release the dead socket before the retry opens a new one
            $this->Close();
            $this->PrivateStartSession($requestType);
        }

        // 3xx response: follow the Location header
        if(preg_match("/^3/",$this->m_httphead["http-state"]))
        {
            if($this->JumpCount > 3)
            {
                return;
            }
            if(isset($this->m_httphead["location"]))
            {
                $newurl = $this->m_httphead["location"];
                if(preg_match("/^http/i",$newurl))
                {
                    $this->JumpOpenUrl($newurl);
                }
                else
                {
                    $newurl = $this->FillUrl($newurl);
                    $this->JumpOpenUrl($newurl);
                }
            }
            else
            {
                $this->m_error = "无法识别的答复!";
            }
        }
    }

    /**
     * Get the value of a response header (name is case-insensitive).
     *
     * @access    public
     * @param     string    $headname   header name
     * @return    string    the value, or '' when the header is absent
     */
    function GetHead($headname)
    {
        $headname = strtolower($headname);
        return isset($this->m_httphead[$headname]) ? $this->m_httphead[$headname] : '';
    }

    /**
     * Set a request header to send with the next request.
     *
     * @access    public
     * @param     string   $skey    header name
     * @param     string   $svalue  header value
     * @return    void
     */
    function SetHead($skey,$svalue)
    {
        $this->m_puthead[$skey] = $svalue;
    }

    /**
     * Open the socket to m_host:m_port (10 second connect timeout).
     *
     * @access    public
     * @return    bool
     */
    function PrivateOpenHost()
    {
        if($this->m_host=="")
        {
            return FALSE;
        }
        $errno = "";
        $errstr = "";
        $this->m_fp = @fsockopen($this->m_host, $this->m_port, $errno, $errstr,10);
        if(!$this->m_fp)
        {
            $this->m_error = $errstr;
            return FALSE;
        }
        else
        {
            return TRUE;
        }
    }

    /**
     * Close the connection if one is open.
     *
     * @access    public
     * @return    void
     */
    function Close()
    {
        // m_fp starts out as '' and may already be closed; fclose() on
        // anything but a live stream is a TypeError on PHP 8
        if(is_resource($this->m_fp))
        {
            fclose($this->m_fp);
        }
    }

    /**
     * Turn a relative link into an absolute http:// URL, based on the
     * HomeUrl / BaseUrlPath of the last initialised URL.
     *
     * @access    public
     * @param     string   $surl  link to complete
     * @return    string   absolute URL, or "" when it cannot be resolved
     */
    function FillUrl($surl)
    {
        $i = 0;
        $dstr = "";
        $pstr = "";
        $okurl = "";
        $pathStep = 0;
        $surl = trim($surl);
        if($surl=="")
        {
            return "";
        }
        // drop a fragment
        $pos = strpos($surl,"#");
        if($pos>0)
        {
            $surl = substr($surl,0,$pos);
        }
        if($surl[0]=="/")
        {
            // host-relative link
            $okurl = "http://".$this->HomeUrl.$surl;
        }
        else if($surl[0]==".")
        {
            if(strlen($surl)<=1)
            {
                return "";
            }
            else if($surl[1]=="/")
            {
                // "./x": relative to the current directory
                $okurl = "http://".$this->BaseUrlPath."/".substr($surl,2,strlen($surl)-2);
            }
            else
            {
                // "../x": count the ".." steps and keep the remaining segments
                $urls = explode("/",$surl);
                foreach($urls as $u)
                {
                    if($u=="..")
                    {
                        $pathStep++;
                    }
                    else if($i<count($urls)-1)
                    {
                        $dstr .= $urls[$i]."/";
                    }
                    else
                    {
                        $dstr .= $urls[$i];
                    }
                    $i++;
                }
                $urls = explode("/",$this->BaseUrlPath);
                if(count($urls) <= $pathStep)
                {
                    // more ".." steps than directories available
                    return "";
                }
                else
                {
                    $pstr = "http://";
                    for($i=0;$i<count($urls)-$pathStep;$i++)
                    {
                        $pstr .= $urls[$i]."/";
                    }
                    $okurl = $pstr.$dstr;
                }
            }
        }
        else
        {
            if(strlen($surl)<7)
            {
                $okurl = "http://".$this->BaseUrlPath."/".$surl;
            }
            else if(strtolower(substr($surl,0,7))=="http://")
            {
                $okurl = $surl;
            }
            else
            {
                $okurl = "http://".$this->BaseUrlPath."/".$surl;
            }
        }
        // collapse repeated slashes, then put the scheme back
        $okurl = preg_replace("/^(http:\/\/)/i","",$okurl);
        $okurl = preg_replace("/\/{1,}/", "/", $okurl);
        return "http://".$okurl;
    }
}//End Class

转载于:https://www.cnblogs.com/fyy-888/p/5104705.html

相关文章:

javascript中关于this指向问题详解

前 言 LiuDaP 在前端的学习中，我们必然要用到js，js可以说是前端必不可少的东西。在学习js的过程中，我们会经常用到this这个东西，而this的指向问题就变得尤为重要。今天正好有空闲时间，就给大家详细介绍一下js中关于…

mpvue 转uniapp 导航栏样式错乱问题修复 tabbar 样式修复

效果图&#xff1a;修改前&#xff0c;修改后 找了半天没找到原因&#xff0c;只能自己改样式了&#xff0c;下面是样式代码&#xff08;在app.vue 里面加上就行&#xff09; <style>/*每个页面公共css */uni-tabbar {box-sizing: border-box;position: fixed;left: 0;bo…

css规则_CSS规则,将使您的生活更轻松

css规则by Nick Gard尼克加德(Nick Gard) CSS规则&#xff0c;将使您的生活更轻松 (CSS rules that will make your life easier) After years of writing and maintaining a couple of very large web projects and numerous smaller ones, I have developed some heuristics…

在mybatis中模糊查询有三种写法

<select id="selectStudentsByName" resultType="Student"> <!--第一种--> <!-- select id,name,age,score from student where name like '%' #{0} '%' --> <!--第二种--> <!-- select id,name,age,score from student wher…

BZOJ 3566: [SHOI2014]概率充电器

题目：http://www.lydsy.com/JudgeOnline/problem.php?id=3566 首先这题正着想不好想，考虑补集转化。 先dfs一遍，令f[u]=(1-p[u])*∏(1-(1-f[v])*w) f[u]表示u这个点通过其子树并不能联通的概率。 然后考虑v从其父亲连过来的情况，设…

小程序云开发,订阅消息定时批量发送实现代码

需求&#xff1a;做一个类似抽奖结果通知的订阅消息提醒 实现流程&#xff1a; 每个用户需要先授权订阅消息接收&#xff0c;授权成功后把数据存到云开发的数据集合里面&#xff0c;再写个定时器&#xff0c;遍历数据集合的所有数据&#xff0c;拿到后遍历发送订阅消息&#…

机器学习速成课程

Learn the basics of machine learning and data science in this crash course tutorial for beginners from AI Sciences Academy. This course will give you the foundation you need to start learning more advanced material.在此速成课程教程中为AI Sciences Academy的…

H5 画布解决跨域问题,画布保存为图片显示在页面上

实现功能&#xff1a;uniapp H5 使用画布&#xff0c;绘画完之后保存为图片全屏显示完整实现代码&#xff0c;跨域解决方案。 跨域图片解决方案一&#xff1a;&#xff08;使用base64编码&#xff09;网络图片放到画布里面绘画 跨域图片解决方案二&#xff1a;&#xff08;使…

1、IO输入输出流 简介

IO流的分类&#xff1a; * 流向&#xff1a; * 输入流 读取数据 * 输出流 写出数据 * 数据类型&#xff1a; * 字节流 * 字节输入流 读取数据 InputStream * 字节输出流 写出数据 OutputStream * 字符流 * 字符输入流 读取数据 Reader * 字符输出流 写出数据 Writer * * 注意&…

mern技术栈好处?_通过构建运动追踪器应用程序来学习MERN堆栈(MERN教程)

mern技术栈好处?The MERN stack is a popular stack of technologies for building a modern single-page application. In this video course I developed, you will learn the MERN stack by building a full stack exercise tracker application.MERN堆栈是用于构建现代单页…

使用html5进行视频播放

一直以来网页大多是使用 flash 来播放视频。在目前唱衰 flash 的环境下，HTML5 为我们带来了一个网页内视频播放的解决方案—— <video>标签。 在HTML5 中，可以通过HTML标签“audio”和“video”来支持嵌入式的媒体，使开发者能够方便地将…

Linux学习之系统时间同步

一、系统时间的设置 在Linux中设置系统时间，可以用date命令： 1 //查看时间 2 [root@localhost ~]# date 3 2008年 12月 12日 星期五 14:44:12 CST 4 //修改时间 5 [root@localhost ~]# date --set "1/1/09 00:01" < （月/日/年时:分…

uniapp(一) 项目架构,封装

前言&#xff1a; 最近需要搭建一套基于uniapp 的代码模板&#xff0c;适应各平台的快速打包部署&#xff0c;为提高代码复用率&#xff0c;提升生产力&#xff0c;所以需要构建一套优雅的前端项目架构&#xff0c;下面分享记录一下我的封装。 代码封装我暂时分为三个层面&…

linux下安装sbt_如何在Linux上安装SBT

linux下安装sbt介绍 (Introduction) Hi! I am Sanjula, and in this guide I hope to teach you how to install sbt on Linux.嗨&#xff01; 我是Sanjula &#xff0c;我希望在本指南中教您如何在Linux上安装sbt。 Let’s get started!让我们开始吧&#xff01; 什么是sbt&…

switch...case结构

/**switch(要判断的数据){ case 值1: ...;break; case 值2: ...;break; case 值3: ...;break; default: ...;break;}*/ publuc class employee{ int id; // 员工id String name; // 员工名字 int age; // 员工年龄 String phone; // 员工号码 String address; // 员工地址 publ…

自动布局的 弊端 (后续)

自动布局 比 直接写 frame 会慢很多 具体以后再说吧转载于:https://www.cnblogs.com/Ionatan/p/5109211.html

uniapp H5 JSSDK封装使用

先看效果吧, 封装以后使用很方便,两行代码就能得到微信网页开发中的 jssdk 的 wx.config 执行了 wx.ready 还是 wx.error ,如果返回 true 就标识执行了ready , 可以调用JSSDK的 API 了,如下图示例代码: this.$common.Init.call(this); this.wxjssdkInti().then(rr =>…

课程表美化 css_通过这门11小时的免费课程学习HTML和CSS

课程表美化 cssHTML and CSS are essential skills to have for a career in web development. This eleven hour course from John Smilga of Coding Addict will teach HTML and CSS from the scratch. By the end of this course you will be creating your own projects.HT…

JavaScript创建对象的两种方法和遍历对象的属性

创建新对象有两种不同的方法： 定义并创建对象的实例；使用函数来定义对象，然后创建新的对象实例。1.定义并创建对象的实例 var person=new Object(); person.firstname="John"; person.lastname="Doe"; person.age=50; person.eyecolor="…

微信公众号H5订阅消息开发 uniapp订阅消息

简单说一下流程&#xff1a; 在页面带参数跳转到 https://mp.weixin.qq.com/mp/subscribemsg &#xff0c;然后用户授权确认或者取消以后&#xff0c;会返回参数里面 redirect_url 的地址&#xff0c;并且带上openid 等相应参数&#xff0c;在前端的页面判断链接里面携带了相关…

GRUB密码设置

通过编辑GRUB启动参数可以轻松的进入单用户模式从而修改root密码&#xff0c;GRUB的密码设置可分为全局密码和菜单密码。 一&#xff0c;全局密码设置 在splashimage这个参数的下一行可以加上password密码&#xff0c;保存后重新启动计算机&#xff0c;再次登录到GRUB菜单页…

国内使用dropbox_通过创建费用管理器来学习使用Dropbox API

国内使用dropboxLearn how to build an expense organizer with the Dropbox API and JavaScript ES6! 了解如何使用Dropbox API和JavaScript ES6构建费用管理器&#xff01; Dropbox is a content and collaboration platform. Its API allows you to add Dropbox features t…

【洛谷 1345】 奶牛的电信

以前刷试炼场时根本不会的 现在 一眼最小割&#xff01; #include <cstdio> #include <cstring> #include <algorithm> #include <iostream> #define MAXN 1000000 #define INF 100000000 using namespace std; int n,m,s,t; int tot2,g[MAXN],num[MAX…

uniapp兼容H5和小程序订阅消息授权开发封装,使用方便

本文使用 uniapp 框架开发&#xff0c;因为H5的订阅消息和小程序的订阅消息的授权流程不一样&#xff0c;但是很多地方需要使用授权&#xff0c;所以我封装了一个兼容H5和小程序订阅消息授权的方法&#xff0c;使用比较方便&#xff0c;希望能够帮助到你&#xff0c;实测可用。…

dma工作时cpu工不工作_CPU如何工作?

dma工作时cpu工不工作CPU, also known as the microprocessor is the heart and/or brain of a computer. Lets Deep dive into the core of the computer to help us write computer programs efficiently.CPU&#xff0c;也称为微处理器&#xff0c;是计算机的心脏和/或大脑。…

pymsql学习笔记

pymsql学习笔记 1. 执行SQL #!/usr/bin/env python # -*- coding:utf-8 -*- import pymysql# 创建连接 conn pymysql.connect(host127.0.0.1, port3306, userroot, passwd123, dbt1) # 创建游标 cursor conn.cursor()# 执行update&#xff0c;并返回收影响行数, print(effect…

UITextView高度根据内容变化

1. 添加内容变化的通知响应事件&#xff1a;[[NSNotificationCenter defaultCenter] addObserver:self selector:selector(textChanged:) name: UITextViewTextDidChangeNotification object:nil]; 2 实现方法 - (void)textChanged:(NSNotification *)notification{ …

一:搭建一套免费的serverless网站

因为公司需求&#xff0c;需要快速搭建一个公司内部能够访问的资源查看管理的 PC网站&#xff0c;因为没有服务器&#xff0c;没有后端开发&#xff0c;又要快速上线使用&#xff0c;那么 serverless 就成了我的首选方案&#xff0c;下面从零开始搭建。 步骤一. 准备工作 1. …

monorepo_Monorepo开发的要点

monorepoThe word monorepo is a combination between “mono”, as in the Greek word mnos (in translation, alone) and an abbreviation of the word repository. A simple concept if taken verbatim: one lonely repository. The domain is software engineering so we’…

记一次 HTTP信息头管理器使用 的重要性

今天在测试中遇到了一个问题 使用JMeter时请求相关地址参数及方法都填写正确&#xff0c;但是相应数据返回始终不对&#xff0c;例如 查看取样器结果显示 200 正常&#xff0c;但响应数据不符合正常的结果。 经反复检查发现问题如下&#xff1a; 1&#xff09;没有添加HTTP信息…