卓越飞翔博客

卓越飞翔 - 您值得收藏的技术分享站
技术文章1829本站已运行4109

用PHP做的一个有声小说抓取批量下载工具

<?php
// Batch downloader: each run fetches ONE listing page of an album on ysts.cc,
// downloads every chapter linked from it into the m4a/ directory, and then
// records the page number in pageid_.txt so the next run continues with the
// following page.

// Runtime setup: no time/memory limits, keep running if the client disconnects,
// and show every error because this is an interactive maintenance script.
ini_set("max_execution_time", "9000");
ini_set("pcre.backtrack_limit", "-1");
ini_set("pcre.recursion_limit", "-1");
ignore_user_abort(true);
set_time_limit(0);
ini_set("memory_limit", "-1");
header('Content-Type: text/html; charset=UTF-8');
error_reporting(E_ALL);
ini_set("display_errors", 1);

// Resume point: the file stores the last page that finished successfully.
$page_f = "pageid_.txt";
$page = 1;
if (file_exists($page_f)) {
    // Trim and cast so a trailing newline or stray text cannot break the increment.
    $saved = trim((string) file_get_contents($page_f));
    if ($saved !== "") {
        $page = (int) $saved + 1;
    }
}

// Album ID.
$id = 10923;
// Number of the last listing page of the album.
$last_page = 63;
if ($page > $last_page) {
    echo "全部下载完成";
    die();
}

// Nothing below needs to be edited for a different album.

// Start every run with a fresh cookie; http() re-creates cookie.txt on GET.
if (file_exists("cookie.txt")) {
    @unlink("cookie.txt");
}

$header = [
    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
    'Accept-Language: zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6,ja;q=0.5,zh-TW;q=0.4',
    'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
    'Content-Type: application/x-www-form-urlencoded; charset=UTF-8',
    'Origin: http://www.ysts.cc',
    "Referer: https://www.ysts.cc/tingshu/{$id}/41440.html",
];

// Fetch the listing page and collect (chapter id, title) pairs from its links.
$url = "https://www.ysts.cc/tingshu/{$id}/?p={$page}";
$res = http($url, array(), 'GET', $header);
$a_rule = '/<a href="\/tingshu\/'.$id.'\/(\d+)\.html" target="_blank" title="([^"]+)">/s';
if (!preg_match_all($a_rule, $res, $matchA)) {
    exit("未获取");
}

// Make sure the output directory exists; otherwise every write would fail.
$out_dir = "m4a";
if (!is_dir($out_dir) && !mkdir($out_dir, 0777, true) && !is_dir($out_dir)) {
    exit("无法创建目录 {$out_dir}");
}

foreach ($matchA[1] as $k => $mid) {
    $name = $matchA[2][$k];

    // The chapter page carries a per-page token in <meta name="_c">, which the
    // play API expects back in the "sc" request header.
    $url = "https://www.ysts.cc/tingshu/{$id}/{$mid}.html";
    $res = http($url, array(), 'GET', $header);
    if (!preg_match('/<meta name="_c" content="([^"]+)" \/>/s', $res, $matchB)) {
        exit("未获取SC");
    }
    $sc = $matchB[1];

    $header1 = $header;
    $header1[] = "sc:{$sc}";

    echo "{$name}...";

    // Ask the play API for the real audio URL of this chapter.
    $url = "https://www.ysts.cc/api/act/play";
    $param = array('nid' => $id, 'cid' => $mid);
    $res = json_decode(http($url, $param, 'POST', $header1), true);
    if (!is_array($res) || empty($res['url'])) {
        exit("未获取播放地址");
    }

    // POST is kept on purpose: http() prepends the response headers to the
    // returned data for GET requests, which would corrupt the audio file.
    $m4a = http($res['url'], array(), 'POST', $header1);

    // The title comes from the remote page, so strip path separators and other
    // characters that are invalid in file names before using it as one.
    $safe = trim(preg_replace('#[\\\\/:*?"<>|\x00-\x1f]#', '_', $name), " .");
    if ($safe === '') {
        $safe = (string) $mid;
    }

    // Stop on a failed write so this page is not recorded as finished.
    if (file_put_contents($out_dir."/".$safe.".m4a", $m4a) === false) {
        exit("保存失败: {$safe}.m4a");
    }
    echo "下载ok!\n";
}

// Every chapter of this page was saved; remember the page for the next run.
file_put_contents($page_f, $page);
die();
/**
 * Perform an HTTP GET or POST request with cURL and return the raw response.
 *
 * GET requests are sent with CURLOPT_HEADER enabled, so the returned string
 * contains the response headers followed by the body. The first
 * "Set-Cookie: name=value;" pair found in a GET response is written to
 * cookie.txt and is sent back on later calls that do not pass $cookies.
 *
 * NOTE(review): TLS peer/host verification is disabled below, as in the
 * original code. Confirm this is acceptable before using the function for
 * anything sensitive.
 *
 * @param string       $url     Target URL; may already contain a query string.
 * @param array|string $params  Query parameters (GET) or form fields (POST).
 * @param string       $method  'GET' or 'POST', case-insensitive.
 * @param array        $header  Raw request header lines.
 * @param bool         $multi   When true, POST $params are handed to cURL
 *                              unchanged instead of being URL-encoded.
 * @param string       $cookies Cookie header value; when empty, the content of
 *                              cookie.txt is used if that file exists.
 *
 * @return string Response data (headers + body for GET, body only for POST).
 *
 * @throws Exception When the method is unsupported or the transfer fails.
 */
function http($url, $params, $method = 'GET', $header = array(), $multi = false,$cookies=""){
    $verb = strtoupper($method);

    // Reject unsupported methods up front, before any file or cURL work.
    if ($verb !== 'GET' && $verb !== 'POST') {
        throw new Exception('不支持的请求方式!');
    }

    // Fall back to the cookie captured by an earlier GET request.
    if ($cookies == "" && file_exists("cookie.txt")) {
        $cookies = (string) file_get_contents("cookie.txt");
    }

    $opts = array(
        CURLOPT_CONNECTTIMEOUT => 15,
        CURLOPT_TIMEOUT        => 15,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_HTTPHEADER     => $header,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_COOKIESESSION  => 1,
        CURLOPT_AUTOREFERER    => 1,
    );
    if ($cookies != "") {
        $opts[CURLOPT_COOKIE] = $cookies;
    }

    if ($verb === 'GET') {
        // Headers are needed in the output so the cookie can be extracted below.
        $opts[CURLOPT_HEADER] = 1;

        // Only append a query string when there is one, and join with '&' if
        // the URL already has a '?'. Unconditionally appending '?' turned
        // ".../?p=1" into ".../?p=1?" and corrupted the last parameter.
        $query = http_build_query($params);
        if ($query !== '') {
            $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
        }
        $opts[CURLOPT_URL] = $url;
    } else {
        $opts[CURLOPT_URL]        = $url;
        $opts[CURLOPT_POST]       = 1;
        $opts[CURLOPT_POSTFIELDS] = $multi ? $params : http_build_query($params);
    }

    $ch = curl_init();
    curl_setopt_array($ch, $opts);
    $data  = curl_exec($ch);
    $error = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure; never hand that back as a response.
    if ($data === false || $error) {
        throw new Exception('请求发生错误:' . $error);
    }

    if ($verb === 'GET') {
        // Best-effort: remember the first cookie for subsequent requests.
        if (preg_match('/^Set-Cookie: (.*?);/im', $data, $m)) {
            @file_put_contents("cookie.txt", $m[1]);
        }
    }

    return $data;
}
 
?>
卓越飞翔博客
上一篇: 易语言源码 桌面硬件信息显示
下一篇: python版的一个有声小说抓取批量下载工具

相关推荐

留言与评论(共有 0 条评论)
   
验证码:
隐藏边栏