本帖最后由 zaqmlp 于 2021-9-30 13:37 编辑
- <# :
- rem Save this file as an ANSI-encoded .bat file; the launcher below decodes it as GB2312.
- cls&echo off
- rem Batch and PowerShell hybrid header: cmd executes these lines, and they sit inside a PowerShell block comment.
- rem Work from the folder that contains this script.
- cd /d "%~dp0"
- rem Read this very file as GB2312 text and hand the text to PowerShell for execution.
- powershell -NoProfile -ExecutionPolicy bypass "[IO.File]::ReadAllText('%~f0',[Text.Encoding]::GetEncoding('GB2312'))|Invoke-Expression"
- rem Wait for a key press, then leave cmd before it reaches the PowerShell code below.
- pause
- exit
- #>
-
- # Single WebClient instance shared by every gethtml call.
- $webclient=New-Object System.Net.WebClient;
-
- # Downloads the page at $u and returns its text decoded as UTF-8.
- # Makes at most 3 attempts; returns the text of the first successful
- # attempt, or '' when every attempt throws.
- function gethtml($u){
-     $maxTries=3;
-     $t='';
-     for($j=1;$j -le $maxTries;$j++){
-         try{
-             # The header and encoding are applied again before each attempt.
-             $webclient.Headers.Add('User-Agent','Mozilla/5.0 (Windows NT 6.1; ) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36');
-             $webclient.Encoding=[System.Text.Encoding]::UTF8;
-             $t=$webclient.DownloadString($u);
-             # Success: leave the retry loop. Without this break the same
-             # page was downloaded on all three iterations.
-             break;
-         }catch{
-             write-host ('第'+$j.toString()+'次获取网页内容失败');
-             # Only wait when another attempt will follow.
-             if($j -lt $maxTries){
-                 start-sleep -Seconds 3;
-             }
-         };
-     };
-     return $t;
- };
-
- # Main script: reads the total page count from the first listing page,
- # then walks every page and writes each matched title, one per line,
- # to the output file encoded as GB2312.
- $outfile='.\结果.txt';
- $totalpage=0;
- $url='https://www.daodaoba.com/?s=vod-type-id-2-area-大陆-p-1.html';
- $html=gethtml $url;
- # The pager text on the first page carries the total page count in group 1.
- $m1=[regex]::match($html,'页次:\d+/(\d+)页');
- if($m1.Success){
-     $totalpage=[int]$m1.groups[1].value;
-     $fs=New-Object System.IO.FileStream($outfile, [System.IO.FileMode]::Create);
-     $sw=$null;
-     try{
-         $sw=New-Object System.IO.StreamWriter($fs, [Text.Encoding]::GetEncoding('GB2312'));
-         for($i=1;$i -le $totalpage;$i++){
-             Write-Output ('-----------'+$i.toString()+'/'+$totalpage.toString()+'-----------');
-             # Swap the page number that sits right before ".html" in the URL.
-             $p_url=$url -replace '\d+(?=\.html)',$i.toString();
-             $html=gethtml $p_url;
-             # Group 1 is the inner markup of the list with id="contents".
-             $m2=[regex]::match($html,'<ul [^>]*?id="contents">([\s\S]+?)</ul>');
-             if($m2.Success){
-                 # Per list item: group 1 is the title text, group 2 the text after "主演:".
-                 # Only the title is written out, so group 2 is not read here.
-                 $m3=[regex]::matches($m2.groups[1].value,'<li(?: [^>]+?)?>[\s\S]*?([^>]+)</a></h2>[\s\S]*?主演:</em>([^>]+)</p>[\s\S]*?</li>');
-                 foreach($it in $m3){
-                     $sw.WriteLine($it.groups[1].value);
-                     # Flush per line so partial results are on disk while the run continues.
-                     $sw.Flush();
-                 }
-             }
-         }
-     }finally{
-         # Release the file handle on every exit path from the try block.
-         if($sw){
-             $sw.Close();
-         }
-         $fs.Close();
-     }
- }else{
-     # Previously this case ended silently with no output file and no message.
-     write-host '获取总页数失败';
- }
复制代码
|