批处理之家 - Powered by Discuz! Board

use utf8;
use Encode;
use JE;
use Modern::Perl;
use File::Slurp;
use Mojo::UserAgent;
use Win32::Unicode::File;
use JSON qw/from_json to_json/;
STDOUT->autoflush(1);
my $JE = new JE;
my $ua = Mojo::UserAgent->new();
init_ua();
my %headers;
init_headers(\%headers);
# 这里放准备下载的 keyword 清单
my @list = qw/
ph60e6662d3221e
ph5dc9ba7f1b...
/;
grep { getVideo($_) } @list;
sub getVideo
{
my $viewkey = shift;
my $url = "https://cn.pornhub.com/view_video.php?viewkey=${viewkey}";
my $res = $ua->get( $url )->result;
#print $html->body;
my $js = $res->dom->at(".video-wrapper script")->all_text;
my $id = $res->dom->at("#player")->attr("data-video-id");
my $title = $res->dom->at("title")->text; #unicode
$title =~s/\s+- Pornhub\.com//i;
$title =~s/[\\\/:*?"<>|]/ /g; #替换部分windows文件名不支持的字符
printf "%s %s\n", $id, gbk($title);
my $file = "E:/迅雷下载/${viewkey} ${title}.mp4";
if ( file_type( 'e' => $file ) ) {
printf "%s: file already exists\n", $viewkey;
return;
}
write_file("src.js", $js);
$JE->eval( $js );
my $x = $JE->value;
# 第三个节点对应 720P，->value 转换为Perl字符串
my $videolink = $x->{"qualityItems_${id}"}[2]{"url"}->value;
say $videolink;
my $tx = $ua->get( $videolink );
print $tx->error ? "\nDownloading failed: ".$tx->error->{message} : "\nDownloading finished!\n";
my $fh = Win32::Unicode::File->new("wb", $file);
$fh->write( $tx->result->body );
$fh->close();
}
sub init_headers
{
my $ref = shift;
%$ref =
(
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng',
'accept-encoding' => 'gzip, deflate, br',
'accept-language' => 'zh-CN,zh;q=0.9,zh-TW;q=0.8',
'user-agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
);
}
sub init_ua
{
$ua = $ua->max_redirects(5);
$ua = $ua->connect_timeout(10);
# Proxy server to use for HTTPS and WebSocket requests.
# https://docs.mojolicious.org/Mojo/UserAgent/Proxy#https
$ua->proxy->https("http://sri:secret\@127.0.0.1:10809")->http("http://sri:secret\@127.0.0.1:10809");
# 代理的具体地址，在代理工具设置、菜单中寻找
# 进度显示
$ua->on(start => sub {
my ($ua, $tx) = @_;
$tx->req->once(finish => sub {
$tx->res->on(progress => sub {
state $prev = 0;
my $msg = shift;
return unless my $len = $msg->headers->content_length;
my $size = $msg->content->progress;
my $progress = int($size / ($len / 100));
if ( $progress ne $prev ) {
printf "%d%%\t", $progress;
}
$prev = $progress;
});
});
});
}
sub gbk { encode('gbk', $_[0]) }
sub utf8 { encode('utf8', $_[0]) }
sub u2gbk { encode('gbk', decode('utf8', $_[0])) }
sub uni { decode('utf8', $_[0]) }

复制代码

=info
Author: 523066680
Date: 2021-11
P站m3u8视频片段批量下载+合并
11月更新后不再出现直接的视频地址，所以只能从m3u8清单下载了。
需要用到的重点模块：
Mojolicious
JE
Win32::Unicode:File
需要有第三方命令工具：
ffmpeg.exe
=cut
use utf8;
use Encode;
use JE;
use Modern::Perl;
use File::Slurp;
use Mojo::UserAgent -signatures;
use Win32::Unicode::File;
use JSON qw/from_json to_json/;
STDOUT->autoflush(1);
my $JE = new JE;
my $ua = Mojo::UserAgent->new()->with_roles('+Queued');
my $loop = Mojo::IOLoop->singleton;
$loop = $loop->max_accepts(3);
$loop = $loop->max_connections(3);
$ua->max_active(5); # 最大在线同时请求数量
init_ua();
my %headers;
init_headers(\%headers);
my @list = qw/
ph61668f9c9____视频ID1
ph61668f9c9____视频ID2
/;
grep { getVideo($_) } @list;
sub getVideo
{
my $viewkey = shift;
my $url = "https://cn.{P站}.com/view_video.php?viewkey=${viewkey}";
my ( $title, $major, $m3u8_content ) = get_m3u8( $url );
my $cache = gbk("E:/temp/ts");
my $file = "E:/temp/${viewkey} ${title}.mp4"; #unicode
if ( file_type( 'e' => $file ) ) {
printf "%s: file already exists\n", $viewkey;
return;
}
mkdir $cache unless -e $cache;
my $buff = ""; #保存清单，用于ffmpeg合成视频
for my $e ( grep { /^seg/ } split(/\r?\n/, $m3u8_content) )
{
$e=~/(seg.*\.ts)/;
$buff .= "file ${1}\r\n";
my $tsfile = $cache ."/". $1;
# 考虑个别片段下载失败的情况，临时判断是10240byte.
# 最好是用 head 请求获取文件的实际大小做判断
#unless ( -e $tsfile and ( (-s $tsfile) > 10240 ) ) {
$ua->get( $major . $e, closure->($tsfile) );
#} else {
# printf "File already exists: %s\n", $e;
#}
}
$loop->start unless $loop->is_running;
write_file( $cache ."/". "content.txt", $buff);
if ( -e "$cache/temp.mp4" )
{
printf "temp.mp4 already exists ? \n";
unlink "$cache/temp.mp4" if -e "$cache/temp.mp4";
}
system("ffmpeg -y -f concat -i \"$cache/content.txt\" -vcodec h264_nvenc -vb 2M \"$cache/temp.mp4\"");
moveW decode('gbk', "$cache/temp.mp4"), $file or warn $!;
# 基于某种原因，temp.mp4可能没有移除？下载多个视频时，前面视频的ffmpeg操作被忽略，直接进行了 moveW
# 可能是判断问题或者上一次中断导致没有删除temp.mp4
# if ( ! -e "$cache/temp.mp4" ) {
# system("ffmpeg -y -f concat -i \"$cache/content.txt\" -vcodec h264_nvenc -vb 2M \"$cache/temp.mp4\"");
# }
# moveW decode('gbk', "$cache/temp.mp4"), $file or warn $!;
}
sub get_m3u8
{
my ($url) = @_;
my $res = $ua->get( $url )->result;
#print $html->body;
my $js = $res->dom->at(".video-wrapper #player script")->all_text;
my $id = $res->dom->at("#player")->attr("data-video-id");
my $title = $res->dom->at("title")->text; #unicode
$title =~s/\s+- P...hub\.com//i;
$title =~s/[\\\/:*?"<>|]/ /g; #替换部分windows文件名不支持的字符
printf "%s %s\n", $id, gbk($title);
# 获取 m3u8 实际地址
$JE->eval( $js );
# media_3 对应 720P，->value 转换为Perl字符串
my ($master_m3u8_url) = $JE->{"media_3"}->value;
$res = $ua->get( $master_m3u8_url )->result;
my ($major) = ($master_m3u8_url =~ /(.+\/)[^\/]+$/); # m3u8 链接主地址末尾已经包含/
write_file("master.m3u8", $res->body);
my ($index_m3u8) = ($res->body =~ /(index.+)\s/i); # 获取索引文件
printf "%s\n", $major . $index_m3u8; # 主地址+尾部详细地址
$res = $ua->get( $major . $index_m3u8 )->result;
write_file("index.m3u8", $res->body);
return ($title, $major, $res->body);
}
sub closure ($file)
{
return
sub ($ua, $tx) {
printf "%s\n", $file;
if ( $tx->result->is_success ) { # 如果失败则不写入文件
write_file( $file, {binmode=>":raw"}, $tx->result->body );
} else {
die "Failed to get segment $file\n";
}
}
}
sub init_headers
{
my $ref = shift;
%$ref =
(
'accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-encoding' => 'gzip, deflate, br',
'accept-language' => 'zh-CN,zh;q=0.9,zh-TW;q=0.8',
'upgrade-insecure-requests' => '1',
'user-agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.82 Safari/537.36',
);
}
sub init_ua
{
$ua = $ua->max_redirects(5);
$ua = $ua->connect_timeout(10);
# 设置代理
# Proxy server to use for HTTPS and WebSocket requests.
# https://docs.mojolicious.org/Mojo/UserAgent/Proxy#https
$ua->proxy->https("http://sri:secret\@127.0.0.1:10809")->http("http://sri:secret\@127.0.0.1:10809");
#$ua->proxy(["http", "https"], "socks://127.0.0.1:1080");
}
sub gbk { encode('gbk', $_[0]) }
sub utf8 { encode('utf8', $_[0]) }
sub u2gbk { encode('gbk', decode('utf8', $_[0])) }
sub uni { decode('utf8', $_[0]) }

复制代码