批处理之家 - Powered by Discuz! Board

rem Download the index pages of lifeweeker.dooland.com as 1.html, 2.html and so on.
rem Each page is fetched first, then passed to wfr with utf-8 input and gbk
rem output encoding, presumably so that findstr can match the Chinese
rem next-page link text. The loop repeats while the page just fetched still
rem contains a link to a following page.
set page=0
:loop
set /a page+=1
wget -O %page%.html "http://lifeweeker.dooland.com/index.php?p=%page%"
wfr %page%.html /any /force /encin:utf-8 /encout:gbk>nul
findstr /i "index\.php\?p=.*下一页" %page%.html >nul&&goto loop

复制代码

setlocal enabledelayedexpansion
rem Scan 1.html for lines that start with a link into www.dooland.com/magazine
rem and mention the magazine name. Splitting on the equals sign and space,
rem tokens=3,6 puts the third token, the quoted link, into the loop variable i
rem and the sixth token, used here as the issue label, into the next variable j.
rem The label is stored behind a leading quote and then trimmed by one character
rem at each end. Rows that yield an empty label are skipped so that no file is
rem written to a nameless folder.
for /f "tokens=3,6 delims== " %%i in ('findstr /ib "<a.href=.http:\/\/www\.dooland\.com\/magazine.*三联生活周刊" 1.html') do (
    set issue="%%j
    set "issue=!issue:~1,-1!"
    if defined issue (
        md html\!issue! 2>nul
        wget -nv -O html\!issue!\%%~ni %%i
    )
)

复制代码

rem Read the saved issue page html\11年第18期\21422 and pick every line that
rem starts with an h2 tag followed by an article title link. The third token,
rem split on the equals sign and space, is the relative article address. It is
rem appended to the magazine base URL and saved next to the issue page under
rem its own file name plus .html.
for /f "tokens=3 delims== " %%u in ('findstr /i /b "<h2>.*article_.*title" html\11年第18期\21422') do wget -nv -O html\11年第18期\%%~nu.html "http://www.dooland.com/magazine/%%~u"

复制代码

rem Merge all converted article text files of one issue into txt\11年第18期.txt.
rem For each file the article title is printed first, taken as everything after
rem the second colon of the TITLE line, followed by an empty line. Then every
rem line that begins with two full-width spaces is copied, and one more empty
rem line separates the article from the next one.
md txt 2>nul
(
    for %%g in (html\11年第18期\*.txt) do (
        for /f "delims=: tokens=2*" %%l in ('findstr /i /c:"TITLE : 三联生活周刊:" %%g') do (
            echo %%m
            echo.
        )
        findstr /i /b "　　" %%g
        echo.
    )
)>txt\11年第18期.txt

复制代码

@echo off
title 读览天下网站部分杂志精选文章整理脚本
setlocal enabledelayedexpansion
:: Interactive downloader for featured magazine articles on dooland.com.
:: The user first picks a category, which is one .txt file in the config
:: folder, and then one or more magazines listed in that file. Each line of
:: a category file is read as three whitespace-separated columns: the
:: magazine number, the magazine name and the URL of its issue index.
:: URLs are strictly case-sensitive.
:: Even without an account and password the complete article text is still
:: embedded in the page source; CSS merely restricts viewing of it.
:: So the complete featured articles can be read without an account.
set www=http://www.dooland.com/magazine
:Main
cls
title 读览天下杂志精选文章下载脚本
echo.&echo.
echo 选择每个类别前的数字序号
echo 将进入该类别的具体分类进行指定杂志精选文章的下载
echo 每次只能选择一个类别
echo 请勿输入错误的格式，否则，将引发不可预知的错误
echo.
echo ==============================================================
echo.
set num=0
set ConfigDir=config
:: Print the numbered category menu, three entries per row.
for %%i in (%ConfigDir%\*.txt) do (
    set /a num+=1
    set /a mod=!num!%%3
    set /p=!num!.%%~ni <nul
    if !mod! equ 0 echo.&echo.
)
echo.
echo ==============================================================
echo.
set ClassID=
set /p ClassID= 请选择类别（1/2/3/……）：
if not defined ClassID goto Main
:: Translate the chosen number back into the category name and its file.
set Class=
set ClassConfig=
set num=0
for %%i in (%ConfigDir%\*.txt) do (
    set /a num+=1
    if "!num!"=="%ClassID%" (
        set Class=%%~ni
        set ClassConfig=%%i
    )
)
:: A number that matches no listed category leaves ClassConfig undefined.
:: Reading an empty file set further down would be a syntax error that
:: aborts the script, so show the category menu again instead.
if not defined ClassConfig goto Main
cls
title 准备整理 %Class% 分类下的精选文章
echo.
echo 选择每个项目前的数字编号
echo 可以下载该杂志所有期别的精选文章
echo 可以多选，但是必须以空格分隔
echo 多选时不用考虑先后次序
echo 0与其他选项组合时会造成重复下载
echo 已经整理过的期别不会重复整理（即保留整理进度）
echo 请勿输入错误的格式，否则会引发不可预知的错误
echo.
echo ==============================================================
echo.
:: Show at most the first nine magazines of the category, three per row.
set num=0
for /f "tokens=1-3" %%i in (%ClassConfig%) do (
    set /a num+=1
    set /a mod=!num!%%3
    if !num! leq 9 (
        set /p=%%i.%%j <nul
        if !mod! equ 0 echo.&echo.
    )
)
:: Longer lists are not printed; the category file itself is opened instead.
if %num% gtr 9 (
    echo.
    echo 更多杂志的编号，请按照打开的文本文件中的内容进行输入
    start "" %ClassConfig%
)
echo.&echo.
set /p= 0.前面的所有杂志返回上一步请直接回车<nul
echo.
echo ==============================================================
echo.
set MagazineID=
set /p MagazineID= 请输入选择代码（0/1/2/3……）：
if not defined MagazineID goto Main
:: For every code the user entered, walk the category file and download the
:: magazine whose number matches. Code 0 matches every line of the file.
:: Magazine and UrlIndex are handed to DownArticle through the environment.
for %%i in (%MagazineID%) do (
    for /f "tokens=1-3" %%j in (%ClassConfig%) do (
        set Magazine=%%k
        set UrlIndex=%%l
        if "%%i"=="0" (
            call :DownArticle
        ) else (
            if "%%i"=="%%j" (
                call :DownArticle
            )
        )
    )
)
pause
goto Main
:DownArticle
:: Download and convert every issue of one magazine.
:: Reads Class, Magazine and UrlIndex, which the caller sets before the call.
:: Working files go to Class\Magazine\html, finished text to Class\Magazine\txt.
cls
md %Class%\%Magazine%\html 2>nul
del /a /f /q %Class%\%Magazine%\html\*.* 2>nul
set page=0
:DownPages
:: Download all index pages: fetch page after page, re-encode each one from
:: utf-8 to gbk, and continue while the page still links to a next page.
set /a page+=1
title 正在下载《%Magazine%》期别索引页中的第 %page% 页
wget -nv -O %Class%\%Magazine%\html\%page% "%UrlIndex%?p=%page%"
wfr %Class%\%Magazine%\html\%page% /any /force /encin:utf-8 /encout:gbk>nul
findstr /i "index\.php\?p=.*下一页" "%Class%\%Magazine%\html\%page%">nul&&goto DownPages
:: Article titles that contain spaces must be guarded against.
:: Issues whose txt file already exists are not downloaded again.
:: An entry with no data in the issue column is skipped, otherwise the
:: script could not carry on.
:: Every index page is scanned twice: first for issue links wrapped in an h1
:: tag, then for plain issue links at the start of a line. In both scans the
:: third token is the quoted issue URL and the sixth token is the issue label.
for %%g in (%Class%\%Magazine%\html\*.*) do (
    for /f "tokens=3,6 delims== " %%u in ('findstr /ib "<h1><a.href=.http:\/\/www\.dooland\.com\/magazine.*%Magazine%" %%g') do (
        set issue="%%v
        set "issue=!issue:~1,-1!"
        if defined issue (
            md %Class%\%Magazine%\html\!issue! 2>nul
            if not exist %Class%\!Magazine!\txt\!issue!.txt (
                del /a /f /q %Class%\%Magazine%\html\!issue! 2>nul
                title 正在下载《%Magazine%》!issue! 的文章列表页面
                wget -nv -O %Class%\%Magazine%\html\!issue!\%%~nu %%u
                wfr %Class%\%Magazine%\html\!issue!\%%~nu /any /force /encin:utf-8 /encout:gbk
                call :GetHtml !issue! %Class%\%Magazine%\html\!issue!\%%~nu
                if defined article call :GetTxt !issue!
            )
        )
    )
    for /f "tokens=3,6 delims== " %%u in ('findstr /ib "<a.href=.http:\/\/www\.dooland\.com\/magazine.*%Magazine%" %%g') do (
        set issue="%%v
        set "issue=!issue:~1,-1!"
        if defined issue (
            md %Class%\%Magazine%\html\!issue! 2>nul
            if not exist %Class%\!Magazine!\txt\!issue!.txt (
                del /a /f /q %Class%\%Magazine%\html\!issue! 2>nul
                title 正在下载《%Magazine%》!issue! 的文章列表页面
                wget -nv -O %Class%\%Magazine%\html\!issue!\%%~nu %%u
                wfr %Class%\%Magazine%\html\!issue!\%%~nu /any /force /encin:utf-8 /encout:gbk
                call :GetHtml !issue! %Class%\%Magazine%\html\!issue!\%%~nu
                if defined article call :GetTxt !issue!
            )
        )
    )
)
:: To keep the downloaded web pages, comment out or delete the next statement.
rd /q /s %Class%\%Magazine%\html 2>nul
cls
title 处理完毕
echo 处理完毕
goto :eof
:GetHtml
:: Download the featured articles listed on one issue page.
:: The second argument is the path of the saved issue page. The issue label is
:: read from the issue variable rather than from the first argument.
:: Sets article to exist when at least one article link was found and leaves
:: it undefined otherwise, which the caller checks before converting to text.
cls
title 正在下载《%Magazine%》%issue% 的精选文章html数据
set "article="
for /f "tokens=3 delims== " %%h in ('findstr /i /b "<h2>.*article_.*title" %2') do (
    set article=exist
    wget -nv -O %Class%\%Magazine%\html\%issue%\%%~nh.html "%www%/%%~h"
)
goto :eof
:GetTxt
:: Build the single text file of one issue from its downloaded article pages.
:: The issue label is read from the issue variable. htox32c is run on the
:: html files and presumably writes a txt file next to each of them, since
:: the following steps read the txt files from the same folder. Those files
:: are re-encoded from utf-8 to gbk, merged into Class\Magazine\txt, and
:: finally deleted.
cls
title 正在转换《%Magazine%》%issue% 的数据格式
htox32c /ip /o0 %Class%\%Magazine%\html\%issue%\*.html
wfr %Class%\%Magazine%\html\%issue%\*.txt /any /force /encin:utf-8 /encout:gbk
cls
title 正在生成《%Magazine%》%issue% 的文本文件
echo.
echo 正在生成《%Magazine%》%issue% 的文本文件
echo.
echo 请稍候...
md %Class%\%Magazine%\txt 2>nul
:: Per article: the title taken from after the second colon of the TITLE
:: line, an empty line, every line that begins with two full-width spaces,
:: and a closing empty line.
(
    for %%w in (%Class%\%Magazine%\html\%issue%\*.txt) do (
        for /f "delims=: tokens=2*" %%b in ('findstr /i /c:"TITLE : %Magazine%:" %%w') do (
            echo %%c
            echo.
        )
        findstr /i /b "　　" %%w
        echo.
    )
)>%Class%\%Magazine%\txt\%issue%.txt
del /a /f /q %Class%\%Magazine%\html\%issue%\*.txt 2>nul
goto :eof

复制代码