本帖最后由 WHY 于 2018-12-17 18:21 编辑
@echo off
rem zgshige.bat
rem Lists every *.html file below %htmlPath% (recursively) into a temporary
rem list file, passes that list to zgshige.ps1, then removes the list.
setlocal
echo Wait ...
set "htmlPath=E:\zgshige"

rem Locate the PowerShell script next to this batch file (%~dp0) instead of
rem relying on the current directory, which differs when the batch file is
rem started from a shortcut, another folder, or an elevated prompt.
set "psScript=%~dp0zgshige.ps1"

rem Keep the list in %TEMP% so an unrelated List.txt in the working
rem directory is never overwritten and deleted.
set "listFile=%TEMP%\zgshige_list_%RANDOM%.txt"

if not exist "%htmlPath%\" goto :missingFolder

rem /a-d leaves out directories whose names happen to match *.html.
dir /b /s /a-d "%htmlPath%\*.html" > "%listFile%"
PowerShell -NoProfile -exec ByPass -f "%psScript%" "%listFile%"
del "%listFile%"
echo Done!
pause
exit /b 0

:missingFolder
echo Folder not found: "%htmlPath%"
pause
exit /b 1
# zgshige.ps1
# Reads a list of HTML file paths, extracts three sections from each page,
# reduces them to text with <br /> and <h3> markers, and appends the result
# as one line to "<parent folder name>.Log".
#
# Parameter:
#   ListFile - text file containing one HTML file path per line.
# Output:
#   For every page that splits into exactly five parts on the four marker
#   <div> tags, one UTF-8 line (terminated by CRLF) is appended to a .Log
#   file in the current PowerShell file-system location.
param([String]$ListFile);
Add-Type -AssemblyName System.Web;

if (-not $ListFile -or -not (Test-Path -LiteralPath $ListFile)) {
    Write-Error "List file not found: '$ListFile'";
    exit 1;
}

# [IO.File] resolves relative paths against the process working directory,
# which is not necessarily PowerShell's current location, so the output
# directory is fixed explicitly here.
$outDir = (Get-Location -PSProvider FileSystem).ProviderPath;

forEach ($file In (Get-Content -LiteralPath $ListFile -ReadCount 0)) {
    # Ignore blank lines in the list.
    if (-not "$file".Trim()) { continue; }

    # A failed read must skip the entry; otherwise $str would still hold the
    # previous page's text and that text would be logged a second time.
    try {
        $str = [IO.File]::ReadAllText($file, [Text.Encoding]::UTF8);
    } catch {
        Write-Warning "Skipping '$file': $($_.Exception.Message)";
        continue;
    }

    # Split on the four marker <div> opening tags; a page is processed only
    # when this yields exactly five pieces.
    $arr = $str -split '<div (?:class="text-center b-b b-2x b-lt|id="content|class="m-lg font14|class="p-sm)">';
    If ($arr.Count -eq 5) {
        $s = $arr[1] + '<br />' + $arr[2] + '<br />';
        # Add <br /> after a CJK character or punctuation mark that is
        # followed by a non-br tag or by a line break.
        $s += $arr[3] -replace '[\u4E00-\u9FFF\p{P}](?=<(?!/?br)|\r?\n)', '$&<br />';
        # Put the marker TTT right after <h3> (in front of the title) unless it is already there.
        $s = $s -replace '<h3>(?!TTT)', '$&TTT';
        # Remove HTML comments.
        $s = $s -replace '<!--(?:(?!-->)[\s\S])*-->';
        # Normalise <br>, </br> and <br/> to <br />.
        $s = $s -replace '</?br>|<br/>', '<br />';
        # Remove all other tags, keeping the line-break and h3 tags.
        $s = $s -replace '</?(?!br|h3|[^a-z])[^>]*>', '';
        # Decode HTML character entities.
        $s = [regex]::Replace($s, '&[^&;]+;', {param($m); [Web.HttpUtility]::HtmlDecode($m.Value)});
        # Trim leading whitespace, collapse runs of two or more <br /> to
        # exactly two, then collapse whitespace runs to a single space so the
        # page becomes one line.
        $s = $s -replace '^\s+';
        $s = $s -replace '(?:<br />\s*){2,}', '<br /><br />';
        $s = $s -replace '\s+', ' ';

        # The log file is named after the folder that contains the HTML file.
        $name = Join-Path $outDir (($file -split '\\')[-2] + '.Log');
        [IO.File]::AppendAllText($name, $s + "`r`n", [Text.Encoding]::UTF8);
    }
}
复制代码
|