<# :
cls&echo off&cd /d "%~dp0"&mode con lines=5000&rem Save this .bat file in ANSI/GB2312 encoding
rem Batch bootstrap. cmd.exe runs the lines from here down to "exit".
rem PowerShell treats the same lines as a block comment and skips them.
rem Get-Content below is called without -Encoding, so non-ASCII path literals
rem presumably only survive when the file is saved in the system ANSI code page.
path %SYSTEMROOT%\System32\WindowsPowerShell\v1.0;%path%
powershell -NoProfile -ExecutionPolicy bypass "Get-Content -literal \"%~f0\"|Out-String|Invoke-Expression"
pause
exit
#>
# Purpose
#   For every *.xhtml file in $folder1 (the source), collect each text run that
#   follows class="text_3"> and pair it, in document order, with the
#   class="text_3"> runs of the target file in $folder2 whose name is the
#   trailing "<digits>.xhtml" part of the source file name.
#     * If the target run contains the source run as a substring, the target
#       run is replaced by the source run, the pair is logged to the console,
#       and an empty line is recorded in the report.
#     * Otherwise the target run is left alone and the source run is recorded
#       in the report.
#   Source runs that have no counterpart in the target (or all of them when no
#   target file exists) are recorded as well. The report is written next to the
#   source file as "<BaseName>.txt", UTF-8 without BOM.
$folder1="D:\大藏经修改\梵文";
$folder2="D:\大藏经修改\epub解包后\F1071 釋教最上乘秘密藏陀羅尼集\OEBPS\juans";
if(-not (Test-Path -LiteralPath $folder1)){write-host ('"'+$folder1+'" path error or not exist');exit;}
if(-not (Test-Path -LiteralPath $folder2)){write-host ('"'+$folder2+'" path error or not exist');exit;}

# UTF-8 without a byte-order mark, used for every read and write below.
$enc=New-Object System.Text.UTF8Encoding $False;
# Group 0 is the attribute plus the text run; group 1 is the text run alone.
$pattern='class="text_3">([^<]+)';

$files=@(Get-ChildItem -LiteralPath $folder1|Where-Object{('.xhtml' -eq $_.Extension) -and ($_ -is [System.IO.FileInfo])});
if($files.Count -lt 1){
    write-host ('no xhtmlfile in "'+$folder1+'"');
}
foreach($file in $files){
    write-host $file.FullName -ForegroundColor yellow;
    $arr=New-Object -TypeName System.Collections.ArrayList;

    # Reset the pairing counter for EVERY source file. It used to be reset only
    # when a target file was found, so a source file without a target inherited
    # the previous file's counter and its report lost entries.
    # The counter is global because the match evaluator below runs in its own
    # scope, where a plain "$n++" would only change a local copy.
    $global:n=0;

    $text1=[IO.File]::ReadAllText($file.FullName, $enc);
    $m1=[regex]::matches($text1, $pattern);

    # The target file name is the "<digits>.xhtml" tail that follows the last "_".
    $m2=[regex]::match($file.Name, '(?i)_([0-9]+\.xhtml)$');
    if($m2.Success){
        $juansfile=[IO.Path]::Combine($folder2, $m2.groups[1].value);
        if(Test-Path -LiteralPath $juansfile){
            write-host $juansfile -ForegroundColor yellow;
            $text2=[IO.File]::ReadAllText($juansfile, $enc);
            # The script block is used as the regex match evaluator: it is
            # called once per target match, in order, and returns the
            # replacement text for that match.
            $text2=[regex]::replace($text2, $pattern, {
                param($m3);
                $str=$m3.groups[0].value;            # default: keep the target run unchanged
                if($global:n -lt $m1.count){
                    $src=$m1[$global:n];
                    if($m3.groups[1].value.Contains($src.groups[1].value)){
                        $str=$src.groups[0].value;   # take over the source run
                        [void]$arr.add('');
                        write-host ($src.groups[1].value+' --> '+$m3.groups[1].value);
                    }else{
                        [void]$arr.add($src.groups[1].value);
                    }
                }
                # Count every target match, even past the end of the source list,
                # so the tail loop below adds nothing in that case.
                $global:n++;
                return $str;
            });
            [IO.File]::WriteAllText($juansfile, $text2, $enc);
        }else{
            write-host ('"'+$juansfile+'" not exist');
        }
    }

    # Record the source runs that were never paired with a target run.
    for($j=$global:n;$j -lt $m1.count;$j++){
        [void]$arr.add($m1[$j].groups[1].value);
    }

    $outfile=[IO.Path]::Combine($folder1, $file.BaseName+'.txt');
    [IO.File]::WriteAllLines($outfile, $arr, $enc);
    write-host '';
}
复制代码
|