批处理之家 - Powered by Discuz! Board

@echo off
rem Counts how often every run of consecutive Chinese characters occurs in the
rem source file. All Chinese characters of the file are treated as one continuous
rem sequence, so everything else - punctuation, digits, line breaks - is dropped first.
rem Run lengths come from $range (min-max). Runs seen at least twice are printed
rem and written to the result file, one line per run: the text, a space, the count.
set info=互助互利,支付宝扫码头像,感谢赞助
rem If you run into problems, contact QQ 956535081 for prompt help
title %info%
rem Work relative to the folder that holds this script.
cd /d "%~dp0"
rem NOTE: the trailing carets join the PowerShell code below into ONE command
rem line, so no comments can be placed inside it, and a literal caret in the
rem code has to be doubled (see the negated regex character class).
rem The existence check stops a missing source file from replacing an earlier
rem result file with an empty one.
powershell -NoProfile -ExecutionPolicy bypass ^
  $file1='源文件.txt';^
  $file2='统计结果.txt';^
  $range='2-7';^
  if(-not [IO.File]::Exists($file1)){^
    write-host ('File not found: '+$file1);^
    exit 1;^
  };^
  $dic=New-Object 'System.Collections.Generic.Dictionary[string,int]';^
  $text1=[IO.File]::ReadAllText($file1,[Text.Encoding]::Default);^
  $r=$range.split('-');^
  $han=[regex]::Replace($text1,'[^^\u4E00-\u9FA5]','');^
  for($i=[int]$r[0];$i -le [int]$r[1];$i++){^
    for($j=0;$j -le ($han.Length-$i);$j++){^
      $s=$han.Substring($j,$i);^
      if(-not $dic.ContainsKey($s)){^
        $dic.add($s,1);^
      }else{^
        $dic[$s]++;^
      };^
    };^
  };^
  [System.Collections.ArrayList]$result=@();^
  foreach($it in $dic.Keys){^
    if($dic[$it] -ge 2){^
      $t=$it+' '+$dic[$it];^
      write-host $t;^
      [void]$result.add($t);^
    };^
  };^
  [IO.File]::WriteAllLines($file2, $result, [Text.Encoding]::Default);
echo;%info%
pause

复制代码

@echo off
rem Counts how often every run of consecutive Chinese characters occurs in the
rem source file. Each line is split on anything that is not a Chinese character,
rem and runs are taken inside each resulting segment only, so a run never spans
rem punctuation or a line break.
rem Run lengths come from $range (min-max). Runs seen at least twice are printed
rem and written to the result file, one line per run: the text, a space, the count.
set info=互助互利,支付宝扫码头像,感谢赞助
rem If you run into problems, contact QQ 956535081 for prompt help
title %info%
rem Work relative to the folder that holds this script.
cd /d "%~dp0"
rem NOTE: the trailing carets join the PowerShell code below into ONE command
rem line, so no comments can be placed inside it, and a literal caret in the
rem code has to be doubled (see the negated regex character class).
rem The existence check stops a missing source file from replacing an earlier
rem result file with an empty one.
powershell -NoProfile -ExecutionPolicy bypass ^
  $file1='源文件.txt';^
  $file2='统计结果.txt';^
  $range='2-7';^
  if(-not [IO.File]::Exists($file1)){^
    write-host ('File not found: '+$file1);^
    exit 1;^
  };^
  $dic=New-Object 'System.Collections.Generic.Dictionary[string,int]';^
  $text1=[IO.File]::ReadAllLines($file1,[Text.Encoding]::Default);^
  $r=$range.split('-');^
  for($i=0;$i -lt $text1.count;$i++){^
    $line=$text1[$i].trim();^
    if($line -ne ''){^
      $arr=$line -split '[^^\u4E00-\u9FA5]+';^
      for($j=0;$j -lt $arr.length;$j++){^
        for($k=[int]$r[0];$k -le [int]$r[1];$k++){^
          for($a=0;$a -le ($arr[$j].length-$k);$a++){^
            $s=$arr[$j].Substring($a,$k);^
            if(-not $dic.ContainsKey($s)){^
              $dic.add($s,1);^
            }else{^
              $dic[$s]++;^
            };^
          };^
        };^
      };^
    };^
  };^
  [System.Collections.ArrayList]$result=@();^
  foreach($it in $dic.Keys){^
    if($dic[$it] -ge 2){^
      $t=$it+' '+$dic[$it];^
      write-host $t;^
      [void]$result.add($t);^
    };^
  };^
  [IO.File]::WriteAllLines($file2, $result, [Text.Encoding]::Default);
echo;%info%
pause

复制代码

<#*,:&cls
@echo off
rem Batch/PowerShell hybrid launcher. cmd executes the lines down to exit /b.
rem PowerShell reads this same file, treats these lines as one block comment,
rem and runs the script that follows the closing comment marker.
pushd "%~dp0"
rem The script re-reads itself by FULL path (the f modifier). A bare argument 0
rem is wrong after pushd when the script was started through a relative path,
rem and lacks the extension when the script was started without it.
Powershell -NoProfile -ExecutionPolicy RemoteSigned -Command ". ([ScriptBlock]::Create((Get-Content -LiteralPath \"%~f0\" -ReadCount 0 | Out-String ))) "
popd
pause
exit /b
#>
# Input text file, resolved against the script folder (set by pushd above).
$FileList = "源文件.txt"
# Report file that receives the per-substring counts.
$FileOut = "词频统计.txt"
function Get-WordCount {
    <#
    .SYNOPSIS
    Counts every substring of the requested lengths in the given sentences.

    .DESCRIPTION
    Each sentence is reduced to its runs of consecutive characters in the range
    U+4E00..U+9FA5 (or kept whole when -IncludePunctuations is set). For every
    requested length, a window of that length slides over each run and each
    substring it covers is counted.

    .PARAMETER Sentences
    Text to analyse; accepted positionally or from the pipeline. Null or empty
    entries are skipped.

    .PARAMETER WordLengthList
    Substring lengths to count (each must be greater than 0). A length that is
    listed more than once is counted once.

    .PARAMETER IncludePunctuations
    Treat each sentence as a single run instead of extracting the U+4E00..U+9FA5
    runs, so substrings may contain any character.

    .OUTPUTS
    A hashtable keyed by substring length. Each value is a
    Dictionary[string,int] that maps a substring to its number of occurrences.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Mandatory = $true, Position = 0, ValueFromPipeline = $true, ValueFromPipelineByPropertyName = $true)]
        [AllowEmptyCollection()]
        [AllowEmptyString()]
        [AllowNull()]
        [string[]]$Sentences,
        [Parameter(Mandatory = $false, Position = 1)]
        [ValidateNotNullOrEmpty()]
        [ValidateScript( { $_ -gt 0 })]
        [int[]]$WordLengthList = @(2, 3, 4, 5, 6, 7),
        [Parameter(Mandatory = $false, Position = 2)]
        [switch]$IncludePunctuations
    )
    begin {
        $dicWordCount = @{ }
        # Distinct lengths in first-seen order. A repeated length would make
        # Hashtable.Add throw, and would be counted twice in the process block.
        $Lengths = New-Object "System.Collections.Generic.List[int]"
        foreach ($Length in $WordLengthList) {
            if (-not $dicWordCount.ContainsKey($Length)) {
                $dicWordCount.Add($Length, (New-Object "System.Collections.Generic.Dictionary[string, int]"))
                $Lengths.Add($Length)
            }
        }
        # Alternative that matches any word characters:
        # $reWord = New-Object System.Text.RegularExpressions.Regex -ArgumentList @("\w+")
        $reWord = New-Object System.Text.RegularExpressions.Regex -ArgumentList @("[\u4E00-\u9FA5]+")
    }
    process {
        foreach ($Sentence in $Sentences) {
            # Nothing to count, and Regex.Matches rejects a null input.
            if ([string]::IsNullOrEmpty($Sentence)) {
                continue
            }
            if ($IncludePunctuations) {
                $WordList = , $Sentence
            } else {
                $WordList = $reWord.Matches($Sentence) | ForEach-Object { $_.Value }
            }
            if ($WordList) {
                foreach ($Word in $WordList) {
                    foreach ($WordLength in $Lengths) {
                        # Last start index at which a full-length window still fits;
                        # negative when the run is shorter than the window.
                        $PosTail = $Word.Length - $WordLength
                        $dicCtr = $dicWordCount.Item($WordLength)
                        for ($i = 0; $i -le $PosTail; $i++) {
                            $dicCtr[$Word.SubString($i, $WordLength)]++
                        }
                    }
                }
            }
        }
    }
    end {
        $dicWordCount
    }
}
# Read every input line in one go. -ErrorAction Stop aborts the script when the
# input cannot be read, so a missing file does not overwrite an earlier report
# with an empty one.
$dicWordCount = Get-WordCount -Sentences (Get-Content -ReadCount 0 -Path $FileList -ErrorAction Stop)

# One report line per substring: the text left-aligned in 10 columns, then its
# count. Lines are grouped by ascending substring length.
$ReportLines = foreach ($Length in ($dicWordCount.Keys | Sort-Object)) {
    foreach ($Entry in $dicWordCount[$Length].GetEnumerator()) {
        '{0,-10} : {1}' -f $Entry.Key, $Entry.Value
    }
}
Set-Content -Value $ReportLines -LiteralPath $FileOut

复制代码