本帖最后由 flashercs 于 2021-2-12 01:22 编辑
保存为 .bat 文件:
<#*,:&cls
@echo off
rem Batch half of a batch/PowerShell polyglot: cmd executes these lines, while
rem PowerShell treats them as part of a block comment and skips them.
setlocal
rem Capture the fully qualified script path BEFORE changing directory, so that
rem a relative or extension-less invocation still resolves to this very file.
set "SCRIPT_PATH=%~f0"
rem Work from the folder that holds the script so the relative paths used by
rem the PowerShell part refer to files next to it.
pushd "%~dp0"
rem Read this file as text and dot-source it as a PowerShell script block.
powershell -NoProfile -ExecutionPolicy RemoteSigned -Command ". ([ScriptBlock]::Create((Get-Content -LiteralPath \"%SCRIPT_PATH%\" -ReadCount 0 | Out-String ))) "
popd
pause
exit /b
#>
# Replacement switches: 1 = strip punctuation, 0 = leave that part untouched.
# $替换前8 governs the leading part of each line, $替换后8 the trailing part.
$替换前8 = 1
$替换后8 = 1
# Folder that receives the processed files.
$dirOut = ".\newDir"
# Number of CJK characters counted from each end of a line (default: 8).
$CJKCount = 8

# Ensure $dirOut is a directory: create it when it is missing, and replace
# a same-named non-directory item when one is in the way.
if (-not (Test-Path -Path $dirOut -PathType Container)) {
    if (Test-Path -Path $dirOut) {
        Remove-Item -Path $dirOut -Force
    }
    New-Item -Path $dirOut -ItemType Directory
}
function Get-Encoding {
    <#
    .SYNOPSIS
    Guesses the text encoding of a file or of an already opened stream.

    .DESCRIPTION
    The first bytes are compared with the UTF-8, UTF-16 LE and UTF-16 BE byte
    order marks. When no BOM is present, a sample of the content is decoded as
    strict UTF-8 (invalid byte sequences raise an error). If that succeeds the
    content is reported as UTF-8 without BOM; otherwise
    [System.Text.Encoding]::Default is returned. UTF-16 without BOM is not
    probed and therefore also ends up as the default encoding.
    The UTF-8/UTF-16 encodings returned are instances that throw on invalid data.

    .PARAMETER Stream
    A readable stream to inspect. It is left open for the caller; its position
    is changed, and it is rewound to 0 for the UTF-8 probe.

    .PARAMETER Path
    Path of the file to inspect. The file is opened read-only and closed again
    before the function returns.

    .PARAMETER ReadCount
    Number of characters decoded for the UTF-8 probe when no BOM is found.
    0 means "decode the whole content". Default: 1024.

    .OUTPUTS
    System.Text.Encoding, or $null when the file given by -Path cannot be opened.
    #>
    [CmdletBinding(DefaultParameterSetName = "PathSet")]
    param (
        [Parameter(ParameterSetName = "StreamSet", Mandatory = $true)]
        [ValidateNotNullOrEmpty()]
        [System.IO.Stream]$Stream,
        [Parameter(ParameterSetName = "PathSet", Mandatory = $true, Position = 0)]
        [ValidateNotNullOrEmpty()]
        [System.String]$Path,
        [Parameter(Mandatory = $false, Position = 1)]
        [System.UInt32]$ReadCount = 1024
    )

    # Candidate encodings; the last constructor argument makes them throw on invalid bytes.
    $utf8WithBom = New-Object System.Text.UTF8Encoding -ArgumentList @($true, $true)
    $utf8NoBom = New-Object System.Text.UTF8Encoding -ArgumentList @($false, $true)
    $utf16LeWithBom = New-Object System.Text.UnicodeEncoding -ArgumentList @($false, $true, $true)
    $utf16BeWithBom = New-Object System.Text.UnicodeEncoding -ArgumentList @($true, $true, $true)

    $utf8Preamble = $utf8WithBom.GetPreamble()
    $utf16LePreamble = $utf16LeWithBom.GetPreamble()
    $utf16BePreamble = $utf16BeWithBom.GetPreamble()

    # Only a stream opened here is closed here; a caller-supplied stream stays open.
    $ownsStream = $PSCmdlet.ParameterSetName -eq "PathSet"
    if ($ownsStream) {
        try {
            $Stream = New-Object System.IO.FileStream -ArgumentList @($Path, [System.IO.FileMode]::Open, [System.IO.FileAccess]::Read, [System.IO.FileShare]::Read)
        } catch {
            return $null
        }
    }

    $detected = $null
    try {
        # Read up to three leading bytes. The counter is deliberately NOT named
        # $readCount: PowerShell variable names are case-insensitive, so that
        # name would overwrite the $ReadCount parameter.
        $bomBuffer = New-Object byte[] -ArgumentList 3
        $bomLength = 0
        while ($bomLength -lt 3) {
            $chunk = $Stream.Read($bomBuffer, $bomLength, 3 - $bomLength)
            if ($chunk -le 0) {
                break
            }
            $bomLength += $chunk
        }

        if ($bomLength -ge 3 -and $bomBuffer[0] -eq $utf8Preamble[0] -and $bomBuffer[1] -eq $utf8Preamble[1] -and $bomBuffer[2] -eq $utf8Preamble[2]) {
            # UTF-8 with BOM
            $detected = $utf8WithBom
        } elseif ($bomLength -ge 2 -and $bomBuffer[0] -eq $utf16LePreamble[0] -and $bomBuffer[1] -eq $utf16LePreamble[1]) {
            # UTF-16 little endian with BOM
            $detected = $utf16LeWithBom
        } elseif ($bomLength -ge 2 -and $bomBuffer[0] -eq $utf16BePreamble[0] -and $bomBuffer[1] -eq $utf16BePreamble[1]) {
            # UTF-16 big endian with BOM
            $detected = $utf16BeWithBom
        } else {
            # No BOM: probe the candidates in order. UTF-16 without BOM is not
            # probed, so such content is reported as the default encoding.
            foreach ($candidate in @($utf8NoBom)) {
                try {
                    $Stream.Position = 0
                    # The reader is intentionally not disposed: disposing it
                    # would also close the underlying stream.
                    $reader = New-Object System.IO.StreamReader -ArgumentList @($Stream, $candidate, $false)
                    if ($ReadCount -gt 0) {
                        $charBuffer = New-Object char[] -ArgumentList $ReadCount
                        [void]$reader.Read($charBuffer, 0, $ReadCount)
                    } else {
                        [void]$reader.ReadToEnd()
                    }
                    $detected = $candidate
                    break
                } catch {
                    # Decoding (or rewinding) failed: this candidate does not
                    # fit, so move on to the next one or to the default.
                }
            }
        }
    } catch {
        # Reading the leading bytes failed; report it and use the default below.
        Write-Warning ("Get-Encoding: " + $_.Exception.Message)
    } finally {
        if ($ownsStream) {
            $Stream.Dispose()
        }
    }

    if (!$detected) {
        $detected = [System.Text.Encoding]::Default
    }
    return $detected
}
# Regex matching one character from the CJK Unified Ideographs block.
$reCJK = New-Object System.Text.RegularExpressions.Regex -ArgumentList @('\p{IsCJKUnifiedIdeographs}', 'Compiled, Ignorecase')
# Alternative pattern: count any word character instead of CJK ideographs only.
# $reCJK = New-Object System.Text.RegularExpressions.Regex -ArgumentList @('\w', 'Compiled, Ignorecase')

# For every .txt file in the current location, write a cleaned copy with the
# same name and the same encoding into $dirOut. In each line, punctuation is
# removed from the head (up to and including the $CJKCount-th ideograph) and
# from the tail (from the $CJKCount-th ideograph counted from the end),
# depending on the $替换前8 / $替换后8 switches.
Get-ChildItem -Path .\*.txt -Filter *.txt -Include *.txt |
    Where-Object { -not $_.PSIsContainer } |
    ForEach-Object {
        $sourceFile = $_
        $encoding = Get-Encoding -Path $sourceFile.FullName
        if ($null -eq $encoding) {
            # No encoding was reported; use the encoding for code page 0.
            $encoding = [System.Text.Encoding]::GetEncoding(0)
        }
        try {
            $cleanedLines = New-Object 'System.Collections.Generic.List[string]'
            foreach ($line in [System.IO.File]::ReadAllLines($sourceFile.FullName, $encoding)) {
                if ($替换前8) {
                    $hits = $reCJK.Matches($line)
                    if ($hits.Count -gt 0) {
                        # The head ends right after the $CJKCount-th ideograph,
                        # or after the last one when the line has fewer.
                        $headEnd = $hits[[math]::Min($CJKCount - 1, $hits.Count - 1)].Index + 1
                        $head = $line.Substring(0, $headEnd) -replace '[\p{P}]+', ''
                        $line = $head + $line.Substring($headEnd)
                    }
                }
                if ($替换后8) {
                    # Match again: the head clean-up may have shifted positions.
                    $hits = $reCJK.Matches($line)
                    if ($hits.Count -gt 0) {
                        # The tail starts at the $CJKCount-th ideograph from the
                        # end, or at the first one when the line has fewer.
                        $tailStart = $hits[[math]::Max(0, $hits.Count - $CJKCount)].Index
                        $tail = $line.Substring($tailStart) -replace '[\p{P}]+', ''
                        $line = $line.Substring(0, $tailStart) + $tail
                    }
                }
                $cleanedLines.Add($line)
            }
            $targetPath = Join-Path -Path $dirOut -ChildPath $sourceFile.Name
            [System.IO.File]::WriteAllLines($targetPath, $cleanedLines.ToArray(), $encoding)
        } catch {
            # Report the failure for this file and continue with the next one.
            $_ | Write-Host -ForegroundColor Red
        }
    }
复制代码
|