本帖最后由 WHY 于 2020-6-27 13:41 编辑
// Test.js
// Shared state for the script (Windows Script Host / JScript).
var srcDir = 'E:/Test/X42',   // folder that holds the XML files
    dstFile = 'result.csv',   // name of the CSV file to write
    out = [],                 // accumulated CSV rows, one string per row
    xml = new ActiveXObject('Microsoft.XMLDOM'),            // reused XML DOM parser
    fso = new ActiveXObject('Scripting.FileSystemObject');  // file-system access

/**
 * Loads one XML file and appends one CSV row per <char> element to the
 * shared `out` array.
 *
 * Row layout: charName, every <value> found (at least two columns),
 * the "...unicode" mapping, the "PUA" mapping. A column with no match is
 * written as an unquoted 0. More than two <value> elements widen the row.
 *
 * @param {string} fp Path of the XML file to read.
 */
var getXMLData = function(fp) {
    // Microsoft.XMLDOM loads asynchronously by default; without this the
    // query below can run before the document has finished loading.
    xml.async = false;
    // load() returns false for a missing or malformed file: nothing to extract.
    if (!xml.load(fp)) return;

    // The text is taken from serialized XML, so decode the predefined
    // entities (&amp; last, so "&amp;lt;" is not decoded twice), then
    // double embedded quotes so the field stays valid CSV.
    var toCsvField = function(raw) {
        var text = raw.replace(/&lt;/g, '<')
                      .replace(/&gt;/g, '>')
                      .replace(/&quot;/g, '"')
                      .replace(/&apos;/g, "'")
                      .replace(/&amp;/g, '&');
        return '"' + text.replace(/"/g, '""') + '"';
    };

    // Quoted first capture of `re` in `s`, or the '0' placeholder.
    var firstMatch = function(s, re) {
        var found = s.match(re);
        return found ? toCsvField(found[1]) : '0';
    };

    var reg = /<value>([^<>]*)</g;
    var nodes = xml.selectNodes('//char'), count = nodes.length;
    for (var i = 0; i < count; i++) {
        var s = nodes[i].xml;

        // Collect every <value>; exec() returning null resets reg.lastIndex,
        // so the same regex object is safe to reuse for the next node.
        var values = ['0', '0'];
        var m, j = 0;
        while ((m = reg.exec(s)) !== null) values[j++] = toCsvField(m[1]);

        out.push([
            firstMatch(s, /<charName>([^<>]*)</),
            values.join(','),
            firstMatch(s, /[_"]unicode">([^<>]*)</),
            firstMatch(s, /"PUA">([^<>]*)</)
        ].join(','));
    }
};

/**
 * Writes every row collected in the shared `out` array to a UTF-8 text
 * file, rows separated by CRLF. An existing file is overwritten.
 *
 * @param {string} dstFile Path of the CSV file to create.
 */
var writeToCsv = function(dstFile) {
    var ado = new ActiveXObject('ADODB.Stream');
    ado.Mode = 3;             // adModeReadWrite
    ado.Type = 2;             // adTypeText
    ado.Charset = 'utf-8';
    ado.Open();
    try {
        ado.WriteText(out.join('\r\n'));
        ado.SaveToFile(dstFile, 2);   // adSaveCreateOverWrite
    } finally {
        // Release the stream even when writing or saving fails.
        ado.Close();
    }
};

/**
 * Walks a folder tree: passes every file whose path ends in ".xml"
 * (case-insensitive) to getXMLData, then recurses into each subfolder.
 *
 * @param {string} fd Path of the folder to scan.
 */
var getXmlFile = function(fd) {
    var fileIter = new Enumerator(fso.getFolder(fd).Files);
    var dirIter = new Enumerator(fso.getFolder(fd).SubFolders);

    // Files directly inside this folder.
    while (!fileIter.atEnd()) {
        var path = fileIter.item().Path;
        if (/\.xml$/i.test(path)) {
            getXMLData(path);
        }
        fileIter.moveNext();
    }

    // Recurse into the subfolders.
    while (!dirIter.atEnd()) {
        getXmlFile(dirIter.item().Path);
        dirIter.moveNext();
    }
};

// Entry point: gather rows from every XML file under srcDir, write them to
// dstFile, then report completion.
getXmlFile(srcDir);
writeToCsv(dstFile);

WScript.Echo('Done');
复制代码
out.push(a) 效率非常低,已改为 out.push(a.join(','))。
# A regex-free alternative (PowerShell 3.0+): read each XML file through the
# [xml] adapter and write one CSV row per <char> element.
# Row layout: charName, value #1, value #2, "...unicode" mapping, "PUA" mapping.
# A missing column is written as an unquoted 0.
$srcDir  = 'E:\Test\X42'
$dstFile = 'Result.CSV'

# .NET resolves relative paths against the process working directory, not the
# current PowerShell location, so expand $dstFile to a full path first.
$dstPath = $ExecutionContext.SessionState.Path.GetUnresolvedProviderPathFromPSPath($dstFile)

# Wraps a value in double quotes, doubling embedded quotes so it stays valid CSV.
function ConvertTo-CsvField([string]$Text) {
    '"' + $Text.Replace('"', '""') + '"'
}

$fsw = New-Object System.IO.StreamWriter($dstPath, $false, [Text.Encoding]::UTF8)
try {
    # @() keeps $files an array even when zero or one file matches.
    $files = @(Get-ChildItem -LiteralPath $srcDir -Filter '*.xml' -Recurse -File)
    $count = $files.Count

    for ($i = 0; $i -lt $count; $i++) {
        [xml]$xml = [IO.File]::ReadAllLines($files[$i].FullName, [Text.Encoding]::UTF8)
        $node = $xml.GetElementsByTagName('char')
        $len  = $node.Count

        for ($j = 0; $j -lt $len; $j++) {
            $arr      = @('0', '0', '0', '0', '0')
            $charName = $node[$j].charName
            $value    = @( $node[$j].charProp.Value )
            $type     = @( $node[$j].mapping.type )
            $text     = @( $node[$j].mapping.innerText )

            if ($null -ne $charName) { $arr[0] = ConvertTo-CsvField $charName }

            # Only the first two values / mappings are considered.
            for ($k = 0; $k -lt 2; $k++) {
                if ($null -ne $value[$k]) { $arr[1 + $k] = ConvertTo-CsvField $value[$k] }
                if ($null -ne $type[$k]) {
                    if ($type[$k].EndsWith('unicode')) { $arr[3] = ConvertTo-CsvField $text[$k] }
                    if ($type[$k] -eq 'PUA')           { $arr[4] = ConvertTo-CsvField $text[$k] }
                }
            }
            $fsw.WriteLine($arr -join ',')
        }

        if ($i % 1000 -eq 0) { $fsw.Flush() }
    }
}
finally {
    # Close() flushes pending output; running it here releases the file
    # even when a source file fails to parse.
    $fsw.Close()
}
pause
复制代码
|