批处理之家 - Powered by Discuz! Board

数据是从跨境电商平台获取的类目清单，但是这个清单是二维的，提供了所有键值的关系 [ 父级ID，当前项ID，当前项的名称，以及当前的类目层级 ]
在制作平台类目交易指数数据可视化的时候需要分级显示，于是就必须把这些平面展开的关联信息还原为多层级的数据结构
处理要求：只需要还原ID层级结构，结构中不需要包含项的中文名等信息。同层级的清单使用数组或者哈希映射没有要求，只要能递进枚举就行。
语言：不限

复制代码

{
"data" : [
[ 0, 30, "安全防护", 1 ],
[ 30, 3030, "门禁", 2 ],
[ 30, 3009, "消防器材", 2 ],
[ 30, 3011, "视频监控", 2 ],
[ 3011, 200327188, "视频监控配件", 3 ],
[ 30, 3007, "劳动保护用品", 2 ],
[ 0, 21, "办公、文化及教育用品", 1 ],
[ 21, 2213, "期刊与杂志", 2 ],
[ 21, 212002, "展示告示用品", 2 ],
[ 212002, 21200202, "黑板", 3 ],
[ 212002, 100003132, "白板擦、黑板擦等", 3 ],
[ 100003132, 100003183, "黑板擦", 4 ],
[ 100003132, 100003182, "白板擦", 4 ],
[ 0, 509, "电话和通讯", 1 ],
[ 509, 100001205, "手机配件", 2 ],
[ 509, 50906, "对讲机", 2 ],
[ 0, 7, "电脑和办公", 1 ],
[ 7, 200001083, "笔记本电脑部件及配件", 2 ],
[ 7, 70806, "电脑连线及接插件", 2 ],
[ 0, 44, "消费电子", 1 ],
[ 44, 629, "零配件", 2 ],
[ 44, 100000305, "摄影摄像", 2 ]
]
}

复制代码

{
"0" : {
"21" : {
"212002" : {
"100003132" : {
"100003182" : {},
"100003183" : {}
},
"21200202" : {}
},
"2213" : {}
},
"30" : {
"3007" : {},
"3009" : {},
"3011" : {
"200327188" : {}
},
"3030" : {}
},
"44" : {
"100000305" : {},
"629" : {}
},
"509" : {
"100001205" : {},
"50906" : {}
},
"7" : {
"200001083" : {},
"70806" : {}
}
}
}

复制代码

[ 211111, 200656001, "石膏像", 3 ],
[ 21, 211111, "艺术用品", 2 ],
[ 211111, 201330804, "智能取色笔", 3 ],
[ 0, 21, "办公、文化及教育用品", 1 ],
[ 21, 100003131, "美工工具", 2 ],

复制代码

#Requires -Version 5
$sJson = @'
{
"data" : [
[ 0, 30, "安全防护", 1 ],
[ 30, 3030, "门禁", 2 ],
[ 30, 200327231, "楼宇自动化", 2 ],
[ 30, 200327211, "救灾用品", 2 ],
[ 30, 200001791, "急救箱", 2 ],
[ 30, 3009, "消防器材", 2 ],
[ 30, 200004310, "楼宇对讲", 2 ],
[ 30, 200330186, "物联传感设备", 2 ],
[ 30, 200327212, "公共应急广播系统", 2 ],
[ 30, 3015, "交通安全", 2 ],
[ 3015, 42578, "交通安全_1", 3 ],
[ 42578, 9527, "交通安全_1_1", 4 ],
[ 30, 200327196, "保险柜/保险箱", 2 ],
[ 30, 200004311, "防盗报警设备", 2 ],
[ 30, 200328267, "安检防爆检测设备", 2 ],
[ 30, 200328217, "自卫防身及安保用品", 2 ],
[ 30, 200004309, "智能一卡通系统", 2 ],
[ 30, 200004343, "传输设备及电缆", 2 ],
[ 30, 200332185, "安防无人机和机器人", 2 ],
[ 30, 3011, "视频监控", 2 ],
[ 30, 3007, "劳动保护用品", 2 ],
[ 3007, 0, "劳动保护用品_0", 3 ],
[ 3007, 1, "劳动保护用品_1", 3 ],
[ 0, 21, "办公、文化及教育用品", 1 ],
[ 21, 211111, "艺术用品", 2 ],
[ 21, 100003131, "美工工具", 2 ],
[ 21, 100003135, "教学设备及用品", 2 ],
[ 21, 2213, "期刊与杂志", 2 ],
[ 21, 2209, "地图和地图集", 2 ],
[ 21, 2112, "办公用纸及纸制品", 2 ],
[ 21, 212002, "展示告示用品", 2 ],
[ 21, 200001562, "印刷制品", 2 ],
[ 21, 100003125, "学生用品", 2 ],
[ 21, 100003134, "胶带、胶水、包装带等", 2 ],
[ 21, 211106, "桌上收纳用品", 2 ],
[ 21, 100003155, "笔记本、拍纸本等书写用品", 2 ],
[ 21, 100003129, "办公装订用品", 2 ],
[ 21, 200001743, "钢笔,铅笔及书写工具", 2 ],
[ 21, 200004276, "文具贴纸/儿童贴纸", 2 ],
[ 21, 2139, "绘图工具", 2 ],
[ 21, 200652001, "财会用品", 2 ],
[ 21, 201236701, "书籍", 2 ],
[ 21, 201330702, "邮寄和装运", 2 ],
[ 21, 201338004, "文件夹、文件袋等收纳用品", 2 ],
[ 0, 509, "电话和通讯", 1 ],
[ 509, 100001205, "手机配件", 2 ],
[ 509, 100001204, "通信设备", 2 ],
[ 509, 200380144, "对讲机配附件", 2 ],
[ 509, 50906, "对讲机", 2 ],
[ 509, 201084002, "手机部件", 2 ],
[ 1, 7, "电脑和办公", 1 ],
[ 7, 200001083, "笔记本电脑部件及配件", 2 ],
[ 7, 70806, "电脑连线及接插件", 2 ],
[ 7, 708022, "电脑清洁用品", 2 ],
[ 7, 200001081, "电脑外设", 2 ],
[ 7, 200185144, "开发板及配件", 2 ],
[ 7, 100005329, "切换器", 2 ],
[ 7, 200003782, "办公电子", 2 ],
[ 7, 200001085, "平板电脑配件", 2 ],
[ 0, 44, "消费电子", 1 ],
[ 44, 629, "零配件", 2 ],
[ 44, 100000305, "摄影摄像", 2 ],
[ 44, 100000310, "游戏及配附件", 2 ],
[ 44, 100000308, "家用音视频设备", 2 ],
[ 44, 100000306, "便携音视频设备", 2 ],
[ 44, 200003803, "智能电子", 2 ]
]
}
'@
# Parse the JSON text held in $sJson; the rows are then read from the result's "data" property.
$oJson = ConvertFrom-Json -InputObject $sJson
# Rebuilds the ID hierarchy from flat rows of the form
# [ parentId, currentId, name, level ].
# Returns a SortedDictionary[string,object] whose values are nested
# SortedDictionary[string,object] instances (an empty one marks a leaf).
# A stack mirrors the path from the root to the node currently being filled,
# so "stack depth - 1" is the level of the container on top of the stack.
# NOTE(review): this appears to rely on rows arriving parent-first in
# depth-first order - confirm against the real data.
function ParseTreeData {
    param (
        [object[]]$Data
    )
    $dictType = 'System.Collections.Generic.SortedDictionary[string,object]'
    $root = New-Object $dictType
    $path = New-Object System.Collections.Stack
    $path.Push($root)
    foreach ($row in $Data) {
        $parentKey = $row[0].ToString()
        $selfKey = $row[1].ToString()
        $level = $row[3]
        # Row is shallower than the current path: unwind, like returning from recursion.
        while (($path.Count - 1) -gt $level) {
            [void]$path.Pop()
        }
        # A level-1 row always restarts from the root, so that its parent key is
        # looked up (and created if it has not been seen) in the root container.
        if ($level -eq 1 -and ($path.Count - 1) -eq 1) {
            [void]$path.Pop()
        }
        # Row is deeper than the current path: step into the parent's container,
        # creating that container first when it does not exist yet.
        if (($path.Count - 1) -lt $level) {
            $container = $path.Peek()
            if (-not $container.ContainsKey($parentKey)) {
                $container.Add($parentKey, (New-Object $dictType))
            }
            $path.Push($container[$parentKey])
        }
        # Register the current item as an (initially empty) child of the top container.
        $path.Peek().Add($selfKey, (New-Object $dictType))
    }
    return $root
}
# Build the ID-only tree from the parsed rows.
$nodeRoot = ParseTreeData -Data $oJson.Data
# Each bare expression below is written to the output stream: a heading string
# followed by the sub-dictionaries found by indexing level by level.
'Enumerate data hierarchy:'
$nodeRoot["0"]["30"]
$nodeRoot["0"]["30"]["3015"]
$nodeRoot["0"]["30"]["3015"]["42578"]
'ConvertTo-Json:'
# -Depth 100 is passed so nested levels are serialised rather than truncated at the default depth.
$nodeRoot | ConvertTo-Json -Depth 100
# Second variant: the tree also carries each item's (Chinese) name
"`r`n #################### json data with name:"
# Tree node used by ParseTreeData2: an optional display name plus the child
# nodes, keyed by ID string and kept sorted by key.
class Node {
    [string]$Name
    [System.Collections.Generic.SortedDictionary[string, Node]]$Nodes

    # Creates an unnamed node with an empty child collection.
    Node() {
        $this.Nodes = [System.Collections.Generic.SortedDictionary[string, Node]]::new()
    }

    # Creates a node carrying the given name, with an empty child collection.
    Node([string]$name) {
        $this.Nodes = [System.Collections.Generic.SortedDictionary[string, Node]]::new()
        $this.Name = $name
    }
}
# Same reconstruction as ParseTreeData, but builds Node objects so that every
# item keeps its name. Rows have the form [ parentId, currentId, name, level ].
# Returns the (unnamed) root Node; children hang off each node's Nodes map.
# A stack mirrors the path from the root to the node currently being filled.
# NOTE(review): this appears to rely on rows arriving parent-first in
# depth-first order - confirm against the real data.
function ParseTreeData2 {
    param (
        [object[]]$Data
    )
    $root = New-Object Node
    $path = New-Object System.Collections.Stack
    $path.Push($root)
    foreach ($row in $Data) {
        $parentKey = $row[0].ToString()
        $selfKey = $row[1].ToString()
        $label = $row[2]
        $level = $row[3]
        # Row is shallower than the current path: unwind, like returning from recursion.
        while (($path.Count - 1) -gt $level) {
            [void]$path.Pop()
        }
        # A level-1 row always restarts from the root, so that its parent key is
        # looked up (and created if it has not been seen) among the root's children.
        if ($level -eq 1 -and ($path.Count - 1) -eq 1) {
            [void]$path.Pop()
        }
        # Row is deeper than the current path: step into the parent node,
        # creating an unnamed placeholder for it when it does not exist yet.
        if (($path.Count - 1) -lt $level) {
            $siblings = $path.Peek().Nodes
            if (-not $siblings.ContainsKey($parentKey)) {
                $siblings.Add($parentKey, (New-Object Node))
            }
            $path.Push($siblings[$parentKey])
        }
        # Attach the current item, with its name, under the node on top of the stack.
        $path.Peek().Nodes.Add($selfKey, (New-Object Node -ArgumentList @($label)))
    }
    return $root
}
# Build the named tree from the same rows, show two sample nodes as tables,
# then serialise the whole tree (-Depth 100 so nested levels are not truncated).
$nodeRoot2 = ParseTreeData2 -Data $oJson.Data
$nodeRoot2.Nodes["0"].Nodes["30"] | Format-Table -AutoSize
$nodeRoot2.Nodes["0"].Nodes["21"].Nodes["100003125"] | Format-Table -AutoSize
$nodeRoot2 | ConvertTo-Json -Depth 100

复制代码

@echo off
rem Reads the flat rows in test.txt and echoes them as a nested, JSON-like tree of IDs.
rem First revision: on a level decrease it echoes a single closing brace.
setlocal enabledelayedexpansion
for /l %%a in (1,1,5) do set "tab=!tab! "
::padding string used for layout
set n=0
echo;{
rem start from level 0
rem each line of test.txt is split on the square brackets and the second token is kept
for /f "tokens=2 delims=[]" %%a in (test.txt) do (
set str=%%a
rem the text contains quotes, so it is passed through a delayed-expansion variable
for /f "tokens=1-4 delims= " %%1 in ("!str!") do (
for /f "tokens=1-4 delims=," %%1 in ("%%1,%%2,3,%%4") do (
rem field 3 may contain a comma, so it is replaced by a placeholder first
rem the change in the last field tells which level the row is on;
rem same level, deeper, or shallower are each handled and laid out differently.
rem sorting could be added too, but it is slow, so it is skipped unless needed.
rem n holds the level of the previous row, one holds a child ID that is still pending
if !n! lss %%4 (
rem deeper: open the parent the first time it is seen, and remember that in v-parentID
if not defined v%%1 (
echo;!tab:~-%%4!"%%1":{
set v%%1=1
) else (
echo;!tab:~-%%4! %%2
)
set one=%%2
set n=%%4
) else (
if !n! gtr %%4 (
rem shallower: close one brace
echo; !tab:~-%%4!}
set n=%%4
) else (
rem same level: flush the pending child if any, then echo this row as an empty node
if defined one (
echo;!tab:~-%%4! "!one!":{},&set one=
)
echo;!tab:~-%%4! "%%2":{},<nul
)
)
))
)
rem wrap up: echo the closing braces, aligned by level
for /l %%a in (!n!,-1,1) do (
echo;!tab:~-%%a!}
)
echo;}
pause

复制代码

{
"data": [
[0, 1, 0, 1],
[1, 101, 0, 2],
[101, 10101, 0, 3],
[1, 102, 0, 2],
[102, 10201, 0, 3],
[0, 2, 0, 1],
[2, 201, 0, 2],
[201, 20101, 0, 3],
[2, 202, 0, 2],
[202, 20201, 0, 3],
[0, 3, 0, 1],
[3, 301, 0, 2],
[301, 30101, 0, 3],
[3, 302, 0, 2],
[302, 30201, 0, 3]
]
}

复制代码

@echo off
rem Reads the flat rows in test.txt and echoes them as a nested, JSON-like tree of IDs.
rem Second revision: on a level decrease it loops to echo several closing braces,
rem and a same-level row is held in the variable one until the next row arrives.
setlocal enabledelayedexpansion
for /l %%a in (1,1,5) do set "tab=!tab! "
::padding string used for layout
set n=0
echo;{
rem start from level 0
rem each line of test.txt is split on the square brackets and the second token is kept
for /f "tokens=2 delims=[]" %%a in (test.txt) do (
set str=%%a
rem the text contains quotes, so it is passed through a delayed-expansion variable
for /f "tokens=1-4 delims= " %%1 in ("!str!") do (
for /f "tokens=1-4 delims=," %%1 in ("%%1,%%2,3,%%4") do (
rem field 3 may contain a comma, so it is replaced by a placeholder first
rem the change in the last field tells which level the row is on;
rem same level, deeper, or shallower are each handled and laid out differently.
rem sorting could be added too, but it is slow, so it is skipped unless needed.
rem n holds the level of the previous row, one holds a child ID that is still pending
if !n! lss %%4 (
rem deeper: open the parent the first time it is seen, and remember that in v-parentID
if not defined v%%1 (
echo;!tab:~-%%4!"%%1":{
set v%%1=1
) else (
echo;!tab:~-%%4! %%2
)
set one=%%2
rem m is set to one less than the new level and is the start of the closing loop below
set/a n=%%4,m=n-1
) else (
if !n! gtr %%4 (
rem shallower: flush the pending child once, then echo a closing brace per loop step
for /l %%a in (!m!,-1,%%4) do (
if defined one (
echo;!tab:~-%%a! "!one!":{}&set one=
)
echo; !tab:~-%%a!},
)
set n=%%4
) else (
rem same level: echo the pending child, then hold this row as the new pending child
if defined one (
echo;!tab:~-%%4! "!one!":{},
)
set one=%%2
)
)
))
)
rem flush the last pending child, if any
if defined one (
echo;!tab:~-%n%! "!one!":{}&set one=
)
rem wrap up: echo the closing braces, aligned by level
for /l %%a in (!n!,-1,1) do (
echo;!tab:~-%%a!}
)
echo;}
pause

复制代码

[ 211111, 200656001, "石膏像", 3 ],
[ 21, 211111, "艺术用品", 2 ],
[ 211111, 201330804, "智能取色笔", 3 ],
[ 0, 21, "办公、文化及教育用品", 1 ],
[ 21, 100003131, "美工工具", 2 ],

复制代码

{
"data": [
[0, 1, 0, 1],
[1, 101, 0, 2],
[101, 10101, 0, 3],
[101, 10102, 0, 3],
[101, 10103, 0, 3],
[10103, 1010300, 0, 4],
[101, 10104, 0, 3],
[1, 102, 0, 2],
[102, 10201, 0, 3],
[102, 10202, 0, 3],
[10202, 1020200, 0, 4],
[102, 10203, 0, 3],
[102, 10204, 0, 3]
]
}

复制代码

@echo off
rem Reads the flat rows in test.txt and echoes them as a nested, JSON-like tree of IDs.
rem Third revision: the closing loop now runs from the previous level n down to the
rem new level plus one, and the row that caused the decrease becomes the pending child.
setlocal enabledelayedexpansion
for /l %%a in (1,1,5) do set "tab=!tab! "
::padding string used for layout
set n=0
echo;{
rem start from level 0
rem each line of test.txt is split on the square brackets and the second token is kept
for /f "tokens=2 delims=[]" %%a in (test.txt) do (
set str=%%a
rem the text contains quotes, so it is passed through a delayed-expansion variable
for /f "tokens=1-4 delims= " %%1 in ("!str!") do (
for /f "tokens=1-4 delims=," %%1 in ("%%1,%%2,3,%%4") do (
rem field 3 may contain a comma, so it is replaced by a placeholder first
rem the change in the last field tells which level the row is on;
rem same level, deeper, or shallower are each handled and laid out differently.
rem sorting could be added too, but it is slow, so it is skipped unless needed.
rem n holds the level of the previous row, one holds a child ID that is still pending
if !n! lss %%4 (
rem deeper: open the parent the first time it is seen, and remember that in v-parentID
if not defined v%%1 (
echo;!tab:~-%%4!"%%1":{
set v%%1=1
) else (
echo;!tab:~-%%4! %%2
)
set one=%%2
set/a n=%%4
) else (
if !n! gtr %%4 (
rem shallower: m is the new level plus one, the lowest level that gets a closing brace
set /a m=%%4+1
rem $$$$$$ changed here $$$$$$$
for /l %%a in (!n!,-1,!m!) do (
if defined one (
echo;!tab:~-%%a! "!one!":{}&set one=
)
echo;!tab:~-%%a!},
set n=%%4
)
set one=%%2
rem $$$$$$$ changed here $$$$$$
) else (
rem same level: echo the pending child, then hold this row as the new pending child
if defined one (
echo;!tab:~-%%4! "!one!":{},
)
set one=%%2
)
)
))
)
rem flush the last pending child, if any
if defined one (
echo;!tab:~-%n%! "!one!":{}&set one=
)
rem wrap up: echo the closing braces, aligned by level
for /l %%a in (!n!,-1,1) do (
echo;!tab:~-%%a!}
)
echo;}
pause

复制代码

[ 0, 30, "安全防护", 1 ],
[ 30, 3011, "视频监控", 2 ],
[ 3011, 200327188, "视频监控配件", 3 ],
[ 0, 21, "办公、文化及教育用品", 1 ],

复制代码

# Attach $bar under $foo the first time this parent/child pair is seen.
# Per the original note, $ref is a lookup table of node references into
# $tree (not visible in this fragment), so writing through $ref->{$foo}
# modifies the corresponding node of $tree directly.
unless ( exists $ref->{$foo}{$bar} ) {
    my $node = {};
    $ref->{$foo}{$bar} = $node;    # new child node under the $foo node
    $ref->{$bar}       = $node;    # register the same node in the lookup table under $bar
}

复制代码

use Modern::Perl;
use File::Slurp;
use Time::HiRes qw/time/;
use JSON qw/from_json/;
use feature 'signatures';
no warnings 'experimental::signatures';
# Convert the flat category rows in gener_big.json into a nested, JSON-like
# tree of IDs, written line by line to Category_Tree_benton.json.
# Each row is [ parent_id, child_id, label, level ]; the level-tracking logic
# compares every row's level with the previous row's level.
# NOTE(review): this presumably expects rows in depth-first, parent-first
# order - confirm against the input data.
STDOUT->autoflush(1);
my $ta = time();    # start timestamp, reported through time_delta() at the end
my $n = 0;          # level of the most recently handled row

my $out_file = "Category_Tree_benton.json";
open my $FH, ">:raw", $out_file
    or die "Cannot open $out_file for writing: $!";
say $FH "{";

my %hash;           # parent IDs whose opening line has already been written
my $prev;           # child ID that was read but not written out yet
my $raw = read_file( "gener_big.json" );
my $data = from_json( $raw, {relaxed => 1} );

for my $e ( @{$data->{data}} )
{
    # The third field (the label) is not needed for the ID-only tree.
    my ( $parent, $child, undef, $lv ) = @$e;
    next unless defined $lv;    # skip rows that carry no level field

    if ( $n < $lv )
    {
        # Deeper than the previous row: open the parent the first time it is seen.
        if ( not defined $hash{$parent} )
        {
            say $FH "\t"x$lv, qq("$parent"), ":{";
            $hash{$parent} = 1;
        }
        else
        {
            say $FH "\t"x($lv+1), $child;
        }
        $prev = $child;
        $n = $lv;
    }
    elsif ( $n > $lv )
    {
        # Shallower than the previous row: flush the pending child once, then
        # close one brace for every level between the old and the new level.
        my $m = $lv + 1;
        for my $depth ( reverse ($m .. $n) )
        {
            if ( defined $prev ) {
                say $FH "\t"x($depth+1), qq("$prev"), ":{}" ;
                undef $prev;
            }
            say $FH "\t"x$depth, "},";
        }
        $n = $lv;
        $prev = $child;
    }
    else
    {
        # Same level: write the pending sibling and hold the current one.
        if ( defined $prev ) {
            say $FH "\t"x($lv+1), qq("$prev"), ":{},";
        }
        $prev = $child;
    }
}

# Flush the last pending child, if any.
if ( defined $prev )
{
    say $FH "\t"x$n, qq("$prev"), ":{}";
    undef $prev;
}

# Wrap up: close every level that is still open, innermost first.
for my $depth ( reverse ( 0 .. $n ) )
{
    say $FH "\t"x$depth, "}";
}

# Buffered write errors only surface at close, so check it.
close $FH or die "Cannot close $out_file: $!";
time_delta(\$ta);
# Print the time elapsed since the timestamp that $t refers to, then
# reset that timestamp to the current time.
#   $t - reference to a scalar holding a time() value; updated in place.
sub time_delta ($t)
{
    my $elapsed = time() - ${$t};
    printf "Time Delta: %.2f\n", $elapsed;
    ${$t} = time();
}

复制代码

#python 3
import json, networkx as nx
# Load the flat rows and keep only the first two fields of each one
# (parent ID and child ID), which form the edges of the category tree.
with open("Category_Plain.json") as f:
    rows = json.load(f)['data']
    el = [row[:2] for row in rows]

# Build a directed parent -> child graph from the edge list.
g = nx.from_edgelist(el, create_using=nx.DiGraph())

# Convert the tree rooted at node 0 into nested data and print it as JSON.
tree = nx.tree_data(g, root=0)
print(json.dumps(tree, indent=2))

复制代码