我有一个列表/集合的10 s数以百万计的目录,他们的长度需要减少,以满足一个MaxLenghtPath之前,创建新的目录。
使用简单的集合|ForEach需要很长时间,所以我希望使用Foreach-Object -ThrottleLimit 64 -Parallel使用此代码,但仍然非常慢,大约每小时一千个目录。
Clear-Host
$path = "C:\a\15Mil.txt"
$Global:ParentDir = "e:\!Data"
$MaxPathLength = 220
$Global:MaxNumOfDirs = 1000
$Global:NewPathList = New-Object System.Collections.Generic.List[System.Object]
$Global:ShortPathList = New-Object System.Collections.Generic.List[System.Object]
$NewPathsArray = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$ShortPathsArray = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
Remove-Item -Path $ParentDir -Force -Recurse -ErrorAction SilentlyContinue
New-Item -Path $ParentDir -Force -ItemType Directory | Out-Null
$dirCount = 0
$NewPathList = (([System.IO.File]::ReadAllLines($path)))
$NewPathList = $newpathlist | get-random -Count $maxnumofdirs
$totalSeconds = (Measure-Command {
:ShortLoop ForEach($SubDir in $NewPathList)
{
[int]$Chars2Remove = $SubDir.length - $MaxPathLength
If (($SubDir.Length -gt $MaxPathLength))
{
[string]$TopLevel = "{0}\{1}\{2}" -f $SubDir.Split("\")
[string]$Path2Shorten = $null
[string]$Path2Shorten = $SubDir -replace([regex]::Escape($TopLevel))
:RemoveCharsLoop For($c=1;$c -le $Chars2Remove;$c++)
{
[string]$MostRecurringChar = (($Path2Shorten.GetEnumerator() | Where-Object { !($_.ToString().Contains("\") ) -and ![string]::IsNullOrEmpty($_) }) |`
Group-Object -NoElement | Sort-Object count -Descending | Select-Object -First 1).Name
[int]$RandIndex = ([regex]::matches($Path2Shorten,'[.$MostRecurringChar]').index | Select-Object -Last 100) | Get-Random -Count 1
$Path2Shorten = $Path2Shorten.Remove($RandIndex,1)
$ShortString = $Path2Shorten
if (($TopLevel.Length + $Path2Shorten.Length ) -le $MaxPathLengthP)
{
Break RemoveCharsLoop
}
}
[String]$SubDir2Create = $TopLevel + $ShortString
#}
}
else {
[String]$SubDir2Create = $null
[String]$SubDir2Create = $SubDir.ToString()
}
$ShortPathList.Add($SubDir2Create)
If ($ShortPathList.count -ge $MaxNumOfDirs)
{
Break ShortLoop
}
}
($ShortPathList | Get-Random -Count $MaxNumOfDirs) | ForEach{$null = New-Item -Path $_ -ItemType Directory -Force -ErrorAction SilentlyContinue}
}
).TotalSeconds
###### Parallel routine bellow ######
CLS
$path = "C:\a\15Mill.txt"
$Global:ParentDir = "e:\!ParalellData"
$MaxPathLength = 200
$NewPathsArray = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$ShortPathsArray = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$NewPathsArray = (([System.IO.File]::ReadAllLines($path)))
$NumberOfDirStrings = $NewPathsArray.Count - 1
$totalSecondsp = (Measure-Command{
0..$NumberOfDirStrings | Foreach-Object -ThrottleLimit 128 -Parallel {
$NewPathListIndex = $_
$SafeShortPathsArray = $Using:ShortPathsArray
$SafeNewPathsArray = $Using:NewPathsArray
$MaxPathLengthp = $Using:MaxPathLength
$MaxNumOfDirsp = $Using:MaxNumOfDirs
$ParentDirP = $Using:ParentDir
[string]$SubDir = $SafeNewPathsArray[$NewPathListIndex].ToString()
[int]$Chars2Remove = $SubDir.length - $MaxPathLength
If (($SubDir.Length -gt $MaxPathLengthP))
{
[string]$TopLevel = ("{0}\{1}\{2}" -f $SubDir.Split("\"))
[string]$Path2Shorten = $null
[string]$Path2Shorten = ($SubDir -replace([regex]::Escape($TopLevel))).ToString()
:RemoveChar For($c=1;$c -le $Chars2Remove;$c++)
{
$MostRecurringChar = (($Path2Shorten.GetEnumerator() | Where-Object { !($_.ToString().Contains("\") ) -and ![string]::IsNullOrEmpty($_) }) | `
Group-Object -NoElement | Sort-Object count -Descending | Select-Object -First 1).Name
[int]$RandIndex = (([regex]::matches($Path2Shorten,'[.$MostRecurringChar]').index | Select-Object -Last 100) | Get-Random -Count 1)
$Path2Shorten = $Path2Shorten.Remove($RandIndex,1)
$ShortString = $Path2Shorten
if (($TopLevel.Length + $Path2Shorten.Length ) -le $MaxPathLengthP)
{
Break RemoveChar
}
}
$SubDir2Create = $TopLevel + $ShortString
}
else {
[String]$SubDir2Create = $null
[String]$SubDir2Create = $SubDir.ToString()
}
$SafeShortPathsArray[$NewPathListIndex] = ($SubDir2Create)
If ($SafeShortPathsArray.count -ge $MaxNumOfDirsp)
{
Break
}
}
($ShortPathsArray | Get-Random -Count $MaxNumOfDirs) | ForEach{$null = New-Item -Path $_ -ItemType Directory -Force -ErrorAction SilentlyContinue}
}).TotalSeconds
Write-Host "the linear forloop took $totalSeconds seconds"
Write-Host "the linear forloop parallel took $totalSecondsp seconds"
1条答案
按热度按时间4ioopgfo1#
简单说说我的评论。