powershell 如何使用ForEach-Object -Paralell创建数百万个目录?

xbp102n0  于 2023-03-30  发布在  Shell
关注(0)|答案(1)|浏览(125)

我有一个列表/集合的10 s数以百万计的目录,他们的长度需要减少,以满足一个MaxLenghtPath之前,创建新的目录。
使用简单的集合|ForEach需要很长时间,所以我希望使用Foreach-Object -ThrottleLimit 64 -Parallel使用此代码,但仍然非常慢,大约每小时一千个目录。

Clear-Host
$path               = "C:\a\15Mil.txt"
$Global:ParentDir   = "e:\!Data"
$MaxPathLength      = 220
$Global:MaxNumOfDirs          = 1000

$Global:NewPathList = New-Object System.Collections.Generic.List[System.Object] 
$Global:ShortPathList = New-Object System.Collections.Generic.List[System.Object]

$NewPathsArray      = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$ShortPathsArray    = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()

Remove-Item -Path $ParentDir -Force -Recurse -ErrorAction SilentlyContinue
New-Item -Path $ParentDir -Force -ItemType Directory | Out-Null
$dirCount = 0
$NewPathList = (([System.IO.File]::ReadAllLines($path)))
$NewPathList = $newpathlist | get-random -Count $maxnumofdirs
$totalSeconds = (Measure-Command {
:ShortLoop ForEach($SubDir in $NewPathList)
{
    [int]$Chars2Remove  = $SubDir.length - $MaxPathLength
    If (($SubDir.Length -gt $MaxPathLength))
    {
        [string]$TopLevel           = "{0}\{1}\{2}" -f $SubDir.Split("\")
        [string]$Path2Shorten       = $null
        [string]$Path2Shorten       = $SubDir -replace([regex]::Escape($TopLevel))
            :RemoveCharsLoop For($c=1;$c -le $Chars2Remove;$c++)
            {
            [string]$MostRecurringChar     = (($Path2Shorten.GetEnumerator() | Where-Object { !($_.ToString().Contains("\") ) -and ![string]::IsNullOrEmpty($_) }) |`
                                                Group-Object -NoElement | Sort-Object count -Descending | Select-Object -First 1).Name
            [int]$RandIndex = ([regex]::matches($Path2Shorten,'[.$MostRecurringChar]').index | Select-Object -Last 100) | Get-Random -Count 1
            $Path2Shorten = $Path2Shorten.Remove($RandIndex,1)
            $ShortString = $Path2Shorten
                if (($TopLevel.Length + $Path2Shorten.Length ) -le $MaxPathLengthP)
                {
                    Break RemoveCharsLoop
                }

            }
        [String]$SubDir2Create      = $TopLevel  + $ShortString
        #}
    }
    else {
        [String]$SubDir2Create      = $null
        [String]$SubDir2Create      = $SubDir.ToString()
    }
    $ShortPathList.Add($SubDir2Create)
    If ($ShortPathList.count -ge $MaxNumOfDirs)
    {
        Break ShortLoop
    }

}
($ShortPathList | Get-Random -Count $MaxNumOfDirs) | ForEach{$null = New-Item -Path $_ -ItemType Directory -Force  -ErrorAction SilentlyContinue}
}
).TotalSeconds

###### Parallel routine  bellow ###### 
CLS
$path               = "C:\a\15Mill.txt"
$Global:ParentDir   = "e:\!ParalellData"
$MaxPathLength      = 200

$NewPathsArray      = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$ShortPathsArray    = [System.Collections.Concurrent.ConcurrentDictionary[string,object]]::new()
$NewPathsArray      = (([System.IO.File]::ReadAllLines($path)))
$NumberOfDirStrings = $NewPathsArray.Count - 1
$totalSecondsp = (Measure-Command{

0..$NumberOfDirStrings | Foreach-Object -ThrottleLimit 128 -Parallel {
    $NewPathListIndex       = $_
    $SafeShortPathsArray    = $Using:ShortPathsArray
    $SafeNewPathsArray      = $Using:NewPathsArray
    $MaxPathLengthp         = $Using:MaxPathLength
    $MaxNumOfDirsp          = $Using:MaxNumOfDirs
    $ParentDirP              = $Using:ParentDir
    [string]$SubDir         = $SafeNewPathsArray[$NewPathListIndex].ToString()
    [int]$Chars2Remove      = $SubDir.length - $MaxPathLength
    If (($SubDir.Length -gt $MaxPathLengthP))
    {
        [string]$TopLevel           = ("{0}\{1}\{2}" -f $SubDir.Split("\"))
        [string]$Path2Shorten       = $null
        [string]$Path2Shorten       = ($SubDir -replace([regex]::Escape($TopLevel))).ToString()
        :RemoveChar For($c=1;$c -le $Chars2Remove;$c++)
            {
                $MostRecurringChar     = (($Path2Shorten.GetEnumerator() | Where-Object { !($_.ToString().Contains("\") ) -and ![string]::IsNullOrEmpty($_) }) | `
                                            Group-Object -NoElement | Sort-Object count -Descending | Select-Object -First 1).Name
                [int]$RandIndex = (([regex]::matches($Path2Shorten,'[.$MostRecurringChar]').index | Select-Object -Last 100) | Get-Random -Count 1)
                $Path2Shorten = $Path2Shorten.Remove($RandIndex,1)
                $ShortString = $Path2Shorten
                if (($TopLevel.Length + $Path2Shorten.Length ) -le $MaxPathLengthP)
                {
                    Break RemoveChar
                }
            }

        $SubDir2Create      = $TopLevel  + $ShortString

    }
    else {
        [String]$SubDir2Create      = $null
        [String]$SubDir2Create      = $SubDir.ToString()
    }
    $SafeShortPathsArray[$NewPathListIndex] = ($SubDir2Create)
    If ($SafeShortPathsArray.count -ge $MaxNumOfDirsp)
    {
        Break
    }

}
($ShortPathsArray |  Get-Random -Count $MaxNumOfDirs) | ForEach{$null = New-Item -Path $_ -ItemType Directory -Force  -ErrorAction SilentlyContinue}
}).TotalSeconds
Write-Host "the linear forloop took $totalSeconds seconds"
Write-Host "the linear forloop parallel took $totalSecondsp seconds"
4ioopgfo

4ioopgfo1#

简单说说我的评论。

Clear-Host
$jobs = (
'Get-Process', 
'Get-Service', 
'Get-EventLog -LogName Application'
).ForEach({Start-Job -ScriptBlock {param($PSItem) & $PSitem} -ArgumentList $PSItem})

Get-Job -Verbose
# Results
<#
Id     Name            PSJobTypeName   State         HasMoreData     Location             Command                  
--     ----            -------------   -----         -----------     --------             -------                  
1      Job1            BackgroundJob   Completed     False           localhost             param($command) & $co...
3      Job3            BackgroundJob   Completed     False           localhost             param($command) & $co...
5      Job5            BackgroundJob   Failed        False           localhost             param($command) & $co...
7      Job7            BackgroundJob   Running       True            localhost             param($command) & $co...
9      Job9            BackgroundJob   Running       True            localhost             param($command) & $co...
11     Job11           BackgroundJob   Running       True            localhost             param($command) & $co...
#>

($results = ($jobs).ForEach({Receive-Job $PSItem -Wait -AutoRemove}))

相关问题