I'm a bit new to PowerShell (version 5) and was wondering if there are any improvements I could make to the following script. Any suggestions for style, code, etc. are all welcome.
The script recursively walks the directory passed in as a parameter and writes a CSV of each file's attributes and extended properties. I then load the CSV files into SQL Server.
I'm not sure if there is a more efficient way of filtering the files by last modified date. I need to run it against 30 million files in batches of 500,000.
I've included an example of the UNC path that I use to produce the results file name in the `$searchPath` variable.
I'm currently running this script from a `.bat` file. Is there a better way of running these scripts concurrently?
PowerShell Script
<#
.SYNOPSIS
    Exports file-system attributes and selected Windows Shell extended
    properties of recently created/modified files to a CSV file.

.DESCRIPTION
    Recursively enumerates the files under the search path, keeps those whose
    LastWriteTime or CreationTime falls strictly between $startDate and
    $endDate, and writes one CSV row per file to C:\Temp\Results.

    The search path is taken from the first script argument when one is
    supplied; otherwise the built-in example path is used.
    The results file name is derived from the search path, which is expected
    to look like '\\server\<state>$\<office>\<folder>\*.*'.

.NOTES
    - The shell column names ('Title', 'Authors', ...) are matched against the
      headers returned by Folder.GetDetailsOf; NOTE(review): these headers are
      presumably localised by Windows - confirm on non-English systems.
    - 'Last printed' / 'Date last saved' are parsed with the pattern
      'd/MM/yyyy h:mm tt' using the current culture.
    - NOTE(review): Export-Csv is left on its default encoding so the output
      file is unchanged for the downstream load; consider -Encoding UTF8 if
      titles/authors can contain non-ASCII text.
#>

# An example of the path format used as a parameter for $args[0]
$defaultSearchPath = '\\server\ca$\los angeles\documents\*.*'
$searchPath = if ($args.Count -gt 0 -and $args[0]) { [string]$args[0] } else { $defaultSearchPath }

$resultsFileName = ''

# Convert the date bounds once instead of letting every comparison in the
# filter re-convert a string to a DateTime.
$startDate = [datetime]'01-Jul-2018'
$endDate   = [datetime]'31-Jul-2019'

# One COM shell object for the whole run (previously one was created per file).
$shell = New-Object -COMObject Shell.Application

# Shell detail columns to export, and the highest column index to probe.
$wantedColumns = @(
    'Attributes', 'Title', 'Authors', 'Last printed', 'Date last saved',
    'Pages', 'Word count', 'Total editing time', 'File count'
)
$maxColumnIndex = 325

#set date defaults
$dateTimeFormat = 'dd-MMM-yyyy HH:mm:ss.fff'

# Cleans a shell date string and reformats it with $Format.
# Returns 'NULL' for an empty value; returns the cleaned text unchanged (with
# a warning) when it does not match the expected 'd/MM/yyyy h:mm tt' pattern.
function ConvertTo-ExportDate {
    param(
        [string]$Value,
        [string]$Format
    )

    #replace non date characters
    $clean = $Value -replace '[^\p{L}\p{Nd}/(/}/_/:/ ]', ''
    if ($clean -eq '') {
        return 'NULL'
    }

    try {
        return [datetime]::ParseExact($clean, 'd/MM/yyyy h:mm tt', $null).ToString($Format)
    }
    catch {
        Write-Warning "Could not parse date value '$clean'; exporting it as-is."
        return $clean
    }
}

$executeStart = (Get-Date).ToString($dateTimeFormat)
Write-Host 'Execute Start:' $executeStart

#get the parent folder from the search path
Write-Host 'Search Path:' $searchPath

# Drop the first character and the trailing '*.*' so Split-Path can walk the
# path components. Paths too short to have that shape are left unnamed.
$parent = ''
if ($searchPath.Length -ge 4) {
    $folder = $searchPath.Substring(1, $searchPath.Length - 4)
    $parent = Split-Path $folder
}

#if the path is not in the expected format dont set the results file name
if ($parent.Length -gt 2) {
    #get the state name
    $state = Split-Path (Split-Path $parent) -Leaf
    $state = ($state -replace '\$', '').ToLower()

    #get the office name
    $office = Split-Path $parent -Leaf
    $office = ($office -replace '\W', '-').ToLower()

    $resultsFileName = $state + '_' + $office
}

#format the result file name and path
# The timestamped fallback name is only used when no name could be derived
# from the search path (existing behaviour, kept so file names do not change).
$resultFileTimestamp = Get-Date -Format 'yyyyMMdd_HHmmss.fff'
if ($resultsFileName -eq '') {
    $resultsFileName = 'results_' + $resultsFileName + '_' + $resultFileTimestamp
}
$resultsFile = "C:\Temp\Results\$resultsFileName.csv"
Write-Host 'Results File:' $resultsFile

$linenumber = 1

# Shell folder object and column-name -> column-index map for the most
# recently seen directory. They are rebuilt only when the directory changes,
# so the header lookup is not repeated for every file in the same folder.
$cachedFolderPath = $null
$shellFolder      = $null
$columnIndexes    = @{}

#get the file attributes recursively from the search path
# -File skips directories, which have no content to measure.
Get-ChildItem -Recurse -File -Path $searchPath |
    Where-Object {
        ($_.LastWriteTime -gt $startDate -and $_.LastWriteTime -lt $endDate) -or
        ($_.CreationTime -gt $startDate -and $_.CreationTime -lt $endDate)
    } |
    ForEach-Object {
        $item     = $_
        $fullName = $item.FullName
        $folder   = $item.DirectoryName
        $file     = $item.Name

        if ($folder -ne $cachedFolderPath) {
            $cachedFolderPath = $folder
            $shellFolder      = $shell.Namespace($folder)
            $columnIndexes    = @{}

            if ($shellFolder) {
                for ($columnIndex = 0; $columnIndex -le $maxColumnIndex; $columnIndex++) {
                    # A $null item makes GetDetailsOf return the column header.
                    $columnName = $shellFolder.GetDetailsOf($null, $columnIndex)
                    if ($wantedColumns -contains $columnName) {
                        if (-not $columnIndexes.ContainsKey($columnName)) {
                            $columnIndexes[$columnName] = @()
                        }
                        $columnIndexes[$columnName] += $columnIndex
                    }
                }
            }
        }

        # Start every file from empty values so nothing leaks over from the
        # previous file when a property is missing.
        $properties = @{}
        foreach ($name in $wantedColumns) {
            $properties[$name] = ''
        }

        $shellFile = if ($shellFolder) { $shellFolder.ParseName($file) } else { $null }
        if ($shellFile) {
            foreach ($name in $columnIndexes.Keys) {
                # If a header occurs at several indexes, the last non-empty
                # value wins, matching the order of the original index scan.
                foreach ($columnIndex in $columnIndexes[$name]) {
                    $value = $shellFolder.GetDetailsOf($shellFile, $columnIndex)
                    if ($value) {
                        $properties[$name] = $value
                    }
                }
            }
        }

        #format extended properties
        $LastPrinted   = ConvertTo-ExportDate -Value $properties['Last printed'] -Format $dateTimeFormat
        $DateLastSaved = ConvertTo-ExportDate -Value $properties['Date last saved'] -Format $dateTimeFormat

        #remove carriage return line feed from string
        $Title   = ([string]$properties['Title']).Replace("`n", "").Replace("`r", "")
        $Authors = ([string]$properties['Authors']).Replace("`n", "").Replace("`r", "")

        #show the user what file number the script is on
        $currentFileDateTime = Get-Date -Format $dateTimeFormat
        Write-Host $linenumber, $currentFileDateTime, $fullName

        # Character count of the file content. -LiteralPath keeps file names
        # containing wildcard characters such as '[' or ']' from being
        # interpreted as patterns.
        $characters = (Get-Content -LiteralPath $fullName | Measure-Object -Character).Characters

        #format the output of the csv file
        # Size and timestamps come from the object already on the pipeline
        # rather than from additional per-file lookups.
        [pscustomobject][ordered]@{
            LineNumber       = $linenumber
            ExtractTime      = Get-Date -Format $dateTimeFormat
            FullName         = $fullName
            FilePath         = $folder
            FileName         = $file
            FileSizeKb       = $item.Length / 1024
            CreationTime     = $item.CreationTime.ToString($dateTimeFormat)
            LastWriteTime    = $item.LastWriteTime.ToString($dateTimeFormat)
            Attributes       = [string]$properties['Attributes']
            Title            = $Title
            Authors          = $Authors
            LastPrinted      = $LastPrinted
            LastSaved        = $DateLastSaved
            PageCount        = [string]$properties['Pages']
            WordCount        = [string]$properties['Word count']
            Characters       = $characters
            TotalEditingTime = [string]$properties['Total editing time']
            FileCount        = [string]$properties['File count']
        }

        $linenumber++
    } |
    Export-Csv -NoTypeInformation -Path $resultsFile

# Release the COM shell object now that enumeration has finished.
[void][System.Runtime.InteropServices.Marshal]::ReleaseComObject($shell)

$executeEnd = (Get-Date).ToString($dateTimeFormat)
Write-Host 'Execute End:' $executeEnd