globalscape/python/parseSiteRoot.py

76 lines
1.6 KiB
Python
Raw Normal View History

2022-04-27 14:05:07 -05:00
#!/usr/bin/python3
import sys
import re
import string
from collections import defaultdict
# Summarise a recursive directory listing read from stdin.
#
# The listing is presumably PowerShell `Get-ChildItem -Recurse` output
# (directory header lines followed by "-a----" file rows) -- TODO confirm.
# For every ancestor folder below gsbdata\InetPub the script totals the size
# of all files and the size of "old" files, then prints one row per folder:
#
#     <old bytes> <total bytes> <old percent>% <folder>
#
# sorted in descending text order (i.e. largest "old bytes" first, because
# the numeric columns are right-aligned to a fixed width).

# A file counts as "old" when its date column ends with one of these years.
OLD_FILE_YEARS = ("2019", "2018")

# Directory headers are recognised by the path fragment itself rather than by
# a "Directory:" label; the "." wildcards stand in for the path separators.
# Group 1 is the path below gsbdata\InetPub\.
DIR_HEADER_RE = re.compile("gsbdata.InetPub.(.*)$")

# Column separator used to split a file row into fields.
FIELD_SEP_RE = re.compile(r"\s+")


def parse_listing(stream, old_years=OLD_FILE_YEARS):
    """Accumulate per-folder byte totals from a directory listing.

    Args:
        stream: Any iterable of text lines (e.g. ``sys.stdin`` or a list of
            strings). Lines may or may not end with a newline.
        old_years: Date suffixes that mark a file row as "old".

    Returns:
        A ``(folders, folders_old_files)`` pair of ``defaultdict(int)``
        objects mapping a folder key to the total size of all files, and of
        old files only, found underneath it. The key ``""`` is the root.

    Raises:
        IndexError, ValueError: if a "-a----" row has fewer than five
            whitespace-separated fields or its fifth field is not an integer.
    """
    old_suffixes = tuple(old_years)  # str.endswith needs a tuple, not a list
    folders = defaultdict(int)
    folders_old_files = defaultdict(int)
    # Path components of the directory currently being listed. Starts empty
    # so that file rows seen before any directory header are simply ignored
    # instead of failing on an undefined name.
    dir_parts = []

    line_iter = iter(stream)
    for raw in line_iter:
        # Drop only the line terminator; slicing off the last character would
        # eat real data when the final line has no trailing newline.
        line = raw.rstrip("\n")

        header = DIR_HEADER_RE.search(line)
        if header:
            curr_dir = header.group(1).strip()
            # Long folder names are wrapped across several lines; keep
            # gluing the continuation lines on until a blank line (or the
            # end of input) terminates the header.
            for extra in line_iter:
                extra = extra.strip()
                if not extra:
                    break
                curr_dir += extra
            dir_parts = re.split(r"\\", curr_dir)
            continue

        if line.startswith("-a----"):
            fields = FIELD_SEP_RE.split(line)
            file_len = int(fields[4])
            is_old = fields[1].endswith(old_suffixes)
            # NOTE(review): the roll-up covers the root ("") and every strict
            # ancestor, but never the listed directory itself, and the keys
            # are joined with TWO backslash characters. Both are kept exactly
            # as before because changing them alters the report -- confirm
            # whether this is intended.
            for depth in range(len(dir_parts)):
                key = r"\\".join(dir_parts[:depth])
                folders[key] += file_len
                if is_old:
                    folders_old_files[key] += file_len

    return folders, folders_old_files


def format_report(folders, folders_old_files):
    """Build the report rows, sorted in descending text order.

    Args:
        folders: Mapping of folder key to total bytes.
        folders_old_files: Mapping of folder key to bytes in old files;
            folders missing from it count as zero.

    Returns:
        A list of strings, one per folder in ``folders``.
    """
    report = []
    for folder, total in folders.items():
        old = folders_old_files.get(folder, 0)
        # Guard against division by zero for folders holding only empty files.
        percent = 100.0 * float(old) / float(total) if total != 0 else 0.0
        report.append(
            "{:15d} {:15d} {:6.2f}% {}".format(old, total, percent, folder)
        )
    report.sort(reverse=True)
    return report


def main():
    """Read the listing from stdin and print the report to stdout."""
    folders, folders_old_files = parse_listing(sys.stdin)
    for row in format_report(folders, folders_old_files):
        print(row)


if __name__ == "__main__":
    main()