# globalscape/python/ex_parser_slow.py
# (70 lines, 1.8 KiB, Python; last changed 2022-04-27 14:05:07 -05:00)
# TODO: write the results to a file, log to a file, and find a better way to
# pass the log file name into open().
import platform
import time
from collections import Counter
# Count how often each username appears in a log file and print the most
# frequent ones, reporting how long each phase (parse, count, sort, list) took.

# When True every user is listed; otherwise only the top `results_value`.
full_list = False
results_value = 20

# platform.architecture() returns (bits, linkage); show the bits entry.
version = platform.architecture()
print(version[0])

# perf_counter() is monotonic, so phase durations cannot be skewed by
# wall-clock adjustments while the script runs.
start_time = time.perf_counter()
fname = "C:/Users/jbranan/Desktop/u_ex201209.log"
# Only the final path component is shown in the progress message.
fn = fname.rsplit('/', 1)[-1]

user_list = []
# Stream the file line by line instead of loading it all with readlines().
with open(fname, encoding='utf8') as file_object:
    print(f'Looking for usernames in {fn}...')
    for line in file_object:
        # Skip header/directive lines only.  Testing for '#' anywhere in the
        # line would also discard real records that merely contain a '#'.
        if line.startswith('#'):
            continue
        try:
            # A usable record splits into exactly three parts on ' -', and
            # its middle part into exactly two on ' ['; the text before
            # ' [' is taken as the username.
            _date, userinfo, _otherdata = line.split(' -', 2)
            user, _userdata = userinfo.lstrip().split(' [')
        except ValueError:
            # Line does not have the expected shape; ignore it.
            continue
        user_list.append(user)
print(f"--- {time.perf_counter() - start_time} seconds ---")

count_users_time = time.perf_counter()
print('Building username appearance totals...')
# Counter tallies in a single pass and keeps first-seen order, replacing the
# quadratic "list membership + list.count()" loop.
result_list_dict = Counter(user_list)
print(f"--- {time.perf_counter() - count_users_time} seconds ---")

sort_time = time.perf_counter()
print('Sorting username appearance totals...')
# sorted() is stable, so users with equal totals stay in first-seen order.
results = sorted(result_list_dict.items(), key=lambda x: x[1], reverse=True)
print(f"--- {time.perf_counter() - sort_time} seconds ---")

results_time = time.perf_counter()
print('Listing username appearance totals...')
shown_results = results if full_list else results[:results_value]
for k, v in shown_results:
    print(f'User: {k} - Appearances:{v}')
print(f"--- {time.perf_counter() - results_time} seconds ---")