Untitled
unknown
plain_text
2 years ago
4.4 kB
1
Indexable
Never
# Lower-case keywords that mark a log line as worth keeping.  Matching is a
# plain substring test against the lower-cased line, so 'error' already covers
# phrases such as 'kernel error' / 'os error' and needs to be listed only once.
_IMPORTANT_WORDS = (
    'unauthorized', 'error', 'cron', 'rejected', 'warning', 'fail',
    'exception', 'critical', 'security', 'authentication', 'intrusion',
    'attack', 'status', 'performance', 'uptime', 'load', 'config', 'setting',
    'permission', 'firewall', 'debug', 'trace', 'stack',
)

# Mode applied to every archive this module produces.
# NOTE(review): 0o777 is world-writable; kept because it is the mode the
# original code used -- confirm whether something narrower (e.g. 0o644) would do.
_ARCHIVE_MODE = 0o777


def _retag_txt_suffix(path, new_suffix):
    """Return *path* with a trailing ``.txt`` swapped for *new_suffix*.

    When *path* does not end in ``.txt`` the suffix is appended instead, so the
    result can never be equal to *path* itself.
    """
    stem = path[:-len('.txt')] if path.endswith('.txt') else path
    return stem + new_suffix


def _gzip_and_remove(source_path, archive_path):
    """Gzip *source_path* into *archive_path*, delete the source, chmod the archive."""
    import shutil

    with open(source_path, 'rb') as raw, gzip.open(archive_path, 'wb') as packed:
        # Stream in chunks rather than loading the whole file into memory.
        shutil.copyfileobj(raw, packed)
    os.remove(source_path)
    os.chmod(archive_path, _ARCHIVE_MODE)


def filter_log_data(input_file):
    """Keep only the "important" lines of a log file and gzip the result.

    A line is kept when its lower-cased text contains any entry of
    ``_IMPORTANT_WORDS`` as a substring.  The kept lines are written to
    ``<stem>-postgre.txt`` next to the input, that file is compressed to
    ``<stem>-postgre.txt.gz`` and the uncompressed copy is deleted.  The input
    file itself is left untouched.

    Args:
        input_file: Path of the text log to filter.

    Returns:
        Path of the gzip archive that holds the filtered lines.

    Raises:
        OSError: If the input cannot be read or an output cannot be written.
    """
    print(f"Filtering data in {input_file}")

    # Only the trailing '.txt' is rewritten; a blanket str.replace() would also
    # touch '.txt' inside directory names and, for a name without '.txt',
    # would make the output path equal to the input path (truncating it).
    filtered_path = _retag_txt_suffix(input_file, '-postgre.txt')

    with open(input_file, 'r') as source, open(filtered_path, 'w') as sink:
        for line in source:
            lowered = line.lower()  # lower-case once per line, not once per keyword
            if any(word in lowered for word in _IMPORTANT_WORDS):
                sink.write(line)

    archive_path = filtered_path + '.gz'
    # Permissions are applied to the archive that is actually returned, not to
    # the temporary uncompressed file that is about to be deleted.
    _gzip_and_remove(filtered_path, archive_path)
    return archive_path


def copying_loglines_and_updating_bookmark(bookmark_record, syslog_record_no, i, log_block_size):
    """Copy the next block of a VM's log, advance its bookmark and archive the copy.

    Steps, in order:
      1. Look up the VM's IP and log path via ``get_vm_details()``.
      2. Copy lines ``syslog_record_no`` .. ``syslog_record_no + log_block_size``
         (1-based, inclusive) of the VM log into ``<vm_ip>-T<current_time>.txt``.
      3. Rewrite the VM's ``<vm_ip>_bookmark_log=`` line in ``bookmark_file``
         with the new last line number.
      4. Run ``filter_log_data`` on the copy (produces ``...-postgre.txt.gz``).
      5. Gzip the unfiltered copy to ``....cassandra.txt.gz`` and delete it.

    Relies on the module-level names ``get_vm_details``, ``current_time`` and
    ``bookmark_file`` being defined elsewhere in the file.

    Args:
        bookmark_record: Current bookmark entry; only echoed to stdout.
        syslog_record_no: Line number stored in the bookmark (int or numeric str).
        i: VM identifier used to index the result of ``get_vm_details()``.
        log_block_size: Number of lines to advance (int or numeric str).

    Returns:
        Path of the filtered archive returned by ``filter_log_data``.
    """
    import itertools

    print("The function with searching_copying_logs is getting called.")
    vm_id = i
    print("VM ID :", vm_id)
    number_of_lines = log_block_size
    print("No of Lines to copy : ", number_of_lines)
    print("Bookmark Record", bookmark_record)
    old_line_number = syslog_record_no
    print('Old line number : ', old_line_number)
    first_line_no = int(old_line_number)
    last_line_no = first_line_no + int(number_of_lines)
    print('New Line Number :', last_line_no)

    # The details mapping is indexed twice by the VM id and stores each field
    # as a sequence whose first element is used.
    vm_entry = get_vm_details()[vm_id][vm_id]
    vm_ip = vm_entry['vm_ip'][0]
    vm_log_filename = vm_entry['vm_log_filename'][0]
    bookmark_key = str(vm_ip) + "_bookmark_log="
    print(bookmark_key)
    print("VM Log FIle : ", vm_log_filename)

    # Diagnostic only: show the line the new bookmark will point at.  Drop any
    # stale cache entry first, since the log may have changed on disk since a
    # previous call.
    linecache.checkcache(vm_log_filename)
    last_line_output = linecache.getline(vm_log_filename, last_line_no)
    print("Content for nth line :", last_line_output)

    filename = f"{vm_ip}-T{current_time}.txt"
    print(filename)

    print(f"Old Value is {first_line_no}")
    print(f'New Value is {last_line_no}')

    # NOTE(review): the slice covers lines first..last inclusive while the
    # bookmark is then set to `last`; if the next call starts from that
    # bookmark, line `last` is copied again -- confirm against the caller.
    # max() keeps a bookmark of 0 from producing a negative islice start,
    # which would raise ValueError.
    start_index = max(first_line_no - 1, 0)
    with open(vm_log_filename, 'r') as log_source, open(filename, 'w') as raw_copy:
        raw_copy.writelines(itertools.islice(log_source, start_index, last_line_no))

    # Updating the last index in a Bookmark File
    print("Updating the last index in a Bookmark File")
    print(f"Bookmark Record for {i} is {bookmark_key}")
    print(f"Updating Bookmark record for {bookmark_key}")
    updated_bookmark_rec = f"{bookmark_key}{last_line_no}\n"
    print(updated_bookmark_rec)

    with open(bookmark_file, 'r+') as bookmarks:
        entries = [
            updated_bookmark_rec if bookmark_key in entry else entry
            for entry in bookmarks.readlines()
        ]
        # Rewrite in place; truncate in case the new content is shorter.
        bookmarks.seek(0)
        bookmarks.writelines(entries)
        bookmarks.truncate()

    # Filter the copied data (reads `filename`, leaves it in place).
    filtered_filename = filter_log_data(filename)

    # Archive the unfiltered copy under the '.cassandra.txt.gz' name.  The
    # original code tried to do this by assigning to the closed file object's
    # read-only `.name` and then opening a file that was never created, so it
    # always raised here.
    # NOTE(review): assumes the intent was to archive the raw copy -- confirm.
    raw_archive = _retag_txt_suffix(filename, '.cassandra.txt') + '.gz'
    _gzip_and_remove(filename, raw_archive)

    return filtered_filename