Untitled

mail@pastecode.io avatar
unknown
plain_text
a year ago
12 kB
1
Indexable
Never
#Necessary imports
import string
import pandas as pd
import regex as re
import datetime
import datefinder
import sys
import subprocess
import linecache
from itertools import islice
import time
import gzip
import os

# Declaring Files
# agent_file: properties file read for the VM list, the log block size and the per-VM settings.
agent_file='/Analytics/venv/Jup/CAPE_Apache_Beam/agent.properties'
# bookmark_file: properties file holding the "<vm_ip>_bookmark_log=<line number>" records
# that are searched, appended to and rewritten by the bookmark functions below.
bookmark_file='/Analytics/venv/Jup/CAPE_Apache_Beam/bookmark_log.properties'


# Get VM List from agent.properties file
def get_VMList(path=None):
    """Return the VM ids listed on the ``VMList`` line of the agent properties file.

    Args:
        path: Properties file to read. Defaults to the module-level
            ``agent_file`` when omitted.

    Returns:
        A list of ints, one per run of digits found after the first ``=`` of
        the last line containing ``VMList``. An empty list is returned when the
        file has no such line (previously this raised ``UnboundLocalError``).
    """
    if path is None:
        path = agent_file

    vm_ids = []
    with open(path, 'rt') as file:
        for line in file:
            if 'VMList' in line:
                # Parse only the value so digits that appear in the key are
                # never mistaken for VM ids.
                _, _, value = line.partition('=')
                vm_ids = [int(token) for token in re.findall(r'\d+', value)]
    return vm_ids
# Parse the VM list once at import time; the functions below read this global.
VMList = get_VMList()
print("The VM List is : ", VMList)


#Get number of lines to be copied
def get_log_block_size(path=None):
    """Return the configured log block size (number of lines handled per run).

    The value is taken from the last line of the properties file that contains
    ``vmmetricskpi.log_block``.

    Args:
        path: Properties file to read. Defaults to the module-level
            ``agent_file`` when omitted.

    Returns:
        The text after the first ``=`` with surrounding whitespace removed.
        It is returned as a string; callers convert it with ``int()``.

    Raises:
        LookupError: if the setting is absent or has an empty value.
    """
    if path is None:
        path = agent_file

    no_of_lines = None
    with open(path, 'rt') as file:
        for line in file:
            if 'vmmetricskpi.log_block' in line:
                print("Getting the Log Block size")
                # strip() drops the trailing newline that used to leak into
                # the returned value.
                no_of_lines = line.partition('=')[2].strip()

    if not no_of_lines:
        raise LookupError(
            f"'vmmetricskpi.log_block' setting not found in {path}"
        )
    return no_of_lines

# Report the configured block size at start-up
print("Number of Lines : ", get_log_block_size())

# Capture the run timestamp once; it is reused when naming the copied log files
print("Getting Current Timestamp")
current_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
print(current_time)

# Getting All VM Details
def _vm_key_matches(vm, suffix, key):
    """Return True if *key* contains ``<vm><suffix>`` with *vm* as a whole number.

    The look-behind rejects a VM id that is only the tail of a longer number,
    so VM ``1`` no longer matches the key ``11.privatip``.
    """
    pattern = r'(?<!\d)' + re.escape(str(vm) + suffix)
    return re.search(pattern, key) is not None


def get_vm_details(path=None, vm_ids=None):
    """Collect the per-VM settings from the agent properties file.

    Args:
        path: Properties file to read. Defaults to the module-level
            ``agent_file`` when omitted.
        vm_ids: Iterable of VM ids to look up. Defaults to the module-level
            ``VMList`` when omitted.

    Returns:
        A dict shaped ``{vm_id: {vm_id: details}}`` (the double nesting is what
        the callers index into). ``details`` maps each of ``vm_ip``,
        ``vm_output_log_filename``, ``vm_log_enable``, ``vm_log_filename``,
        ``bookmark``, ``bookmark_ip`` and ``bookmark_log`` to a list of the
        values found; a list stays empty when the setting is missing.
        An empty ``vm_ids`` yields an empty dict.
    """
    if path is None:
        path = agent_file
    if vm_ids is None:
        vm_ids = VMList

    # Read the file once instead of once per VM.
    with open(path, 'rt') as file:
        lines = file.readlines()

    field_names = ('vm_ip', 'vm_output_log_filename', 'vm_log_enable',
                   'vm_log_filename', 'bookmark', 'bookmark_ip', 'bookmark_log')

    all_vm_details = {}
    for vm in vm_ids:
        # A fresh record per VM, created up front, so settings may appear in
        # any order and a VM never inherits the previous VM's record.
        details = {name: [] for name in field_names}

        for line in lines:
            key, _, value = line.partition('=')
            value = value.strip()

            if _vm_key_matches(vm, '.privatip', key):
                addresses = re.findall(r'[0-9]+(?:\.[0-9]+){3}', line)
                if addresses:
                    vm_ip = addresses[0]
                    details['vm_ip'].append(vm_ip)
                    details['bookmark_ip'].append(vm_ip)
                    details['bookmark_log'].append(vm_ip + "_bookmark_log=")
            elif _vm_key_matches(vm, '.vmmetricskpi.output_log.filename', key):
                details['vm_output_log_filename'].append(value)
            elif _vm_key_matches(vm, '.vmmetricskpi.log_enable', key):
                details['vm_log_enable'].append(value)
            elif _vm_key_matches(vm, '.vmmetricskpi.log.filename', key):
                details['vm_log_filename'].append(value)
                # The bookmark name is the second '-'-separated part of the
                # log filename; skipped when the filename has no '-'.
                name_parts = value.split("-")
                if len(name_parts) > 1:
                    details['bookmark'].append(name_parts[1].strip())

        all_vm_details[vm] = {vm: details}

    return all_vm_details

# Print the parsed per-VM settings once at start-up (this runs at import time).
print("VM Details : \n ",get_vm_details())  


def filter_log_data(input_file):
    """Write the lines of *input_file* that mention an important keyword to a gzip file.

    Matching is a case-insensitive substring test against the keyword list
    below. The input file itself is left in place.

    Args:
        input_file: Path of the plain-text log extract to filter.

    Returns:
        Path of the compressed result: the input path with a trailing ``.txt``
        replaced by ``-postgre.txt.gz``. When the input does not end in
        ``.txt`` the suffix is appended instead, so the output can never be
        the input file itself.
    """
    print(f"Filtering data in {input_file}")

    # All keywords are lower-case because each line is lower-cased before the
    # comparison.
    important_words = (
        'unauthorized', 'error', 'cron', 'kernel error', 'os error', 'rejected',
        'warning', 'fail', 'exception', 'critical', 'security',
        'authentication', 'intrusion', 'attack', 'status', 'performance',
        'uptime', 'load', 'config', 'setting', 'permission', 'firewall',
        'debug', 'trace', 'stack',
    )

    suffix = '.txt'
    if input_file.endswith(suffix):
        filtered_path = input_file[:-len(suffix)] + '-postgre.txt'
    else:
        filtered_path = input_file + '-postgre.txt'
    compressed_path = filtered_path + '.gz'

    # Stream matching lines straight into the gzip file: no intermediate
    # uncompressed copy and no whole-file read into memory.
    with open(input_file, 'r') as source, gzip.open(compressed_path, 'wt') as target:
        for line in source:
            lowered = line.lower()
            if any(word in lowered for word in important_words):
                target.write(line)

    # Apply the permissions to the file that is actually returned; they were
    # previously set on a temporary file that was deleted right afterwards.
    # NOTE(review): 0o777 is world-writable - confirm this is really required.
    os.chmod(compressed_path, 0o777)

    return compressed_path


def copying_loglines_and_updating_bookmark(bookmark_record, syslog_record_no, i, log_block_size):
    """Copy the next block of a VM's log, advance its bookmark and compress the results.

    The bookmark is the 1-based number of the next line to copy. At most
    ``log_block_size`` lines starting at that line are copied, and the bookmark
    moves forward by the number of lines actually copied. The last copied line
    is therefore not copied again on the next run, and lines that arrive after
    a short read are not skipped.

    Args:
        bookmark_record: Bookmark line found for this VM (only printed).
        syslog_record_no: Line number stored in the bookmark (str or int).
        i: VM id used to look up the VM's IP and log file.
        log_block_size: Maximum number of lines to copy (str or int).

    Returns:
        Path of the gzip file produced by ``filter_log_data`` for the copied
        block. A second gzip file ``<ip>-T<time>-cassandra.txt.gz`` with the
        unfiltered block is also written.
    """
    import shutil  # local import: only needed for the chunked compression below

    print("The function with searching_copying_logs is getting called.")
    vm_id = i
    print("VM ID :", vm_id)
    print("No of Lines to copy : ", log_block_size)
    print("Bookmark Record", bookmark_record)

    # Line numbers are 1-based; clamp so a bad bookmark cannot hand islice a
    # negative start.
    first_line_no = max(int(syslog_record_no), 1)
    block_size = max(int(log_block_size), 0)
    print('Old line number : ', first_line_no)

    vm_details = get_vm_details()[vm_id][vm_id]
    vm_ip = vm_details['vm_ip'][0]
    vm_log_filename = vm_details['vm_log_filename'][0]
    syslog_record = str(vm_ip) + "_bookmark_log="
    print(syslog_record)
    print("VM Log FIle : ", vm_log_filename)

    filename = f"{vm_ip}-T{current_time}.txt"
    print(filename)

    # Read at most block_size lines, starting at the bookmarked line.
    start_index = first_line_no - 1
    with open(vm_log_filename, 'r') as input_file:
        copied_lines = list(islice(input_file, start_index, start_index + block_size))
    with open(filename, 'w') as output_file:
        output_file.writelines(copied_lines)

    # Advance only by what was really copied, so a log shorter than the block
    # does not push the bookmark past lines that have not been written yet.
    next_line_no = first_line_no + len(copied_lines)
    print(f"Old Value is {first_line_no}")
    print(f'New Value is {next_line_no}')
    print('New Line Number :', next_line_no)
    print("Content for nth line :", copied_lines[-1] if copied_lines else '')

    # Updating the last index in a Bookmark File
    print("Updating the last index in a Bookmark File")
    print(f"Bookmark Record for {i} is {syslog_record}")
    print(f"Updating Bookmark record for {syslog_record}")
    updated_bookmark_rec = f"{syslog_record}{next_line_no}\n"
    print(updated_bookmark_rec)

    with open(bookmark_file, 'r+') as file:
        # Match on the start of the line: a substring test would also rewrite
        # the record of an IP that merely ends with this one.
        lines = [
            updated_bookmark_rec if line.lstrip().startswith(syslog_record) else line
            for line in file.readlines()
        ]
        file.seek(0)
        file.writelines(lines)
        file.truncate()

    # Filter the copied block before the plain file is renamed and removed.
    filtered_filename = filter_log_data(filename)

    # Compress the unfiltered block and remove the uncompressed file.
    new_output_file_name = filename.replace('.txt', '-cassandra.txt')
    os.rename(filename, new_output_file_name)
    compressed_name = new_output_file_name + '.gz'
    with open(new_output_file_name, 'rb') as raw_file, gzip.open(compressed_name, 'wb') as output_file_gz:
        shutil.copyfileobj(raw_file, output_file_gz)
    os.remove(new_output_file_name)

    # Set permissions on the compressed output file
    os.chmod(compressed_name, 0o777)

    # Return the filtered filename
    return filtered_filename


    
    
#Performing Operations if Bookmark Record is found
def perform_functions_with_bookmark_record(bookmark_line,i):
    """Extract the stored line number from a bookmark record and start the copy step.

    Args:
        bookmark_line: Bookmark record of the form ``<vm_ip>_bookmark_log=<n>``.
        i: VM id used to look up the VM's settings.

    Returns:
        None. The result of the copy step is not propagated.
    """
    print("The function with perform_functions_with_bookmark_record is getting called.")
    print("VM ID :", i)

    block_size = get_log_block_size()
    print("No of Lines to copy : ", block_size)
    print("Bookmark Record", bookmark_line)

    details = get_vm_details()[i][i]
    ip_address = details['vm_ip'][0]

    log_path = details['vm_log_filename'][0]
    print("VM Log FIle : ", log_path)

    output_location = details['vm_output_log_filename'][0]
    print("VM Output Directory : ", output_location)

    # Looked up but not used further; kept so a VM without this entry still
    # fails at this point.
    _bookmark_prefix = details['bookmark_log'][0]

    # Everything after "<ip>_bookmark_log=" is the stored line number.
    marker = str(ip_address) + '_bookmark_log='
    record_parts = bookmark_line.split(marker)
    record_number = record_parts[1]
    print("Syslog Record number for VM {} is {}" .format(i, record_number))

    copying_loglines_and_updating_bookmark(bookmark_line, record_number, i, block_size)
    

 # Checking if Bookmark Record is found in RUntime_log.properties
def check_bookmark_record(i):
    """Find (or create) the bookmark record for a VM and start processing it.

    The bookmark file is searched for lines that begin with
    ``<vm_ip>_bookmark_log=``. Each match is passed to
    ``perform_functions_with_bookmark_record``. When there is no match, a
    record starting at line 1 is appended to the bookmark file and processed.
    A missing bookmark file is treated as empty and is created by the append.

    Args:
        i: VM id used to look up the VM's IP address.

    Returns:
        None.
    """
    vm_id = i
    all_vm_details = get_vm_details()
    vm_ip = all_vm_details[vm_id][vm_id]['vm_ip'][0]
    print(vm_ip)
    line_to_search = str(vm_ip) + "_bookmark_log="
    print("Bookmark Log : ", line_to_search)

    # Read and close the file before processing: the downstream code rewrites
    # this same file, so no handle should be held open across the call.
    try:
        with open(bookmark_file, "r") as f:
            lines = f.readlines()
    except FileNotFoundError:
        lines = []

    # Match on the start of the line; a substring test would also pick up the
    # record of an IP that merely ends with this one (10.0.0.1 in 110.0.0.1).
    filtered_lines = [line for line in lines if line.lstrip().startswith(line_to_search)]

    if filtered_lines:
        for bookmark_line in filtered_lines:
            print(f"Bookmark Log Record found: {bookmark_line}")
            print("Redirecting to perform_functions_with_bookmark_record")
            perform_functions_with_bookmark_record(bookmark_line, i)
        return None

    bookmark_index = 1
    print("Line to search", line_to_search)
    bookmark_line = str(line_to_search) + str(bookmark_index)
    print("Bookmark_log : ", bookmark_line)
    print("No line found containing the specified string: ")
    print("Bookmark Log Record not found.Creating a record and redirecting to syslog File Location")
    with open(bookmark_file, 'a') as file:
        # Add a separator only when the file does not already end with a
        # newline, and terminate the new record, so no blank lines pile up.
        if lines and not lines[-1].endswith("\n"):
            file.write("\n")
        file.write(bookmark_line + "\n")
    perform_functions_with_bookmark_record(bookmark_line, i)
    return None


#Checking log enable condition is Yes or No                   
def check_log_enable():
    """Run the bookmark check for every VM whose log-enable setting is ``y``.

    The VM list is re-read from the properties file. The per-VM settings are
    parsed once before the loop instead of once per VM. A VM is processed only
    when its ``vm_log_enable`` list is exactly ``['y']``.

    Returns:
        None.
    """
    vm_ids = get_VMList()
    all_vm_details = get_vm_details()

    for vm_id in vm_ids:
        if all_vm_details[vm_id][vm_id]['vm_log_enable'] == ['y']:
            print("Log enable condition is yes.Lets proceed further to check Bookmark Function", vm_id)
            check_bookmark_record(vm_id)
        else:
            print("Log enable condition is no. No action Needed.", vm_id)

# Script entry point: this call is unguarded, so it runs whenever the file is executed or imported.
check_log_enable()