Untitled

import xlwings as xw
import datetime
import re
import pandas as pd
import numpy as np
from openpyxl.utils import range_boundaries as rb, get_column_letter


#@title Imports
import os,string,openpyxl, os,pandas as pd,json
from openpyxl import Workbook

from componentExtractor import ComponentsManager


class ExcelSheetManager:
    def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'):
        self.file_path = file_path
        self.workbook = None
        self.sheet_names = []
        self.active_sheet = None
        self.data = None

        # Check if file exists
        if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L':
            self.workbook = openpyxl.load_workbook(file_path,data_only=True)
            self.sheet_names = self.workbook.sheetnames
        elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R':
            self.workbook = openpyxl.load_workbook(file_path,data_only=False)
            self.sheet_names = self.workbook.sheetnames
        else:
          print("reading as pd.ExcelFile")
          self.data = pd.ExcelFile(file_path)
    def add_sheet(self,name):
      self.workbook.create_sheet(title=name)
      self.sheet_names = self.workbook.sheetnames
    def get_sheet(self, sheet_name):
        if sheet_name in self.sheet_names:
            return self.workbook[sheet_name]
        else:
            raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.")

    def read_cell_value(self, sheet_name, cell):
        sheet = self.get_sheet(sheet_name)
        return sheet[cell].value if cell in sheet else None

    def set_cell_value(self, sheet_name, cell, value):
        sheet = self.get_sheet(sheet_name)
        if cell in sheet:
            sheet[cell].value = value
        else:
            raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.")

    def activate_sheet(self, sheet_name):
        self.active_sheet = self.get_sheet(sheet_name)

    def change_activate_sheet(self, sheet_name):
        self.activate_sheet(sheet_name)

    def save_changes(self,name):
        if self.workbook:
            self.workbook.save(name+'.xlsx')

    def read_data(self,sheetname):
      # worksheet = self.workbook[sheetname]
      max_row, max_column = self.get_sheet_size(sheetname)
      max_column = max_column - 1 #from some reason one column added to my data..
      range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}"
      # print(range_str)
      return self.get_cells_data(sheetname,range_str,False)
    def get_sheet_size(self, sheet_name):
        sheet = self.get_sheet(sheet_name)
        max_row = sheet.max_row
        max_col = sheet.max_column
        return max_row, max_col
    @staticmethod
    def generate_column_name(index:int):
      """
    Function Use : Generate Excel-style column name

    Parameters:
        index (int): Column index .

    Returns:
        str: Excel-style column name.
      """
      column_name = ""
      while index >= 0:
          column_name = string.ascii_uppercase[index % 26] + column_name
          index = index // 26 - 1
          if index < 0:
              break
      return column_name
    def reload_workbook(self):
        if self.workbook:
          self.workbook.save(self.file_path)
          self.workbook = openpyxl.load_workbook(self.file_path, data_only=True)  # Load with data_only=True to get values

    @staticmethod
    def create_empty_table(rows, cols):
        assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer."
        assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer."
        columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)]

        empty_table = pd.DataFrame(index=range(rows), columns=columns)
        return empty_table

    def get_cells_data(self, sheet_name, range_str, return_formula=False):
      start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
      
      start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str)
      end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str)

      num_rows = end_row - start_row + 1
      num_cols = end_col_index - start_col_index + 1
      max_rows,max_columns = self.get_sheet_size(sheet_name)
      assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows cols are bigger then can be "
      df = self.create_empty_table(num_rows, num_cols)
      sheet = self.get_sheet(sheet_name)

      for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)):
          for j, cell in enumerate(row):
              if return_formula and cell.data_type == 'f':
                  df.iloc[i, j] = cell.value
              else:
                  df.iloc[i, j] = cell.value
      return df
    @staticmethod
    def create_empty_table_from_range(range_str):
      start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)

      start_col_index = ExcelSheetManager.column_index_from_string(start_col_str)
      end_col_index = ExcelSheetManager.column_index_from_string(end_col_str)

      row_labels = range(start_row, end_row + 1)
      col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)]

      empty_table = pd.DataFrame(index=row_labels, columns=col_labels)
      return empty_table

    @staticmethod
    def column_index_from_string(col_str):
      num = 0
      for c in col_str:
        if c.isalpha():
            num = num * 26 + (ord(c.upper()) - ord('A')) + 1
      return num - 1
    @staticmethod
    def split_cell_string(cell_str):
      col_str = ''.join(filter(str.isalpha, cell_str))
      row = ''.join(filter(str.isdigit, cell_str))
      return col_str, int(row)

    @staticmethod
    def split_range_cells_string(range_str):
      start_cell, end_cell = range_str.split(":")
      start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell)
      end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell)
      return start_col_str, start_row, end_col_str, end_row

def get_dates_for_next_year():
    dates = []
    # current_date = datetime.now().replace(day=1)  # Set to the first day of the current month
    current_date = datetime.now().replace(day=1)  # Set to the first day of the current month
    current_date = datetime(current_date.year-1,current_date.month,1)
    for _ in range(12):
        dates.append(current_date.strftime("%Y-%m-%d"))
        next_month = (current_date.month % 12) + 1
        year_increment = (current_date.month // 12)
        if next_month == 1:  # Increment year if next month is January
            year_increment = 1
        else:
            year_increment = 0
        current_date = current_date.replace(year=current_date.year + year_increment, month=next_month)
    return dates

def get_first_days_of_past_12_months():
        dates = []
        current_date = datetime.now().replace(day=1)  # Set to the first day of the current month
        for _ in range(12):
            dates.append(current_date.strftime("%Y-%m-%d"))
            prev_month = (current_date.month - 1) if current_date.month > 1 else 12
            year_decrement = -1 if current_date.month == 1 else 0
            current_date = current_date.replace(year=current_date.year + year_decrement, month=prev_month)
        dates.sort()  # Sort the dates in ascending order
        return dates


class ExcelCopyPasteManager:
    def __init__(self, src_file, src_sheet_name, dest_file, dest_sheet_name):
        self.src_file = src_file
        self.src_sheet_name = src_sheet_name
        self.dest_file = dest_file
        self.dest_sheet_name = dest_sheet_name

        try:
            self.src_wb = xw.Book(self.src_file)
            self.src_sheet = self.src_wb.sheets[self.src_sheet_name]

            self.dest_wb = xw.Book(self.dest_file)
            self.dest_sheet = self.dest_wb.sheets[self.dest_sheet_name]
        except Exception as e:
            print(f"Error initializing Excel files: {e}")
            raise
    

    def setNameDest(self,dest_sheet_name):
        try:
            self.dest_sheet = self.dest_wb.sheets[dest_sheet_name]
        except Exception as e:
            print(f"Error initializing Excel files: {e}")
            raise
    def disposeSrc(self):
        self.src_wb.close()
    def disposeDest(self):    
        self.dest_wb.close()
    def saveStateSrc(self):
        self.src_wb.save()
    def saveStateDest(self):
        self.dest_wb.save()
    @staticmethod
    def get_column_letter(column_index):
        return get_column_letter(column_index)
    @staticmethod
    def split_range_str_for_check_ends(range_str):
        range_str += '0'
        minC, minR, maxC, _ = rb(range_str)
        minC = get_column_letter(minC)
        maxC = get_column_letter(maxC)
        print(f"{minC}{minR}:{maxC}?")
        return minC, minR, maxC
    @staticmethod
    def split_range_str(range_str):
        minC, minR, maxC, maxR = rb(range_str)
        minC = get_column_letter(minC)
        maxC = get_column_letter(maxC)
        return minC, minR, maxC, maxR
    @staticmethod
    def indexes_range_str(range_str):
        minC, minR, maxC, maxR = rb(range_str)
        print(minC, minR, maxC, maxR)
        return minC, minR, maxC, maxR

    def where_ends(self, worksheet, startC, startR, endC):
        """
        Determine where the data ends in a given column
        """
        count = startR
        while worksheet.range(f"{startC}{count}").value is not None:
            count += 1
        future_endR = count - 1
        print(f"{startC}{startR}:{endC}{future_endR}")
        return f"{startC}{startR}:{endC}{future_endR}"

    def check(self, manager_excel, worksheet, range_str, to_save=False):
        startC, startR, endC = self.split_range_str_for_check_ends(range_str)[:3]
        future_end_range = self.where_ends(worksheet, startC, startR, endC)
        print(future_end_range)
        if to_save:
            manager_excel.save(f"{self.dest_file}_backup_{datetime.datetime.now().date()}.xlsx")
        return future_end_range
    def check_if_need_clean_data_to_nulls(self, res_range_str1, res_range_str2):
        """
        Check if data in one range extends beyond another
        """
        _, _, _, maxR1 = self.split_range_str(res_range_str1)
        minC2, _, maxC2, maxR2 = self.split_range_str(res_range_str2)
        if maxR1 > maxR2:
            return None
        else:
            # print(f"{minC2}{maxR1}:{maxC2}{maxR2}")
            return f"{minC2}{maxR1}:{maxC2}{maxR2}"

    def copy_range(self, cell_range):
        """
        Copy data from the source range to the destination
        """
        min_col, min_row, max_col, max_row = self.indexes_range_str(cell_range)
  
        src_range = self.src_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}")
        dest_range = self.dest_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}")

        dest_range.value = src_range.value

        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
        self.dest_wb.save(f"dest_file_{timestamp}.xlsx")
    def clear_range(self,sheet, range_str):
        sheet.range(range_str).clear_contents()
    def applyValue(self, sheet,place,value):
        sheet.range(place).value = value
    def apply_formula(self, sheet,place,formula):
        sheet.range(place).formula = formula
    def apply_expanded_formula(self, sheet, start_cell, formula):
        """
        for unique , transpose .. etc
        """
        sheet.range(start_cell).formula2 = formula
    def check_formula(self,sheet, cell):
        formula = sheet.range(cell).formula
        print(f"Formula in {cell}: {formula}")
    def insertDfFromFirstCellReference(self,sheet,cell,df):
        sheet.range(cell).value = df
    def readCellsValue(self,sheet,cell):
        """
        1 cell or range..(and return a list) 
        """
        return sheet.range(cell).value
    
    def createNamedRange(self,sheet,cells,name):
        sheet.range(cells).name = name
    def get_range_data_as_dataframe(self,sheet,cell):
        return sheet.range(cell).options(pd.DataFrame,expand='table').value
    def getRangeTable(self,sheet,theRange,directions=None):
        direction_map = {
            'r': 'right',
            'l': 'left',
            'u': 'up',
            'd': 'down'
        }
        if directions != None:
            result_range = sheet.range(theRange)
            for direction in directions:
                if direction.lower() in direction_map:
                    result_range = result_range.expand(direction_map[direction.lower()])
                else:
                    print(f"Warning: Invalid direction '{direction}' ignored.")
        else:
            result_range = sheet.range(theRange)
        range_address = result_range.address
        range_str = range_address.replace("$", "")
        minC, minR, maxC, maxR = self.split_range_str(range_str)
        return result_range.value , range_address , f"{minC}{minR}:{maxC}{maxR}"


    def pipe1(self, range_str):
        """
        Perform the copy-paste operation and clear the destination range
        """
        futureDestRange = self.check(self.dest_wb, self.dest_sheet, range_str, to_save=False)
        startC, startR, endC, maxR = self.split_range_str(futureDestRange)
        rangeDest = f"{startC}{startR}:{endC}{maxR}"

        futureSrcRange = self.check(self.src_wb, self.src_sheet, range_str, to_save=False)
        startC, startR, endC, maxR = self.split_range_str(futureSrcRange)
        rangeSrc = f"{startC}{startR}:{endC}{maxR}"

        dataSrc,_,_ = self.getRangeTable(self.src_sheet,rangeSrc)

        print(f"The expanded fixed range for Src is: {rangeSrc}")
        print(f"The expanded fixed range for dest is: {rangeDest}")
        self.clear_range(self.dest_sheet,rangeDest)
        self.insertDfFromFirstCellReference(self.dest_sheet,rangeSrc,dataSrc)
        return startC, startR, endC, maxR
    
    def pipe2(self,maxR):
        _,_,datesToClean = self.getRangeTable(self.dest_sheet,"AB6","r")
        _, _, maxC, _ = self.split_range_str(datesToClean)
        print(maxC)

        self.clear_range(self.dest_sheet,datesToClean)

        _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB7","dr")
        self.clear_range(self.dest_sheet,contentTableToClean)

        z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
             ['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same
             ['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
             ]
        for i in range(len(z)):
            self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])

        self.clear_range(self.dest_sheet,f"O7:W{maxR}")
        #ranges to update with formulas
        ranges_to_update = [[f'O7:O{maxR}','=A7'],
             [f'P7:P{maxR}','=M7'],
             [f'Q7:Q{maxR}',"""=IF(COUNTIF('helper Renewal & Move Ins'!$A:$A,M7)>0,VLOOKUP(M7,'helper Renewal & Move Ins'!A$2:B$118,2,FALSE),IF(ISBLANK(M7),"Unclaimed"))"""],
             [f'R7:R{maxR}',"=DATE(YEAR(I7),MONTH(I7),1)"],
             [f'T7:T{maxR}',"""=IF(D7 = "Lease Signed",1,IF(D7 = "Lease Renewal",0,""))"""],
             [f'U7:U{maxR}',"=MONTH(I7)"],
             [f"V7:V{maxR}","=YEAR(I7)"],
             [f"W7:W{maxR}","=DATE(V7,U7,1)"],
             [f'AB7:{maxC}28',f"=COUNTIFS($A$7:$A{maxR},$AA7,$W$7:$W{maxR},AB$6,$T$7:$T{maxR},0)"],
             ]
        for i in range(len(ranges_to_update)):
            self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])


        z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
             ['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same
             ['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
             ]
        for i in range(len(z)):
            self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])
    def pipe3(self):
        self.clear_range(self.dest_sheet,"A1:W1000")
        # WHAT ABOUT COLUMN A + B .. nati !
        z = [["I1","=UNIQUE(B2:B1000)"],
             ["J1","=UNIQUE('Renewal & Move Ins actual'!O7:Q5000)"]]
        for i in range(len(z)):
            self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])

        self.setNameDest("helper Renewal & Move Ins 2")


        z = [["A2","UNIQUE('Renewal & Move Ins actual'!O7:P3000)"],
            [f"J1",f"=UNIQUE(G1:H145)"]]
        for i in range(len(z)):
            self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])


        _,_,columnProperties = self.getRangeTable(self.dest_sheet,"A2","d")
        _, _, _, maxR = self.split_range_str(columnProperties)

        a = [[f"C2:C{maxR}",f"""=IF(ISBLANK(B2),"Unclaimed",VLOOKUP(B2, 'helper Renewal & Move Ins'!$A$2:$B$500, 2, FALSE))"""],
             [f"G2:G{maxR}",f"=A2"],
             [f"H2:H{maxR}",f"=IF(ISBLANK(G2),"",C2)"]
            ]
        for i in range(len(a)):
            self.apply_formula(self.dest_sheet,a[i][0],a[i][1])
        return maxR

    def pipe4(self,maxR3):
        z = [[f"AA34:AA{maxR3}",f"='helper Renewal & Move Ins 2'!K2"],
             [f"Z34:Z{maxR3}",f"='helper Renewal & Move Ins 2'!J2"],
             [f"Y34:Y{maxR3}",f"=Z34&","&AA34"],]

        for i in range(len(z)):
            self.apply_formula(self.dest_sheet,z[i][0],z[i][1])


        _,_,datesRange = self.getRangeTable(self.dest_sheet,"AB33","r")
        _, _, maxCdates, _ = self.split_range_str(datesRange)

        _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB34","dr")
        self.clear_range(self.dest_sheet,contentTableToClean)


        _,_,contentRange = self.getRangeTable(self.dest_sheet,"AA34","d")
        _, _, _, maxR = self.split_range_str(contentRange)


        ranges_to_update = [[f"AB34:{maxCdates}{maxR}",f"=COUNTIFS($O:$O,$Z34,$Q:$Q,$AA34,$R:$R,AB$33)"],
                     ]
        for i in range(len(ranges_to_update)):
            self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])
        return maxR
    def pipe5(self , maxR):
        z = [[f"BE34:BE{maxR}",f"=AA34"],
             [f"BD34:BD{maxR}",f"=Z34"],
             [f"BC34:BC{maxR}",f"=BD34&","&BE34"],]

        for i in range(len(z)):
            self.apply_formula(self.dest_sheet,z[i][0],z[i][1])


        _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"BF34","dr")
        self.clear_range(self.dest_sheet,contentTableToClean)


        ranges_to_update = [[f"BF34:BQ{maxR}",f"=IFERROR(INDEX($AB$34:$AI${maxR},MATCH($BC34,$Y$34:$Y${maxR},0),MATCH(BF$33,$AB$33:$AI$33,0)),"-")"],
                     ]
        for i in range(len(ranges_to_update)):
            self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])

    def pipe6(self,maxR1):
        z = [[f"BT7:BT{maxR1}",f"""=IF($H7="","1/1/1900",TEXT($H7,"m/d/yyyy"))"""],
             [f"BS7:BS{maxR1}",f"=DATEVALUE($BT7:$BT1884)"],]

        for i in range(len(z)):
            self.apply_formula(self.dest_sheet,z[i][0],z[i][1])
    
    def pipeAll(self):
        #coping from M_O_pull_all_leases_v2 to AM Reports the main table.
        _, _, _, maxR1 =excelCP.pipe1(range_str='A7:M')

        excelCP.pip2(maxR1)

        excelCP.setNameDest("helper Renewal & Move Ins")
        maxR3 = excelCP.pipe3()

        excelCP.setNameDest("Renewal-Move-Ins Goals&Actual")
        maxR = excelCP.pipe4(maxR3)

        excelCP.pipe5(maxR)

        excelCP.pipe6(maxR1)


excelCP = ExcelCopyPasteManager('M_O_pull_all_leases_v2.xlsx', 'Report1', 'AM Reports.xlsx', 'Renewal-Move-Ins Goals&Actual')


excelCP.pipeAll()
Editor is loading...