Untitled
unknown
plain_text
a year ago
20 kB
6
Indexable
import xlwings as xw import datetime import re import pandas as pd import numpy as np from openpyxl.utils import range_boundaries as rb, get_column_letter #@title Imports import os,string,openpyxl, os,pandas as pd,json from openpyxl import Workbook from componentExtractor import ComponentsManager class ExcelSheetManager: def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'): self.file_path = file_path self.workbook = None self.sheet_names = [] self.active_sheet = None self.data = None # Check if file exists if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L': self.workbook = openpyxl.load_workbook(file_path,data_only=True) self.sheet_names = self.workbook.sheetnames elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R': self.workbook = openpyxl.load_workbook(file_path,data_only=False) self.sheet_names = self.workbook.sheetnames else: print("reading as pd.ExcelFile") self.data = pd.ExcelFile(file_path) def add_sheet(self,name): self.workbook.create_sheet(title=name) self.sheet_names = self.workbook.sheetnames def get_sheet(self, sheet_name): if sheet_name in self.sheet_names: return self.workbook[sheet_name] else: raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.") def read_cell_value(self, sheet_name, cell): sheet = self.get_sheet(sheet_name) return sheet[cell].value if cell in sheet else None def set_cell_value(self, sheet_name, cell, value): sheet = self.get_sheet(sheet_name) if cell in sheet: sheet[cell].value = value else: raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.") def activate_sheet(self, sheet_name): self.active_sheet = self.get_sheet(sheet_name) def change_activate_sheet(self, sheet_name): self.activate_sheet(sheet_name) def save_changes(self,name): if self.workbook: self.workbook.save(name+'.xlsx') def read_data(self,sheetname): # worksheet = self.workbook[sheetname] max_row, max_column = self.get_sheet_size(sheetname) max_column = max_column - 1 #from some reason one column added to my data.. range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}" # print(range_str) return self.get_cells_data(sheetname,range_str,False) def get_sheet_size(self, sheet_name): sheet = self.get_sheet(sheet_name) max_row = sheet.max_row max_col = sheet.max_column return max_row, max_col @staticmethod def generate_column_name(index:int): """ Function Use : Generate Excel-style column name Parameters: index (int): Column index . Returns: str: Excel-style column name. """ column_name = "" while index >= 0: column_name = string.ascii_uppercase[index % 26] + column_name index = index // 26 - 1 if index < 0: break return column_name def reload_workbook(self): if self.workbook: self.workbook.save(self.file_path) self.workbook = openpyxl.load_workbook(self.file_path, data_only=True) # Load with data_only=True to get values @staticmethod def create_empty_table(rows, cols): assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer." assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer." columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)] empty_table = pd.DataFrame(index=range(rows), columns=columns) return empty_table def get_cells_data(self, sheet_name, range_str, return_formula=False): start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str) start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str) end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str) num_rows = end_row - start_row + 1 num_cols = end_col_index - start_col_index + 1 max_rows,max_columns = self.get_sheet_size(sheet_name) assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows cols are bigger then can be " df = self.create_empty_table(num_rows, num_cols) sheet = self.get_sheet(sheet_name) for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)): for j, cell in enumerate(row): if return_formula and cell.data_type == 'f': df.iloc[i, j] = cell.value else: df.iloc[i, j] = cell.value return df @staticmethod def create_empty_table_from_range(range_str): start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str) start_col_index = ExcelSheetManager.column_index_from_string(start_col_str) end_col_index = ExcelSheetManager.column_index_from_string(end_col_str) row_labels = range(start_row, end_row + 1) col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)] empty_table = pd.DataFrame(index=row_labels, columns=col_labels) return empty_table @staticmethod def column_index_from_string(col_str): num = 0 for c in col_str: if c.isalpha(): num = num * 26 + (ord(c.upper()) - ord('A')) + 1 return num - 1 @staticmethod def split_cell_string(cell_str): col_str = ''.join(filter(str.isalpha, cell_str)) row = ''.join(filter(str.isdigit, cell_str)) return col_str, int(row) @staticmethod def split_range_cells_string(range_str): start_cell, end_cell = range_str.split(":") start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell) end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell) return start_col_str, start_row, end_col_str, end_row def get_dates_for_next_year(): dates = [] # current_date = datetime.now().replace(day=1) # Set to the first day of the current month current_date = datetime.now().replace(day=1) # Set to the first day of the current month current_date = datetime(current_date.year-1,current_date.month,1) for _ in range(12): dates.append(current_date.strftime("%Y-%m-%d")) next_month = (current_date.month % 12) + 1 year_increment = (current_date.month // 12) if next_month == 1: # Increment year if next month is January year_increment = 1 else: year_increment = 0 current_date = current_date.replace(year=current_date.year + year_increment, month=next_month) return dates def get_first_days_of_past_12_months(): dates = [] current_date = datetime.now().replace(day=1) # Set to the first day of the current month for _ in range(12): dates.append(current_date.strftime("%Y-%m-%d")) prev_month = (current_date.month - 1) if current_date.month > 1 else 12 year_decrement = -1 if current_date.month == 1 else 0 current_date = current_date.replace(year=current_date.year + year_decrement, month=prev_month) dates.sort() # Sort the dates in ascending order return dates class ExcelCopyPasteManager: def __init__(self, src_file, src_sheet_name, dest_file, dest_sheet_name): self.src_file = src_file self.src_sheet_name = src_sheet_name self.dest_file = dest_file self.dest_sheet_name = dest_sheet_name try: self.src_wb = xw.Book(self.src_file) self.src_sheet = self.src_wb.sheets[self.src_sheet_name] self.dest_wb = xw.Book(self.dest_file) self.dest_sheet = self.dest_wb.sheets[self.dest_sheet_name] except Exception as e: print(f"Error initializing Excel files: {e}") raise def setNameDest(self,dest_sheet_name): try: self.dest_sheet = self.dest_wb.sheets[dest_sheet_name] except Exception as e: print(f"Error initializing Excel files: {e}") raise def disposeSrc(self): self.src_wb.close() def disposeDest(self): self.dest_wb.close() def saveStateSrc(self): self.src_wb.save() def saveStateDest(self): self.dest_wb.save() @staticmethod def get_column_letter(column_index): return get_column_letter(column_index) @staticmethod def split_range_str_for_check_ends(range_str): range_str += '0' minC, minR, maxC, _ = rb(range_str) minC = get_column_letter(minC) maxC = get_column_letter(maxC) print(f"{minC}{minR}:{maxC}?") return minC, minR, maxC @staticmethod def split_range_str(range_str): minC, minR, maxC, maxR = rb(range_str) minC = get_column_letter(minC) maxC = get_column_letter(maxC) return minC, minR, maxC, maxR @staticmethod def indexes_range_str(range_str): minC, minR, maxC, maxR = rb(range_str) print(minC, minR, maxC, maxR) return minC, minR, maxC, maxR def where_ends(self, worksheet, startC, startR, endC): """ Determine where the data ends in a given column """ count = startR while worksheet.range(f"{startC}{count}").value is not None: count += 1 future_endR = count - 1 print(f"{startC}{startR}:{endC}{future_endR}") return f"{startC}{startR}:{endC}{future_endR}" def check(self, manager_excel, worksheet, range_str, to_save=False): startC, startR, endC = self.split_range_str_for_check_ends(range_str)[:3] future_end_range = self.where_ends(worksheet, startC, startR, endC) print(future_end_range) if to_save: manager_excel.save(f"{self.dest_file}_backup_{datetime.datetime.now().date()}.xlsx") return future_end_range def check_if_need_clean_data_to_nulls(self, res_range_str1, res_range_str2): """ Check if data in one range extends beyond another """ _, _, _, maxR1 = self.split_range_str(res_range_str1) minC2, _, maxC2, maxR2 = self.split_range_str(res_range_str2) if maxR1 > maxR2: return None else: # print(f"{minC2}{maxR1}:{maxC2}{maxR2}") return f"{minC2}{maxR1}:{maxC2}{maxR2}" def copy_range(self, cell_range): """ Copy data from the source range to the destination """ min_col, min_row, max_col, max_row = self.indexes_range_str(cell_range) src_range = self.src_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}") dest_range = self.dest_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}") dest_range.value = src_range.value timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M") self.dest_wb.save(f"dest_file_{timestamp}.xlsx") def clear_range(self,sheet, range_str): sheet.range(range_str).clear_contents() def applyValue(self, sheet,place,value): sheet.range(place).value = value def apply_formula(self, sheet,place,formula): sheet.range(place).formula = formula def apply_expanded_formula(self, sheet, start_cell, formula): """ for unique , transpose .. etc """ sheet.range(start_cell).formula2 = formula def check_formula(self,sheet, cell): formula = sheet.range(cell).formula print(f"Formula in {cell}: {formula}") def insertDfFromFirstCellReference(self,sheet,cell,df): sheet.range(cell).value = df def readCellsValue(self,sheet,cell): """ 1 cell or range..(and return a list) """ return sheet.range(cell).value def createNamedRange(self,sheet,cells,name): sheet.range(cells).name = name def get_range_data_as_dataframe(self,sheet,cell): return sheet.range(cell).options(pd.DataFrame,expand='table').value def getRangeTable(self,sheet,theRange,directions=None): direction_map = { 'r': 'right', 'l': 'left', 'u': 'up', 'd': 'down' } if directions != None: result_range = sheet.range(theRange) for direction in directions: if direction.lower() in direction_map: result_range = result_range.expand(direction_map[direction.lower()]) else: print(f"Warning: Invalid direction '{direction}' ignored.") else: result_range = sheet.range(theRange) range_address = result_range.address range_str = range_address.replace("$", "") minC, minR, maxC, maxR = self.split_range_str(range_str) return result_range.value , range_address , f"{minC}{minR}:{maxC}{maxR}" def pipe1(self, range_str): """ Perform the copy-paste operation and clear the destination range """ futureDestRange = self.check(self.dest_wb, self.dest_sheet, range_str, to_save=False) startC, startR, endC, maxR = self.split_range_str(futureDestRange) rangeDest = f"{startC}{startR}:{endC}{maxR}" futureSrcRange = self.check(self.src_wb, self.src_sheet, range_str, to_save=False) startC, startR, endC, maxR = self.split_range_str(futureSrcRange) rangeSrc = f"{startC}{startR}:{endC}{maxR}" dataSrc,_,_ = self.getRangeTable(self.src_sheet,rangeSrc) print(f"The expanded fixed range for Src is: {rangeSrc}") print(f"The expanded fixed range for dest is: {rangeDest}") self.clear_range(self.dest_sheet,rangeDest) self.insertDfFromFirstCellReference(self.dest_sheet,rangeSrc,dataSrc) return startC, startR, endC, maxR def pipe2(self,maxR): _,_,datesToClean = self.getRangeTable(self.dest_sheet,"AB6","r") _, _, maxC, _ = self.split_range_str(datesToClean) print(maxC) self.clear_range(self.dest_sheet,datesToClean) _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB7","dr") self.clear_range(self.dest_sheet,contentTableToClean) z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'], ['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same ['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'], ] for i in range(len(z)): self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1]) self.clear_range(self.dest_sheet,f"O7:W{maxR}") #ranges to update with formulas ranges_to_update = [[f'O7:O{maxR}','=A7'], [f'P7:P{maxR}','=M7'], [f'Q7:Q{maxR}',"""=IF(COUNTIF('helper Renewal & Move Ins'!$A:$A,M7)>0,VLOOKUP(M7,'helper Renewal & Move Ins'!A$2:B$118,2,FALSE),IF(ISBLANK(M7),"Unclaimed"))"""], [f'R7:R{maxR}',"=DATE(YEAR(I7),MONTH(I7),1)"], [f'T7:T{maxR}',"""=IF(D7 = "Lease Signed",1,IF(D7 = "Lease Renewal",0,""))"""], [f'U7:U{maxR}',"=MONTH(I7)"], [f"V7:V{maxR}","=YEAR(I7)"], [f"W7:W{maxR}","=DATE(V7,U7,1)"], [f'AB7:{maxC}28',f"=COUNTIFS($A$7:$A{maxR},$AA7,$W$7:$W{maxR},AB$6,$T$7:$T{maxR},0)"], ] for i in range(len(ranges_to_update)): self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1]) z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'], ['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same ['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'], ] for i in range(len(z)): self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1]) def pipe3(self): self.clear_range(self.dest_sheet,"A1:W1000") # WHAT ABOUT COLUMN A + B .. nati ! z = [["I1","=UNIQUE(B2:B1000)"], ["J1","=UNIQUE('Renewal & Move Ins actual'!O7:Q5000)"]] for i in range(len(z)): self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1]) self.setNameDest("helper Renewal & Move Ins 2") z = [["A2","UNIQUE('Renewal & Move Ins actual'!O7:P3000)"], [f"J1",f"=UNIQUE(G1:H145)"]] for i in range(len(z)): self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1]) _,_,columnProperties = self.getRangeTable(self.dest_sheet,"A2","d") _, _, _, maxR = self.split_range_str(columnProperties) a = [[f"C2:C{maxR}",f"""=IF(ISBLANK(B2),"Unclaimed",VLOOKUP(B2, 'helper Renewal & Move Ins'!$A$2:$B$500, 2, FALSE))"""], [f"G2:G{maxR}",f"=A2"], [f"H2:H{maxR}",f"=IF(ISBLANK(G2),"",C2)"] ] for i in range(len(a)): self.apply_formula(self.dest_sheet,a[i][0],a[i][1]) return maxR def pipe4(self,maxR3): z = [[f"AA34:AA{maxR3}",f"='helper Renewal & Move Ins 2'!K2"], [f"Z34:Z{maxR3}",f"='helper Renewal & Move Ins 2'!J2"], [f"Y34:Y{maxR3}",f"=Z34&","&AA34"],] for i in range(len(z)): self.apply_formula(self.dest_sheet,z[i][0],z[i][1]) _,_,datesRange = self.getRangeTable(self.dest_sheet,"AB33","r") _, _, maxCdates, _ = self.split_range_str(datesRange) _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB34","dr") self.clear_range(self.dest_sheet,contentTableToClean) _,_,contentRange = self.getRangeTable(self.dest_sheet,"AA34","d") _, _, _, maxR = self.split_range_str(contentRange) ranges_to_update = [[f"AB34:{maxCdates}{maxR}",f"=COUNTIFS($O:$O,$Z34,$Q:$Q,$AA34,$R:$R,AB$33)"], ] for i in range(len(ranges_to_update)): self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1]) return maxR def pipe5(self , maxR): z = [[f"BE34:BE{maxR}",f"=AA34"], [f"BD34:BD{maxR}",f"=Z34"], [f"BC34:BC{maxR}",f"=BD34&","&BE34"],] for i in range(len(z)): self.apply_formula(self.dest_sheet,z[i][0],z[i][1]) _,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"BF34","dr") self.clear_range(self.dest_sheet,contentTableToClean) ranges_to_update = [[f"BF34:BQ{maxR}",f"=IFERROR(INDEX($AB$34:$AI${maxR},MATCH($BC34,$Y$34:$Y${maxR},0),MATCH(BF$33,$AB$33:$AI$33,0)),"-")"], ] for i in range(len(ranges_to_update)): self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1]) def pipe6(self,maxR1): z = [[f"BT7:BT{maxR1}",f"""=IF($H7="","1/1/1900",TEXT($H7,"m/d/yyyy"))"""], [f"BS7:BS{maxR1}",f"=DATEVALUE($BT7:$BT1884)"],] for i in range(len(z)): self.apply_formula(self.dest_sheet,z[i][0],z[i][1]) def pipeAll(self): #coping from M_O_pull_all_leases_v2 to AM Reports the main table. _, _, _, maxR1 =excelCP.pipe1(range_str='A7:M') excelCP.pip2(maxR1) excelCP.setNameDest("helper Renewal & Move Ins") maxR3 = excelCP.pipe3() excelCP.setNameDest("Renewal-Move-Ins Goals&Actual") maxR = excelCP.pipe4(maxR3) excelCP.pipe5(maxR) excelCP.pipe6(maxR1) excelCP = ExcelCopyPasteManager('M_O_pull_all_leases_v2.xlsx', 'Report1', 'AM Reports.xlsx', 'Renewal-Move-Ins Goals&Actual') excelCP.pipeAll()
Editor is loading...
Leave a Comment