Untitled
unknown
plain_text
2 years ago
20 kB
13
Indexable
import xlwings as xw
import datetime
import re
import pandas as pd
import numpy as np
from openpyxl.utils import range_boundaries as rb, get_column_letter
#@title Imports
import os,string,openpyxl, os,pandas as pd,json
from openpyxl import Workbook
from componentExtractor import ComponentsManager
class ExcelSheetManager:
def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'):
self.file_path = file_path
self.workbook = None
self.sheet_names = []
self.active_sheet = None
self.data = None
# Check if file exists
if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L':
self.workbook = openpyxl.load_workbook(file_path,data_only=True)
self.sheet_names = self.workbook.sheetnames
elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R':
self.workbook = openpyxl.load_workbook(file_path,data_only=False)
self.sheet_names = self.workbook.sheetnames
else:
print("reading as pd.ExcelFile")
self.data = pd.ExcelFile(file_path)
def add_sheet(self,name):
self.workbook.create_sheet(title=name)
self.sheet_names = self.workbook.sheetnames
def get_sheet(self, sheet_name):
if sheet_name in self.sheet_names:
return self.workbook[sheet_name]
else:
raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.")
def read_cell_value(self, sheet_name, cell):
sheet = self.get_sheet(sheet_name)
return sheet[cell].value if cell in sheet else None
def set_cell_value(self, sheet_name, cell, value):
sheet = self.get_sheet(sheet_name)
if cell in sheet:
sheet[cell].value = value
else:
raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.")
def activate_sheet(self, sheet_name):
self.active_sheet = self.get_sheet(sheet_name)
def change_activate_sheet(self, sheet_name):
self.activate_sheet(sheet_name)
def save_changes(self,name):
if self.workbook:
self.workbook.save(name+'.xlsx')
def read_data(self,sheetname):
# worksheet = self.workbook[sheetname]
max_row, max_column = self.get_sheet_size(sheetname)
max_column = max_column - 1 #from some reason one column added to my data..
range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}"
# print(range_str)
return self.get_cells_data(sheetname,range_str,False)
def get_sheet_size(self, sheet_name):
sheet = self.get_sheet(sheet_name)
max_row = sheet.max_row
max_col = sheet.max_column
return max_row, max_col
@staticmethod
def generate_column_name(index:int):
"""
Function Use : Generate Excel-style column name
Parameters:
index (int): Column index .
Returns:
str: Excel-style column name.
"""
column_name = ""
while index >= 0:
column_name = string.ascii_uppercase[index % 26] + column_name
index = index // 26 - 1
if index < 0:
break
return column_name
def reload_workbook(self):
if self.workbook:
self.workbook.save(self.file_path)
self.workbook = openpyxl.load_workbook(self.file_path, data_only=True) # Load with data_only=True to get values
@staticmethod
def create_empty_table(rows, cols):
assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer."
assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer."
columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)]
empty_table = pd.DataFrame(index=range(rows), columns=columns)
return empty_table
def get_cells_data(self, sheet_name, range_str, return_formula=False):
start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str)
end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str)
num_rows = end_row - start_row + 1
num_cols = end_col_index - start_col_index + 1
max_rows,max_columns = self.get_sheet_size(sheet_name)
assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows cols are bigger then can be "
df = self.create_empty_table(num_rows, num_cols)
sheet = self.get_sheet(sheet_name)
for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)):
for j, cell in enumerate(row):
if return_formula and cell.data_type == 'f':
df.iloc[i, j] = cell.value
else:
df.iloc[i, j] = cell.value
return df
@staticmethod
def create_empty_table_from_range(range_str):
start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
start_col_index = ExcelSheetManager.column_index_from_string(start_col_str)
end_col_index = ExcelSheetManager.column_index_from_string(end_col_str)
row_labels = range(start_row, end_row + 1)
col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)]
empty_table = pd.DataFrame(index=row_labels, columns=col_labels)
return empty_table
@staticmethod
def column_index_from_string(col_str):
num = 0
for c in col_str:
if c.isalpha():
num = num * 26 + (ord(c.upper()) - ord('A')) + 1
return num - 1
@staticmethod
def split_cell_string(cell_str):
col_str = ''.join(filter(str.isalpha, cell_str))
row = ''.join(filter(str.isdigit, cell_str))
return col_str, int(row)
@staticmethod
def split_range_cells_string(range_str):
start_cell, end_cell = range_str.split(":")
start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell)
end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell)
return start_col_str, start_row, end_col_str, end_row
def get_dates_for_next_year():
dates = []
# current_date = datetime.now().replace(day=1) # Set to the first day of the current month
current_date = datetime.now().replace(day=1) # Set to the first day of the current month
current_date = datetime(current_date.year-1,current_date.month,1)
for _ in range(12):
dates.append(current_date.strftime("%Y-%m-%d"))
next_month = (current_date.month % 12) + 1
year_increment = (current_date.month // 12)
if next_month == 1: # Increment year if next month is January
year_increment = 1
else:
year_increment = 0
current_date = current_date.replace(year=current_date.year + year_increment, month=next_month)
return dates
def get_first_days_of_past_12_months():
dates = []
current_date = datetime.now().replace(day=1) # Set to the first day of the current month
for _ in range(12):
dates.append(current_date.strftime("%Y-%m-%d"))
prev_month = (current_date.month - 1) if current_date.month > 1 else 12
year_decrement = -1 if current_date.month == 1 else 0
current_date = current_date.replace(year=current_date.year + year_decrement, month=prev_month)
dates.sort() # Sort the dates in ascending order
return dates
class ExcelCopyPasteManager:
def __init__(self, src_file, src_sheet_name, dest_file, dest_sheet_name):
self.src_file = src_file
self.src_sheet_name = src_sheet_name
self.dest_file = dest_file
self.dest_sheet_name = dest_sheet_name
try:
self.src_wb = xw.Book(self.src_file)
self.src_sheet = self.src_wb.sheets[self.src_sheet_name]
self.dest_wb = xw.Book(self.dest_file)
self.dest_sheet = self.dest_wb.sheets[self.dest_sheet_name]
except Exception as e:
print(f"Error initializing Excel files: {e}")
raise
def setNameDest(self,dest_sheet_name):
try:
self.dest_sheet = self.dest_wb.sheets[dest_sheet_name]
except Exception as e:
print(f"Error initializing Excel files: {e}")
raise
def disposeSrc(self):
self.src_wb.close()
def disposeDest(self):
self.dest_wb.close()
def saveStateSrc(self):
self.src_wb.save()
def saveStateDest(self):
self.dest_wb.save()
@staticmethod
def get_column_letter(column_index):
return get_column_letter(column_index)
@staticmethod
def split_range_str_for_check_ends(range_str):
range_str += '0'
minC, minR, maxC, _ = rb(range_str)
minC = get_column_letter(minC)
maxC = get_column_letter(maxC)
print(f"{minC}{minR}:{maxC}?")
return minC, minR, maxC
@staticmethod
def split_range_str(range_str):
minC, minR, maxC, maxR = rb(range_str)
minC = get_column_letter(minC)
maxC = get_column_letter(maxC)
return minC, minR, maxC, maxR
@staticmethod
def indexes_range_str(range_str):
minC, minR, maxC, maxR = rb(range_str)
print(minC, minR, maxC, maxR)
return minC, minR, maxC, maxR
def where_ends(self, worksheet, startC, startR, endC):
"""
Determine where the data ends in a given column
"""
count = startR
while worksheet.range(f"{startC}{count}").value is not None:
count += 1
future_endR = count - 1
print(f"{startC}{startR}:{endC}{future_endR}")
return f"{startC}{startR}:{endC}{future_endR}"
def check(self, manager_excel, worksheet, range_str, to_save=False):
startC, startR, endC = self.split_range_str_for_check_ends(range_str)[:3]
future_end_range = self.where_ends(worksheet, startC, startR, endC)
print(future_end_range)
if to_save:
manager_excel.save(f"{self.dest_file}_backup_{datetime.datetime.now().date()}.xlsx")
return future_end_range
def check_if_need_clean_data_to_nulls(self, res_range_str1, res_range_str2):
"""
Check if data in one range extends beyond another
"""
_, _, _, maxR1 = self.split_range_str(res_range_str1)
minC2, _, maxC2, maxR2 = self.split_range_str(res_range_str2)
if maxR1 > maxR2:
return None
else:
# print(f"{minC2}{maxR1}:{maxC2}{maxR2}")
return f"{minC2}{maxR1}:{maxC2}{maxR2}"
def copy_range(self, cell_range):
"""
Copy data from the source range to the destination
"""
min_col, min_row, max_col, max_row = self.indexes_range_str(cell_range)
src_range = self.src_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}")
dest_range = self.dest_sheet.range(f"{self.get_column_letter(min_col)}{min_row}:{self.get_column_letter(max_col)}{max_row}")
dest_range.value = src_range.value
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
self.dest_wb.save(f"dest_file_{timestamp}.xlsx")
def clear_range(self,sheet, range_str):
sheet.range(range_str).clear_contents()
def applyValue(self, sheet,place,value):
sheet.range(place).value = value
def apply_formula(self, sheet,place,formula):
sheet.range(place).formula = formula
def apply_expanded_formula(self, sheet, start_cell, formula):
"""
for unique , transpose .. etc
"""
sheet.range(start_cell).formula2 = formula
def check_formula(self,sheet, cell):
formula = sheet.range(cell).formula
print(f"Formula in {cell}: {formula}")
def insertDfFromFirstCellReference(self,sheet,cell,df):
sheet.range(cell).value = df
def readCellsValue(self,sheet,cell):
"""
1 cell or range..(and return a list)
"""
return sheet.range(cell).value
def createNamedRange(self,sheet,cells,name):
sheet.range(cells).name = name
def get_range_data_as_dataframe(self,sheet,cell):
return sheet.range(cell).options(pd.DataFrame,expand='table').value
def getRangeTable(self,sheet,theRange,directions=None):
direction_map = {
'r': 'right',
'l': 'left',
'u': 'up',
'd': 'down'
}
if directions != None:
result_range = sheet.range(theRange)
for direction in directions:
if direction.lower() in direction_map:
result_range = result_range.expand(direction_map[direction.lower()])
else:
print(f"Warning: Invalid direction '{direction}' ignored.")
else:
result_range = sheet.range(theRange)
range_address = result_range.address
range_str = range_address.replace("$", "")
minC, minR, maxC, maxR = self.split_range_str(range_str)
return result_range.value , range_address , f"{minC}{minR}:{maxC}{maxR}"
def pipe1(self, range_str):
"""
Perform the copy-paste operation and clear the destination range
"""
futureDestRange = self.check(self.dest_wb, self.dest_sheet, range_str, to_save=False)
startC, startR, endC, maxR = self.split_range_str(futureDestRange)
rangeDest = f"{startC}{startR}:{endC}{maxR}"
futureSrcRange = self.check(self.src_wb, self.src_sheet, range_str, to_save=False)
startC, startR, endC, maxR = self.split_range_str(futureSrcRange)
rangeSrc = f"{startC}{startR}:{endC}{maxR}"
dataSrc,_,_ = self.getRangeTable(self.src_sheet,rangeSrc)
print(f"The expanded fixed range for Src is: {rangeSrc}")
print(f"The expanded fixed range for dest is: {rangeDest}")
self.clear_range(self.dest_sheet,rangeDest)
self.insertDfFromFirstCellReference(self.dest_sheet,rangeSrc,dataSrc)
return startC, startR, endC, maxR
def pipe2(self,maxR):
_,_,datesToClean = self.getRangeTable(self.dest_sheet,"AB6","r")
_, _, maxC, _ = self.split_range_str(datesToClean)
print(maxC)
self.clear_range(self.dest_sheet,datesToClean)
_,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB7","dr")
self.clear_range(self.dest_sheet,contentTableToClean)
z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same
['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
]
for i in range(len(z)):
self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])
self.clear_range(self.dest_sheet,f"O7:W{maxR}")
#ranges to update with formulas
ranges_to_update = [[f'O7:O{maxR}','=A7'],
[f'P7:P{maxR}','=M7'],
[f'Q7:Q{maxR}',"""=IF(COUNTIF('helper Renewal & Move Ins'!$A:$A,M7)>0,VLOOKUP(M7,'helper Renewal & Move Ins'!A$2:B$118,2,FALSE),IF(ISBLANK(M7),"Unclaimed"))"""],
[f'R7:R{maxR}',"=DATE(YEAR(I7),MONTH(I7),1)"],
[f'T7:T{maxR}',"""=IF(D7 = "Lease Signed",1,IF(D7 = "Lease Renewal",0,""))"""],
[f'U7:U{maxR}',"=MONTH(I7)"],
[f"V7:V{maxR}","=YEAR(I7)"],
[f"W7:W{maxR}","=DATE(V7,U7,1)"],
[f'AB7:{maxC}28',f"=COUNTIFS($A$7:$A{maxR},$AA7,$W$7:$W{maxR},AB$6,$T$7:$T{maxR},0)"],
]
for i in range(len(ranges_to_update)):
self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])
z = [['AB6',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
['AA7',f"=UNIQUE(A7:A{maxR})"], # no need .. always the same
['AB33',f'=TRANSPOSE(SORT(UNIQUE(W7:W{maxR})))'],
]
for i in range(len(z)):
self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])
def pipe3(self):
self.clear_range(self.dest_sheet,"A1:W1000")
# WHAT ABOUT COLUMN A + B .. nati !
z = [["I1","=UNIQUE(B2:B1000)"],
["J1","=UNIQUE('Renewal & Move Ins actual'!O7:Q5000)"]]
for i in range(len(z)):
self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])
self.setNameDest("helper Renewal & Move Ins 2")
z = [["A2","UNIQUE('Renewal & Move Ins actual'!O7:P3000)"],
[f"J1",f"=UNIQUE(G1:H145)"]]
for i in range(len(z)):
self.apply_expanded_formula(self.dest_sheet,z[i][0],z[i][1])
_,_,columnProperties = self.getRangeTable(self.dest_sheet,"A2","d")
_, _, _, maxR = self.split_range_str(columnProperties)
a = [[f"C2:C{maxR}",f"""=IF(ISBLANK(B2),"Unclaimed",VLOOKUP(B2, 'helper Renewal & Move Ins'!$A$2:$B$500, 2, FALSE))"""],
[f"G2:G{maxR}",f"=A2"],
[f"H2:H{maxR}",f"=IF(ISBLANK(G2),"",C2)"]
]
for i in range(len(a)):
self.apply_formula(self.dest_sheet,a[i][0],a[i][1])
return maxR
def pipe4(self,maxR3):
z = [[f"AA34:AA{maxR3}",f"='helper Renewal & Move Ins 2'!K2"],
[f"Z34:Z{maxR3}",f"='helper Renewal & Move Ins 2'!J2"],
[f"Y34:Y{maxR3}",f"=Z34&","&AA34"],]
for i in range(len(z)):
self.apply_formula(self.dest_sheet,z[i][0],z[i][1])
_,_,datesRange = self.getRangeTable(self.dest_sheet,"AB33","r")
_, _, maxCdates, _ = self.split_range_str(datesRange)
_,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"AB34","dr")
self.clear_range(self.dest_sheet,contentTableToClean)
_,_,contentRange = self.getRangeTable(self.dest_sheet,"AA34","d")
_, _, _, maxR = self.split_range_str(contentRange)
ranges_to_update = [[f"AB34:{maxCdates}{maxR}",f"=COUNTIFS($O:$O,$Z34,$Q:$Q,$AA34,$R:$R,AB$33)"],
]
for i in range(len(ranges_to_update)):
self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])
return maxR
def pipe5(self , maxR):
z = [[f"BE34:BE{maxR}",f"=AA34"],
[f"BD34:BD{maxR}",f"=Z34"],
[f"BC34:BC{maxR}",f"=BD34&","&BE34"],]
for i in range(len(z)):
self.apply_formula(self.dest_sheet,z[i][0],z[i][1])
_,_,contentTableToClean = self.getRangeTable(self.dest_sheet,"BF34","dr")
self.clear_range(self.dest_sheet,contentTableToClean)
ranges_to_update = [[f"BF34:BQ{maxR}",f"=IFERROR(INDEX($AB$34:$AI${maxR},MATCH($BC34,$Y$34:$Y${maxR},0),MATCH(BF$33,$AB$33:$AI$33,0)),"-")"],
]
for i in range(len(ranges_to_update)):
self.apply_formula(self.dest_sheet,ranges_to_update[i][0],ranges_to_update[i][1])
def pipe6(self,maxR1):
z = [[f"BT7:BT{maxR1}",f"""=IF($H7="","1/1/1900",TEXT($H7,"m/d/yyyy"))"""],
[f"BS7:BS{maxR1}",f"=DATEVALUE($BT7:$BT1884)"],]
for i in range(len(z)):
self.apply_formula(self.dest_sheet,z[i][0],z[i][1])
def pipeAll(self):
#coping from M_O_pull_all_leases_v2 to AM Reports the main table.
_, _, _, maxR1 =excelCP.pipe1(range_str='A7:M')
excelCP.pip2(maxR1)
excelCP.setNameDest("helper Renewal & Move Ins")
maxR3 = excelCP.pipe3()
excelCP.setNameDest("Renewal-Move-Ins Goals&Actual")
maxR = excelCP.pipe4(maxR3)
excelCP.pipe5(maxR)
excelCP.pipe6(maxR1)
excelCP = ExcelCopyPasteManager('M_O_pull_all_leases_v2.xlsx', 'Report1', 'AM Reports.xlsx', 'Renewal-Move-Ins Goals&Actual')
excelCP.pipeAll()
Editor is loading...
Leave a Comment