Untitled
unknown
python
a year ago
8.1 kB
21
Indexable
#@title Imports
!pip install -q langchain langchain-community groq langchain-experimental langchain-groq
import os,string,openpyxl,shutil, os,pandas as pd,random,json,time,glob
from openpyxl import Workbook
from openpyxl.chart import Reference, Series ,BarChart,LineChart,AreaChart
from openpyxl.chart.layout import Layout, ManualLayout
from openpyxl.chart.series import DataPoint
from openpyxl import styles
from openpyxl.chart.label import DataLabelList
from openpyxl.chart.text import RichText
from openpyxl.drawing.text import Paragraph, ParagraphProperties, CharacterProperties, Font
from openpyxl.chart import BarChart3D,AreaChart3D ,LineChart3D
from openpyxl.chart.shapes import GraphicalProperties
from openpyxl.drawing.fill import PatternFillProperties, ColorChoice
from openpyxl.drawing.line import LineProperties
from openpyxl.drawing.colors import ColorChoice
!pip install fastprogress
from fastprogress import master_bar, progress_bar
from tqdm import tqdm
from google.colab import files
from google.colab import userdata
from groq import Groq
from langchain_groq import ChatGroq
from langchain import HuggingFaceHub, LLMChain
from langchain.prompts import PromptTemplate
from langchain_experimental.agents import create_csv_agent
#@title ExcelSheetManager Class
# Note ! added param - dataOrFunction=True
class ExcelSheetManager:
def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'):
self.file_path = file_path
self.workbook = None
self.sheet_names = []
self.active_sheet = None
self.data = None
# Check if file exists
if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L':
self.workbook = openpyxl.load_workbook(file_path,data_only=True)
self.sheet_names = self.workbook.sheetnames
elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R':
self.workbook = openpyxl.load_workbook(file_path,data_only=False)
self.sheet_names = self.workbook.sheetnames
else:
print("reading as pd.ExcelFile")
self.data = pd.ExcelFile(file_path)
def add_sheet(self,name):
self.workbook.create_sheet(title=name)
self.sheet_names = self.workbook.sheetnames
def get_sheet(self, sheet_name):
if sheet_name in self.sheet_names:
return self.workbook[sheet_name]
else:
raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.")
def read_cell_value(self, sheet_name, cell):
sheet = self.get_sheet(sheet_name)
return sheet[cell].value if cell in sheet else None
def set_cell_value(self, sheet_name, cell, value):
sheet = self.get_sheet(sheet_name)
if cell in sheet:
sheet[cell].value = value
else:
raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.")
def activate_sheet(self, sheet_name):
self.active_sheet = self.get_sheet(sheet_name)
def change_activate_sheet(self, sheet_name):
self.activate_sheet(sheet_name)
def save_changes(self,name):
if self.workbook:
self.workbook.save(name+'.xlsx')
def read_data(self,sheetname):
worksheet = self.workbook[sheetname]
max_row, max_column = self.get_sheet_size(sheetname)
max_column = max_column - 1 #from some reason one column added to my data..
range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}"
print(range_str)
return self.get_cells_data(sheetname,range_str,False)
def get_sheet_size(self, sheet_name):
sheet = self.get_sheet(sheet_name)
max_row = sheet.max_row
max_col = sheet.max_column
return max_row, max_col
@staticmethod
def generate_column_name(index:int):
"""
Function Use : Generate Excel-style column name
Parameters:
index (int): Column index .
Returns:
str: Excel-style column name.
"""
column_name = ""
while index >= 0:
column_name = string.ascii_uppercase[index % 26] + column_name
index = index // 26 - 1
if index < 0:
break
return column_name
def reload_workbook(self):
if self.workbook:
self.workbook.save(self.file_path)
self.workbook = openpyxl.load_workbook(self.file_path, data_only=True) # Load with data_only=True to get values
@staticmethod
def create_empty_table(rows, cols):
assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer."
assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer."
columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)]
empty_table = pd.DataFrame(index=range(rows), columns=columns)
return empty_table
def get_cells_data(self, sheet_name, range_str, return_formula=False):
start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
print(start_col_str,start_row,end_col_str,end_row)
start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str)
end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str)
num_rows = end_row - start_row + 1
num_cols = end_col_index - start_col_index + 1
max_rows,max_columns = self.get_sheet_size(sheet_name)
assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows \ cols are bigger then can be "
df = self.create_empty_table(num_rows, num_cols)
sheet = self.get_sheet(sheet_name)
for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)):
for j, cell in enumerate(row):
if return_formula and cell.data_type == 'f':
df.iloc[i, j] = cell.value
else:
df.iloc[i, j] = cell.value
return df
@staticmethod
def create_empty_table_from_range(range_str):
start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
start_col_index = ExcelSheetManager.column_index_from_string(start_col_str)
end_col_index = ExcelSheetManager.column_index_from_string(end_col_str)
num_rows = end_row - start_row + 1
num_cols = end_col_index - start_col_index + 1
row_labels = range(start_row, end_row + 1)
col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)]
empty_table = pd.DataFrame(index=row_labels, columns=col_labels)
return empty_table
@staticmethod
def column_index_from_string(col_str):
num = 0
for c in col_str:
if c.isalpha():
num = num * 26 + (ord(c.upper()) - ord('A')) + 1
return num - 1
@staticmethod
def split_cell_string(cell_str):
col_str = ''.join(filter(str.isalpha, cell_str))
row = ''.join(filter(str.isdigit, cell_str))
return col_str, int(row)
@staticmethod
def split_range_cells_string(range_str):
start_cell, end_cell = range_str.split(":")
start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell)
end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell)
return start_col_str, start_row, end_col_str, end_row
def createSheet(self,name):
return self.workbook.create_sheet(title=name)
def fix_column_dimensions_string(self,column_to_fix:str,row_it_start_from:int):
self.workbook.column_dimensions[column_to_fix].width =\
len(self.workbook[column_to_fix+str(row_it_start_from)].value)
Template_path = "/content/AM Reports (10).xlsx"
load_with_openpyxl = True
managerExcelMO = ExcelSheetManager(Template_path,load_openpyxl=load_with_openpyxl,dataOrFunction='R')
managerExcelMO.save_changes(f"testAs(10)")
Editor is loading...
Leave a Comment