Untitled
unknown
python
9 months ago
8.1 kB
9
Indexable
#@title Imports !pip install -q langchain langchain-community groq langchain-experimental langchain-groq import os,string,openpyxl,shutil, os,pandas as pd,random,json,time,glob from openpyxl import Workbook from openpyxl.chart import Reference, Series ,BarChart,LineChart,AreaChart from openpyxl.chart.layout import Layout, ManualLayout from openpyxl.chart.series import DataPoint from openpyxl import styles from openpyxl.chart.label import DataLabelList from openpyxl.chart.text import RichText from openpyxl.drawing.text import Paragraph, ParagraphProperties, CharacterProperties, Font from openpyxl.chart import BarChart3D,AreaChart3D ,LineChart3D from openpyxl.chart.shapes import GraphicalProperties from openpyxl.drawing.fill import PatternFillProperties, ColorChoice from openpyxl.drawing.line import LineProperties from openpyxl.drawing.colors import ColorChoice !pip install fastprogress from fastprogress import master_bar, progress_bar from tqdm import tqdm from google.colab import files from google.colab import userdata from groq import Groq from langchain_groq import ChatGroq from langchain import HuggingFaceHub, LLMChain from langchain.prompts import PromptTemplate from langchain_experimental.agents import create_csv_agent #@title ExcelSheetManager Class # Note ! added param - dataOrFunction=True class ExcelSheetManager: def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'): self.file_path = file_path self.workbook = None self.sheet_names = [] self.active_sheet = None self.data = None # Check if file exists if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L': self.workbook = openpyxl.load_workbook(file_path,data_only=True) self.sheet_names = self.workbook.sheetnames elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R': self.workbook = openpyxl.load_workbook(file_path,data_only=False) self.sheet_names = self.workbook.sheetnames else: print("reading as pd.ExcelFile") self.data = pd.ExcelFile(file_path) def add_sheet(self,name): self.workbook.create_sheet(title=name) self.sheet_names = self.workbook.sheetnames def get_sheet(self, sheet_name): if sheet_name in self.sheet_names: return self.workbook[sheet_name] else: raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.") def read_cell_value(self, sheet_name, cell): sheet = self.get_sheet(sheet_name) return sheet[cell].value if cell in sheet else None def set_cell_value(self, sheet_name, cell, value): sheet = self.get_sheet(sheet_name) if cell in sheet: sheet[cell].value = value else: raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.") def activate_sheet(self, sheet_name): self.active_sheet = self.get_sheet(sheet_name) def change_activate_sheet(self, sheet_name): self.activate_sheet(sheet_name) def save_changes(self,name): if self.workbook: self.workbook.save(name+'.xlsx') def read_data(self,sheetname): worksheet = self.workbook[sheetname] max_row, max_column = self.get_sheet_size(sheetname) max_column = max_column - 1 #from some reason one column added to my data.. range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}" print(range_str) return self.get_cells_data(sheetname,range_str,False) def get_sheet_size(self, sheet_name): sheet = self.get_sheet(sheet_name) max_row = sheet.max_row max_col = sheet.max_column return max_row, max_col @staticmethod def generate_column_name(index:int): """ Function Use : Generate Excel-style column name Parameters: index (int): Column index . Returns: str: Excel-style column name. """ column_name = "" while index >= 0: column_name = string.ascii_uppercase[index % 26] + column_name index = index // 26 - 1 if index < 0: break return column_name def reload_workbook(self): if self.workbook: self.workbook.save(self.file_path) self.workbook = openpyxl.load_workbook(self.file_path, data_only=True) # Load with data_only=True to get values @staticmethod def create_empty_table(rows, cols): assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer." assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer." columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)] empty_table = pd.DataFrame(index=range(rows), columns=columns) return empty_table def get_cells_data(self, sheet_name, range_str, return_formula=False): start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str) print(start_col_str,start_row,end_col_str,end_row) start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str) end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str) num_rows = end_row - start_row + 1 num_cols = end_col_index - start_col_index + 1 max_rows,max_columns = self.get_sheet_size(sheet_name) assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows \ cols are bigger then can be " df = self.create_empty_table(num_rows, num_cols) sheet = self.get_sheet(sheet_name) for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)): for j, cell in enumerate(row): if return_formula and cell.data_type == 'f': df.iloc[i, j] = cell.value else: df.iloc[i, j] = cell.value return df @staticmethod def create_empty_table_from_range(range_str): start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str) start_col_index = ExcelSheetManager.column_index_from_string(start_col_str) end_col_index = ExcelSheetManager.column_index_from_string(end_col_str) num_rows = end_row - start_row + 1 num_cols = end_col_index - start_col_index + 1 row_labels = range(start_row, end_row + 1) col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)] empty_table = pd.DataFrame(index=row_labels, columns=col_labels) return empty_table @staticmethod def column_index_from_string(col_str): num = 0 for c in col_str: if c.isalpha(): num = num * 26 + (ord(c.upper()) - ord('A')) + 1 return num - 1 @staticmethod def split_cell_string(cell_str): col_str = ''.join(filter(str.isalpha, cell_str)) row = ''.join(filter(str.isdigit, cell_str)) return col_str, int(row) @staticmethod def split_range_cells_string(range_str): start_cell, end_cell = range_str.split(":") start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell) end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell) return start_col_str, start_row, end_col_str, end_row def createSheet(self,name): return self.workbook.create_sheet(title=name) def fix_column_dimensions_string(self,column_to_fix:str,row_it_start_from:int): self.workbook.column_dimensions[column_to_fix].width =\ len(self.workbook[column_to_fix+str(row_it_start_from)].value) Template_path = "/content/AM Reports (10).xlsx" load_with_openpyxl = True managerExcelMO = ExcelSheetManager(Template_path,load_openpyxl=load_with_openpyxl,dataOrFunction='R') managerExcelMO.save_changes(f"testAs(10)")
Editor is loading...
Leave a Comment