Untitled

 avatar
unknown
python
9 months ago
8.1 kB
9
Indexable
#@title Imports
!pip install -q langchain langchain-community groq langchain-experimental langchain-groq
import os,string,openpyxl,shutil, os,pandas as pd,random,json,time,glob
from openpyxl import Workbook
from openpyxl.chart import  Reference, Series ,BarChart,LineChart,AreaChart
from openpyxl.chart.layout import Layout, ManualLayout
from openpyxl.chart.series import DataPoint
from openpyxl import styles
from openpyxl.chart.label import DataLabelList
from openpyxl.chart.text import RichText
from openpyxl.drawing.text import Paragraph, ParagraphProperties, CharacterProperties, Font
from openpyxl.chart import BarChart3D,AreaChart3D ,LineChart3D
from openpyxl.chart.shapes import GraphicalProperties
from openpyxl.drawing.fill import PatternFillProperties, ColorChoice
from openpyxl.drawing.line import LineProperties
from openpyxl.drawing.colors import ColorChoice
!pip install fastprogress
from fastprogress import master_bar, progress_bar
from tqdm import tqdm
from google.colab import files
from google.colab import userdata
from groq import Groq
from langchain_groq import ChatGroq
from langchain import HuggingFaceHub, LLMChain
from langchain.prompts import PromptTemplate
from langchain_experimental.agents import create_csv_agent


#@title ExcelSheetManager Class
# Note ! added param - dataOrFunction=True
class ExcelSheetManager:
    def __init__(self, file_path,load_openpyxl=False,dataOrFunction='L'):
        self.file_path = file_path
        self.workbook = None
        self.sheet_names = []
        self.active_sheet = None
        self.data = None

        # Check if file exists
        if os.path.exists(file_path) and load_openpyxl and dataOrFunction=='L':
            self.workbook = openpyxl.load_workbook(file_path,data_only=True)
            self.sheet_names = self.workbook.sheetnames
        elif os.path.exists(file_path) and load_openpyxl and dataOrFunction=='R':
            self.workbook = openpyxl.load_workbook(file_path,data_only=False)
            self.sheet_names = self.workbook.sheetnames
        else:
          print("reading as pd.ExcelFile")
          self.data = pd.ExcelFile(file_path)
    def add_sheet(self,name):
      self.workbook.create_sheet(title=name)
      self.sheet_names = self.workbook.sheetnames
    def get_sheet(self, sheet_name):
        if sheet_name in self.sheet_names:
            return self.workbook[sheet_name]
        else:
            raise ValueError(f"Sheet '{sheet_name}' not found in the workbook.")

    def read_cell_value(self, sheet_name, cell):
        sheet = self.get_sheet(sheet_name)
        return sheet[cell].value if cell in sheet else None

    def set_cell_value(self, sheet_name, cell, value):
        sheet = self.get_sheet(sheet_name)
        if cell in sheet:
            sheet[cell].value = value
        else:
            raise ValueError(f"Cell '{cell}' not found in sheet '{sheet_name}'.")

    def activate_sheet(self, sheet_name):
        self.active_sheet = self.get_sheet(sheet_name)

    def change_activate_sheet(self, sheet_name):
        self.activate_sheet(sheet_name)

    def save_changes(self,name):
        if self.workbook:
            self.workbook.save(name+'.xlsx')


    def read_data(self,sheetname):
      worksheet = self.workbook[sheetname]
      max_row, max_column = self.get_sheet_size(sheetname)
      max_column = max_column - 1 #from some reason one column added to my data..
      range_str = f"A1:{ExcelSheetManager.generate_column_name(max_column)}{max_row}"
      print(range_str)
      return self.get_cells_data(sheetname,range_str,False)
    def get_sheet_size(self, sheet_name):
        sheet = self.get_sheet(sheet_name)
        max_row = sheet.max_row
        max_col = sheet.max_column
        return max_row, max_col
    @staticmethod
    def generate_column_name(index:int):
      """
    Function Use : Generate Excel-style column name

    Parameters:
        index (int): Column index .

    Returns:
        str: Excel-style column name.
      """
      column_name = ""
      while index >= 0:
          column_name = string.ascii_uppercase[index % 26] + column_name
          index = index // 26 - 1
          if index < 0:
              break
      return column_name
    def reload_workbook(self):
        if self.workbook:
          self.workbook.save(self.file_path)
          self.workbook = openpyxl.load_workbook(self.file_path, data_only=True)  # Load with data_only=True to get values

    @staticmethod
    def create_empty_table(rows, cols):
        assert isinstance(rows, int) and rows > 0, "Number of rows must be a positive integer."
        assert isinstance(cols, int) and cols > 0, "Number of columns must be a positive integer."
        columns = [ExcelSheetManager.generate_column_name(i) for i in range(cols)]

        empty_table = pd.DataFrame(index=range(rows), columns=columns)
        return empty_table

    def get_cells_data(self, sheet_name, range_str, return_formula=False):
      start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)
      print(start_col_str,start_row,end_col_str,end_row)
      start_col_index = openpyxl.utils.cell.column_index_from_string(start_col_str)
      end_col_index = openpyxl.utils.cell.column_index_from_string(end_col_str)

      num_rows = end_row - start_row + 1
      num_cols = end_col_index - start_col_index + 1
      max_rows,max_columns = self.get_sheet_size(sheet_name)
      assert num_rows <= max_rows or num_cols <= max_columns ,"Number of rows \ cols are bigger then can be "
      df = self.create_empty_table(num_rows, num_cols)
      sheet = self.get_sheet(sheet_name)

      for i, row in enumerate(sheet.iter_rows(min_row=start_row, max_row=end_row, min_col=start_col_index, max_col=end_col_index)):
          for j, cell in enumerate(row):
              if return_formula and cell.data_type == 'f':
                  df.iloc[i, j] = cell.value
              else:
                  df.iloc[i, j] = cell.value
      return df
    @staticmethod
    def create_empty_table_from_range(range_str):
      start_col_str,start_row,end_col_str,end_row = ExcelSheetManager.split_range_cells_string(range_str)

      start_col_index = ExcelSheetManager.column_index_from_string(start_col_str)
      end_col_index = ExcelSheetManager.column_index_from_string(end_col_str)

      num_rows = end_row - start_row + 1
      num_cols = end_col_index - start_col_index + 1

      row_labels = range(start_row, end_row + 1)
      col_labels = [ExcelSheetManager.generate_column_name(i) for i in range(start_col_index, end_col_index + 1)]

      empty_table = pd.DataFrame(index=row_labels, columns=col_labels)
      return empty_table

    @staticmethod
    def column_index_from_string(col_str):
      num = 0
      for c in col_str:
        if c.isalpha():
            num = num * 26 + (ord(c.upper()) - ord('A')) + 1
      return num - 1
    @staticmethod
    def split_cell_string(cell_str):
      col_str = ''.join(filter(str.isalpha, cell_str))
      row = ''.join(filter(str.isdigit, cell_str))
      return col_str, int(row)

    @staticmethod
    def split_range_cells_string(range_str):
      start_cell, end_cell = range_str.split(":")
      start_col_str, start_row = ExcelSheetManager.split_cell_string(start_cell)
      end_col_str, end_row = ExcelSheetManager.split_cell_string(end_cell)
      return start_col_str, start_row, end_col_str, end_row

    def createSheet(self,name):
      return self.workbook.create_sheet(title=name)

    def fix_column_dimensions_string(self,column_to_fix:str,row_it_start_from:int):
      self.workbook.column_dimensions[column_to_fix].width =\
       len(self.workbook[column_to_fix+str(row_it_start_from)].value)


Template_path = "/content/AM Reports (10).xlsx"
load_with_openpyxl = True
managerExcelMO = ExcelSheetManager(Template_path,load_openpyxl=load_with_openpyxl,dataOrFunction='R')
managerExcelMO.save_changes(f"testAs(10)")

Editor is loading...
Leave a Comment