Untitled

 avatar
unknown
python
2 months ago
5.7 kB
5
Indexable
# Remove the global process_excel_file function and move it entirely into the SEBIScraperGUI class

class SEBIScraperGUI:
    def process_excel_file(self, input_file, output_file):
        """Process the Excel file after scraping is complete"""
        try:
            self.add_log(f"Starting Excel file processing for {input_file}")
            
            # Load the original workbook
            wb = load_workbook(input_file)
            processed = False
            
            # Process Performance Data sheet
            if "Performance Data" in wb.sheetnames:
                self.add_log("Processing Performance Data sheet...")
                sheet = wb["Performance Data"]
                last_row = sheet.max_row
                
                # Shift all data from C3:L_last_row one column to the right
                for row in range(3, last_row + 1):
                    for col in range(12, 2, -1):
                        sheet.cell(row=row, column=col+1).value = sheet.cell(row=row, column=col).value
                
                # Delete column C
                sheet.delete_cols(3)
                
                # Merge adjacent cells in Column A
                column_to_merge = 1
                start_merge = 2
                while start_merge < last_row:
                    end_merge = start_merge
                    while (end_merge + 1 <= last_row and
                           sheet.cell(row=end_merge, column=column_to_merge).value ==
                           sheet.cell(row=end_merge + 1, column=column_to_merge).value):
                        end_merge += 1
                    
                    if end_merge > start_merge:
                        sheet.merge_cells(
                            start_row=start_merge,
                            start_column=column_to_merge,
                            end_row=end_merge,
                            end_column=column_to_merge
                        )
                        merged_cell = sheet.cell(row=start_merge, column=column_to_merge)
                        merged_cell.alignment = Alignment(vertical="center")
                    
                    start_merge = end_merge + 1
                processed = True
                self.add_log("Performance Data sheet processed successfully")
            
            # Process AUM Data sheet
            if "AUM Data" in wb.sheetnames:
                self.add_log("Processing AUM Data sheet...")
                sheet = wb["AUM Data"]
                last_row = sheet.max_row
                
                # Merge adjacent cells in Column A (Portfolio Manager)
                column_to_merge = 1
                start_merge = 2  # Start from row 2 to skip header
                
                while start_merge < last_row:
                    end_merge = start_merge
                    while (end_merge + 1 <= last_row and
                           sheet.cell(row=end_merge, column=column_to_merge).value ==
                           sheet.cell(row=end_merge + 1, column=column_to_merge).value):
                        end_merge += 1
                    
                    if end_merge > start_merge:
                        sheet.merge_cells(
                            start_row=start_merge,
                            start_column=column_to_merge,
                            end_row=end_merge,
                            end_column=column_to_merge
                        )
                        merged_cell = sheet.cell(row=start_merge, column=column_to_merge)
                        merged_cell.alignment = Alignment(vertical="center")
                    
                    start_merge = end_merge + 1
                processed = True
                
                # Auto-adjust column widths for better readability
                for column_cells in sheet.columns:
                    length = max(len(str(cell.value) if cell.value else "") for cell in column_cells)
                    sheet.column_dimensions[get_column_letter(column_cells[0].column)].width = length + 2
                
                self.add_log("AUM Data sheet processed successfully")
            
            if processed:
                wb.save(output_file)
                self.add_log(f"Excel file processing completed successfully. Saved to {output_file}")
                return True
            else:
                self.add_log("No sheets were processed")
                return False
                
        except Exception as e:
            self.add_log(f"Error processing Excel file: {str(e)}")
            messagebox.showerror("Error", f"Failed to process Excel file: {str(e)}")
            return False

    def start_scraping(self):
        # ... (previous code remains the same until the Excel processing part)
        
        # Process Excel file if checkbox is checked
        if self.process_excel_var.get():
            self.update_status("Processing Excel file...")
            input_file = os.path.join(os.getcwd(), "SEBI_Data.xlsx")
            output_file = os.path.join(os.getcwd(), "Final_SEBI_Data.xlsx")
            
            if not os.path.exists(input_file):
                self.add_log(f"Error: Input file {input_file} not found")
                messagebox.showerror("Error", f"Input file not found: {input_file}")
                return
            
            if self.process_excel_file(input_file, output_file):
                self.update_status("Excel processing completed successfully!")
                messagebox.showinfo("Success", f"Excel file processed and saved as {output_file}")
            else:
                self.update_status("Error processing Excel file")
Editor is loading...
Leave a Comment