Untitled
package pl.sonmiike.scraper.parser;

// NOTE(review): Java allows only one public top-level type per source file, so
// Parser and ScheduleParserImpl presumably live in separate files; they are kept
// together here only because this listing shows them together.

/**
 * Converts the text of a single HTML element into a value of type {@code T}.
 *
 * <p>Implementations may throw any exception to signal that the text cannot be
 * converted.
 *
 * @param <T> the type produced from the text
 */
@FunctionalInterface
public interface Parser<T> {

    /**
     * Parses the given text.
     *
     * @param text the text to convert
     * @return the parsed value
     * @throws Exception if the text cannot be converted
     */
    T parse(String text) throws Exception;
}

/**
 * Extracts the details of one lecture from an HTML page using jsoup.
 *
 * <p>Every field is read from the first element matching a fixed CSS id selector.
 * A field whose element is missing, whose text is empty, or whose text cannot be
 * parsed falls back to a per-field default instead of failing the whole parse.
 */
@Component
public class ScheduleParserImpl implements IScheduleParser {

    private static final Logger LOG = LoggerFactory.getLogger(ScheduleParserImpl.class);

    private static final String DATE_PATTERN = "dd.MM.yyyy";
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern(DATE_PATTERN);

    /**
     * Parses the given HTML and builds a {@link ScheduleEntryDTO} from the labelled
     * elements it contains.
     *
     * @param html the HTML markup of the lecture details page
     * @return a DTO populated with the parsed values, or with the per-field defaults
     *         where a value is missing, empty or unparsable
     */
    public ScheduleEntryDTO parseLectureDetails(String html) {
        Document document = Jsoup.parse(html);
        return ScheduleEntryDTO.builder()
                .studentCount(parseInteger(document, "#ctl06_LiczbaStudentowLabel"))
                .itnStudentCount(parseInteger(document, "#ctl06_LiczbaStudentowITNLabel"))
                .lectureName(parseElementText(document, "#ctl06_NazwaPrzedmiotyLabel"))
                .lectureCode(parseElementText(document, "#ctl06_KodPrzedmiotuLabel"))
                .lectureType(parseLectureType(document))
                .professor(parseElementText(document, "#ctl06_DydaktycyLabel"))
                .building(parseElementText(document, "#ctl06_BudynekLabel"))
                .classRoomNumber(parseElementText(document, "#ctl06_SalaLabel"))
                .lectureDate(parseLectureDate(document))
                .startTime(parseTime(document, "#ctl06_GodzRozpLabel"))
                .endTime(parseTime(document, "#ctl06_GodzZakonLabel"))
                .duration(parseDuration(document))
                .build();
    }

    /**
     * Reads the trimmed text of the first element matching {@code cssSelector} and
     * converts it with {@code parser}.
     *
     * @param document     the parsed HTML document
     * @param cssSelector  the CSS selector identifying the element
     * @param parser       the conversion applied to the element's trimmed text
     * @param defaultValue the value returned when the element is missing, its text
     *                     is empty, or the conversion throws
     * @param <T>          the type of the parsed value
     * @return the parsed value, or {@code defaultValue}
     */
    private <T> T parseElement(Document document, String cssSelector, Parser<T> parser, T defaultValue) {
        // Declared outside the try so the failure log can report what was being parsed.
        String rawText = null;
        try {
            Element element = document.select(cssSelector).first();
            if (element == null) {
                return defaultValue;
            }
            rawText = element.text().trim();
            // An empty label carries no value: treat it like a missing element rather
            // than letting the parser throw and logging a stack trace for it.
            if (rawText.isEmpty()) {
                return defaultValue;
            }
            return parser.parse(rawText);
        } catch (Exception e) {
            LOG.error("Failed to parse element: {} (text: '{}')", cssSelector, rawText, e);
            return defaultValue;
        }
    }

    /** Returns the trimmed text of the matched element, or an empty string. */
    private String parseElementText(Document document, String cssSelector) {
        return parseElement(document, cssSelector, text -> text, "");
    }

    /** Returns the matched element's text as an {@code int}, or {@code 0}. */
    private int parseInteger(Document document, String cssSelector) {
        return parseElement(document, cssSelector, Integer::parseInt, 0);
    }

    /** Returns the lecture type via {@code LectureType.fromString}, or {@code LectureType.UNKNOWN}. */
    private LectureType parseLectureType(Document document) {
        return parseElement(document, "#ctl06_TypZajecLabel", LectureType::fromString, LectureType.UNKNOWN);
    }

    /**
     * Returns the lecture date parsed with the {@code dd.MM.yyyy} pattern.
     *
     * <p>NOTE(review): when the date is missing or unparsable this falls back to
     * {@code LocalDate.now()}, so the result is indistinguishable from a lecture
     * that really takes place today - confirm this is intended.
     */
    private LocalDate parseLectureDate(Document document) {
        return parseElement(document, "#ctl06_DataLabel",
                date -> LocalDate.parse(date, DATE_FORMATTER), LocalDate.now());
    }

    /** Returns the matched element's text parsed by {@code LocalTime.parse}, or {@code LocalTime.MIN}. */
    private LocalTime parseTime(Document document, String cssSelector) {
        return parseElement(document, cssSelector, LocalTime::parse, LocalTime.MIN);
    }

    /**
     * Returns the lecture duration parsed by {@code Duration.parse}, or {@code Duration.ZERO}.
     *
     * <p>NOTE(review): {@code Duration.parse} only accepts ISO-8601 text such as
     * {@code PT1H30M}; verify the page renders the duration in that form, otherwise
     * every lookup ends up as {@code Duration.ZERO}.
     */
    private Duration parseDuration(Document document) {
        return parseElement(document, "#ctl06_CzasTrwaniaLabel", Duration::parse, Duration.ZERO);
    }
}
Leave a Comment