Untitled

 avatar
unknown
java
a month ago
2.9 kB
7
Indexable
package pl.sonmiike.scraper.parser;

/**
 * Strategy for converting a piece of text into a value of type {@code T}.
 *
 * <p>Unlike {@code java.util.function.Function}, the single method is allowed to throw any
 * exception (checked or unchecked), so throwing parsers can be passed as lambdas or method
 * references without wrapping.
 *
 * @param <T> type of the value produced from the text
 */
@FunctionalInterface
public interface Parser<T>
{
    /**
     * Converts the given text into a {@code T}.
     *
     * @param text the text to convert
     * @return the converted value
     * @throws Exception if the text cannot be converted; the concrete type is up to the implementation
     */
    T parse(String text) throws Exception;
}


/**
 * Builds a {@link ScheduleEntryDTO} from the HTML of a lecture-details page.
 *
 * <p>Each field is read from the first element matching a fixed {@code #ctl06_*} id selector.
 * Extraction is best-effort: when an element is missing, its text is empty, or the text cannot
 * be converted, the field falls back to a per-field default instead of failing the whole parse.
 */
@Component
public class ScheduleParserImpl implements IScheduleParser
{

    private static final Logger LOG = LoggerFactory.getLogger(ScheduleParserImpl.class);

    private static final String DATE_PATTERN = "dd.MM.yyyy";
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern(DATE_PATTERN);

    /**
     * Parses the given HTML and collects all lecture fields into a single DTO.
     *
     * @param html markup of the lecture-details page
     * @return a DTO in which every field holds either the parsed value or that field's default
     */
    public ScheduleEntryDTO parseLectureDetails(String html)
    {
        Document document = Jsoup.parse(html);
        // One builder call per line so that every extracted field is visible at a glance.
        return ScheduleEntryDTO.builder()
            .studentCount(parseInteger(document, "#ctl06_LiczbaStudentowLabel"))
            .itnStudentCount(parseInteger(document, "#ctl06_LiczbaStudentowITNLabel"))
            .lectureName(parseElementText(document, "#ctl06_NazwaPrzedmiotyLabel"))
            .lectureCode(parseElementText(document, "#ctl06_KodPrzedmiotuLabel"))
            .lectureType(parseLectureType(document))
            .professor(parseElementText(document, "#ctl06_DydaktycyLabel"))
            .building(parseElementText(document, "#ctl06_BudynekLabel"))
            .classRoomNumber(parseElementText(document, "#ctl06_SalaLabel"))
            .lectureDate(parseLectureDate(document))
            .startTime(parseTime(document, "#ctl06_GodzRozpLabel"))
            .endTime(parseTime(document, "#ctl06_GodzZakonLabel"))
            .duration(parseDuration(document))
            .build();
    }

    /**
     * Reads the trimmed text of the first element matching {@code cssSelector} and converts it
     * with {@code parser}.
     *
     * @param document     parsed page to search
     * @param cssSelector  selector of the element holding the value
     * @param parser       conversion applied to the element's non-empty, trimmed text
     * @param defaultValue value returned when the element is absent, its text is empty, or
     *                     anything in the lookup/conversion throws
     * @param <T>          type of the extracted value
     * @return the converted value, or {@code defaultValue}
     */
    private <T> T parseElement(Document document, String cssSelector, Parser<T> parser, T defaultValue)
    {
        try
        {
            Element element = document.selectFirst(cssSelector);
            if (element == null)
            {
                return defaultValue;
            }

            String text = element.text().trim();
            if (text.isEmpty())
            {
                // An empty label carries no value. Skipping the parser here keeps this ordinary
                // case from surfacing as an exception plus an ERROR log with a stack trace.
                return defaultValue;
            }

            return parser.parse(text);
        }
        catch (Exception e)
        {
            // Best-effort extraction: a single bad field must not abort the whole entry.
            LOG.error("Failed to parse element: {}", cssSelector, e);
            return defaultValue;
        }
    }

    /**
     * Returns the trimmed text of the matched element, or an empty string when there is none.
     */
    private String parseElementText(Document document, String cssSelector)
    {
        return parseElement(document, cssSelector, text -> text, "");
    }

    /**
     * Returns the matched element's text as an {@code int}, or {@code 0} when it is missing,
     * empty, or not a valid integer.
     */
    private int parseInteger(Document document, String cssSelector)
    {
        return parseElement(document, cssSelector, Integer::parseInt, 0);
    }

    /**
     * Converts the text of {@code #ctl06_TypZajecLabel} via {@code LectureType.fromString},
     * falling back to {@link LectureType#UNKNOWN}.
     */
    private LectureType parseLectureType(Document document)
    {
        return parseElement(document, "#ctl06_TypZajecLabel", LectureType::fromString, LectureType.UNKNOWN);
    }

    /**
     * Parses the text of {@code #ctl06_DataLabel} using the {@code dd.MM.yyyy} pattern.
     *
     * <p>NOTE(review): the fallback is the current system date, so a missing or malformed date
     * cannot be told apart from an entry that really takes place today - confirm this is intended.
     */
    private LocalDate parseLectureDate(Document document)
    {
        return parseElement(document, "#ctl06_DataLabel", date -> LocalDate.parse(date, DATE_FORMATTER),
            LocalDate.now());
    }

    /**
     * Parses the matched element's text as a time of day, falling back to {@link LocalTime#MIN}.
     *
     * <p>NOTE(review): {@code LocalTime.parse} accepts only ISO-8601 local times such as
     * {@code 10:15}; a value like {@code 8:30} would fall back to the default - verify against
     * the real page.
     */
    private LocalTime parseTime(Document document, String cssSelector)
    {
        return parseElement(document, cssSelector, LocalTime::parse, LocalTime.MIN);
    }

    /**
     * Parses the text of {@code #ctl06_CzasTrwaniaLabel} as a duration, falling back to
     * {@link Duration#ZERO}.
     *
     * <p>NOTE(review): {@code Duration.parse} accepts only the ISO-8601 form (for example
     * {@code PT1H30M}). If the page renders the duration differently, this always yields
     * {@code Duration.ZERO} - verify against the real page.
     */
    private Duration parseDuration(Document document)
    {
        return parseElement(document, "#ctl06_CzasTrwaniaLabel", Duration::parse, Duration.ZERO);
    }
}
Leave a Comment