Untitled
unknown
java
a year ago
2.9 kB
12
Indexable
package pl.sonmiike.scraper.parser;
/**
 * Converts a piece of text into a value of type {@code T}.
 *
 * <p>Unlike {@code java.util.function.Function}, the single method is allowed to throw checked
 * exceptions, so conversions that can fail may be passed as lambdas or method references without
 * wrapping.
 *
 * @param <T> type of the value produced from the text
 */
@FunctionalInterface
public interface Parser<T>
{
    /**
     * Parses the given text.
     *
     * @param text the raw text to convert
     * @return the value obtained from {@code text}
     * @throws Exception if the text cannot be converted; the concrete type depends on the
     *                   implementation
     */
    T parse(String text) throws Exception;
}
@Component
public class ScheduleParserImpl implements IScheduleParser
{
private static final Logger LOG = LoggerFactory.getLogger(ScheduleParserImpl.class);
private static final String DATE_PATTERN = "dd.MM.yyyy";
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern(DATE_PATTERN);
public ScheduleEntryDTO parseLectureDetails(String html)
{
Document document = Jsoup.parse(html);
return ScheduleEntryDTO.builder().studentCount(parseInteger(document, "#ctl06_LiczbaStudentowLabel"))
.itnStudentCount(parseInteger(document, "#ctl06_LiczbaStudentowITNLabel"))
.lectureName(parseElementText(document, "#ctl06_NazwaPrzedmiotyLabel"))
.lectureCode(parseElementText(document, "#ctl06_KodPrzedmiotuLabel"))
.lectureType(parseLectureType(document)).professor(parseElementText(document, "#ctl06_DydaktycyLabel"))
.building(parseElementText(document, "#ctl06_BudynekLabel"))
.classRoomNumber(parseElementText(document, "#ctl06_SalaLabel")).lectureDate(parseLectureDate(document))
.startTime(parseTime(document, "#ctl06_GodzRozpLabel"))
.endTime(parseTime(document, "#ctl06_GodzZakonLabel")).duration(parseDuration(document)).build();
}
private <T> T parseElement(Document document, String cssSelector, Parser<T> parser, T defaultValue)
{
try
{
Element element = document.select(cssSelector).first();
return element != null ? parser.parse(element.text().trim()) : defaultValue;
}
catch (Exception e)
{
LOG.error("Failed to parse element: {}", cssSelector, e);
return defaultValue;
}
}
private String parseElementText(Document document, String cssSelector)
{
return parseElement(document, cssSelector, text -> text, "");
}
private int parseInteger(Document document, String cssSelector)
{
return parseElement(document, cssSelector, Integer::parseInt, 0);
}
private LectureType parseLectureType(Document document)
{
return parseElement(document, "#ctl06_TypZajecLabel", LectureType::fromString, LectureType.UNKNOWN);
}
private LocalDate parseLectureDate(Document document)
{
return parseElement(document, "#ctl06_DataLabel", date -> LocalDate.parse(date, DATE_FORMATTER),
LocalDate.now());
}
private LocalTime parseTime(Document document, String cssSelector)
{
return parseElement(document, cssSelector, LocalTime::parse, LocalTime.MIN);
}
private Duration parseDuration(Document document)
{
return parseElement(document, "#ctl06_CzasTrwaniaLabel", Duration::parse, Duration.ZERO);
}
}Editor is loading...
Leave a Comment