package com.myoa.engineering.crawl.ppomppu.processor.dto; import java.time.Instant; import java.time.ZoneId; import java.time.format.DateTimeFormatter; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; /** * PpomppuArticleTransformer * * @author Shin Woo-jin (woozu.shin@kakaoent.com) * @since 2021-09-08 */ public final class PpomppuArticleParser { private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss") .withZone(ZoneId.of("Asia/Seoul")); private PpomppuArticleParser() {} public static PpomppuArticle toArticle(Elements articleElement) { final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0)); final String title = PpomppuArticleParser.parseTitle(articleElement.get(2)); final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2)); final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3)); final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6)); final int hit = PpomppuArticleParser.parseHit(articleElement.get(7)); final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5)); return PpomppuArticle.builder() .articleId(articleId) .title(title) .articleUrl(articleUrl) .thumbnailUrl(thumbnailUrl) .recommended(recommended) .hit(hit) .registeredAt(registeredAt) .build(); } public static Long parseArticleId(Element td) { return Long.parseLong(td.text().trim()); } public static String parseTitle(Element td) { return td.getElementsByTag("a").text(); } public static String parseArticleUrl(Element td) { return PpomppuBoardName.ofViewPageUrl(td.getElementsByTag("a").attr("href")); } public static String parseThumbnailUrl(Element td) { return "https:" + td.getElementsByTag("img").get(0).attr("src"); } public static Integer parseRecommended(Element td) { final String voteString = td.text(); final int recommended; if (voteString.isEmpty()) { recommended = 0; } else { final int voteUp = Integer.parseInt(td.text().split(" - ")[0]); final int voteDown = Integer.parseInt(td.text().split(" - ")[1]); recommended = voteUp - voteDown; } return recommended; } public static Integer parseHit(Element td) { return Integer.parseInt(td.text()); } public static Instant parseRegisteredAt(Element td) { final String registeredAtString = td.attr("title"); return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from); } }