PpomppuNotifier/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParser.java

86 lines
3.1 KiB
Java

package com.myoa.engineering.crawl.ppomppu.processor.dto;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
/**
 * PpomppuArticleParser
 *
 * <p>Stateless helpers that convert the elements of one scraped article entry into a
 * {@link PpomppuArticle}. The per-field parsers name their argument {@code td}, so the input is
 * presumably the list of table cells of a single board-list row (confirm against the caller).
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
public final class PpomppuArticleParser {

    /**
     * Formatter for the timestamp stored in the date cell's {@code title} attribute. The text
     * carries no zone, so the formatter's override zone (Asia/Seoul) is what makes the
     * conversion to {@link Instant} possible.
     */
    private static final DateTimeFormatter DATE_TIME_FORMATTER =
            DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss").withZone(ZoneId.of("Asia/Seoul"));

    /** Separator between the up-vote and down-vote counts in the vote cell text. */
    private static final String VOTE_DELIMITER = " - ";

    /** Scheme prepended to the thumbnail {@code src} when it does not carry one itself. */
    private static final String DEFAULT_SCHEME = "https:";

    // Positions, inside the Elements passed to toArticle, of the element each field is read from.
    private static final int ARTICLE_ID_INDEX = 0;
    private static final int TITLE_INDEX = 2;
    private static final int THUMBNAIL_INDEX = 3;
    private static final int REGISTERED_AT_INDEX = 5;
    private static final int RECOMMENDED_INDEX = 6;
    private static final int HIT_INDEX = 7;

    private PpomppuArticleParser() {}

    /**
     * Builds a {@link PpomppuArticle} from the elements of one article entry.
     *
     * <p>Elements at positions 0, 2, 3, 5, 6 and 7 are read; positions 1 and 4 are ignored.
     *
     * @param articleElement elements of a single article entry; must contain at least 8 elements
     * @return the article assembled from the parsed fields
     * @throws IndexOutOfBoundsException if fewer than 8 elements are supplied
     * @throws NumberFormatException if the id, vote or hit text is not numeric
     * @throws java.time.format.DateTimeParseException if the registration timestamp is malformed
     */
    public static PpomppuArticle toArticle(Elements articleElement) {
        final Element titleCell = articleElement.get(TITLE_INDEX);

        // Fields are parsed in the same order as before so that the first failure reported for a
        // malformed row does not change.
        final long articleId = parseArticleId(articleElement.get(ARTICLE_ID_INDEX));
        final String title = parseTitle(titleCell);
        final String articleUrl = parseArticleUrl(titleCell);
        final String thumbnailUrl = parseThumbnailUrl(articleElement.get(THUMBNAIL_INDEX));
        final int recommended = parseRecommended(articleElement.get(RECOMMENDED_INDEX));
        final int hit = parseHit(articleElement.get(HIT_INDEX));
        final Instant registeredAt = parseRegisteredAt(articleElement.get(REGISTERED_AT_INDEX));

        return PpomppuArticle.builder()
                             .articleId(articleId)
                             .title(title)
                             .articleUrl(articleUrl)
                             .thumbnailUrl(thumbnailUrl)
                             .recommended(recommended)
                             .hit(hit)
                             .registeredAt(registeredAt)
                             .build();
    }

    /**
     * Parses the article id from the element's trimmed text.
     *
     * @param td element whose text is the numeric article id
     * @return the article id
     * @throws NumberFormatException if the text is not a valid {@code long}
     */
    public static Long parseArticleId(Element td) {
        return Long.parseLong(td.text().trim());
    }

    /**
     * Extracts the title as the text of the {@code <a>} elements found under the given element.
     *
     * @param td element containing the article link
     * @return the text of the matched anchors (jsoup combines the text when several match)
     */
    public static String parseTitle(Element td) {
        return td.getElementsByTag("a").text();
    }

    /**
     * Resolves the article URL by passing the {@code href} of the {@code <a>} elements found
     * under the given element to {@link PpomppuBoardName#ofViewPageUrl}.
     *
     * @param td element containing the article link
     * @return whatever {@code PpomppuBoardName.ofViewPageUrl} returns for that {@code href}
     */
    public static String parseArticleUrl(Element td) {
        return PpomppuBoardName.ofViewPageUrl(td.getElementsByTag("a").attr("href"));
    }

    /**
     * Extracts the thumbnail URL from the first {@code <img>} under the given element.
     *
     * <p>A {@code src} that already starts with {@code http://} or {@code https://} is returned
     * unchanged; any other non-empty value (typically protocol-relative, {@code //host/path}) is
     * prefixed with {@code https:}.
     *
     * @param td element expected to contain the thumbnail image
     * @return the thumbnail URL, or {@code null} when there is no {@code <img>} or its
     *         {@code src} is empty
     */
    public static String parseThumbnailUrl(Element td) {
        final Elements images = td.getElementsByTag("img");
        if (images.isEmpty()) {
            // Previously get(0) threw IndexOutOfBoundsException here, failing the whole article.
            return null;
        }

        final String src = images.get(0).attr("src");
        if (src.isEmpty()) {
            // A bare "https:" is not a usable URL; report the thumbnail as absent instead.
            return null;
        }
        if (src.startsWith("http://") || src.startsWith("https://")) {
            // Already absolute: prefixing would yield "https:https://...".
            return src;
        }
        return DEFAULT_SCHEME + src;
    }

    /**
     * Computes the net recommendation count from text of the form {@code "<up> - <down>"}.
     *
     * @param td element whose text holds the vote counts, or is empty when there are no votes
     * @return up-votes minus down-votes, or {@code 0} when the text is empty
     * @throws NumberFormatException if either count is not a valid {@code int}
     * @throws ArrayIndexOutOfBoundsException if the text does not contain the {@code " - "}
     *         separator followed by a second value
     */
    public static Integer parseRecommended(Element td) {
        final String voteString = td.text();
        if (voteString.isEmpty()) {
            return 0;
        }

        // Read and split the text once instead of re-evaluating td.text() for every operand.
        final String[] votes = voteString.split(VOTE_DELIMITER);
        final int voteUp = Integer.parseInt(votes[0]);
        final int voteDown = Integer.parseInt(votes[1]);
        return voteUp - voteDown;
    }

    /**
     * Parses the hit (view) count from the element's text.
     *
     * @param td element whose text is the numeric hit count
     * @return the hit count
     * @throws NumberFormatException if the text is not a valid {@code int}
     */
    public static Integer parseHit(Element td) {
        return Integer.parseInt(td.text());
    }

    /**
     * Parses the registration time from the element's {@code title} attribute, which must match
     * {@code yy.MM.dd HH:mm:ss}; the value is interpreted in the Asia/Seoul time zone.
     *
     * @param td element whose {@code title} attribute holds the timestamp
     * @return the registration time as an {@link Instant}
     * @throws java.time.format.DateTimeParseException if the attribute is missing or malformed
     */
    public static Instant parseRegisteredAt(Element td) {
        final String registeredAtString = td.attr("title");
        return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from);
    }
}