86 lines
3.1 KiB
Java
86 lines
3.1 KiB
Java
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
|
|
|
import java.time.Instant;
|
|
import java.time.ZoneId;
|
|
import java.time.format.DateTimeFormatter;
|
|
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
|
|
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
|
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|
|
|
/**
|
|
* PpomppuArticleTransformer
|
|
*
|
|
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
|
* @since 2021-09-08
|
|
*/
|
|
public final class PpomppuArticleParser {
|
|
|
|
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
|
|
.withZone(ZoneId.of("Asia/Seoul"));
|
|
|
|
private PpomppuArticleParser() {}
|
|
|
|
public static PpomppuArticle toArticle(Elements articleElement) {
|
|
final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0));
|
|
final String title = PpomppuArticleParser.parseTitle(articleElement.get(2));
|
|
final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2));
|
|
final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3));
|
|
final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
|
|
final int hit = PpomppuArticleParser.parseHit(articleElement.get(7));
|
|
final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
|
|
|
|
return PpomppuArticle.builder()
|
|
.articleId(articleId)
|
|
.title(title)
|
|
.articleUrl(articleUrl)
|
|
.thumbnailUrl(thumbnailUrl)
|
|
.recommended(recommended)
|
|
.hit(hit)
|
|
.registeredAt(registeredAt)
|
|
.build();
|
|
}
|
|
|
|
public static Long parseArticleId(Element td) {
|
|
return Long.parseLong(td.text().trim());
|
|
}
|
|
|
|
public static String parseTitle(Element td) {
|
|
return td.getElementsByTag("a").text();
|
|
}
|
|
|
|
public static String parseArticleUrl(Element td) {
|
|
return PpomppuBoardName.ofViewPageUrl(td.getElementsByTag("a").attr("href"));
|
|
}
|
|
|
|
public static String parseThumbnailUrl(Element td) {
|
|
return "https:" + td.getElementsByTag("img").get(0).attr("src");
|
|
}
|
|
|
|
public static Integer parseRecommended(Element td) {
|
|
final String voteString = td.text();
|
|
final int recommended;
|
|
|
|
if (voteString.isEmpty()) {
|
|
recommended = 0;
|
|
} else {
|
|
final int voteUp = Integer.parseInt(td.text().split(" - ")[0]);
|
|
final int voteDown = Integer.parseInt(td.text().split(" - ")[1]);
|
|
recommended = voteUp - voteDown;
|
|
}
|
|
return recommended;
|
|
}
|
|
|
|
public static Integer parseHit(Element td) {
|
|
return Integer.parseInt(td.text());
|
|
}
|
|
|
|
public static Instant parseRegisteredAt(Element td) {
|
|
final String registeredAtString = td.attr("title");
|
|
return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from);
|
|
}
|
|
|
|
}
|