From 6d315e2a9fecf1d1154a3751bfda3e455891eb49 Mon Sep 17 00:00:00 2001 From: "woo-jin.shin" Date: Sun, 2 Jan 2022 12:48:10 +0900 Subject: [PATCH] [PPN-13] Deal with new page board structure --- build.gradle | 4 +- .../processor/dto/PpomppuArticleDTO.java | 10 --- .../processor/dto/PpomppuArticleParseDTO.java | 66 +++++++++++++++++++ .../processor/dto/PpomppuArticleParser.java | 47 ++++++------- .../dto/PpomppuArticleTransformer.java | 10 +-- .../processor/service/PpomppuFeedService.java | 16 +++-- .../receiver/scheduler/ParseEventEmitter.java | 2 +- .../crawl/ppomppu/support/util/DateUtil.java | 18 +++++ 8 files changed, 120 insertions(+), 53 deletions(-) delete mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParseDTO.java create mode 100644 support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/DateUtil.java diff --git a/build.gradle b/build.gradle index 3d8c1db..f51210c 100644 --- a/build.gradle +++ b/build.gradle @@ -6,7 +6,7 @@ plugins { } group = 'com.myoa.engineering.crawl.ppomppu' -version = '1.0.3' +version = '1.1.1' sourceCompatibility = '11' configurations { @@ -21,7 +21,7 @@ repositories { allprojects { group = 'com.myoa.engineering.crawl.ppomppu' - version = '1.0.3' + version = '1.1.1' apply plugin: 'java' apply plugin: 'idea' diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java deleted file mode 100644 index af2e1a8..0000000 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java +++ /dev/null @@ -1,10 +0,0 @@ -package com.myoa.engineering.crawl.ppomppu.processor.dto; - -/** - * PpomppuArticle - * - * @author Shin Woo-jin (woozu.shin@kakaoent.com) - * @since 2021-09-08 - */ -public class PpomppuArticleDTO { -} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParseDTO.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParseDTO.java new file mode 100644 index 0000000..51555a1 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParseDTO.java @@ -0,0 +1,66 @@ +package com.myoa.engineering.crawl.ppomppu.processor.dto; + +import java.time.Instant; + +import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; +import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil; + +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * PpomppuArticleParseDTO + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Getter +@NoArgsConstructor +public class PpomppuArticleParseDTO { + + private String id; + private String articleId; + private String boardName; + private String articleUrl; + private String thumbnailUrl; + private String title; + private String hit; + private Integer recommended; + private String registeredAt; + + @Builder + public PpomppuArticleParseDTO(String id, String articleId, String boardName, String articleUrl, + String thumbnailUrl, String title, String hit, Integer recommended, + String registeredAt) { + this.id = id; + this.articleId = articleId; + this.boardName = boardName; + this.articleUrl = articleUrl; + this.thumbnailUrl = thumbnailUrl; + this.title = title; + this.hit = hit; + this.recommended = recommended; + this.registeredAt = registeredAt; + } + + public boolean isInValidated() { + return articleId == null || articleId.isEmpty() + || hit == null || hit.isEmpty(); + } + + public PpomppuArticle convert() { + if (isInValidated()) { + throw new IllegalArgumentException("PpomppuArticleParseDTO was invalidated"); + } + return PpomppuArticle.builder() + .articleId(Long.parseLong(articleId)) + .title(title) + .articleUrl(articleUrl) + .thumbnailUrl(thumbnailUrl) + .recommended(recommended) + .hit(Integer.parseInt(hit)) + .registeredAt(DateUtil.DATE_TIME_FORMATTER.parse(registeredAt, Instant::from)) + .build(); + } +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParser.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParser.java index 60a85eb..35214fb 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParser.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleParser.java @@ -1,9 +1,5 @@ package com.myoa.engineering.crawl.ppomppu.processor.dto; -import java.time.Instant; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; - import org.jsoup.nodes.Element; import org.jsoup.select.Elements; @@ -18,33 +14,31 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; */ public final class PpomppuArticleParser { - private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss") - .withZone(ZoneId.of("Asia/Seoul")); - private PpomppuArticleParser() {} public static PpomppuArticle toArticle(Elements articleElement) { - final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0)); + final String articleIdString = PpomppuArticleParser.parseArticleId(articleElement.get(0)); final String title = PpomppuArticleParser.parseTitle(articleElement.get(2)); final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2)); final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3)); - final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6)); - final int hit = PpomppuArticleParser.parseHit(articleElement.get(7)); - final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5)); + final Integer recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6)); + final String hitString = PpomppuArticleParser.parseHit(articleElement.get(7)); + final String registeredAtString = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5)); - return PpomppuArticle.builder() - .articleId(articleId) - .title(title) - .articleUrl(articleUrl) - .thumbnailUrl(thumbnailUrl) - .recommended(recommended) - .hit(hit) - .registeredAt(registeredAt) - .build(); + return PpomppuArticleParseDTO.builder() + .articleId(articleIdString) + .title(title) + .articleUrl(articleUrl) + .thumbnailUrl(thumbnailUrl) + .recommended(recommended) + .hit(hitString) + .registeredAt(registeredAtString) + .build() + .convert(); } - public static Long parseArticleId(Element td) { - return Long.parseLong(td.text().trim()); + public static String parseArticleId(Element td) { + return td.text().trim(); } public static String parseTitle(Element td) { @@ -73,13 +67,12 @@ public final class PpomppuArticleParser { return recommended; } - public static Integer parseHit(Element td) { - return Integer.parseInt(td.text()); + public static String parseHit(Element td) { + return td.text(); } - public static Instant parseRegisteredAt(Element td) { - final String registeredAtString = td.attr("title"); - return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from); + public static String parseRegisteredAt(Element td) { + return td.attr("title"); } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java index d1fe521..b68bb49 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java @@ -1,8 +1,6 @@ package com.myoa.engineering.crawl.ppomppu.processor.dto; import java.time.Instant; -import java.time.ZoneId; -import java.time.format.DateTimeFormatter; import java.util.List; import java.util.function.Function; import java.util.stream.Collectors; @@ -11,6 +9,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO; import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO; import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; +import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil; /** * PpomppuArticleTransformer @@ -26,9 +25,6 @@ public final class PpomppuArticleTransformer { private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s"; private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_"; - private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss") - .withZone(ZoneId.of("Asia/Seoul")); - public static final Function TRANSFORM_TO_MESSAGE_DTO = article -> SimpleMessageDTO.builder() .requestedAt(Instant.now()) @@ -47,7 +43,7 @@ public final class PpomppuArticleTransformer { .collect(Collectors.joining("\n\n")); return SimpleMessageDTO.builder() .requestedAt(requestedAt) - .title(DATE_TIME_FORMATTER.format(requestedAt)) + .title(DateUtil.DATE_TIME_FORMATTER.format(requestedAt)) .body(body) .build(); } @@ -62,7 +58,7 @@ public final class PpomppuArticleTransformer { .requestedAt(requestedAt) .title(String.format(TITLE_FORMAT_V1, boardName.getMenuName(), - DATE_TIME_FORMATTER.format(requestedAt))) + DateUtil.DATE_TIME_FORMATTER.format(requestedAt))) .blocks(body) .build(); } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java index 725a30b..f9ad684 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java @@ -1,16 +1,18 @@ package com.myoa.engineering.crawl.ppomppu.processor.service; +import java.util.Comparator; +import java.util.List; + +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; +import org.springframework.stereotype.Component; + import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser; import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever; import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; -import java.util.Comparator; -import java.util.List; import lombok.extern.slf4j.Slf4j; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Element; -import org.springframework.stereotype.Component; import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; @@ -33,8 +35,10 @@ public class PpomppuFeedService { public Mono> getArticles(PpomppuBoardName boardName) { final Mono html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath()); final Mono tbody = extractTbodyFromHtml(html); - // .doOnNext(e -> log.info("pre tbody - {}", e.html())); + // .doOnNext(e -> log.info("pre tbody - {}", e.html())); return extractArticlesFromTbody(tbody).map(this::convertFromElement) + .onErrorContinue((t, e) -> log.error("Error occured : {}, value: {}", + e, t.getLocalizedMessage())) .map(e -> e.updateBoardName(boardName)) .sort(Comparator.comparing(PpomppuArticle::getArticleId)) // .doOnNext(e -> log.info("parsed Result: {}", e)) diff --git a/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/scheduler/ParseEventEmitter.java b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/scheduler/ParseEventEmitter.java index 71abd5e..e164cf5 100644 --- a/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/scheduler/ParseEventEmitter.java +++ b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/scheduler/ParseEventEmitter.java @@ -28,7 +28,7 @@ public class ParseEventEmitter { this.processorAPIService = processorAPIService; } - @Scheduled(fixedRate = 300 * 1000L) + @Scheduled(fixedRate = 600 * 1000L) public void emitBoards() { log.info("[emitDomesticBoard] trigger fired!"); Arrays.stream(PpomppuBoardName.values()) diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/DateUtil.java b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/DateUtil.java new file mode 100644 index 0000000..e444987 --- /dev/null +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/DateUtil.java @@ -0,0 +1,18 @@ +package com.myoa.engineering.crawl.ppomppu.support.util; + +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; + +/** + * DateUtil + * @author Shin Woo-jin (woo-jin.shin@linecorp.com) + * @since 2022-01-02 + * + */ +public final class DateUtil { + + private DateUtil() { } + + public static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss") + .withZone(ZoneId.of("Asia/Seoul")); +}