From 08e1f99ab0ab18704e5bd3ad6fa461bfa77942ff Mon Sep 17 00:00:00 2001 From: "woozu.shin" Date: Wed, 8 Sep 2021 20:57:20 +0900 Subject: [PATCH 1/2] [PPN-210906-2] [PPN-210906-3] Implement PpomppuFeedService --- processor/build.gradle | 4 +- .../WebFluxExchangeStragiesFactory.java | 35 +++++ .../controller/CrawlAPIController.java | 26 +++- .../ppomppu/processor/domain/Auditable.java | 26 ++++ .../processor/domain/BaseScanDomain.java | 10 ++ .../processor/domain/PpomppuArticle.java | 68 ++++++++++ .../processor/dto/FeedParsedResult.java | 45 +++++++ .../processor/dto/PpomppuArticleDTO.java | 10 ++ .../dto/PpomppuArticleTransformer.java | 76 +++++++++++ .../client/PpomppuBoardFeedRetriever.java | 46 +++++++ .../processor/service/PpomppuFeedService.java | 55 ++++++++ .../processor/util/ObjectMapperFactory.java | 92 +++++++++++++ .../ppomppu/processor/util/ObjectUtil.java | 121 ++++++++++++++++++ .../src/main/resources/logback-spring.xml | 11 ++ .../logback/component/logback-file.xml | 23 ++++ .../resources/logback/logback-development.xml | 19 +++ .../resources/logback/logback-production.xml | 19 +++ .../receiver/dto/FeedParsedResult.java | 25 ++++ .../receiver/shceduler/ParseEventEmitter.java | 4 +- .../ppomppu/support/dto/APIResponse.java | 63 +++++++++ .../ppomppu/support/dto/APIResponseError.java | 49 +++++++ .../support/dto/code/PpomppuBoardName.java | 31 ++++- .../crawl/ppomppu/support/util/WebUtil.java | 15 +++ support/src/main/resources/logback-spring.xml | 11 ++ .../logback/component/logback-file.xml | 23 ++++ .../resources/logback/logback-development.xml | 19 +++ .../resources/logback/logback-production.xml | 19 +++ 27 files changed, 930 insertions(+), 15 deletions(-) create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/Auditable.java create mode 100644 
processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/BaseScanDomain.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/PpomppuArticle.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/FeedParsedResult.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectMapperFactory.java create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectUtil.java create mode 100644 processor/src/main/resources/logback-spring.xml create mode 100644 processor/src/main/resources/logback/component/logback-file.xml create mode 100644 processor/src/main/resources/logback/logback-development.xml create mode 100644 processor/src/main/resources/logback/logback-production.xml create mode 100644 receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/dto/FeedParsedResult.java create mode 100644 support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponse.java create mode 100644 support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponseError.java create mode 100644 support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java create mode 100644 support/src/main/resources/logback-spring.xml create mode 100644 support/src/main/resources/logback/component/logback-file.xml create mode 100644 
support/src/main/resources/logback/logback-development.xml create mode 100644 support/src/main/resources/logback/logback-production.xml diff --git a/processor/build.gradle b/processor/build.gradle index a586bb2..b38b0cd 100644 --- a/processor/build.gradle +++ b/processor/build.gradle @@ -7,7 +7,9 @@ dependencies { implementation project(':support') // https://projectreactor.io/docs/core/release/reference/#debug-activate implementation 'org.springframework.boot:spring-boot-starter-webflux' - implementation 'org.telegram:telegrambots:5.3.0' + implementation 'org.springframework.boot:spring-boot-starter-data-jpa' + implementation 'com.rometools:rome:1.16.0' + implementation 'org.jsoup:jsoup:1.14.2' annotationProcessor 'org.springframework.boot:spring-boot-configuration-processor' annotationProcessor 'org.projectlombok:lombok' diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java new file mode 100644 index 0000000..930cadf --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java @@ -0,0 +1,35 @@ +package com.myoa.engineering.crawl.ppomppu.processor.configuration.factory; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.myoa.engineering.crawl.ppomppu.processor.util.ObjectMapperFactory; +import org.springframework.http.codec.json.Jackson2JsonDecoder; +import org.springframework.http.codec.json.Jackson2JsonEncoder; +import org.springframework.util.MimeTypeUtils; +import org.springframework.web.reactive.function.client.ExchangeStrategies; + +/** + * WebFluxExchangeStragiesFactory + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public final class WebFluxExchangeStragiesFactory { + + private WebFluxExchangeStragiesFactory() 
{} + + public static ExchangeStrategies getDefault() { + final ObjectMapper mapper = ObjectMapperFactory.defaultMapper(); + return ExchangeStrategies.builder() + .codecs(configurer -> { + configurer.defaultCodecs().maxInMemorySize(-1); + configurer.defaultCodecs() + .jackson2JsonEncoder(new Jackson2JsonEncoder(mapper, + MimeTypeUtils.APPLICATION_JSON)); + configurer.defaultCodecs() + .jackson2JsonDecoder(new Jackson2JsonDecoder(mapper, + MimeTypeUtils.APPLICATION_JSON)); + }) + .build(); + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/controller/CrawlAPIController.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/controller/CrawlAPIController.java index 1018a10..99a2ff3 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/controller/CrawlAPIController.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/controller/CrawlAPIController.java @@ -1,30 +1,42 @@ package com.myoa.engineering.crawl.ppomppu.processor.controller; +import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; +import com.myoa.engineering.crawl.ppomppu.support.dto.APIResponse; +import com.myoa.engineering.crawl.ppomppu.processor.dto.FeedParsedResult; +import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuFeedService; +import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; +import lombok.extern.slf4j.Slf4j; import org.springframework.web.bind.annotation.PathVariable; import org.springframework.web.bind.annotation.PostMapping; import org.springframework.web.bind.annotation.RequestMapping; import org.springframework.web.bind.annotation.RestController; - -import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; - -import lombok.extern.slf4j.Slf4j; +import reactor.core.publisher.Flux; import reactor.core.publisher.Mono; /** * CrawlAPIController + * * @author Shin Woo-jin (woo-jin.shin@linecorp.com) * @since 
2021-09-05 - * */ @Slf4j @RestController @RequestMapping("/api/v1/crawl") public class CrawlAPIController { + private final PpomppuFeedService ppomppuRSSFeedService; + + public CrawlAPIController(PpomppuFeedService ppomppuRSSFeedService) { + this.ppomppuRSSFeedService = ppomppuRSSFeedService; + } + @PostMapping("/boards/{boardName}") - public Mono crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) { + public Mono> crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) { log.info("got request... {}", boardName); - return Mono.just(boardName.getBoardPath()); + FeedParsedResult result = FeedParsedResult.of(boardName); + Flux articles = ppomppuRSSFeedService.getArticles(boardName); + + return articles.then(Mono.just(APIResponse.success(result.done()))); } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/Auditable.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/Auditable.java new file mode 100644 index 0000000..e16f83b --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/Auditable.java @@ -0,0 +1,26 @@ +package com.myoa.engineering.crawl.ppomppu.processor.domain; + +import java.io.Serializable; +import java.time.Instant; +import javax.persistence.Column; +import org.springframework.data.annotation.CreatedDate; +import org.springframework.data.annotation.LastModifiedDate; + +/** + * Auditable + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public abstract class Auditable implements Serializable { + private static final long serialVersionUID = -7105030870015828551L; + + @Column + @CreatedDate + private Instant createdAt; + + @Column + @LastModifiedDate + private Instant modifiedAt; + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/BaseScanDomain.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/BaseScanDomain.java new 
file mode 100644 index 0000000..62bcf9f --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/BaseScanDomain.java @@ -0,0 +1,10 @@ +package com.myoa.engineering.crawl.ppomppu.processor.domain; + +/** + * BaseScanDomain + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public interface BaseScanDomain { +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/PpomppuArticle.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/PpomppuArticle.java new file mode 100644 index 0000000..5c389a1 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/domain/PpomppuArticle.java @@ -0,0 +1,68 @@ +package com.myoa.engineering.crawl.ppomppu.processor.domain; + +import java.time.Instant; +import javax.persistence.Column; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * PpomppuArticle + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Getter +@NoArgsConstructor +public class PpomppuArticle extends Auditable { + + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + private Long id; + + @Column + private Long articleId; + + @Column + private String articleUrl; + + @Column + private String title; + + @Column + private Integer hit; + + @Column + private Integer recommended; + + @Column + private Instant registeredAt; + + @Builder + public PpomppuArticle(Long id, Long articleId, String articleUrl, String title, + Integer recommended, Integer hit, Instant registeredAt) { + this.id = id; + this.articleId = articleId; + this.articleUrl = articleUrl; + this.title = title; + this.recommended = recommended; + this.hit = hit; + this.registeredAt = registeredAt; + } + + @Override + public String toString() { + return 
"PpomppuArticle{" + + "id=" + id + + ", articleId=" + articleId + + ", articleUrl='" + articleUrl + '\'' + + ", title='" + title + '\'' + + ", hit=" + hit + + ", recommended=" + recommended + + ", registeredAt=" + registeredAt + + '}'; + } +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/FeedParsedResult.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/FeedParsedResult.java new file mode 100644 index 0000000..2e2eca6 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/FeedParsedResult.java @@ -0,0 +1,45 @@ +package com.myoa.engineering.crawl.ppomppu.processor.dto; + +import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; +import java.io.Serializable; +import java.time.Instant; +import lombok.Builder; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * FeedParsedResult + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Getter +@NoArgsConstructor +public class FeedParsedResult implements Serializable { + + private static final long serialVersionUID = -3771310078623481348L; + + private PpomppuBoardName boardName; + private Instant requestedAt; + private Instant processedAt; + + @Builder + public FeedParsedResult(PpomppuBoardName boardName, Instant requestedAt, Instant processedAt) { + this.boardName = boardName; + this.requestedAt = requestedAt; + this.processedAt = processedAt; + } + + public static FeedParsedResult of(PpomppuBoardName boardName) { + return FeedParsedResult.builder() + .boardName(boardName) + .requestedAt(Instant.now()) + .build(); + } + + public FeedParsedResult done() { + this.processedAt = Instant.now(); + return this; + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java new file mode 100644 index 
0000000..af2e1a8 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleDTO.java @@ -0,0 +1,10 @@ +package com.myoa.engineering.crawl.ppomppu.processor.dto; + +/** + * PpomppuArticle + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public class PpomppuArticleDTO { +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java new file mode 100644 index 0000000..ea00ee8 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/dto/PpomppuArticleTransformer.java @@ -0,0 +1,76 @@ +package com.myoa.engineering.crawl.ppomppu.processor.dto; + +import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; +import java.time.Instant; +import java.time.ZoneId; +import java.time.format.DateTimeFormatter; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +/** + * PpomppuArticleTransformer + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public final class PpomppuArticleTransformer { + + private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss") + .withZone(ZoneId.of("Asia/Seoul")); + + private PpomppuArticleTransformer() {} + + public static PpomppuArticle toArticle(Elements articleElement) { + final long articleId = PpomppuArticleTransformer.toArticleId(articleElement.get(0)); + final String title = PpomppuArticleTransformer.toTitle(articleElement.get(2)); + final String articleUrl = PpomppuArticleTransformer.toArticleUrl(articleElement.get(2)); + final int recommended = PpomppuArticleTransformer.toRecommended(articleElement.get(6)); + final int hit = PpomppuArticleTransformer.toHit(articleElement.get(7)); + final Instant registeredAt = 
PpomppuArticleTransformer.toRegisteredAt(articleElement.get(5)); + + return PpomppuArticle.builder() + .articleId(articleId) + .title(title) + .articleUrl(articleUrl) + .recommended(recommended) + .hit(hit) + .registeredAt(registeredAt) + .build(); + } + + public static Long toArticleId(Element td) { + return Long.parseLong(td.text().trim()); + } + + public static String toTitle(Element td) { + return td.text(); + } + + public static String toArticleUrl(Element td) { + return td.getElementsByTag("a").attr("href"); + } + + public static Integer toRecommended(Element td) { + final String voteString = td.text(); + final int recommended; + + if (voteString.isEmpty()) { + recommended = 0; + } else { + final int voteUp = Integer.parseInt(td.text().split(" - ")[0]); + final int voteDown = Integer.parseInt(td.text().split(" - ")[1]); + recommended = voteUp - voteDown; + } + return recommended; + } + + public static Integer toHit(Element td) { + return Integer.parseInt(td.text()); + } + + public static Instant toRegisteredAt(Element td) { + final String registeredAtString = td.attr("title"); + return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from); + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java new file mode 100644 index 0000000..f125479 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java @@ -0,0 +1,46 @@ +package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client; + +import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory; +import com.myoa.engineering.crawl.ppomppu.support.util.WebUtil; +import lombok.extern.slf4j.Slf4j; +import org.springframework.core.ParameterizedTypeReference; +import 
org.springframework.stereotype.Component; +import org.springframework.web.reactive.function.client.WebClient; +import org.springframework.web.reactive.function.client.WebClientRequestException; +import reactor.core.publisher.Mono; +import reactor.core.scheduler.Schedulers; + +/** + * PpomppuBoardFeedRetriever + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Slf4j +@Component +public class PpomppuBoardFeedRetriever { + + private static final String PPOMPPU_URL = "https://www.ppomppu.co.kr/"; + + private final WebClient webClient; + + public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) { + this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL) + .exchangeStrategies(WebFluxExchangeStragiesFactory.getDefault()) + .defaultHeader("Content-Type", "text/html") + .defaultHeader(WebUtil.HEADER_USER_AGENT_KEY, WebUtil.HEADER_USER_AGENT_VALUE) + .build(); + } + + public Mono getHtml(String uri) { + return webClient.get() + .uri(uri) + .exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference() {})) + .publishOn(Schedulers.boundedElastic()) + .onErrorResume(WebClientRequestException.class, t -> { + log.info("Exception occured, ignoring. 
: {}", t.getClass().getSimpleName()); + return Mono.empty(); + }); + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java new file mode 100644 index 0000000..7c1ab38 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java @@ -0,0 +1,55 @@ +package com.myoa.engineering.crawl.ppomppu.processor.service; + +import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle; +import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer; +import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever; +import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; +import lombok.extern.slf4j.Slf4j; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Element; +import org.springframework.stereotype.Component; +import reactor.core.publisher.Flux; +import reactor.core.publisher.Mono; + +/** + * PpomppuFeedService + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Slf4j +@Component +public class PpomppuFeedService { + + private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever; + + public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) { + this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever; + } + + public Flux getArticles(PpomppuBoardName boardName) { + final Mono html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath()); + final Mono tbody = extractTbodyFromHtml(html); + + return extractArticlesFromTbody(tbody).map(this::convertFromElement) + .doOnNext(e -> log.info("parsed Result: {}", e)); + } + + private Mono extractTbodyFromHtml(Mono html) { + return html.map(Jsoup::parse) + .mapNotNull(e -> e.getElementById("revolution_main_table")) + .map(e -> e.getElementsByTag("tbody")) + 
.map(e -> e.stream() + .findFirst() + .orElseThrow(() -> new IndexOutOfBoundsException("no tbody"))); + } + + private Flux extractArticlesFromTbody(Mono tbody) { + return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))), + tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new)))); + } + + private PpomppuArticle convertFromElement(Element element) { + return PpomppuArticleTransformer.toArticle(element.getElementsByTag("td")); + } +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectMapperFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectMapperFactory.java new file mode 100644 index 0000000..811c76f --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectMapperFactory.java @@ -0,0 +1,92 @@ +package com.myoa.engineering.crawl.ppomppu.processor.util; + +import com.fasterxml.jackson.core.JsonGenerator; +import com.fasterxml.jackson.core.JsonParser.Feature; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import com.fasterxml.jackson.databind.SerializerProvider; +import com.fasterxml.jackson.databind.ser.std.StdSerializer; +import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; +import java.io.IOException; +import org.springframework.cache.support.NullValue; +import org.springframework.util.StringUtils; + +/** + * ObjectMapperFactory + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-08-31 + */ +public final class ObjectMapperFactory { + + private ObjectMapperFactory() { + } + + private static final ObjectMapper defaultMapper; + private static final ObjectMapper httpMapper; + + static { + defaultMapper = initDefaultMapper(); + httpMapper = initHttpMapper(); + } + + public static ObjectMapper defaultMapper() { + 
return defaultMapper; + } + + public static ObjectMapper httpMapper() { + return httpMapper; + } + + private static ObjectMapper initDefaultMapper() { + final ObjectMapper objectMapper = new ObjectMapper(); + objectMapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES); + objectMapper.disable(SerializationFeature.FAIL_ON_EMPTY_BEANS); + objectMapper.disable(SerializationFeature.FAIL_ON_UNWRAPPED_TYPE_IDENTIFIERS); + objectMapper.enable(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_AS_NULL); + objectMapper.configure(Feature.ALLOW_UNQUOTED_CONTROL_CHARS, true); + objectMapper.enable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS); + + objectMapper.registerModule(new JavaTimeModule()); + return objectMapper; + } + + private static ObjectMapper initHttpMapper() { + final ObjectMapper objectMapper = initDefaultMapper(); + objectMapper.configure(DeserializationFeature.READ_UNKNOWN_ENUM_VALUES_USING_DEFAULT_VALUE, true); + return objectMapper; + } + + + /** + * Copy from {@link GenericJackson2JsonRedisSerializer.NullValueSerializer}. + */ + public static class NullValueSerializer extends StdSerializer { + + private static final long serialVersionUID = 6776419544239897328L; + private final String classIdentifier; + + /** + * @param classIdentifier can be {@literal null} and will be defaulted to {@code @class}. + */ + NullValueSerializer(String classIdentifier) { + super(NullValue.class); + this.classIdentifier = StringUtils.hasText(classIdentifier) ? 
classIdentifier : "@class"; + } + + /* + * (non-Javadoc) + * @see com.fasterxml.jackson.databind.ser.std.StdSerializer#serialize(java.lang.Object, com + * .fasterxml.jackson.core.JsonGenerator, com.fasterxml.jackson.databind.SerializerProvider) + */ + @Override + public void serialize(NullValue value, JsonGenerator jgen, SerializerProvider provider) + throws IOException { + + jgen.writeStartObject(); + jgen.writeStringField(classIdentifier, NullValue.class.getName()); + jgen.writeEndObject(); + } + } +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectUtil.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectUtil.java new file mode 100644 index 0000000..2f03f12 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/util/ObjectUtil.java @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2019 LINE Corporation. All rights reserved. + * LINE Corporation PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. + */ + +package com.myoa.engineering.crawl.ppomppu.processor.util; + +import java.util.Arrays; +import java.util.Collection; + +/** + * ObjectUtil + * + * @author Shin Woo-jin (lp12254@linecorp.com) + * @since 2019-10-28 + */ +public final class ObjectUtil { + + private ObjectUtil() { + } + + /** + * Check if given object is null. + * + * e == object == > false e == null == > true + * + * + * @param e Target object + * @param Unfixed specific type. If you want to restrict it to a specific interface, copy and extend the qualifier. + * @return Is the given object null? + */ + public static boolean isNullObject(final E e) { + return e == null; + } + + /** + * Check if given object is not null. + * + * e == object == > true e == null == > false + * + * + * @param e Target object + * @param Unfixed specific type. If you want to restrict it to a specific interface, copy and extend the qualifier. + * @return Is the given object not null? 
+ */ + public static boolean isNotEmpty(final E e) { + return !isNullObject(e); + } + + /** + * Check if there is any null object among the given objects. + * + * args == object = > false args == object, object = > false args == null, null, object = > true args + * == null = > true args == null, null = > true + * + * + * @param args Objects to check for null. + * @return Does the objects array contain null? + */ + public static boolean hasNullObject(Object... args) { + return Arrays.stream(args).anyMatch(ObjectUtil::isNullObject); + } + + /** + * Check that none of the given objects is null. + * + * args == object = > true args == object, object = > true args == null, null, object = > false args + * == null = > false args == null, null = > false + * + * + * @param args Objects to check for null. + * @return Are all of the given objects non-null? + */ + public static boolean hasAllObject(Object... args) { + return Arrays.stream(args).noneMatch(ObjectUtil::isNullObject); + } + + /** + * Check if all of the given objects are null. + * + * args == object = > false args == object, object = > false args == null, null, object = > false args + * == null = > true args == null, null = > true + * + * + * @param args Objects to check for null. + * @return Are all of the given objects null? + */ + public static boolean hasAllNullObjects(final Object... args) { + return Arrays.stream(args).allMatch(ObjectUtil::isNullObject); + } + + /** + * Check if given collection object is null or an empty collection. + * + * e == null = > true e == emptyCollection = > true e == hasElement = > false + * + * + * @param e e must be a Collection object + * @param E must extend the Collection class + * @return boolean. Is the given collection null or empty? + */ + public static > boolean isNullOrEmptyCollection(final E e) { + return e == null || e.isEmpty(); + } + + /** + * Get collection's size. 
Even it pointed null + * + * @param e e is must be Collection object + * @param E is must be extended Collection Class + * @return integer value. given collection's size. + */ + public static > int getCollectionSize(final E e) { + if (isNullOrEmptyCollection(e)) { + return 0; + } + + return e.size(); + } + +} diff --git a/processor/src/main/resources/logback-spring.xml b/processor/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..f0fcb6c --- /dev/null +++ b/processor/src/main/resources/logback-spring.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/processor/src/main/resources/logback/component/logback-file.xml b/processor/src/main/resources/logback/component/logback-file.xml new file mode 100644 index 0000000..65e54db --- /dev/null +++ b/processor/src/main/resources/logback/component/logback-file.xml @@ -0,0 +1,23 @@ + + + + + ${DIRECTORY}/${LOG_FILE_BASE}_log + + ${DIRECTORY}/${LOG_FILE_BASE}_log.%d{yyyyMMdd}.%i + + 1000MB + + 60 + + + ${FILE_LOG_PATTERN} + ${IMMEDIATE_FLUSH} + + + + 1024 + + + diff --git a/processor/src/main/resources/logback/logback-development.xml b/processor/src/main/resources/logback/logback-development.xml new file mode 100644 index 0000000..458e3d8 --- /dev/null +++ b/processor/src/main/resources/logback/logback-development.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/processor/src/main/resources/logback/logback-production.xml b/processor/src/main/resources/logback/logback-production.xml new file mode 100644 index 0000000..f824e41 --- /dev/null +++ b/processor/src/main/resources/logback/logback-production.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/dto/FeedParsedResult.java b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/dto/FeedParsedResult.java new file mode 100644 index 0000000..20d0c5e --- /dev/null +++ 
b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/dto/FeedParsedResult.java @@ -0,0 +1,25 @@ +package com.myoa.engineering.crawl.ppomppu.receiver.dto; + +import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName; +import java.io.Serializable; +import java.time.Instant; +import lombok.Getter; +import lombok.NoArgsConstructor; + +/** + * FeedParsedResult + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +@Getter +@NoArgsConstructor +public class FeedParsedResult implements Serializable { + + private static final long serialVersionUID = -3771310078623481348L; + + private PpomppuBoardName boardName; + private Instant requestedAt; + private Instant processedAt; + +} diff --git a/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/shceduler/ParseEventEmitter.java b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/shceduler/ParseEventEmitter.java index d3eab37..5994904 100644 --- a/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/shceduler/ParseEventEmitter.java +++ b/receiver/src/main/java/com/myoa/engineering/crawl/ppomppu/receiver/shceduler/ParseEventEmitter.java @@ -29,12 +29,12 @@ public class ParseEventEmitter { @Scheduled(fixedRate = 60 * 1000L) public void emitDomesticBoard() { log.info("[emitDomesticBoard] trigger fired!"); - processorAPIService.emitParseEvent(PpomppuBoardName.PPOMPPU_DOMESTIC).block(); + processorAPIService.emitParseEvent(PpomppuBoardName.PPOMPPU_DOMESTIC_ETC).block(); } @Scheduled(fixedRate = 300 * 1000L) public void emitOverseaBoard() { log.info("[emitOverseaBoard] trigger fired!"); - processorAPIService.emitParseEvent(PpomppuBoardName.PPOMPPU_OVERSEA).block(); + processorAPIService.emitParseEvent(PpomppuBoardName.PPOMPPU_OVERSEA_ETC).block(); } } diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponse.java 
b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponse.java new file mode 100644 index 0000000..94df264 --- /dev/null +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponse.java @@ -0,0 +1,63 @@ +package com.myoa.engineering.crawl.ppomppu.support.dto; + +import java.io.Serializable; +import java.util.Map; +import lombok.Getter; + +/** + * APIResponse + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-07 + */ +@Getter +public class APIResponse implements Serializable { + private static final long serialVersionUID = 1523350013713908487L; + + private boolean success; + private T result; + private APIResponseError error; + + public APIResponse(boolean success, T result, APIResponseError error) { + this.success = success; + this.error = error; + this.result = result; + } + + public APIResponse(boolean success, T result) { + this.success = success; + this.result = result; + } + + public APIResponse(boolean success, APIResponseError error) { + this.success = success; + this.error = error; + } + + public APIResponse() {} + + public static APIResponse success(T result) { + return new APIResponse<>(true, result); + } + + public static APIResponse success() { + return new APIResponse<>(true, null); + } + + public static APIResponse fail(T result, String code) { + return new APIResponse(false, result, APIResponseError.of(code)); + } + + public static APIResponse fail(String code) { + return new APIResponse(false, APIResponseError.of(code)); + } + + public static APIResponse fail(String code, String message) { + return new APIResponse(false, APIResponseError.of(code, message)); + } + + public static APIResponse fail(String code, String message, Map reasons) { + return new APIResponse(false, APIResponseError.of(code, message, reasons)); + } + +} diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponseError.java 
b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponseError.java new file mode 100644 index 0000000..38801f9 --- /dev/null +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/APIResponseError.java @@ -0,0 +1,49 @@ +package com.myoa.engineering.crawl.ppomppu.support.dto; + +import java.io.Serializable; +import java.util.HashMap; +import java.util.Map; +import lombok.Getter; + +/** + * ResponseError + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-07 + */ +@Getter +public class APIResponseError implements Serializable { + + private static final long serialVersionUID = 931593091836887301L; + + private String code; + private String message; + private Map reasons; + + public APIResponseError() {} + + public APIResponseError(String code, String message, Map reasons) { + this.code = code; + this.message = message; + this.reasons = reasons; + } + + public static APIResponseError of(String code, String message, Map reasons) { + return new APIResponseError<>(code, message, reasons); + } + + public static APIResponseError of( + String code, String message, String reasonKey, String reasonValue) { + final Map reasons = new HashMap<>(); + reasons.put(reasonKey, reasonValue); + return new APIResponseError(code, message, reasons); + } + + public static APIResponseError of(String code, String message) { + return new APIResponseError<>(code, message, null); + } + + public static APIResponseError of(String code) { + return new APIResponseError<>(code, null, null); + } +} diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/code/PpomppuBoardName.java b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/code/PpomppuBoardName.java index 72a12f2..e254e84 100644 --- a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/code/PpomppuBoardName.java +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/dto/code/PpomppuBoardName.java @@ 
-5,20 +5,41 @@ import lombok.NoArgsConstructor; /** * PpomppuBoardName + * * @author Shin Woo-jin (woo-jin.shin@linecorp.com) * @since 2021-09-05 - * */ @Getter @NoArgsConstructor public enum PpomppuBoardName { - PPOMPPU_DOMESTIC("ppomppu"), - PPOMPPU_OVERSEA("ppomppu4"), + PPOMPPU_DOMESTIC_ETC("zboard/zboard.php?id=ppomppu&category=1"), + PPOMPPU_DOMESTIC_COMPUTER("zboard/zboard.php?id=ppomppu&category=4"), + PPOMPPU_DOMESTIC_DIGITAL("zboard/zboard.php?id=ppomppu&category=5"), + PPOMPPU_DOMESTIC_FOOD("zboard/zboard.php?id=ppomppu&category=6"), + PPOMPPU_DOMESTIC_BOOK("zboard/zboard.php?id=ppomppu&category=8"), + PPOMPPU_DOMESTIC_APPLIANCES("zboard/zboard.php?id=ppomppu&category=9"), + PPOMPPU_DOMESTIC_PARENTING("zboard/zboard.php?id=ppomppu&category=10"), + PPOMPPU_DOMESTIC_GIFTCARD("zboard/zboard.php?id=ppomppu&category=11"), + PPOMPPU_DOMESTIC_CLOTHES("zboard/zboard.php?id=ppomppu&category=12"), + PPOMPPU_DOMESTIC_COSMETIC("zboard/zboard.php?id=ppomppu&category=13"), + PPOMPPU_DOMESTIC_OUTDOOR("zboard/zboard.php?id=ppomppu&category=15"), + PPOMPPU_OVERSEA_ETC("zboard/zboard.php?id=ppomppu4&category=1"), + PPOMPPU_OVERSEA_APPLIANCES("zboard/zboard.php?id=ppomppu4&category=7"), + PPOMPPU_OVERSEA_TVAV("zboard/zboard.php?id=ppomppu4&category=8"), + PPOMPPU_OVERSEA_COMPUTER("zboard/zboard.php?id=ppomppu4&category=3"), + PPOMPPU_OVERSEA_DIGITAL("zboard/zboard.php?id=ppomppu4&category=4"), + PPOMPPU_OVERSEA_MOBILEACCESSORY("zboard/zboard.php?id=ppomppu4&category=9"), + PPOMPPU_OVERSEA_CLOTHES("zboard/zboard.php?id=ppomppu4&category=5"), + PPOMPPU_OVERSEA_WATCH("zboard/zboard.php?id=ppomppu4&category=2"), + PPOMPPU_OVERSEA_SHOES("zboard/zboard.php?id=ppomppu4&category=11"), + PPOMPPU_OVERSEA_FOOD("zboard/zboard.php?id=ppomppu4&category=10"), + PPOMPPU_OVERSEA_PARENTING("zboard/zboard.php?id=ppomppu4&category=6"), ; - private String boardPath; + private String resourcePath; PpomppuBoardName(String boardPath) { - this.boardPath = boardPath; + this.resourcePath = 
boardPath; } + } diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java new file mode 100644 index 0000000..0fc6b26 --- /dev/null +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java @@ -0,0 +1,15 @@ +package com.myoa.engineering.crawl.ppomppu.support.util; + +/** + * WebUtil + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-08 + */ +public final class WebUtil { + + private WebUtil() {} + + public static final String HEADER_USER_AGENT_KEY = "User-Agent"; + public static final String HEADER_USER_AGENT_VALUE = "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36"; +} diff --git a/support/src/main/resources/logback-spring.xml b/support/src/main/resources/logback-spring.xml new file mode 100644 index 0000000..f0fcb6c --- /dev/null +++ b/support/src/main/resources/logback-spring.xml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/support/src/main/resources/logback/component/logback-file.xml b/support/src/main/resources/logback/component/logback-file.xml new file mode 100644 index 0000000..65e54db --- /dev/null +++ b/support/src/main/resources/logback/component/logback-file.xml @@ -0,0 +1,23 @@ + + + + + ${DIRECTORY}/${LOG_FILE_BASE}_log + + ${DIRECTORY}/${LOG_FILE_BASE}_log.%d{yyyyMMdd}.%i + + 1000MB + + 60 + + + ${FILE_LOG_PATTERN} + ${IMMEDIATE_FLUSH} + + + + 1024 + + + diff --git a/support/src/main/resources/logback/logback-development.xml b/support/src/main/resources/logback/logback-development.xml new file mode 100644 index 0000000..458e3d8 --- /dev/null +++ b/support/src/main/resources/logback/logback-development.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + diff --git a/support/src/main/resources/logback/logback-production.xml 
b/support/src/main/resources/logback/logback-production.xml new file mode 100644 index 0000000..f824e41 --- /dev/null +++ b/support/src/main/resources/logback/logback-production.xml @@ -0,0 +1,19 @@ + + + + + + + + + + + + + + + + + + + From cf7425faae422e19cde62e9a880bac342009c8bd Mon Sep 17 00:00:00 2001 From: "woozu.shin" Date: Sun, 26 Sep 2021 00:26:32 +0900 Subject: [PATCH 2/2] Implement PpomppuBoardFeedRetriever --- .../factory/WebClientFilterFactory.java | 48 +++++++++++++++++++ .../WebFluxExchangeStragiesFactory.java | 16 ++++++- .../client/PpomppuBoardFeedRetriever.java | 16 +++---- .../processor/service/PpomppuFeedService.java | 7 +-- .../resources/application-development.yml | 3 +- .../{WebUtil.java => WebRequestUtil.java} | 4 +- .../main/resources/development/database.yml | 0 7 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java rename support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/{WebUtil.java => WebRequestUtil.java} (86%) create mode 100644 support/src/main/resources/development/database.yml diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java new file mode 100644 index 0000000..dd74293 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java @@ -0,0 +1,48 @@ +package com.myoa.engineering.crawl.ppomppu.processor.configuration.factory; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.reactive.function.client.ClientRequest; +import org.springframework.web.reactive.function.client.ClientResponse; +import org.springframework.web.reactive.function.client.ExchangeFilterFunction; +import 
reactor.core.publisher.Mono; + +/** + * WebClientFilterFactory + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-07 + */ +@Slf4j +public final class WebClientFilterFactory { + + private WebClientFilterFactory() {} + + public static ExchangeFilterFunction logRequest() { + return ExchangeFilterFunction.ofRequestProcessor(WebClientFilterFactory::writeRequest); + } + + public static ExchangeFilterFunction logResponse() { + return ExchangeFilterFunction.ofResponseProcessor(WebClientFilterFactory::writeResponse); + } + + private static Mono writeRequest(ClientRequest clientRequest) { + try { + log.info("[WEBCLIENT REQUEST] uri : {} method : {} headers : {}", + clientRequest.url(), clientRequest.method(), clientRequest.headers()); + } catch (Exception e) { + log.error("[WEBCLIENT REQUEST] write request failed", e); + } + return Mono.just(clientRequest); + } + + private static Mono writeResponse(ClientResponse clientResponse) { + try { + log.info("[WEBCLIENT RESPONSE] statusCode : {} headers : {}", + clientResponse.rawStatusCode(), clientResponse.headers().asHttpHeaders()); + } catch (Exception e) { + log.error("[WEBCLIENT RESPONSE] write response failed", e); + } + return Mono.just(clientResponse); + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java index 930cadf..ebd9c68 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java @@ -17,7 +17,7 @@ public final class WebFluxExchangeStragiesFactory { private WebFluxExchangeStragiesFactory() {} - public static ExchangeStrategies getDefault() { + public static
ExchangeStrategies ofDefault() { final ObjectMapper mapper = ObjectMapperFactory.defaultMapper(); return ExchangeStrategies.builder() .codecs(configurer -> { @@ -31,5 +31,19 @@ public final class WebFluxExchangeStragiesFactory { }) .build(); } + public static ExchangeStrategies ofTextHtml() { + final ObjectMapper mapper = ObjectMapperFactory.defaultMapper(); + return ExchangeStrategies.builder() + .codecs(configurer -> { + configurer.defaultCodecs().maxInMemorySize(-1); + configurer.defaultCodecs() + .jackson2JsonEncoder(new Jackson2JsonEncoder(mapper, + MimeTypeUtils.TEXT_HTML)); + configurer.defaultCodecs() + .jackson2JsonDecoder(new Jackson2JsonDecoder(mapper, + MimeTypeUtils.TEXT_HTML)); + }) + .build(); + } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java index f125479..075adf8 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java @@ -1,9 +1,8 @@ package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client; +import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory; import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory; -import com.myoa.engineering.crawl.ppomppu.support.util.WebUtil; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.ParameterizedTypeReference; import org.springframework.stereotype.Component; import org.springframework.web.reactive.function.client.WebClient; import org.springframework.web.reactive.function.client.WebClientRequestException; @@ -26,21 +25,22 @@ public class PpomppuBoardFeedRetriever { public 
PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) { this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL) - .exchangeStrategies(WebFluxExchangeStragiesFactory.getDefault()) - .defaultHeader("Content-Type", "text/html") - .defaultHeader(WebUtil.HEADER_USER_AGENT_KEY, WebUtil.HEADER_USER_AGENT_VALUE) + .exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml()) + .filter(WebClientFilterFactory.logRequest()) + .filter(WebClientFilterFactory.logResponse()) .build(); } public Mono getHtml(String uri) { return webClient.get() .uri(uri) - .exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference() {})) + .exchangeToMono(e -> e.bodyToMono(String.class)) .publishOn(Schedulers.boundedElastic()) .onErrorResume(WebClientRequestException.class, t -> { log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName()); return Mono.empty(); - }); + }) + .doOnNext(e -> log.info("[getHtml] {}", e)); } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java index 7c1ab38..0f64392 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java @@ -29,8 +29,8 @@ public class PpomppuFeedService { public Flux getArticles(PpomppuBoardName boardName) { final Mono html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath()); - final Mono tbody = extractTbodyFromHtml(html); - + final Mono tbody = extractTbodyFromHtml(html) + .doOnNext(e -> log.info("pre tbody - {}", e.html())); return extractArticlesFromTbody(tbody).map(this::convertFromElement) .doOnNext(e -> log.info("parsed Result: {}", e)); } @@ -39,6 +39,7 @@ public class PpomppuFeedService { return html.map(Jsoup::parse) .mapNotNull(e ->
e.getElementById("revolution_main_table")) .map(e -> e.getElementsByTag("tbody")) + .doOnNext(e -> log.info("tbody - {}", e.html())) .map(e -> e.stream() .findFirst() .orElseThrow(() -> new IndexOutOfBoundsException("no tbody"))); @@ -46,7 +47,7 @@ public class PpomppuFeedService { private Flux extractArticlesFromTbody(Mono tbody) { return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))), - tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new)))); + tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new)))); } private PpomppuArticle convertFromElement(Element element) { diff --git a/processor/src/main/resources/application-development.yml b/processor/src/main/resources/application-development.yml index 7df42f3..92c9127 100644 --- a/processor/src/main/resources/application-development.yml +++ b/processor/src/main/resources/application-development.yml @@ -3,4 +3,5 @@ spring: activate: on-profile: development import: - - classpath:/development/webclient.yml \ No newline at end of file + - classpath:/development/webclient.yml + - classpath:/development/database.yml \ No newline at end of file diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java similarity index 86% rename from support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java rename to support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java index 0fc6b26..9204689 100644 --- a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java @@ -6,9 +6,9 @@ package com.myoa.engineering.crawl.ppomppu.support.util; * @author Shin Woo-jin (woozu.shin@kakaoent.com) * @since 2021-09-08 */ -public final class 
WebUtil { +public final class WebRequestUtil { - private WebUtil() {} + private WebRequestUtil() {} public static final String HEADER_USER_AGENT_KEY = "User-Agent"; public static final String HEADER_USER_AGENT_VALUE = "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36"; diff --git a/support/src/main/resources/development/database.yml b/support/src/main/resources/development/database.yml new file mode 100644 index 0000000..e69de29