From cf7425faae422e19cde62e9a880bac342009c8bd Mon Sep 17 00:00:00 2001 From: "woozu.shin" Date: Sun, 26 Sep 2021 00:26:32 +0900 Subject: [PATCH] Implement PpomppuBoardFeedRetriever --- .../factory/WebClientFilterFactory.java | 48 +++++++++++++++++++ .../WebFluxExchangeStragiesFactory.java | 16 ++++++- .../client/PpomppuBoardFeedRetriever.java | 16 +++---- .../processor/service/PpomppuFeedService.java | 7 +-- .../resources/application-development.yml | 3 +- .../{WebUtil.java => WebRequestUtil.java} | 4 +- .../main/resources/development/database.yml | 0 7 files changed, 79 insertions(+), 15 deletions(-) create mode 100644 processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java rename support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/{WebUtil.java => WebRequestUtil.java} (86%) create mode 100644 support/src/main/resources/development/database.yml diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java new file mode 100644 index 0000000..dd74293 --- /dev/null +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebClientFilterFactory.java @@ -0,0 +1,48 @@ +package com.myoa.engineering.crawl.ppomppu.processor.configuration.factory; + +import lombok.extern.slf4j.Slf4j; +import org.springframework.web.reactive.function.client.ClientRequest; +import org.springframework.web.reactive.function.client.ClientResponse; +import org.springframework.web.reactive.function.client.ExchangeFilterFunction; +import reactor.core.publisher.Mono; + +/** + * WebClientFilterFactory + * + * @author Shin Woo-jin (woozu.shin@kakaoent.com) + * @since 2021-09-07 + */ +@Slf4j +public final class WebClientFilterFactory { + + private WebClientFilterFactory() {} + + public static ExchangeFilterFunction logRequest() { + return ExchangeFilterFunction.ofRequestProcessor(WebClientFilterFactory::writeRequest); + } + + public static ExchangeFilterFunction logResponse() { + return ExchangeFilterFunction.ofResponseProcessor(WebClientFilterFactory::writeResponse); + } + + private static Mono writeRequest(ClientRequest clientRequest) { + try { + log.info("[WEBCLIENT REQUEST] uri : {} method : {} headers : {}", + clientRequest.url(), clientRequest.method(), clientRequest.headers()); + } catch (Exception e) { + log.error("[WEBCLIENT REQUEST] write request failed", e); + } + return Mono.just(clientRequest); + } + + private static Mono writeResponse(ClientResponse clientResponse) { + try { + log.info("[WEBCLIENT REQUEST] statusCode : {} headers : {}", + clientResponse.rawStatusCode(), clientResponse.headers().asHttpHeaders()); + } catch (Exception e) { + log.error("[WEBCLIENT RESPONSE] write response failed", e); + } + return Mono.just(clientResponse); + } + +} diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java index 930cadf..ebd9c68 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/configuration/factory/WebFluxExchangeStragiesFactory.java @@ -17,7 +17,7 @@ public final class WebFluxExchangeStragiesFactory { private WebFluxExchangeStragiesFactory() {} - public static ExchangeStrategies getDefault() { + public static ExchangeStrategies ofDefault() { final ObjectMapper mapper = ObjectMapperFactory.defaultMapper(); return ExchangeStrategies.builder() .codecs(configurer -> { @@ -31,5 +31,19 @@ public final class WebFluxExchangeStragiesFactory { }) .build(); } + public static ExchangeStrategies ofTextHtml() { + final ObjectMapper mapper = ObjectMapperFactory.defaultMapper(); + return ExchangeStrategies.builder() + .codecs(configurer -> { + configurer.defaultCodecs().maxInMemorySize(-1); + configurer.defaultCodecs() + .jackson2JsonEncoder(new Jackson2JsonEncoder(mapper, + MimeTypeUtils.TEXT_HTML)); + configurer.defaultCodecs() + .jackson2JsonDecoder(new Jackson2JsonDecoder(mapper, + MimeTypeUtils.TEXT_HTML)); + }) + .build(); + } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java index f125479..075adf8 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/infrastructure/client/PpomppuBoardFeedRetriever.java @@ -1,9 +1,8 @@ package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client; +import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory; import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory; -import com.myoa.engineering.crawl.ppomppu.support.util.WebUtil; import lombok.extern.slf4j.Slf4j; -import org.springframework.core.ParameterizedTypeReference; import org.springframework.stereotype.Component; import org.springframework.web.reactive.function.client.WebClient; import org.springframework.web.reactive.function.client.WebClientRequestException; @@ -26,21 +25,22 @@ public class PpomppuBoardFeedRetriever { public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) { this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL) - .exchangeStrategies(WebFluxExchangeStragiesFactory.getDefault()) - .defaultHeader("Content-Type", "text/html") - .defaultHeader(WebUtil.HEADER_USER_AGENT_KEY, WebUtil.HEADER_USER_AGENT_VALUE) + .exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml()) + .filter(WebClientFilterFactory.logRequest()) + .filter(WebClientFilterFactory.logResponse()) .build(); } public Mono getHtml(String uri) { return webClient.get() - .uri(uri) - .exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference() {})) + .uri("/zboard/zboard.php?id=ppomppu") + .exchangeToMono(e -> e.bodyToMono(String.class)) .publishOn(Schedulers.boundedElastic()) .onErrorResume(WebClientRequestException.class, t -> { log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName()); return Mono.empty(); - }); + }) + .doOnNext(e -> log.info("[getHtml] {}", e)); } } diff --git a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java index 7c1ab38..0f64392 100644 --- a/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java +++ b/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java @@ -29,8 +29,8 @@ public class PpomppuFeedService { public Flux getArticles(PpomppuBoardName boardName) { final Mono html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath()); - final Mono tbody = extractTbodyFromHtml(html); - + final Mono tbody = extractTbodyFromHtml(html) + .doOnNext(e -> log.info("pre tbody - {}", e.html())); return extractArticlesFromTbody(tbody).map(this::convertFromElement) .doOnNext(e -> log.info("parsed Result: {}", e)); } @@ -39,6 +39,7 @@ public class PpomppuFeedService { return html.map(Jsoup::parse) .mapNotNull(e -> e.getElementById("revolution_main_table")) .map(e -> e.getElementsByTag("tbody")) + .doOnNext(e -> log.info("tbody - {}", e.html())) .map(e -> e.stream() .findFirst() .orElseThrow(() -> new IndexOutOfBoundsException("no tbody"))); @@ -46,7 +47,7 @@ public class PpomppuFeedService { private Flux extractArticlesFromTbody(Mono tbody) { return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))), - tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new)))); + tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new)))); } private PpomppuArticle convertFromElement(Element element) { diff --git a/processor/src/main/resources/application-development.yml b/processor/src/main/resources/application-development.yml index 7df42f3..92c9127 100644 --- a/processor/src/main/resources/application-development.yml +++ b/processor/src/main/resources/application-development.yml @@ -3,4 +3,5 @@ spring: activate: on-profile: development import: - - classpath:/development/webclient.yml \ No newline at end of file + - classpath:/development/webclient.yml + - classpath:/development/database.yml \ No newline at end of file diff --git a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java similarity index 86% rename from support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java rename to support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java index 0fc6b26..9204689 100644 --- a/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebUtil.java +++ b/support/src/main/java/com/myoa/engineering/crawl/ppomppu/support/util/WebRequestUtil.java @@ -6,9 +6,9 @@ package com.myoa.engineering.crawl.ppomppu.support.util; * @author Shin Woo-jin (woozu.shin@kakaoent.com) * @since 2021-09-08 */ -public final class WebUtil { +public final class WebRequestUtil { - private WebUtil() {} + private WebRequestUtil() {} public static final String HEADER_USER_AGENT_KEY = "User-Agent"; public static final String HEADER_USER_AGENT_VALUE = "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36"; diff --git a/support/src/main/resources/development/database.yml b/support/src/main/resources/development/database.yml new file mode 100644 index 0000000..e69de29