Implement PpomppuBoardFeedRetriever
This commit is contained in:
parent
08e1f99ab0
commit
cf7425faae
|
@ -0,0 +1,48 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.configuration.factory;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.web.reactive.function.client.ClientRequest;
|
||||
import org.springframework.web.reactive.function.client.ClientResponse;
|
||||
import org.springframework.web.reactive.function.client.ExchangeFilterFunction;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
/**
|
||||
* WebClientFilterFactory
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-07
|
||||
*/
|
||||
@Slf4j
|
||||
public final class WebClientFilterFactory {
|
||||
|
||||
private WebClientFilterFactory() {}
|
||||
|
||||
public static ExchangeFilterFunction logRequest() {
|
||||
return ExchangeFilterFunction.ofRequestProcessor(WebClientFilterFactory::writeRequest);
|
||||
}
|
||||
|
||||
public static ExchangeFilterFunction logResponse() {
|
||||
return ExchangeFilterFunction.ofResponseProcessor(WebClientFilterFactory::writeResponse);
|
||||
}
|
||||
|
||||
private static Mono<ClientRequest> writeRequest(ClientRequest clientRequest) {
|
||||
try {
|
||||
log.info("[WEBCLIENT REQUEST] uri : {} method : {} headers : {}",
|
||||
clientRequest.url(), clientRequest.method(), clientRequest.headers());
|
||||
} catch (Exception e) {
|
||||
log.error("[WEBCLIENT REQUEST] write request failed", e);
|
||||
}
|
||||
return Mono.just(clientRequest);
|
||||
}
|
||||
|
||||
private static Mono<ClientResponse> writeResponse(ClientResponse clientResponse) {
|
||||
try {
|
||||
log.info("[WEBCLIENT REQUEST] statusCode : {} headers : {}",
|
||||
clientResponse.rawStatusCode(), clientResponse.headers().asHttpHeaders());
|
||||
} catch (Exception e) {
|
||||
log.error("[WEBCLIENT RESPONSE] write response failed", e);
|
||||
}
|
||||
return Mono.just(clientResponse);
|
||||
}
|
||||
|
||||
}
|
|
@ -17,7 +17,7 @@ public final class WebFluxExchangeStragiesFactory {
|
|||
|
||||
private WebFluxExchangeStragiesFactory() {}
|
||||
|
||||
public static ExchangeStrategies getDefault() {
|
||||
public static ExchangeStrategies ofDefault() {
|
||||
final ObjectMapper mapper = ObjectMapperFactory.defaultMapper();
|
||||
return ExchangeStrategies.builder()
|
||||
.codecs(configurer -> {
|
||||
|
@ -31,5 +31,19 @@ public final class WebFluxExchangeStragiesFactory {
|
|||
})
|
||||
.build();
|
||||
}
|
||||
public static ExchangeStrategies ofTextHtml() {
|
||||
final ObjectMapper mapper = ObjectMapperFactory.defaultMapper();
|
||||
return ExchangeStrategies.builder()
|
||||
.codecs(configurer -> {
|
||||
configurer.defaultCodecs().maxInMemorySize(-1);
|
||||
configurer.defaultCodecs()
|
||||
.jackson2JsonEncoder(new Jackson2JsonEncoder(mapper,
|
||||
MimeTypeUtils.TEXT_HTML));
|
||||
configurer.defaultCodecs()
|
||||
.jackson2JsonDecoder(new Jackson2JsonDecoder(mapper,
|
||||
MimeTypeUtils.TEXT_HTML));
|
||||
})
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,9 +1,8 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.util.WebUtil;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
import org.springframework.web.reactive.function.client.WebClientRequestException;
|
||||
|
@ -26,21 +25,22 @@ public class PpomppuBoardFeedRetriever {
|
|||
|
||||
public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) {
|
||||
this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL)
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.getDefault())
|
||||
.defaultHeader("Content-Type", "text/html")
|
||||
.defaultHeader(WebUtil.HEADER_USER_AGENT_KEY, WebUtil.HEADER_USER_AGENT_VALUE)
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml())
|
||||
.filter(WebClientFilterFactory.logRequest())
|
||||
.filter(WebClientFilterFactory.logResponse())
|
||||
.build();
|
||||
}
|
||||
|
||||
public Mono<String> getHtml(String uri) {
|
||||
return webClient.get()
|
||||
.uri(uri)
|
||||
.exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference<String>() {}))
|
||||
.uri("/zboard/zboard.php?id=ppomppu")
|
||||
.exchangeToMono(e -> e.bodyToMono(String.class))
|
||||
.publishOn(Schedulers.boundedElastic())
|
||||
.onErrorResume(WebClientRequestException.class, t -> {
|
||||
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
|
||||
return Mono.empty();
|
||||
});
|
||||
})
|
||||
.doOnNext(e -> log.info("[getHtml] {}", e));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -29,8 +29,8 @@ public class PpomppuFeedService {
|
|||
|
||||
public Flux<PpomppuArticle> getArticles(PpomppuBoardName boardName) {
|
||||
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
||||
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
||||
|
||||
final Mono<Element> tbody = extractTbodyFromHtml(html)
|
||||
.doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
||||
.doOnNext(e -> log.info("parsed Result: {}", e));
|
||||
}
|
||||
|
@ -39,6 +39,7 @@ public class PpomppuFeedService {
|
|||
return html.map(Jsoup::parse)
|
||||
.mapNotNull(e -> e.getElementById("revolution_main_table"))
|
||||
.map(e -> e.getElementsByTag("tbody"))
|
||||
.doOnNext(e -> log.info("tbody - {}", e.html()))
|
||||
.map(e -> e.stream()
|
||||
.findFirst()
|
||||
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
|
||||
|
@ -46,7 +47,7 @@ public class PpomppuFeedService {
|
|||
|
||||
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
|
||||
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
|
||||
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))));
|
||||
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
|
||||
}
|
||||
|
||||
private PpomppuArticle convertFromElement(Element element) {
|
||||
|
|
|
@ -4,3 +4,4 @@ spring:
|
|||
on-profile: development
|
||||
import:
|
||||
- classpath:/development/webclient.yml
|
||||
- classpath:/development/database.yml
|
|
@ -6,9 +6,9 @@ package com.myoa.engineering.crawl.ppomppu.support.util;
|
|||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
public final class WebUtil {
|
||||
public final class WebRequestUtil {
|
||||
|
||||
private WebUtil() {}
|
||||
private WebRequestUtil() {}
|
||||
|
||||
public static final String HEADER_USER_AGENT_KEY = "User-Agent";
|
||||
public static final String HEADER_USER_AGENT_VALUE = "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36";
|
Loading…
Reference in New Issue