Implement PpomppuBoardFeedRetriever
This commit is contained in:
		
							parent
							
								
									08e1f99ab0
								
							
						
					
					
						commit
						cf7425faae
					
				| 
						 | 
				
			
			@ -0,0 +1,48 @@
 | 
			
		|||
package com.myoa.engineering.crawl.ppomppu.processor.configuration.factory;
 | 
			
		||||
 | 
			
		||||
import lombok.extern.slf4j.Slf4j;
 | 
			
		||||
import org.springframework.web.reactive.function.client.ClientRequest;
 | 
			
		||||
import org.springframework.web.reactive.function.client.ClientResponse;
 | 
			
		||||
import org.springframework.web.reactive.function.client.ExchangeFilterFunction;
 | 
			
		||||
import reactor.core.publisher.Mono;
 | 
			
		||||
 | 
			
		||||
/**
 | 
			
		||||
 * WebClientFilterFactory
 | 
			
		||||
 *
 | 
			
		||||
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 | 
			
		||||
 * @since 2021-09-07
 | 
			
		||||
 */
 | 
			
		||||
@Slf4j
 | 
			
		||||
public final class WebClientFilterFactory {
 | 
			
		||||
 | 
			
		||||
    private WebClientFilterFactory() {}
 | 
			
		||||
 | 
			
		||||
    public static ExchangeFilterFunction logRequest() {
 | 
			
		||||
        return ExchangeFilterFunction.ofRequestProcessor(WebClientFilterFactory::writeRequest);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public static ExchangeFilterFunction logResponse() {
 | 
			
		||||
        return ExchangeFilterFunction.ofResponseProcessor(WebClientFilterFactory::writeResponse);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static Mono<ClientRequest> writeRequest(ClientRequest clientRequest) {
 | 
			
		||||
        try {
 | 
			
		||||
            log.info("[WEBCLIENT REQUEST] uri : {} method : {} headers : {}",
 | 
			
		||||
                     clientRequest.url(), clientRequest.method(), clientRequest.headers());
 | 
			
		||||
        } catch (Exception e) {
 | 
			
		||||
            log.error("[WEBCLIENT REQUEST] write request failed", e);
 | 
			
		||||
        }
 | 
			
		||||
        return Mono.just(clientRequest);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private static Mono<ClientResponse> writeResponse(ClientResponse clientResponse) {
 | 
			
		||||
        try {
 | 
			
		||||
            log.info("[WEBCLIENT REQUEST] statusCode : {} headers : {}",
 | 
			
		||||
                     clientResponse.rawStatusCode(), clientResponse.headers().asHttpHeaders());
 | 
			
		||||
        } catch (Exception e) {
 | 
			
		||||
            log.error("[WEBCLIENT RESPONSE] write response failed", e);
 | 
			
		||||
        }
 | 
			
		||||
        return Mono.just(clientResponse);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -17,7 +17,7 @@ public final class WebFluxExchangeStragiesFactory {
 | 
			
		|||
 | 
			
		||||
    private WebFluxExchangeStragiesFactory() {}
 | 
			
		||||
 | 
			
		||||
    public static ExchangeStrategies getDefault() {
 | 
			
		||||
    public static ExchangeStrategies ofDefault() {
 | 
			
		||||
        final ObjectMapper mapper = ObjectMapperFactory.defaultMapper();
 | 
			
		||||
        return ExchangeStrategies.builder()
 | 
			
		||||
                                 .codecs(configurer -> {
 | 
			
		||||
| 
						 | 
				
			
			@ -31,5 +31,19 @@ public final class WebFluxExchangeStragiesFactory {
 | 
			
		|||
                                 })
 | 
			
		||||
                                 .build();
 | 
			
		||||
    }
 | 
			
		||||
    public static ExchangeStrategies ofTextHtml() {
 | 
			
		||||
        final ObjectMapper mapper = ObjectMapperFactory.defaultMapper();
 | 
			
		||||
        return ExchangeStrategies.builder()
 | 
			
		||||
                                 .codecs(configurer -> {
 | 
			
		||||
                                     configurer.defaultCodecs().maxInMemorySize(-1);
 | 
			
		||||
                                     configurer.defaultCodecs()
 | 
			
		||||
                                               .jackson2JsonEncoder(new Jackson2JsonEncoder(mapper,
 | 
			
		||||
                                                                                            MimeTypeUtils.TEXT_HTML));
 | 
			
		||||
                                     configurer.defaultCodecs()
 | 
			
		||||
                                               .jackson2JsonDecoder(new Jackson2JsonDecoder(mapper,
 | 
			
		||||
                                                                                            MimeTypeUtils.TEXT_HTML));
 | 
			
		||||
                                 })
 | 
			
		||||
                                 .build();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,9 +1,8 @@
 | 
			
		|||
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client;
 | 
			
		||||
 | 
			
		||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory;
 | 
			
		||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory;
 | 
			
		||||
import com.myoa.engineering.crawl.ppomppu.support.util.WebUtil;
 | 
			
		||||
import lombok.extern.slf4j.Slf4j;
 | 
			
		||||
import org.springframework.core.ParameterizedTypeReference;
 | 
			
		||||
import org.springframework.stereotype.Component;
 | 
			
		||||
import org.springframework.web.reactive.function.client.WebClient;
 | 
			
		||||
import org.springframework.web.reactive.function.client.WebClientRequestException;
 | 
			
		||||
| 
						 | 
				
			
			@ -26,21 +25,22 @@ public class PpomppuBoardFeedRetriever {
 | 
			
		|||
 | 
			
		||||
    public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) {
 | 
			
		||||
        this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL)
 | 
			
		||||
                                         .exchangeStrategies(WebFluxExchangeStragiesFactory.getDefault())
 | 
			
		||||
                                         .defaultHeader("Content-Type", "text/html")
 | 
			
		||||
                                         .defaultHeader(WebUtil.HEADER_USER_AGENT_KEY, WebUtil.HEADER_USER_AGENT_VALUE)
 | 
			
		||||
                                         .exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml())
 | 
			
		||||
                                         .filter(WebClientFilterFactory.logRequest())
 | 
			
		||||
                                         .filter(WebClientFilterFactory.logResponse())
 | 
			
		||||
                                         .build();
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    public Mono<String> getHtml(String uri) {
 | 
			
		||||
        return webClient.get()
 | 
			
		||||
                        .uri(uri)
 | 
			
		||||
                        .exchangeToMono(e -> e.bodyToMono(new ParameterizedTypeReference<String>() {}))
 | 
			
		||||
                        .uri("/zboard/zboard.php?id=ppomppu")
 | 
			
		||||
                        .exchangeToMono(e -> e.bodyToMono(String.class))
 | 
			
		||||
                        .publishOn(Schedulers.boundedElastic())
 | 
			
		||||
                        .onErrorResume(WebClientRequestException.class, t -> {
 | 
			
		||||
                            log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
 | 
			
		||||
                            return Mono.empty();
 | 
			
		||||
                        });
 | 
			
		||||
                        })
 | 
			
		||||
                        .doOnNext(e -> log.info("[getHtml] {}", e));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -29,8 +29,8 @@ public class PpomppuFeedService {
 | 
			
		|||
 | 
			
		||||
    public Flux<PpomppuArticle> getArticles(PpomppuBoardName boardName) {
 | 
			
		||||
        final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
 | 
			
		||||
        final Mono<Element> tbody = extractTbodyFromHtml(html);
 | 
			
		||||
 | 
			
		||||
        final Mono<Element> tbody = extractTbodyFromHtml(html)
 | 
			
		||||
            .doOnNext(e -> log.info("pre tbody - {}", e.html()));
 | 
			
		||||
        return extractArticlesFromTbody(tbody).map(this::convertFromElement)
 | 
			
		||||
                                              .doOnNext(e -> log.info("parsed Result: {}", e));
 | 
			
		||||
    }
 | 
			
		||||
| 
						 | 
				
			
			@ -39,6 +39,7 @@ public class PpomppuFeedService {
 | 
			
		|||
        return html.map(Jsoup::parse)
 | 
			
		||||
                   .mapNotNull(e -> e.getElementById("revolution_main_table"))
 | 
			
		||||
                   .map(e -> e.getElementsByTag("tbody"))
 | 
			
		||||
                   .doOnNext(e -> log.info("tbody - {}", e.html()))
 | 
			
		||||
                   .map(e -> e.stream()
 | 
			
		||||
                              .findFirst()
 | 
			
		||||
                              .orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
 | 
			
		||||
| 
						 | 
				
			
			@ -46,7 +47,7 @@ public class PpomppuFeedService {
 | 
			
		|||
 | 
			
		||||
    private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
 | 
			
		||||
        return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
 | 
			
		||||
                           tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))));
 | 
			
		||||
                           tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    private PpomppuArticle convertFromElement(Element element) {
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -4,3 +4,4 @@ spring:
 | 
			
		|||
      on-profile: development
 | 
			
		||||
    import:
 | 
			
		||||
      - classpath:/development/webclient.yml
 | 
			
		||||
      - classpath:/development/database.yml
 | 
			
		||||
| 
						 | 
				
			
			@ -6,9 +6,9 @@ package com.myoa.engineering.crawl.ppomppu.support.util;
 | 
			
		|||
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 | 
			
		||||
 * @since 2021-09-08
 | 
			
		||||
 */
 | 
			
		||||
public final class WebUtil {
 | 
			
		||||
public final class WebRequestUtil {
 | 
			
		||||
 | 
			
		||||
    private WebUtil() {}
 | 
			
		||||
    private WebRequestUtil() {}
 | 
			
		||||
 | 
			
		||||
    public static final String HEADER_USER_AGENT_KEY = "User-Agent";
 | 
			
		||||
    public static final String HEADER_USER_AGENT_VALUE = "Mozilla/5.0 (Linux; Android 8.0.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36";
 | 
			
		||||
		Loading…
	
		Reference in New Issue