57 lines
2.4 KiB
Java
57 lines
2.4 KiB
Java
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
|
|
|
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
|
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer;
|
|
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
|
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Element;
|
|
import org.springframework.stereotype.Component;
|
|
import reactor.core.publisher.Flux;
|
|
import reactor.core.publisher.Mono;
|
|
|
|
/**
|
|
* PpomppuFeedService
|
|
*
|
|
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
|
* @since 2021-09-08
|
|
*/
|
|
@Slf4j
|
|
@Component
|
|
public class PpomppuFeedService {
|
|
|
|
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
|
|
|
|
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
|
|
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
|
|
}
|
|
|
|
public Flux<PpomppuArticle> getArticles(PpomppuBoardName boardName) {
|
|
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
|
final Mono<Element> tbody = extractTbodyFromHtml(html)
|
|
.doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
|
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
|
.doOnNext(e -> log.info("parsed Result: {}", e));
|
|
}
|
|
|
|
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
|
|
return html.map(Jsoup::parse)
|
|
.mapNotNull(e -> e.getElementById("revolution_main_table"))
|
|
.map(e -> e.getElementsByTag("tbody"))
|
|
.doOnNext(e -> log.info("tbody - {}", e.html()))
|
|
.map(e -> e.stream()
|
|
.findFirst()
|
|
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
|
|
}
|
|
|
|
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
|
|
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
|
|
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
|
|
}
|
|
|
|
private PpomppuArticle convertFromElement(Element element) {
|
|
return PpomppuArticleTransformer.toArticle(element.getElementsByTag("td"));
|
|
}
|
|
}
|