PpomppuNotifier/processor/src/main/java/com/myoa/engineering/crawl/ppomppu/processor/service/PpomppuFeedService.java

63 lines
2.7 KiB
Java

package com.myoa.engineering.crawl.ppomppu.processor.service;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.Comparator;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
/**
* PpomppuFeedService
*
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
* @since 2021-09-08
*/
@Slf4j
@Component
public class PpomppuFeedService {
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
}
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
final Mono<Element> tbody = extractTbodyFromHtml(html);
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
.map(e -> e.updateBoardName(boardName))
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
// .doOnNext(e -> log.info("parsed Result: {}", e))
.collectList();
}
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
return html.map(Jsoup::parse)
.mapNotNull(e -> e.getElementById("revolution_main_table"))
.map(e -> e.getElementsByTag("tbody"))
// .doOnNext(e -> log.info("tbody - {}", e.html()))
.map(e -> e.stream()
.findFirst()
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
}
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
}
private PpomppuArticle convertFromElement(Element element) {
return PpomppuArticleParser.toArticle(element.getElementsByTag("td"));
}
}