63 lines
2.7 KiB
Java
63 lines
2.7 KiB
Java
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
|
|
|
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
|
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
|
|
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
|
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|
|
|
import java.util.Comparator;
|
|
import java.util.List;
|
|
import lombok.extern.slf4j.Slf4j;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Element;
|
|
import org.springframework.stereotype.Component;
|
|
import reactor.core.publisher.Flux;
|
|
import reactor.core.publisher.Mono;
|
|
|
|
/**
|
|
* PpomppuFeedService
|
|
*
|
|
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
|
* @since 2021-09-08
|
|
*/
|
|
@Slf4j
|
|
@Component
|
|
public class PpomppuFeedService {
|
|
|
|
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
|
|
|
|
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
|
|
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
|
|
}
|
|
|
|
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
|
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
|
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
|
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
|
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
|
.map(e -> e.updateBoardName(boardName))
|
|
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
|
|
// .doOnNext(e -> log.info("parsed Result: {}", e))
|
|
.collectList();
|
|
}
|
|
|
|
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
|
|
return html.map(Jsoup::parse)
|
|
.mapNotNull(e -> e.getElementById("revolution_main_table"))
|
|
.map(e -> e.getElementsByTag("tbody"))
|
|
// .doOnNext(e -> log.info("tbody - {}", e.html()))
|
|
.map(e -> e.stream()
|
|
.findFirst()
|
|
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
|
|
}
|
|
|
|
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
|
|
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
|
|
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
|
|
}
|
|
|
|
private PpomppuArticle convertFromElement(Element element) {
|
|
return PpomppuArticleParser.toArticle(element.getElementsByTag("td"));
|
|
}
|
|
}
|