[PPN-210926-6] Persist feed articles
This commit is contained in:
parent
ab4ab339f6
commit
86fa1cbe09
|
@ -1,16 +1,17 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.controller;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.APIResponse;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.dto.FeedParsedResult;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuArticleService;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.service.PpomppuFeedService;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.APIResponse;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.web.bind.annotation.PathVariable;
|
||||
import org.springframework.web.bind.annotation.PostMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
/**
|
||||
|
@ -25,16 +26,21 @@ import reactor.core.publisher.Mono;
|
|||
public class CrawlAPIController {
|
||||
|
||||
private final PpomppuFeedService ppomppuRSSFeedService;
|
||||
private final PpomppuArticleService ppomppuArticleService;
|
||||
|
||||
/**
 * Creates the controller with the services it delegates to.
 *
 * @param ppomppuRSSFeedService service used by {@code crawlBoard} to fetch the articles of a board
 * @param ppomppuArticleService service used by {@code crawlBoard} to filter and persist articles
 */
public CrawlAPIController(PpomppuFeedService ppomppuRSSFeedService,
                          PpomppuArticleService ppomppuArticleService) {
    this.ppomppuRSSFeedService = ppomppuRSSFeedService;
    this.ppomppuArticleService = ppomppuArticleService;
}
|
||||
|
||||
@PostMapping("/boards/{boardName}")
|
||||
public Mono<APIResponse<FeedParsedResult>> crawlBoard(@PathVariable("boardName") PpomppuBoardName boardName) {
|
||||
log.info("got request... {}", boardName);
|
||||
FeedParsedResult result = FeedParsedResult.of(boardName);
|
||||
Flux<PpomppuArticle> articles = ppomppuRSSFeedService.getArticles(boardName);
|
||||
Mono<List<PpomppuArticle>> articles = ppomppuRSSFeedService.getArticles(boardName)
|
||||
.doOnNext(e -> ppomppuArticleService.filterOnlyNewArticles(boardName, e))
|
||||
.doOnNext(e -> ppomppuArticleService.save(boardName, e));
|
||||
|
||||
return articles.then(Mono.just(APIResponse.success(result.done())));
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|||
import java.time.Instant;
|
||||
import javax.persistence.Column;
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.EnumType;
|
||||
import javax.persistence.Enumerated;
|
||||
import javax.persistence.GeneratedValue;
|
||||
import javax.persistence.GenerationType;
|
||||
|
@ -27,7 +28,7 @@ public class PpomppuArticle extends Auditable {
|
|||
private Long articleId;
|
||||
|
||||
@Column
|
||||
@Enumerated
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
@Column
|
||||
|
@ -58,4 +59,8 @@ public class PpomppuArticle extends Auditable {
|
|||
this.registeredAt = registeredAt;
|
||||
}
|
||||
|
||||
public PpomppuArticle updateBoardName(PpomppuBoardName boardName) {
|
||||
this.boardName = boardName;
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,10 +4,13 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|||
import java.time.Instant;
|
||||
import javax.persistence.Column;
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.EnumType;
|
||||
import javax.persistence.Enumerated;
|
||||
import javax.persistence.GeneratedValue;
|
||||
import javax.persistence.GenerationType;
|
||||
import javax.persistence.Id;
|
||||
import javax.persistence.Table;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
|
@ -25,9 +28,30 @@ public class PpomppuBoardFeedStatus extends Auditable {
|
|||
private Long latestParsedArticleId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
@Column
|
||||
private Instant updatedAt;
|
||||
|
||||
public static PpomppuBoardFeedStatus of(PpomppuBoardName boardName, Long latestArticleId) {
|
||||
return PpomppuBoardFeedStatus.builder()
|
||||
.boardName(boardName)
|
||||
.latestParsedArticleId(latestArticleId)
|
||||
.updatedAt(Instant.now())
|
||||
.build();
|
||||
}
|
||||
|
||||
public void updateArticleId(Long latestArticleId) {
|
||||
this.updatedAt = Instant.now();
|
||||
this.latestParsedArticleId = latestArticleId;
|
||||
}
|
||||
|
||||
/**
 * All-fields constructor; also the target of the Lombok-generated builder.
 *
 * @param id                    identifier of the row
 * @param latestParsedArticleId latest parsed article id for the board
 * @param boardName             board this status belongs to
 * @param updatedAt             instant of the last update
 */
@Builder
public PpomppuBoardFeedStatus(final Long id,
                              final Long latestParsedArticleId,
                              final PpomppuBoardName boardName,
                              final Instant updatedAt) {
    this.boardName = boardName;
    this.latestParsedArticleId = latestParsedArticleId;
    this.updatedAt = updatedAt;
    this.id = id;
}
|
||||
}
|
||||
|
|
|
@ -3,6 +3,8 @@ package com.myoa.engineering.crawl.ppomppu.processor.domain;
|
|||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import javax.persistence.Column;
|
||||
import javax.persistence.Entity;
|
||||
import javax.persistence.EnumType;
|
||||
import javax.persistence.Enumerated;
|
||||
import javax.persistence.GeneratedValue;
|
||||
import javax.persistence.GenerationType;
|
||||
import javax.persistence.Id;
|
||||
|
@ -24,6 +26,7 @@ public class SubscribedBoard extends Auditable {
|
|||
private Long userId;
|
||||
|
||||
@Column
|
||||
@Enumerated(EnumType.STRING)
|
||||
private PpomppuBoardName boardName;
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
@ -47,7 +48,7 @@ public final class PpomppuArticleTransformer {
|
|||
}
|
||||
|
||||
public static String toArticleUrl(Element td) {
|
||||
return td.getElementsByTag("a").attr("href");
|
||||
return PpomppuBoardName.ofViewPageUrl(td.getElementsByTag("a").attr("href"));
|
||||
}
|
||||
|
||||
public static Integer toRecommended(Element td) {
|
||||
|
|
|
@ -2,6 +2,7 @@ package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client;
|
|||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebClientFilterFactory;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.configuration.factory.WebFluxExchangeStragiesFactory;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
@ -19,12 +20,10 @@ import reactor.core.scheduler.Schedulers;
|
|||
@Component
|
||||
public class PpomppuBoardFeedRetriever {
|
||||
|
||||
private static final String PPOMPPU_URL = "https://www.ppomppu.co.kr/";
|
||||
|
||||
private final WebClient webClient;
|
||||
|
||||
public PpomppuBoardFeedRetriever(WebClient.Builder webClientBuilder) {
|
||||
this.webClient = webClientBuilder.baseUrl(PPOMPPU_URL)
|
||||
this.webClient = webClientBuilder.baseUrl(PpomppuBoardName.PPOMPPU_URL)
|
||||
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofTextHtml())
|
||||
.filter(WebClientFilterFactory.logRequest())
|
||||
.filter(WebClientFilterFactory.logResponse())
|
||||
|
@ -33,7 +32,7 @@ public class PpomppuBoardFeedRetriever {
|
|||
|
||||
public Mono<String> getHtml(String uri) {
|
||||
return webClient.get()
|
||||
.uri("/zboard/zboard.php?id=ppomppu")
|
||||
.uri(uri)
|
||||
.exchangeToMono(e -> e.bodyToMono(String.class))
|
||||
.publishOn(Schedulers.boundedElastic())
|
||||
.onErrorResume(WebClientRequestException.class, t -> {
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuBoardFeedStatus;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import java.util.Optional;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.stereotype.Repository;
|
||||
|
||||
@Repository
|
||||
public interface PpomppuBoardFeedStatusRepository extends JpaRepository<PpomppuBoardFeedStatus, Long> {
|
||||
|
||||
Optional<PpomppuBoardFeedStatus> findByBoardName(PpomppuBoardName boardName);
|
||||
|
||||
}
|
||||
|
|
|
@ -1,11 +1,16 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuBoardFeedStatus;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository.PpomppuArticleRepository;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.repository.PpomppuBoardFeedStatusRepository;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.stream.Collectors;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
|
@ -21,10 +26,34 @@ public class PpomppuArticleService {
|
|||
this.ppomppuBoardFeedStatusRepository = ppomppuBoardFeedStatusRepository;
|
||||
}
|
||||
|
||||
public void save(List<PpomppuArticle> articles) {
|
||||
// TODO get latest parsed article id
|
||||
// TODO filter articles
|
||||
@Transactional(readOnly = true)
|
||||
public List<PpomppuArticle> filterOnlyNewArticles(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
|
||||
Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName);
|
||||
Long latestArticleId = boardFeedStatus.map(PpomppuBoardFeedStatus::getLatestParsedArticleId)
|
||||
.orElse(0L);
|
||||
|
||||
return articles.stream()
|
||||
.filter(e -> e.getArticleId().compareTo(latestArticleId) > 0)
|
||||
.collect(Collectors.toList());
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void save(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
|
||||
Long latestArticleId = articles.stream()
|
||||
.map(PpomppuArticle::getArticleId)
|
||||
.max(Long::compareTo)
|
||||
.orElse(0L);
|
||||
|
||||
// save PpomppuBoardFeedStatus
|
||||
Optional<PpomppuBoardFeedStatus> boardFeedStatus = ppomppuBoardFeedStatusRepository.findByBoardName(boardName);
|
||||
boardFeedStatus.ifPresentOrElse(e -> {
|
||||
e.updateArticleId(latestArticleId);
|
||||
ppomppuBoardFeedStatusRepository.save(e);
|
||||
},
|
||||
() -> ppomppuBoardFeedStatusRepository.save(PpomppuBoardFeedStatus.of(boardName,
|
||||
latestArticleId)));
|
||||
|
||||
// save real articles.
|
||||
ppomppuArticleRepository.saveAll(articles);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -4,6 +4,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
|||
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Element;
|
||||
|
@ -27,12 +28,14 @@ public class PpomppuFeedService {
|
|||
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
|
||||
}
|
||||
|
||||
public Flux<PpomppuArticle> getArticles(PpomppuBoardName boardName) {
|
||||
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
||||
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
||||
final Mono<Element> tbody = extractTbodyFromHtml(html)
|
||||
.doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
||||
.doOnNext(e -> log.info("parsed Result: {}", e));
|
||||
.map(e -> e.updateBoardName(boardName))
|
||||
.doOnNext(e -> log.info("parsed Result: {}", e))
|
||||
.collectList();
|
||||
}
|
||||
|
||||
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
|
||||
|
|
|
@ -12,34 +12,42 @@ import lombok.NoArgsConstructor;
|
|||
/**
 * Boards that can be crawled, each carrying the resource path of its listing page
 * (relative to {@link #PPOMPPU_URL}).
 *
 * <p>State is immutable: both fields are {@code final} and exposed through explicit
 * accessors with the same names the Lombok-generated getters had.
 */
public enum PpomppuBoardName {
    PPOMPPU_DOMESTIC_ETC("zboard/zboard.php?id=ppomppu&category=1", true),
    PPOMPPU_DOMESTIC_COMPUTER("zboard/zboard.php?id=ppomppu&category=4", true),
    PPOMPPU_DOMESTIC_DIGITAL("zboard/zboard.php?id=ppomppu&category=5", true),
    PPOMPPU_DOMESTIC_FOOD("zboard/zboard.php?id=ppomppu&category=6", true),
    PPOMPPU_DOMESTIC_BOOK("zboard/zboard.php?id=ppomppu&category=8", true),
    PPOMPPU_DOMESTIC_APPLIANCES("zboard/zboard.php?id=ppomppu&category=9", true),
    PPOMPPU_DOMESTIC_PARENTING("zboard/zboard.php?id=ppomppu&category=10", true),
    PPOMPPU_DOMESTIC_GIFTCARD("zboard/zboard.php?id=ppomppu&category=11", true),
    PPOMPPU_DOMESTIC_CLOTHES("zboard/zboard.php?id=ppomppu&category=12", true),
    PPOMPPU_DOMESTIC_COSMETIC("zboard/zboard.php?id=ppomppu&category=13", true),
    PPOMPPU_DOMESTIC_OUTDOOR("zboard/zboard.php?id=ppomppu&category=15", true),
    PPOMPPU_OVERSEA_ETC("zboard/zboard.php?id=ppomppu4&category=1", true),
    PPOMPPU_OVERSEA_APPLIANCES("zboard/zboard.php?id=ppomppu4&category=7", true),
    PPOMPPU_OVERSEA_TVAV("zboard/zboard.php?id=ppomppu4&category=8", true),
    PPOMPPU_OVERSEA_COMPUTER("zboard/zboard.php?id=ppomppu4&category=3", true),
    PPOMPPU_OVERSEA_DIGITAL("zboard/zboard.php?id=ppomppu4&category=4", true),
    PPOMPPU_OVERSEA_MOBILEACCESSORY("zboard/zboard.php?id=ppomppu4&category=9", true),
    PPOMPPU_OVERSEA_CLOTHES("zboard/zboard.php?id=ppomppu4&category=5", true),
    PPOMPPU_OVERSEA_WATCH("zboard/zboard.php?id=ppomppu4&category=2", true),
    PPOMPPU_OVERSEA_SHOES("zboard/zboard.php?id=ppomppu4&category=11", true),
    PPOMPPU_OVERSEA_FOOD("zboard/zboard.php?id=ppomppu4&category=10", true),
    PPOMPPU_OVERSEA_PARENTING("zboard/zboard.php?id=ppomppu4&category=6", true),
    ;

    /** Base URL of the site; ends with a slash. */
    public static final String PPOMPPU_URL = "https://www.ppomppu.co.kr/";

    /** Listing-page path of the board, without a leading slash. */
    private final String resourcePath;

    // NOTE(review): presumably marks boards picked up by a default scheduled crawl —
    // no reader of this flag is visible here; confirm against the scheduler.
    private final boolean crawlWithDefaultTimer;

    PpomppuBoardName(String boardPath, boolean crawlWithDefaultTimer) {
        this.resourcePath = boardPath;
        this.crawlWithDefaultTimer = crawlWithDefaultTimer;
    }

    /**
     * @return the listing-page path of this board
     */
    public String getResourcePath() {
        return resourcePath;
    }

    /**
     * @return the {@code crawlWithDefaultTimer} flag of this board
     */
    public boolean isCrawlWithDefaultTimer() {
        return crawlWithDefaultTimer;
    }

    /**
     * Builds an absolute view-page URL from a link found on a listing page.
     *
     * @param articleUrl link value, appended as-is after {@code PPOMPPU_URL + "zboard/"}
     * @return the concatenated URL
     */
    public static String ofViewPageUrl(String articleUrl) {
        return PPOMPPU_URL + "zboard/" + articleUrl;
    }

}
|
||||
|
|
Loading…
Reference in New Issue