[PPN-13] Deal with new page board structure
This commit is contained in:
parent
8eb431a812
commit
6d315e2a9f
|
@ -6,7 +6,7 @@ plugins {
|
|||
}
|
||||
|
||||
group = 'com.myoa.engineering.crawl.ppomppu'
|
||||
version = '1.0.3'
|
||||
version = '1.1.1'
|
||||
sourceCompatibility = '11'
|
||||
|
||||
configurations {
|
||||
|
@ -21,7 +21,7 @@ repositories {
|
|||
|
||||
allprojects {
|
||||
group = 'com.myoa.engineering.crawl.ppomppu'
|
||||
version = '1.0.3'
|
||||
version = '1.1.1'
|
||||
|
||||
apply plugin: 'java'
|
||||
apply plugin: 'idea'
|
||||
|
|
|
@ -1,10 +0,0 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||
|
||||
/**
|
||||
* PpomppuArticle
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
public class PpomppuArticleDTO {
|
||||
}
|
|
@ -0,0 +1,66 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||
|
||||
import java.time.Instant;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
/**
|
||||
* PpomppuArticleParseDTO
|
||||
*
|
||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||
* @since 2021-09-08
|
||||
*/
|
||||
@Getter
|
||||
@NoArgsConstructor
|
||||
public class PpomppuArticleParseDTO {
|
||||
|
||||
private String id;
|
||||
private String articleId;
|
||||
private String boardName;
|
||||
private String articleUrl;
|
||||
private String thumbnailUrl;
|
||||
private String title;
|
||||
private String hit;
|
||||
private Integer recommended;
|
||||
private String registeredAt;
|
||||
|
||||
@Builder
|
||||
public PpomppuArticleParseDTO(String id, String articleId, String boardName, String articleUrl,
|
||||
String thumbnailUrl, String title, String hit, Integer recommended,
|
||||
String registeredAt) {
|
||||
this.id = id;
|
||||
this.articleId = articleId;
|
||||
this.boardName = boardName;
|
||||
this.articleUrl = articleUrl;
|
||||
this.thumbnailUrl = thumbnailUrl;
|
||||
this.title = title;
|
||||
this.hit = hit;
|
||||
this.recommended = recommended;
|
||||
this.registeredAt = registeredAt;
|
||||
}
|
||||
|
||||
public boolean isInValidated() {
|
||||
return articleId == null || articleId.isEmpty()
|
||||
|| hit == null || hit.isEmpty();
|
||||
}
|
||||
|
||||
public PpomppuArticle convert() {
|
||||
if (isInValidated()) {
|
||||
throw new IllegalArgumentException("PpomppuArticleParseDTO was invalidated");
|
||||
}
|
||||
return PpomppuArticle.builder()
|
||||
.articleId(Long.parseLong(articleId))
|
||||
.title(title)
|
||||
.articleUrl(articleUrl)
|
||||
.thumbnailUrl(thumbnailUrl)
|
||||
.recommended(recommended)
|
||||
.hit(Integer.parseInt(hit))
|
||||
.registeredAt(DateUtil.DATE_TIME_FORMATTER.parse(registeredAt, Instant::from))
|
||||
.build();
|
||||
}
|
||||
}
|
|
@ -1,9 +1,5 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
|
||||
|
@ -18,33 +14,31 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
|||
*/
|
||||
public final class PpomppuArticleParser {
|
||||
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
|
||||
.withZone(ZoneId.of("Asia/Seoul"));
|
||||
|
||||
private PpomppuArticleParser() {}
|
||||
|
||||
public static PpomppuArticle toArticle(Elements articleElement) {
|
||||
final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0));
|
||||
final String articleIdString = PpomppuArticleParser.parseArticleId(articleElement.get(0));
|
||||
final String title = PpomppuArticleParser.parseTitle(articleElement.get(2));
|
||||
final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2));
|
||||
final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3));
|
||||
final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
|
||||
final int hit = PpomppuArticleParser.parseHit(articleElement.get(7));
|
||||
final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
|
||||
final Integer recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
|
||||
final String hitString = PpomppuArticleParser.parseHit(articleElement.get(7));
|
||||
final String registeredAtString = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
|
||||
|
||||
return PpomppuArticle.builder()
|
||||
.articleId(articleId)
|
||||
.title(title)
|
||||
.articleUrl(articleUrl)
|
||||
.thumbnailUrl(thumbnailUrl)
|
||||
.recommended(recommended)
|
||||
.hit(hit)
|
||||
.registeredAt(registeredAt)
|
||||
.build();
|
||||
return PpomppuArticleParseDTO.builder()
|
||||
.articleId(articleIdString)
|
||||
.title(title)
|
||||
.articleUrl(articleUrl)
|
||||
.thumbnailUrl(thumbnailUrl)
|
||||
.recommended(recommended)
|
||||
.hit(hitString)
|
||||
.registeredAt(registeredAtString)
|
||||
.build()
|
||||
.convert();
|
||||
}
|
||||
|
||||
public static Long parseArticleId(Element td) {
|
||||
return Long.parseLong(td.text().trim());
|
||||
public static String parseArticleId(Element td) {
|
||||
return td.text().trim();
|
||||
}
|
||||
|
||||
public static String parseTitle(Element td) {
|
||||
|
@ -73,13 +67,12 @@ public final class PpomppuArticleParser {
|
|||
return recommended;
|
||||
}
|
||||
|
||||
public static Integer parseHit(Element td) {
|
||||
return Integer.parseInt(td.text());
|
||||
public static String parseHit(Element td) {
|
||||
return td.text();
|
||||
}
|
||||
|
||||
public static Instant parseRegisteredAt(Element td) {
|
||||
final String registeredAtString = td.attr("title");
|
||||
return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from);
|
||||
public static String parseRegisteredAt(Element td) {
|
||||
return td.attr("title");
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,8 +1,6 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.List;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Collectors;
|
||||
|
@ -11,6 +9,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
|||
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
|
||||
|
||||
/**
|
||||
* PpomppuArticleTransformer
|
||||
|
@ -26,9 +25,6 @@ public final class PpomppuArticleTransformer {
|
|||
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
|
||||
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
|
||||
|
||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
||||
.withZone(ZoneId.of("Asia/Seoul"));
|
||||
|
||||
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
|
||||
SimpleMessageDTO.builder()
|
||||
.requestedAt(Instant.now())
|
||||
|
@ -47,7 +43,7 @@ public final class PpomppuArticleTransformer {
|
|||
.collect(Collectors.joining("\n\n"));
|
||||
return SimpleMessageDTO.builder()
|
||||
.requestedAt(requestedAt)
|
||||
.title(DATE_TIME_FORMATTER.format(requestedAt))
|
||||
.title(DateUtil.DATE_TIME_FORMATTER.format(requestedAt))
|
||||
.body(body)
|
||||
.build();
|
||||
}
|
||||
|
@ -62,7 +58,7 @@ public final class PpomppuArticleTransformer {
|
|||
.requestedAt(requestedAt)
|
||||
.title(String.format(TITLE_FORMAT_V1,
|
||||
boardName.getMenuName(),
|
||||
DATE_TIME_FORMATTER.format(requestedAt)))
|
||||
DateUtil.DATE_TIME_FORMATTER.format(requestedAt)))
|
||||
.blocks(body)
|
||||
.build();
|
||||
}
|
||||
|
|
|
@ -1,16 +1,18 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
|
||||
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.springframework.stereotype.Component;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
|
@ -33,8 +35,10 @@ public class PpomppuFeedService {
|
|||
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
||||
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
||||
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
||||
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
||||
.onErrorContinue((t, e) -> log.error("Error occured : {}, value: {}",
|
||||
e, t.getLocalizedMessage()))
|
||||
.map(e -> e.updateBoardName(boardName))
|
||||
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
|
||||
// .doOnNext(e -> log.info("parsed Result: {}", e))
|
||||
|
|
|
@ -28,7 +28,7 @@ public class ParseEventEmitter {
|
|||
this.processorAPIService = processorAPIService;
|
||||
}
|
||||
|
||||
@Scheduled(fixedRate = 300 * 1000L)
|
||||
@Scheduled(fixedRate = 600 * 1000L)
|
||||
public void emitBoards() {
|
||||
log.info("[emitDomesticBoard] trigger fired!");
|
||||
Arrays.stream(PpomppuBoardName.values())
|
||||
|
|
|
@ -0,0 +1,18 @@
|
|||
package com.myoa.engineering.crawl.ppomppu.support.util;
|
||||
|
||||
import java.time.ZoneId;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
/**
 * DateUtil
 *
 * <p>Holds the date-time formatter shared by the article parsing and message formatting code.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2022-01-02
 */
public final class DateUtil {

    /**
     * Formatter for the pattern {@code yy.MM.dd HH:mm:ss} (two-digit year), bound to the
     * {@code Asia/Seoul} zone so that it can both format an {@code Instant} and parse text
     * directly into one.
     */
    public static final DateTimeFormatter DATE_TIME_FORMATTER =
            DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
                             .withZone(ZoneId.of("Asia/Seoul"));

    /** Utility class; not meant to be instantiated. */
    private DateUtil() { }
}
|
Loading…
Reference in New Issue