[PPN-13] Deal with new page board structure

This commit is contained in:
woo-jin.shin 2022-01-02 12:48:10 +09:00
parent 8eb431a812
commit 6d315e2a9f
8 changed files with 120 additions and 53 deletions

View File

@ -6,7 +6,7 @@ plugins {
}
group = 'com.myoa.engineering.crawl.ppomppu'
version = '1.0.3'
version = '1.1.1'
sourceCompatibility = '11'
configurations {
@ -21,7 +21,7 @@ repositories {
allprojects {
group = 'com.myoa.engineering.crawl.ppomppu'
version = '1.0.3'
version = '1.1.1'
apply plugin: 'java'
apply plugin: 'idea'

View File

@ -1,10 +0,0 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
/**
 * PpomppuArticleDTO
 * <p>
 * Empty placeholder type: it declares no fields and no methods.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
public class PpomppuArticleDTO {
}

View File

@ -0,0 +1,66 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
import java.time.Instant;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
 * PpomppuArticleParseDTO
 * <p>
 * Intermediate holder for article values that are still in their raw, textual form.
 * {@link #convert()} validates the required fields and turns them into a typed
 * {@link PpomppuArticle}.
 *
 * @author Shin Woo-jin (woozu.shin@kakaoent.com)
 * @since 2021-09-08
 */
@Getter
@NoArgsConstructor
public class PpomppuArticleParseDTO {

    private String id;
    private String articleId;
    private String boardName;
    private String articleUrl;
    private String thumbnailUrl;
    private String title;
    private String hit;
    private Integer recommended;
    private String registeredAt;

    @Builder
    public PpomppuArticleParseDTO(String id, String articleId, String boardName, String articleUrl,
                                  String thumbnailUrl, String title, String hit, Integer recommended,
                                  String registeredAt) {
        this.id = id;
        this.articleId = articleId;
        this.boardName = boardName;
        this.articleUrl = articleUrl;
        this.thumbnailUrl = thumbnailUrl;
        this.title = title;
        this.hit = hit;
        this.recommended = recommended;
        this.registeredAt = registeredAt;
    }

    /**
     * Reports whether a field that {@link #convert()} must parse is missing.
     * <p>
     * {@code registeredAt} is checked together with {@code articleId} and {@code hit}:
     * all three are parsed unconditionally by {@link #convert()}, so a missing value in
     * any of them can never yield a valid article.
     *
     * @return {@code true} when {@code articleId}, {@code hit} or {@code registeredAt}
     *         is {@code null} or empty
     */
    public boolean isInValidated() {
        return isNullOrEmpty(articleId)
               || isNullOrEmpty(hit)
               || isNullOrEmpty(registeredAt);
    }

    /**
     * Converts the raw values into a {@link PpomppuArticle}.
     * <p>
     * {@code id} and {@code boardName} are not copied onto the built article.
     *
     * @return the article built from this DTO
     * @throws IllegalArgumentException if {@link #isInValidated()} is {@code true}, or if
     *         {@code articleId} / {@code hit} are not numeric ({@link NumberFormatException})
     * @throws java.time.format.DateTimeParseException if {@code registeredAt} does not match
     *         {@code DateUtil.DATE_TIME_FORMATTER}
     */
    public PpomppuArticle convert() {
        if (isInValidated()) {
            throw new IllegalArgumentException("PpomppuArticleParseDTO was invalidated");
        }
        return PpomppuArticle.builder()
                             .articleId(Long.parseLong(articleId))
                             .title(title)
                             .articleUrl(articleUrl)
                             .thumbnailUrl(thumbnailUrl)
                             .recommended(recommended)
                             .hit(Integer.parseInt(hit))
                             .registeredAt(DateUtil.DATE_TIME_FORMATTER.parse(registeredAt, Instant::from))
                             .build();
    }

    /** Returns {@code true} when the given raw value is {@code null} or has zero length. */
    private static boolean isNullOrEmpty(String value) {
        return value == null || value.isEmpty();
    }
}

View File

@ -1,9 +1,5 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
@ -18,33 +14,31 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
*/
public final class PpomppuArticleParser {
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
.withZone(ZoneId.of("Asia/Seoul"));
private PpomppuArticleParser() {}
public static PpomppuArticle toArticle(Elements articleElement) {
final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0));
final String articleIdString = PpomppuArticleParser.parseArticleId(articleElement.get(0));
final String title = PpomppuArticleParser.parseTitle(articleElement.get(2));
final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2));
final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3));
final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
final int hit = PpomppuArticleParser.parseHit(articleElement.get(7));
final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
final Integer recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
final String hitString = PpomppuArticleParser.parseHit(articleElement.get(7));
final String registeredAtString = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
return PpomppuArticle.builder()
.articleId(articleId)
.title(title)
.articleUrl(articleUrl)
.thumbnailUrl(thumbnailUrl)
.recommended(recommended)
.hit(hit)
.registeredAt(registeredAt)
.build();
return PpomppuArticleParseDTO.builder()
.articleId(articleIdString)
.title(title)
.articleUrl(articleUrl)
.thumbnailUrl(thumbnailUrl)
.recommended(recommended)
.hit(hitString)
.registeredAt(registeredAtString)
.build()
.convert();
}
public static Long parseArticleId(Element td) {
return Long.parseLong(td.text().trim());
public static String parseArticleId(Element td) {
return td.text().trim();
}
public static String parseTitle(Element td) {
@ -73,13 +67,12 @@ public final class PpomppuArticleParser {
return recommended;
}
public static Integer parseHit(Element td) {
return Integer.parseInt(td.text());
public static String parseHit(Element td) {
return td.text();
}
public static Instant parseRegisteredAt(Element td) {
final String registeredAtString = td.attr("title");
return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from);
public static String parseRegisteredAt(Element td) {
return td.attr("title");
}
}

View File

@ -1,8 +1,6 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
import java.time.Instant;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
@ -11,6 +9,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
/**
* PpomppuArticleTransformer
@ -26,9 +25,6 @@ public final class PpomppuArticleTransformer {
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
.withZone(ZoneId.of("Asia/Seoul"));
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
SimpleMessageDTO.builder()
.requestedAt(Instant.now())
@ -47,7 +43,7 @@ public final class PpomppuArticleTransformer {
.collect(Collectors.joining("\n\n"));
return SimpleMessageDTO.builder()
.requestedAt(requestedAt)
.title(DATE_TIME_FORMATTER.format(requestedAt))
.title(DateUtil.DATE_TIME_FORMATTER.format(requestedAt))
.body(body)
.build();
}
@ -62,7 +58,7 @@ public final class PpomppuArticleTransformer {
.requestedAt(requestedAt)
.title(String.format(TITLE_FORMAT_V1,
boardName.getMenuName(),
DATE_TIME_FORMATTER.format(requestedAt)))
DateUtil.DATE_TIME_FORMATTER.format(requestedAt)))
.blocks(body)
.build();
}

View File

@ -1,16 +1,18 @@
package com.myoa.engineering.crawl.ppomppu.processor.service;
import java.util.Comparator;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.util.Comparator;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
@ -33,8 +35,10 @@ public class PpomppuFeedService {
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
final Mono<Element> tbody = extractTbodyFromHtml(html);
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
.onErrorContinue((t, e) -> log.error("Error occured : {}, value: {}",
e, t.getLocalizedMessage()))
.map(e -> e.updateBoardName(boardName))
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
// .doOnNext(e -> log.info("parsed Result: {}", e))

View File

@ -28,7 +28,7 @@ public class ParseEventEmitter {
this.processorAPIService = processorAPIService;
}
@Scheduled(fixedRate = 300 * 1000L)
@Scheduled(fixedRate = 600 * 1000L)
public void emitBoards() {
log.info("[emitDomesticBoard] trigger fired!");
Arrays.stream(PpomppuBoardName.values())

View File

@ -0,0 +1,18 @@
package com.myoa.engineering.crawl.ppomppu.support.util;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
/**
 * DateUtil
 * <p>
 * Holder for the date-time formatter constant; the class cannot be instantiated.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2022-01-02
 */
public final class DateUtil {

    /** Pattern with a two-digit year, e.g. {@code 22.01.02 12:48:10}. */
    private static final String DATE_TIME_PATTERN = "yy.MM.dd HH:mm:ss";

    /** Zone applied by the formatter when formatting and parsing. */
    private static final ZoneId ZONE_ID = ZoneId.of("Asia/Seoul");

    /** Formatter for {@code yy.MM.dd HH:mm:ss} text, bound to the {@code Asia/Seoul} zone. */
    public static final DateTimeFormatter DATE_TIME_FORMATTER =
            DateTimeFormatter.ofPattern(DATE_TIME_PATTERN).withZone(ZONE_ID);

    private DateUtil() {
        // constants only, never instantiated
    }
}