[PPN-13] Deal with new page board structure
This commit is contained in:
parent
8eb431a812
commit
6d315e2a9f
|
@ -6,7 +6,7 @@ plugins {
|
||||||
}
|
}
|
||||||
|
|
||||||
group = 'com.myoa.engineering.crawl.ppomppu'
|
group = 'com.myoa.engineering.crawl.ppomppu'
|
||||||
version = '1.0.3'
|
version = '1.1.1'
|
||||||
sourceCompatibility = '11'
|
sourceCompatibility = '11'
|
||||||
|
|
||||||
configurations {
|
configurations {
|
||||||
|
@ -21,7 +21,7 @@ repositories {
|
||||||
|
|
||||||
allprojects {
|
allprojects {
|
||||||
group = 'com.myoa.engineering.crawl.ppomppu'
|
group = 'com.myoa.engineering.crawl.ppomppu'
|
||||||
version = '1.0.3'
|
version = '1.1.1'
|
||||||
|
|
||||||
apply plugin: 'java'
|
apply plugin: 'java'
|
||||||
apply plugin: 'idea'
|
apply plugin: 'idea'
|
||||||
|
|
|
@ -1,10 +0,0 @@
|
||||||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* PpomppuArticle
|
|
||||||
*
|
|
||||||
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
|
||||||
* @since 2021-09-08
|
|
||||||
*/
|
|
||||||
public class PpomppuArticleDTO {
|
|
||||||
}
|
|
|
@ -0,0 +1,66 @@
|
||||||
|
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||||
|
|
||||||
|
import java.time.Instant;
|
||||||
|
|
||||||
|
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||||
|
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* PpomppuArticleParseDTO
|
||||||
|
*
|
||||||
|
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
|
||||||
|
* @since 2021-09-08
|
||||||
|
*/
|
||||||
|
@Getter
|
||||||
|
@NoArgsConstructor
|
||||||
|
public class PpomppuArticleParseDTO {
|
||||||
|
|
||||||
|
private String id;
|
||||||
|
private String articleId;
|
||||||
|
private String boardName;
|
||||||
|
private String articleUrl;
|
||||||
|
private String thumbnailUrl;
|
||||||
|
private String title;
|
||||||
|
private String hit;
|
||||||
|
private Integer recommended;
|
||||||
|
private String registeredAt;
|
||||||
|
|
||||||
|
@Builder
|
||||||
|
public PpomppuArticleParseDTO(String id, String articleId, String boardName, String articleUrl,
|
||||||
|
String thumbnailUrl, String title, String hit, Integer recommended,
|
||||||
|
String registeredAt) {
|
||||||
|
this.id = id;
|
||||||
|
this.articleId = articleId;
|
||||||
|
this.boardName = boardName;
|
||||||
|
this.articleUrl = articleUrl;
|
||||||
|
this.thumbnailUrl = thumbnailUrl;
|
||||||
|
this.title = title;
|
||||||
|
this.hit = hit;
|
||||||
|
this.recommended = recommended;
|
||||||
|
this.registeredAt = registeredAt;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isInValidated() {
|
||||||
|
return articleId == null || articleId.isEmpty()
|
||||||
|
|| hit == null || hit.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
public PpomppuArticle convert() {
|
||||||
|
if (isInValidated()) {
|
||||||
|
throw new IllegalArgumentException("PpomppuArticleParseDTO was invalidated");
|
||||||
|
}
|
||||||
|
return PpomppuArticle.builder()
|
||||||
|
.articleId(Long.parseLong(articleId))
|
||||||
|
.title(title)
|
||||||
|
.articleUrl(articleUrl)
|
||||||
|
.thumbnailUrl(thumbnailUrl)
|
||||||
|
.recommended(recommended)
|
||||||
|
.hit(Integer.parseInt(hit))
|
||||||
|
.registeredAt(DateUtil.DATE_TIME_FORMATTER.parse(registeredAt, Instant::from))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,9 +1,5 @@
|
||||||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||||
|
|
||||||
import java.time.Instant;
|
|
||||||
import java.time.ZoneId;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
|
|
||||||
import org.jsoup.nodes.Element;
|
import org.jsoup.nodes.Element;
|
||||||
import org.jsoup.select.Elements;
|
import org.jsoup.select.Elements;
|
||||||
|
|
||||||
|
@ -18,33 +14,31 @@ import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||||
*/
|
*/
|
||||||
public final class PpomppuArticleParser {
|
public final class PpomppuArticleParser {
|
||||||
|
|
||||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yy.MM.dd HH:mm:ss")
|
|
||||||
.withZone(ZoneId.of("Asia/Seoul"));
|
|
||||||
|
|
||||||
private PpomppuArticleParser() {}
|
private PpomppuArticleParser() {}
|
||||||
|
|
||||||
public static PpomppuArticle toArticle(Elements articleElement) {
|
public static PpomppuArticle toArticle(Elements articleElement) {
|
||||||
final long articleId = PpomppuArticleParser.parseArticleId(articleElement.get(0));
|
final String articleIdString = PpomppuArticleParser.parseArticleId(articleElement.get(0));
|
||||||
final String title = PpomppuArticleParser.parseTitle(articleElement.get(2));
|
final String title = PpomppuArticleParser.parseTitle(articleElement.get(2));
|
||||||
final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2));
|
final String articleUrl = PpomppuArticleParser.parseArticleUrl(articleElement.get(2));
|
||||||
final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3));
|
final String thumbnailUrl = PpomppuArticleParser.parseThumbnailUrl(articleElement.get(3));
|
||||||
final int recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
|
final Integer recommended = PpomppuArticleParser.parseRecommended(articleElement.get(6));
|
||||||
final int hit = PpomppuArticleParser.parseHit(articleElement.get(7));
|
final String hitString = PpomppuArticleParser.parseHit(articleElement.get(7));
|
||||||
final Instant registeredAt = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
|
final String registeredAtString = PpomppuArticleParser.parseRegisteredAt(articleElement.get(5));
|
||||||
|
|
||||||
return PpomppuArticle.builder()
|
return PpomppuArticleParseDTO.builder()
|
||||||
.articleId(articleId)
|
.articleId(articleIdString)
|
||||||
.title(title)
|
.title(title)
|
||||||
.articleUrl(articleUrl)
|
.articleUrl(articleUrl)
|
||||||
.thumbnailUrl(thumbnailUrl)
|
.thumbnailUrl(thumbnailUrl)
|
||||||
.recommended(recommended)
|
.recommended(recommended)
|
||||||
.hit(hit)
|
.hit(hitString)
|
||||||
.registeredAt(registeredAt)
|
.registeredAt(registeredAtString)
|
||||||
.build();
|
.build()
|
||||||
|
.convert();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Long parseArticleId(Element td) {
|
public static String parseArticleId(Element td) {
|
||||||
return Long.parseLong(td.text().trim());
|
return td.text().trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String parseTitle(Element td) {
|
public static String parseTitle(Element td) {
|
||||||
|
@ -73,13 +67,12 @@ public final class PpomppuArticleParser {
|
||||||
return recommended;
|
return recommended;
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Integer parseHit(Element td) {
|
public static String parseHit(Element td) {
|
||||||
return Integer.parseInt(td.text());
|
return td.text();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static Instant parseRegisteredAt(Element td) {
|
public static String parseRegisteredAt(Element td) {
|
||||||
final String registeredAtString = td.attr("title");
|
return td.attr("title");
|
||||||
return DATE_TIME_FORMATTER.parse(registeredAtString, Instant::from);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,8 +1,6 @@
|
||||||
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
package com.myoa.engineering.crawl.ppomppu.processor.dto;
|
||||||
|
|
||||||
import java.time.Instant;
|
import java.time.Instant;
|
||||||
import java.time.ZoneId;
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.function.Function;
|
import java.util.function.Function;
|
||||||
import java.util.stream.Collectors;
|
import java.util.stream.Collectors;
|
||||||
|
@ -11,6 +9,7 @@ import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||||
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
|
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
|
||||||
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
|
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
|
||||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||||
|
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* PpomppuArticleTransformer
|
* PpomppuArticleTransformer
|
||||||
|
@ -26,9 +25,6 @@ public final class PpomppuArticleTransformer {
|
||||||
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
|
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
|
||||||
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
|
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
|
||||||
|
|
||||||
private static final DateTimeFormatter DATE_TIME_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")
|
|
||||||
.withZone(ZoneId.of("Asia/Seoul"));
|
|
||||||
|
|
||||||
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
|
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
|
||||||
SimpleMessageDTO.builder()
|
SimpleMessageDTO.builder()
|
||||||
.requestedAt(Instant.now())
|
.requestedAt(Instant.now())
|
||||||
|
@ -47,7 +43,7 @@ public final class PpomppuArticleTransformer {
|
||||||
.collect(Collectors.joining("\n\n"));
|
.collect(Collectors.joining("\n\n"));
|
||||||
return SimpleMessageDTO.builder()
|
return SimpleMessageDTO.builder()
|
||||||
.requestedAt(requestedAt)
|
.requestedAt(requestedAt)
|
||||||
.title(DATE_TIME_FORMATTER.format(requestedAt))
|
.title(DateUtil.DATE_TIME_FORMATTER.format(requestedAt))
|
||||||
.body(body)
|
.body(body)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
@ -62,7 +58,7 @@ public final class PpomppuArticleTransformer {
|
||||||
.requestedAt(requestedAt)
|
.requestedAt(requestedAt)
|
||||||
.title(String.format(TITLE_FORMAT_V1,
|
.title(String.format(TITLE_FORMAT_V1,
|
||||||
boardName.getMenuName(),
|
boardName.getMenuName(),
|
||||||
DATE_TIME_FORMATTER.format(requestedAt)))
|
DateUtil.DATE_TIME_FORMATTER.format(requestedAt)))
|
||||||
.blocks(body)
|
.blocks(body)
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,16 +1,18 @@
|
||||||
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
package com.myoa.engineering.crawl.ppomppu.processor.service;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
|
||||||
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
|
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
|
||||||
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
|
||||||
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
|
||||||
|
|
||||||
import java.util.Comparator;
|
|
||||||
import java.util.List;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
import reactor.core.publisher.Flux;
|
import reactor.core.publisher.Flux;
|
||||||
import reactor.core.publisher.Mono;
|
import reactor.core.publisher.Mono;
|
||||||
|
|
||||||
|
@ -33,8 +35,10 @@ public class PpomppuFeedService {
|
||||||
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
|
||||||
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
|
||||||
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
final Mono<Element> tbody = extractTbodyFromHtml(html);
|
||||||
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
|
||||||
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
|
||||||
|
.onErrorContinue((t, e) -> log.error("Error occured : {}, value: {}",
|
||||||
|
e, t.getLocalizedMessage()))
|
||||||
.map(e -> e.updateBoardName(boardName))
|
.map(e -> e.updateBoardName(boardName))
|
||||||
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
|
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
|
||||||
// .doOnNext(e -> log.info("parsed Result: {}", e))
|
// .doOnNext(e -> log.info("parsed Result: {}", e))
|
||||||
|
|
|
@ -28,7 +28,7 @@ public class ParseEventEmitter {
|
||||||
this.processorAPIService = processorAPIService;
|
this.processorAPIService = processorAPIService;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Scheduled(fixedRate = 300 * 1000L)
|
@Scheduled(fixedRate = 600 * 1000L)
|
||||||
public void emitBoards() {
|
public void emitBoards() {
|
||||||
log.info("[emitDomesticBoard] trigger fired!");
|
log.info("[emitDomesticBoard] trigger fired!");
|
||||||
Arrays.stream(PpomppuBoardName.values())
|
Arrays.stream(PpomppuBoardName.values())
|
||||||
|
|
|
@ -0,0 +1,18 @@
|
||||||
|
package com.myoa.engineering.crawl.ppomppu.support.util;
|
||||||
|
|
||||||
|
import java.time.ZoneId;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
/**
 * DateUtil
 *
 * Non-instantiable holder for a shared date-time formatter.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2022-01-02
 */
public final class DateUtil {

    /** Pattern used by {@link #DATE_TIME_FORMATTER}: two-digit year, dot-separated date. */
    private static final String PATTERN = "yy.MM.dd HH:mm:ss";

    /** Zone applied by {@link #DATE_TIME_FORMATTER} when formatting and parsing. */
    private static final ZoneId ZONE = ZoneId.of("Asia/Seoul");

    /**
     * Formatter for the {@code yy.MM.dd HH:mm:ss} layout, bound to the
     * {@code Asia/Seoul} zone so it can format and parse {@link java.time.Instant}s.
     */
    public static final DateTimeFormatter DATE_TIME_FORMATTER =
            DateTimeFormatter.ofPattern(PATTERN).withZone(ZONE);

    // Static members only; instances are never created.
    private DateUtil() {
    }
}
|
Loading…
Reference in New Issue