[NO-ISSUE] Change references

This commit is contained in:
woozu-shin
2024-05-09 09:32:39 +09:00
parent 08737664f4
commit 378e6ef68e
34 changed files with 93 additions and 543 deletions

View File

@@ -1,7 +1,7 @@
dependencies {
developmentOnly 'org.springframework.boot:spring-boot-devtools'
runtimeOnly 'com.h2database:h2'
runtimeOnly 'com.mysql:mysql-connector-j'
runtimeOnly 'com.mysql:mysql-connector-j:8.4.0'
compileOnly 'org.projectlombok:lombok'
implementation project(':support')
@@ -17,7 +17,6 @@ dependencies {
implementation 'org.springframework.boot:spring-boot-configuration-processor'
implementation 'org.springframework.cloud:spring-cloud-starter-config'
implementation 'org.springframework.boot:spring-boot-starter-actuator'
implementation 'com.rometools:rome:2.1.0'
implementation 'org.jsoup:jsoup:1.17.2'
implementation 'com.h2database:h2:2.2.224'
implementation "org.springframework.cloud:spring-cloud-starter-openfeign"

View File

@@ -7,13 +7,12 @@ import com.myoa.engineering.crawl.shopping.domain.entity.BaseScanDomain;
import com.myoa.engineering.crawl.shopping.infra.repository.BaseScanRepository;
import com.zaxxer.hikari.HikariConfig;
import com.zaxxer.hikari.HikariDataSource;
import jakarta.persistence.EntityManagerFactory;
import lombok.NonNull;
import org.hibernate.boot.model.naming.CamelCaseToUnderscoresNamingStrategy;
import org.hibernate.boot.model.naming.ImplicitNamingStrategyJpaCompliantImpl;
import org.hibernate.cfg.AvailableSettings;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.boot.orm.jpa.EntityManagerFactoryBuilder;
import org.springframework.boot.orm.jpa.hibernate.SpringImplicitNamingStrategy;
import org.springframework.boot.orm.jpa.hibernate.SpringPhysicalNamingStrategy;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.data.jpa.repository.config.EnableJpaAuditing;
@@ -22,6 +21,7 @@ import org.springframework.orm.jpa.JpaTransactionManager;
import org.springframework.orm.jpa.LocalContainerEntityManagerFactoryBean;
import org.springframework.transaction.PlatformTransactionManager;
import javax.persistence.EntityManagerFactory;
import javax.sql.DataSource;
import java.util.Enumeration;
import java.util.HashMap;
@@ -96,8 +96,8 @@ public class ShoppingCrawlerDatasourceConfiguration {
properties.put(AvailableSettings.SHOW_SQL, hibernateProperties.getShowSql());
properties.put(AvailableSettings.HBM2DDL_AUTO, hibernateProperties.getHbm2ddlAuto());
properties.put(AvailableSettings.CONNECTION_PROVIDER_DISABLES_AUTOCOMMIT, hibernateProperties.getDisableAutoCommit());
properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY, ImplicitNamingStrategyJpaCompliantImpl.class.getName());
properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY, CamelCaseToUnderscoresNamingStrategy.class.getName());
properties.put(AvailableSettings.IMPLICIT_NAMING_STRATEGY, SpringImplicitNamingStrategy.class.getName());
properties.put(AvailableSettings.PHYSICAL_NAMING_STRATEGY, SpringPhysicalNamingStrategy.class.getName());
properties.put(AvailableSettings.GENERATE_STATISTICS, "false");
// properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS, "true");
// properties.put(AvailableSettings.GLOBALLY_QUOTED_IDENTIFIERS_SKIP_COLUMN_DEFINITIONS, "true");

View File

@@ -3,10 +3,11 @@ package com.myoa.engineering.crawl.shopping.domain.entity;
import lombok.Getter;
import org.springframework.data.annotation.CreatedDate;
import org.springframework.data.annotation.LastModifiedDate;
import jakarta.persistence.*;
import org.springframework.data.jpa.domain.support.AuditingEntityListener;
import javax.persistence.Column;
import javax.persistence.EntityListeners;
import javax.persistence.MappedSuperclass;
import java.io.Serializable;
import java.time.Instant;

View File

@@ -1,19 +1,14 @@
package com.myoa.engineering.crawl.ppomppu.processor.domain;
package com.myoa.engineering.crawl.shopping.domain.entity.v1;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import java.time.Instant;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import javax.persistence.*;
import java.time.Instant;
@Getter
@NoArgsConstructor
@Entity

View File

@@ -2,11 +2,11 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v1;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import jakarta.persistence.*;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import javax.persistence.*;
import java.time.Instant;
@Getter

View File

@@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import lombok.Getter;
import lombok.NoArgsConstructor;
import jakarta.persistence.*;
import javax.persistence.*;
import java.time.Instant;
@Getter

View File

@@ -5,7 +5,7 @@ import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName
import lombok.Getter;
import lombok.NoArgsConstructor;
import jakarta.persistence.*;
import javax.persistence.*;
@Getter
@NoArgsConstructor

View File

@@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import lombok.Getter;
import lombok.NoArgsConstructor;
import jakarta.persistence.*;
import javax.persistence.*;
import java.time.Instant;
@Getter

View File

@@ -1,7 +1,7 @@
package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import jakarta.persistence.*;
import javax.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

View File

@@ -2,7 +2,7 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import jakarta.persistence.*;
import javax.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

View File

@@ -2,7 +2,7 @@ package com.myoa.engineering.crawl.shopping.domain.entity.v2;
import com.myoa.engineering.crawl.shopping.domain.entity.Auditable;
import com.myoa.engineering.crawl.shopping.support.dto.constant.CrawlTarget;
import jakarta.persistence.*;
import javax.persistence.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;

View File

@@ -1,14 +1,13 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
import java.time.Instant;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
package com.myoa.engineering.crawl.shopping.dto;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.util.DateUtil;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import java.time.Instant;
/**
* PpomppuArticleParseDTO
*
@@ -46,7 +45,7 @@ public class PpomppuArticleParseDTO {
public boolean isInValidated() {
return articleId == null || articleId.isEmpty()
|| hit == null || hit.isEmpty();
|| hit == null || hit.isEmpty();
}
public PpomppuArticle convert() {

View File

@@ -1,39 +1,40 @@
package com.myoa.engineering.crawl.ppomppu.processor.dto;
package com.myoa.engineering.crawl.shopping.dto;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.shopping.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import com.myoa.engineering.crawl.shopping.support.util.DateUtil;
import java.time.Instant;
import java.util.List;
import java.util.function.Function;
import java.util.stream.Collectors;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import com.myoa.engineering.crawl.ppomppu.support.util.DateUtil;
/**
* PpomppuArticleTransformer
*
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-11-21
*
*/
public final class PpomppuArticleTransformer {
private PpomppuArticleTransformer() {}
private PpomppuArticleTransformer() {
}
private static final String MESSAGE_FORMAT_V1 = "%s)) <%s|LINK> `%s` ";
private static final String MESSAGE_FORMAT_V2 = "%s *<%s|LINK>*\n%s";
private static final String TITLE_FORMAT_V1 = "_*:hearts: %s | %s*_";
public static final Function<PpomppuArticle, SimpleMessageDTO> TRANSFORM_TO_MESSAGE_DTO = article ->
SimpleMessageDTO.builder()
.requestedAt(Instant.now())
.publishedAt(article.getRegisteredAt())
.title(String.format(MESSAGE_FORMAT_V1,
article.getBoardName().getMenuName(), article.getArticleUrl(),
article.getTitle()))
.body(article.getArticleUrl())
.build();
SimpleMessageDTO.builder()
.requestedAt(Instant.now())
.publishedAt(article.getRegisteredAt())
.title(String.format(MESSAGE_FORMAT_V1,
article.getBoardName().getMenuName(), article.getArticleUrl(),
article.getTitle()))
.body(article.getArticleUrl())
.build();
// https://stackoverflow.com/questions/24882927/using-streams-to-convert-a-list-of-objects-into-a-string-obtained-from-the-tostr
public static SimpleMessageDTO transformToSimpleMessage(List<PpomppuArticle> articles) {

View File

@@ -1,4 +1,4 @@
package com.myoa.engineering.crawl.ppomppu.sender.dto;
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;

View File

@@ -1,4 +1,4 @@
package com.myoa.engineering.crawl.ppomppu.sender.dto;
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;

View File

@@ -1,4 +1,4 @@
package com.myoa.engineering.crawl.ppomppu.sender.dto;
package com.myoa.engineering.crawl.shopping.dto.slack;
import java.io.Serializable;

View File

@@ -1,4 +1,4 @@
package com.myoa.engineering.crawl.ppomppu.sender.dto;
package com.myoa.engineering.crawl.shopping.dto.slack;
import lombok.AllArgsConstructor;
import lombok.Getter;

View File

@@ -1,21 +1,19 @@
package com.myoa.engineering.crawl.shopping.dto.slack;
import java.util.List;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO.Block;
import com.myoa.engineering.crawl.shopping.support.dto.BlockMessageDTO;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import java.util.List;
/**
* SlackMessageDTO
*
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-11-14
*
*/
@Getter
@NoArgsConstructor
@@ -51,6 +49,7 @@ public class SlackMessageDTO implements MessageDTO {
slackSectionMessageBlock.applyImageaccessory(block.getImageUrl(), block.getAltText());
addBlock(slackSectionMessageBlock);
}
/**
 * Appends a section block built from raw text.
 * <p>
 * The text is converted with {@code SlackSectionMessageBlock.ofMarkDown} and the result is handed to
 * {@code addBlock}.
 * NOTE(review): presumably {@code rawBlockMessage} is Slack mrkdwn text; confirm against
 * {@code ofMarkDown}, which is not visible here.
 *
 * @param rawBlockMessage raw text of the section to append
 */
public void addSectionBlock(String rawBlockMessage) {
addBlock(SlackSectionMessageBlock.ofMarkDown(rawBlockMessage));
}

View File

@@ -1,16 +1,15 @@
package com.myoa.engineering.crawl.ppomppu.sender.dto;
package com.myoa.engineering.crawl.shopping.dto.slack;
import com.fasterxml.jackson.annotation.JsonInclude;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
/**
* SectionBlock
*
* @author Shin Woo-jin (woo-jin.shin@linecorp.com)
* @since 2021-11-30
*
*/
@Getter
@NoArgsConstructor

View File

@@ -1,70 +0,0 @@
package com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.stereotype.Component;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.web.reactive.function.client.WebClientRequestException;
import com.myoa.engineering.crawl.ppomppu.processor.dto.constant.WebClientPropertiesUnitName;
import com.myoa.engineering.crawl.ppomppu.support.dto.BlockMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.dto.SimpleMessageDTO;
import com.myoa.engineering.crawl.ppomppu.support.webclient.factory.WebClientFilterFactory;
import com.myoa.engineering.crawl.ppomppu.support.webclient.factory.WebFluxExchangeStragiesFactory;
import com.myoa.engineering.crawl.ppomppu.support.webclient.properties.WebClientProperties;
import com.myoa.engineering.crawl.ppomppu.support.webclient.properties.WebClientProperties.WebClientPropertiesUnit;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
/**
 * MessageSenderAPIClient
 * <p>
 * HTTP client that posts message DTOs to the API configured under the
 * {@code PPOMPPU_NOTIFIER_SENDER_API} web-client unit. Both public send methods share one request
 * pipeline and differ only in the endpoint path and payload type.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-17
 */
@Slf4j
@Component
public class MessageSenderAPIClient {

    private static final String SIMPLE_MESSAGE_URI = "/api/v1/messages/sendSimpleMessage/messengers/slack";
    private static final String BLOCK_MESSAGE_URI = "/api/v1/messages/sendBlockMessage/messengers/slack";

    /** Response body type shared by every call; created once instead of once per request. */
    private static final ParameterizedTypeReference<String> STRING_BODY =
            new ParameterizedTypeReference<String>() {};

    private final WebClient webClient;

    /**
     * Builds the underlying {@link WebClient} from the unit registered for the sender API.
     *
     * @param webClientProperties properties holding the base URL of the sender API unit
     */
    public MessageSenderAPIClient(WebClientProperties webClientProperties) {
        WebClientPropertiesUnit webClientPropertiesUnit =
                webClientProperties.find(WebClientPropertiesUnitName.PPOMPPU_NOTIFIER_SENDER_API.getUnitName());

        this.webClient = WebClient.builder()
                                  .baseUrl(webClientPropertiesUnit.getBaseUrl())
                                  .exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
                                  .defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_VALUE)
                                  // Request/response logging filters are intentionally left disabled:
                                  // .filter(WebClientFilterFactory.logRequest())
                                  // .filter(WebClientFilterFactory.logResponse())
                                  .build();
    }

    /**
     * Posts a simple message to the Slack "sendSimpleMessage" endpoint.
     *
     * @param dto message to send
     * @return the response body, or an empty Mono when the request itself failed
     */
    public Mono<String> sendSimpleMessageToSlack(SimpleMessageDTO dto) {
        return postToSlack(SIMPLE_MESSAGE_URI, dto);
    }

    /**
     * Posts a block message to the Slack "sendBlockMessage" endpoint.
     *
     * @param dto message to send
     * @return the response body, or an empty Mono when the request itself failed
     */
    public Mono<String> sendBlockMessageToSlack(BlockMessageDTO dto) {
        return postToSlack(BLOCK_MESSAGE_URI, dto);
    }

    /**
     * Shared POST pipeline used by both send methods.
     * <p>
     * Sending is best-effort: a {@link WebClientRequestException} is logged and converted into an
     * empty Mono rather than propagated. Other error types are not intercepted here.
     *
     * @param uri     endpoint path, relative to the configured base URL
     * @param payload request body
     * @return the response body as a String
     */
    private Mono<String> postToSlack(String uri, Object payload) {
        return webClient.post()
                        .uri(uri)
                        .bodyValue(payload)
                        .exchangeToMono(response -> response.bodyToMono(STRING_BODY))
                        .publishOn(Schedulers.boundedElastic())
                        .onErrorResume(WebClientRequestException.class, t -> {
                            log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
                            return Mono.empty();
                        });
    }
}

View File

@@ -1,52 +0,0 @@
package com.myoa.engineering.crawl.ppomppu.sender.infrastructure.client;
import java.util.ArrayList;
import org.springframework.stereotype.Component;
import com.myoa.engineering.crawl.ppomppu.sender.configuration.properties.SlackSecretProperties;
import com.myoa.engineering.crawl.ppomppu.sender.configuration.properties.SlackSecretProperties.SlackSecretPropertiesUnit;
import com.myoa.engineering.crawl.ppomppu.sender.dto.SlackMessageDTO;
import lombok.extern.slf4j.Slf4j;
/**
 * Slack sender bound to the secret unit named {@code monge_shopping_bot}.
 * <p>
 * Besides passing that unit's token to the parent sender, it offers factory methods that create
 * {@link SlackMessageDTO} instances pre-filled with the unit's channel, icon emoji and username.
 */
@Slf4j
@Component
public class MongeShoppingBotSlackMessageSender extends SlackMessageSender {

    private static final String SLACK_SECRET_UNIT_NAME = "monge_shopping_bot";

    private final SlackSecretPropertiesUnit slackProperties;

    /**
     * @param slackSecretProperties registry of Slack secret units; the unit is looked up once for the
     *                              parent's token and once more to keep a reference, because the
     *                              {@code super(...)} call has to be the first statement
     */
    public MongeShoppingBotSlackMessageSender(SlackSecretProperties slackSecretProperties) {
        super(slackSecretProperties.find(SLACK_SECRET_UNIT_NAME).getToken());
        slackProperties = slackSecretProperties.find(SLACK_SECRET_UNIT_NAME);
    }

    /**
     * Creates a message carrying only the sender identity (channel, icon emoji, username).
     *
     * @return a new message without text or blocks set
     */
    public SlackMessageDTO ofMessageTemplate() {
        final SlackSecretPropertiesUnit unit = slackProperties;
        return SlackMessageDTO.builder()
                .channel(unit.getChannel())
                .iconEmoji(unit.getIconEmoji())
                .username(unit.getUsername())
                .build();
    }

    /**
     * Creates a plain-text message with the sender identity filled in.
     *
     * @param text message text
     * @return a new message with {@code text} set
     */
    public SlackMessageDTO ofMessage(String text) {
        final SlackSecretPropertiesUnit unit = slackProperties;
        return SlackMessageDTO.builder()
                .channel(unit.getChannel())
                .iconEmoji(unit.getIconEmoji())
                .username(unit.getUsername())
                .text(text)
                .build();
    }

    /**
     * Creates a message with the sender identity and an empty, mutable block list.
     *
     * @return a new message whose blocks are a fresh {@link ArrayList}
     */
    public SlackMessageDTO ofBlockMessageBased() {
        final SlackSecretPropertiesUnit unit = slackProperties;
        return SlackMessageDTO.builder()
                .channel(unit.getChannel())
                .iconEmoji(unit.getIconEmoji())
                .username(unit.getUsername())
                .blocks(new ArrayList<>())
                .build();
    }
}

View File

@@ -1,47 +0,0 @@
package com.myoa.engineering.crawl.shopping.infra.client.slack;
import lombok.extern.slf4j.Slf4j;
/**
* SlackMessageSender
* <p>
* Currently an empty shell: the {@code implements MessageSender<SlackMessageDTO>} clause and the
* whole WebClient-based implementation below are commented out, so this class declares no fields,
* constructors or methods. The disabled code posted messages to Slack's {@code /chat.postMessage}
* endpoint with a bearer token.
*
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
* @since 2021-09-08
*/
@Slf4j
public class SlackMessageSender { /* implements MessageSender<SlackMessageDTO> {*/
// Disabled implementation, kept for reference only. It is not compiled.
/*
private static final String SLACK_API_URL = "https://slack.com/api";
private final WebClient webClient;
public SlackMessageSender(String apiSecret) {
this.webClient = WebClient.builder()
.baseUrl(SLACK_API_URL)
.defaultHeader(HttpHeaders.AUTHORIZATION, "Bearer " + apiSecret)
.exchangeStrategies(WebFluxExchangeStragiesFactory.ofDefault())
.defaultHeader(HttpHeaders.CONTENT_TYPE, MediaType.APPLICATION_JSON_UTF8_VALUE)
.defaultHeader(HttpHeaders.ACCEPT_CHARSET, "UTF-8")
.filter(WebClientFilterFactory.logRequest())
.filter(WebClientFilterFactory.logResponse())
.build();
}
@Override
public Mono<String> sendMessage(SlackMessageDTO message) {
return webClient.post()
.uri("/chat.postMessage")
.bodyValue(message)
.exchangeToMono(e -> e.bodyToMono(String.class))
.publishOn(Schedulers.boundedElastic())
.onErrorResume(WebClientRequestException.class, t -> {
log.info("Exception occured, ignoring. : {}", t.getClass().getSimpleName());
return Mono.empty();
})
.doOnNext(e -> log.info("[sendMessage] {}", e));
}
*/
}

View File

@@ -4,7 +4,7 @@ import com.myoa.engineering.crawl.shopping.domain.entity.v2.Article;
import com.myoa.engineering.crawl.shopping.domain.model.v2.ArticleModel;
import com.myoa.engineering.crawl.shopping.event.ArticleUpsertEvent;
import com.myoa.engineering.crawl.shopping.infra.repository.v2.ArticleRepository;
import jakarta.transaction.Transactional;
import javax.transaction.Transactional;
import org.springframework.context.ApplicationEventPublisher;
import org.springframework.stereotype.Service;

View File

@@ -1,44 +0,0 @@
package com.myoa.engineering.crawl.ppomppu.processor.service;
import java.util.List;
import org.springframework.stereotype.Service;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleTransformer;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.MessageSenderAPIClient;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Mono;
/**
 * MessageSenderService
 * <p>
 * Converts {@link PpomppuArticle} data into message DTOs with {@link PpomppuArticleTransformer} and
 * forwards them to {@link MessageSenderAPIClient}.
 *
 * @author Shin Woo-jin (woo-jin.shin@linecorp.com)
 * @since 2021-11-21
 */
@Slf4j
@Service
public class MessageSenderService {

    private final MessageSenderAPIClient messageSenderAPIClient;

    /**
     * @param messageSenderAPIClient client used for every outgoing message
     */
    public MessageSenderService(MessageSenderAPIClient messageSenderAPIClient) {
        this.messageSenderAPIClient = messageSenderAPIClient;
    }

    /**
     * Sends one article as a simple message.
     *
     * @param article article to announce
     * @return whatever the client returns for the request
     */
    public Mono<String> sendSimpleMessageToSlack(PpomppuArticle article) {
        return messageSenderAPIClient.sendSimpleMessageToSlack(
                PpomppuArticleTransformer.TRANSFORM_TO_MESSAGE_DTO.apply(article)
        );
    }

    /**
     * Sends several articles combined into a single simple message.
     *
     * @param articles articles to announce together
     * @return whatever the client returns for the request
     */
    public Mono<String> sendSimpleMessageToSlack(List<PpomppuArticle> articles) {
        return messageSenderAPIClient.sendSimpleMessageToSlack(
                PpomppuArticleTransformer.transformToSimpleMessage(articles)
        );
    }

    /**
     * Sends several articles of one board as a block message.
     *
     * @param boardName board the articles belong to
     * @param articles  articles to announce together
     * @return whatever the client returns for the request
     */
    public Mono<String> sendBlockMessageToSlack(PpomppuBoardName boardName, List<PpomppuArticle> articles) {
        return messageSenderAPIClient.sendBlockMessageToSlack(
                PpomppuArticleTransformer.transformToBlockMessage(boardName, articles)
        );
    }
}

View File

@@ -1,66 +0,0 @@
package com.myoa.engineering.crawl.ppomppu.processor.service;
import java.util.Comparator;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Component;
import com.myoa.engineering.crawl.ppomppu.processor.domain.PpomppuArticle;
import com.myoa.engineering.crawl.ppomppu.processor.dto.PpomppuArticleParser;
import com.myoa.engineering.crawl.ppomppu.processor.infrastructure.client.PpomppuBoardFeedRetriever;
import com.myoa.engineering.crawl.ppomppu.support.dto.code.PpomppuBoardName;
import lombok.extern.slf4j.Slf4j;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
/**
* PpomppuFeedService
*
* @author Shin Woo-jin (woozu.shin@kakaoent.com)
* @since 2021-09-08
*/
@Slf4j
@Component
public class PpomppuFeedService {
private final PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever;
public PpomppuFeedService(PpomppuBoardFeedRetriever ppomppuBoardFeedRetriever) {
this.ppomppuBoardFeedRetriever = ppomppuBoardFeedRetriever;
}
public Mono<List<PpomppuArticle>> getArticles(PpomppuBoardName boardName) {
final Mono<String> html = ppomppuBoardFeedRetriever.getHtml(boardName.getResourcePath());
final Mono<Element> tbody = extractTbodyFromHtml(html);
// .doOnNext(e -> log.info("pre tbody - {}", e.html()));
return extractArticlesFromTbody(tbody).map(this::convertFromElement)
.onErrorContinue((t, e) -> log.error("Error occured : {}, value: {}",
e, t.getLocalizedMessage()))
.map(e -> e.updateBoardName(boardName))
.sort(Comparator.comparing(PpomppuArticle::getArticleId))
// .doOnNext(e -> log.info("parsed Result: {}", e))
.collectList();
}
private Mono<Element> extractTbodyFromHtml(Mono<String> html) {
return html.map(Jsoup::parse)
.mapNotNull(e -> e.getElementById("revolution_main_table"))
.map(e -> e.getElementsByTag("tbody"))
// .doOnNext(e -> log.info("tbody - {}", e.html()))
.map(e -> e.stream()
.findFirst()
.orElseThrow(() -> new IndexOutOfBoundsException("no tbody")));
}
private Flux<Element> extractArticlesFromTbody(Mono<Element> tbody) {
return Flux.concat(tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list0").toArray(Element[]::new))),
tbody.flatMapMany(e -> Flux.fromArray(e.select("tr.list1").toArray(Element[]::new))));
}
private PpomppuArticle convertFromElement(Element element) {
return PpomppuArticleParser.toArticle(element.getElementsByTag("td"));
}
}

View File

@@ -1,118 +1,6 @@
package com.myoa.engineering.crawl.shopping.crawlhandler.parser;
import com.myoa.engineering.crawl.shopping.domain.entity.v1.PpomppuArticle;
import com.myoa.engineering.crawl.shopping.support.dto.constant.PpomppuBoardName;
import com.myoa.engineering.crawl.shopping.util.DateTimeUtils;
import com.myoa.engineering.crawl.shopping.util.NumberUtils;
import com.myoa.engineering.crawl.shopping.util.TestDataUtils;
import io.micrometer.core.instrument.util.StringUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.junit.jupiter.api.Test;
import java.time.ZonedDateTime;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Exploratory tests for parsing Ppomppu board rows into {@link PpomppuArticle} objects.
 * <p>
 * The parsing helpers live in this test class. The tests print their results and assert nothing,
 * so they only fail when parsing throws.
 */
class PpomppuArticleParserV2Test {

    /** Matches text made of digits only. */
    private static final Pattern NUMERIC_PATTERN = Pattern.compile("\\d+");

    /** Captures the content of each {@code [...]} group in a title. */
    private static final Pattern BOARD_NAME_PATTERN = Pattern.compile("\\[(.+?)\\]");

    /** Separator between up-votes and down-votes in the vote cell. */
    private static final String VOTE_DELIMITER = " - ";

    /** Number of {@code td} cells a row must have to be treated as an article. */
    private static final int ARTICLE_COLUMN_COUNT = 6;

    @Test
    void test1() {
        String data = TestDataUtils.fileToString("testdata/zboard/file1.html");
        Document document = Jsoup.parse(data);
        Elements trList = document.getElementById("revolution_main_table").getElementsByTag("tr");

        List<PpomppuArticle> articles = trList.stream()
                                              .filter(this::isRealArticle)
                                              .map(this::parse)
                                              .toList();

        System.out.println(articles.size());
        System.out.println(articles.get(0));
        System.out.println(articles.get(1));
    }

    /**
     * Tells article rows apart from other rows: an article row has exactly six cells and its first
     * cell contains digits only.
     */
    private boolean isRealArticle(Element tr) {
        Elements tdList = tr.getElementsByTag("td");
        return tdList.size() == ARTICLE_COLUMN_COUNT && hasOnlyNumeric(tdList.get(0));
    }

    private boolean hasOnlyNumeric(Element td) {
        return NUMERIC_PATTERN.matcher(td.text()).matches();
    }

    /**
     * Builds an article from one row. Cells are read by position: 0 = article id, 2 = title and
     * link, 3 = registration time, 4 = votes, 5 = hit count.
     *
     * @param tr table row accepted by {@link #isRealArticle(Element)}
     * @return the parsed article
     */
    public PpomppuArticle parse(Element tr) {
        Elements tdList = tr.getElementsByTag("td");

        Long articleId = parseArticleId(tdList.get(0));
        String title = tdList.get(2).text();
        String articleUrl = parseArticleUrl(tdList.get(2).getElementsByTag("a").attr("href"));
        PpomppuBoardName boardName = parseBoardName(title);
        Integer recommended = parseRecommended(tdList.get(4));
        Integer hit = NumberUtils.parseInt(tdList.get(5).text(), 0);
        ZonedDateTime registeredAt = DateTimeUtils.parse(tdList.get(3).text());

        return PpomppuArticle.builder()
                             .articleId(articleId)
                             .title(title)
                             .boardName(boardName)
                             .articleUrl(articleUrl)
                             .recommended(recommended)
                             .hit(hit)
                             .registeredAt(registeredAt)
                             .build();
    }

    /**
     * @param td cell holding the article id
     * @return the id parsed from the trimmed cell text
     * @throws NumberFormatException when the text is not a number
     */
    public Long parseArticleId(Element td) {
        return Long.parseLong(td.text().trim());
    }

    /**
     * Computes up-votes minus down-votes from a cell formatted as {@code "<up> - <down>"}.
     *
     * @param td vote cell
     * @return the net recommendation count, or {@code null} when the cell is empty or does not
     *         contain both parts
     * @throws NumberFormatException when either part is not a number
     */
    public static Integer parseRecommended(Element td) {
        final String voteString = td.text();
        if (StringUtils.isEmpty(voteString)) {
            return null;
        }

        // Split once and check the shape first; a cell without the delimiter has no vote pair.
        final String[] votes = voteString.split(VOTE_DELIMITER);
        if (votes.length < 2) {
            return null;
        }

        final int voteUp = Integer.parseInt(votes[0].trim());
        final int voteDown = Integer.parseInt(votes[1].trim());
        return voteUp - voteDown;
    }

    /** Resolves the view-page URL via {@code PpomppuBoardName.ofViewPageUrl}. */
    public static String parseArticleUrl(String data) {
        return PpomppuBoardName.ofViewPageUrl(data);
    }

    /**
     * Resolves the board from the last {@code [...]} group in the title.
     *
     * @param fullTitle full title text
     * @return the result of {@code PpomppuBoardName.ofBoardName} for the last bracketed group;
     *         {@code null} is passed to it when the title has no such group
     */
    public PpomppuBoardName parseBoardName(String fullTitle) {
        Matcher matcher = BOARD_NAME_PATTERN.matcher(fullTitle);
        String lastMatched = null;
        while (matcher.find()) {
            lastMatched = matcher.group(1);
        }
        return PpomppuBoardName.ofBoardName(lastMatched, true);
    }

    @Test
    void test2() {
        PpomppuBoardName boardName = parseBoardName("[자사몰]푸마 메쉬 폼스트라이프 [에디션] 5종 [세트] (18,220원/무료)6 [의류/잡화]");
        System.out.println(boardName);
    }
}